예제 #1
0
def main():
    """Train the model, report train-set metrics, predict the test set, save, then plot.

    The run proceeds in this order:

    1. Load the training data via ``read_train()``.
    2. Fit the network with ``nn_model`` using a fixed learning rate and
       iteration count.
    3. Predict on the training set and print per-label precision, recall and
       F-score for labels 0 through 9.
    4. Load the test set via ``read_test()`` and predict on it.
    5. Write the cost history, predictions, parameters and hyper-parameters
       to ``model.json``.
    6. Show the cost curve.

    The model is written *before* the plot is shown: ``plt.show()`` normally
    blocks until the window is closed and can fail on a headless backend, and
    neither should be able to cost us the result of a long training run.
    """
    print("Reading dataset...")
    X_train, Y_train, Y_train_raw = read_train()
    print("Dataset ready.")

    print("Start training...")
    learning_rate = 0.045
    num_iterations = 2500
    parameters, costs = nn_model(X_train,
                                 Y_train,
                                 num_iterations=num_iterations,
                                 learning_rate=learning_rate,
                                 print_cost=True)
    print("Traning finished.")

    print("Predicting train set...")
    Y_prediction_train = predict(X_train, parameters)
    # The raw labels are reshaped to a column with X_train.shape[1] rows;
    # presumably examples are laid out along axis 1 of X_train -- confirm
    # against read_train().
    precision, recall, fscore, _ = precision_recall_fscore_support(
        Y_train_raw.reshape((X_train.shape[1], 1)),
        Y_prediction_train,
        labels=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    print("Precistion:", precision)
    print("Recall:", recall)
    print("Fscore:", fscore)

    print("Reading test set...")
    X_test = read_test()
    print("Predicting test set...")
    Y_prediction_test = predict(X_test, parameters)
    print("Predicted.")

    # Drop singleton dimensions so the costs plot as a single curve and
    # serialize as a flat list.
    costs = np.squeeze(costs)

    # Everything is converted with .tolist() because numpy arrays are not
    # JSON-serializable.
    model = {
        "costs": costs.tolist(),
        "Y_prediction_test": Y_prediction_test.tolist(),
        "Y_prediction_train": Y_prediction_train.tolist(),
        "W1": parameters["W1"].tolist(),
        "W2": parameters["W2"].tolist(),
        "b1": parameters["b1"].tolist(),
        "b2": parameters["b2"].tolist(),
        "learning_rate": learning_rate,
        "num_iterations": num_iterations,
    }

    # Persist first; see the docstring for why this precedes plotting.
    print("Saving model...")
    with open("model.json", "w", encoding="utf-8") as f:
        json.dump(model, f)
    print("Model saved.")

    plt.plot(costs)
    plt.ylabel("cost")
    plt.xlabel("iterations")
    plt.title("Learning rate = " + str(learning_rate))
    plt.show()
def classifyRF(train_file="train.csv", test_file="test.csv", trees=70):
    """Fit a random forest on the training file and write test predictions to ``submit.csv``.

    Args:
        train_file: Passed as ``file_name`` to ``rd.read_train``.
        test_file: Passed as ``file_name`` to ``rd.read_test``.
        trees: Forwarded as the first positional argument of
            ``RandomForestClassifier`` (presumably the number of
            estimators -- confirm against the installed scikit-learn).

    The output CSV has two columns: ``ImageId`` (1-based row index) and
    ``Label`` (the predicted value for that row).
    """
    print("Reading train data")
    features, labels = rd.read_train(file_name=train_file)
    print("Augmenting dataset")
    features, labels = rd.nudge_dataset(features, labels)
    print("Reading test data")
    test_features = rd.read_test(file_name=test_file)

    # All classifier settings other than the tree count are left at their
    # defaults.
    forest = RandomForestClassifier(trees)
    print("Training classifier")
    forest.fit(features, labels)
    predicted = forest.predict(test_features)

    # Submission format: one row per test sample, ids starting at 1.
    submission = pd.DataFrame({"ImageId": range(1, len(predicted) + 1), "Label": predicted})
    submission.to_csv('submit.csv', index=False, header=True)
예제 #3
0
def classifyRF(train_file="train.csv", test_file="test.csv", trees=70):
    """Train a random forest classifier and dump its test-set predictions to ``submit.csv``.

    Args:
        train_file: File name handed to ``rd.read_train``.
        test_file: File name handed to ``rd.read_test``.
        trees: First positional argument given to ``RandomForestClassifier``
            (presumably the estimator count -- TODO confirm).

    Side effects:
        Prints progress messages and writes ``submit.csv`` with an
        ``ImageId`` column counting from 1 and a ``Label`` column holding
        the predictions.
    """
    print("Reading train data")
    train_X, train_y = rd.read_train(file_name=train_file)
    print("Augmenting dataset")
    train_X, train_y = rd.nudge_dataset(train_X, train_y)
    print("Reading test data")
    test_X = rd.read_test(file_name=test_file)

    # Only the tree count is configured; the remaining classifier
    # parameters stay at their defaults.
    classifier = RandomForestClassifier(trees)
    print("Training classifier")
    classifier.fit(train_X, train_y)
    labels = classifier.predict(test_X)

    # Build the submission table column by column; ids are 1-based.
    image_ids = range(1, len(labels) + 1)
    columns = {"ImageId": image_ids, "Label": labels}
    pd.DataFrame(columns).to_csv('submit.csv', index=False, header=True)
def get_train_and_test_data(train_limit=-1, test_limit=-1):
    """Return the training data together with the scaled test data.

    Args:
        train_limit: Forwarded to ``get_train_data``.
        test_limit: Forwarded as ``limit`` to ``rd.read_test``.
            (A value of -1 presumably means "no limit" -- confirm in the
            reader.)

    Returns:
        A tuple ``(X, y, test_X)`` where ``X`` and ``y`` are whatever
        ``get_train_data`` returns and ``test_X`` is the test data after
        passing through ``scale``.
    """
    train_features, train_labels = get_train_data(train_limit)
    print('Loading test data')
    # Load and scale in one step; the raw test data is not needed afterwards.
    scaled_test = scale(rd.read_test(limit=test_limit))
    return train_features, train_labels, scaled_test
예제 #5
0
def get_train_and_test_data(train_limit=-1, test_limit=-1):
    """Load the training set and the scaled test set in one call.

    Args:
        train_limit: Passed straight through to ``get_train_data``.
        test_limit: Passed as the ``limit`` keyword of ``rd.read_test``
            (-1 presumably disables the limit -- TODO confirm).

    Returns:
        ``(X, y, test_X)``: the two values produced by ``get_train_data``
        followed by the output of ``scale`` applied to the test data.
    """
    training_pair = get_train_data(train_limit)
    X, y = training_pair
    print('Loading test data')
    raw_test = rd.read_test(limit=test_limit)
    # Scaling is applied to the test data here; whether the training data
    # is scaled happens (or not) inside get_train_data.
    result = (X, y, scale(raw_test))
    return result