def main():
    """Train the network, report training metrics, predict the test set and save the model.

    Steps, in order:
      1. Load the training data with ``read_train`` and fit it with
         ``nn_model`` (learning rate 0.045, 2500 iterations).
      2. Predict the training set and print per-class precision, recall and
         F-score for labels 0-9.
      3. Load the test data with ``read_test`` and predict it.
      4. Write costs, both prediction arrays, ``W1``/``W2``/``b1``/``b2`` and
         the hyper-parameters to ``model.json``.
      5. Plot the cost curve.

    The model is written *before* the plot is displayed. ``plt.show()`` can
    block until the window is closed, and the plotting calls can fail when no
    display backend is available; doing the save first means a finished
    training run is never lost to a plotting problem.
    """
    print("Reading dataset...")
    X_train, Y_train, Y_train_raw = read_train()
    print("Dataset ready.")

    print("Start training...")
    learning_rate = 0.045
    num_iterations = 2500
    parameters, costs = nn_model(
        X_train,
        Y_train,
        num_iterations=num_iterations,
        learning_rate=learning_rate,
        print_cost=True,
    )
    print("Traning finished.")

    print("Predicting train set...")
    Y_prediction_train = predict(X_train, parameters)
    # The raw labels are reshaped to a column with one row per
    # X_train.shape[1] entry.
    # NOTE(review): this presumes X_train is laid out (features, samples) and
    # that predict() returns a matching column -- confirm against predict().
    precision, recall, fscore, _ = precision_recall_fscore_support(
        Y_train_raw.reshape((X_train.shape[1], 1)),
        Y_prediction_train,
        labels=list(range(10)),
    )
    print("Precistion:", precision)
    print("Recall:", recall)
    print("Fscore:", fscore)

    print("Reading test set...")
    X_test = read_test()
    print("Predicting test set...")
    Y_prediction_test = predict(X_test, parameters)
    print("Predicted.")

    costs = np.squeeze(costs)

    model = {
        "costs": costs.tolist(),
        "Y_prediction_test": Y_prediction_test.tolist(),
        "Y_prediction_train": Y_prediction_train.tolist(),
        "W1": parameters["W1"].tolist(),
        "W2": parameters["W2"].tolist(),
        "b1": parameters["b1"].tolist(),
        "b2": parameters["b2"].tolist(),
        "learning_rate": learning_rate,
        "num_iterations": num_iterations,
    }
    print("Saving model...")
    # Serialize fully before opening the file, so a serialization error
    # cannot truncate an existing model.json.
    serialized = json.dumps(model)
    with open("model.json", "w") as f:
        f.write(serialized)
    print("Model saved.")

    # Plot last: nothing after this point has to survive a display failure.
    plt.plot(costs)
    plt.ylabel("cost")
    plt.xlabel("iterations")
    plt.title("Learning rate = " + str(learning_rate))
    plt.show()
def classifyRF(train_file="train.csv", test_file="test.csv", trees=70):
    """Fit a random forest on the training file and write test predictions to ``submit.csv``.

    Args:
        train_file: Forwarded to ``rd.read_train`` as ``file_name``.
        test_file: Forwarded to ``rd.read_test`` as ``file_name``.
        trees: Number of estimators for the ``RandomForestClassifier``.

    Returns:
        None. The result is the file ``submit.csv`` with a header row and the
        columns ``ImageId`` (numbered from 1) and ``Label``.
    """
    # Load the training data, then run it through rd.nudge_dataset.
    print("Reading train data")
    features, labels = rd.read_train(file_name=train_file)
    print("Augmenting dataset")
    features, labels = rd.nudge_dataset(features, labels)

    print("Reading test data")
    test_features = rd.read_test(file_name=test_file)

    # Only the forest size is configured; every other setting stays at the
    # library default.
    forest = RandomForestClassifier(n_estimators=trees)

    print("Training classifier")
    forest.fit(features, labels)
    predicted = forest.predict(test_features)

    # Submission format: one row per test sample, ids starting at 1.
    submission = pd.DataFrame(
        {
            "ImageId": range(1, len(predicted) + 1),
            "Label": predicted,
        }
    )
    submission.to_csv('submit.csv', index=False, header=True)
def classifyRF(train_file="train.csv", test_file="test.csv", trees=70):
    """Train a random forest and dump its predictions for the test file as a CSV.

    The training data comes from ``rd.read_train(file_name=train_file)`` and
    is passed through ``rd.nudge_dataset`` before fitting. The test data comes
    from ``rd.read_test(file_name=test_file)``. ``trees`` sets the number of
    estimators in the forest.

    Nothing is returned. Predictions are written to ``submit.csv`` (header
    included, no index column) as ``ImageId`` / ``Label`` pairs, with
    ``ImageId`` counting up from 1.
    """
    print("Reading train data")
    train_X, train_y = rd.read_train(file_name=train_file)

    print("Augmenting dataset")
    train_X, train_y = rd.nudge_dataset(train_X, train_y)

    print("Reading test data")
    test_X = rd.read_test(file_name=test_file)

    # Forest size is the only knob exposed; the rest are library defaults.
    classifier = RandomForestClassifier(n_estimators=trees)

    print("Training classifier")
    classifier.fit(train_X, train_y)
    labels = classifier.predict(test_X)

    # 1-based ids, one per prediction, in prediction order.
    image_ids = range(1, len(labels) + 1)
    frame = pd.DataFrame({"ImageId": image_ids, "Label": labels})
    frame.to_csv('submit.csv', index=False, header=True)
def get_train_and_test_data(train_limit=-1, test_limit=-1):
    """Return the training data together with the scaled test data.

    Args:
        train_limit: Passed straight to ``get_train_data``.
        test_limit: Passed to ``rd.read_test`` as ``limit``.

    Returns:
        A 3-tuple ``(X, y, test_X)``: the two values produced by
        ``get_train_data`` followed by the test data after ``scale``.
    """
    train_features, train_labels = get_train_data(train_limit)

    print('Loading test data')
    raw_test = rd.read_test(limit=test_limit)
    # NOTE(review): the test data is scaled here on its own; whether this
    # matches the preprocessing inside get_train_data is not visible from
    # this function -- confirm.
    scaled_test = scale(raw_test)

    return train_features, train_labels, scaled_test
def get_train_and_test_data(train_limit=-1, test_limit=-1):
    """Load the training data and the scaled test data in one call.

    ``train_limit`` is handed to ``get_train_data``; ``test_limit`` is handed
    to ``rd.read_test`` as its ``limit`` argument. The test data is run
    through ``scale`` before being returned.

    Returns:
        ``(X, y, test_X)`` -- the pair from ``get_train_data`` plus the
        scaled test data.
    """
    training_pair = get_train_data(train_limit)
    X, y = training_pair

    print('Loading test data')
    test_X = scale(rd.read_test(limit=test_limit))

    return X, y, test_X