# --- Disabled experiment: search optimal parameters for k-nearest neighbours ---
# kNModel = models.NearestNeighboursModel(scores, X_train, y_train, y_test, X_test)
# param_grid = kNModel.grid_search_setup()
# knn_clf = kNModel.getClassifier()
# kNModel.find_optimal_parameters(knn_clf, param_grid, 7)

# --- Disabled experiment: search optimal parameters for Random Forests ---
# randomModel = models.RandomForestModel(scores, X_train, y_train, y_test, X_test)
# param_grid = randomModel.grid_search_setup()
# rndf_clf = randomModel.getClassifier()
# randomModel.find_optimal_parameters(rndf_clf, param_grid, 6)

# --- Disabled: try the model and print results ---
# predictedSet = clf.predict(X_test)
# print("Model results on the test part of the data\n")
# print(classification_report(y_test, predictedSet))  # order is (y_true, y_pred)

# Load the test set file and run it through the same preprocessing pipeline.
rawDataTest = input_output.load_data("test.csv")

# Use transform(), not fit_transform(): re-fitting the scaler on the test set
# would scale these features with different statistics than the ones the
# classifier was trained on.
# NOTE(review): assumes `scaler` was already fitted on the training features
# earlier in this script -- confirm.
procDataTest = scaler.transform(preprocess.prepare_data(rawDataTest, train=False))

# Predict, cast the predictions to int, then binarize them for export.
predictedSetTest = clf.predict(procDataTest).astype(int)
binarizedSet = preprocess.binarize_output(predictedSetTest)

# Convert to pandas DataFrame and write the submission file.
# NOTE(review): the third argument is the 'Category' column of `rawData`
# (not `rawDataTest`) -- presumably the training labels; verify against
# export_for_submission.
df = pd.DataFrame(binarizedSet)
input_output.export_for_submission("sanfoutput.csv", df, rawData['Category'])
# Choose scoring metrics (passed to the model wrappers in the disabled
# grid-search snippets below).
scores = ['precision', 'recall']

# --- Disabled experiment: search optimal parameters for SVM ---
# svmModel = models.SVMModel(scores, X_train, y_train, y_test, X_test)
# param_grid = svmModel.grid_search_setup()
# svm_clf = svmModel.getClassifier()
# svmModel.find_optimal_parameters(svm_clf, param_grid, 6)

# --- Disabled experiment: search optimal parameters for Random Forests ---
# randomModel = models.RandomForestModel(scores, X_train, y_train, y_test, X_test)
# param_grid = randomModel.grid_search_setup()
# rndf_clf = randomModel.getClassifier()
# randomModel.find_optimal_parameters(rndf_clf, param_grid, 6)

# Try the model on X_test / y_test and print the results.
predictedSet = clf.predict(X_test)
print("Model results on the test part of the data\n")
# classification_report takes (y_true, y_pred); passing the predictions first
# swaps the reported precision and recall for every class.
print(classification_report(y_test, predictedSet))

# Load the test set file, keeping the passenger ids for the submission.
rawDataTest = input_output.load_data("test.csv")
ids = rawDataTest['PassengerId'].values

# Preprocess, predict, and cast the predictions to int before exporting.
procDataTest = preprocess.prepare_data(rawDataTest)
predictedSetTest = clf.predict(procDataTest).astype(int)
input_output.export_for_submission("titanic_output.csv", ids, predictedSetTest)