예제 #1
0
# # # SEARCH OPTIMAL PARAMETERS FOR K-Nearest Neighbours
# kNModel = models.NearestNeighboursModel(scores, X_train, y_train, y_test, X_test)
# param_grid = kNModel.grid_search_setup()
# svm_clf = kNModel.getClassifier()
# kNModel.find_optimal_parameters(svm_clf, param_grid, 7)

#
# # # # SEARCH OPTIMAL PARAMETERS FOR Random Forests
# randomModel = models.RandomForestModel(scores, X_train, y_train, y_test, X_test)
# param_grid = randomModel.grid_search_setup()
# rndf_clf = randomModel.getClassifier()
# randomModel.find_optimal_parameters(rndf_clf, param_grid, 6)

# Try the model and print results
#predictedSet = clf.predict(X_test)
# print("Model results on the test part of the data\n")
# print(classification_report(predictedSet, y_test))

# Load the competition test set file
rawDataTest = input_output.load_data("test.csv")

# Scale the test features with the statistics the scaler has already learned.
# Calling fit_transform here would refit the scaler on the test data, so the
# classifier would see features scaled differently from the ones it was
# trained on.
# NOTE(review): assumes `scaler` was fitted on the training features earlier
# in this script (not visible in this section) - confirm.
procDataTest = scaler.transform(preprocess.prepare_data(rawDataTest, train=False))
predictedSetTest = clf.predict(procDataTest).astype(int)

# Post-process the integer predictions with the project's binarize_output helper
binarizedSet = preprocess.binarize_output(predictedSetTest)

# Convert to pandas DataFrame
df = pd.DataFrame(binarizedSet)
# NOTE(review): the 'Category' column is taken from `rawData`, not from
# `rawDataTest`; presumably the training frame supplies the class labels for
# the submission file - verify against export_for_submission.
input_output.export_for_submission("sanfoutput.csv", df, rawData['Category'])
예제 #2
0
# Scoring metrics handed to the model wrappers for parameter search
scores = [
    'precision',
    'recall',
]

# # SEARCH OPTIMAL PARAMETERS FOR SVM
# svmModel = models.SVMModel(scores, X_train, y_train, y_test, X_test)
# param_grid = svmModel.grid_search_setup()
# svm_clf = svmModel.getClassifier()
# svmModel.find_optimal_parameters(svm_clf, param_grid, 6)

# # SEARCH OPTIMAL PARAMETERS FOR Random Forests
# randomModel = models.RandomForestModel(scores, X_train, y_train, y_test, X_test)
# param_grid = randomModel.grid_search_setup()
# rndf_clf = randomModel.getClassifier()
# randomModel.find_optimal_parameters(rndf_clf, param_grid, 6)

# Try the model on the held-out split and print the per-class results
predictedSet = clf.predict(X_test)
print("Model results on the test part of the data\n")
# classification_report expects (y_true, y_pred). With the arguments reversed
# the predictions are treated as ground truth, which swaps the reported
# precision and recall of every class.
print(classification_report(y_test, predictedSet))

# Load the test set file, predict on it and write the submission
submission_path = "titanic_output.csv"
rawDataTest = input_output.load_data("test.csv")

# Read the PassengerId column before the frame is handed to prepare_data
# (which may modify it - TODO confirm).
ids = rawDataTest['PassengerId'].values

procDataTest = preprocess.prepare_data(rawDataTest)

# Predict first, then cast the predictions to integers for the output file.
rawPredictions = clf.predict(procDataTest)
predictedSetTest = rawPredictions.astype(int)

input_output.export_for_submission(submission_path, ids, predictedSetTest)