Example #1
0
# Binarize the output labels. They are read from column 2 of processedData and
# reshaped into a single-column 2-D array (one label per row) before being
# passed to multiBinarizer.
# NOTE(review): multiBinarizer is created outside this section - presumably a
# scikit-learn MultiLabelBinarizer; confirm.
labelColumn = processedData[:, 2]
numbers = np.reshape(labelColumn, (-1, 1))
predOutput = multiBinarizer.fit_transform(numbers)

# Split features and binarized labels into train and test sets; the test
# fraction and the random seed come from module-level constants.
X_train, X_test, y_train, y_test = train_test_split(
    trainData,
    predOutput,
    test_size=TEST_SIZE_SAMPLE,
    random_state=RANDOM_STATE_SPLIT,
)

# Reuse a previously saved classifier when one can be loaded from file
SANF_CRIME_CLF_FILE = "sanfCrimeCLF.pkl"
clf = input_output.load_classifier(SANF_CRIME_CLF_FILE)
if not clf:
    # Nothing usable was loaded: train with the best known parameters and
    # save the fitted classifier so the next run can skip training
    print("There is no saved classifier!")
    print("Training Model...")
    knnParams = {'n_neighbors': 26, 'weights': 'distance', 'algorithm': 'kd_tree'}
    clf = KNeighborsClassifier(**knnParams)
    clf.fit(X_train, y_train)
    input_output.save_classifier(SANF_CRIME_CLF_FILE, clf)
    print("Training Complete!")

# Choose scoring metric
scores = ['f1']

# print(X_train)

# Find optimal model
# generalModel = models.MLModel(scores, X_train, y_train, y_test, X_test)
# generalModel.find_optimal_model()

# # SEARCH OPTIMAL PARAMETERS FOR K-Nearest Neighbours
# kNModel = models.NearestNeighboursModel(scores, X_train, y_train, y_test, X_test)
# param_grid = kNModel.grid_search_setup()
# svm_clf = kNModel.getClassifier()
Example #2
0
# Separate output and features: column 0 of processedData is the output,
# every remaining column is a feature.
# NOTE(review): the previous comment mentioned scaling, but no scaling is
# applied in this section - presumably it happens before this point; confirm.
predOutput = processedData[:, 0]
trainData = processedData[:, 1:]

# Split features and output into train and test sets; the test fraction and
# the random seed come from module-level constants.
X_train, X_test, y_train, y_test = train_test_split(
    trainData,
    predOutput,
    test_size=TEST_SIZE_SAMPLE,
    random_state=RANDOM_STATE_SPLIT,
)

# Reuse a previously saved classifier when one can be loaded from file
TITANIC_CLF_FILE = "titanicCLF.pkl"
clf = input_output.load_classifier(TITANIC_CLF_FILE)
if not clf:
    # Nothing usable was loaded: train with the best known parameters and
    # save the fitted classifier so the next run can skip training
    print("There is no saved classifier!")
    print("Training Model...")
    svcParams = {'C': 1, 'kernel': "rbf", 'gamma': 0.1}
    clf = svm.SVC(**svcParams)
    clf.fit(X_train, y_train)
    input_output.save_classifier(TITANIC_CLF_FILE, clf)
    print("Training Complete!")

# Choose scoring metrics
scores = ['precision', 'recall']

# # SEARCH OPTIMAL PARAMETERS FOR SVM
# svmModel = models.SVMModel(scores, X_train, y_train, y_test, X_test)
# param_grid = svmModel.grid_search_setup()
# svm_clf = svmModel.getClassifier()
# svmModel.find_optimal_parameters(svm_clf, param_grid, 6)

# # SEARCH OPTIMAL PARAMETERS FOR Random Forests
# randomModel = models.RandomForestModel(scores, X_train, y_train, y_test, X_test)
# param_grid = randomModel.grid_search_setup()
# rndf_clf = randomModel.getClassifier()