# Build the target matrix: take column 2 of processedData, shape it into an
# (n, 1) column vector, and run it through the binarizer.
# NOTE(review): multiBinarizer is created outside this section; presumably a
# scikit-learn MultiLabelBinarizer -- confirm at its definition.
target_column = processedData[:, 2]
numbers = np.reshape(target_column, (len(target_column), 1))
predOutput = multiBinarizer.fit_transform(numbers)

# Split features and binarized targets into train and test portions.
X_train, X_test, y_train, y_test = train_test_split(
    trainData,
    predOutput,
    test_size=TEST_SIZE_SAMPLE,
    random_state=RANDOM_STATE_SPLIT,
)

# Reuse a previously saved classifier when one can be loaded; a falsy result
# from load_classifier is treated as "nothing saved yet".
SANF_CRIME_CLF_FILE = "sanfCrimeCLF.pkl"
clf = input_output.load_classifier(SANF_CRIME_CLF_FILE)
if not clf:
    # Nothing to reuse: fit a k-nearest-neighbours model with the best known
    # parameters and save it for subsequent runs.
    print("There is no saved classifier!")
    print("Training Model...")
    clf = KNeighborsClassifier(
        n_neighbors=26,
        weights='distance',
        algorithm='kd_tree',
    )
    clf.fit(X_train, y_train)
    input_output.save_classifier(SANF_CRIME_CLF_FILE, clf)
    print("Training Complete!")

# Scoring metric handed to the (currently disabled) model-search helpers below.
scores = ['f1']
# print(X_train)
# Find optimal model
# generalModel = models.MLModel(scores, X_train, y_train, y_test, X_test)
# generalModel.find_optimal_model()
#
# # SEARCH OPTIMAL PARAMETERS FOR Kneighbours
# kNModel = models.NearestNeighboursModel(scores, X_train, y_train, y_test, X_test)
# param_grid = kNModel.grid_search_setup()
# svm_clf = kNModel.getClassifier()
# Separate targets from features: column 0 of processedData is the value to
# predict, every remaining column is a feature.
# NOTE(review): the earlier heading also mentioned scaling, but no scaling is
# performed in this section -- confirm it happens before this point.
predOutput = processedData[:, 0]
trainData = processedData[:, 1:]

# Split features and targets into train and test portions.
X_train, X_test, y_train, y_test = train_test_split(
    trainData,
    predOutput,
    test_size=TEST_SIZE_SAMPLE,
    random_state=RANDOM_STATE_SPLIT,
)

# Reuse a previously saved classifier when one can be loaded; a falsy result
# from load_classifier is treated as "nothing saved yet".
TITANIC_CLF_FILE = "titanicCLF.pkl"
clf = input_output.load_classifier(TITANIC_CLF_FILE)
if not clf:
    # Nothing to reuse: fit an RBF-kernel SVM with the best known parameters
    # and save it for subsequent runs.
    print("There is no saved classifier!")
    print("Training Model...")
    clf = svm.SVC(
        C=1,
        kernel="rbf",
        gamma=0.1,
    )
    clf.fit(X_train, y_train)
    input_output.save_classifier(TITANIC_CLF_FILE, clf)
    print("Training Complete!")

# Scoring metrics handed to the (currently disabled) parameter searches below.
scores = ['precision', 'recall']

# # SEARCH OPTIMAL PARAMETERS FOR SVM
# svmModel = models.SVMModel(scores, X_train, y_train, y_test, X_test)
# param_grid = svmModel.grid_search_setup()
# svm_clf = svmModel.getClassifier()
# svmModel.find_optimal_parameters(svm_clf, param_grid, 6)

# # SEARCH OPTIMAL PARAMETERS FOR Random Forests
# randomModel = models.RandomForestModel(scores, X_train, y_train, y_test, X_test)
# param_grid = randomModel.grid_search_setup()
# rndf_clf = randomModel.getClassifier()