# Prepare features/labels and obtain the classifier cached in
# "sanfCrimeCLF.pkl".  Columns 0 and 1 of processedData are the features and
# column 2 is the label (the original author notes that pandas column
# counting is odd here).

# Standardise the two feature columns.
# NOTE(review): the scaler is fitted on every row *before* the train/test
# split below, so held-out rows contribute to the scaling statistics --
# confirm this is intended.
scaler = StandardScaler()
feature_columns = processedData[:, 0:2]
trainData = scaler.fit_transform(feature_columns)

# Binarise the output: the label column is first reshaped into an (n, 1)
# column so each row reaches the binarizer as a one-element label set.
multiBinarizer = MultiLabelBinarizer()
label_column = processedData[:, 2]
numbers = np.reshape(label_column, (len(label_column), 1))
predOutput = multiBinarizer.fit_transform(numbers)

# Hold out part of the data for evaluation; size and seed come from the
# module-level constants.
X_train, X_test, y_train, y_test = train_test_split(
    trainData,
    predOutput,
    test_size=TEST_SIZE_SAMPLE,
    random_state=RANDOM_STATE_SPLIT,
)

# Reuse a previously saved classifier when one can be loaded.
SANF_CLF_FILE = "sanfCrimeCLF.pkl"
clf = input_output.load_classifier(SANF_CLF_FILE)
if not clf:
    # Nothing usable was loaded: fit a KNN model with the best known
    # parameters and save it for the next run.
    print("There is no saved classifier!")
    print("Training Model...")
    clf = KNeighborsClassifier(
        n_neighbors=26,
        weights="distance",
        algorithm="kd_tree",
    )
    clf.fit(X_train, y_train)
    input_output.save_classifier(SANF_CLF_FILE, clf)
    print("Training Complete!")

# Scoring metric(s) to use for the model search.
scores = ["f1"]
# print(X_train)

# Find optimal model
# Split configuration: fraction of rows held out for testing and the seed
# that makes the split reproducible.
TEST_SIZE_SAMPLE = 0.25
RANDOM_STATE_SPLIT = 40

# Load the raw training CSV and run the project's preprocessing step.
rawData = input_output.load_data("train.csv")
processedData = preprocess.prepare_data(rawData)

# Column 0 is the output to predict; every remaining column is a feature.
# (No scaling is applied in this fragment.)
predOutput = processedData[:, 0]
trainData = processedData[:, 1:]

X_train, X_test, y_train, y_test = train_test_split(
    trainData,
    predOutput,
    test_size=TEST_SIZE_SAMPLE,
    random_state=RANDOM_STATE_SPLIT,
)

# Reuse a previously saved classifier when one can be loaded.
TITANIC_CLF_FILE = "titanicCLF.pkl"
clf = input_output.load_classifier(TITANIC_CLF_FILE)
if not clf:
    # Nothing usable was loaded: fit an RBF-kernel SVM with the best known
    # parameters and save it for the next run.
    print("There is no saved classifier!")
    print("Training Model...")
    clf = svm.SVC(C=1, kernel="rbf", gamma=0.1)
    clf.fit(X_train, y_train)
    input_output.save_classifier(TITANIC_CLF_FILE, clf)
    print("Training Complete!")

# Scoring metrics to use for the model search.
scores = ["precision", "recall"]

# # SEARCH OPTIMAL PARAMETERS FOR SVM
# svmModel = models.SVMModel(scores, X_train, y_train, y_test, X_test)
# param_grid = svmModel.grid_search_setup()