def perceptron(training, test, output):
    """Fit a Perceptron on the training CSV and report its test predictions.

    Both inputs are read with ``np.loadtxt`` using a comma delimiter. In each
    loaded array, every column except the last is treated as a feature and
    the last column as the label.

    Args:
        training: Source handed to ``np.loadtxt`` for the training rows.
        test: Source handed to ``np.loadtxt`` for the test rows.
        output: Forwarded unchanged to ``helper.output_computed_metrics``
            (presumably the destination of the report; see that helper).
    """
    train_rows = np.loadtxt(training, delimiter=',')
    test_rows = np.loadtxt(test, delimiter=',')

    # Last column is the label, everything before it is a feature.
    train_features = train_rows[:, :-1]
    train_labels = train_rows[:, -1]

    classifier = Perceptron()
    classifier.fit(train_features, train_labels)

    predicted = classifier.predict(test_rows[:, :-1])
    helper.output_computed_metrics(test_rows, predicted, output)
def base_decision_tree(training, test, output):
    """Fit an entropy-criterion decision tree and report its test predictions.

    Both inputs are read with ``np.loadtxt`` using a comma delimiter. In each
    loaded array, every column except the last is treated as a feature and
    the last column as the label.

    Args:
        training: Source handed to ``np.loadtxt`` for the training rows.
        test: Source handed to ``np.loadtxt`` for the test rows.
        output: Forwarded unchanged to ``helper.output_computed_metrics``
            (presumably the destination of the report; see that helper).
    """
    train_rows = np.loadtxt(training, delimiter=',')
    test_rows = np.loadtxt(test, delimiter=',')

    # Last column is the label, everything before it is a feature.
    train_features, train_labels = train_rows[:, :-1], train_rows[:, -1]

    tree = DecisionTreeClassifier(criterion='entropy')
    tree.fit(train_features, train_labels)

    predicted = tree.predict(test_rows[:, :-1])
    helper.output_computed_metrics(test_rows, predicted, output)
def gaussian_naive_bayes(training, test, output):
    """Fit a Gaussian naive Bayes model and report its test predictions.

    Both inputs are read with ``np.loadtxt`` using a comma delimiter. In each
    loaded array, every column except the last is treated as a feature and
    the last column as the label. The predicted labels are cast to ``int``
    before they are handed to the reporting helper.

    Args:
        training: Source handed to ``np.loadtxt`` for the training rows.
        test: Source handed to ``np.loadtxt`` for the test rows.
        output: Forwarded unchanged to ``helper.output_computed_metrics``
            (presumably the destination of the report; see that helper).
    """
    train_rows = np.loadtxt(training, delimiter=',')
    test_rows = np.loadtxt(test, delimiter=',')

    # Last column is the label, everything before it is a feature.
    train_features = train_rows[:, :-1]
    train_labels = train_rows[:, -1]

    model = GaussianNB()
    model.fit(train_features, train_labels)

    raw_prediction = model.predict(test_rows[:, :-1])
    predicted = raw_prediction.astype(int)
    helper.output_computed_metrics(test_rows, predicted, output)
def base_mlp(training, test, output):
    """Fit a baseline multi-layer perceptron and report its test predictions.

    The network has a single hidden layer of 100 neurons, logistic
    activation, and is trained with stochastic gradient descent; every other
    ``MLPClassifier`` setting is left at its default.

    Both inputs are read with ``np.loadtxt`` using a comma delimiter. In each
    loaded array, every column except the last is treated as a feature and
    the last column as the label.

    Args:
        training: Source handed to ``np.loadtxt`` for the training rows.
        test: Source handed to ``np.loadtxt`` for the test rows.
        output: Forwarded unchanged to ``helper.output_computed_metrics``
            (presumably the destination of the report; see that helper).
    """
    training_data = np.loadtxt(training, delimiter=',')
    test_data = np.loadtxt(test, delimiter=',')

    # Each tuple element is the width of one hidden layer, so a single
    # 100-neuron layer is (100,). The previous (1, 100) built two layers and
    # squeezed every input through a one-neuron bottleneck first.
    mlp = MLPClassifier(
        hidden_layer_sizes=(100,),
        activation='logistic',
        solver='sgd',
    )
    mlp.fit(training_data[:, :-1], training_data[:, -1])

    test_prediction = mlp.predict(test_data[:, :-1])
    helper.output_computed_metrics(test_data, test_prediction, output)
def best_decision_tree(training, validation, test, output):
    """Grid-search a decision tree on a train/validation split and report test predictions.

    Every hyper-parameter combination is fitted on the training rows and
    scored on the validation rows (one predefined hold-out split).
    ``GridSearchCV`` then refits the best combination on the training and
    validation rows together, and that model labels the test rows. The
    selected parameters are printed.

    All inputs are read with ``np.loadtxt`` using a comma delimiter. In each
    loaded array, every column except the last is treated as a feature and
    the last column as the label.

    Args:
        training: Source handed to ``np.loadtxt`` for the training rows.
        validation: Source handed to ``np.loadtxt`` for the validation rows.
        test: Source handed to ``np.loadtxt`` for the test rows.
        output: Forwarded unchanged to ``helper.output_computed_metrics``
            (presumably the destination of the report; see that helper).
    """
    # Local import: only the tuning routines need the predefined-split helper.
    from sklearn.model_selection import PredefinedSplit

    training_data = np.loadtxt(training, delimiter=',')
    validation_data = np.loadtxt(validation, delimiter=',')
    test_data = np.loadtxt(test, delimiter=',')

    # GridSearchCV clones its estimator for every candidate, so fitting the
    # tree beforehand has no effect on the search. Hand the search both sets
    # and pin the split so the training rows are actually used for fitting.
    tuning_data = np.vstack((training_data, validation_data))
    # -1 keeps a row in the fitting part of the split; 0 puts it in the
    # single held-out (validation) fold.
    fold_of_row = np.concatenate((
        np.full(len(training_data), -1),
        np.zeros(len(validation_data), dtype=int),
    ))

    params = {
        'criterion': ['gini', 'entropy'],
        'max_depth': [10, None],
        'min_samples_split': [0.01, 0.1, 0.5, 0.9, 1.0],
        'min_impurity_decrease': [0.0, 0.01, 0.1, 0.5, 0.9, 1.0],
        'class_weight': [None, 'balanced'],
    }
    tuned_model = GridSearchCV(
        estimator=DecisionTreeClassifier(),
        param_grid=params,
        n_jobs=-1,
        cv=PredefinedSplit(fold_of_row),
    )
    tuned_model.fit(tuning_data[:, :-1], tuning_data[:, -1])
    print(tuned_model.best_params_)

    test_prediction = tuned_model.best_estimator_.predict(test_data[:, :-1])
    helper.output_computed_metrics(test_data, test_prediction, output)
def best_mlp(training, validation, test, output):
    """Grid-search an MLP on a train/validation split and report test predictions.

    Every hyper-parameter combination is fitted on the training rows and
    scored on the validation rows (one predefined hold-out split).
    ``GridSearchCV`` then refits the best combination on the training and
    validation rows together, and that model labels the test rows. The
    selected parameters are printed.

    All inputs are read with ``np.loadtxt`` using a comma delimiter. In each
    loaded array, every column except the last is treated as a feature and
    the last column as the label.

    Args:
        training: Source handed to ``np.loadtxt`` for the training rows.
        validation: Source handed to ``np.loadtxt`` for the validation rows.
        test: Source handed to ``np.loadtxt`` for the test rows.
        output: Forwarded unchanged to ``helper.output_computed_metrics``
            (presumably the destination of the report; see that helper).
    """
    # Local import: only the tuning routines need the predefined-split helper.
    from sklearn.model_selection import PredefinedSplit

    training_data = np.loadtxt(training, delimiter=',')
    validation_data = np.loadtxt(validation, delimiter=',')
    test_data = np.loadtxt(test, delimiter=',')

    # GridSearchCV clones its estimator for every candidate, so fitting the
    # network beforehand has no effect on the search. Hand the search both
    # sets and pin the split so the training rows are actually used for fitting.
    tuning_data = np.vstack((training_data, validation_data))
    # -1 keeps a row in the fitting part of the split; 0 puts it in the
    # single held-out (validation) fold.
    fold_of_row = np.concatenate((
        np.full(len(training_data), -1),
        np.zeros(len(validation_data), dtype=int),
    ))

    params = {
        'activation': ['logistic', 'tanh', 'relu', 'identity'],
        'hidden_layer_sizes': [(30, 50), (10, 10, 10)],
        'solver': ['adam', 'sgd'],
    }
    tuned_model = GridSearchCV(
        estimator=MLPClassifier(),
        param_grid=params,
        n_jobs=-1,
        cv=PredefinedSplit(fold_of_row),
    )
    tuned_model.fit(tuning_data[:, :-1], tuning_data[:, -1])
    print(tuned_model.best_params_)

    test_prediction = tuned_model.best_estimator_.predict(test_data[:, :-1])
    helper.output_computed_metrics(test_data, test_prediction, output)