def base_dt(ver, inname, outname, testname):
    """Train a default DecisionTreeClassifier and evaluate it on a test set.

    Reads the indexed train/test files, fits the tree, writes the predicted
    labels to `outname`, and reports metrics via `calculate_metrics`.

    :param ver: identifier/version tag forwarded to calculate_metrics
    :param inname: path of the indexed training data file
    :param outname: path the predictions (and metrics) are written to
    :param testname: path of the indexed test data file
    """
    X, Y = data_manip.read_indexed(inname)
    X_test, Y_test = data_manip.read_indexed(testname)
    dtc = tree.DecisionTreeClassifier()
    dtc.fit(X, Y)
    # One vectorized predict over the whole test set — the original looped
    # per sample (and shadowed the training variable X inside the loop).
    output_arr = list(dtc.predict(X_test))
    data_manip.write_indexed(outname, pd.DataFrame({'index': output_arr}))
    calculate_metrics(Y_test, output_arr, ver, outname)
def gnb_predictor(ver, inname, outname, testname):
    """Train a GaussianNB classifier and evaluate it on a test set.

    Reads the indexed train/test files, fits the model, writes the predicted
    labels to `outname`, and reports metrics via `calculate_metrics`.

    :param ver: identifier/version tag forwarded to calculate_metrics
    :param inname: path of the indexed training data file
    :param outname: path the predictions (and metrics) are written to
    :param testname: path of the indexed test data file
    """
    X, Y = data_manip.read_indexed(inname)
    X_test, Y_test = data_manip.read_indexed(testname)
    gnb = GaussianNB()
    gnb.fit(X, Y)
    # One vectorized predict over the whole test set — the original looped
    # per sample (and shadowed the training variable X inside the loop).
    output_arr = list(gnb.predict(X_test))
    data_manip.write_indexed(outname, pd.DataFrame({'index': output_arr}))
    calculate_metrics(Y_test, output_arr, ver, outname)
def default_perceptron(ver, inname, outname, testname):
    """Train a Perceptron with default parameters and evaluate it on a test set.

    Reads the indexed train/test files, fits the model, writes the predicted
    labels to `outname`, and reports metrics via `calculate_metrics`.

    :param ver: identifier/version tag forwarded to calculate_metrics
    :param inname: path of the indexed training data file
    :param outname: path the predictions (and metrics) are written to
    :param testname: path of the indexed test data file
    """
    X, Y = data_manip.read_indexed(inname)
    X_test, Y_test = data_manip.read_indexed(testname)
    per = Perceptron()  # default hyperparameters on purpose (baseline model)
    per.fit(X, Y)
    # One vectorized predict over the whole test set — the original looped
    # per sample (and shadowed the training variable X inside the loop).
    output_arr = list(per.predict(X_test))
    data_manip.write_indexed(outname, pd.DataFrame({'index': output_arr}))
    calculate_metrics(Y_test, output_arr, ver, outname)
def base_multi_layered_perceptron(ver, inname, outname, testname):
    """Train a baseline MLPClassifier and evaluate it on a test set.

    Baseline architecture: one hidden layer of 100 units, logistic
    activation, SGD solver, capped at 400 iterations.

    :param ver: identifier/version tag forwarded to calculate_metrics
    :param inname: path of the indexed training data file
    :param outname: path the predictions (and metrics) are written to
    :param testname: path of the indexed test data file
    """
    X, Y = data_manip.read_indexed(inname)
    X_test, Y_test = data_manip.read_indexed(testname)
    mlp = MLPClassifier(
        hidden_layer_sizes=(100,),
        activation="logistic",
        solver='sgd',
        max_iter=400,
    )
    mlp.fit(X, Y)
    # One vectorized predict over the whole test set — the original looped
    # per sample (and shadowed the training variable X inside the loop).
    output_arr = list(mlp.predict(X_test))
    data_manip.write_indexed(outname, pd.DataFrame({'index': output_arr}))
    calculate_metrics(Y_test, output_arr, ver, outname)
def best_dt(ver, inname, outname, testname):
    """Train a tuned DecisionTreeClassifier and evaluate it on a test set.

    Hyperparameters (chosen experimentally):
      - splitting criterion: entropy
      - max_depth: None
        NOTE(review): the original docstring claimed a maximum depth of 85,
        but the code set max_depth = None — behavior kept as coded; confirm
        which was intended.
      - min_samples_split: 5
      - min_impurity_decrease: 0.00025
      - class_weight: None

    :param ver: identifier/version tag forwarded to calculate_metrics
    :param inname: path of the indexed training data file
    :param outname: path the predictions (and metrics) are written to
    :param testname: path of the indexed test data file
    """
    X, Y = data_manip.read_indexed(inname)
    X_test, Y_test = data_manip.read_indexed(testname)
    # Pass hyperparameters through the constructor instead of mutating
    # attributes after construction — equivalent (all set before fit), but
    # the sklearn-idiomatic form.
    dtc = tree.DecisionTreeClassifier(
        criterion="entropy",
        max_depth=None,
        min_samples_split=5,
        min_impurity_decrease=0.00025,
        class_weight=None,
    )
    dtc.fit(X, Y)
    # One vectorized predict over the whole test set — the original looped
    # per sample (and shadowed the training variable X inside the loop).
    output_arr = list(dtc.predict(X_test))
    data_manip.write_indexed(outname, pd.DataFrame({'index': output_arr}))
    calculate_metrics(Y_test, output_arr, ver, outname)
def best_multi_layered_perceptron(ver, inname, outname, testname):
    """Train the best-found MLPClassifier and evaluate it on a test set.

    Architecture: two hidden layers of 50 units, ReLU activation, Adam
    solver, capped at 400 iterations. These hyperparameters were selected
    with a GridSearchCV sweep over hidden_layer_sizes {(30,50),(50,50)},
    activation {identity,logistic,tanh,relu} and solver {sgd,adam}; the
    search scaffold has been removed from the hot path.

    :param ver: identifier/version tag forwarded to calculate_metrics
    :param inname: path of the indexed training data file
    :param outname: path the predictions (and metrics) are written to
    :param testname: path of the indexed test data file
    """
    X, Y = data_manip.read_indexed(inname)
    X_test, Y_test = data_manip.read_indexed(testname)
    # For both datasets these are the optimal hyperparameters found.
    mlp = MLPClassifier(
        hidden_layer_sizes=(50, 50),
        activation="relu",
        solver='adam',
        max_iter=400,
    )
    mlp.fit(X, Y)
    # One vectorized predict over the whole test set — the original looped
    # per sample (and shadowed the training variable X inside the loop).
    output_arr = list(mlp.predict(X_test))
    data_manip.write_indexed(outname, pd.DataFrame({'index': output_arr}))
    calculate_metrics(Y_test, output_arr, ver, outname)