Example #1
import pandas as pd
from sklearn import tree

# data_manip and calculate_metrics are project-local helpers defined elsewhere.
def base_dt(ver, inname, outname, testname):
    X, Y = data_manip.read_indexed(inname)
    X_test, Y_test = data_manip.read_indexed(testname)
    dtc = tree.DecisionTreeClassifier()
    dtc.fit(X, Y)
    output_arr = dtc.predict(X_test)  # predict the whole test set in one call
    data_manip.write_indexed(outname, pd.DataFrame({'index': output_arr}))
    calculate_metrics(Y_test, output_arr, ver, outname)
Example #2
import pandas as pd
from sklearn.naive_bayes import GaussianNB

def gnb_predictor(ver, inname, outname, testname):
    X, Y = data_manip.read_indexed(inname)
    X_test, Y_test = data_manip.read_indexed(testname)
    gnb = GaussianNB()
    gnb.fit(X, Y)
    output_arr = gnb.predict(X_test)
    data_manip.write_indexed(outname, pd.DataFrame({'index': output_arr}))
    calculate_metrics(Y_test, output_arr, ver, outname)
Example #3
import pandas as pd
from sklearn.linear_model import Perceptron

def default_perceptron(ver, inname, outname, testname):
    X, Y = data_manip.read_indexed(inname)
    X_test, Y_test = data_manip.read_indexed(testname)
    per = Perceptron()  # Default params
    per.fit(X, Y)
    output_arr = per.predict(X_test)
    data_manip.write_indexed(outname, pd.DataFrame({'index': output_arr}))
    calculate_metrics(Y_test, output_arr, ver, outname)
Example #4
import pandas as pd
from sklearn.neural_network import MLPClassifier

def base_multi_layered_perceptron(ver, inname, outname, testname):
    X, Y = data_manip.read_indexed(inname)
    X_test, Y_test = data_manip.read_indexed(testname)
    mlp = MLPClassifier(hidden_layer_sizes=(100, ),
                        activation="logistic",
                        solver='sgd',
                        max_iter=400)
    mlp.fit(X, Y)
    output_arr = mlp.predict(X_test)
    data_manip.write_indexed(outname, pd.DataFrame({'index': output_arr}))
    calculate_metrics(Y_test, output_arr, ver, outname)
Example #5
import pandas as pd
from sklearn import tree

def best_dt(ver, inname, outname, testname):
    """
    splitting criterion:  entropy
    maximum depth of the tree: 85
    minimum number of samples to split an internal node: experiment with values of your choice
    minimum impurity decrease: experiment with values of your choice
    class weight: None and balanced
    """
    X, Y = data_manip.read_indexed(inname)
    X_test, Y_test = data_manip.read_indexed(testname)
    dtc = tree.DecisionTreeClassifier(criterion="entropy",
                                      max_depth=None,  # note: the docstring specifies 85
                                      min_samples_split=5,
                                      min_impurity_decrease=0.00025,
                                      class_weight=None)
    dtc.fit(X, Y)
    output_arr = dtc.predict(X_test)
    data_manip.write_indexed(outname, pd.DataFrame({'index': output_arr}))
    calculate_metrics(Y_test, output_arr, ver, outname)
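The docstring of best_dt asks for experimentation with min_samples_split, min_impurity_decrease, and class_weight. As a minimal sketch of how that search could be automated, the hypothetical helper below uses scikit-learn's GridSearchCV; the candidate values in the grid are illustrative assumptions, not the ones the author reports, and data_manip is the same project-local helper used above.

import pandas as pd
from sklearn import tree
from sklearn.model_selection import GridSearchCV

def tune_dt(inname):
    # Hypothetical helper: searches the hyperparameters named in the
    # docstring of best_dt. The candidate values are assumptions.
    X, Y = data_manip.read_indexed(inname)
    grid = {
        'min_samples_split': [2, 5, 10],
        'min_impurity_decrease': [0.0, 0.00025, 0.001],
        'class_weight': [None, 'balanced'],
    }
    search = GridSearchCV(tree.DecisionTreeClassifier(criterion="entropy"), grid)
    search.fit(X, Y)
    print(search.best_params_)     # best combination found
    return search.best_estimator_  # refit on the full training set by default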
Example #6
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

def best_multi_layered_perceptron(ver, inname, outname, testname):
    X, Y = data_manip.read_indexed(inname)
    X_test, Y_test = data_manip.read_indexed(testname)
    # These hyperparameters performed best on both datasets.
    mlp = MLPClassifier(hidden_layer_sizes=(50, 50),
                        activation="relu",
                        solver='adam',
                        max_iter=400)
    # The grid search below was used to find the good hyperparameters:
    # mlp = MLPClassifier(max_iter=400)
    # parameters = {
    #     'hidden_layer_sizes': [(30, 50), (50, 50)],
    #     'activation': ['identity', 'logistic', 'tanh', 'relu'],
    #     'solver': ['sgd', 'adam']
    # }
    # gridMLP = GridSearchCV(mlp, parameters)
    # gridMLP.fit(X, Y)
    # print("BEST PARAMS FOR: " + str(ver))
    # print(gridMLP.best_params_)  # not get_params(), which returns the search object's own settings
    # output_arr = gridMLP.predict(X_test)
    mlp.fit(X, Y)
    output_arr = mlp.predict(X_test)
    data_manip.write_indexed(outname, pd.DataFrame({'index': output_arr}))
    calculate_metrics(Y_test, output_arr, ver, outname)
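If the commented-out grid search above is re-enabled, the fitted GridSearchCV object can be queried and used for prediction directly, since it refits the winning combination on the full training set by default. A minimal sketch, assuming gridMLP has been fit as in that block:

# Assumes gridMLP was fit as in the commented-out block above.
print(gridMLP.best_params_)           # winning hyperparameter combination
print(gridMLP.best_score_)            # mean cross-validated score of that combination
output_arr = gridMLP.predict(X_test)  # delegates to the refit best estimator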
Example #7
import pandas as pd

def calculate_distribution(ver):
    X, Y = data_manip.read_indexed("./data/train_" + str(ver) + ".csv")
    df = pd.DataFrame({'index': Y})
    return df['index'].value_counts()
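A quick usage sketch for calculate_distribution, assuming a training file such as ./data/train_1.csv exists on disk; pandas' value_counts returns one entry per class label, sorted by frequency:

counts = calculate_distribution(1)  # hypothetical dataset version number
print(counts)                       # absolute count per class label
print(counts / counts.sum())        # normalized class distribution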