import pandas as pd
import json

if __name__ == "__main__":
    # Usage: python decisiontree.py [input_file] [output.json] [--addTreeFeatures]
    #
    # Loads a CSV whose last column is the label, hands a DecisionTreeClassifier
    # and a hyper-parameter grid to compute_metrics_classifier, and writes the
    # returned report to the output path as JSON. With fewer than two
    # positional arguments the script does nothing.
    #
    # NOTE(review): DecisionTreeClassifier and compute_metrics_classifier are
    # not imported in the visible part of this file - confirm they are in scope.
    import sys  # sys.argv is read below but sys was not imported at file level

    if len(sys.argv) >= 3:
        # The optional flag is honoured only when it is the sole extra argument.
        add_features = len(sys.argv) == 4 and sys.argv[3] == "--addTreeFeatures"

        df = pd.read_csv(sys.argv[1], sep=',', header=0)

        # Every column but the last is a feature; the last column is the label,
        # converted to unicode strings. `.values` replaces `as_matrix()`, which
        # was deprecated in pandas 0.23 and removed in pandas 1.0.
        X = df.iloc[:, :-1].values
        y = df.iloc[:, -1].values.astype('U')

        param_grid = {
            'criterion': ['gini', 'entropy'],
            'splitter': ['best', 'random'],
            'max_depth': [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
            'min_samples_split': [2, 3, 4, 5],
            'min_samples_leaf': [1, 2, 3, 4, 5, 6],
        }

        classifier = DecisionTreeClassifier()

        report_map = compute_metrics_classifier(classifier=classifier,
                                                param_grid=param_grid,
                                                X=X,
                                                y=y,
                                                cv_parameter_estimation=3,
                                                add_features=add_features)

        # The context manager closes the file even if json.dump raises.
        with open(sys.argv[2], "w") as f:
            json.dump(report_map, f)
# Example no. 2
# 0
    # Expects: [input_file] [output.json] [--addTreeFeatures]
    # Loads a CSV whose last column is the label, hands a
    # RandomForestClassifier and a hyper-parameter grid to
    # compute_metrics_classifier, and writes the returned report as JSON.
    # With fewer than two positional arguments nothing is done.
    if len(sys.argv) >= 3:
        # The optional flag is honoured only when it is the sole extra argument.
        add_features = len(sys.argv) == 4 and sys.argv[3] == "--addTreeFeatures"

        df = pd.read_csv(sys.argv[1], sep=',', header=0)

        # Every column but the last is a feature; the last column is the label,
        # converted to unicode strings. `.values` replaces `as_matrix()`, which
        # was deprecated in pandas 0.23 and removed in pandas 1.0.
        X = df.iloc[:, :-1].values
        y = df.iloc[:, -1].values.astype('U')

        # NOTE(review): 'auto' for max_features was deprecated in scikit-learn
        # 1.1 and removed in 1.3 - confirm the scikit-learn version in use
        # before changing the grid.
        param_grid = {
            'n_estimators': [5, 10, 15, 20, 25, 30, 40, 50],
            'criterion': ['gini', 'entropy'],
            'max_features': ['auto', 'sqrt', 'log2', None],
            'max_depth': [10, 20, 30, 40, 50, 100, None]
        }

        classifier = RandomForestClassifier()

        report_map = compute_metrics_classifier(classifier=classifier,
                                                param_grid=param_grid,
                                                X=X,
                                                y=y,
                                                cv_parameter_estimation=3,
                                                add_features=add_features)

        # The context manager closes the file even if json.dump raises.
        with open(sys.argv[2], "w") as f:
            json.dump(report_map, f)
             "classifier":RandomForestClassifier(),
             "classifier_name":"RandomForestClassifier"
            },
            {
             "feature":"Features/Lower_res/features_all.csv",
             "use_tree":True,
             "classifier_name":"SVC",
             "classifier":SVC()
            },
           ]



    # Run every experiment in to_do n_runs times, collecting the
    # "metric_test" entry of each run's report under "metric_tests", then
    # write the whole to_do list to a JSON summary file.
    n_experiments = len(to_do)
    for id_experiment, experiment in enumerate(to_do):
        # Call form of print with a single string argument behaves the same on
        # Python 2 and Python 3 (the original statement form is a SyntaxError
        # on Python 3).
        print("========== running experiment " + str(id_experiment) + " of " + str(n_experiments))
        df = pd.read_csv(experiment["feature"], sep=',', header=0)
        experiment["metric_tests"] = []

        # Every column but the last is a feature; the last column is the label,
        # converted to unicode strings. `.values` replaces `as_matrix()`, which
        # was deprecated in pandas 0.23 and removed in pandas 1.0.
        X = df.iloc[:, :-1].values
        y = df.iloc[:, -1].values.astype('U')

        for id_run in range(n_runs):
            print("id run: " + str(id_run))
            report_map = compute_metrics_classifier(
                classifier=experiment["classifier"],
                param_grid=param_grids[experiment["classifier_name"]],
                X=X,
                y=y,
                cv_parameter_estimation=3,
                add_features=experiment["use_tree"])
            experiment["metric_tests"].append(report_map["metric_test"])

        # Drop the classifier entry before dumping; presumably because the
        # estimator object is not JSON-serializable.
        del experiment["classifier"]

    # The context manager closes the file even if json.dump raises.
    with open("summary_plots_18_runs.json", "w") as f:
        json.dump(to_do, f)