import json
import sys

import pandas as pd

# NOTE(review): DecisionTreeClassifier (sklearn.tree) and
# compute_metrics_classifier are used below but not imported in this chunk --
# presumably provided elsewhere; confirm before running standalone.

if __name__ == "__main__":
    # Usage: python decisiontree.py [input_file] [output.json] [--addTreeFeatures]
    if len(sys.argv) >= 3:
        # Optional 4th argument switches on tree-derived features.
        add_features = len(sys.argv) == 4 and sys.argv[3] == "--addTreeFeatures"

        # Last CSV column is the label; everything before it is the feature matrix.
        df = pd.read_csv(sys.argv[1], sep=',', header=0)
        X = df.iloc[:, :-1].values  # .as_matrix() was removed in pandas 1.0
        y = df.iloc[:, -1].values.astype('U')

        # Hyper-parameter search space for the decision tree.
        param_grid = {
            'criterion': ['gini', 'entropy'],
            'splitter': ['best', 'random'],
            'max_depth': list(range(3, 16)),  # 3..15, as in the original grid
            'min_samples_split': [2, 3, 4, 5],
            'min_samples_leaf': [1, 2, 3, 4, 5, 6],
        }

        classifier = DecisionTreeClassifier()
        report_map = compute_metrics_classifier(
            classifier=classifier,
            param_grid=param_grid,
            X=X,
            y=y,
            cv_parameter_estimation=3,
            add_features=add_features,
        )

        # Persist the evaluation report; 'with' guarantees the file is closed.
        with open(sys.argv[2], "w") as f:
            json.dump(report_map, f)
# NOTE(review): this chunk assumes `sys`, `pd` (pandas), `json`,
# RandomForestClassifier and compute_metrics_classifier are already in scope
# from earlier in the file -- confirm the imports above this block.
# Usage: script [input_file] [output.json] [--addTreeFeatures]
if len(sys.argv) >= 3:
    # Optional 4th argument switches on tree-derived features.
    add_features = len(sys.argv) == 4 and sys.argv[3] == "--addTreeFeatures"

    # Last CSV column is the label; everything before it is the feature matrix.
    df = pd.read_csv(sys.argv[1], sep=',', header=0)
    X = df.iloc[:, :-1].values  # .as_matrix() was removed in pandas 1.0
    y = df.iloc[:, -1].values.astype('U')

    # Random-forest hyper-parameter search space.
    # NOTE(review): max_features='auto' was removed in scikit-learn 1.3;
    # keep it only if the pinned sklearn version still supports it.
    param_grid = {
        'n_estimators': [5, 10, 15, 20, 25, 30, 40, 50],
        'criterion': ['gini', 'entropy'],
        'max_features': ['auto', 'sqrt', 'log2', None],
        'max_depth': [10, 20, 30, 40, 50, 100, None],
    }

    classifier = RandomForestClassifier()
    report_map = compute_metrics_classifier(
        classifier=classifier,
        param_grid=param_grid,
        X=X,
        y=y,
        cv_parameter_estimation=3,
        add_features=add_features,
    )

    # Persist the evaluation report; 'with' guarantees the file is closed.
    with open(sys.argv[2], "w") as f:
        json.dump(report_map, f)
"classifier":RandomForestClassifier(), "classifier_name":"RandomForestClassifier" }, { "feature":"Features/Lower_res/features_all.csv", "use_tree":True, "classifier_name":"SVC", "classifier":SVC() }, ] for id_experiment in range(len(to_do)): print "========== running experiment " +str(id_experiment) +" of "+str(len(to_do)) df = pd.read_csv(to_do[id_experiment]["feature"], sep=',', header=0) to_do[id_experiment]["metric_tests"] = [] X = df.iloc[:,:-1].as_matrix() y = df.iloc[:,-1].as_matrix().astype('U') for id_run in range(n_runs): print("id run: "+ str(id_run)) report_map = compute_metrics_classifier(classifier=to_do[id_experiment]["classifier"], param_grid = param_grids[to_do[id_experiment]["classifier_name"]], X = X, y = y, cv_parameter_estimation = 3, add_features = to_do[id_experiment]["use_tree"]) to_do[id_experiment]["metric_tests"].append(report_map["metric_test"]) del to_do[id_experiment]["classifier"] f = open("summary_plots_18_runs.json", "w") json.dump(to_do, f) f.close()