import os

import numpy as np
from pylab import plot, legend, xlabel, ylabel, show

# yield_points, split_into_folds, load_dictionary, standardise_inputs,
# standardise_outputs, AUC, RandomForest_c, LogisticRegression_c and Tree_c
# are assumed to be defined elsewhere in this project.


def get_the_plots(tups):
    """Plot cross-validated AUC of three classifiers on synthetic datasets.

    `tups` is a list of (n, k, p) tuples passed straight to yield_points.
    """
    ratios = []
    rf_scores = []
    lr_scores = []
    tree_scores = []

    for n, k, p in tups:
        X, y = yield_points(n, k, p)
        data = split_into_folds({'X': X, 'y': y})

        def cv_auc(model):
            # Mean AUC of `model` over the cross-validation folds.
            return np.mean([AUC(model.predict_p(fold['X_train'],
                                                fold['y_train'],
                                                fold['X_test']),
                                fold['y_test'])
                            for fold in data['folds']])

        # Score each classifier on this dataset (same order as the plot legend).
        for model, scores in [(RandomForest_c(10, 'gini'), rf_scores),
                              (LogisticRegression_c(C=0.1, penalty='l2'), lr_scores),
                              (Tree_c(10, 'gini'), tree_scores)]:
            score = cv_auc(model)
            scores.append(score)
            print score

        ratios.append(p)  # x-axis value: the third element of each tuple

    plot(ratios, rf_scores, 'r', label='RF')
    plot(ratios, lr_scores, 'g', label='LR')
    plot(ratios, tree_scores, 'k', label='Tree')
    legend()
    xlabel('Number of points in the dataset')
    ylabel('Performance score')
    show()
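# Hypothetical usage sketch (not part of the original code): the (n, k, p)
# tuples below are placeholder arguments whose exact meaning depends on how
# yield_points is defined elsewhere in this project, e.g.
#
#     get_the_plots([(100, 5, 0.5), (500, 5, 0.5), (1000, 5, 0.5)])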
def evaluate_and_save(method, is_method, data_file, save_file_name):
    """Score `method` on folds built from `data_file` and write the result.

    If `is_method` is true, `method` is a classifier scored by mean AUC over
    the folds; otherwise it is scored via get_stat on each fold's training
    inputs.
    """
    data = split_into_folds(standardise_outputs(standardise_inputs(load_dictionary(data_file))))

    if is_method:
        score = np.mean([AUC(method.predict_p(fold['X_train'],
                                              fold['y_train'],
                                              fold['X_test']),
                             fold['y_test'])
                         for fold in data['folds']])
    else:
        score = np.mean([method.get_stat(fold['X_train']) for fold in data['folds']])

    # Create the output directory if needed, then write the score.
    save_file_dir = os.path.split(save_file_name)[0]
    if save_file_dir and not os.path.isdir(save_file_dir):
        os.makedirs(save_file_dir)
    with open(save_file_name, 'w') as save_file:
        save_file.write('%s' % score)
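if __name__ == '__main__':
    # Hypothetical driver (not part of the original code): the classifier,
    # data file and output path below are placeholders; swap in whatever the
    # rest of this project actually provides.
    evaluate_and_save(RandomForest_c(10, 'gini'),  # classifier scored by mean AUC
                      True,                        # is_method: treat it as a classifier
                      'data/example_data.pkl',     # placeholder input file
                      'results/rf_auc.txt')        # placeholder output path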