Example #1
0
def _mean_fold_auc(model, folds):
    """Return the mean ``AUC`` score of ``model`` over ``folds``.

    Each fold is a mapping with the keys ``'X_train'``, ``'y_train'``,
    ``'X_test'`` and ``'y_test'``.  For every fold, ``model.predict_p`` is
    called with the fold's ``X_train``, ``y_train`` and ``X_test`` values and
    its result is scored against the fold's ``y_test`` with ``AUC``.
    """
    return np.mean([
        AUC(model.predict_p(fold['X_train'], fold['y_train'], fold['X_test']),
            fold['y_test'])
        for fold in folds
    ])


def get_the_plots(tups):
    """Score three models on generated datasets and plot the results.

    Args:
        tups: iterable of ``(n, k, p)`` triples.  Each triple is passed to
            ``yield_points(n, k, p)`` to obtain a dataset ``(X, y)``; ``p``
            is additionally used as that dataset's x-coordinate in the plot.

    Side effects:
        Prints the mean fold score of each model for every triple (random
        forest, then logistic regression, then tree) and shows a single
        figure with one line per model.
    """
    ratios = []
    rf_scores = []
    lr_scores = []
    tree_scores = []

    for n, k, p in tups:
        X, y = yield_points(n, k, p)
        folds = split_into_folds({'X': X, 'y': y})['folds']

        # A fresh model instance is built for every dataset, as before.
        rf_score = _mean_fold_auc(RandomForest_c(10, 'gini'), folds)
        rf_scores.append(rf_score)
        print(rf_score)

        lr_score = _mean_fold_auc(
            LogisticRegression_c(C=0.1, penalty='l2'), folds)
        lr_scores.append(lr_score)
        print(lr_score)

        tree_score = _mean_fold_auc(Tree_c(10, 'gini'), folds)
        tree_scores.append(tree_score)
        print(tree_score)

        ratios.append(p)

    plot(ratios, rf_scores, 'r', label='RF')
    plot(ratios, lr_scores, 'g', label='LR')
    plot(ratios, tree_scores, 'k', label='Tree')
    legend()
    # NOTE(review): the x values plotted are the ``p`` entries of ``tups``,
    # not ``n``; this label may be stale -- confirm what ``p`` represents.
    xlabel('Number of points in the dataset')
    ylabel('Performance score')
    show()
def evaluate_and_save(method, is_method, data_file, save_file_name):
    """Evaluate ``method`` on the folds of a dataset and write the mean score.

    The dataset is obtained by passing ``data_file`` through
    ``load_dictionary``, ``standardise_inputs``, ``standardise_outputs`` and
    ``split_into_folds``, in that order.

    Args:
        method: object to evaluate.  It must provide ``predict_p`` when
            ``is_method`` is true and ``get_stat`` otherwise.
        is_method: if true, the score is the mean over folds of ``AUC``
            applied to ``method.predict_p(X_train, y_train, X_test)`` and the
            fold's ``y_test``; if false, it is the mean over folds of
            ``method.get_stat(X_train)``.
        data_file: argument forwarded to ``load_dictionary``.
        save_file_name: path of the file the score is written to, formatted
            with ``'%s'``.  Missing parent directories are created.
    """
    data = split_into_folds(
        standardise_outputs(standardise_inputs(load_dictionary(data_file))))
    folds = data['folds']

    if is_method:
        score = np.mean([
            AUC(method.predict_p(fold['X_train'], fold['y_train'],
                                 fold['X_test']),
                fold['y_test'])
            for fold in folds
        ])
    else:
        score = np.mean([method.get_stat(fold['X_train']) for fold in folds])

    save_file_dir = os.path.split(save_file_name)[0]
    # A bare file name has an empty directory part; os.makedirs('') raises,
    # so only create the directory when there actually is one to create.
    if save_file_dir and not os.path.isdir(save_file_dir):
        os.makedirs(save_file_dir)

    with open(save_file_name, 'w') as save_file:
        save_file.write('%s' % score)