Ejemplo n.º 1
0
def cv10_ensemble(known_dataset, known_targets, known_dataset_scaled, dt, knn, svm, fusion_algorithm, ids, prt=False, file_name=None):
	"""Repeat the ensemble cross-validation NR_FOLDS times and print averaged metrics.

	Every argument is forwarded unchanged to cross_validation_ensemble, which is
	expected to return a 7-tuple: (error, hp, hr, hf, cp, cr, cf).  The seven
	values are summed over all repetitions and their means are printed.

	When ``prt`` is truthy and the mean error is at most 0.4, the raw sums
	(not the means) are also handed to save_output together with NR_FOLDS.

	Returns None.
	"""
	error_total = 0
	hp_total, hr_total, hf_total = 0, 0, 0
	cp_total, cr_total, cf_total = 0, 0, 0

	for _ in range(NR_FOLDS):
		fold_scores = cross_validation_ensemble(
			known_dataset, known_targets, known_dataset_scaled,
			dt, knn, svm, fusion_algorithm, ids, prt, file_name)
		error, hp, hr, hf, cp, cr, cf = fold_scores

		error_total += error
		hp_total += hp
		hr_total += hr
		hf_total += hf
		cp_total += cp
		cr_total += cr
		cf_total += cf

	mean_error = float(error_total) / NR_FOLDS

	# Only runs that are good enough (mean error <= 0.4) are persisted.
	if prt and mean_error <= 0.4:
		save_output(file_name, error_total, hp_total, hr_total, hf_total, cp_total, cr_total, cf_total, NR_FOLDS)

	# Single-argument parenthesized print: same output as the print statement.
	print('Final error %f' % mean_error)
	print('Final accuracy %f' % (1 - mean_error))

	per_class_report = (
		('Highval precision %f', hp_total),
		('Highval recall %f', hr_total),
		('Highval f1 %f', hf_total),
		('Civil precision %f', cp_total),
		('Civil recall %f', cr_total),
		('Civil f1 %f', cf_total),
	)
	for template, total in per_class_report:
		print(template % (float(total) / NR_FOLDS))
Ejemplo n.º 2
0
def main():
    """Parse a locally cached recipe page, optionally transform it, then print and save it.

    Command line:
        no argument   -- print and save the parsed recipe unchanged.
        one argument  -- an integer in [1-8] selecting a transformation:
                           1, 2: cuisine  -> "italian", "chinese"
                           3, 4: diet     -> "vegetarian", "pescatarian"
                           5, 6: healthy  -> "low-fat", "low-sodium"
                           7, 8: quantity -> transformQty with factor 2, 3
        more          -- a usage message is printed and nothing else happens.

    An argument that is out of range or not an integer prints
    'You can only pick from [1-8]' and returns.  Returns None in all cases.
    """
    # Validate the command line first so that bad input never triggers the
    # file read and recipe parsing below.
    if len(sys.argv) > 2:
        print(
            'Too many arguments. You can either either just call main.py to see the recipe or pass in a single integer to select a transformation.'
        )
        return

    case_num = None
    if len(sys.argv) == 2:
        try:
            case_num = int(sys.argv[1])
        except ValueError:
            # Non-numeric input is reported like an out-of-range number
            # instead of surfacing as a traceback.
            case_num = None
        if case_num is None or case_num < 1 or case_num > 8:
            print('You can only pick from [1-8]')
            return

    # Recipe source.  Alternatives kept from development:
    #   User-inputted recipe title:
    #     url = generateURL()
    #   Hard-coded URLs:
    #     url = "http://allrecipes.com/recipe/brown-rice-and-quinoa-sushi-rolls/"
    #     url = "http://allrecipes.com/recipe/Boilermaker-Tailgate-Chili/"
    #     url = "http://allrecipes.com/recipe/jerk-chicken/"
    #     link = urllib.urlopen(url)
    #     page = link.read()
    #   Other locally cached webpages:
    #     url = "../data/Burger"
    #     url = "../data/Cake"
    url = "../data/Stir-Fry"
    # Context manager guarantees the handle is closed even if read() fails.
    with open(url + ".html") as f:
        page = f.read()

    recipe = parse_recipe(page)
    recipe = unabridgeMeasure(recipe)

    if case_num is None:
        result = recipe
    else:
        kb = KnowledgeBase()
        # Each entry defers the call so only the selected transformation runs.
        transforms = {
            1: lambda: kb.transform_cuisine("italian", recipe),
            2: lambda: kb.transform_cuisine("chinese", recipe),
            3: lambda: kb.transform_diet("vegetarian", recipe),
            4: lambda: kb.transform_diet("pescatarian", recipe),
            5: lambda: kb.transform_healthy("low-fat", recipe),
            6: lambda: kb.transform_healthy("low-sodium", recipe),
            7: lambda: transformQty(2, recipe),
            8: lambda: transformQty(3, recipe),
        }
        result = transforms[case_num]()

    prettyPrintRecipe(result)
    save_output(url, result)
Ejemplo n.º 3
0
	print 'Highval f1 %f' % hf
	print 'Civil precision %f' % cp
	print 'Civil recall %f' % cr
	print 'Civil f1 %f' % cf

	return error_rate, f1, model, (hp, hr, hf), (cp, cr, cf)

# Script entry point: build training data from project_data_file and testing
# data from addendum_data_file, ask the user for an algorithm name, then run
# new_data_single 100 times and record each run's metrics via save_output.
if __name__ == "__main__":

	# Training targets are taken from the project spreadsheet; upsampling is
	# left at whatever default Spreadsheet/Data use.
	training_spreadsheet = Spreadsheet(project_data_file)
	training_data = Data(training_spreadsheet)
	training_targets = training_data.targets

	# Testing targets are taken from the addendum spreadsheet with upsampling
	# explicitly disabled.
	testing_spreadsheet = Spreadsheet(addendum_data_file, upsampling=False)
	testing_data = Data(testing_spreadsheet, upsampling=False)
	testing_targets = testing_data.targets

	# training_data / testing_data are rebound here: from this point on they hold
	# whatever parse_theme / parse_theme_from_file return for theme 'all', not the
	# Data objects created above (those were only needed for their targets).
	[training_data, features] = parse_theme('all')
	[testing_data, feats] = parse_theme_from_file('all', addendum_data_file)
	# Both parses must yield the same feature description.
	# NOTE(review): assert statements are removed when Python runs with -O.
	assert features == feats

	# raw_input is the Python 2 builtin.  The prompt has no trailing separator,
	# so the typed text appears directly after "svm".  The answer is not
	# validated here; it is passed straight to new_data_single.
	tech = raw_input("Enter algorithm. Choose between lr, dt, knn, svm")

	# One output file per algorithm; every one of the 100 runs is passed to
	# save_output with the same file name.
	file_name = "new_single_" + tech + ".txt"
	for i in range(100):
		error_rate, f1, model, (hp, hr, hf), (cp, cr, cf) = new_data_single(training_data, training_targets, testing_data, testing_targets, tech)
		# f1 and model are unpacked but not used.  The trailing 1 is presumably
		# the number of runs the values were accumulated over -- TODO confirm
		# against save_output.
		save_output(file_name, error_rate, hp, hr, hf, cp, cr, cf, 1)

	

def _generate_models_for_wine(label, filepath, performance_file):
    """Train and record every model for one wine type.

    Args:
        label: Wine type as it appears in the model names ('Red' or 'White').
        filepath: Path of the raw CSV file, loaded through ``di``.
        performance_file: File name passed to ``save_output``.

    The steps run in a fixed order because the loaded frame is modified
    in place part-way through (the 'quality' column is deleted).
    """
    wine_df = di(filepath)

    # PCA-based models are built while 'quality' is still part of wine_df.
    pc_dict = pc_analysis(wine_df)
    pc_model_list = nn(pc_dict)

    # Split the target column off into its own single-column frame; after the
    # del, wine_df holds only the remaining (feature) columns.
    raw_quality = pd.DataFrame(wine_df['quality'])
    raw_quality.columns = ['quality']
    del wine_df['quality']

    # Model trained on all remaining raw columns.
    raw_data_dict = {
        '11 Raw Data - %s' % label: [None, wine_df, raw_quality]
    }
    raw_data_model = nn(raw_data_dict)

    # Model trained on four selected columns only.
    four_columns = pd.DataFrame(
        (wine_df['fixed acidity'], wine_df['citric acid'],
         wine_df['residual sugar'], wine_df['alcohol'])).transpose()
    four_columns_dict = {
        '4 Columns Raw Data - %s' % label: [None, four_columns, raw_quality]
    }
    four_columns_model = nn(four_columns_dict)

    model_list = pc_model_list + raw_data_model + four_columns_model

    save_output(model_list, performance_file, pc_dict)


def generate_models():
    '''
    This function generates 26 neural network models; 13 for each type of wine:
    red and white. 11 of those models utilize PCA data, 1 is trained on all raw
    feature data, and 1 is trained on 4 sensory features to equal 13 per type.

    Red wine is processed first, then white.  Results are written through
    save_output to 'red_performance.txt' and 'white_performance.txt'.
    Returns None.

    Side effect: sets the TF_CPP_MIN_LOG_LEVEL environment variable to '3'
    (TensorFlow's C++ log filter; presumably to silence its console output).
    '''

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    filepath_red = './raw_data/winequality-red.csv'
    filepath_white = './raw_data/winequality-white.csv'

    _generate_models_for_wine('Red', filepath_red, 'red_performance.txt')
    _generate_models_for_wine('White', filepath_white, 'white_performance.txt')