コード例 #1
0
ファイル: adaboost_main.py プロジェクト: goparajug/ADV-ML
def main():
    """Run AdaBoost with decision stumps on every train/test split.

    Calls ``split_data`` to create the splits, then for each split index
    ``i`` loads ``./data/bupa_train<i>.txt`` and ``./data/bupa_test<i>.txt``,
    runs ``T`` boosting iterations and collects the errors returned by
    ``calculate_error`` for the train and test scores.
    """
    classifier = AdaBoost(DecisionStump)

    num_sets = 50      # number of train/test splits to generate and evaluate
    T = 100            # number of boosting iterations per split
    percentage = 0.9   # passed to split_data; presumably the train fraction -- confirm

    # One entry per split, each being whatever calculate_error returns.
    all_errors_train = []
    all_errors_test = []
    # Kept from the original; nothing in this function fills them.
    aver_error_train = []
    aver_error_test = []

    # Split the data into num_sets datasets.
    split_data(percentage, num_sets)

    # Run every split for T boosting iterations.
    for i in range(num_sets):
        data_split_train = './data/bupa_train' + str(i) + ".txt"
        data_split_test = './data/' + "bupa_test" + str(i) + ".txt"
        X_train, Y_train = load_data(data_split_train)
        X_test, Y_test = load_data(data_split_test)

        score_train, score_test = classifier.run_adaboost(X_train, Y_train, T, X_test)

        # These lines were indented with a tab/space mix in the original,
        # which made the loop body unparseable; they belong to the loop.
        error_train = calculate_error(T, score_train, Y_train)
        error_test = calculate_error(T, score_test, Y_test)

        all_errors_train.append(error_train)
        all_errors_test.append(error_test)
コード例 #2
0
def main():
    """Run AdaBoost on every split, average the errors, and save the results.

    Steps:
      1. ``split_data`` creates ``num_sets`` train/test splits.
      2. For each split, AdaBoost (with ``DecisionStump``) is run for ``T``
         iterations and ``calculate_error`` is applied to the train and
         test scores.
      3. For each boosting iteration ``j`` the errors are averaged over all
         splits and saved through ``save_result_final`` under the labels
         ``'train'`` and ``'test'``.
      4. AdaBoost is run once more on the full ``./data/bupa.data`` file and
         its result is saved under the label ``'empirical'``.
    """
    classifier = AdaBoost(DecisionStump)

    num_sets = 50      # number of train/test splits
    T = 100            # number of boosting iterations
    percentage = 0.9   # passed to split_data; presumably the train fraction -- confirm

    all_errors_train = []
    all_errors_test = []

    split_data(percentage, num_sets)

    for i in range(num_sets):
        data_split_train = './data/bupa_train' + str(i) + ".txt"
        data_split_test = './data/' + "bupa_test" + str(i) + ".txt"
        X_train, Y_train = load_data(data_split_train)
        X_test, Y_test = load_data(data_split_test)

        score_train, score_test = classifier.run_adaboost(X_train, Y_train, T, X_test)
        all_errors_train.append(calculate_error(T, score_train, Y_train))
        all_errors_test.append(calculate_error(T, score_test, Y_test))

    # Average over the splits, producing exactly one value per boosting
    # iteration.  The original appended inside the inner loop, which stored
    # T * num_sets partial running sums instead of T averages.
    # float() keeps this a true division under Python 2 as well.
    divisor = float(num_sets)
    aver_error_train = [
        sum(all_errors_train[i][j] for i in range(num_sets)) / divisor
        for j in range(T)
    ]
    aver_error_test = [
        sum(all_errors_test[i][j] for i in range(num_sets)) / divisor
        for j in range(T)
    ]

    save_result_final(aver_error_train, 'train')
    save_result_final(aver_error_test, 'test')

    # Extra run on the complete dataset: no test set is passed (None) and the
    # final positional flag is True; see run_adaboost for its exact meaning.
    dataset_here = "./data/bupa.data"
    X_all, Y_all = load_data(dataset_here)
    score_optional = classifier.run_adaboost(X_all, Y_all, T, None, True)
    save_result_final(score_optional, 'empirical')
コード例 #3
0
def main():
    """Run AdaBoost with decision stumps for every configuration and save results.

    Iterates over every normalization in ``NORM``, every set number in
    ``1..NUM_SETS``, both outlier settings (with, then without) and every
    network name in ``NET_NAMES``.  The classifier is binary, so each
    network is handled separately, one at a time.

    For each combination the train and test errors over the ``T`` boosting
    iterations are averaged and passed to ``save_result_final`` together
    with the results file path.  A sorted summary of the mean test errors
    is written to a second file at the end.
    """
    classification = []
    ada_folder = OUTPUT_FOLDER + 'adaboost/'
    if not os.path.exists(ada_folder):
        os.makedirs(ada_folder)
    output_file = ada_folder + 'results.out'

    # Start the results file with its header; opening with "w" truncates any
    # previous content.
    with open(output_file, "w") as f:
        f.write("# ADABOOST RESULTS, TRAIN/TEST FRACTION: " + str(PERCENTAGE)  + "\n")
        f.write("# Net   Norm   Set   OL?   Accu. Train   Accu Test\n")

    # Choose the classifier.
    classifier = AdaBoost(DecisionStump)

    for norm in NORM:
        for number in range(1, NUM_SETS + 1):
            # Same pipeline for both outlier settings, in the original
            # order: with outliers first, then without.
            for with_outlier in (True, False):
                input_train = get_input_data('train', number, norm, with_outlier)
                input_test = get_input_data('test', number, norm, with_outlier)

                for net_name in NET_NAMES:
                    X_train, Y_train = one_against_all.load_data(input_train, net_name)
                    X_test, Y_test = one_against_all.load_data(input_test, net_name)

                    # Single-argument print(...) emits the same text under
                    # both Python 2 and Python 3.
                    print('Calculating adaboost for net ' + net_name + ' with  normalization ' + norm + ' and set ' + str(number))
                    score_train, score_test = classifier.run_adaboost(X_train, Y_train, T, X_test)

                    error_train = calculate_error(T, score_train, Y_train)
                    error_test = calculate_error(T, score_test, Y_test)

                    # Mean error over the entries returned by calculate_error.
                    error_train_total = sum(error_train)/len(error_train)
                    error_test_total = sum(error_test)/len(error_test)

                    save_result_final(error_train_total, error_test_total, output_file, net_name, norm, number, with_outlier)
                    # Summary line: "<test error>, <norm>, <set>, <T|F>".
                    classification.append(str(round(error_test_total,3))  +', ' + str(norm) + ', ' + str(number) + ', ' + str(with_outlier)[0] + '\n')

    # Find the best classifiers: the entries start with the test error, so a
    # plain string sort orders them by that leading text.
    classification.sort()
    # NOTE(review): the path is output_file + 'good_classification', i.e.
    # ".../results.outgood_classification" -- confirm the missing separator
    # is intended.  Kept as-is so the output location does not change.
    with open(output_file + 'good_classification', "w") as f:
        f.write("\n\n\nClassification\n\n")
        for feat in classification:
            # Each entry already ends with '\n', so this leaves a blank line
            # between entries (original behavior, preserved).
            f.write(feat + '\n')
        f.write("\n")

    print('Results saved at ' +  ada_folder)
    print('Done!!!')