def main():
    """Train and evaluate linear regression on the housing dataset.

    Loads the train/test splits, normalizes features with shift-and-scale,
    fits a LinearRegression model, prints train/test MSE, and logs the
    result (including theta) to the result file.
    """
    # training parameters
    result_path = 'results/housingLiR_1.mse'
    model_name = 'housing_shiftAndScale'
    # normalization = Preprocess.zero_mean_unit_var
    normalization = Preprocess.shift_and_scale
    # cols_not_norm = (0,7,12)
    cols_not_norm = []  # feature columns excluded from normalization

    # load and preprocess training data
    training_data = loader.load_dataset('data/housing_train.txt')
    testing_data = loader.load_dataset('data/housing_test.txt')
    Preprocess.normalize_features_all(normalization, training_data[0], testing_data[0], cols_not_norm)

    # start training
    model = rm.LinearRegression()
    model.build(training_data[0], training_data[1])
    training_mse = model.test(training_data[0], training_data[1], util.mse)
    testing_mse = model.test(testing_data[0], testing_data[1], util.mse)

    # fixed: Python 2 print statements converted to print() calls for
    # consistency with the rest of the file, which is Python 3
    print('Error for training data is:')
    print(training_mse)
    print('Error for testing data is:')
    print(testing_mse)

    result = {}
    result['TrainingMSE'] = str(training_mse)
    result['TestingMSE'] = str(testing_mse)
    result['Theta'] = str(model.theta)

    # log the training result to file
    util.write_result_to_file(result_path, model_name, result)
def main():
    """Gaussian naive Bayes on the polluted spam dataset.

    Loads pickled train/test splits, fits the model, scores accuracy on
    both splits, and logs the outcome to the result file.
    """
    start = time.time()

    # training parameters
    result_path = 'results/PB2_A_spam_polluted_NB_Gaussian.acc'
    model_name = 'spam_'
    train_data_path = 'data/spam_polluted/train/data.pickle'
    test_data_path = 'data/spam_polluted/test/data.pickle'

    train_set = loader.load_pickle_file(train_data_path)
    test_set = loader.load_pickle_file(test_data_path)
    print('{:.2f} Data loaded!'.format(time.time() - start))

    # fit the model
    print('{:.2f} Building model...'.format(time.time() - start))
    nb_model = m.NBGaussian()
    nb_model.build(train_set[0], train_set[1])

    # predict on both splits
    print('{:.2f} Predicting...'.format(time.time() - start))
    train_pred = nb_model.predict(train_set[0])
    test_pred = nb_model.predict(test_set[0])

    # accuracy = fraction of matching labels
    print('{:.2f} Calculating results...'.format(time.time() - start))
    train_acc = (train_set[1] == train_pred).sum() / train_set[0].shape[0]
    test_acc = (test_set[1] == test_pred).sum() / test_set[0].shape[0]
    print('{} Final results. Train acc: {}, Test acc: {}'.format(time.time() - start, train_acc, test_acc))

    result = {'TrainingAcc': train_acc, 'TestingAcc': test_acc}

    # log the training result to file
    util.write_result_to_file(result_path, model_name, result, True)
def main():
    """Bernoulli naive Bayes on the spam dataset with missing features.

    Per-feature means (loaded from a pickle) are passed to the model,
    which presumably uses them as binarization thresholds — TODO confirm
    in NBBernoulli.  Prints train/test accuracy and logs the result.
    """
    st = time.time()

    # training parameters
    result_path = 'results/PB4_spam_polluted_missing_NB_Bern.acc'
    model_name = 'spam_'
    mean_path = 'data/spam_polluted_missing/train/f_mean.pickle'
    train_data_path = 'data/spam_polluted_missing/train/data.pickle'
    test_data_path = 'data/spam_polluted_missing/test/data.pickle'

    # load and preprocess training data
    tr_data = loader.load_pickle_file(train_data_path)
    te_data = loader.load_pickle_file(test_data_path)
    print('{:.2f} Data loaded!'.format(time.time() - st))

    # load per-feature means
    means = loader.load_pickle_file(mean_path)
    print('{:.2f} Means loaded!'.format(time.time() - st))

    # start training
    # (removed dead locals from the original: roc, auc, tr_n/f_d, te_n,
    # te_auc and round were assigned but never used)
    model = m.NBBernoulli(means)
    model.build(tr_data[0], tr_data[1])
    training_acc = model.test(tr_data[0], tr_data[1], util.acc)
    testing_acc = model.test(te_data[0], te_data[1], util.acc)
    print('Final results. Train acc: {}, Test acc: {}'.format(training_acc, testing_acc))

    result = {}
    result['TrainingAcc'] = training_acc
    result['TestingAcc'] = testing_acc

    # log the training result to file
    util.write_result_to_file(result_path, model_name, result, True)
def main():
    """AdaBoost with decision stumps on the polluted spam dataset.

    Loads pickled train/test splits, converts {0,1} labels to {-1,1},
    boosts stumps for `round_limit` rounds, then logs the per-round
    errors, a feature ranking, and the final model.
    """
    st = time.time()

    # training parameters
    round_limit = 15
    result_path = 'results/PB1_B_spam_2.acc'
    model_name = 'spam_'
    model_path = result_path + '.model'
    threshes_path = 'data/spambase_polluted.threshes'
    train_data_path = 'data/spam_polluted/train/data.pickle'
    test_data_path = 'data/spam_polluted/test/data.pickle'

    # load and preprocess training data
    tr_data = loader.load_pickle_file(train_data_path)
    te_data = loader.load_pickle_file(test_data_path)
    print('{:.2f} Data loaded!'.format(time.time() - st))

    # convert labels from {0, 1} to {-1, 1}
    util.replace_zero_label_with_neg_one(tr_data)
    util.replace_zero_label_with_neg_one(te_data)
    print('{:.2f} Label converted!'.format(time.time() - st))

    # load thresholds
    threshes = loader.load_pickle_file(threshes_path)
    print('{:.2f} Thresholds loaded!'.format(time.time() - st))

    # start training
    training_errs = []
    testing_errs = []
    # AUC/ROC tracking is disabled below; these stay None in the log
    te_auc_1st_boost = None
    te_roc_1st_boost = None
    auc = 0.0
    # thresholds cheat sheet deliberately not precomputed here (the table
    # would be huge for this feature count); None makes the stumps evaluate
    # thresholds on the fly
    thresh_cs = None

    tr_n, f_d = np.shape(tr_data[0])
    te_n, = np.shape(te_data[1])

    # prepare initial (uniform) sample distribution
    d = util.init_distribution(len(tr_data[0]))
    # thresh_cs = util.pre_compute_threshes(tr_data[0], tr_data[1], threshes)
    # print('{:.2f} Thresholds cheat sheet computed!'.format(time.time() - st))
    boost = b.Boosting(d)
    testing_predict = np.zeros((1, te_n)).tolist()[0]
    training_predict = np.zeros((1, tr_n)).tolist()[0]
    round_tr_err = []
    round_te_err = []
    round_model_err = []

    round_num = 0  # renamed from `round` to avoid shadowing the builtin
    while round_num < round_limit:
        round_num += 1
        # fit one more weighted stump and fold it into the running predictions
        boost.add_model(ds.DecisionStump, tr_data[0], tr_data[1], threshes, thresh_cs)
        boost.update_predict(tr_data[0], training_predict)
        boost.update_predict(te_data[0], testing_predict)
        c_model_err = boost.model[-1].w_err  # weighted error of the newest stump
        round_model_err.append(c_model_err)
        c_f_ind = boost.model[-1].f_ind
        c_thresh = boost.model[-1].thresh
        c_tr_err = util.get_err_from_predict(training_predict, tr_data[1])
        c_te_err = util.get_err_from_predict(testing_predict, te_data[1])
        # AUC computation disabled for speed:
        # c_te_auc = util.get_auc_from_predict(testing_predict, te_data[1])
        round_tr_err.append(c_tr_err)
        round_te_err.append(c_te_err)
        print('{:.2f} Round: {} Feature: {} Threshold: {} Round_err: {:.12f} Train_err: {:.12f} Test_err {:.12f} AUC {:.12f}'.format(time.time() - st, round_num, c_f_ind, c_thresh, c_model_err, c_tr_err, c_te_err, 0))

    # record final-round errors
    training_errs.append(round_tr_err[-1])
    testing_errs.append(round_te_err[-1])
    # get feature ranking from the predictions
    ranked_f = util.get_f_ranking_from_predictions(boost, threshes)
    round_err_1st_boost = round_model_err
    tr_errs_1st_boost = round_tr_err
    te_errs_1st_boost = round_te_err
    # te_auc_1st_boost = round_te_auc
    # _, te_roc_1st_boost = util.get_auc_from_predict(testing_predict, te_data[1], True)

    mean_training_err = np.mean(training_errs)
    mean_testing_err = np.mean(testing_errs)
    # fixed: the original format string contained a raw newline inside a
    # single-quoted literal (a SyntaxError); joined into one literal
    print('Final results. Mean Train err: {}, Mean Test err: {}'.format(mean_training_err, mean_testing_err))
    print('Top 10 features: ')
    # print(ranked_f[:10])

    result = {}
    result['Trainingerrs'] = training_errs
    result['MeanTrainingAcc'] = mean_training_err
    result['Testingerrs'] = testing_errs
    result['MeanTestingAcc'] = mean_testing_err
    result['1stBoostTrainingError'] = tr_errs_1st_boost
    result['1stBoostTestingError'] = te_errs_1st_boost
    result['1stBoostModelError'] = round_err_1st_boost
    result['1stBoostTestingAUC'] = te_auc_1st_boost
    result['1stBoostTestingROC'] = te_roc_1st_boost
    result['rankedFeatures'] = ranked_f
    # result['ROC'] = str(roc)
    result['AUC'] = auc

    # store the model
    loader.save(model_path, boost)
    # log the training result to file
    util.write_result_to_file(result_path, model_name, result, True)
def main():
    # Active-learning style experiment: AdaBoost with decision stumps on
    # spambase, trained on randomly-picked subsets of growing size
    # (5%..50% of a training pool) and evaluated on one fixed held-out fold.

    # training parameter
    round_limit = 50
    result_path = 'results/spamActive_random_final_1.acc'
    model_name = 'spam_active'
    threshes_path = 'data/spambase.threshes'

    # load and preprocess training data
    training_data = loader.load_dataset('data/spambase.data')

    # convert labels from {0, 1} to {-1, 1} for boosting
    util.replace_zero_label_with_neg_one(training_data)

    # load thresholds
    threshes = loader.load_pickle_file(threshes_path)

    # start training
    training_errs = []
    testing_errs = []
    # round_err_1st_boost = None
    # tr_errs_1st_boost = None
    # te_errs_1st_boost = None
    # te_auc_1st_boost = None
    roc = []
    auc = 0.0

    # 5 folds: fold 1 becomes the fixed test set, the rest the training pool
    k_folds = Preprocess.prepare_k_folds(training_data, 5)
    tr_data_pool, te_data = Preprocess.get_i_fold(k_folds, 1)
    data_set = DataSet.DataSet(tr_data_pool)
    data_rates = (5, 10, 15, 20, 30, 50)  # percentages of the pool to train on

    for c in data_rates:
        # randomly pick c% of the pool; the False flag presumably means the
        # picked samples are not removed from the pool — TODO confirm in
        # DataSet.random_pick
        tr_data = data_set.random_pick(c, False)
        tr_n, f_d = np.shape(tr_data[0])
        te_n, = np.shape(te_data[1])

        # prepare distribution (uniform over the picked training samples)
        d = util.init_distribution(len(tr_data[0]))
        # compute thresholds cheat sheet
        thresh_cs = util.pre_compute_threshes(tr_data[0], tr_data[1], threshes)
        boost = b.Boosting(d)
        testing_predict = np.zeros((1, te_n)).tolist()[0]
        training_predict = np.zeros((1, tr_n)).tolist()[0]
        round_tr_err = []
        round_te_err = []
        round_model_err = []
        round_te_auc = []
        converged = False  # AUC convergence check below is commented out
        tol = 1e-5
        te_auc = 2.  # sentinel larger than any real AUC
        round = 0
        while round < round_limit:  # and not converged:
            round += 1
            # fit one more weighted stump and fold it into the predictions
            boost.add_model(ds.DecisionStump, tr_data[0], tr_data[1], threshes, thresh_cs)
            boost.update_predict(tr_data[0], training_predict)
            boost.update_predict(te_data[0], testing_predict)
            c_model_err = boost.model[-1].w_err  # weighted error of the newest stump
            round_model_err.append(c_model_err)
            c_f_ind = boost.model[-1].f_ind      # feature the stump split on
            c_thresh = boost.model[-1].thresh    # threshold the stump chose
            c_tr_err = util.get_err_from_predict(training_predict, tr_data[1])
            c_te_err = util.get_err_from_predict(testing_predict, te_data[1])
            # calculate the AUC for testing results (disabled)
            # c_te_auc = util.get_auc_from_predict(testing_predict, te_data[1])
            round_tr_err.append(c_tr_err)
            round_te_err.append(c_te_err)
            # round_te_auc.append(c_te_auc)
            print('Data {}% Round: {} Feature: {} Threshold: {:.3f} Round_err: {:.12f} Train_err: {:.12f} Test_err {:.12f} AUC {}'.format(c, round, c_f_ind, c_thresh, c_model_err, c_tr_err, c_te_err, 0))
            # converged = abs(c_te_auc - te_auc) / te_auc <= tol
            # te_auc = c_te_auc
        # record the final-round errors for this data rate
        training_errs.append(round_tr_err[-1])
        testing_errs.append(round_te_err[-1])
        # break  # for testing

    mean_training_err = np.mean(training_errs)
    mean_testing_err = np.mean(testing_errs)
    print('Training errs are:')
    print(training_errs)
    print('Mean training err is:')
    print(mean_training_err)
    print('Testing errs are:')
    print(testing_errs)
    print('Mean testing err is:')
    print(mean_testing_err)

    result = {}
    result['Trainingerrs'] = training_errs
    result['MeanTrainingAcc'] = mean_training_err
    result['Testingerrs'] = testing_errs
    result['MeanTestingAcc'] = mean_testing_err
    # result['ROC'] = str(roc)
    result['AUC'] = auc

    # log the training result to file
    util.write_result_to_file(result_path, model_name, result, True)
def main():
    """Single decision-tree run on spambase using the first fold of a
    k-fold split; reports accuracy on both splits and logs the result."""
    # training parameters
    k = 10  # fold
    layer_thresh = 2
    T = 50  # kept from the original; unused below
    result_path = 'results/spamDT_final.acc'
    model_name = 'spam_' + str(k) + 'fold'
    threshes_path = 'data/spambase.threshes'

    # load and preprocess training data
    training_data = loader.load_dataset('data/spambase.data')
    # load thresholds
    threshes = loader.load_pickle_file(threshes_path)

    # start training
    training_errs = []
    testing_errs = []
    roc = []
    auc = 0.0
    k_folds = Preprocess.prepare_k_folds(training_data, k)

    for fold_ind in range(1):  # only the first fold is evaluated
        fold_start = time.time()
        tr_data, te_data = Preprocess.get_i_fold(k_folds, fold_ind)
        tr_n, f_d = np.shape(tr_data[0])
        te_n, = np.shape(te_data[1])
        tree = dt.DecisionTree()
        tree.build(tr_data[0], tr_data[1], threshes, layer_thresh)
        # test the model and record accuracy on both splits
        training_errs.append(tree.test(tr_data[0], tr_data[1], util.acc))
        testing_errs.append(tree.test(te_data[0], te_data[1], util.acc))
        print('Round {} finishes, time used: {}'.format(fold_ind, time.time() - fold_start))

    mean_training_err = np.mean(training_errs)
    mean_testing_err = np.mean(testing_errs)
    print(str(k) + '-fold validation done. Training errs are:')
    print(training_errs)
    print('Mean training err is:')
    print(mean_training_err)
    print('Testing errs are:')
    print(testing_errs)
    print('Mean testing err is:')
    print(mean_testing_err)

    result = {
        'Fold': k,
        'Trainingerrs': training_errs,
        'MeanTrainingAcc': mean_training_err,
        'Testingerrs': testing_errs,
        'MeanTestingAcc': mean_testing_err,
        'ROC': roc,
        'AUC': auc,
    }

    # log the training result to file
    util.write_result_to_file(result_path, model_name, result, True)
# NOTE(review): tail fragment of a k-fold training routine — the enclosing
# `def` (and the definitions of mean_training_acc, testing_accs, etc.) lies
# outside this chunk.
# Fixes: converted Python 2 print statements to print() calls so the chunk is
# consistent with the rest of the file (Python 3), and corrected the 'AOC'
# typo in the user-facing message to 'AUC' (the value printed is `auc`).
print(mean_training_acc)
print('Testing accs are:')
print(testing_accs)
print('Mean testing acc is:')
print(mean_testing_acc)
print('Mean Training Confusion Matrix is:')
print(mean_training_cm)
print('Mean Testing Confusion Matrix is:')
print(mean_testing_cm)
print('AUC for fold 1 is:')
print(auc)

result = {}
result['Fold'] = str(k)
result['TrainingAccs'] = str(training_accs)
result['MeanTrainingAcc'] = str(mean_training_acc)
result['TestingAccs'] = str(testing_accs)
result['MeanTestingAcc'] = str(mean_testing_acc)
result['TrainingCMs'] = str(training_cms)
result['TestingCMs'] = str(testing_cms)
result['MeanTrainingCM'] = str(mean_training_cm)
result['MeanTestingCM'] = str(mean_testing_cm)
result['ROC'] = str(roc)
result['AUC'] = str(auc)

# log the training result to file
util.write_result_to_file(result_path, model_name, result)
def main():
    """AdaBoost with decision stumps on the full spam dataset (no held-out
    test split); logs per-round training error and a feature ranking."""
    # training parameters
    k = 10  # fold
    round_limit = 300
    result_path = 'results/PB1_A_spam_final.acc'
    model_name = 'spam_' + str(k) + 'fold'
    threshes_path = 'data/spambase.threshes'
    data_path = 'data/spam/data.pickle'

    # load and preprocess training data
    training_data = loader.load_pickle_file(data_path)
    # convert labels from {0, 1} to {-1, 1}
    util.replace_zero_label_with_neg_one(training_data)
    # load thresholds
    threshes = loader.load_pickle_file(threshes_path)

    # start training
    training_errs = []
    testing_errs = []  # stays empty: no test split in this run
    # never assigned below; logged as None placeholders in the result
    round_err_1st_boost = None
    tr_errs_1st_boost = None
    te_errs_1st_boost = None
    te_auc_1st_boost = None
    te_roc_1st_boost = None
    auc = 0.0

    tr_data = training_data
    tr_n, f_d = np.shape(tr_data[0])

    # prepare initial (uniform) sample distribution
    d = util.init_distribution(len(tr_data[0]))
    # compute thresholds cheat sheet
    thresh_cs = util.pre_compute_threshes(tr_data[0], tr_data[1], threshes)
    boost = b.Boosting(d)
    training_predict = np.zeros((1, tr_n)).tolist()[0]
    round_tr_err = []
    round_model_err = []

    round_num = 0  # renamed from `round` to avoid shadowing the builtin
    while round_num < round_limit:
        round_num += 1
        # fit one more weighted stump and fold it into the running predictions
        boost.add_model(ds.DecisionStump, tr_data[0], tr_data[1], threshes, thresh_cs)
        boost.update_predict(tr_data[0], training_predict)
        c_model_err = boost.model[-1].w_err  # weighted error of the newest stump
        round_model_err.append(c_model_err)
        c_f_ind = boost.model[-1].f_ind
        c_thresh = boost.model[-1].thresh
        c_tr_err = util.get_err_from_predict(training_predict, tr_data[1])
        round_tr_err.append(c_tr_err)
        print('Round: {} Feature: {} Threshold: {} Round_err: {:.12f} Train_err: {:.12f} Test_err {:.12f} AUC {:.12f}'.format(round_num, c_f_ind, c_thresh, c_model_err, c_tr_err, 0, 0))

    # record the final-round training error
    training_errs.append(round_tr_err[-1])
    # get feature ranking from the predictions
    ranked_f = util.get_f_ranking_from_predictions(boost, threshes)

    mean_training_err = np.mean(training_errs)
    # fixed: the original format string contained a raw newline inside a
    # single-quoted literal (a SyntaxError); joined into one literal
    print('Final results. Mean Train err: {}, Mean Test err: {}'.format(mean_training_err, 0))
    print('Top 10 features: ')
    print(ranked_f[:10])

    result = {}
    result['Fold'] = k
    result['Trainingerrs'] = training_errs
    result['MeanTrainingAcc'] = mean_training_err
    result['Testingerrs'] = testing_errs
    result['1stBoostTrainingError'] = tr_errs_1st_boost
    result['1stBoostTestingError'] = te_errs_1st_boost
    result['1stBoostModelError'] = round_err_1st_boost
    result['1stBoostTestingAUC'] = te_auc_1st_boost
    result['1stBoostTestingROC'] = te_roc_1st_boost
    result['rankedFeatures'] = ranked_f
    # result['ROC'] = str(roc)
    result['AUC'] = auc

    # log the training result to file
    util.write_result_to_file(result_path, model_name, result, True)
def main():
    """AdaBoost with decision stumps on the UCI crx (or vote) dataset,
    evaluated with k-fold cross validation; logs per-fold final errors."""
    # training parameters
    target = 'crx'
    # target = 'vote'
    k = 10  # fold
    round_limit = 150

    # per-dataset paths
    if target == 'crx':
        result_path = 'results/crxBoosting_final_1.acc'
        model_name = 'crx_' + str(k) + 'fold'
        threshes_path = 'data/crx.threshes'
        data_path = 'data/crx_parsed.data'
    else:
        result_path = 'results/voteBoosting_final.acc'
        model_name = 'vote_' + str(k) + 'fold'
        threshes_path = 'data/vote.threshes'
        data_path = 'data/vote_parsed.data'

    # load and preprocess training data
    training_data = loader.load_pickle_file(data_path)
    # load thresholds
    threshes = loader.load_pickle_file(threshes_path)

    # start training
    training_errs = []
    testing_errs = []
    # never assigned below; logged as None placeholders in the result
    round_err_1st_boost = None
    tr_errs_1st_boost = None
    te_errs_1st_boost = None
    te_auc_1st_boost = None
    auc = 0.0

    k_folds = Preprocess.prepare_k_folds(training_data, k)
    for i in range(k):
        tr_data, te_data = Preprocess.get_i_fold(k_folds, i)
        tr_n, f_d = np.shape(tr_data[0])
        te_n, = np.shape(te_data[1])

        # prepare initial (uniform) sample distribution
        d = util.init_distribution(len(tr_data[0]))
        # compute thresholds cheat sheet
        thresh_cs = util.pre_compute_threshes_uci(tr_data[0], tr_data[1], threshes)
        boost = b.Boosting(d)
        testing_predict = np.zeros((1, te_n)).tolist()[0]
        training_predict = np.zeros((1, tr_n)).tolist()[0]

        # removed dead locals from the original (round_* accumulators,
        # converged/tol/te_auc): all of their uses were commented out
        round_num = 0  # renamed from `round` to avoid shadowing the builtin
        while round_num < round_limit:
            round_num += 1
            # fit one more weighted stump and fold it into the predictions
            boost.add_model(ds.DecisionStump, tr_data[0], tr_data[1], threshes, thresh_cs)
            boost.update_predict(tr_data[0], training_predict)
            boost.update_predict(te_data[0], testing_predict)
            c_model_err = boost.model[-1].w_err  # weighted error of the newest stump
            c_f_ind = boost.model[-1].f_ind
            c_thresh = boost.model[-1].thresh
            c_tr_err = util.get_err_from_predict(training_predict, tr_data[1])
            c_te_err = util.get_err_from_predict(testing_predict, te_data[1])
            print('Round: {} Feature: {} Threshold: {} Round_err: {:.12f} Train_err: {:.12f} Test_err {:.12f}'.format(round_num, c_f_ind, c_thresh, c_model_err, c_tr_err, c_te_err))
        # record the final-round errors for this fold
        training_errs.append(c_tr_err)
        testing_errs.append(c_te_err)
        # break  # for testing

    mean_training_err = np.mean(training_errs)
    mean_testing_err = np.mean(testing_errs)
    # fixed: the original format string contained a raw newline inside a
    # single-quoted literal (a SyntaxError); joined into one literal
    print(str(k) + '-fold validation done. Training errs are:')
    print(training_errs)
    print('Mean training err is:')
    print(mean_training_err)
    print('Testing errs are:')
    print(testing_errs)
    print('Mean testing err is:')
    print(mean_testing_err)

    result = {}
    result['Fold'] = str(k)
    result['Trainingerrs'] = str(training_errs)
    result['MeanTrainingAcc'] = str(mean_training_err)
    result['Testingerrs'] = str(testing_errs)
    result['MeanTestingAcc'] = str(mean_testing_err)
    result['1stBoostTrainingError'] = str(tr_errs_1st_boost)
    result['1stBoostTestingError'] = str(te_errs_1st_boost)
    result['1stBoostModelError'] = str(round_err_1st_boost)
    result['1stBoostTestingAUC'] = str(te_auc_1st_boost)
    # result['ROC'] = str(roc)
    result['AUC'] = str(auc)

    # log the training result to file
    util.write_result_to_file(result_path, model_name, result)
def main():
    """AdaBoost with optimal decision stumps on spambase; evaluates the
    first fold of a k-fold split and tracks per-round test error and AUC."""
    # training parameters
    k = 10  # fold
    round_limit = 100
    result_path = 'results/spamODSBoosting_final.acc'
    model_name = 'spam_' + str(k) + 'fold'
    threshes_path = 'data/spambase.threshes'

    # load and preprocess training data
    training_data = loader.load_dataset('data/spambase.data')
    # convert labels from {0, 1} to {-1, 1}
    util.replace_zero_label_with_neg_one(training_data)
    # load thresholds
    threshes = loader.load_pickle_file(threshes_path)

    # start training
    training_errs = []
    testing_errs = []
    round_err_1st_boost = None
    tr_errs_1st_boost = None
    te_errs_1st_boost = None
    te_auc_1st_boost = None
    te_roc_1st_boost = None
    auc = 0.0

    k_folds = Preprocess.prepare_k_folds(training_data, k)
    for i in range(1):  # only the first fold is evaluated
        tr_data, te_data = Preprocess.get_i_fold(k_folds, i)
        tr_n, f_d = np.shape(tr_data[0])
        te_n, = np.shape(te_data[1])

        # prepare initial (uniform) sample distribution
        d = util.init_distribution(len(tr_data[0]))
        # compute thresholds cheat sheet
        thresh_cs = util.pre_compute_threshes(tr_data[0], tr_data[1], threshes)
        boost = b.Boosting(d)
        testing_predict = np.zeros((1, te_n)).tolist()[0]
        training_predict = np.zeros((1, tr_n)).tolist()[0]
        round_tr_err = []
        round_te_err = []
        round_model_err = []
        round_te_auc = []
        converged = False  # computed each round; early stop itself is disabled
        tol = 1e-5
        te_auc = 2.  # sentinel larger than any real AUC

        round_num = 0  # renamed from `round` to avoid shadowing the builtin
        while round_num < round_limit:  # and not converged:
            round_num += 1
            # fit one more weighted stump and fold it into the predictions
            boost.add_model(ds.DecisionStump, tr_data[0], tr_data[1], threshes, thresh_cs)
            boost.update_predict(tr_data[0], training_predict)
            boost.update_predict(te_data[0], testing_predict)
            c_model_err = boost.model[-1].w_err  # weighted error of the newest stump
            round_model_err.append(c_model_err)
            c_f_ind = boost.model[-1].f_ind
            c_thresh = boost.model[-1].thresh
            c_tr_err = util.get_err_from_predict(training_predict, tr_data[1])
            c_te_err = util.get_err_from_predict(testing_predict, te_data[1])
            # calculate the AUC for testing results
            c_te_auc = util.get_auc_from_predict(testing_predict, te_data[1])
            round_tr_err.append(c_tr_err)
            round_te_err.append(c_te_err)
            round_te_auc.append(c_te_auc)
            print('Round: {} Feature: {} Threshold: {} Round_err: {:.12f} Train_err: {:.12f} Test_err {:.12f} AUC {:.12f}'.format(round_num, c_f_ind, c_thresh, c_model_err, c_tr_err, c_te_err, c_te_auc))
            # relative AUC change; only updates state while the loop
            # condition's convergence test is commented out
            converged = abs(c_te_auc - te_auc) / te_auc <= tol
            te_auc = c_te_auc

        # record the final-round errors for this fold
        training_errs.append(round_tr_err[-1])
        testing_errs.append(round_te_err[-1])
        if i == 0:
            round_err_1st_boost = round_model_err
            tr_errs_1st_boost = round_tr_err
            te_errs_1st_boost = round_te_err
            te_auc_1st_boost = round_te_auc
            _, te_roc_1st_boost = util.get_auc_from_predict(testing_predict, te_data[1], True)
        # break  # for testing

    mean_training_err = np.mean(training_errs)
    mean_testing_err = np.mean(testing_errs)
    # fixed: the original format string contained a raw newline inside a
    # single-quoted literal (a SyntaxError); joined into one literal
    print(str(k) + '-fold validation done. Training errs are:')
    print(training_errs)
    print('Mean training err is:')
    print(mean_training_err)
    print('Testing errs are:')
    print(testing_errs)
    print('Mean testing err is:')
    print(mean_testing_err)

    result = {}
    result['Fold'] = k
    result['Trainingerrs'] = training_errs
    result['MeanTrainingAcc'] = mean_training_err
    result['Testingerrs'] = testing_errs
    result['MeanTestingAcc'] = mean_testing_err
    result['1stBoostTrainingError'] = tr_errs_1st_boost
    result['1stBoostTestingError'] = te_errs_1st_boost
    result['1stBoostModelError'] = round_err_1st_boost
    result['1stBoostTestingAUC'] = te_auc_1st_boost
    result['1stBoostTestingROC'] = te_roc_1st_boost
    # result['ROC'] = str(roc)
    result['AUC'] = auc

    # log the training result to file
    util.write_result_to_file(result_path, model_name, result, True)