Example #1
0
def compare_with_default_hyper_params(prj):
    """Print 'RF' model scores with the project's own hyper-parameters, then with none.

    ``prj`` is a mapping providing ``'name'`` and ``'params'``. Two models are
    built with caching disabled: the first receives ``prj['params']``, the
    second an empty dict (presumably the model's defaults -- confirm against
    IncrementalLearningModel). Each model is run and its scores printed; a
    blank line is printed at the end.
    """
    project_name = prj['name']
    for hyper_params in (prj['params'], {}):
        model = IncrementalLearningModel(
            project_name,
            'RF',
            30,
            1,
            use_cache=False,
            hyper_params=hyper_params,
        )
        # The predictions themselves are discarded; only the scores are shown.
        model.get_predicted_data()
        model.print_scores()

    print()
def get_number(prj, num_of_learn_days):
    """Return RiskIsolate's maximum improvement for a 'DecisionTree' model.

    ``prj`` is handed to IncrementalLearningModel unchanged as its first
    argument and ``num_of_learn_days`` as its third.
    """
    model = IncrementalLearningModel(prj, 'DecisionTree', num_of_learn_days, 1)
    probabilities, outcomes = model.get_predicted_data()

    isolate = RiskIsolate(probabilities, outcomes, test_all_after_size_4=False)
    # Return value intentionally unused; presumably this populates the state
    # that get_max_improvement() reads -- confirm against RiskIsolate.
    isolate.get_num_of_exec_per_batch_size()
    return isolate.get_max_improvement()
Example #3
0
def batch_stop_4():
    """Print a per-project table of BatchBisect results with ``stop_at_4=True``.

    For every entry of the module-level ``project_list`` an 'RF' model is
    built (``n_estimators`` fixed to 10) and its test labels are fed to
    BatchBisect with a maximum batch size of 8. The table shows each project's
    ``lowest_num_of_exec`` and ``max_improvement``.
    """
    header = "\n{:28} | {} | {}\n".format(
        'Project',
        'Number of build execution',
        'Improvement over TestAll',
    )
    # Collect the pieces and join once at the end.
    pieces = [header, '-' * 83, '\n']

    print('Creating learning model for each project ...')

    for prj in project_list:
        model = IncrementalLearningModel(
            prj['name'], 'RF', 30, 1, hyper_params={'n_estimators': 10})
        # Only the test labels are needed; the probabilities are ignored.
        _, y_test = model.get_predicted_data()

        strategy = BatchBisect(y_test, stop_at_4=True, batch_size_max=8)
        strategy.get_num_of_exec_per_batch_size()

        executions = str(strategy.lowest_num_of_exec).center(25)
        improvement = '{:.2f} %'.format(strategy.max_improvement).center(22)
        pieces.append(
            '{:28} | {} | {} \n'.format(prj['name'], executions, improvement))

    print(''.join(pieces))
Example #4
0
def get_results(prj):
    """Return a BatchBisect object evaluated on the project's test labels.

    An 'RF' model is built for ``prj['name']``; its test labels go to
    BatchBisect (``stop_at_4=False``, maximum batch size 20). The object is
    returned after ``get_num_of_exec_per_batch_size()`` has been called on it.
    """
    model = IncrementalLearningModel(prj['name'], 'RF', 30, 1)
    # The probabilities are not used by BatchBisect.
    _, labels = model.get_predicted_data()

    result = BatchBisect(labels, stop_at_4=False, batch_size_max=20)
    result.get_num_of_exec_per_batch_size()
    return result
Example #5
0
def get_results(prj):
    """Return a RiskTopN object evaluated on the project's predictions.

    An 'RF' model is built for ``prj['name']``; its probabilities and test
    labels go to RiskTopN (``top_n=2``, maximum batch size 8). The object is
    returned after ``get_num_of_exec_per_batch_size()`` has been called on it.
    """
    model = IncrementalLearningModel(prj['name'], 'RF', 30, 1)
    probabilities, labels = model.get_predicted_data()

    result = RiskTopN(probabilities, labels, top_n=2, batch_size_max=8)
    result.get_num_of_exec_per_batch_size()
    return result
Example #6
0
def get_number(prj):
    """Run BatchBisect with ``no_bisect=True`` and return its two result series.

    Prints the strategy's ``max_improvement`` and returns the pair produced by
    ``get_num_of_exec_per_batch_size()``.
    """
    model = IncrementalLearningModel(prj['name'], 'RF', 30, 1)
    # Only the test labels are consumed here.
    _, labels = model.get_predicted_data()

    strategy = BatchBisect(labels, no_bisect=True)
    xs, ys = strategy.get_num_of_exec_per_batch_size()

    print(strategy.max_improvement)

    return xs, ys
Example #7
0
def get_number(prj):
    """Run RiskTopN with ``top_n=2`` and return its two result series.

    Prints the strategy's ``lowest_num_of_exec`` and returns the pair produced
    by ``get_num_of_exec_per_batch_size()``.
    """
    model = IncrementalLearningModel(prj['name'], 'RF', 30, 1)
    probabilities, labels = model.get_predicted_data()

    strategy = RiskTopN(probabilities, labels, top_n=2)
    xs, ys = strategy.get_num_of_exec_per_batch_size()

    print(strategy.lowest_num_of_exec)

    return xs, ys
Example #8
0
def find_optimum_hyper_params_for_random_forest(prj):
    """Print 'RF' scores for a series of one-at-a-time hyper-parameter sweeps.

    ``prj`` is a mapping providing ``'name'`` and ``'params'``, where
    ``prj['params']['n_estimators']`` is the project's configured value.

    First ``n_estimators`` is swept on its own. If the configured
    ``n_estimators`` equals 10 the function stops there (the reason is not
    visible in this file). Otherwise ``max_depth``, ``criterion``,
    ``min_samples_split`` and ``min_samples_leaf`` are swept in that order,
    each combined with the configured ``n_estimators``.

    Every model is built with ``use_cache=False`` and its scores are printed;
    nothing is returned.
    """
    def print_scores_for(hyper_params):
        # Always pass the project *name*: the first sweep used to pass the
        # whole ``prj`` mapping, unlike every other call in this function.
        model = IncrementalLearningModel(
            prj['name'], 'RF', 30, 1,
            use_cache=False, hyper_params=hyper_params)
        model.get_predicted_data()
        model.print_scores()

    for n_estimators in [10, 50, 100, 200, 400]:
        print_scores_for({'n_estimators': n_estimators})

    configured_n_estimators = prj['params']['n_estimators']
    if configured_n_estimators == 10:
        return

    # (parameter name, candidate values), evaluated in this order.
    sweeps = (
        ('max_depth', [10, 20, 50, 100, 200, None]),
        ('criterion', ['gini', 'entropy']),
        ('min_samples_split', [2, 5, 10, 20, 50, 100]),
        ('min_samples_leaf', [1, 2, 5, 10, 20, 50, 100]),
    )
    for param_name, candidates in sweeps:
        for value in candidates:
            print_scores_for({
                param_name: value,
                'n_estimators': configured_n_estimators,
            })
Example #9
0
def get_number(prj):
    """Run BatchBisect (``stop_at_4=True``, max batch size 8) and return its series.

    Prints the number of test labels followed by an empty line, then returns
    the pair produced by ``get_num_of_exec_per_batch_size()``.
    """
    model = IncrementalLearningModel(prj['name'], 'RF', 30, 1)
    # Only the test labels are consumed here.
    _, labels = model.get_predicted_data()

    strategy = BatchBisect(labels, stop_at_4=True, batch_size_max=8)
    xs, ys = strategy.get_num_of_exec_per_batch_size()

    # Number of executions when testing everything: one per test label.
    print(len(labels))

    # NOTE(review): this prints only a blank line where a BatchBisect
    # execution count appears to have been intended -- confirm.
    print()

    return xs, ys
Example #10
0
def test_all():
    """Print a per-project table of the number of test labels.

    For every entry of the module-level ``project_list`` an 'RF' model is
    built (``n_estimators`` fixed to 10) and the length of its test-label
    sequence is reported under 'Number of build execution'.
    """
    # Collect the pieces and join once at the end.
    pieces = [
        "\n{:28} | {}\n".format('Project', 'Number of build execution'),
        '-' * 56,
        '\n',
    ]

    print('Creating learning model for each project ...')

    for prj in project_list:
        model = IncrementalLearningModel(
            prj['name'], 'RF', 30, 1, hyper_params={'n_estimators': 10})
        _, labels = model.get_predicted_data()
        label_count = str(len(labels)).center(25)
        pieces.append('{:28} | {}\n'.format(prj['name'], label_count))

    print(''.join(pieces))
Example #11
0
def get_number(prj, approach):
    """Run the batching strategy named by ``approach`` and return its series.

    ``approach`` must be one of ``'BatchBisect'``, ``'BatchStop4'`` or
    ``'RiskTopN'``. The module-level ``MAX_BATCH_SIZE`` (and ``RISK_TOP_N``
    for 'RiskTopN') configure the strategy. Prints the strategy's
    ``lowest_num_of_exec`` and returns the pair produced by
    ``get_num_of_exec_per_batch_size()``.

    Raises:
        NotImplementedError: for any other ``approach``. The model's
            predictions are computed before this check is made.
    """
    model = IncrementalLearningModel(prj['name'], 'RF', 30, 1)
    probabilities, labels = model.get_predicted_data()

    if approach in ('BatchBisect', 'BatchStop4'):
        # The two BatchBisect variants differ only in the stop_at_4 flag.
        strategy = BatchBisect(
            labels,
            stop_at_4=(approach == 'BatchStop4'),
            batch_size_max=MAX_BATCH_SIZE,
        )
    elif approach == 'RiskTopN':
        strategy = RiskTopN(
            probabilities,
            labels,
            top_n=RISK_TOP_N,
            batch_size_max=MAX_BATCH_SIZE,
        )
    else:
        raise NotImplementedError

    xs, ys = strategy.get_num_of_exec_per_batch_size()

    print(strategy.lowest_num_of_exec)

    return xs, ys
Example #12
0
def get_y_test_y_proba(prj):
    """Return ``(y_test, y_proba)`` with the test labels converted to booleans.

    A label becomes ``True`` when it compares equal to ``'passed'`` and
    ``False`` otherwise. Note the return order is the reverse of what
    ``get_predicted_data()`` yields.
    """
    model = IncrementalLearningModel(prj['name'], 'RF', 30, 1)
    y_proba, raw_labels = model.get_predicted_data()
    # bool() keeps the result a plain True/False whatever the comparison returns.
    passed_flags = [bool(label == 'passed') for label in raw_labels]
    return passed_flags, y_proba
Example #13
0
def get_testing_dataset_size(prj):
    """Return the number of test labels the 'RF' model yields for ``prj['name']``."""
    model = IncrementalLearningModel(prj['name'], 'RF', 30, 1)
    # The probabilities are irrelevant for the size.
    _, labels = model.get_predicted_data()
    return len(labels)
Example #14
0
def compare_classifiers(prj):
    """Print the scores of several classifier types for ``prj['name']``.

    Each classifier code is evaluated in turn with caching disabled and an
    empty hyper-parameter dict.
    """
    classifier_codes = ('DT', 'RF', 'NB', 'MLP', 'LR', 'SGD')
    for classifier in classifier_codes:
        model = IncrementalLearningModel(
            prj['name'], classifier, 30, 1, use_cache=False, hyper_params={})
        # The predictions are discarded; only the printed scores matter.
        model.get_predicted_data()
        model.print_scores()