def batch_stop_4():
    """Report, per project, the lowest number of build executions and the
    improvement over TestAll when using BatchStop4 (BatchBisect with
    stop_at_4=True, batch sizes up to 8).

    Iterates the module-level ``project_list``; prints a formatted table.
    """
    report = "\n{:28} | {} | {}\n".format(
        'Project', 'Number of build execution', 'Improvement over TestAll')
    report += '-' * 83
    report += '\n'
    print('Creating learning model for each project ...')
    for prj in project_list:
        model = IncrementalLearningModel(
            prj['name'], 'RF', 30, 1, hyper_params={'n_estimators': 10})
        y_proba, y_test = model.get_predicted_data()
        batcher = BatchBisect(y_test, stop_at_4=True, batch_size_max=8)
        batcher.get_num_of_exec_per_batch_size()
        report += '{:28} | {} | {} \n'.format(
            prj['name'],
            str(batcher.lowest_num_of_exec).center(25),
            '{:.2f} %'.format(batcher.max_improvement).center(22))
    print(report)
def get_number(prj, num_of_learn_days):
    """Return the maximum improvement achieved by RiskIsolate for *prj*.

    NOTE(review): unlike sibling helpers, this one passes ``prj`` straight
    through to the model (not ``prj['name']``) — presumably ``prj`` is
    already a project-name string here; confirm against callers.
    """
    model = IncrementalLearningModel(prj, 'DecisionTree', num_of_learn_days, 1)
    y_proba, y_test = model.get_predicted_data()
    isolator = RiskIsolate(y_proba, y_test, test_all_after_size_4=False)
    isolator.get_num_of_exec_per_batch_size()
    return isolator.get_max_improvement()
def get_results(prj):
    """Run BatchBisect (no stop-at-4, batch sizes up to 20) on *prj*'s
    predictions and return the populated BatchBisect instance."""
    model = IncrementalLearningModel(prj['name'], 'RF', 30, 1)
    y_proba, y_test = model.get_predicted_data()
    batcher = BatchBisect(y_test, stop_at_4=False, batch_size_max=20)
    batcher.get_num_of_exec_per_batch_size()
    return batcher
def get_results(prj):
    """Run RiskTopN (top_n=2, batch sizes up to 8) on *prj*'s predictions
    and return the populated RiskTopN instance.

    NOTE(review): shares its name with the BatchBisect variant elsewhere in
    this file — the later definition wins at import time; verify intent.
    """
    model = IncrementalLearningModel(prj['name'], 'RF', 30, 1)
    y_proba, y_test = model.get_predicted_data()
    ranker = RiskTopN(y_proba, y_test, top_n=2, batch_size_max=8)
    ranker.get_num_of_exec_per_batch_size()
    return ranker
def get_number(prj):
    """Run BatchBisect in no-bisect mode for *prj*, print its maximum
    improvement, and return the (x, y) execution-count series."""
    model = IncrementalLearningModel(prj['name'], 'RF', 30, 1)
    y_proba, y_test = model.get_predicted_data()
    batcher = BatchBisect(y_test, no_bisect=True)
    xs, ys = batcher.get_num_of_exec_per_batch_size()
    print(batcher.max_improvement)
    return xs, ys
def get_number(prj):
    """Run RiskTopN (top_n=2) for *prj*, print its lowest number of
    executions, and return the (x, y) execution-count series."""
    model = IncrementalLearningModel(prj['name'], 'RF', 30, 1)
    y_proba, y_test = model.get_predicted_data()
    ranker = RiskTopN(y_proba, y_test, top_n=2)
    xs, ys = ranker.get_num_of_exec_per_batch_size()
    print(ranker.lowest_num_of_exec)
    return xs, ys
def get_number(prj):
    """Run BatchStop4 (BatchBisect with stop_at_4, batch sizes up to 8) for
    *prj* and return the (x, y) execution-count series.

    Prints the TestAll execution count (== number of test builds).
    """
    model = IncrementalLearningModel(prj['name'], 'RF', 30, 1)
    y_proba, y_test = model.get_predicted_data()
    batcher = BatchBisect(y_test, stop_at_4=True, batch_size_max=8)
    xs, ys = batcher.get_num_of_exec_per_batch_size()
    # TestAll number of executions.
    print(len(y_test))
    # NOTE(review): this bare print() looks like it was meant to print the
    # BatchBisect execution count (e.g. batcher.lowest_num_of_exec) — it
    # currently emits a blank line; confirm before changing.
    print()
    return xs, ys
def test_all():
    """Print, per project in ``project_list``, the number of build
    executions under the TestAll baseline (== size of the test set)."""
    report = "\n{:28} | {}\n".format('Project', 'Number of build execution')
    report += '-' * 56
    report += '\n'
    print('Creating learning model for each project ...')
    for prj in project_list:
        model = IncrementalLearningModel(
            prj['name'], 'RF', 30, 1, hyper_params={'n_estimators': 10})
        y_proba, y_test = model.get_predicted_data()
        report += '{:28} | {}\n'.format(
            prj['name'], str(len(y_test)).center(25))
    print(report)
def get_number(prj, approach):
    """Run the batching strategy named by *approach* for *prj*, print its
    lowest number of executions, and return the (x, y) series.

    Raises NotImplementedError for an unknown approach name.
    """
    model = IncrementalLearningModel(prj['name'], 'RF', 30, 1)
    y_proba, y_test = model.get_predicted_data()
    # Dispatch table of batcher factories, lazily constructed per approach.
    factories = {
        'BatchBisect': lambda: BatchBisect(
            y_test, stop_at_4=False, batch_size_max=MAX_BATCH_SIZE),
        'BatchStop4': lambda: BatchBisect(
            y_test, stop_at_4=True, batch_size_max=MAX_BATCH_SIZE),
        'RiskTopN': lambda: RiskTopN(
            y_proba, y_test, top_n=RISK_TOP_N, batch_size_max=MAX_BATCH_SIZE),
    }
    if approach not in factories:
        raise NotImplementedError
    batcher = factories[approach]()
    xs, ys = batcher.get_num_of_exec_per_batch_size()
    print(batcher.lowest_num_of_exec)
    return xs, ys
def get_y_test_y_proba(prj):
    """Return (y_test, y_proba) for *prj*, with y_test mapped to booleans.

    y_test labels equal to the string 'passed' become True, everything
    else False; y_proba is passed through unchanged.
    """
    learning_model = IncrementalLearningModel(prj['name'], 'RF', 30, 1)
    y_proba, y_test = learning_model.get_predicted_data()
    # `item == 'passed'` is already a bool — the original
    # `True if item == 'passed' else False` was a redundant conditional.
    y_test = [item == 'passed' for item in y_test]
    return y_test, y_proba
def get_testing_dataset_size(prj):
    """Return the number of samples in *prj*'s testing set."""
    model = IncrementalLearningModel(prj['name'], 'RF', 30, 1)
    y_proba, y_test = model.get_predicted_data()
    return len(y_test)
def compare_with_default_hyper_params(prj):
    """Print RF scores for *prj* twice: first with the project's tuned
    hyper-parameters, then with library defaults, followed by a blank line."""
    # Tuned params first, then defaults — order matters for the printed report.
    for params in (prj['params'], {}):
        model = IncrementalLearningModel(
            prj['name'], 'RF', 30, 1, use_cache=False, hyper_params=params)
        model.get_predicted_data()
        model.print_scores()
    print()
def compare_classifiers(prj):
    """Train each supported classifier on *prj* with default
    hyper-parameters and print its scores."""
    for classifier in ('DT', 'RF', 'NB', 'MLP', 'LR', 'SGD'):
        model = IncrementalLearningModel(
            prj['name'], classifier, 30, 1, use_cache=False, hyper_params={})
        model.get_predicted_data()
        model.print_scores()
def find_optimum_hyper_params_for_random_forest(prj):
    """Sweep RF hyper-parameters one at a time for *prj* and print scores.

    Sweeps n_estimators first; if the project's tuned n_estimators is 10
    (the default), stops there. Otherwise also sweeps max_depth, criterion,
    min_samples_split and min_samples_leaf, each combined with the tuned
    n_estimators.

    *prj* is a project dict with at least 'name' and 'params' keys.
    """

    def _fit_and_report(hyper_params):
        # Train one RF model with the given params and print its scores.
        model = IncrementalLearningModel(
            prj['name'], 'RF', 30, 1, use_cache=False,
            hyper_params=hyper_params)
        model.get_predicted_data()
        model.print_scores()

    for n_estimators in [10, 50, 100, 200, 400]:
        # BUG FIX: the original passed the whole `prj` dict to
        # IncrementalLearningModel here, while every other sweep in this
        # function passes prj['name']; normalized to prj['name'].
        _fit_and_report({'n_estimators': n_estimators})

    # With the default n_estimators there is nothing further to tune.
    if prj['params']['n_estimators'] == 10:
        return

    tuned_n = prj['params']['n_estimators']
    for max_depth in [10, 20, 50, 100, 200, None]:
        _fit_and_report({'max_depth': max_depth, 'n_estimators': tuned_n})
    for criterion in ['gini', 'entropy']:
        _fit_and_report({'criterion': criterion, 'n_estimators': tuned_n})
    for min_samples_split in [2, 5, 10, 20, 50, 100]:
        _fit_and_report({'min_samples_split': min_samples_split,
                         'n_estimators': tuned_n})
    for min_samples_leaf in [1, 2, 5, 10, 20, 50, 100]:
        _fit_and_report({'min_samples_leaf': min_samples_leaf,
                         'n_estimators': tuned_n})