def run_validation_spbn(train_data, folds, patience, result_folder, idx_fold):
    """Run validated-likelihood greedy hill-climbing for an SPBN (all-CKDE
    start model) over every (k, patience) combination, checkpointing models.

    Parameters
    ----------
    train_data : pandas.DataFrame
        Training instances; every column becomes a node of the start model.
    folds : iterable of int
        Cross-validation fold counts ``k`` passed to ``ValidatedLikelihood``.
    patience : iterable of int
        Patience values for the hill-climbing early-stopping criterion.
    result_folder : str
        Root folder under which all results are written.
    idx_fold : int
        Index of the outer fold; used only to name the output subfolder.

    Side effects
    ------------
    For each (k, p) creates
    ``<result_folder>/HillClimbing/SPBN_CKDE/Validation_<k>_<p>/<idx_fold>/``
    containing the numbered ``.pickle`` checkpoints written by ``SaveModel``,
    a final model saved under the next sequential number, and an ``end.lock``
    sentinel.  Combinations whose folder already holds ``end.lock`` are
    skipped, so the function can resume an interrupted run.
    """
    hc = GreedyHillClimbing()
    pool = OperatorPool([ArcOperatorSet(), ChangeNodeTypeSet()])

    for k in folds:
        # One validated-likelihood score per fold count; fixed seed so the
        # k-fold split is reproducible across runs.
        vl = ValidatedLikelihood(train_data, k=k, seed=0)

        for p in patience:
            fold_folder = (pathlib.Path(result_folder) / 'HillClimbing'
                           / 'SPBN_CKDE' / f'Validation_{k}_{p}' / str(idx_fold))
            fold_folder.mkdir(parents=True, exist_ok=True)

            # Already finished in a previous run -- skip (resume support).
            if (fold_folder / 'end.lock').exists():
                continue

            cb_save = SaveModel(str(fold_folder))
            # Start fully nonparametric: every node begins as a CKDE node.
            node_types = [(name, NodeType.CKDE) for name in train_data.columns.values]
            start_model = SemiparametricBN(list(train_data.columns.values), node_types)
            bn = hc.estimate(pool, vl, start_model, callback=cb_save,
                             patience=p, verbose=True)

            # Save the final model under the next sequential checkpoint
            # number.  Bug fix: the original indexed iters[-1] unconditionally
            # and crashed with IndexError when no checkpoint had been written;
            # fall back to numbering from 0 in that case.
            iters = sorted(fold_folder.glob('*.pickle'))
            number = int(iters[-1].stem) if iters else -1
            bn.save(str(fold_folder / (str(number + 1).zfill(6) + ".pickle")))

            # Sentinel marking this (k, p) combination as complete.
            (fold_folder / 'end.lock').touch()
small_results = pd.DataFrame() medium_results = pd.DataFrame() large_results = pd.DataFrame() for n in experiments_helper.INSTANCES: df = pd.read_csv('data/small_' + str(n) + ".csv") executions = np.empty((800, )) for i in range(800): if i % 10 == 0: print(str(i) + " executions") vl = ValidatedLikelihood(df, k=10, seed=i) start_model = SemiparametricBN(list(df.columns.values)) hc = GreedyHillClimbing() pool = OperatorPool([ArcOperatorSet(), ChangeNodeTypeSet()]) start = time.time() bn = hc.estimate(pool, vl, start_model, patience=0) end = time.time() executions[i] = end - start small_results['SPBN_' + str(n)] = pd.Series(executions, name="SPBN_" + str(n)) print("Small " + str(n) + " -- Time: " + str(executions.mean()) + ", std: " + str(np.std(executions, ddof=1))) df = pd.read_csv('data/medium_' + str(n) + ".csv") executions = np.empty((200, ))