def run_validation_spbn(train_data, folds, patience, result_folder, idx_fold):
    """Run hill climbing for SPBN models over every (k, patience) combination.

    For each ``k`` in ``folds`` a ``ValidatedLikelihood(train_data, k=k, seed=0)``
    score is built.  For each ``p`` in ``patience`` a ``GreedyHillClimbing``
    search is run with an operator pool made of ``ArcOperatorSet`` and
    ``ChangeNodeTypeSet``, starting from a ``SemiparametricBN`` in which every
    node is typed ``NodeType.CKDE``.

    Output goes to
    ``<result_folder>/HillClimbing/SPBN_CKDE/Validation_<k>_<p>/<idx_fold>``.
    A combination whose folder already contains ``end.lock`` is skipped; the
    lock file is written once the final model has been saved.

    Args:
        train_data: Training data; ``train_data.columns.values`` supplies the
            node names (presumably a pandas DataFrame -- confirm with callers).
        folds: Iterable of ``k`` values for ``ValidatedLikelihood``.
        patience: Iterable of patience values forwarded to ``hc.estimate``.
        result_folder: Root output folder (string).
        idx_fold: Identifier used as the last path component.
    """
    hc = GreedyHillClimbing()
    pool = OperatorPool([ArcOperatorSet(), ChangeNodeTypeSet()])

    # These depend only on train_data, so build them once.
    node_names = list(train_data.columns.values)
    node_types = [(name, NodeType.CKDE) for name in node_names]

    for k in folds:
        vl = ValidatedLikelihood(train_data, k=k, seed=0)

        for p in patience:
            fold_folder = (result_folder + '/HillClimbing/SPBN_CKDE/Validation_'
                           + str(k) + '_' + str(p) + '/' + str(idx_fold))
            pathlib.Path(fold_folder).mkdir(parents=True, exist_ok=True)

            # A lock file marks a combination that already finished.
            if os.path.exists(fold_folder + '/end.lock'):
                continue

            cb_save = SaveModel(fold_folder)
            start_model = SemiparametricBN(node_names, node_types)
            bn = hc.estimate(pool, vl, start_model, callback=cb_save,
                             patience=p, verbose=True)

            # Number the final model one past the highest numeric checkpoint in
            # the folder (presumably written by the SaveModel callback).
            # Comparing parsed integers rather than sorted file names keeps the
            # order right beyond six digits, ignores stray non-numeric pickles,
            # and falls back to index 0 when the folder has no checkpoint.
            stems = (os.path.splitext(os.path.basename(path))[0]
                     for path in glob.glob(fold_folder + '/*.pickle'))
            last_index = max((int(stem) for stem in stems if stem.isdecimal()),
                             default=-1)
            bn.save(fold_folder + '/' + str(last_index + 1).zfill(6) + ".pickle")

            with open(fold_folder + '/end.lock', 'w'):
                pass
def find_node_types(df, dag, model_folder, type_of_dag_string, patience):
    """Run hill climbing restricted to node-type changes on a given graph.

    A single ``ValidatedLikelihood(df, k=10, seed=experiments_helper.SEED)``
    score is shared by all runs.  For each ``p`` in ``patience`` a
    ``GreedyHillClimbing`` search is run using only ``ChangeNodeTypeSet`` as
    operator set, starting from ``SemiparametricBN(dag, node_types)`` with
    every column of ``df`` typed ``NodeType.CKDE``.

    Output goes to
    ``<model_folder>/PC/SPBN_CKDE/<type_of_dag_string>/<p>``.  A patience value
    whose folder already contains ``end.lock`` is skipped; the lock file is
    written once the final model has been saved.

    Args:
        df: Data used for the score; ``df.columns.values`` supplies the node
            names (presumably a pandas DataFrame -- confirm with callers).
        dag: Graph passed as first argument to ``SemiparametricBN``.
        model_folder: Root output folder (string).
        type_of_dag_string: Path component identifying the kind of graph.
        patience: Iterable of patience values forwarded to ``hc.estimate``.
    """
    vl = ValidatedLikelihood(df, k=10, seed=experiments_helper.SEED)
    hc = GreedyHillClimbing()
    change_node_type = ChangeNodeTypeSet()

    # Depends only on df, so build it once.
    node_types = [(name, NodeType.CKDE) for name in df.columns.values]

    for p in patience:
        result_folder = (model_folder + '/PC/SPBN_CKDE/' + type_of_dag_string
                         + '/' + str(p))
        pathlib.Path(result_folder).mkdir(parents=True, exist_ok=True)

        # A lock file marks a patience value that already finished.
        if os.path.exists(result_folder + '/end.lock'):
            continue

        cb_save = SaveModel(result_folder)
        start_model = SemiparametricBN(dag, node_types)
        bn = hc.estimate(change_node_type, vl, start_model, callback=cb_save,
                         patience=p)

        # Number the final model one past the highest numeric checkpoint in
        # the folder (presumably written by the SaveModel callback).  Parsed
        # integers are compared instead of sorted file names so the order
        # stays right beyond six digits, stray non-numeric pickles are
        # ignored, and an empty folder yields index 0 instead of IndexError.
        stems = (os.path.splitext(os.path.basename(path))[0]
                 for path in glob.glob(result_folder + '/*.pickle'))
        last_index = max((int(stem) for stem in stems if stem.isdecimal()),
                         default=-1)
        bn.save(result_folder + '/' + str(last_index + 1).zfill(6) + ".pickle")

        with open(result_folder + '/end.lock', 'w'):
            pass
def _train_gbn_with_score(df, score_class, result_folder):
    """Learn one Gaussian network with hill climbing and store it.

    Does nothing (beyond creating ``result_folder``) when the folder already
    contains ``end.lock``.  Otherwise runs ``GreedyHillClimbing`` with an
    ``ArcOperatorSet`` and the score ``score_class(df)``, saves the final
    model and writes ``end.lock``.

    Args:
        df: Data passed to ``score_class``; its columns name the nodes.
        score_class: Callable building the score from ``df`` (``BIC``/``BGe``).
        result_folder: Output folder (string).
    """
    hc = GreedyHillClimbing()
    arc_set = ArcOperatorSet()

    pathlib.Path(result_folder).mkdir(parents=True, exist_ok=True)
    if os.path.exists(result_folder + '/end.lock'):
        return

    score = score_class(df)
    cb_save = SaveModel(result_folder)
    start_model = GaussianNetwork(list(df.columns.values))
    bn = hc.estimate(arc_set, score, start_model, callback=cb_save)

    # Number the final model one past the highest numeric checkpoint in the
    # folder (presumably written by the SaveModel callback).  Parsed integers
    # are compared instead of sorted file names so the order stays right
    # beyond six digits, stray non-numeric pickles are ignored, and an empty
    # folder yields index 0 instead of IndexError.
    stems = (os.path.splitext(os.path.basename(path))[0]
             for path in glob.glob(result_folder + '/*.pickle'))
    last_index = max((int(stem) for stem in stems if stem.isdecimal()),
                     default=-1)
    bn.save(result_folder + '/' + str(last_index + 1).zfill(6) + ".pickle")

    with open(result_folder + '/end.lock', 'w'):
        pass


def train_gbn(dataset, instances):
    """Learn Gaussian networks with the BIC and BGe scores for one dataset.

    Reads ``<dataset>_<instances>.csv`` with ``pd.read_csv`` and runs hill
    climbing twice, once per score, writing to
    ``models/<dataset>/<instances>/HillClimbing/GBN_BIC/`` and
    ``models/<dataset>/<instances>/HillClimbing/GBN_BGe/``.  A score whose
    folder already contains ``end.lock`` is skipped.

    Args:
        dataset: Dataset name; prefix of the CSV file and path component.
        instances: Number-of-instances tag; converted with ``str``.
    """
    df = pd.read_csv(dataset + "_" + str(instances) + '.csv')
    base_folder = 'models/' + dataset + '/' + str(instances)

    # BIC first, then BGe, matching the original execution order.
    _train_gbn_with_score(df, BIC, base_folder + '/HillClimbing/GBN_BIC/')
    _train_gbn_with_score(df, BGe, base_folder + '/HillClimbing/GBN_BGe/')
def run_pc_lc_spbn(train_data, folds, patience, result_folder, idx_fold):
    """Learn SPBN node types on a previously stored PC graph.

    The graph is loaded with ``load`` from
    ``<result_folder>/PC/graph-lc-<idx_fold>.pickle`` and converted with
    ``to_dag()``; if that raises ``ValueError``, ``to_approximate_dag()`` is
    used instead.  For each ``k`` in ``folds`` a
    ``ValidatedLikelihood(train_data, k=k, seed=experiments_helper.SEED)``
    score is built, and for each ``p`` in ``patience`` a ``GreedyHillClimbing``
    search is run using only ``ChangeNodeTypeSet``, starting from
    ``SemiparametricBN(dag, node_types)`` with every node of the DAG typed
    ``NodeType.CKDE``.

    Output goes to
    ``<result_folder>/PC/SPBN_CKDE/LinearCorrelation/Validation_<k>_<p>/<idx_fold>``.
    A combination whose folder already contains ``end.lock`` is skipped; the
    lock file is written once the final model has been saved.

    Args:
        train_data: Data passed to ``ValidatedLikelihood``.
        folds: Iterable of ``k`` values for ``ValidatedLikelihood``.
        patience: Iterable of patience values forwarded to ``hc.estimate``.
        result_folder: Root folder holding the stored graph and the outputs.
        idx_fold: Identifier used in the graph file name and as the last
            path component of the output folder.
    """
    hc = GreedyHillClimbing()
    change_node_type = ChangeNodeTypeSet()

    pdag = load(result_folder + '/PC/graph-lc-' + str(idx_fold) + ".pickle")
    try:
        dag = pdag.to_dag()
    except ValueError:
        # Fall back to the approximate conversion when to_dag() rejects the
        # graph.
        dag = pdag.to_approximate_dag()

    # Depends only on the DAG, so build it once.
    node_types = [(name, NodeType.CKDE) for name in dag.nodes()]

    for k in folds:
        vl = ValidatedLikelihood(train_data, k=k, seed=experiments_helper.SEED)

        for p in patience:
            fold_folder = (result_folder
                           + '/PC/SPBN_CKDE/LinearCorrelation/Validation_'
                           + str(k) + '_' + str(p) + '/' + str(idx_fold))
            pathlib.Path(fold_folder).mkdir(parents=True, exist_ok=True)

            # A lock file marks a combination that already finished.
            if os.path.exists(fold_folder + '/end.lock'):
                continue

            cb_save = SaveModel(fold_folder)
            start_model = SemiparametricBN(dag, node_types)
            bn = hc.estimate(change_node_type, vl, start_model,
                             callback=cb_save, patience=p, verbose=True)

            # Number the final model one past the highest numeric checkpoint in
            # the folder (presumably written by the SaveModel callback).
            # Parsed integers are compared instead of sorted file names so the
            # order stays right beyond six digits, stray non-numeric pickles
            # are ignored, and an empty folder yields index 0 instead of
            # IndexError.
            stems = (os.path.splitext(os.path.basename(path))[0]
                     for path in glob.glob(fold_folder + '/*.pickle'))
            last_index = max((int(stem) for stem in stems if stem.isdecimal()),
                             default=-1)
            bn.save(fold_folder + '/' + str(last_index + 1).zfill(6) + ".pickle")

            with open(fold_folder + '/end.lock', 'w'):
                pass
def run_bge_gaussian(train_data, result_folder, idx_fold):
    """Learn a Gaussian network with hill climbing and the BGe score.

    Runs ``GreedyHillClimbing`` with an ``ArcOperatorSet`` and ``BGe(train_data)``
    starting from ``GaussianNetwork`` over the columns of ``train_data``.
    Output goes to ``<result_folder>/HillClimbing/Gaussian/BGe/<idx_fold>``.
    Returns immediately when that folder already contains ``end.lock``; the
    lock file is written once the final model has been saved.

    Args:
        train_data: Training data; ``train_data.columns.values`` supplies the
            node names (presumably a pandas DataFrame -- confirm with callers).
        result_folder: Root output folder (string).
        idx_fold: Identifier used as the last path component.
    """
    fold_folder = result_folder + '/HillClimbing/Gaussian/BGe/' + str(idx_fold)
    pathlib.Path(fold_folder).mkdir(parents=True, exist_ok=True)

    # A lock file marks a fold that already finished.
    if os.path.exists(fold_folder + '/end.lock'):
        return

    hc = GreedyHillClimbing()
    arc_set = ArcOperatorSet()
    bge = BGe(train_data)
    cb_save = SaveModel(fold_folder)
    start_model = GaussianNetwork(list(train_data.columns.values))
    bn = hc.estimate(arc_set, bge, start_model, callback=cb_save, verbose=True)

    # Number the final model one past the highest numeric checkpoint in the
    # folder (presumably written by the SaveModel callback).  Parsed integers
    # are compared instead of sorted file names so the order stays right
    # beyond six digits, stray non-numeric pickles are ignored, and an empty
    # folder yields index 0 instead of IndexError.
    stems = (os.path.splitext(os.path.basename(path))[0]
             for path in glob.glob(fold_folder + '/*.pickle'))
    last_index = max((int(stem) for stem in stems if stem.isdecimal()),
                     default=-1)
    bn.save(fold_folder + '/' + str(last_index + 1).zfill(6) + ".pickle")

    with open(fold_folder + '/end.lock', 'w'):
        pass
def run_validation_kdebn(train_data, folds, patience, result_folder, idx_fold):
    """Run hill climbing for KDE networks over every (k, patience) combination.

    For each ``k`` in ``folds`` a ``ValidatedLikelihood(train_data, k=k, seed=0)``
    score is built, and for each ``p`` in ``patience`` a ``GreedyHillClimbing``
    search is run with an ``ArcOperatorSet``, starting from a ``KDENetwork``
    over the columns of ``train_data``.

    Output goes to
    ``<result_folder>/HillClimbing/KDEBN/Validation_<k>_<p>/<idx_fold>``.
    A combination whose folder already contains ``end.lock`` is skipped; the
    lock file is written once the final model has been saved.

    Args:
        train_data: Training data; ``train_data.columns.values`` supplies the
            node names (presumably a pandas DataFrame -- confirm with callers).
        folds: Iterable of ``k`` values for ``ValidatedLikelihood``.
        patience: Iterable of patience values forwarded to ``hc.estimate``.
        result_folder: Root output folder (string).
        idx_fold: Identifier used as the last path component.
    """
    hc = GreedyHillClimbing()
    arc_set = ArcOperatorSet()

    for k in folds:
        vl = ValidatedLikelihood(train_data, k=k, seed=0)

        for p in patience:
            fold_folder = (result_folder + '/HillClimbing/KDEBN/Validation_'
                           + str(k) + '_' + str(p) + '/' + str(idx_fold))
            pathlib.Path(fold_folder).mkdir(parents=True, exist_ok=True)

            # A lock file marks a combination that already finished.
            if os.path.exists(fold_folder + '/end.lock'):
                continue

            cb_save = SaveModel(fold_folder)
            start_model = KDENetwork(list(train_data.columns.values))
            bn = hc.estimate(arc_set, vl, start_model, callback=cb_save,
                             patience=p, verbose=True)

            # Number the final model one past the highest numeric checkpoint in
            # the folder (presumably written by the SaveModel callback).
            # Parsed integers are compared instead of sorted file names so the
            # order stays right beyond six digits, stray non-numeric pickles
            # are ignored, and an empty folder yields index 0 instead of
            # IndexError.
            stems = (os.path.splitext(os.path.basename(path))[0]
                     for path in glob.glob(fold_folder + '/*.pickle'))
            last_index = max((int(stem) for stem in stems if stem.isdecimal()),
                             default=-1)
            bn.save(fold_folder + '/' + str(last_index + 1).zfill(6) + ".pickle")

            with open(fold_folder + '/end.lock', 'w'):
                pass
from pybnesian.learning.operators import OperatorPool, ArcOperatorSet, ChangeNodeTypeSet
from pybnesian.learning.scores import ValidatedLikelihood
import pathlib
import os
import experiments_helper

# Script: learn SPBN models with hill climbing for every dataset, instance
# count and patience value listed in experiments_helper.  Results go to
# models/<dataset>/<instances>/HillClimbing/SPBN/<patience>; a folder that
# already contains end.lock is skipped.
#
# NOTE(review): pd, glob, GreedyHillClimbing, SaveModel and SemiparametricBN
# are used below but are not imported in the visible part of this file --
# confirm they are brought into scope earlier in the file.

hc = GreedyHillClimbing()
pool = OperatorPool([ArcOperatorSet(), ChangeNodeTypeSet()])

for d in experiments_helper.DATASETS:
    for i in experiments_helper.INSTANCES:
        df = pd.read_csv(d + "_" + str(i) + '.csv')
        vl = ValidatedLikelihood(df, k=10, seed=experiments_helper.SEED)

        for p in experiments_helper.PATIENCE:
            result_folder = 'models/' + d + '/' + str(i) + '/HillClimbing/SPBN/' + str(p)
            pathlib.Path(result_folder).mkdir(parents=True, exist_ok=True)

            # A lock file marks a configuration that already finished.
            if os.path.exists(result_folder + '/end.lock'):
                continue

            cb_save = SaveModel(result_folder)
            start_model = SemiparametricBN(list(df.columns.values))
            bn = hc.estimate(pool, vl, start_model, callback=cb_save, patience=p)

            # Number the final model one past the highest numeric checkpoint in
            # the folder (presumably written by the SaveModel callback).
            # Parsed integers are compared instead of sorted file names so the
            # order stays right beyond six digits, stray non-numeric pickles
            # are ignored, and an empty folder yields index 0 instead of
            # IndexError.
            number = max(
                (int(stem)
                 for stem in (os.path.splitext(os.path.basename(path))[0]
                              for path in glob.glob(result_folder + '/*.pickle'))
                 if stem.isdecimal()),
                default=-1,
            )
            bn.save(result_folder + '/' + str(number + 1).zfill(6) + ".pickle")

            with open(result_folder + '/end.lock', 'w'):
                pass