Beispiel #1
0
def run_validation_spbn(train_data, folds, patience, result_folder, idx_fold):
    """Run greedy hill climbing over SPBN structures for each (k, patience) pair.

    For every ``k`` in ``folds`` a ``ValidatedLikelihood`` score is built on
    ``train_data`` (with ``seed=0``), and for every ``p`` in ``patience`` a
    search is started from a ``SemiparametricBN`` over all columns of
    ``train_data`` in which every node is typed ``NodeType.CKDE``. The search
    may change arcs and node types (``ArcOperatorSet`` + ``ChangeNodeTypeSet``).

    Results go to
    ``<result_folder>/HillClimbing/SPBN_CKDE/Validation_<k>_<p>/<idx_fold>``.
    A combination is skipped when that folder already contains ``end.lock``;
    the lock file is written once the final model has been saved.

    Args:
        train_data: Training data; must expose ``columns.values``.
        folds: Iterable of ``k`` values passed to ``ValidatedLikelihood``.
        patience: Iterable of ``patience`` values passed to ``hc.estimate``.
        result_folder: Base output folder (string).
        idx_fold: Identifier used as the last path component.
    """
    climber = GreedyHillClimbing()
    operators = OperatorPool([ArcOperatorSet(), ChangeNodeTypeSet()])

    for k in folds:
        score = ValidatedLikelihood(train_data, k=k, seed=0)

        for p in patience:
            fold_folder = '/'.join((
                result_folder,
                'HillClimbing',
                'SPBN_CKDE',
                'Validation_' + str(k) + '_' + str(p),
                str(idx_fold),
            ))
            pathlib.Path(fold_folder).mkdir(parents=True, exist_ok=True)

            # A lock file marks a combination that already ran to completion.
            if os.path.exists(fold_folder + '/end.lock'):
                continue

            checkpoint_cb = SaveModel(fold_folder)
            columns = list(train_data.columns.values)
            ckde_types = [(column, NodeType.CKDE) for column in columns]
            initial_model = SemiparametricBN(columns, ckde_types)
            bn = climber.estimate(
                operators,
                score,
                initial_model,
                callback=checkpoint_cb,
                patience=p,
                verbose=True,
            )

            # Store the returned model right after the highest-numbered pickle
            # in the folder (presumably written by the SaveModel callback).
            checkpoints = sorted(glob.glob(fold_folder + '/*.pickle'))
            newest_name = os.path.basename(checkpoints[-1])
            newest_index = int(os.path.splitext(newest_name)[0])
            bn.save(fold_folder + '/' + str(newest_index + 1).zfill(6) + ".pickle")

            # Creating the (empty) lock file flags this combination as done.
            with open(fold_folder + '/end.lock', 'w'):
                pass
def find_node_types(df, dag, model_folder, type_of_dag_string, patience):
    """Search node types on a fixed graph for each patience value.

    A single ``ValidatedLikelihood`` score (``k=10``, seeded with
    ``experiments_helper.SEED``) is built on ``df``. For every ``p`` in
    ``patience`` a hill-climbing run restricted to ``ChangeNodeTypeSet``
    starts from ``SemiparametricBN(dag, ...)`` with every column of ``df``
    typed ``NodeType.CKDE``.

    Output folder: ``<model_folder>/PC/SPBN_CKDE/<type_of_dag_string>/<p>``.
    Folders that already contain ``end.lock`` are skipped; the lock is written
    after the final model has been saved.

    Args:
        df: Data set; must expose ``columns.values``.
        dag: Graph handed to ``SemiparametricBN`` as the starting structure.
        model_folder: Base output folder (string).
        type_of_dag_string: Path component identifying the kind of graph.
        patience: Iterable of ``patience`` values passed to ``hc.estimate``.
    """
    score = ValidatedLikelihood(df, k=10, seed=experiments_helper.SEED)

    climber = GreedyHillClimbing()
    type_operators = ChangeNodeTypeSet()

    for p in patience:
        result_folder = '/'.join(
            (model_folder, 'PC', 'SPBN_CKDE', type_of_dag_string, str(p)))
        pathlib.Path(result_folder).mkdir(parents=True, exist_ok=True)

        lock_path = result_folder + '/end.lock'
        if os.path.exists(lock_path):
            # Already completed in a previous run.
            continue

        checkpoint_cb = SaveModel(result_folder)
        ckde_types = [(column, NodeType.CKDE) for column in df.columns.values]
        initial_model = SemiparametricBN(dag, ckde_types)
        bn = climber.estimate(
            type_operators,
            score,
            initial_model,
            callback=checkpoint_cb,
            patience=p,
        )

        # The returned model is saved under the number following the
        # highest-numbered pickle found in the folder.
        checkpoints = sorted(glob.glob(result_folder + '/*.pickle'))
        newest_name = os.path.basename(checkpoints[-1])
        newest_index = int(os.path.splitext(newest_name)[0])
        bn.save(result_folder + '/' + str(newest_index + 1).zfill(6) + ".pickle")

        with open(lock_path, 'w'):
            pass
def _train_gbn_with_score(df, result_folder, score_type):
    """Learn one Gaussian network with the given score class, unless already done.

    Creates ``result_folder``, returns immediately if it already contains
    ``end.lock``, otherwise runs greedy hill climbing over arcs starting from
    a ``GaussianNetwork`` on the columns of ``df`` and scored by
    ``score_type(df)``. The returned model is saved under the number that
    follows the highest-numbered ``*.pickle`` in the folder, and an empty
    ``end.lock`` is then written.

    Args:
        df: Training data; must expose ``columns.values``.
        result_folder: Output folder (string).
        score_type: Callable building the score from ``df`` (e.g. ``BIC``).
    """
    pathlib.Path(result_folder).mkdir(parents=True, exist_ok=True)
    if os.path.exists(result_folder + '/end.lock'):
        return

    hc = GreedyHillClimbing()
    arc_set = ArcOperatorSet()
    score = score_type(df)
    cb_save = SaveModel(result_folder)
    start_model = GaussianNetwork(list(df.columns.values))
    bn = hc.estimate(arc_set, score, start_model, callback=cb_save)

    # Pickle names are zero-padded numbers, so the lexicographically last one
    # carries the highest index; the final model gets the next index.
    iters = sorted(glob.glob(result_folder + '/*.pickle'))
    last_file = os.path.basename(iters[-1])
    number = int(os.path.splitext(last_file)[0])
    bn.save(result_folder + '/' + str(number + 1).zfill(6) + ".pickle")

    # The empty lock file marks this configuration as finished.
    with open(result_folder + '/end.lock', 'w'):
        pass


def train_gbn(dataset, instances):
    """Train Gaussian networks with the BIC and the BGe score for one data set.

    Reads ``<dataset>_<instances>.csv`` and learns two models by hill
    climbing, first with ``BIC`` and then with ``BGe``. Results are stored
    under ``models/<dataset>/<instances>/HillClimbing/GBN_BIC/`` and
    ``.../GBN_BGe/`` respectively. A score whose folder already contains
    ``end.lock`` is skipped.

    Args:
        dataset: Data set name; prefix of the CSV file and path component.
        instances: Number of instances; suffix of the CSV file name and path
            component.
    """
    df = pd.read_csv(dataset + "_" + str(instances) + '.csv')

    # The trailing slash is kept so SaveModel gets the same folder string as before.
    base_folder = 'models/' + dataset + '/' + str(instances) + '/HillClimbing/'
    _train_gbn_with_score(df, base_folder + 'GBN_BIC/', BIC)
    _train_gbn_with_score(df, base_folder + 'GBN_BGe/', BGe)
def run_pc_lc_spbn(train_data, folds, patience, result_folder, idx_fold):
    """Search node types on the stored ``graph-lc`` graph for each (k, patience).

    The partially directed graph is loaded from
    ``<result_folder>/PC/graph-lc-<idx_fold>.pickle`` and converted with
    ``to_dag()``; if that raises ``ValueError`` the code falls back to
    ``to_approximate_dag()``. For every ``k`` in ``folds`` and ``p`` in
    ``patience`` a hill-climbing run restricted to ``ChangeNodeTypeSet``
    starts from a ``SemiparametricBN`` on that DAG with every node typed
    ``NodeType.CKDE``, scored by ``ValidatedLikelihood`` seeded with
    ``experiments_helper.SEED``.

    Output folder:
    ``<result_folder>/PC/SPBN_CKDE/LinearCorrelation/Validation_<k>_<p>/<idx_fold>``.
    Folders containing ``end.lock`` are skipped; the lock is written after the
    final model has been saved.

    Args:
        train_data: Training data handed to ``ValidatedLikelihood``.
        folds: Iterable of ``k`` values for ``ValidatedLikelihood``.
        patience: Iterable of ``patience`` values passed to ``hc.estimate``.
        result_folder: Base folder holding the graph and receiving results.
        idx_fold: Identifier used in the graph file name and output path.
    """
    climber = GreedyHillClimbing()
    type_operators = ChangeNodeTypeSet()

    graph_path = result_folder + '/PC/graph-lc-' + str(idx_fold) + ".pickle"
    pdag = load(graph_path)

    try:
        dag = pdag.to_dag()
    except ValueError:
        # Fall back to the approximate conversion when to_dag() rejects the graph.
        dag = pdag.to_approximate_dag()

    for k in folds:
        score = ValidatedLikelihood(train_data, k=k, seed=experiments_helper.SEED)

        for p in patience:
            fold_folder = '/'.join((
                result_folder,
                'PC',
                'SPBN_CKDE',
                'LinearCorrelation',
                'Validation_' + str(k) + '_' + str(p),
                str(idx_fold),
            ))
            pathlib.Path(fold_folder).mkdir(parents=True, exist_ok=True)

            lock_path = fold_folder + '/end.lock'
            if os.path.exists(lock_path):
                continue

            checkpoint_cb = SaveModel(fold_folder)

            ckde_types = [(node, NodeType.CKDE) for node in dag.nodes()]
            initial_model = SemiparametricBN(dag, ckde_types)

            bn = climber.estimate(
                type_operators,
                score,
                initial_model,
                callback=checkpoint_cb,
                patience=p,
                verbose=True,
            )

            # Save the returned model right after the highest-numbered pickle.
            checkpoints = sorted(glob.glob(fold_folder + '/*.pickle'))
            newest_name = os.path.basename(checkpoints[-1])
            newest_index = int(os.path.splitext(newest_name)[0])
            bn.save(fold_folder + '/' + str(newest_index + 1).zfill(6) + ".pickle")

            with open(lock_path, 'w'):
                pass
Beispiel #5
0
def run_bge_gaussian(train_data, result_folder, idx_fold):
    """Learn a Gaussian network with the BGe score by greedy hill climbing.

    Works in ``<result_folder>/HillClimbing/Gaussian/BGe/<idx_fold>``. The
    folder is created if needed; when it already contains ``end.lock`` the
    function returns without doing anything else. Otherwise the search starts
    from a ``GaussianNetwork`` on the columns of ``train_data``, the returned
    model is saved under the number following the highest-numbered pickle in
    the folder, and an empty ``end.lock`` is written.

    Args:
        train_data: Training data; must expose ``columns.values``.
        result_folder: Base output folder (string).
        idx_fold: Identifier used as the last path component.
    """
    fold_folder = '/'.join(
        (result_folder, 'HillClimbing', 'Gaussian', 'BGe', str(idx_fold)))
    pathlib.Path(fold_folder).mkdir(parents=True, exist_ok=True)

    lock_path = fold_folder + '/end.lock'
    if os.path.exists(lock_path):
        return

    climber = GreedyHillClimbing()
    arc_operators = ArcOperatorSet()

    score = BGe(train_data)

    checkpoint_cb = SaveModel(fold_folder)
    initial_model = GaussianNetwork(list(train_data.columns.values))

    bn = climber.estimate(
        arc_operators,
        score,
        initial_model,
        callback=checkpoint_cb,
        verbose=True,
    )

    # The final model takes the index after the highest-numbered pickle.
    checkpoints = sorted(glob.glob(fold_folder + '/*.pickle'))
    newest_name = os.path.basename(checkpoints[-1])
    newest_index = int(os.path.splitext(newest_name)[0])
    bn.save(fold_folder + '/' + str(newest_index + 1).zfill(6) + ".pickle")

    with open(lock_path, 'w'):
        pass
def run_validation_kdebn(train_data, folds, patience, result_folder, idx_fold):
    """Run greedy hill climbing over KDE networks for each (k, patience) pair.

    For every ``k`` in ``folds`` a ``ValidatedLikelihood`` score is built on
    ``train_data`` (with ``seed=0``); for every ``p`` in ``patience`` an arc
    search starts from a ``KDENetwork`` on the columns of ``train_data``.

    Output folder:
    ``<result_folder>/HillClimbing/KDEBN/Validation_<k>_<p>/<idx_fold>``.
    Folders containing ``end.lock`` are skipped; the lock is written after the
    final model has been saved.

    Args:
        train_data: Training data; must expose ``columns.values``.
        folds: Iterable of ``k`` values for ``ValidatedLikelihood``.
        patience: Iterable of ``patience`` values passed to ``hc.estimate``.
        result_folder: Base output folder (string).
        idx_fold: Identifier used as the last path component.
    """
    climber = GreedyHillClimbing()
    arc_operators = ArcOperatorSet()

    for k in folds:
        score = ValidatedLikelihood(train_data, k=k, seed=0)

        for p in patience:
            fold_folder = '/'.join((
                result_folder,
                'HillClimbing',
                'KDEBN',
                'Validation_' + str(k) + '_' + str(p),
                str(idx_fold),
            ))
            pathlib.Path(fold_folder).mkdir(parents=True, exist_ok=True)

            lock_path = fold_folder + '/end.lock'
            if os.path.exists(lock_path):
                # Completed earlier; nothing to redo.
                continue

            checkpoint_cb = SaveModel(fold_folder)
            initial_model = KDENetwork(list(train_data.columns.values))
            bn = climber.estimate(
                arc_operators,
                score,
                initial_model,
                callback=checkpoint_cb,
                patience=p,
                verbose=True,
            )

            # Save the returned model right after the highest-numbered pickle.
            checkpoints = sorted(glob.glob(fold_folder + '/*.pickle'))
            newest_name = os.path.basename(checkpoints[-1])
            newest_index = int(os.path.splitext(newest_name)[0])
            bn.save(fold_folder + '/' + str(newest_index + 1).zfill(6) + ".pickle")

            with open(lock_path, 'w'):
                pass
from pybnesian.learning.operators import OperatorPool, ArcOperatorSet, ChangeNodeTypeSet
from pybnesian.learning.scores import ValidatedLikelihood
import pathlib
import os
import experiments_helper

# One search object and one operator pool (arc changes + node-type changes)
# are shared by every run below.
hc = GreedyHillClimbing()
pool = OperatorPool([ArcOperatorSet(), ChangeNodeTypeSet()])

# Train one SPBN per (dataset, number of instances, patience) combination.
for d in experiments_helper.DATASETS:
    for i in experiments_helper.INSTANCES:
        df = pd.read_csv(d + "_" + str(i) + '.csv')

        # The score depends only on the data, so it is built once per CSV file.
        vl = ValidatedLikelihood(df, k=10, seed=experiments_helper.SEED)

        for p in experiments_helper.PATIENCE:
            result_folder = '/'.join(
                ('models', d, str(i), 'HillClimbing', 'SPBN', str(p)))
            pathlib.Path(result_folder).mkdir(parents=True, exist_ok=True)

            # An existing lock file means this combination already finished.
            if os.path.exists(result_folder + '/end.lock'):
                continue

            cb_save = SaveModel(result_folder)
            start_model = SemiparametricBN(list(df.columns.values))
            bn = hc.estimate(pool, vl, start_model, callback=cb_save, patience=p)

            # Save the returned model under the number following the
            # highest-numbered pickle already present in the folder.
            iters = sorted(glob.glob(result_folder + '/*.pickle'))
            last_file = os.path.basename(iters[-1])
            number = int(os.path.splitext(last_file)[0])
            bn.save(result_folder + '/' + str(number + 1).zfill(6) + ".pickle")

            # Writing the empty lock file marks the combination as done.
            with open(result_folder + '/end.lock', 'w') as f:
                pass