def compare_models(true_model, trained_models_folder, training_data,
                   test_data):
    """Print test-set metrics for the stored HillClimbing GBN models.

    The log-likelihood of ``true_model`` on ``test_data`` is printed first.
    Then, for the ``GBN_BIC`` and ``GBN_BGe`` sub-folders of
    ``<trained_models_folder>/HillClimbing``, the ``*.pickle`` path that sorts
    last is loaded, fitted on ``training_data`` and reported.

    Args:
        true_model: Reference model; must offer ``slogl(data)`` and is handed
            to ``experiments_helper.shd`` and ``experiments_helper.hamming``.
        trained_models_folder: Root folder of the stored models, as a string.
        training_data: Data passed to ``fit`` of every loaded model.
        test_data: Data passed to ``slogl`` of the true and loaded models.

    Raises:
        IndexError: If a model folder holds no ``*.pickle`` file.
    """
    reference_loglik = true_model.slogl(test_data)
    print("Ground truth loglik: " + str(reference_loglik))

    variants = (
        ('/HillClimbing/GBN_BIC/', "GBN BIC results:"),
        ('/HillClimbing/GBN_BGe/', "GBN BGe results:"),
    )
    for subfolder, header in variants:
        candidates = glob.glob(trained_models_folder + subfolder + '/*.pickle')
        candidates.sort()
        # Only the path that sorts last is evaluated.
        last_path = candidates[-1]

        model = load(last_path)
        model.fit(training_data)

        test_loglik = model.slogl(test_data)
        print(header)
        print("Loglik: " + str(test_loglik))
        print("SHD: " + str(experiments_helper.shd(model, true_model)))
        print("Hamming: " + str(experiments_helper.hamming(model, true_model)))
        print()
def compare_models(true_model, trained_models_folder, training_data,
                   test_data):
    """Print test-set metrics for the stored PC GBN models.

    The log-likelihood of ``true_model`` on ``test_data`` is printed first.
    Then, for the ``LinearCorrelation`` and ``RCoT`` sub-folders of
    ``<trained_models_folder>/PC/GBN``, the ``*.pickle`` path that sorts last
    is loaded, fitted on ``training_data`` and reported.

    Args:
        true_model: Reference model; must offer ``slogl(data)`` and is handed
            to ``experiments_helper.shd`` and ``experiments_helper.hamming``.
        trained_models_folder: Root folder of the stored models, as a string.
        training_data: Data passed to ``fit`` of every loaded model.
        test_data: Data passed to ``slogl`` of the true and loaded models.

    Raises:
        IndexError: If a model folder holds no ``*.pickle`` file.
    """
    reference_loglik = true_model.slogl(test_data)
    print("Ground truth loglik: " + str(reference_loglik))

    variants = (
        ('/PC/GBN/LinearCorrelation', "GBN LinearCorrelation results:"),
        ('/PC/GBN/RCoT', "GBN RCoT results:"),
    )
    for subfolder, header in variants:
        candidates = glob.glob(trained_models_folder + subfolder + '/*.pickle')
        candidates.sort()
        # Only the path that sorts last is evaluated.
        last_path = candidates[-1]

        model = load(last_path)
        model.fit(training_data)

        test_loglik = model.slogl(test_data)
        print(header)
        print("Loglik: " + str(test_loglik))
        print("SHD: " + str(experiments_helper.shd(model, true_model)))
        print("Hamming: " + str(experiments_helper.hamming(model, true_model)))
        print()
# Example #3
# 0
def compare_models(true_model, trained_models_folder, training_data, test_data,
                   patience):
    """Print test-set metrics for the stored HillClimbing SPBN models.

    The log-likelihood of ``true_model`` on ``test_data`` is printed first.
    Then, for every value ``p`` in ``patience``, the ``*.pickle`` path that
    sorts last inside ``<trained_models_folder>/HillClimbing/SPBN/<p>`` is
    loaded, fitted on ``training_data`` and reported.

    Args:
        true_model: Reference model; must offer ``slogl(data)`` and is handed
            to ``experiments_helper.shd`` and ``experiments_helper.hamming``.
        trained_models_folder: Root folder of the stored models, as a string.
        training_data: Data passed to ``fit`` of every loaded model.
        test_data: Data passed to ``slogl`` of the true and loaded models.
        patience: Iterable of values; each one names a sub-folder through
            ``str(p)``.

    Raises:
        IndexError: If a model folder holds no ``*.pickle`` file.
    """
    reference_loglik = true_model.slogl(test_data)

    print("Ground truth loglik: " + str(reference_loglik))
    print("SPBN results:")
    for p in patience:
        pattern = (trained_models_folder + '/HillClimbing/SPBN/' + str(p) +
                   '/*.pickle')
        candidates = glob.glob(pattern)
        candidates.sort()
        # Only the path that sorts last is evaluated.
        last_path = candidates[-1]

        model = load(last_path)
        model.fit(training_data)

        test_loglik = model.slogl(test_data)

        # Shared ", p <value>: " part of every report line.
        tag = ", p " + str(p) + ": "
        print("Loglik" + tag + str(test_loglik))
        print("SHD" + tag + str(experiments_helper.shd(model, true_model)))
        print("Hamming" + tag +
              str(experiments_helper.hamming(model, true_model)))
        print("Type Hamming" + tag +
              str(experiments_helper.hamming_type(model)))
        print()
def compare_models(true_model, trained_models_folder, training_data,
                   test_data):
    ground_truth_slogl = true_model.slogl(test_data)

    print("Ground truth loglik: " + str(ground_truth_slogl))

    for p in experiments_helper.PATIENCE:
        folder = trained_models_folder + '/PC/SPBN/LinearCorrelation/' + str(p)
        all_models = sorted(glob.glob(folder + '/*.pickle'))
        final_model = all_models[-1]

        spbn = load(final_model)
        spbn.fit(training_data)

        slogl = spbn.slogl(test_data)
        print("SPBN LinearCorrelation results:")
        print("Loglik, p " + str(p) + ": " + str(slogl))
        print("SHD, p " + str(p) + ": " +
              str(experiments_helper.shd(spbn, true_model)))
        print("Hamming, p " + str(p) + ": " +
              str(experiments_helper.hamming(spbn, true_model)))
        print("Type Hamming, p " + str(p) + ": " +
              str(experiments_helper.hamming_type(spbn)))
        print()

    for p in experiments_helper.PATIENCE:
        folder = trained_models_folder + '/PC/SPBN/RCoT/' + str(p)
        all_models = sorted(glob.glob(folder + '/*.pickle'))
        final_model = all_models[-1]

        spbn = load(final_model)
        spbn.fit(training_data)

        slogl = spbn.slogl(test_data)
        print("SPBN RCoT results:")
        print("Loglik, p " + str(p) + ": " + str(slogl))
        print("SHD, p " + str(p) + ": " +
              str(experiments_helper.shd(spbn, true_model)))
        print("Hamming, p " + str(p) + ": " +
              str(experiments_helper.hamming(spbn, true_model)))
        print("Type Hamming, p " + str(p) + ": " +
              str(experiments_helper.hamming_type(spbn)))
        print()
def test_spbn(df, model_folder, patience, dag_type):
    """Print test-set metrics for the PC SPBN models of one DAG type.

    For every value ``p`` in ``patience`` the folder
    ``<model_folder>/PC/SPBN/<dag_type>/<p>`` is created if missing, the
    ``*.pickle`` path that sorts last inside it is loaded, fitted on ``df``
    and reported.

    Besides its arguments, the function reads the names ``df_test`` and
    ``true_model``, which are not defined inside it and must exist at module
    level.

    Args:
        df: Training data passed to ``fit`` of each loaded model.
        model_folder: Root folder of the stored models, as a string.
        patience: Iterable of values; each one names a sub-folder through
            ``str(p)``.
        dag_type: String naming the sub-folder below ``PC/SPBN``.

    Raises:
        IndexError: If a result folder holds no ``*.pickle`` file.
    """
    print("Dag Type " + dag_type)
    for p in patience:
        target_dir = model_folder + '/PC/SPBN/' + dag_type + '/' + str(p)
        # The folder is created (parents included) before it is searched.
        pathlib.Path(target_dir).mkdir(parents=True, exist_ok=True)

        candidates = sorted(glob.glob(target_dir + '/*.pickle'))
        model = load(candidates[-1])
        model.fit(df)

        test_loglik = model.slogl(df_test)

        # Shared ", p <value>: " part of every report line.
        tag = ", p " + str(p) + ": "
        print("Loglik" + tag + str(test_loglik))
        print("SHD" + tag + str(experiments_helper.shd(model, true_model)))
        print("Hamming" + tag +
              str(experiments_helper.hamming(model, true_model)))
        print("Hamming type" + tag +
              str(experiments_helper.hamming_type(model, true_model)))

        print()
# Example #6
# 0
# Script section: report test-set metrics for the HillClimbing SPBN_CKDE
# models stored under models/200, models/2000 and models/10000.
# `slogl_model`, `true_model`, `load` and `experiments_helper` are not defined
# in this section and must already be in scope.

# Three training sets and one test set, read from CSV files in the working
# directory.
df_200 = pd.read_csv('synthetic_200.csv')
df_2000 = pd.read_csv('synthetic_2000.csv')
df_10000 = pd.read_csv('synthetic_10000.csv')
df_test = pd.read_csv('synthetic_test.csv')

print("True model logl: " + str(slogl_model(df_test)))

patience = experiments_helper.PATIENCE

# Each training set is paired with the folder that holds its models.
for df, model_folder in [(df_200, 'models/200'), (df_2000, 'models/2000'), (df_10000, 'models/10000')]:
    print("Folder " + model_folder)
    for p in patience:
        result_folder = model_folder + '/HillClimbing/SPBN_CKDE/' + str(p)
        # Creates the folder (parents included) if it does not exist yet.
        pathlib.Path(result_folder).mkdir(parents=True, exist_ok=True)

        # Only the *.pickle path that sorts last is loaded; an empty folder
        # makes the [-1] lookup raise IndexError.
        all_models = sorted(glob.glob(result_folder + '/*.pickle'))
        final_model = load(all_models[-1])
        final_model.fit(df)

        slogl = final_model.slogl(df_test)

        print("Loglik, p " + str(p) + ": " + str(slogl))
        print("SHD, p " + str(p) + ": " + str(experiments_helper.shd(final_model, true_model)))
        print("Hamming, p " + str(p) + ": " + str(experiments_helper.hamming(final_model, true_model)))
        print("Hamming type, p " + str(p) + ": " + str(experiments_helper.hamming_type(final_model, true_model)))

        print()



def extract_info(train_datasets, test_datasets, model_folders, true_models):
    """Collect log-likelihood and structure metrics for every stored model.

    The four arguments are iterated in lockstep, one entry per dataset. For
    each dataset, its training instances are paired with their model folders.
    In every model folder and for every learning configuration, the
    ``*.pickle`` path that sorts last is loaded, fitted on the training
    instance and scored against the dataset's test data and true model.

    Args:
        train_datasets: Sequence with one entry per dataset; each entry is a
            sequence of training instances. Array sizes are taken from
            ``len(train_datasets)`` and ``len(train_datasets[0])``, so every
            dataset is expected to have as many instances as the first one.
        test_datasets: One test set per dataset, passed to ``slogl``.
        model_folders: One sequence of folder strings per dataset, aligned
            with the training instances of that dataset.
        true_models: One reference model per dataset; must offer
            ``slogl(data)`` and is handed to ``experiments_helper.hamming``
            and ``experiments_helper.shd``.

    Returns:
        A tuple ``(slogl, hmd, shd, thd)`` of tuples of NumPy arrays:

        * ``slogl``: ``(slogl_true, slogl_hc_gbn_bic, slogl_hc_gbn_bge,
          slogl_hc_spbn, slogl_hc_spbn_ckde, slogl_pc_gbn, slogl_pc_spbn,
          slogl_pc_spbn_ckde)``
        * ``hmd``: ``(hmd_hc_gbn_bic, hmd_hc_gbn_bge, hmd_hc_spbn,
          hmd_hc_spbn_ckde, hmd_pc)``
        * ``shd``: ``(shd_hc_gbn_bic, shd_hc_gbn_bge, shd_hc_spbn,
          shd_hc_spbn_ckde, shd_pc)``
        * ``thd``: ``(thd_hc_spbn, thd_hc_spbn_ckde, thd_pc_spbn,
          thd_pc_spbn_ckde)``

        ``slogl_true`` has shape ``(n_datasets,)``. Every other array starts
        with ``(n_datasets, n_instances)``, followed by ``len(PATIENCE)`` for
        HC SPBN arrays, ``len(TESTS)`` for PC GBN arrays, and
        ``(len(TESTS), len(PATIENCE))`` for PC SPBN arrays.

    Raises:
        IndexError: If ``train_datasets`` is empty or a model folder holds no
            ``*.pickle`` file.
    """
    patience = experiments_helper.PATIENCE
    tests = experiments_helper.TESTS

    n_datasets = len(train_datasets)
    n_instances = len(train_datasets[0])
    n_patience = len(patience)
    n_tests = len(tests)

    def _empty(*trailing):
        """Allocate an uninitialised (n_datasets, n_instances, *trailing) array."""
        return np.empty((n_datasets, n_instances) + trailing)

    def _load_fitted(models_folder, training_data):
        """Load the last-sorted pickle of ``models_folder`` and fit it."""
        all_models = sorted(glob.glob(models_folder + '/*.pickle'))
        final_model = all_models[-1]

        model = load(final_model)
        model.fit(training_data)
        return model

    slogl_true = np.empty((n_datasets,))
    slogl_hc_gbn_bic = _empty()
    slogl_hc_gbn_bge = _empty()
    slogl_hc_spbn = _empty(n_patience)
    slogl_hc_spbn_ckde = _empty(n_patience)
    slogl_pc_gbn = _empty(n_tests)
    slogl_pc_spbn = _empty(n_tests, n_patience)
    slogl_pc_spbn_ckde = _empty(n_tests, n_patience)

    hmd_hc_gbn_bic = _empty()
    hmd_hc_gbn_bge = _empty()
    hmd_hc_spbn = _empty(n_patience)
    hmd_hc_spbn_ckde = _empty(n_patience)
    hmd_pc = _empty(n_tests)

    shd_hc_gbn_bic = _empty()
    shd_hc_gbn_bge = _empty()
    shd_hc_spbn = _empty(n_patience)
    shd_hc_spbn_ckde = _empty(n_patience)
    shd_pc = _empty(n_tests)

    thd_hc_spbn = _empty(n_patience)
    thd_hc_spbn_ckde = _empty(n_patience)
    thd_pc_spbn = _empty(n_tests, n_patience)
    thd_pc_spbn_ckde = _empty(n_tests, n_patience)

    for idx_dataset, (instance_datasets, test_data, dataset_folders,
                      true_model) in enumerate(
                          zip(train_datasets, test_datasets, model_folders,
                              true_models)):
        # The reference score depends only on the dataset, so it is computed
        # once here instead of once per training instance. This also fills
        # the entry when a dataset has no instances.
        slogl_true[idx_dataset] = true_model.slogl(test_data)

        for idx_instances, (training_data, folder) in enumerate(
                zip(instance_datasets, dataset_folders)):

            ###########################
            # GBN BIC
            ###########################
            bic = _load_fitted(folder + '/HillClimbing/GBN_BIC/',
                               training_data)

            slogl_hc_gbn_bic[idx_dataset, idx_instances] = bic.slogl(test_data)
            hmd_hc_gbn_bic[idx_dataset, idx_instances] = \
                experiments_helper.hamming(bic, true_model)
            shd_hc_gbn_bic[idx_dataset, idx_instances] = \
                experiments_helper.shd(bic, true_model)

            ###########################
            # GBN BGe
            ###########################
            bge = _load_fitted(folder + '/HillClimbing/GBN_BGe/',
                               training_data)

            slogl_hc_gbn_bge[idx_dataset, idx_instances] = bge.slogl(test_data)
            hmd_hc_gbn_bge[idx_dataset, idx_instances] = \
                experiments_helper.hamming(bge, true_model)
            shd_hc_gbn_bge[idx_dataset, idx_instances] = \
                experiments_helper.shd(bge, true_model)

            ###########################
            # HC SPBN
            ###########################
            for idx_p, p in enumerate(patience):
                spbn = _load_fitted(folder + '/HillClimbing/SPBN/' + str(p),
                                    training_data)

                slogl_hc_spbn[idx_dataset, idx_instances, idx_p] = \
                    spbn.slogl(test_data)
                hmd_hc_spbn[idx_dataset, idx_instances, idx_p] = \
                    experiments_helper.hamming(spbn, true_model)
                shd_hc_spbn[idx_dataset, idx_instances, idx_p] = \
                    experiments_helper.shd(spbn, true_model)
                thd_hc_spbn[idx_dataset, idx_instances, idx_p] = \
                    experiments_helper.hamming_type(spbn)

            ###########################
            # HC SPBN CKDE
            ###########################
            for idx_p, p in enumerate(patience):
                spbn_ckde = _load_fitted(
                    folder + '/HillClimbing/SPBN_CKDE/' + str(p),
                    training_data)

                slogl_hc_spbn_ckde[idx_dataset, idx_instances, idx_p] = \
                    spbn_ckde.slogl(test_data)
                hmd_hc_spbn_ckde[idx_dataset, idx_instances, idx_p] = \
                    experiments_helper.hamming(spbn_ckde, true_model)
                shd_hc_spbn_ckde[idx_dataset, idx_instances, idx_p] = \
                    experiments_helper.shd(spbn_ckde, true_model)
                thd_hc_spbn_ckde[idx_dataset, idx_instances, idx_p] = \
                    experiments_helper.hamming_type(spbn_ckde)

            ###########################
            # PC GBN and PC Graph
            ###########################
            for idx_t, test in enumerate(tests):
                gbn_pc = _load_fitted(folder + '/PC/GBN/' + test,
                                      training_data)

                slogl_pc_gbn[idx_dataset, idx_instances, idx_t] = \
                    gbn_pc.slogl(test_data)
                hmd_pc[idx_dataset, idx_instances, idx_t] = \
                    experiments_helper.hamming(gbn_pc, true_model)
                shd_pc[idx_dataset, idx_instances, idx_t] = \
                    experiments_helper.shd(gbn_pc, true_model)

            ###########################
            # PC SPBN
            ###########################
            # Only the log-likelihood and the type distance are recorded for
            # the PC SPBN models; hmd_pc / shd_pc come from the PC GBN models.
            for idx_t, test in enumerate(tests):
                for idx_p, p in enumerate(patience):
                    spbn_pc = _load_fitted(
                        folder + '/PC/SPBN/' + test + '/' + str(p),
                        training_data)

                    slogl_pc_spbn[idx_dataset, idx_instances, idx_t, idx_p] = \
                        spbn_pc.slogl(test_data)
                    thd_pc_spbn[idx_dataset, idx_instances, idx_t, idx_p] = \
                        experiments_helper.hamming_type(spbn_pc)

            ###########################
            # PC SPBN CKDE
            ###########################
            for idx_t, test in enumerate(tests):
                for idx_p, p in enumerate(patience):
                    spbn_ckde_pc = _load_fitted(
                        folder + '/PC/SPBN_CKDE/' + test + '/' + str(p),
                        training_data)

                    slogl_pc_spbn_ckde[idx_dataset, idx_instances, idx_t,
                                       idx_p] = spbn_ckde_pc.slogl(test_data)
                    thd_pc_spbn_ckde[idx_dataset, idx_instances, idx_t,
                                     idx_p] = experiments_helper.hamming_type(
                                         spbn_ckde_pc)

    return (slogl_true, slogl_hc_gbn_bic, slogl_hc_gbn_bge, slogl_hc_spbn, slogl_hc_spbn_ckde, slogl_pc_gbn, slogl_pc_spbn, slogl_pc_spbn_ckde), \
           (hmd_hc_gbn_bic, hmd_hc_gbn_bge, hmd_hc_spbn, hmd_hc_spbn_ckde, hmd_pc), \
           (shd_hc_gbn_bic, shd_hc_gbn_bge, shd_hc_spbn, shd_hc_spbn_ckde, shd_pc),\
           (thd_hc_spbn, thd_hc_spbn_ckde, thd_pc_spbn, thd_pc_spbn_ckde)
# Script section: report test-set metrics for the HillClimbing SPBN models
# stored under models/200, models/2000 and models/10000.
# `df_200` is used in the loop below but not assigned in this section; it
# relies on a module-level `df_200` assigned earlier in the file.
# `slogl_model`, `true_model`, `load` and `experiments_helper` must likewise
# already be in scope.
df_2000 = pd.read_csv('synthetic_2000.csv')
df_10000 = pd.read_csv('synthetic_10000.csv')
df_test = pd.read_csv('synthetic_test.csv')

print("True model logl: " + str(slogl_model(df_test)))

patience = experiments_helper.PATIENCE

# Each training set is paired with the folder that holds its models.
for df, model_folder in [(df_200, 'models/200'), (df_2000, 'models/2000'),
                         (df_10000, 'models/10000')]:
    print("Folder " + model_folder)
    for p in patience:
        result_folder = model_folder + '/HillClimbing/SPBN/' + str(p)
        # Creates the folder (parents included) if it does not exist yet.
        pathlib.Path(result_folder).mkdir(parents=True, exist_ok=True)

        # Only the *.pickle path that sorts last is loaded; an empty folder
        # makes the [-1] lookup raise IndexError.
        all_models = sorted(glob.glob(result_folder + '/*.pickle'))
        final_model = load(all_models[-1])
        final_model.fit(df)

        slogl = final_model.slogl(df_test)

        print("Loglik, p " + str(p) + ": " + str(slogl))
        print("SHD, p " + str(p) + ": " +
              str(experiments_helper.shd(final_model, true_model)))
        print("Hamming, p " + str(p) + ": " +
              str(experiments_helper.hamming(final_model, true_model)))
        print("Hamming type, p " + str(p) + ": " +
              str(experiments_helper.hamming_type(final_model, true_model)))

        print()