def compare_models(true_model, trained_models_folder, training_data, test_data):
    """Print test-set metrics for the hill-climbing GBN models (BIC and BGe).

    For each score, the last ``*.pickle`` file (in sorted filename order) under
    ``<trained_models_folder>/HillClimbing/GBN_<score>/`` is loaded with the
    module-level ``load``, fitted on ``training_data`` and then evaluated.

    Args:
        true_model: Reference model. Its ``slogl(test_data)`` is printed as the
            ground truth and it is passed to ``experiments_helper.shd`` and
            ``experiments_helper.hamming``.
        trained_models_folder: Root folder containing the ``HillClimbing``
            results.
        training_data: Data passed to ``fit`` of each loaded model.
        test_data: Data passed to ``slogl``.

    Raises:
        IndexError: If a results folder contains no ``*.pickle`` file.
    """
    ground_truth_slogl = true_model.slogl(test_data)
    print("Ground truth loglik: " + str(ground_truth_slogl))

    # (label used in the report, sub-folder holding the pickled models).
    # The trailing slash is kept so the glob pattern is unchanged.
    score_runs = (
        ("GBN BIC", '/HillClimbing/GBN_BIC/'),
        ("GBN BGe", '/HillClimbing/GBN_BGe/'),
    )

    for label, subfolder in score_runs:
        checkpoints = sorted(glob.glob(trained_models_folder + subfolder + '/*.pickle'))
        # The last file in sorted order is taken as the final model.
        gbn = load(checkpoints[-1])
        gbn.fit(training_data)
        slogl = gbn.slogl(test_data)

        print(label + " results:")
        print("Loglik: " + str(slogl))
        print("SHD: " + str(experiments_helper.shd(gbn, true_model)))
        print("Hamming: " + str(experiments_helper.hamming(gbn, true_model)))
        print()
def compare_models(true_model, trained_models_folder, training_data, test_data): ground_truth_slogl = true_model.slogl(test_data) print("Ground truth loglik: " + str(ground_truth_slogl)) gbn_bic_folder = trained_models_folder + '/PC/GBN/LinearCorrelation' all_models = sorted(glob.glob(gbn_bic_folder + '/*.pickle')) final_model = all_models[-1] gbn = load(final_model) gbn.fit(training_data) slogl = gbn.slogl(test_data) print("GBN LinearCorrelation results:") print("Loglik: " + str(slogl)) print("SHD: " + str(experiments_helper.shd(gbn, true_model))) print("Hamming: " + str(experiments_helper.hamming(gbn, true_model))) print() gbn_bge_folder = trained_models_folder + '/PC/GBN/RCoT' all_models = sorted(glob.glob(gbn_bge_folder + '/*.pickle')) final_model = all_models[-1] gbn = load(final_model) gbn.fit(training_data) slogl = gbn.slogl(test_data) print("GBN RCoT results:") print("Loglik: " + str(slogl)) print("SHD: " + str(experiments_helper.shd(gbn, true_model))) print("Hamming: " + str(experiments_helper.hamming(gbn, true_model))) print()
def compare_models(true_model, trained_models_folder, training_data, test_data, patience):
    """Print test-set metrics for the hill-climbing SPBN models, one per patience.

    For every value ``p`` in ``patience``, the last ``*.pickle`` file (in sorted
    filename order) under ``<trained_models_folder>/HillClimbing/SPBN/<p>`` is
    loaded with the module-level ``load``, fitted on ``training_data`` and
    evaluated on ``test_data``.

    Args:
        true_model: Reference model. Its ``slogl(test_data)`` is printed as the
            ground truth and it is passed to ``experiments_helper.shd`` and
            ``experiments_helper.hamming``.
        trained_models_folder: Root folder containing the ``HillClimbing``
            results.
        training_data: Data passed to ``fit`` of each loaded model.
        test_data: Data passed to ``slogl``.
        patience: Iterable of patience values; each one names a sub-folder.
    """
    # ``!s`` forces ``str()`` so the text matches plain string concatenation.
    reference_slogl = true_model.slogl(test_data)
    print(f"Ground truth loglik: {reference_slogl!s}")

    print("SPBN results:")
    for p in patience:
        spbn_folder = trained_models_folder + '/HillClimbing/SPBN/' + str(p)
        latest_checkpoint = sorted(glob.glob(spbn_folder + '/*.pickle'))[-1]

        spbn = load(latest_checkpoint)
        spbn.fit(training_data)

        # Each metric is computed right before it is printed.
        test_slogl = spbn.slogl(test_data)
        print(f"Loglik, p {p!s}: {test_slogl!s}")

        shd_value = experiments_helper.shd(spbn, true_model)
        print(f"SHD, p {p!s}: {shd_value!s}")

        hamming_value = experiments_helper.hamming(spbn, true_model)
        print(f"Hamming, p {p!s}: {hamming_value!s}")

        type_hamming_value = experiments_helper.hamming_type(spbn)
        print(f"Type Hamming, p {p!s}: {type_hamming_value!s}")
        print()
def compare_models(true_model, trained_models_folder, training_data, test_data): ground_truth_slogl = true_model.slogl(test_data) print("Ground truth loglik: " + str(ground_truth_slogl)) for p in experiments_helper.PATIENCE: folder = trained_models_folder + '/PC/SPBN/LinearCorrelation/' + str(p) all_models = sorted(glob.glob(folder + '/*.pickle')) final_model = all_models[-1] spbn = load(final_model) spbn.fit(training_data) slogl = spbn.slogl(test_data) print("SPBN LinearCorrelation results:") print("Loglik, p " + str(p) + ": " + str(slogl)) print("SHD, p " + str(p) + ": " + str(experiments_helper.shd(spbn, true_model))) print("Hamming, p " + str(p) + ": " + str(experiments_helper.hamming(spbn, true_model))) print("Type Hamming, p " + str(p) + ": " + str(experiments_helper.hamming_type(spbn))) print() for p in experiments_helper.PATIENCE: folder = trained_models_folder + '/PC/SPBN/RCoT/' + str(p) all_models = sorted(glob.glob(folder + '/*.pickle')) final_model = all_models[-1] spbn = load(final_model) spbn.fit(training_data) slogl = spbn.slogl(test_data) print("SPBN RCoT results:") print("Loglik, p " + str(p) + ": " + str(slogl)) print("SHD, p " + str(p) + ": " + str(experiments_helper.shd(spbn, true_model))) print("Hamming, p " + str(p) + ": " + str(experiments_helper.hamming(spbn, true_model))) print("Type Hamming, p " + str(p) + ": " + str(experiments_helper.hamming_type(spbn))) print()
def test_spbn(df, model_folder, patience, dag_type):
    """Print metrics of the PC-learned SPBN models for one DAG type.

    For every ``p`` in ``patience``, the folder
    ``<model_folder>/PC/SPBN/<dag_type>/<p>`` is created if missing, the last
    ``*.pickle`` file in it (sorted filename order) is loaded with the
    module-level ``load``, fitted on ``df`` and evaluated.

    NOTE(review): this function reads ``df_test`` and ``true_model`` from the
    enclosing module scope rather than taking them as parameters.

    Args:
        df: Training data passed to ``fit``.
        model_folder: Root folder of the trained models.
        patience: Iterable of patience values; each one names a sub-folder.
        dag_type: Name of the sub-folder under ``PC/SPBN`` (must be a ``str``).
    """
    print("Dag Type " + dag_type)

    for p in patience:
        result_folder = model_folder + '/PC/SPBN/' + dag_type + '/' + str(p)
        pathlib.Path(result_folder).mkdir(parents=True, exist_ok=True)

        latest_checkpoint = sorted(glob.glob(result_folder + '/*.pickle'))[-1]
        final_model = load(latest_checkpoint)
        final_model.fit(df)

        # ``!s`` forces ``str()`` so the text matches plain concatenation;
        # each metric is computed right before it is printed.
        test_slogl = final_model.slogl(df_test)
        print(f"Loglik, p {p!s}: {test_slogl!s}")

        shd_value = experiments_helper.shd(final_model, true_model)
        print(f"SHD, p {p!s}: {shd_value!s}")

        hamming_value = experiments_helper.hamming(final_model, true_model)
        print(f"Hamming, p {p!s}: {hamming_value!s}")

        type_hamming_value = experiments_helper.hamming_type(final_model, true_model)
        print(f"Hamming type, p {p!s}: {type_hamming_value!s}")
        print()
# Report the HillClimbing/SPBN_CKDE models trained on each synthetic dataset.
# `slogl_model` and `true_model` are expected to be defined elsewhere in the
# module.
df_200 = pd.read_csv('synthetic_200.csv')
df_2000 = pd.read_csv('synthetic_2000.csv')
df_10000 = pd.read_csv('synthetic_10000.csv')
df_test = pd.read_csv('synthetic_test.csv')

# print() applies str() to every argument, so sep="" reproduces the
# concatenated text exactly.
print("True model logl: ", slogl_model(df_test), sep="")

patience = experiments_helper.PATIENCE

for df, model_folder in (
        (df_200, 'models/200'),
        (df_2000, 'models/2000'),
        (df_10000, 'models/10000'),
):
    print("Folder " + model_folder)

    for p in patience:
        result_folder = model_folder + '/HillClimbing/SPBN_CKDE/' + str(p)
        pathlib.Path(result_folder).mkdir(parents=True, exist_ok=True)

        # The last pickle in sorted filename order is taken as the final model.
        all_models = sorted(glob.glob(result_folder + '/*.pickle'))
        final_model = load(all_models[-1])
        final_model.fit(df)

        slogl = final_model.slogl(df_test)

        print("Loglik, p ", p, ": ", slogl, sep="")
        print("SHD, p ", p, ": ",
              experiments_helper.shd(final_model, true_model), sep="")
        print("Hamming, p ", p, ": ",
              experiments_helper.hamming(final_model, true_model), sep="")
        print("Hamming type, p ", p, ": ",
              experiments_helper.hamming_type(final_model, true_model), sep="")
        print()
def _fit_final_model(model_folder, training_data):
    """Load the last ``*.pickle`` model (sorted by filename) in a folder and fit it."""
    checkpoints = sorted(glob.glob(model_folder + '/*.pickle'))
    model = load(checkpoints[-1])
    model.fit(training_data)
    return model


def extract_info(train_datasets, test_datasets, model_folders, true_models):
    """Collect log-likelihood and structural metrics for every trained model.

    The four arguments are iterated in lockstep, one entry per dataset. For
    each dataset, its training instances and their model folders are again
    iterated in lockstep. For every learning configuration, the last
    ``*.pickle`` file in the configuration's folder is loaded, fitted on the
    training instance and evaluated on the dataset's test data.

    Args:
        train_datasets: Sequence (one entry per dataset) of sequences of
            training instances. ``len(train_datasets[0])`` fixes the instance
            dimension of every result array.
        test_datasets: One test set per dataset.
        model_folders: Per dataset, one root model folder per training
            instance.
        true_models: One reference model per dataset.

    Returns:
        A 4-tuple ``(slogl, hmd, shd, thd)`` where, with ``D`` datasets, ``I``
        instances, ``P = len(experiments_helper.PATIENCE)`` and
        ``T = len(experiments_helper.TESTS)``:

        * ``slogl`` = (true ``(D,)``, HC GBN BIC ``(D, I)``, HC GBN BGe
          ``(D, I)``, HC SPBN ``(D, I, P)``, HC SPBN CKDE ``(D, I, P)``,
          PC GBN ``(D, I, T)``, PC SPBN ``(D, I, T, P)``,
          PC SPBN CKDE ``(D, I, T, P)``) holding ``slogl(test_data)`` values.
        * ``hmd`` = (HC GBN BIC, HC GBN BGe, HC SPBN, HC SPBN CKDE, PC) holding
          ``experiments_helper.hamming`` values.
        * ``shd`` = same layout as ``hmd`` holding ``experiments_helper.shd``
          values.
        * ``thd`` = (HC SPBN, HC SPBN CKDE, PC SPBN, PC SPBN CKDE) holding
          ``experiments_helper.hamming_type`` values.

        Arrays are allocated with ``np.empty``; entries that are never visited
        keep arbitrary values.

    Raises:
        IndexError: If ``train_datasets`` is empty or a model folder contains
            no ``*.pickle`` file.
    """
    patience = experiments_helper.PATIENCE
    tests = experiments_helper.TESTS

    n_datasets = len(train_datasets)
    n_instances = len(train_datasets[0])

    # Result shapes, named once instead of repeating the len() arithmetic.
    per_instance = (n_datasets, n_instances)
    per_patience = per_instance + (len(patience),)
    per_test = per_instance + (len(tests),)
    per_test_patience = per_test + (len(patience),)

    slogl_true = np.empty((n_datasets,))
    slogl_hc_gbn_bic = np.empty(per_instance)
    slogl_hc_gbn_bge = np.empty(per_instance)
    slogl_hc_spbn = np.empty(per_patience)
    slogl_hc_spbn_ckde = np.empty(per_patience)
    slogl_pc_gbn = np.empty(per_test)
    slogl_pc_spbn = np.empty(per_test_patience)
    slogl_pc_spbn_ckde = np.empty(per_test_patience)

    hmd_hc_gbn_bic = np.empty(per_instance)
    hmd_hc_gbn_bge = np.empty(per_instance)
    hmd_hc_spbn = np.empty(per_patience)
    hmd_hc_spbn_ckde = np.empty(per_patience)
    hmd_pc = np.empty(per_test)

    shd_hc_gbn_bic = np.empty(per_instance)
    shd_hc_gbn_bge = np.empty(per_instance)
    shd_hc_spbn = np.empty(per_patience)
    shd_hc_spbn_ckde = np.empty(per_patience)
    shd_pc = np.empty(per_test)

    thd_hc_spbn = np.empty(per_patience)
    thd_hc_spbn_ckde = np.empty(per_patience)
    thd_pc_spbn = np.empty(per_test_patience)
    thd_pc_spbn_ckde = np.empty(per_test_patience)

    for idx_dataset, (instance_datasets, test_data, dataset_folders, true_model) in enumerate(
            zip(train_datasets, test_datasets, model_folders, true_models)):
        # Depends only on the dataset, so it is evaluated once per dataset
        # rather than once per training instance.
        slogl_true[idx_dataset] = true_model.slogl(test_data)

        for idx_instances, (training_data, folder) in enumerate(
                zip(instance_datasets, dataset_folders)):
            ###########################
            # GBN BIC
            ###########################
            bic = _fit_final_model(folder + '/HillClimbing/GBN_BIC/', training_data)
            slogl_hc_gbn_bic[idx_dataset, idx_instances] = bic.slogl(test_data)
            hmd_hc_gbn_bic[idx_dataset, idx_instances] = experiments_helper.hamming(bic, true_model)
            shd_hc_gbn_bic[idx_dataset, idx_instances] = experiments_helper.shd(bic, true_model)

            ###########################
            # GBN BGe
            ###########################
            bge = _fit_final_model(folder + '/HillClimbing/GBN_BGe/', training_data)
            slogl_hc_gbn_bge[idx_dataset, idx_instances] = bge.slogl(test_data)
            hmd_hc_gbn_bge[idx_dataset, idx_instances] = experiments_helper.hamming(bge, true_model)
            shd_hc_gbn_bge[idx_dataset, idx_instances] = experiments_helper.shd(bge, true_model)

            ###########################
            # HC SPBN
            ###########################
            for idx_p, p in enumerate(patience):
                spbn = _fit_final_model(folder + '/HillClimbing/SPBN/' + str(p), training_data)
                slogl_hc_spbn[idx_dataset, idx_instances, idx_p] = spbn.slogl(test_data)
                hmd_hc_spbn[idx_dataset, idx_instances, idx_p] = experiments_helper.hamming(
                    spbn, true_model)
                shd_hc_spbn[idx_dataset, idx_instances, idx_p] = experiments_helper.shd(
                    spbn, true_model)
                thd_hc_spbn[idx_dataset, idx_instances, idx_p] = experiments_helper.hamming_type(
                    spbn)

            ###########################
            # HC SPBN CKDE
            ###########################
            for idx_p, p in enumerate(patience):
                spbn_ckde = _fit_final_model(
                    folder + '/HillClimbing/SPBN_CKDE/' + str(p), training_data)
                slogl_hc_spbn_ckde[idx_dataset, idx_instances, idx_p] = spbn_ckde.slogl(test_data)
                hmd_hc_spbn_ckde[idx_dataset, idx_instances, idx_p] = experiments_helper.hamming(
                    spbn_ckde, true_model)
                shd_hc_spbn_ckde[idx_dataset, idx_instances, idx_p] = experiments_helper.shd(
                    spbn_ckde, true_model)
                thd_hc_spbn_ckde[idx_dataset, idx_instances, idx_p] = \
                    experiments_helper.hamming_type(spbn_ckde)

            ###########################
            # PC GBN and PC Graph
            ###########################
            for idx_t, test in enumerate(tests):
                gbn_pc = _fit_final_model(folder + '/PC/GBN/' + test, training_data)
                slogl_pc_gbn[idx_dataset, idx_instances, idx_t] = gbn_pc.slogl(test_data)
                hmd_pc[idx_dataset, idx_instances, idx_t] = experiments_helper.hamming(
                    gbn_pc, true_model)
                shd_pc[idx_dataset, idx_instances, idx_t] = experiments_helper.shd(
                    gbn_pc, true_model)

            ###########################
            # PC SPBN
            ###########################
            # Only log-likelihood and type Hamming are stored for the PC SPBN
            # variants; hmd_pc / shd_pc are filled from the PC GBN models.
            for idx_t, test in enumerate(tests):
                for idx_p, p in enumerate(patience):
                    spbn_pc = _fit_final_model(
                        folder + '/PC/SPBN/' + test + '/' + str(p), training_data)
                    slogl_pc_spbn[idx_dataset, idx_instances, idx_t, idx_p] = \
                        spbn_pc.slogl(test_data)
                    thd_pc_spbn[idx_dataset, idx_instances, idx_t, idx_p] = \
                        experiments_helper.hamming_type(spbn_pc)

            ###########################
            # PC SPBN CKDE
            ###########################
            for idx_t, test in enumerate(tests):
                for idx_p, p in enumerate(patience):
                    spbn_ckde_pc = _fit_final_model(
                        folder + '/PC/SPBN_CKDE/' + test + '/' + str(p), training_data)
                    slogl_pc_spbn_ckde[idx_dataset, idx_instances, idx_t, idx_p] = \
                        spbn_ckde_pc.slogl(test_data)
                    thd_pc_spbn_ckde[idx_dataset, idx_instances, idx_t, idx_p] = \
                        experiments_helper.hamming_type(spbn_ckde_pc)

    return (slogl_true, slogl_hc_gbn_bic, slogl_hc_gbn_bge, slogl_hc_spbn, slogl_hc_spbn_ckde,
            slogl_pc_gbn, slogl_pc_spbn, slogl_pc_spbn_ckde), \
           (hmd_hc_gbn_bic, hmd_hc_gbn_bge, hmd_hc_spbn, hmd_hc_spbn_ckde, hmd_pc), \
           (shd_hc_gbn_bic, shd_hc_gbn_bge, shd_hc_spbn, shd_hc_spbn_ckde, shd_pc), \
           (thd_hc_spbn, thd_hc_spbn_ckde, thd_pc_spbn, thd_pc_spbn_ckde)
# Report the HillClimbing/SPBN models trained on each synthetic dataset.
# `df_200`, `slogl_model` and `true_model` are expected to be defined earlier
# in the module.
df_2000 = pd.read_csv('synthetic_2000.csv')
df_10000 = pd.read_csv('synthetic_10000.csv')
df_test = pd.read_csv('synthetic_test.csv')

# print() applies str() to every argument, so sep="" reproduces the
# concatenated text exactly.
print("True model logl: ", slogl_model(df_test), sep="")

patience = experiments_helper.PATIENCE

for df, model_folder in (
        (df_200, 'models/200'),
        (df_2000, 'models/2000'),
        (df_10000, 'models/10000'),
):
    print("Folder " + model_folder)

    for p in patience:
        result_folder = model_folder + '/HillClimbing/SPBN/' + str(p)
        pathlib.Path(result_folder).mkdir(parents=True, exist_ok=True)

        # The last pickle in sorted filename order is taken as the final model.
        all_models = sorted(glob.glob(result_folder + '/*.pickle'))
        final_model = load(all_models[-1])
        final_model.fit(df)

        slogl = final_model.slogl(df_test)

        print("Loglik, p ", p, ": ", slogl, sep="")
        print("SHD, p ", p, ": ",
              experiments_helper.shd(final_model, true_model), sep="")
        print("Hamming, p ", p, ": ",
              experiments_helper.hamming(final_model, true_model), sep="")
        print("Hamming type, p ", p, ": ",
              experiments_helper.hamming_type(final_model, true_model), sep="")
        print()