def run_simulation(tmp_path, results_path):
    """Fit an SMC model on every simulated sample and print the report.

    Loops over two model names and three sample sizes, fits one ``SMC``
    instance per sample yielded by ``fetch_samples`` and records each fit
    in a ``SimulationReport`` created from ``results_path``.

    Args:
        tmp_path: Base directory. The sub-path
            ``<tmp_path>/<sample_size>/<model_name>`` is handed to
            ``fetch_samples`` as ``cache_dir`` and to ``Bootstrap`` as its
            first argument.
        results_path: Passed unchanged to ``SimulationReport``.
    """
    # NOTE(review): this function was recovered from a single collapsed line,
    # so the nesting below is reconstructed from syntax -- confirm it against
    # the original file, in particular the depth of the first summary print.
    report = SimulationReport(results_path)
    samples_path = './simulation_study/samples'
    for model_name in ['model1', 'model2']:
        for sample_size in [5000, 10000, 20000]:
            # One working directory per (sample size, model) combination.
            tmp = os.path.join(tmp_path, str(sample_size), str(model_name))
            for sample_idx, sample in fetch_samples(model_name, sample_size,
                                                    samples_path,
                                                    cache_dir=tmp):
                bootstrap = Bootstrap(tmp, 100, '1')
                # cache_dir=None here, unlike the other SMC call sites in
                # this file, which pass a directory.
                smc = SMC(bootstrap,
                          penalty_interval=(0.01, 800),
                          cache_dir=None,
                          n_sizes=(0.3, 0.9),
                          alpha=0.01,
                          epsilon=0.01,
                          df_method='ct06',
                          perl_compatible=False,
                          num_cores=NUM_CORES)
                smc.fit(sample)
                report.add(smc, model_name, sample_size, sample_idx)
                # Running summary after each fitted sample (presumably a
                # progress indicator -- see the review note above).
                print(report.summary())
    print("============== Simulation results =======================")
    print(report.summary())
def test_smc_bic_pl(bootstrap, sample, tmp_path):
    """Run tree estimation with ``perl_compatible=True``.

    Builds an ``SMC`` with the Perl-compatibility flag set, resets its
    ``context_trees`` attribute to an empty list and calls
    ``estimate_trees`` directly. The test contains no assertions; it
    passes when none of these calls raises.
    """
    settings = {
        'perl_compatible': True,
        'penalty_interval': (0.05, 100),
        'cache_dir': str(tmp_path),
    }
    model = SMC(bootstrap, **settings)
    model.context_trees = []
    model.estimate_trees(sample)
def test_smc_caching(bootstrap, sample, tmp_path):
    """Check that a saved cache restores the same context trees.

    Fits one ``SMC``, saves its cache, loads that cache into a second
    identically configured ``SMC`` and verifies that every tree string of
    one instance also occurs in the other, in both directions.
    """
    cache_dir = str(tmp_path)
    smc = SMC(bootstrap, penalty_interval=(0.01, 800), cache_dir=cache_dir)
    smc2 = SMC(bootstrap, penalty_interval=(0.01, 800), cache_dir=cache_dir)

    smc.fit(sample)
    caching.save_cache(smc, sample)
    caching.load_cache(smc2, sample)

    fitted_trees = [tree.to_str() for tree in smc.context_trees]
    loaded_trees = [tree.to_str() for tree in smc2.context_trees]

    # Mutual containment: neither side may hold a tree the other lacks.
    assert all(tree in loaded_trees for tree in fitted_trees)
    assert all(tree in fitted_trees for tree in loaded_trees)
def run_smc_lcb(X):
    """Fit the ``g4l.smc_lcb`` SMC variant on ``X`` and report the result.

    All settings come from the module-level ``args`` object. The fitted
    model and ``X`` are then handed to ``report``.

    Args:
        X: The sample passed to ``SMC.fit`` and to ``report``.
    """
    from g4l.smc_lcb import SMC

    # Any requested core count that is not greater than 1 falls back to 1.
    cores = args.num_cores if args.num_cores > 1 else 1
    resampler = Bootstrap(args.folder, args.resamples, args.renewal_point)
    model = SMC(
        resampler,
        cache_dir=args.folder,
        n_sizes=args.n_sizes,
        alpha=args.alpha,
        num_cores=cores,
    )
    model.fit(X)
    report(model, X)
def run_smc_bic(X):
    """Fit the ``g4l.smc_bic`` SMC variant on ``X`` and report the result.

    All settings come from the module-level ``args`` object. The fitted
    model and ``X`` are then handed to ``report``.

    Args:
        X: The sample passed to ``SMC.fit`` and to ``report``.
    """
    from g4l.smc_bic import SMC

    # Any requested core count that is not greater than 1 falls back to 1.
    cores = args.num_cores if args.num_cores > 1 else 1
    resampler = Bootstrap(args.folder, args.resamples, args.renewal_point)
    model = SMC(
        resampler,
        penalty_interval=tuple(args.penalty_interval),
        cache_dir=args.folder,
        n_sizes=args.n_sizes,
        alpha=args.alpha,
        epsilon=args.epsilon,
        df_method=args.df,
        perl_compatible=bool(args.perl_compatible),
        num_cores=cores,
    )
    model.fit(X)
    report(model, X)
def test_smc_bic(bootstrap, sample, tmp_path):
    """Check the optimal tree found by SMC and its presence in the report.

    Seeds ``random``, fits an ``SMC`` on ``sample`` and asserts that the
    optimal tree, rendered with ``to_str(reverse=True)``, equals the
    expected string. It then writes the output to ``<tmp_path>/out`` and
    asserts that the same string appears in the generated ``report.html``.
    """
    correct_tree = '000 100 10 1'
    random.seed(12345)
    smc = SMC(bootstrap, penalty_interval=(0.01, 800), cache_dir=str(tmp_path))
    smc.fit(sample)
    assert smc.optimal_tree.to_str(reverse=True) == correct_tree

    out_path = os.path.join(str(tmp_path), 'out')
    smc.save_output(sample, out_path)
    # Read the report inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(os.path.join(out_path, 'report.html')) as report_file:
        html_report = report_file.read()
    assert correct_tree in html_report