def run_simulation(tmp_path, results_path):
    """Fit an SMC estimator on every simulated sample and print the report.

    For each model name and sample size, the samples yielded by
    ``fetch_samples`` are fitted one at a time; every fitted estimator is
    added to a ``SimulationReport`` and the running summary is printed.
    A final summary is printed once all combinations are processed.

    Args:
        tmp_path: Base directory. A ``<sample_size>/<model_name>`` path is
            built under it and handed to ``fetch_samples`` as ``cache_dir``
            and to ``Bootstrap`` as its first argument.
        results_path: Passed to the ``SimulationReport`` constructor.
    """
    sim_report = SimulationReport(results_path)
    samples_dir = './simulation_study/samples'

    def fit_sample(work_dir, sample):
        # A brand-new Bootstrap/SMC pair is created for each sample.
        resampler = Bootstrap(work_dir, 100, '1')
        estimator = SMC(resampler,
                        penalty_interval=(0.01, 800),
                        cache_dir=None,
                        n_sizes=(0.3, 0.9),
                        alpha=0.01,
                        epsilon=0.01,
                        df_method='ct06',
                        perl_compatible=False,
                        num_cores=NUM_CORES)
        estimator.fit(sample)
        return estimator

    for model_name in ('model1', 'model2'):
        for sample_size in (5000, 10000, 20000):
            work_dir = os.path.join(tmp_path,
                                    str(sample_size),
                                    str(model_name))
            samples = fetch_samples(model_name,
                                    sample_size,
                                    samples_dir,
                                    cache_dir=work_dir)
            for sample_idx, sample in samples:
                fitted = fit_sample(work_dir, sample)
                sim_report.add(fitted, model_name, sample_size, sample_idx)
                # Progress output: summary so far after each fitted sample.
                print(sim_report.summary())

    print("============== Simulation results =======================")
    print(sim_report.summary())
Example #2
0
def test_smc_bic_pl(bootstrap, sample, tmp_path):
    """Smoke test: ``estimate_trees`` runs with ``perl_compatible=True``.

    There are no assertions; the test passes as long as nothing raises.
    """
    options = {
        'perl_compatible': True,
        'penalty_interval': (0.05, 100),
        'cache_dir': str(tmp_path),
    }
    estimator = SMC(bootstrap, **options)
    # Start from an empty tree list before estimating.
    estimator.context_trees = []
    estimator.estimate_trees(sample)
def test_smc_caching(bootstrap, sample, tmp_path):
    """Check that trees restored via ``caching`` match the fitted ones.

    One estimator is fitted and saved with ``caching.save_cache``; a second,
    identically configured estimator is populated with ``caching.load_cache``.
    Every tree string of either estimator must appear in the other.
    """
    cache_dir = str(tmp_path)

    def make_smc():
        return SMC(bootstrap,
                   penalty_interval=(0.01, 800),
                   cache_dir=cache_dir)

    fitted = make_smc()
    restored = make_smc()

    fitted.fit(sample)
    caching.save_cache(fitted, sample)
    caching.load_cache(restored, sample)

    fitted_trees = [tree.to_str() for tree in fitted.context_trees]
    restored_trees = [tree.to_str() for tree in restored.context_trees]

    # Mutual containment: neither side may hold a tree the other lacks.
    assert all(tree in restored_trees for tree in fitted_trees)
    assert all(tree in fitted_trees for tree in restored_trees)
def run_smc_lcb(X):
    """Fit the ``g4l.smc_lcb`` SMC estimator on ``X`` and report the result.

    Configuration (folder, resamples, renewal point, n_sizes, alpha and
    core count) is read from the module-level ``args`` object.

    Args:
        X: The sample passed to ``SMC.fit`` and to ``report``.
    """
    from g4l.smc_lcb import SMC

    # Anything not greater than 1 falls back to a single core.
    num_cores = args.num_cores if args.num_cores > 1 else 1

    resampler = Bootstrap(args.folder, args.resamples, args.renewal_point)
    estimator = SMC(
        resampler,
        cache_dir=args.folder,
        n_sizes=args.n_sizes,
        alpha=args.alpha,
        num_cores=num_cores,
    )
    estimator.fit(X)
    report(estimator, X)
def run_smc_bic(X):
    """Fit the ``g4l.smc_bic`` SMC estimator on ``X`` and report the result.

    All settings are read from the module-level ``args`` object; the
    penalty interval is converted to a tuple and ``perl_compatible`` to a
    bool before being handed to the estimator.

    Args:
        X: The sample passed to ``SMC.fit`` and to ``report``.
    """
    from g4l.smc_bic import SMC

    # Anything not greater than 1 falls back to a single core.
    num_cores = args.num_cores if args.num_cores > 1 else 1

    resampler = Bootstrap(args.folder, args.resamples, args.renewal_point)
    options = {
        'penalty_interval': tuple(args.penalty_interval),
        'cache_dir': args.folder,
        'n_sizes': args.n_sizes,
        'alpha': args.alpha,
        'epsilon': args.epsilon,
        'df_method': args.df,
        'perl_compatible': bool(args.perl_compatible),
        'num_cores': num_cores,
    }
    estimator = SMC(resampler, **options)
    estimator.fit(X)
    report(estimator, X)
Example #6
0
def test_smc_bic(bootstrap, sample, tmp_path):
    """Check the optimal tree found by SMC and its presence in the report.

    After seeding ``random`` and fitting, the optimal tree (rendered with
    ``reverse=True``) must equal the expected string, and the ``report.html``
    written by ``save_output`` must contain that same string.
    """
    correct_tree = '000 100 10 1'
    random.seed(12345)  # fixed seed so the run is repeatable
    smc = SMC(bootstrap, penalty_interval=(0.01, 800), cache_dir=str(tmp_path))
    smc.fit(sample)
    assert smc.optimal_tree.to_str(reverse=True) == correct_tree

    out_path = os.path.join(str(tmp_path), 'out')
    smc.save_output(sample, out_path)
    # Read via a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(os.path.join(out_path, 'report.html')) as report_file:
        html_report = report_file.read()
    assert correct_tree in html_report