Ejemplo n.º 1
0
def test_rpy2(sampler):
    """Run the R notebook example, then reload the stored run and continue."""
    r = pyabc.external.R("doc/examples/myRModel.R")
    r.display_source_ipython()

    # fetch the model components and the observed data from the R script
    model = r.model("myModel")
    distance = r.distance("myDistance")
    sum_stat = r.summary_statistics("mySummaryStatistics")
    data = r.observation("mySumStatData")

    prior = pyabc.Distribution(
        meanX=pyabc.RV("uniform", 0, 10),
        meanY=pyabc.RV("uniform", 0, 10),
    )
    # keyword arguments shared by both ABCSMC instances below
    shared_kwargs = {"summary_statistics": sum_stat, "sampler": sampler}

    abc = pyabc.ABCSMC(
        model, prior, distance, population_size=5, **shared_kwargs)
    db = pyabc.create_sqlite_db_id(file_="test_external.db")
    abc.new(db, data)
    history = abc.run(minimum_epsilon=0.9, max_nr_populations=2)
    history.get_weighted_sum_stats_for_model(m=0, t=1)[1][0]["cars"].head()

    # reload the run by its id; the observed data is not passed a second time
    run_id = history.id
    abc = pyabc.ABCSMC(
        model, prior, distance, population_size=6, **shared_kwargs)
    abc.load(db, run_id)
    abc.run(minimum_epsilon=0.1, max_nr_populations=2)
Ejemplo n.º 2
0
def test_stochastic_acceptor():
    """Run ABCSMC with a stochastic acceptor in two configurations."""

    def model(par):
        shift = np.array([0.3, 0.7])
        return {'s0': par['p0'] + shift}

    x_0 = {'s0': np.array([0.4, -0.6])}
    prior = pyabc.Distribution(p0=pyabc.RV('uniform', -1, 2))
    distance = pyabc.IndependentNormalKernel(var=np.array([1, 1]))

    def run_analysis(acceptor, eps, n_populations):
        # both scenarios share everything except acceptor, temperature
        # scheme and the number of populations
        abc = pyabc.ABCSMC(
            model,
            prior,
            distance,
            eps=eps,
            acceptor=acceptor,
            population_size=10,
        )
        abc.new(pyabc.create_sqlite_db_id(), x_0)
        abc.run(max_nr_populations=n_populations, minimum_epsilon=1.)

    # just run, with an explicit initial temperature
    run_analysis(
        pyabc.StochasticAcceptor(pdf_norm_method=pyabc.pdf_norm_max_found),
        pyabc.Temperature(initial_temperature=1),
        1,
    )

    # use no initial temperature and adaptive c
    run_analysis(pyabc.StochasticAcceptor(), pyabc.Temperature(), 3)
Ejemplo n.º 3
0
def test_default_eps():
    """Check `minimum_epsilon` after runs that do not set it explicitly.

    The plain setup is expected to end with 0.0, the setup using a
    stochastic acceptor with a temperature scheme with 1.0.
    """

    def model(par):
        # 's0' is drawn before 's1', as in a dict literal
        s0 = par['p0'] + np.random.random()
        s1 = np.random.random()
        return {'s0': s0, 's1': s1}

    x_0 = {'s0': 0.4, 's1': 0.6}
    prior = pyabc.Distribution(p0=pyabc.RV('uniform', -1, 2))

    # usual setting
    plain_abc = pyabc.ABCSMC(model, prior, population_size=10)
    plain_abc.new(pyabc.create_sqlite_db_id(), x_0)
    plain_abc.run(max_nr_populations=3)
    assert plain_abc.minimum_epsilon == 0.0

    # noisy setting
    acceptor = pyabc.StochasticAcceptor()
    eps = pyabc.Temperature()
    distance = pyabc.IndependentNormalKernel(var=np.array([1, 1]))
    noisy_abc = pyabc.ABCSMC(
        model,
        prior,
        distance,
        eps=eps,
        acceptor=acceptor,
        population_size=10,
    )
    noisy_abc.new(pyabc.create_sqlite_db_id(), x_0)
    noisy_abc.run(max_nr_populations=3)
    assert noisy_abc.minimum_epsilon == 1.0
Ejemplo n.º 4
0
def test_pipeline(db_file):
    """Test whole pipeline using a learned summary statistic.

    Three analyses are run one after another with different distances.
    For each, the absolute offset of the weighted mean of `p0` from 0.1
    is computed and the offsets are compared.
    """
    rng = np.random.Generator(np.random.PCG64(0))

    def model(p):
        # draw order matters for reproducibility: "s0" first, then "s1"
        s0 = p["p0"] + 1e-2 * rng.normal(size=2)
        s1 = rng.normal()
        return {"s0": s0, "s1": s1}

    prior = pyabc.Distribution(p0=pyabc.RV("uniform", 0, 1))
    data = {"s0": np.array([0.1, 0.105]), "s1": 0.5}

    def analyse(distance):
        """Run one analysis with the given distance, return its history."""
        abc = pyabc.ABCSMC(model, prior, distance, population_size=100)
        abc.new("sqlite:///" + db_file, data)
        return abc.run(max_total_nr_simulations=1000)

    def offset(history, **kwargs):
        """Absolute deviation of the weighted mean of p0 from 0.1."""
        df, w = history.get_distribution(**kwargs)
        return abs(pyabc.weighted_mean(df.p0, w) - 0.1)

    # run a little analysis with the learned summary statistic
    h = analyse(
        pyabc.AdaptivePNormDistance(
            sumstat=PredictorSumstat(LinearPredictor(), fit_ixs={1, 3}),
        )
    )
    off0 = offset(h, t=0)  # first iteration
    off = offset(h)  # last iteration
    assert off0 > off

    # alternative run with simple distance
    off_comp = offset(analyse(pyabc.PNormDistance()))
    assert off_comp > off

    # alternative run with info weighting
    info_distance = pyabc.InfoWeightedPNormDistance(
        predictor=LinearPredictor(),
        fit_info_ixs={1, 3},
    )
    off_info = offset(analyse(info_distance))
    assert off_comp > off_info
Ejemplo n.º 5
0
def setup_module():
    """Run a small two-parameter analysis that fills the module database.

    Called once at the beginning.
    """
    p_true = {'p0': 3, 'p1': 4}
    limits = {'p0': (0, 5), 'p1': (1, 8)}

    def model(p):
        ss0 = p['p0'] + 0.1 * np.random.uniform()
        ss1 = p['p1'] + 0.1 * np.random.uniform()
        return {'ss0': ss0, 'ss1': ss1}

    observation = {'ss0': p_true['p0'], 'ss1': p_true['p1']}

    # uniform prior per parameter: RV takes the lower bound and the width
    rvs = {}
    for key in p_true:
        lower, upper = limits[key]
        rvs[key] = pyabc.RV('uniform', lower, upper - lower)
    prior = pyabc.Distribution(**rvs)

    abc = pyabc.ABCSMC(
        model, prior, pyabc.PNormDistance(p=2), population_size=50)
    abc.new(db_path, observation)
    abc.run(minimum_epsilon=0.1, max_nr_populations=4)
Ejemplo n.º 6
0
def test_redis_catch_error():
    """Run ABCSMC on the redis sampler with a model that randomly raises."""

    def model(pars):
        # fail whenever the uniform draw falls below 0.1
        should_fail = np.random.uniform() < 0.1
        if should_fail:
            raise ValueError("error")
        return {'s0': pars['p0'] + 0.2 * np.random.uniform()}

    def distance(s0, s1):
        return abs(s0['s0'] - s1['s0'])

    prior = pyabc.Distribution(p0=pyabc.RV("uniform", 0, 10))
    sampler = RedisEvalParallelSamplerServerStarter(
        batch_size=3,
        workers=1,
        processes_per_worker=1,
    )
    try:
        abc = pyabc.ABCSMC(
            model,
            prior,
            distance,
            sampler=sampler,
            population_size=10,
        )
        db_url = "sqlite:///" + os.path.join(tempfile.gettempdir(), "test.db")
        abc.new(db_url, {'s0': 2.8})
        abc.run(minimum_epsilon=.1, max_nr_populations=3)
    finally:
        # always stop the sampler, also when the run fails
        sampler.shutdown()
Ejemplo n.º 7
0
def test_redis_look_ahead_error():
    """Test whether the look-ahead mode fails as expected.

    Each configuration in `args_list` is run with a look-ahead sampler
    without delayed evaluation and must raise an `AssertionError` whose
    message contains "cannot be used in look-ahead mode".
    """
    model, prior, distance, obs = basic_testcase()
    with tempfile.NamedTemporaryFile(mode='w', suffix='.csv') as fh:
        sampler = RedisEvalParallelSamplerServerStarter(
            look_ahead=True,
            look_ahead_delay_evaluation=False,
            log_file=fh.name)
        args_list = [{
            'eps': pyabc.MedianEpsilon()
        }, {
            'distance_function': pyabc.AdaptivePNormDistance()
        }]
        try:
            for args in args_list:
                # fall back to the test case's distance when none is given
                args.setdefault('distance_function', distance)
                with pytest.raises(AssertionError) as e:
                    abc = pyabc.ABCSMC(model,
                                       prior,
                                       sampler=sampler,
                                       population_size=10,
                                       **args)
                    abc.new(pyabc.create_sqlite_db_id(), obs)
                    abc.run(max_nr_populations=3)
                assert "cannot be used in look-ahead mode" in str(e.value)
        finally:
            # shut down exactly once, after all configurations were tried;
            # shutting down inside the loop would leave the later
            # configurations with an already stopped sampler
            sampler.shutdown()
Ejemplo n.º 8
0
def test_redis_look_ahead_delayed():
    """Test the look-ahead sampler with delayed evaluation in an adaptive
    setup."""
    model, prior, distance, obs = basic_testcase()
    # an adaptive population size makes the setup adaptive
    pop_size = pyabc.AdaptivePopulationSize(
        start_nr_particles=50,
        mean_cv=0.5,
        max_population_size=50,
    )
    with tempfile.NamedTemporaryFile(mode='w', suffix='.csv') as log_fh:
        sampler = RedisEvalParallelSamplerLookAheadDelayWrapper(
            log_file=log_fh.name)
        try:
            abc = pyabc.ABCSMC(
                model,
                prior,
                distance,
                sampler=sampler,
                population_size=pop_size,
            )
            abc.new(pyabc.create_sqlite_db_id(), obs)
            abc.run(max_nr_populations=3)
        finally:
            sampler.shutdown()

        # evaluate the sampler's log file
        log = pd.read_csv(log_fh.name, sep=',')
        assert (log["n_lookahead"] > 0).any()
        assert (log["n_lookahead_accepted"] > 0).any()
        # in delayed mode, all look-aheads must have been preliminary
        assert (log["n_lookahead"] == log["n_preliminary"]).all()
Ejemplo n.º 9
0
def test_export():
    """Test database export.

    Runs a small analysis into a temporary database, exports it in several
    formats and checks that each output file exists and is non-empty.
    """
    # simple problem
    def model(p):
        return {"y": p["p"] + 0.1 * np.random.normal()}

    prior = pyabc.Distribution(p=pyabc.RV("uniform", -1, 2))
    distance = pyabc.PNormDistance()

    # mkstemp returns an open descriptor together with the path; close it
    # right away so it does not leak. Creating the file before the `try`
    # guarantees `db_file` is bound when the `finally` clause runs.
    fd, db_file = tempfile.mkstemp(suffix=".db")
    os.close(fd)
    try:
        # run
        abc = pyabc.ABCSMC(model, prior, distance, population_size=100)
        abc.new("sqlite:///" + db_file, model({"p": 0}))
        abc.run(max_nr_populations=3)

        # export history
        for fmt in ["csv", "json", "html", "stata"]:
            fd, out_file = tempfile.mkstemp()
            os.close(fd)
            try:
                pyabc.storage.export(db_file, out=out_file, out_format=fmt)
                assert os.path.exists(out_file)
                assert os.stat(out_file).st_size > 0
            finally:
                if os.path.exists(out_file):
                    os.remove(out_file)

    finally:
        if os.path.exists(db_file):
            os.remove(db_file)
Ejemplo n.º 10
0
def test_early_stopping():
    """Basic test whether an early stopping pipeline works.

    Heavily inspired by the `early_stopping` notebook.
    """
    prior = pyabc.Distribution(step_size=pyabc.RV("uniform", 0, 10))

    n_steps = 30
    gt_step_size = 5
    model = MyStochasticProcess(
        n_steps=n_steps,
        gt_step_size=gt_step_size,
        gt_trajectory=simulate(n_steps, gt_step_size),
    )

    settings = {
        "models": model,
        "parameter_priors": prior,
        "distance_function": pyabc.NoDistance(),
        "population_size": 30,
        "transitions": pyabc.LocalTransition(k_fraction=0.2),
        # eps has to be initialized manually because there is only an
        # integrated model
        # TODO automatic initialization would be possible, e.g. via eps = inf
        "eps": pyabc.MedianEpsilon(300, median_multiplier=0.7),
    }
    abc = pyabc.ABCSMC(**settings)

    abc.new(pyabc.create_sqlite_db_id())
    abc.run(max_nr_populations=3)
Ejemplo n.º 11
0
def test_simple_function_acceptor():
    """Call a SimpleFunctionAcceptor directly, then run a small analysis."""

    def distance(x, x_0):
        # summed absolute difference over the keys of x_0
        total = 0
        for key in x_0:
            total += abs(x[key] - x_0[key])
        return total

    def dummy_accept(dist, eps, x, x_0, t, par):
        d = dist(x, x_0)
        accepted = d < eps(t)
        return AcceptorResult(d, accepted)

    def constant_eps(t):
        return 0.1

    simulated = {'s0': 1, 's1': 0}
    observed = {'s0': 2, 's1': 2}

    acceptor = pyabc.SimpleFunctionAcceptor(dummy_accept)
    ret = acceptor(
        distance_function=distance,
        eps=constant_eps,
        x=simulated,
        x_0=observed,
        t=0,
        par=None,
    )

    assert isinstance(ret, AcceptorResult)
    assert ret.distance == 3

    # test integration

    def model(par):
        return {'s0': par['p0'] + 1, 's1': 42}

    prior = pyabc.Distribution(p0=pyabc.RV('uniform', -5, 10))
    abc = pyabc.ABCSMC(model, prior, distance, population_size=2)
    abc.new(pyabc.create_sqlite_db_id(), model({'p0': 1}))
    history = abc.run(max_nr_populations=2)

    distances = history.get_weighted_distances()
    assert np.isfinite(distances['distance']).all()
Ejemplo n.º 12
0
def setup_module():
    """Set up module. Called before all tests here.

    Runs two analyses into the module database, then appends one History
    object and one label per run to the module-level lists.
    """
    n_history = 2
    observation = {'ss0': p_true['p0'], 'ss1': p_true['p1']}

    # uniform prior per parameter: RV takes the lower bound and the width
    rvs = {}
    for key in p_true:
        lower, upper = limits[key]
        rvs[key] = pyabc.RV('uniform', lower, upper - lower)
    prior = pyabc.Distribution(**rvs)

    distance = pyabc.PNormDistance(p=2)
    sampler = pyabc.sampler.MulticoreEvalParallelSampler(n_procs=2)

    # create and run the analyses
    for _ in range(n_history):
        abc = pyabc.ABCSMC(
            model,
            prior,
            distance,
            population_size=100,
            sampler=sampler,
        )
        abc.new(db_path, observation)
        abc.run(minimum_epsilon=.1, max_nr_populations=3)

    # one History per run, with ids counting from 1
    for j in range(n_history):
        run_history = pyabc.History(db_path)
        run_history.id = j + 1
        histories.append(run_history)
        labels.append(f"Some run {j}")
Ejemplo n.º 13
0
def test_pdf_norm_methods_integration():
    """Test integration of pdf normalization methods in ABCSMC."""

    def model(par):
        shift = np.array([0.3, 0.7])
        return {'s0': par['p0'] + shift}

    x_0 = {'s0': np.array([0.4, -0.6])}

    pdf_norm_methods = (
        pyabc.pdf_norm_max_found,
        pyabc.pdf_norm_from_kernel,
        pyabc.ScaledPDFNorm(),
    )
    for pdf_norm in pdf_norm_methods:
        # every method gets freshly constructed components; just run
        acceptor = pyabc.StochasticAcceptor(pdf_norm_method=pdf_norm)
        eps = pyabc.Temperature()
        distance = pyabc.IndependentNormalKernel(var=np.array([1, 1]))
        prior = pyabc.Distribution(p0=pyabc.RV('uniform', -1, 2))

        abc = pyabc.ABCSMC(
            model,
            prior,
            distance,
            eps=eps,
            acceptor=acceptor,
            population_size=20,
        )
        abc.new(pyabc.create_sqlite_db_id(), x_0)
        abc.run(max_nr_populations=3)
Ejemplo n.º 14
0
def test_redis_look_ahead():
    """Test the redis sampler in look-ahead mode."""
    model, prior, distance, obs = basic_testcase()
    eps = pyabc.ListEpsilon([20, 10, 5])
    # an adaptive population size makes the setup adaptive
    pop_size = pyabc.AdaptivePopulationSize(
        start_nr_particles=50,
        mean_cv=0.5,
        max_population_size=50,
    )
    with tempfile.NamedTemporaryFile(mode='w', suffix='.csv') as log_fh:
        sampler = RedisEvalParallelSamplerServerStarter(
            look_ahead=True,
            look_ahead_delay_evaluation=False,
            log_file=log_fh.name,
        )
        try:
            abc = pyabc.ABCSMC(
                model,
                prior,
                distance,
                sampler=sampler,
                population_size=pop_size,
                eps=eps,
            )
            abc.new(pyabc.create_sqlite_db_id(), obs)
            history = abc.run(max_nr_populations=3)
        finally:
            sampler.shutdown()

        assert history.n_populations == 3

        # evaluate the sampler's log file
        log = pd.read_csv(log_fh.name, sep=',')
        assert (log["n_lookahead"] > 0).any()
        assert (log["n_lookahead_accepted"] > 0).any()
        assert (log["n_preliminary"] == 0).all()
Ejemplo n.º 15
0
    def load_history(self, dbpath, id):
        """Load the run `id` from the sqlite database file at `dbpath`.

        A throwaway ABCSMC object with a dummy model and no prior or
        distance is created only to call its `load` method; the result of
        that call is returned.
        """
        def fakesim(p):
            # placeholder model
            return {"null": p}

        placeholder_abc = pyabc.ABCSMC(
            fakesim,
            None,
            None,
            sampler=pyabc.sampler.SingleCoreSampler(),
        )
        db_url = "sqlite:///" + dbpath
        return placeholder_abc.load(db_url, id)
Ejemplo n.º 16
0
def test_stochastic_acceptor():
    """Test the stochastic acceptor's features.

    First runs with a log file for the pdf norms and checks the logged
    content, then runs without an initial temperature.
    """
    import os  # function-scope import, only needed for temp-file handling

    distance = pyabc.IndependentNormalKernel(var=np.array([1, 1]))

    def model(par):
        return {'s0': par['p0'] + np.array([0.3, 0.7])}

    x_0 = {'s0': np.array([0.4, -0.6])}
    prior = pyabc.Distribution(p0=pyabc.RV('uniform', -1, 2))

    # store pnorms; mkstemp hands back an open descriptor which must be
    # closed, otherwise it leaks
    fd, pnorm_file = tempfile.mkstemp(suffix=".json")
    os.close(fd)
    try:
        acceptor = pyabc.StochasticAcceptor(
            pdf_norm_method=pyabc.pdf_norm_max_found, log_file=pnorm_file)
        eps = pyabc.Temperature(initial_temperature=1)

        # just run
        abc = pyabc.ABCSMC(model,
                           prior,
                           distance,
                           eps=eps,
                           acceptor=acceptor,
                           population_size=10)
        abc.new(pyabc.create_sqlite_db_id(), x_0)
        h = abc.run(max_nr_populations=1, minimum_epsilon=1.)

        # check pnorms
        pnorms = pyabc.storage.load_dict_from_json(pnorm_file)
        assert len(pnorms) == h.max_t + 2  # +1 t0, +1 one final update
        assert isinstance(list(pnorms.keys())[0], int)
        assert isinstance(pnorms[0], float)
    finally:
        # do not leave the log file behind in the temp directory
        if os.path.exists(pnorm_file):
            os.remove(pnorm_file)

    # use no initial temperature and adaptive c
    acceptor = pyabc.StochasticAcceptor()
    eps = pyabc.Temperature()
    abc = pyabc.ABCSMC(model,
                       prior,
                       distance,
                       eps=eps,
                       acceptor=acceptor,
                       population_size=20)
    abc.new(pyabc.create_sqlite_db_id(), x_0)
    abc.run(max_nr_populations=3)
Ejemplo n.º 17
0
def test_progressbar(sampler):
    """Run a short analysis with the given sampler and expect no errors.

    Going by the test name, this is meant to exercise the progress bar.
    """
    model, prior, distance, obs = basic_testcase()

    abc = pyabc.ABCSMC(
        model,
        prior,
        distance,
        sampler=sampler,
        population_size=20,
    )
    db_id = pyabc.create_sqlite_db_id()
    abc.new(db=db_id, observed_sum_stat=obs)
    abc.run(max_nr_populations=3)
Ejemplo n.º 18
0
def test_sensitivity_sankey():
    """Test `pyabc.visualization.plot_sensitivity_sankey`.

    Runs an analysis with an info-weighted distance that writes its log
    files to temporary paths, then calls the plot function on those logs.
    """
    import os  # function-scope import, only needed for temp-file handling

    sigmas = {"p1": 0.1}

    def model(p):
        return {
            "y1": p["p1"] + 1 + sigmas["p1"] * np.random.normal(),
            "y2": 2 + 0.1 * np.random.normal(size=3),
        }

    gt_par = {"p1": 3}

    data = {"y1": gt_par["p1"] + 1, "y2": 2 * np.ones(shape=3)}

    prior_bounds = {"p1": (0, 10)}

    prior = pyabc.Distribution(
        **{
            key: pyabc.RV("uniform", lb, ub - lb)
            for key, (lb, ub) in prior_bounds.items()
        }, )

    total_sims = 1000

    # paths handed out by make_tmp_file, removed again at the end
    tmp_files = []

    def make_tmp_file(suffix=None):
        """Create a temp file, close its descriptor and return its path."""
        fd, path = tempfile.mkstemp(suffix=suffix)
        # mkstemp returns an open descriptor; close it so it does not leak
        os.close(fd)
        tmp_files.append(path)
        return path

    try:
        # tmp files
        db_file = make_tmp_file(".db")
        scale_log_file = make_tmp_file(".json")
        info_log_file = make_tmp_file(".json")
        info_sample_log_file = make_tmp_file()

        distance = pyabc.InfoWeightedPNormDistance(
            p=1,
            scale_function=pyabc.distance.mad,
            predictor=pyabc.predictor.LinearPredictor(),
            fit_info_ixs=pyabc.util.EventIxs(sims=int(0.4 * total_sims)),
            scale_log_file=scale_log_file,
            info_log_file=info_log_file,
            info_sample_log_file=info_sample_log_file,
        )

        abc = pyabc.ABCSMC(model, prior, distance, population_size=100)
        h = abc.new(db="sqlite:///" + db_file, observed_sum_stat=data)
        abc.run(max_total_nr_simulations=total_sims)

        pyabc.visualization.plot_sensitivity_sankey(
            info_sample_log_file=info_sample_log_file,
            t=info_log_file,
            h=h,
            predictor=pyabc.predictor.LinearPredictor(),
        )
    finally:
        # remove the files created via mkstemp above
        for path in tmp_files:
            if os.path.exists(path):
                os.remove(path)
Ejemplo n.º 19
0
def train_patient(
    pat_df,
    training_time_range=(0, 10),
    prediction_time_range=(-10, 40),
    n_predictions=1,
    treatments=None,
    frequency=0.1,
    n_percent=10,
    epsilon=1,
    populations=20,
):
    """Fit the ABC model to one patient's data and return the best parameters.

    Parameters
    ----------
    pat_df:
        Table with at least the columns "t" and "mtd".
    training_time_range:
        Inclusive (start, end) bounds on "t" selecting the training rows.
    prediction_time_range, frequency, treatments:
        Forwarded to `abc_model`. `treatments` defaults to a new empty list.
    n_predictions:
        Number of highest-weight particles to return.
    n_percent:
        Forwarded to `prior`.
    epsilon:
        Passed to `ABCSMC.run` as `minimum_epsilon`.
    populations:
        Passed to `ABCSMC.run` as `max_nr_populations`.

    Returns
    -------
    tuple
        `(best_params, history)`: the rows of the final distribution that
        belong to the `n_predictions` largest weights, ordered by
        decreasing weight, and the history returned by the run.
    """
    # a list literal as default would be shared between calls, so the
    # default is created per call instead
    if treatments is None:
        treatments = []

    train_start, train_end = training_time_range

    train_df = pat_df[(pat_df["t"] >= train_start)
                      & (pat_df["t"] <= train_end)]
    print(len(train_df))

    abc = pyabc.ABCSMC(
        abc_model(prediction_time_range, training_time_range, frequency,
                  treatments),
        prior(n_percent),
        abs_distance,
    )
    db_path = "sqlite:///test.db"

    abc.new(db_path, {"data": train_df["mtd"]})

    history = abc.run(minimum_epsilon=epsilon, max_nr_populations=populations)

    df, w = history.get_distribution()

    # positions of the n_predictions largest weights (not yet ordered)
    best_inds = np.argpartition(w, -n_predictions)[-n_predictions:]

    print(w)
    print(best_inds, w[best_inds])

    # (index, weight) pairs, heaviest first
    weights = sorted(zip(best_inds, w[best_inds]),
                     key=operator.itemgetter(1),
                     reverse=True)

    print(weights)

    best_inds_sorted = [index for index, _ in weights]

    print(best_inds_sorted)

    best_params = df.iloc[best_inds_sorted]

    return best_params, history
Ejemplo n.º 20
0
def test_basic(sampler: pyabc.sampler.Sampler):
    """Run a short ABCSMC analysis with the given sampler."""

    def model(par):
        noise = np.random.randn(4)
        return {'s0': par['p0'] + noise}

    def distance(x, y):
        # NOTE(review): this is a signed sum, so the value can be negative;
        # confirm whether an absolute difference was intended
        difference = x['s0'] - y['s0']
        return np.sum(difference)

    observed = model({'p0': 2})
    prior = pyabc.Distribution(p0=pyabc.RV("uniform", 0, 10))

    abc = pyabc.ABCSMC(
        model,
        prior,
        distance,
        sampler=sampler,
        population_size=50,
    )
    abc.new(pyabc.create_sqlite_db_id(), observed)
    abc.run(max_nr_populations=4)
Ejemplo n.º 21
0
def test_r(sampler):
    """Run an analysis whose model components come from an R script."""
    r = pyabc.external.R("doc/examples/myRModel.R")
    r.display_source_ipython()

    # components defined in the R script
    model = r.model("myModel")
    distance = r.distance("myDistance")
    sum_stat = r.summary_statistics("mySummaryStatistics")

    prior = pyabc.Distribution(
        meanX=pyabc.RV("uniform", 0, 10),
        meanY=pyabc.RV("uniform", 0, 10),
    )
    abc = pyabc.ABCSMC(
        model,
        prior,
        distance,
        summary_statistics=sum_stat,
        sampler=sampler,
    )
    db = "sqlite:///" + os.path.join(gettempdir(), "test_external.db")
    observed = r.observation("mySumStatData")
    abc.new(db, observed)
    history = abc.run(minimum_epsilon=0.9, max_nr_populations=2)
    history.get_weighted_sum_stats_for_model(m=0, t=1)[1][0]["cars"].head()
Ejemplo n.º 22
0
def test_reference_parameter():
    """Check that a ground truth parameter passed to `new` is stored."""

    def model(parameter):
        # never simulated in this test: only `abc.new` is called
        return {"data": parameter["mean"] + 0.5 * np.random.randn()}

    def distance(x, y):
        return abs(x["data"] - y["data"])

    prior = pyabc.Distribution(
        p0=pyabc.RV("uniform", 0, 5),
        p1=pyabc.RV("uniform", 0, 1),
    )

    abc = pyabc.ABCSMC(model, prior, distance, population_size=2)
    db_path = "sqlite:///" + os.path.join(tempfile.gettempdir(), "test.db")
    gt_par = {'p0': 1, 'p1': 0.25}
    abc.new(db_path, {"data": 2.5}, gt_par=gt_par)

    stored_par = abc.history.get_ground_truth_parameter()
    assert stored_par == gt_par
Ejemplo n.º 23
0
def test_redis_look_ahead():
    """Test the redis sampler in look-ahead mode.

    Besides the log file checks, the number of particles with
    `proposal_id == -1` in each stored population is compared against the
    logged number of accepted look-ahead samples.
    """
    model, prior, distance, obs = basic_testcase()
    eps = pyabc.ListEpsilon([20, 10, 5])
    # spice things up with an adaptive population size
    pop_size = pyabc.AdaptivePopulationSize(start_nr_particles=50,
                                            mean_cv=0.5,
                                            max_population_size=50)
    with tempfile.NamedTemporaryFile(mode='w', suffix='.csv') as fh:
        sampler = RedisEvalParallelSamplerServerStarter(
            look_ahead=True,
            look_ahead_delay_evaluation=False,
            log_file=fh.name,
        )
        try:
            abc = pyabc.ABCSMC(
                model,
                prior,
                distance,
                sampler=sampler,
                population_size=pop_size,
                eps=eps,
            )
            abc.new(pyabc.create_sqlite_db_id(), obs)
            h = abc.run(max_nr_populations=3)
        finally:
            sampler.shutdown()

        assert h.n_populations == 3

        # read log file
        df = pd.read_csv(fh.name, sep=',')
        assert (df.n_lookahead > 0).any()
        assert (df.n_lookahead_accepted > 0).any()
        assert (df.n_preliminary == 0).all()

        # check history proposal ids
        for t in range(0, h.max_t + 1):
            pop = h.get_population(t=t)
            # separate name: `pop_size` above is the population strategy
            n_particles = len(pop)
            n_lookahead_pop = sum(
                1 for p in pop.particles if p.proposal_id == -1)
            # `.item()` extracts the single logged value for this t;
            # calling int() on the Series itself is deprecated in pandas
            n_logged = int(
                df.loc[df.t == t, 'n_lookahead_accepted'].item())
            assert min(n_particles, n_logged) == n_lookahead_pop
Ejemplo n.º 24
0
def test_r():
    """
    This is basically just the using_R notebook.
    """
    r = R(r_file)
    r.display_source_ipython()

    # components defined in the R script
    model = r.model("myModel")
    distance = r.distance("myDistance")
    sum_stat = r.summary_statistics("mySummaryStatistics")

    prior = pyabc.Distribution(
        meanX=pyabc.RV("uniform", 0, 10),
        meanY=pyabc.RV("uniform", 0, 10),
    )
    sampler = pyabc.sampler.MulticoreEvalParallelSampler(n_procs=2)
    abc = pyabc.ABCSMC(
        model,
        prior,
        distance,
        summary_statistics=sum_stat,
        sampler=sampler,
    )
    db = "sqlite:///" + os.path.join(gettempdir(), "test_external.db")
    observed = r.observation("mySumStatData")
    abc.new(db, observed)
    history = abc.run(minimum_epsilon=0.9, max_nr_populations=2)
    history.get_weighted_sum_stats_for_model(m=0, t=1)[1][0]["cars"].head()
Ejemplo n.º 25
0
def test_pipeline(db_path):
    """Run a short analysis on a BasicoModel with a trajectory distance."""
    model = BasicoModel(MODEL1_PATH, duration=MAX_T, method="deterministic")
    data = model(TRUE_PAR)
    prior = pyabc.Distribution(rate=pyabc.RV("uniform", 0, 100))

    # time points at which the trajectories are compared
    t_test_times = np.linspace(0, MAX_T, 20)

    def distance(x, y):
        # index of the entry just before each test time's insertion point
        x_idx = np.searchsorted(x["t"], t_test_times) - 1
        y_idx = np.searchsorted(y["t"], t_test_times) - 1
        abs_diff = np.absolute(x["X"][:, 1][x_idx] - y["X"][:, 1][y_idx])
        # summed absolute difference divided by the number of test times
        return abs_diff.sum() / t_test_times.size

    abc = pyabc.ABCSMC(model, prior, distance)
    abc.new(db_path, data)
    abc.run(max_nr_populations=3)
Ejemplo n.º 26
0
    def execute(self):
        """Run the configured analysis unless its database file exists.

        The database file name is derived from the model id, analysis id,
        data index and repetition index stored on the instance.
        """
        result_id = f"{self.model_id}__{self.analysis_id}__{self.i_data}__{self.i_rep}"
        db_file = f"db_{result_id}.db"

        print("Result id: ", result_id)
        # an existing database file means this combination was already run
        if os.path.isfile(db_file):
            print("Skipping since exists already.")
            return

        smc_settings = {
            "models": self.model,
            "parameter_priors": self.prior,
            "distance_function": self.distance,
            "population_size": self.n_acc,
            "transitions": self.transition,
            "eps": self.eps,
            "acceptor": self.acceptor,
            "sampler": self.sampler,
        }
        abc = pyabc.ABCSMC(**smc_settings)
        db_url = "sqlite:///" + db_file
        abc.new(db_url, self.y_obs, gt_par=self.p_true)
        abc.run(
            minimum_epsilon=self.eps_min,
            min_acceptance_rate=self.min_acc_rate,
            max_nr_populations=self.n_pop,
        )
Ejemplo n.º 27
0
def test_redis_subprocess():
    """Test whether the instructed redis sampler allows worker subprocesses."""
    # print worker output
    worker_logger = logging.getLogger("Redis-Worker")
    worker_logger.addHandler(logging.StreamHandler())

    def model_process(p, pipe):
        """The actual model."""
        simulated = {"y": p['p0'] + 0.1 * np.random.randn(10)}
        pipe.send(simulated)

    def model(p):
        """Model calling a subprocess."""
        parent_end, child_end = multiprocessing.Pipe()
        worker = multiprocessing.Process(
            target=model_process, args=(p, child_end))
        worker.start()
        result = parent_end.recv()
        worker.join()
        return result

    def distance(y1, y2):
        return np.abs(y1['y'] - y2['y']).sum()

    prior = pyabc.Distribution(
        p0=pyabc.RV('uniform', -5, 10),
        p1=pyabc.RV('uniform', -2, 2),
    )
    obs = {'y': 1}

    # False as daemon argument is ok, True and None are not allowed
    sampler = RedisEvalParallelSamplerServerStarter(
        workers=1,
        processes_per_worker=2,
        daemon=False,
    )
    try:
        abc = pyabc.ABCSMC(
            model,
            prior,
            distance,
            sampler=sampler,
            population_size=10,
        )
        abc.new(pyabc.create_sqlite_db_id(), obs)
        # would just never return if model evaluation fails
        abc.run(max_nr_populations=3)
    finally:
        sampler.shutdown()
Ejemplo n.º 28
0
def test_redis_look_ahead_delayed():
    """Test the look-ahead sampler with delayed evaluation in an adaptive
    setup.

    Besides the log file checks, the number of particles with
    `proposal_id == -1` in each stored population is compared against the
    logged number of accepted look-ahead samples.
    """
    model, prior, distance, obs = basic_testcase()
    # spice things up with an adaptive population size
    pop_size = pyabc.AdaptivePopulationSize(start_nr_particles=50,
                                            mean_cv=0.5,
                                            max_population_size=50)
    with tempfile.NamedTemporaryFile(mode='w', suffix='.csv') as fh:
        sampler = RedisEvalParallelSamplerLookAheadDelayWrapper(
            log_file=fh.name, wait_for_all_samples=True)
        try:
            abc = pyabc.ABCSMC(
                model,
                prior,
                distance,
                sampler=sampler,
                population_size=pop_size,
            )
            abc.new(pyabc.create_sqlite_db_id(), obs)
            h = abc.run(max_nr_populations=3)
        finally:
            sampler.shutdown()
        # read log file
        df = pd.read_csv(fh.name, sep=',')
        assert (df.n_lookahead > 0).any()
        assert (df.n_lookahead_accepted > 0).any()
        # in delayed mode, all look-aheads must have been preliminary
        assert (df.n_lookahead == df.n_preliminary).all()
        print(df)

        # check history proposal ids
        for t in range(0, h.max_t + 1):
            pop = h.get_population(t=t)
            # separate name: `pop_size` above is the population strategy
            n_particles = len(pop)
            n_lookahead_pop = sum(
                1 for p in pop.particles if p.proposal_id == -1)
            # `.item()` extracts the single logged value for this t;
            # calling int() on the Series itself is deprecated in pandas
            n_logged = int(
                df.loc[df.t == t, 'n_lookahead_accepted'].item())
            assert min(n_particles, n_logged) == n_lookahead_pop
Ejemplo n.º 29
0
def calibrate(observed: dict, hostname: str = None):
    """Calibrate the model against observed data and print the results.

    Parameters
    ----------
    observed:
        Dictionary with keys as in calibration_statistic.statistics
        containing the real data.
    hostname:
        If given, simulations are distributed through a mapping sampler
        built from `get_sm_map(hostname)`; otherwise a local multicore
        sampler is used.

    The run is stored in `data/calibration.db` below the current working
    directory. For every parameter, the weighted sum of the final
    population's values is printed; nothing is returned.
    """
    db_path = "sqlite:///" + os.path.join(os.getcwd(), "data",
                                          "calibration.db")
    if hostname is not None:
        # If we're given a hostname, use the sandman mapping wrapper
        sampler = pyabc.sampler.MappingSampler(map_=get_sm_map(hostname),
                                               mapper_pickles=True)
    else:
        # Otherwise, run locally with the normal sampler
        sampler = pyabc.sampler.MulticoreEvalParallelSampler()
    # Adaptive distance based on Prangle (2017) (also acceptor below)
    dist = pyabc.distance.AdaptivePNormDistance(p=2, adaptive=True)
    prior = pyabc.Distribution(**get_prior())
    pop_size = pyabc.populationstrategy.AdaptivePopulationSize(
        start_nr_particles=32, max_population_size=256, min_population_size=4)

    abc = pyabc.ABCSMC(
        model,
        parameter_priors=prior,
        distance_function=dist,
        population_size=pop_size,
        sampler=sampler,
        acceptor=pyabc.accept_use_complete_history,
    )

    # `new` hands back the history object, not the id itself; the run id
    # is its `id` attribute
    history = abc.new(db=db_path, observed_sum_stat=observed)
    print(f"Run ID is {history.id}")
    history = abc.run(max_nr_populations=10)
    df, w = history.get_distribution()
    # Calculate the posterior mean of each parameter
    results = {
        param: np.dot(list(df[param]), list(w))
        for param in df.columns.values
    }

    print("Done! The results are:")
    print(results)
Ejemplo n.º 30
0
def run_smc(priors, data, epsilon, max_episodes, smc_population_size, sequence_sample_size, pop_size,
            distance_function=l1_distance):
    """Run ABC-SMC on `data` and return the final weighted distribution.

    Parameters
    ----------
    priors:
        Prior passed to `pyabc.ABCSMC`.
    data:
        Table whose column labels are generations; the smallest label is
        the initial generation and its column provides the initial values.
    epsilon:
        Passed to `ABCSMC.run` as `minimum_epsilon`.
    max_episodes:
        Passed to `ABCSMC.run` as `max_nr_populations`.
    smc_population_size:
        Population size passed to `pyabc.ABCSMC`.
    sequence_sample_size, pop_size:
        Forwarded to `smc_model`.
    distance_function:
        Distance passed to `pyabc.ABCSMC`.

    Returns
    -------
    The parameter table of the final distribution with an added 'weights'
    column.

    The run is stored in a temporary sqlite file below `.temp_smc_dbs`,
    which is removed again before returning, also when the run fails.
    """
    start = time.time()
    initial_gen = data.columns.min()
    gen_num = data.columns.max() - initial_gen
    # the keyword spelling `intial_freq` is kept as is — presumably it
    # matches smc_model's parameter name; verify before renaming
    model = partial(smc_model, intial_freq=data[initial_gen].values, sequence_sample_size=sequence_sample_size,
                    pop_size=pop_size, gen_num=gen_num, initial_gen=initial_gen)
    model.__name__ = 'model with params'  # SMC needs this for some reason...
    abc = pyabc.ABCSMC(
            model, priors, distance_function, smc_population_size)
    # TODO: add a readme to temp_smc_dbs folder
    dbs_dir = '.temp_smc_dbs'
    os.makedirs(dbs_dir, exist_ok=True)
    random_num = random.randint(0, 9999)
    db_path = os.path.join(dbs_dir, f"db_{random_num}.db")
    sql_path = (f"sqlite:///{db_path}")
    try:
        abc.new(sql_path, {'a': data})
        smc_post = abc.run(minimum_epsilon=epsilon, max_nr_populations=max_episodes)
        print("SMC run time: ", round(time.time()-start, 2))
        print("Total number of SMC simulations: ", smc_post.total_nr_simulations)
        df, ws = smc_post.get_distribution()
        df['weights'] = ws
        return df
    finally:
        # clean up the temporary database on success and on error; the
        # file may be missing if the failure happened before it was created
        if os.path.exists(db_path):
            os.remove(db_path)
    return df