Example #1
def test_save_no_sum_stats(history: History):
    """
    Test that what has been stored can be retrieved correctly
    also when no sum stats are saved.
    """
    particle_list = []
    for _ in range(0, 6):
        particle = Particle(
            m=0,
            parameter=Parameter({"th0": np.random.random()}),
            weight=1.0 / 6,
            sum_stat={"ss0": np.random.random(), "ss1": np.random.random()},
            distance=np.random.random(),
        )
        particle_list.append(particle)

    population = Population(particle_list)

    # do not save sum stats
    # read the attribute first so a typo in the name would raise
    print(history.stores_sum_stats)
    history.stores_sum_stats = False

    # test some basic routines
    history.append_population(
        t=0,
        current_epsilon=42.97,
        population=population,
        nr_simulations=10,
        model_names=[""],
    )

    # just call
    history.get_distribution(0, 0)

    # test whether weights and distances returned correctly
    weighted_distances_h = history.get_weighted_distances()
    weighted_distances = population.get_weighted_distances()

    assert np.allclose(
        weighted_distances_h[['distance', 'w']],
        weighted_distances[['distance', 'w']],
    )

    weights, sum_stats = history.get_weighted_sum_stats(t=0)
    # all particles should be contained nonetheless
    assert len(weights) == len(particle_list)
    for sum_stat in sum_stats:
        # should be empty
        assert not sum_stat

    history.get_population_extended()
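The test snippets in this collection all assume the same pyabc imports and a `history` fixture backed by a SQLite database. A minimal sketch of that scaffolding (exact import paths vary between pyabc versions, so treat this as an illustration rather than the project's actual conftest):

import os
import tempfile

import numpy as np  # used throughout the examples
import pytest

from pyabc import History
from pyabc.parameters import Parameter
from pyabc.population import Particle, Population


@pytest.fixture
def history():
    # file-backed database so save/load round trips hit real storage
    fd, db_file = tempfile.mkstemp(suffix=".db")
    os.close(fd)
    h = History("sqlite:///" + db_file)
    h.store_initial_data(None, {}, {}, {}, ["m0"], "", "", "")
    yield h
    os.remove(db_file)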
Example #2
def test_single_particle_save_load(history: History):
    particle_list = [
        Particle(m=0,
                 parameter=Parameter({
                     "a": 23,
                     "b": 12
                 }),
                 weight=.2,
                 sum_stat={"ss": .1},
                 distance=.1),
    ]
    history.append_population(0, 42, Population(particle_list), 2, [""])

    df, w = history.get_distribution(0, 0)
    assert w[0] == 1
    assert df.a.iloc[0] == 23
    assert df.b.iloc[0] == 12
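For reference, `get_distribution(m, t)` returns a pair: a DataFrame with one row per particle and one column per parameter, and an array of weights normalised within the requested model and generation. A small usage sketch (assuming the `history` fixture above):

import numpy as np

df, w = history.get_distribution(0, 0)
assert np.isclose(w.sum(), 1.0)  # weights are normalised per model
posterior_mean_a = float((df["a"] * w).sum())  # weighted posterior mean of "a"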
Example #3
def test_sum_stats_save_load(history: History):
    arr = np.random.rand(10)
    arr2 = np.random.rand(10, 2)
    particle_list = [
        Particle(m=0,
                 parameter=Parameter({
                     "a": 23,
                     "b": 12
                 }),
                 weight=.2,
                 sum_stat={
                     "ss1": .1,
                     "ss2": arr2,
                     "ss3": example_df(),
                     "rdf0": r["iris"]
                 },
                 distance=.1),
        Particle(m=0,
                 parameter=Parameter({
                     "a": 23,
                     "b": 12
                 }),
                 weight=.2,
                 sum_stat={
                     "ss12": .11,
                     "ss22": arr,
                     "ss33": example_df(),
                     "rdf": r["mtcars"]
                 },
                 distance=.1)
    ]

    history.append_population(0, 42, Population(particle_list), 2,
                              ["m1", "m2"])
    weights, sum_stats = history.get_weighted_sum_stats_for_model(0, 0)
    assert (weights == 0.5).all()
    assert sum_stats[0]["ss1"] == .1
    assert (sum_stats[0]["ss2"] == arr2).all()
    assert (sum_stats[0]["ss3"] == example_df()).all().all()
    with localconverter(pandas2ri.converter):
        assert (sum_stats[0]["rdf0"] == r["iris"]).all().all()
    assert sum_stats[1]["ss12"] == .11
    assert (sum_stats[1]["ss22"] == arr).all()
    assert (sum_stats[1]["ss33"] == example_df()).all().all()
    with localconverter(pandas2ri.converter):
        assert (sum_stats[1]["rdf"] == r["mtcars"]).all().all()
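The example above relies on rpy2 for the R datasets (`iris`, `mtcars`) and on an `example_df()` helper that is not shown. A plausible stand-in (hypothetical; the helper must be deterministic because the test compares two separate calls):

import pandas as pd
from rpy2.robjects import r, pandas2ri
from rpy2.robjects.conversion import localconverter


def example_df():
    # any small, deterministic DataFrame works for the round-trip check
    return pd.DataFrame({"x": [1.0, 2.0, 3.0], "y": [0.1, 0.2, 0.3]})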
Example #4
def test_model_name_load_single_with_pop(history_uninitialized: History):
    h = history_uninitialized
    model_names = ["m1"]
    h.store_initial_data(0, {}, {}, {}, model_names, "", "", "")
    particle_list = [
        Particle(0, Parameter({
            "a": 23,
            "b": 12
        }), .2, [.1], [{
            "ss": .1
        }], [], True)
    ]
    h.append_population(0, 42, Population(particle_list), 2, model_names)

    h2 = History(h.db_identifier)
    model_names_loaded = h2.model_names()
    assert model_names == model_names_loaded
Example #5
def test_model_name_load_single_with_pop(history_uninitialized: History):
    h = history_uninitialized
    model_names = ["m1"]
    h.store_initial_data(0, {}, {}, {}, model_names, "", "", "")
    particle_list = [
        Particle(
            m=0,
            parameter=Parameter({"a": 23, "b": 12}),
            weight=1.0,
            sum_stat={"ss": 0.1},
            distance=0.1,
        )
    ]
    h.append_population(0, 42, Population(particle_list), 2, model_names)

    h2 = History(h.db)
    model_names_loaded = h2.model_names()
    assert model_names == model_names_loaded
Example #6
def history(request):
    # Test in-memory and filesystem based database
    if request.param == "file":
        this_path = "/" + path()
    elif request.param == "memory":
        this_path = ""
    else:
        raise Exception(f"Bad database type for testing: {request.param}")
    model_names = ["fake_name_{}".format(k) for k in range(50)]
    h = History("sqlite://" + this_path)
    h.store_initial_data(0, {}, {}, {}, model_names, "", "",
                         '{"name": "pop_strategy_str_test"}')
    yield h
    if request.param == "file":
        try:
            os.remove(this_path)
        except FileNotFoundError:
            pass
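Both database fixtures call a `path()` helper that is not shown. A minimal stand-in returning a fresh file name under the system temp directory (hypothetical; the original helper may differ):

import tempfile
import uuid


def path() -> str:
    # fresh SQLite file name; the file itself is created lazily by SQLAlchemy
    return tempfile.gettempdir() + "/abc_unittest_" + uuid.uuid4().hex + ".db"

With an absolute path, the extra "/" that Example #6 prepends merely produces a double slash, which POSIX paths tolerate.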
Example #7
def test_total_nr_samples(history: History):
    particle_list = [
        Particle(m=0,
                 parameter=Parameter({
                     "a": 23,
                     "b": 12
                 }),
                 weight=.2,
                 accepted_sum_stats=[{
                     "ss": .1
                 }],
                 accepted_distances=[.1])
    ]
    population = Population(particle_list)
    history.append_population(0, 42, population, 4234, ["m1"])
    history.append_population(0, 42, population, 3, ["m1"])

    assert 4237 == history.total_nr_simulations
Example #8
def test_single_particle_save_load_np_int64(history: History):
    # Test if np.int64 can also be used for indexing
    # This is an important test!!!
    m_list = [0, np.int64(0)]
    t_list = [0, np.int64(0)]
    particle_list = [Particle(
        m=0,
        parameter=Parameter({"a": 23, "b": 12}),
        weight=.2,
        accepted_sum_stats=[{"ss": .1}],
        accepted_distances=[.1])]
    history.append_population(0, 42, Population(particle_list), 2, [""])

    for m in m_list:
        for t in t_list:
            df, w = history.get_distribution(m, t)
            assert w[0] == 1
            assert df.a.iloc[0] == 23
            assert df.b.iloc[0] == 12
Example #9
def test_observed_sum_stats(history_uninitialized: History, gt_model):
    h = history_uninitialized
    obs_sum_stats = {"s1": 1,
                     "s2": 1.1,
                     "s3": np.array(.1),
                     "s4": np.random.rand(10)}
    h.store_initial_data(gt_model, {}, obs_sum_stats, {}, [""], "", "", "")

    h2 = History(h.db_identifier)
    loaded_sum_stats = h2.observed_sum_stat()

    for k in ["s1", "s2", "s3"]:
        assert loaded_sum_stats[k] == obs_sum_stats[k]

    assert (loaded_sum_stats["s4"] == obs_sum_stats["s4"]).all()
    assert loaded_sum_stats["s1"] == obs_sum_stats["s1"]
    assert loaded_sum_stats["s2"] == obs_sum_stats["s2"]
    assert loaded_sum_stats["s3"] == obs_sum_stats["s3"]
    assert loaded_sum_stats["s4"] is not obs_sum_stats["s4"]
Example #10
def history_uninitialized():
    # Don't use memory database for testing.
    # A real file with disconnect and reconnect is closer to the real scenario
    this_path = path()
    h = History("sqlite:///" + this_path)
    yield h
    try:
        os.remove(this_path)
    except FileNotFoundError:
        pass
Example #11
def test_single_particle_save_load_np_int64(history: History):
    # Test if np.int64 can also be used for indexing
    # This is an important test!!!
    m_list = [0, np.int64(0)]
    t_list = [0, np.int64(0)]
    particle_population = [
        ValidParticle(0, Parameter({
            "a": 23,
            "b": 12
        }), .2, [.1], [{
            "ss": .1
        }])
    ]
    history.append_population(0, 42, particle_population, 2, [""])

    for m in m_list:
        for t in t_list:
            df, w = history.get_distribution(m, t)
            assert w[0] == 1
            assert df.a.iloc[0] == 23
            assert df.b.iloc[0] == 12
Example #12
def test_model_name_load_single_with_pop(history_uninitialized: History):
    h = history_uninitialized
    model_names = ["m1"]
    h.store_initial_data(0, {}, {}, {}, model_names, "", "", "")
    particle_list = [
        Particle(m=0,
                 parameter=Parameter({
                     "a": 23,
                     "b": 12
                 }),
                 weight=.2,
                 accepted_sum_stats=[{
                     "ss": .1
                 }],
                 accepted_distances=[.1])
    ]
    h.append_population(0, 42, Population(particle_list), 2, model_names)

    h2 = History(h.db_identifier)
    model_names_loaded = h2.model_names()
    assert model_names == model_names_loaded
Example #13
def test_update_nr_samples(history: History):
    history.store_initial_data(None, {}, {}, {}, ["m0"], "", "", "")
    pops = history.get_all_populations()
    assert 0 == pops[pops['t'] == History.PRE_TIME]['samples'].values
    history.update_nr_samples(History.PRE_TIME, 43)
    pops = history.get_all_populations()
    assert 43 == pops[pops['t'] == History.PRE_TIME]['samples'].values
Example #14
def test_get_population(history: History):
    population = Population(rand_pop_list(0))
    history.append_population(t=0,
                              current_epsilon=7.0,
                              population=population,
                              nr_simulations=200,
                              model_names=["m0"])
    population_h = history.get_population(t=0)

    # length
    assert len(population) == len(population_h)

    # distances
    distances = [p.distance for p in population.get_list()]
    distances_h = [p.distance for p in population_h.get_list()]
    for d0, d1 in zip(distances, distances_h):
        assert np.isclose(d0, d1)

    # weights
    weights = [p.weight for p in population.get_list()]
    weights_h = [p.weight for p in population_h.get_list()]
    for w0, w1 in zip(weights, weights_h):
        assert np.isclose(w0, w1)
Example #15
def test_update_after_calibration(history: History):
    history.store_initial_data(None, {}, {}, {}, ["m0"], "", "", "")
    pops = history.get_all_populations()
    assert 0 == pops[pops['t'] == History.PRE_TIME]['samples'].values
    time = datetime.datetime.now()
    history.update_after_calibration(43, end_time=time)
    pops = history.get_all_populations()
    assert 43 == pops[pops['t'] == History.PRE_TIME]['samples'].values
    assert pops.population_end_time[0] == time
Example #16
def result_single(paramfile, obsfile, dbfile, run_id, save):
    """
    Plot the result of a single fitting
    """

    db_path = 'sqlite:///' + dbfile
    abc_history = History(db_path)
    abc_history.id = run_id

    observed = simtools.parse_observations(obsfile)
    # print(observed)
    id_str = next(iter(observed))
    simtools.parse_params(paramfile, observed)

    # violin plot of results
    max_gen = abc_history.max_t

    # num_models_total = abc_history.nr_of_models_alive(0)
    num_models_total = (
        simtools.PARAMS['abc_params']['resolution_limits'][1]
        - simtools.PARAMS['abc_params']['resolution_limits'][0] + 1
    )
    num_models_final = abc_history.nr_of_models_alive(max_gen)
    max_point_in_models = max([abc_history.get_distribution(m=x, t=max_gen)[0].shape[1]
                               for x in range(num_models_final)])

    # fig, axs = plt.subplots(ncols=num_models_final, sharey=True, sharex=True)
    # fig.set_size_inches(num_models_final*3, 3)

    if save is not None:
        # first time, construct the multipage pdf
        pdf_out = PdfPages(save)

    for j in range(num_models_total):
        if j not in abc_history.get_model_probabilities():
            continue
        model_prob = abc_history.get_model_probabilities()[j][max_gen]
        # print(model_prob)
        if model_prob == 0.0:
            continue
        fig, axs = plt.subplots()
        fig.set_size_inches(4, 3)
        end_time = simtools.PARAMS['end_time'][id_str]()
        # print(end_time)

        df, w = abc_history.get_distribution(m=j, t=max_gen)
        # print(df)
        # print(df.columns)
        # abc_data = [sorted(df['birthrate.b' + str(x)]) for x in range(df.shape[1])]
        time_axis = np.linspace(0, end_time, len(list(df.columns)))

        # for x in list(df.columns):
            # print(x)
            # print(df[x])
        abc_data = [sorted(df[x]) for x in list(df.columns)]
        # print(abc_data)

        violinparts = axs.violinplot(abc_data, positions=time_axis,
                                     widths=end_time/(max_point_in_models + 1)*0.8,
                                     showmeans=False, showmedians=False,
                                     showextrema=False)
        for part in violinparts['bodies']:
            # clip each body to its left half
            # (after user Ruggero Turra, https://stackoverflow.com/questions/29776114/half-violin-plot)
            m = np.mean(part.get_paths()[0].vertices[:, 0])
            part.get_paths()[0].vertices[:, 0] = np.clip(
                part.get_paths()[0].vertices[:, 0],
                -np.inf,
                m
            )
            part.set_facecolor('lightgrey')
            part.set_color('lightgrey')
            part.set_alpha(1)

        for t, d in zip(time_axis, abc_data):
            axs.scatter(t + np.random.uniform(
                0.1,
                end_time/(max_point_in_models + 1)*0.4,
                size=len(d)
            ), d, color='grey', marker='.', s=1.0, alpha=0.8)
            # print('HPDI')
            hpdi_interval = hpdi(d)
            axs.plot([t + 0.1, t + end_time/(max_point_in_models + 1)*0.4],
                     [hpdi_interval[0], hpdi_interval[0]],
                      linestyle='--', color='k', linewidth=1.0)
            axs.plot([t + 0.1, t + end_time/(max_point_in_models + 1)*0.4],
                     [hpdi_interval[1], hpdi_interval[1]],
                      linestyle='--', color='k', linewidth=1.0)

        quartile1, medians, quartile3 = np.percentile(abc_data, [25, 50, 75], axis=1)
        whiskers = np.array([
            adjacent_values(sorted_array, q1, q3)
            for sorted_array, q1, q3 in zip(abc_data, quartile1, quartile3)])
        whiskers_min, whiskers_max = whiskers[:, 0], whiskers[:, 1]
        axs.scatter(time_axis, medians, marker='.', color='white', s=30, zorder=3)
        axs.vlines(time_axis, whiskers_min, whiskers_max, color='k', linestyle='-', lw=1)
        axs.vlines(time_axis, quartile1, quartile3, color='k', linestyle='-', lw=5)

        birthrate = [statistics.median(x) for x in abc_data]
        axs.plot(time_axis, birthrate, color='k')
        axs.set_xlabel('Time [days]')
        axs.set_ylabel(r'Growth rate [divisions day$^{-1}$ cell$^{-1}$]')

        title = simtools.PARAMS['plot_params']['coupling_names']
        axs.set_title(title)


        # axs.set_ylim(0, simtools.PARAMS['abc_params']['rate_limits'][1])

        plt.tight_layout()

        if save is not None:
            pdf_out.savefig()
        else:
            plt.show()


    # fit against timeline
    for j in range(num_models_total):
        if j not in abc_history.get_model_probabilities():
            continue
        model_prob = abc_history.get_model_probabilities()[j][max_gen]
        if model_prob == 0.0:
            continue
        fig, axs = plt.subplots()
        fig.set_size_inches(4, 3)
        end_time = simtools.PARAMS['end_time'][id_str]()

        df, w = abc_history.get_distribution(m=j, t=max_gen)
        time_axis = np.linspace(0, end_time, len(list(df.columns)))

        # samplings = [simtools.get_samplings_dilutions(observed[id_str], x)[0]
        #              for x, __ in enumerate(observed[id_str]['time'])]
        # dilutions = [simtools.get_samplings_dilutions(observed[id_str], x)[1]
        #              for x, __ in enumerate(observed[id_str]['time'])]

        # print(observed)
        # print('main obs', simtools.OBSERVED)

        # id_str = list(observed.keys())[j]

        samplings, dilutions = simtools.get_samplings_dilutions(observed[id_str])

        # samplings = list(zip(*samplings))
        # dilutions = list(zip(*dilutions))

        abc_data = [sorted(df[x]) for x in list(df.columns)]
        for k, v in observed.items():
            # print(k, v)
            samplings, dilutions = simtools.get_samplings_dilutions(observed[k])
            measured = np.array(v['count'])
            for s in samplings.transpose():
                # print(measured, s)
                measured /= s
            for d in dilutions.transpose():
                measured *= d

            axs.scatter(v['time'], measured, marker='.', color='k')

        # print(samplings, dilutions)

        simulations = None

        time_axis = np.linspace(0, max(observed[id_str]['time']), 100)

        i = 0
        for index, row in df.iterrows():
            # if i > 100:
            #     break
            # print(index, row)
            time, size, rate = simtools.simulate_timeline(
                simtools.PARAMS['starting_population'][id_str](),
                time_axis,
                list(row),
                simtools.PARAMS['simulation_params']['deathrate_interaction'],
                # simtools.PARAMS['abc_params']['simulator'],
                'bernoulli',
                verbosity=1
            )

            if simulations is None:
                simulations = np.zeros((len(size), len(df)))

            simulations[:, i] = size
            i += 1

        qt1, qt2, qt3 = np.quantile(simulations, (0.05, 0.5, 0.95), axis=1)
        # print(qt2)

        # axs.plot(time, qt1)
        axs.plot(time_axis, qt2, color='k')
        # axs.plot(time, qt3)
        axs.fill_between(time_axis, qt1, qt3, zorder=-1, color='lightgray')

        axs.set_xlabel('Time [days]')
        measurename = 'Population measure'
        if 'population_measure' in simtools.PARAMS['plot_params']:
            measurename = simtools.PARAMS['plot_params']['population_measure']
        axs.set_ylabel(measurename)

        # print(j, i, index)
        # print(simtools.PARAMS['abc_params']['birthrate_coupling_sets'])

        title = simtools.PARAMS['plot_params']['coupling_names']
        axs.set_title(title)

        plt.tight_layout()

        if save is not None:
            pdf_out.savefig()
        else:
            plt.show()

    if save is not None:
        pdf_out.close()
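`hpdi` and `adjacent_values` are helpers from the surrounding project. Plausible sketches consistent with how they are called above (hypothetical implementations, not the project's own):

import numpy as np


def hpdi(data, prob=0.95):
    # narrowest interval containing `prob` of the sample
    # (highest posterior density interval)
    x = np.sort(np.asarray(data))
    n = len(x)
    k = max(int(np.ceil(prob * n)), 1)
    widths = x[k - 1:] - x[:n - k + 1]
    i = int(np.argmin(widths))
    return x[i], x[i + k - 1]


def adjacent_values(sorted_array, q1, q3):
    # Tukey-style whisker ends, clipped to the observed data range
    iqr = q3 - q1
    lower = max(q1 - 1.5 * iqr, sorted_array[0])
    upper = min(q3 + 1.5 * iqr, sorted_array[-1])
    return lower, upper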
Example #17
def abc_info(paramfile, obsfile, dbfile, run_id, save):
    """
    Plots for examining ABC fitting process
    """

    db_path = 'sqlite:///' + dbfile
    abc_history = History(db_path)
    abc_history.id = run_id

    observed = simtools.parse_observations(obsfile)
    simtools.parse_params(paramfile, observed)

    ### PLOTS SHOWING MODEL PROBABILITIES ###
    num_models = abc_history.nr_of_models_alive(0)
    max_points_in_models = max([
        abc_history.get_distribution(m=x, t=0)[0].shape[1]
        for x in range(num_models)
    ])

    axs = abc_history.get_model_probabilities().plot.bar()
    axs.set_ylabel("Probability")
    axs.set_xlabel("Generation")
    resolutions = list(range(simtools.PARAMS['abc_params']['resolution_limits'][0],
                             simtools.PARAMS['abc_params']['resolution_limits'][1] + 1))
    axs.legend(resolutions,
               title="Reconstruction resolution")

    if save is not None:
        # first time, construct the multipage pdf
        pdf_out = PdfPages(save)
        pdf_out.savefig()
    else:
        plt.show()

    ### ABC SIMULATION DIAGNOSTICS ###
    fig, ax = plt.subplots(nrows=3, sharex=True)

    t_axis = list(range(abc_history.max_t + 1))

    populations = abc_history.get_all_populations()
    populations = populations[populations.t >= 0]

    ax[0].plot(t_axis, populations['particles'])
    ax[1].plot(t_axis, populations['epsilon'])
    ax[2].plot(t_axis, populations['samples'])

    ax[0].set_title('ABC parameters per generation')
    ax[0].set_ylabel('Particles')
    ax[1].set_ylabel('Epsilon')
    ax[2].set_ylabel('Samples')
    ax[-1].set_xlabel('Generation (t)')
    ax[0].xaxis.set_major_locator(MaxNLocator(integer=True))

    fig.set_size_inches(8, 5)

    if save is not None:
        pdf_out.savefig()
    else:
        plt.show()


    ### PARAMETERS OVER TIME ###
    fig, axs = plt.subplots(nrows=max_points_in_models, sharex=True, sharey=True)

    t_axis = np.arange(abc_history.max_t + 1)
    # print(t_axis)
    # parameters = ['birthrate.s0.d', 'birthrate.s0.r0']
    all_parameters = [list(abc_history.get_distribution(m=m, t=0)[0].columns)
                      for m in range(num_models)]
    # abc_data, __ = abc_history.get_distribution(m=m, t=generation)
    parameters = []
    for x in all_parameters:
        for y in x:
            parameters.append(y)
    parameters = list(set(parameters))
    parameters = sorted(parameters, key=lambda x: x[-1])
    # print(parameters)

    for m in range(num_models):

        qs1 = {param: [np.nan for __ in t_axis] for param in parameters}
        medians = {param: [np.nan for __ in t_axis] for param in parameters}
        qs3 = {param: [np.nan for __ in t_axis] for param in parameters}

        for i, generation in enumerate(t_axis):
            abc_data, __ = abc_history.get_distribution(m=m, t=generation)
            data = {x: np.array(abc_data[x]) for x in parameters if x in abc_data}
            for k, v in data.items():
                t_q1, t_m, t_q3 = np.percentile(
                    v, [25, 50, 75]
                )
                qs1[k][i] = t_q1
                medians[k][i] = t_m
                qs3[k][i] = t_q3


        for i, param in enumerate(parameters):
            if not medians[param]:
                continue
            # print(t_axis, medians[param])
            axs[i].plot(t_axis, medians[param], color=COLORS[m])
            axs[i].fill_between(t_axis, qs1[param], qs3[param], color=COLORS[m], alpha=0.2)

            axs[i].set_ylabel(param[10:])

        axs[-1].set_xlabel('Generation (t)')

    if save is not None:
        pdf_out.savefig()
    else:
        plt.show()

    if save is not None:
        pdf_out.close()
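This script also leans on a few names defined elsewhere; plausible stand-ins for running it (the `COLORS` list is an assumption):

import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib.ticker import MaxNLocator

COLORS = plt.rcParams["axes.prop_cycle"].by_key()["color"]  # default colour cycle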
Example #18
from pyabc import History
import matplotlib.pyplot as plt
from hft_abm_smc_abc.config import DELTA_TRUE, MU_TRUE, ALPHA_TRUE, LAMBDA0_TRUE, C_LAMBDA_TRUE, DELTA_S_TRUE, \
    WORK_DIR, temp_output_folder, version_number, PROCESSED_FOLDER, \
    DELTA_MIN, DELTA_MAX, MU_MIN, MU_MAX, ALPHA_MIN, ALPHA_MAX, LAMBDA0_MIN, LAMBDA0_MAX,\
    C_LAMBDA_MIN, C_LAMBDA_MAX, DELTAS_MIN, DELTAS_MAX, SMCABC_DISTANCE, SMCABC_POPULATION_SIZE, SMCABC_SAMPLER,\
    SMCABC_TRANSITIONS, SMCABC_EPS
import pyabc

# load history
h_loaded = History(
    "sqlite:///" +
    "hft_abm_smc_abc/resultsTH100_t=6_stochasticAcceptor_eps0001_seed21590917044.8407867.db"
)

# check that the history is not empty
print(h_loaded.all_runs())

from pyabc.visualization import plot_kde_matrix
df, w = h_loaded.get_distribution(m=0, t=4)
plot_kde_matrix(df, w)
plt.show()


def plot_coonvergence(history, parameter, range_min, range_max, true_value,
                      ax):
    #fig, ax = plt.subplots()
    for t in range(history.max_t - 1):
        df, w = history.get_distribution(m=0, t=t)
        # remaining arguments completed from the function signature;
        # the original snippet was cut off after the first two
        pyabc.visualization.plot_kde_1d(df, w,
                                        x=parameter,
                                        xmin=range_min, xmax=range_max,
                                        ax=ax, label=f"t={t}")
    ax.axvline(true_value, color="k", linestyle="dashed")
Example #19
def test_population_retrieval(history: History):
    history.append_population(1, .23, Population(rand_pop(0)), 234, ["m1"])
    history.append_population(2, .123, Population(rand_pop(0)), 345, ["m1"])
    history.append_population(2, .1235, Population(rand_pop(5)), 20345,
                              ["m1"] * 6)
    history.append_population(3, .12330, Population(rand_pop(30)), 30345,
                              ["m1"] * 31)
    df = history.get_all_populations()

    assert df[df.t == 1].epsilon.iloc[0] == .23
    assert df[df.t == 2].epsilon.iloc[0] == .123
    assert df[df.t == 2].epsilon.iloc[1] == .1235
    assert df[df.t == 3].epsilon.iloc[0] == .12330

    assert df[df.t == 1].samples.iloc[0] == 234
    assert df[df.t == 2].samples.iloc[0] == 345
    assert df[df.t == 2].samples.iloc[1] == 20345
    assert df[df.t == 3].samples.iloc[0] == 30345

    assert history.alive_models(1) == [0]
    assert history.alive_models(2) == [0, 5]
    assert history.alive_models(3) == [30]
    print("ID", history.id)
Example #20
    def make_hist():
        h = History("sqlite:///" + path)
        h.store_initial_data(0, {}, {}, {}, model_names, "", "", "")
        return h
Example #21
def __init__(self, abc_hist: History):
    # Get the dataframe of particles (parameter point estimates) and associated weights
    dist_df, dist_w = abc_hist.get_distribution(m=0, t=abc_hist.max_t)
    # Create a KDE using the particles
    self.kde = MultivariateNormalTransition(scaling=1)
    self.kde.fit(dist_df, dist_w)
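Once fitted, the `MultivariateNormalTransition` acts as a smooth KDE over the weighted particles; pyabc transitions expose `rvs()` for sampling and `pdf()` for density evaluation. A brief usage sketch (assuming `dist_df`/`dist_w` as above; the import path may vary between pyabc versions):

from pyabc.transition import MultivariateNormalTransition

kde = MultivariateNormalTransition(scaling=1)
kde.fit(dist_df, dist_w)
sample = kde.rvs()         # one parameter draw from the KDE
density = kde.pdf(sample)  # KDE density at that point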
Example #22
    positive_price_path = accept_pos(p.intradayPrice)

    # poor results - set to arbitrarily high number
    if not positive_price_path:
        price_path = pd.DataFrame([9999] * TIME_HORIZON)
    else:
        # Log and divide price path by 1000, Convert to pandas dataframe
        price_path = pd.DataFrame(np.log(p.intradayPrice / PRICE_PATH_DIVIDER))

    return price_path, p


if __name__ == '__main__':
    ###### simulate hft data ######
    h_loaded = History(
        "sqlite:///" +
        "hft_abm_smc_abc/resultsReal_Data_Small_Test - Smaller Test - eps1_negfix_pop6_pop301597579353.943031.db"
    )

    param_list = ["mu", "lambda0", "delta", "delta_S", "alpha", "C_lambda"]

    posterior_mean_dict = posterior_mean(h_loaded, param_list)

    log_price_path, preis_object = preisSim_object(
        parameters=posterior_mean_dict)

    log_price_path = log_price_path.rename(columns={0: "Simulated Midprice"})

    ###### real world hft data ######
    midprice = pd.read_csv(os.path.join(PROCESSED_FOLDER,
                                        "Log_Original_Price_Bars_2300.csv"),
                           header=None)
Example #23
def setup(modelfile: str,
          *experiments: Experiment,
          err_pars: List[str] = None,
          pacevar: str = 'membrane.V',
          tvar: str = 'phys.T',
          prev_runs: List[str] = [],
          logvars: List[str] = myokit.LOG_ALL,
          log_interval: float = None,
          normalise: bool = True
          ) -> Tuple[pd.DataFrame, Callable, Callable]:
    """Combine chosen experiments into inputs for ABC.

    Args:
        modelfile (str): Path to Myokit MMT file.
        *experiments (Experiment): Any number of experiments to run in ABC.
        err_pars (List[str]): Optional list of parameters representing model
            discrepancy variance for each experiment.
        pacevar (str): Optionally specify name of pacing variable in modelfile.
            Defaults to `membrane.V` assuming voltage clamp protocol but could
            also be set to stimulating current.
        tvar (str): Optionally specify name of temperature in modelfile.
            Defaults to `phys.T`.
        prev_runs (List[str]): Path to previous pyABC runs containing samples
            to randomly sample outside of ABC algorithm.
        logvars (List[str]): Optionally specify variables to log in simulations.

    Returns:
        Tuple[pd.DataFrame, Callable, Callable]:
            Observations combined from experiments.
            Model function to run combined protocols from experiments.
            Summary statistics function to convert 'raw' simulation output.
    """

    # Create Myokit model instance
    m = myokit.load_model(modelfile)

    # Set pacing variable
    pace = m.get(pacevar)
    if pace.binding() != 'pace':
        if pace.is_state():
            pace.demote()
        pace.set_rhs(0)
        pace.set_binding('pace')
    model_temperature = m.get(tvar).value()

    # Initialise combined variables
    observations = get_observations_df(list(experiments),
                                       normalise=normalise,
                                       temp_adjust=True,
                                       model_temperature=model_temperature)

    # Combine protocols into Myokit simulations
    simulations, times = [], []
    for exp in list(experiments):
        s = myokit.Simulation(m, exp.protocol)
        for ci, vi in exp.conditions.items():
            s.set_constant(ci, vi)
        simulations.append(s)
        times.append(exp.protocol.characteristic_time())

    # Get previous pyABC runs
    # Note: defaults to latest run in database file
    sample_df, sample_w = [], []
    for run in prev_runs:
        h = History(run)
        df, w = h.get_distribution()
        sample_df.append(df)
        sample_w.append(w)

    # Create model function
    def simulate_model(**pars):
        sim_output = []
        # Pre-optimised parameters
        for df, w in zip(sample_df, sample_w):
            pars = dict([(key[4:], 10**value) if key.startswith("log")
                         else (key, value)
                         for key, value in df.sample(weights=w, replace=True).items()],
                        **pars)
        for sim, time in zip(simulations, times):
            for p, v in pars.items():
                if err_pars is not None and p in err_pars:
                    continue
                try:
                    sim.set_constant(p, v)
                except Exception:
                    # myokit raises if the parameter name is unknown
                    warnings.warn("Could not set value of {}".format(p))
                    return None
            sim.reset()
            try:
                sim_output.append(
                    sim.run(time, log=logvars, log_interval=log_interval))
            except Exception:
                del sim_output
                return None
        return sim_output
    def model(x):
        return log_transform(simulate_model)(**x)

    # Combine summary statistic functions
    normalise_factor = {}
    for i, f in enumerate(observations.normalise_factor):
        normalise_factor[i] = f
    sum_stats_combined = combine_sum_stats(
        *[e.sum_stats for e in list(experiments)]
    )
    def summary_statistics(data):
        if data is None:
            return {str(i): np.inf for i in range(len(observations))}
        ss = {str(i): val/normalise_factor[i]
              for i, val in enumerate(sum_stats_combined(data))}
        return ss

    return observations, model, summary_statistics
Example #24
def run_app(db, debug, port):
    db = os.path.expanduser(db)
    history = History("sqlite:///" + db)
    app.config["HISTORY"] = history
    app.run(debug=debug, port=port)
Example #25
def posterior_mean(h_loaded, param_list):
    mean_dict = {}

    for param in param_list:
        data = h_loaded.get_distribution(t=3)[0][param]

        res_mean, res_var, res_std = stats.bayes_mvs(data, alpha=0.90)

        mean_dict.update({param: res_mean[0]})

    return mean_dict


if __name__ == '__main__':
    # load history
    h_loaded = History("sqlite:///"
                       + "hft_abm_smc_abc/resultsReal_Data_Small_Test - Smaller Test - eps1_negfix_pop6_pop301597579353.943031.db")

    # check that the history is not empty
    print(h_loaded.all_runs())

    from pyabc.visualization import plot_kde_matrix

    df, w = h_loaded.get_distribution(m=0, t=4)
    plot_kde_matrix(df, w)
    plt.show()

    fig, axs = plt.subplots(2, 3)
    plot_coonvergence(h_loaded, 'mu', MU_MIN, MU_MAX, MU_TRUE, ax=axs[0, 0])
    plot_coonvergence(h_loaded, 'lambda0', LAMBDA0_MIN, LAMBDA0_MAX, LAMBDA0_TRUE, ax=axs[0, 1])
    plot_coonvergence(h_loaded, 'delta', DELTA_MIN, DELTA_MAX, DELTA_TRUE, ax=axs[0, 2])
    plot_coonvergence(h_loaded, 'delta_S', DELTAS_MIN, DELTAS_MAX, DELTA_S_TRUE, ax=axs[1, 0])
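`scipy.stats.bayes_mvs` returns three named tuples (mean, variance, standard deviation), each holding a point estimate and a credible interval, so `res_mean[0]` in `posterior_mean` above is the posterior mean point estimate. A quick illustration:

import numpy as np
from scipy import stats

res_mean, res_var, res_std = stats.bayes_mvs(np.random.rand(100), alpha=0.90)
print(res_mean.statistic)  # point estimate, same value as res_mean[0]
print(res_mean.minmax)     # 90% credible interval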
Example #26
def tabulate_single(paramfile, obsfile, dbfile, csvfile, run_id):
    """
    Table of results (appending to table)
    """

    fieldnames = ['name', 'model_index', 'model_probability',
                  'rate_position', 'rate_mean', 'rate_stdev']

    db_path = 'sqlite:///' + dbfile
    abc_history = History(db_path)
    abc_history.id = run_id

    observed = simtools.parse_observations(obsfile)
    # print(observed)
    # id_str = next(iter(observed))
    simtools.parse_params(paramfile, observed)

    # violin plot of results
    max_gen = abc_history.max_t

    # num_models_total = abc_history.nr_of_models_alive(0)
    num_models_total = (
        simtools.PARAMS['abc_params']['resolution_limits'][1]
        - simtools.PARAMS['abc_params']['resolution_limits'][0] + 1
    )
    num_models_final = abc_history.nr_of_models_alive(max_gen)
    max_point_in_models = max([abc_history.get_distribution(m=x, t=max_gen)[0].shape[1]
                               for x in range(num_models_final)])

    # print(max_gen, num_models_total, num_models_final)

    with open(csvfile, 'w') as csv_out:
        wtr = csv.DictWriter(csv_out, fieldnames=fieldnames)
        wtr.writeheader()

        for j in range(num_models_total):
            # print(abc_history.get_model_probabilities())
            if j not in abc_history.get_model_probabilities():
                continue
            model_prob = abc_history.get_model_probabilities()[j][max_gen]
            if model_prob == 0.0:
                continue

            # print(j + 1, model_prob)

            df, w = abc_history.get_distribution(m=j, t=max_gen)
            # print(df)
            # print(df.columns)
            # abc_data = [sorted(df['birthrate.b' + str(x)]) for x in range(df.shape[1])]

            # for x in list(df.columns):
            #     print(x)
            #     print(df[x])
            abc_data = [sorted(df[x]) for x in list(df.columns)]
            # print(abc_data)

            for i, d in enumerate(abc_data):
                print('HPDI')
                hpdi_interval = hpdi(d)
                print(hpdi_interval)
                print('MEAN')
                mean = np.mean(d)
                print(mean)
                print('SIGMA')
                sigma = np.std(d)
                print(sigma)

                row = {
                    'name': simtools.PARAMS['plot_params']['coupling_names'],
                    'model_index': j,
                    'model_probability': model_prob,
                    'rate_position': i,
                    'rate_mean': mean,
                    'rate_stdev': sigma,
                }
                wtr.writerow(row)