Example #1
def main(trainkey,
         predkey,
         outputkey,
         inference_method='',
         ncores='',
         nchains=1,
         niters=1500,
         redishost='10.42.72.93'):
    panthera = redishost
    conn = redis.StrictRedis(host=panthera, password='******')
    #predkey = 'p-50x50-guerrero-4'
    #trainkey = 't-luca-guerrero-4'
    #outputkey = 'test-model'
    PDF = preparePredictors(loadDataFrameFromRedis(predkey, conn))
    TDF = loadDataFrameFromRedis(trainkey, conn)

    formula = 'LUCA ~ Longitude + Latitude + Q("Dist.to.road_m") + Population_m + name'

    TM, PM = splitByFormula(formula, TDF, PDF['clean'])
    logger.info("Start modelling inference")

    model = ModelSamplingEffort(TM, PM)
    trace = SampleModel(model,
                        inference_method=inference_method,
                        ncores=ncores,
                        nchains=nchains,
                        niters=niters)
    logger.info("Saving trace")
    try:
        pm.save_trace(
            trace,
            directory='/storage/users/escamill/presence-only-model/output/rawtrace',
            overwrite=True)
    except Exception:
        logger.error("not possible to save trace")
    tracedf = pm.trace_to_dataframe(trace)
    tracedf.to_csv(
        '/storage/users/escamill/presence-only-model/output/trace%s.csv' %
        outputkey,
        encoding='utf8')

    try:
        pred_sample = SamplePredictions(model, TM, PM, trace)
    except Exception:
        logger.error("failed to sample predictions")
        raise

    pred_sample.to_csv(
        '/storage/users/escamill/presence-only-model/output/pred_cond-%s.csv' %
        outputkey,
        encoding='utf8')
    # pred sample is a dictionary

    with open(
            '/storage/users/escamill/presence-only-model/output/pred%s.pickle' %
            outputkey, 'wb') as f:
        pickle.dump(pred_sample, f)
    #conn.set(outputkey+'-df',pickle.dumps(tracedf))
    #conn.set(outputkey+'-trace',pickle.dumps(pred_sample))
    logger.info("Finished!")
Example #2
def run(sir_model, N_SAMPLES, cluster_save_path):
    print('sample start')
    with sir_model:
        trace = pm.sample(N_SAMPLES,
                          model=sir_model,
                          step=pm.Metropolis(),
                          progressbar=True)
    pm.save_trace(trace, cluster_save_path + 'sir_model.trace', overwrite=True)
    print('sample end')
    # -------- prepare data for visualization ---------------
    varnames = get_all_free_RVs_names(sir_model)
    #for varname in varnames:
    #visualize_trace(trace[varname][:, None], varname, N_SAMPLES)

    lambda_t = np.median(trace['lambda_t'], axis=0)
    μ = np.median(trace['mu'][:, None], axis=0)

    # -------- visualize histogram ---------------
    num_cols = 5
    num_rows = int(np.ceil(len(varnames) / num_cols))
    x_size = num_cols * 2.5
    y_size = num_rows * 2.5

    fig, axes = plt.subplots(num_rows,
                             num_cols,
                             figsize=(x_size, y_size),
                             squeeze=False)
    i_ax = 0
    for i_row, axes_row in enumerate(axes):
        for i_col, ax in enumerate(axes_row):
            if i_ax >= len(varnames):
                ax.set_visible(False)
                continue
            plot_hist(sir_model,
                      trace,
                      ax,
                      varnames[i_ax],
                      colors=('tab:blue', 'tab:green'))
            if i_col == 0:
                ax.set_ylabel('Density')
            if i_col == 0 and i_row == 0:
                ax.legend()
            i_ax += 1
    fig.subplots_adjust(wspace=0.25, hspace=0.4)
    plt.savefig(cluster_save_path + 'plot_hist.png')
    plt.clf()

    np.save(cluster_save_path + 'varnames.npy', varnames)
    np.save(cluster_save_path + 'SIR_params.npy', [lambda_t, μ])
Example #3
def save_traces(trace):
    fname = {}
    folder_name = 'traces'
    if not os.path.exists(folder_name):
        os.makedirs(folder_name)
    for key, val in trace.items():
        fname[key] = pm.save_trace(val,
                                   directory=os.path.join(folder_name, key),
                                   overwrite=True)
    return fname
Example #4
    def run_group_inference(self, ndraws=300, nburn=100, cores=5):

        curr_model = self.group_model()

        with curr_model:

            step = pm.Metropolis()  #S=np.ones(1)*0.01)

            trace = pm.sample(ndraws,
                              tune=nburn,
                              discard_tuned_samples=True,
                              step=step,
                              cores=cores)

            # plot the traces
            #            plt.figure()
            #            _ = pm.traceplot(trace)#, lines=('h', 1./alpha_true))
            #            plt.show()
            #            plt.figure()
            #            _ = pm.plot_posterior(trace, var_names=['h'], ref_val=(1./alpha_true))
            #            plt.show()

            # save the traces
            fname = pm.save_trace(trace)

        return fname
Example #5
def run_on_all_gals():
    sid_list = pd.read_csv('lib/subject-id-list.csv').values.T[0]
    for subject_id in sid_list:
        traces_dir = os.path.join('uniform-traces', str(subject_id))
        if os.path.isdir(traces_dir):
            continue
        try:
            arms = get_arms(subject_id)
        except IndexError:
            continue
        print('Working on', subject_id)
        trace = get_logsp_trace_from_arms(arms)
        try:
            os.mkdir(traces_dir)
        except FileExistsError:
            pass
        pm.save_trace(trace, directory=traces_dir, overwrite=True)
Example #6
    def test_save_new_model(self, tmpdir_factory):
        directory = str(tmpdir_factory.mktemp("data"))
        save_dir = pm.save_trace(self.trace, directory, overwrite=True)

        assert save_dir == directory
        with pm.Model() as model:
            w = pm.Normal("w", 0, 1)
            new_trace = pm.sample(return_inferencedata=False)

        with pytest.raises(OSError):
            _ = pm.save_trace(new_trace, directory)

        _ = pm.save_trace(new_trace, directory, overwrite=True)
        with model:
            new_trace_copy = pm.load_trace(directory)

        assert (new_trace["w"] == new_trace_copy["w"]).all()
Example #7
def create_and_run_model(
        cases_filename,
        covariates_filename,
        dist_params,
        num_baseline_intensities,
        num_draws=100,
        num_burn=100,
        fixed_r_c=None,
        fixed_r_h=None,
        discharges_filename=None,
        output_prefix='',
        step='slice'
):
    """
    Perform a fit of a given set of cases, covariates, and optionally
    discharges, with specified distribution parameters, and using the
    likelihood form defined in likelihood.py. Output the MCMC history to
    disk. Output a plot of the history to disk. Output a summary of the fitted
    parameters to the screen and to disk.
    """

    likelihood_obj = CareHomeLikelihood(
        cases_filename, covariates_filename, dist_params, discharges_filename,
        fixed_r_c=fixed_r_c, fixed_r_h=fixed_r_h
    )
    model = get_model(
        likelihood_obj,
        fixed_r_c=fixed_r_c,
        fixed_r_h=fixed_r_h,
        num_baseline_intensities=num_baseline_intensities
    )
    trace = mcmc_fit(model, num_draws=num_draws, num_burn=num_burn, step=step)
    plot_trace(
        trace,
        output_prefix.parent / (output_prefix.name + 'traceplot.pdf')
    )
    pm.save_trace(
        trace,
        output_prefix.parent / (output_prefix.name + 'trace.dat')
    )
    summary = pm.summary(trace, round_to="none")
    print_result(
        summary,
        likelihood_obj,
        filename=output_prefix.parent / (output_prefix.name + 'summary.txt')
    )
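
A hypothetical invocation of create_and_run_model; the file names and dist_params are placeholders, and output_prefix is passed as a pathlib.Path because the function uses output_prefix.parent and output_prefix.name (the string default would not support those):

from pathlib import Path

# Hypothetical call; the CSV names and dist_params are placeholders for the
# project's real inputs.
create_and_run_model(
    cases_filename='cases.csv',
    covariates_filename='covariates.csv',
    dist_params=dist_params,
    num_baseline_intensities=3,
    num_draws=500,
    num_burn=500,
    output_prefix=Path('results/carehome-'),
)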
Example #8
    def test_save_new_model(self, tmpdir_factory):
        directory = str(tmpdir_factory.mktemp('data'))
        save_dir = pm.save_trace(self.trace, directory, overwrite=True)

        assert save_dir == directory
        with pm.Model() as model:
            w = pm.Normal('w', 0, 1)
            new_trace = pm.sample()

        with pytest.raises(OSError):
            _ = pm.save_trace(new_trace, directory)

        _ = pm.save_trace(new_trace, directory, overwrite=True)
        with model:
            new_trace_copy = pm.load_trace(directory)

        assert (new_trace['w'] == new_trace_copy['w']).all()
Example #10
def add_receiver_theory_points(
        ds,
        output_file=None,
        theory_ds='POLARBEAR/pb2a_cryostat_japan_measurements.txt'):

    if type(ds) is str:
        ds = mf.Dataset(from_file=ds)
    elif type(ds) is mf.Dataset:
        ds = ds.copy()
    else:
        raise ValueError('Type of dataset argument not recognized.')

    if type(theory_ds) is str:
        theoryds = mf.Dataset(from_file=theory_ds)
    elif type(theory_ds) is mf.Dataset:
        theoryds = theory_ds.copy()
    else:
        raise ValueError('Type of theory dataset argument not recognized.')

    dsuse = ds.subset_from_labels(MEASURED_RECEIVER_TARGET_NAMES)

    with mf.AlignDatasets(ds1=dsuse, ds2=theoryds, fitmap={'s':
                                                           False}) as tamodel:
        tatrace = pm.sample(2000,
                            tune=5500,
                            init='advi+adapt_diag',
                            nuts_kwargs={
                                'target_accept': .90,
                                'max_treedepth': 25
                            },
                            error_scale1=1.,
                            error_scale2=1.)
    pm.save_trace(tatrace)
    pm.traceplot(tatrace)
    plt.show()
    fptheory = theoryds.subset_from_marker('FOCALPLANE')
    pos, err = tamodel.use_transform_trace(fptheory.to_tensors(), tatrace)
    newtheoryarray = mf.DatasetArrays(pos=np.mean(pos, axis=0),
                                      err=np.std(pos, axis=0),
                                      serr=np.std(pos, axis=0))
    newfptheory = fptheory.remake_from_arrays(newtheoryarray)
    for p in newfptheory.values():
        ds.add_point(p)
    print(ds)
Example #11
    def test_save_and_load(self, tmpdir_factory):
        directory = str(tmpdir_factory.mktemp('data'))
        save_dir = pm.save_trace(self.trace, directory, overwrite=True)

        assert save_dir == directory

        trace2 = pm.load_trace(directory, model=TestSaveLoad.model())

        for var in ('x', 'z'):
            assert (self.trace[var] == trace2[var]).all()
Example #13
def sampleandsave(f):
    """Sample from the model in context.

    """
    if not exists(f):

        # sample and save

        trace = pm.sample(8000, tune=2000, chains=1)
        pm.save_trace(trace, f)
        pm.traceplot(trace, compact=True)
        rcParams["font.size"] = 14
        plt.savefig(f"{f}/traceplot.png")
        ppc = pm.sample_posterior_predictive(trace)["$Y$"]
        np.savez_compressed(f"{f}/ppc.npz", ppc)

    else:

        trace = pm.load_trace(f)

    return trace
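
pm.sample, pm.save_trace, and pm.load_trace all resolve the model from the active context, so sampleandsave must be called inside a model block. A minimal usage sketch with a placeholder model and output directory; the observed variable is named "$Y$" because that key is read from the posterior predictive samples:

# Minimal sketch; the model and the 'output/demo' directory are placeholders.
with pm.Model():
    mu = pm.Normal("mu", 0.0, 1.0)
    pm.Normal("$Y$", mu=mu, sigma=1.0, observed=np.zeros(10))
    trace = sampleandsave("output/demo")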
Example #14
    def save(self, output, trace=None):
        if trace is not None:
            trace_fname = pm.save_trace(trace)
        else:
            trace_fname = None

        with open(output, "wb") as buff:
            pickle.dump(
                {
                    'galaxies': self.galaxies,
                    'trace': trace_fname,
                    'n_chains': trace.nchains if trace is not None else None,
                }, buff)
Example #15
def test_pm():
    # This takes 5min to run
    # Hiding this import in here
    import pymc3 as pm

    parm_dict = mcmc.grab_parmdict()
    outroot = os.path.join(resource_filename('frb', 'tests'), 
                           'files', 'mcmc')

    with mcmc.pm_four_parameter_model(parm_dict, beta=3.):
        # Sample
        #trace = pm.sample(40000, tune=2000) # This defaults to 4 chains
        trace = pm.sample(1000, tune=500) # This defaults to 4 chains
        # Save the traces -- Needs to be done before the plot
        pm.save_trace(trace, directory=outroot, overwrite=True)
        print("All done with the 4 parameter, beta=3 run ")
        # Save a plot
        plt.clf()
        _ = pm.plot_trace(trace)
        #plt.savefig(os.path.join(outroot, 'traceplot.png'))
        # Parameters
        jdict = utils.jsonify(parm_dict)
        utils.savejson(os.path.join(outroot, 'parms.json'), jdict, easy_to_read=True)
Example #16
    def write_traces(self, destination, user=None):
        """ Create directory for each user. In each user directory create folders for each model.
        Each chain goes inside a directory, and each directory contains a metadata json file,
        and a numpy compressed file.

        :param destination: Directory to save to.
        :type destination: Union[str, Path]
        :param user: Only write traces of this user.
        :type: int
        """
        if isinstance(destination, str):
            destination = Path(destination)

        if user:
            if user not in self.traces.keys():
                warnings.warn(f"User {user} not found!")
                return
            elif not self.traces[user]:
                warnings.warn(f"No traces for user {user}!")
                return

        # ToDo: parallelize.
        print("Writing traces to files... ")
        for u, models in self.traces.items():
            # If user given, only process this one.
            if user and u != user:
                continue
            # User folder.
            user_folder = destination / f'user_{u}'
            for model_name, trace in models.items():
                # Create model folder.
                model_folder = user_folder / model_name
                model_folder.mkdir(parents=True, exist_ok=True)
                pm.save_trace(trace,
                              directory=str(model_folder),
                              overwrite=True)
        print("Done.\n")
Example #17
    def test_save_and_load(self, tmpdir_factory):
        directory = str(tmpdir_factory.mktemp("data"))
        save_dir = pm.save_trace(self.trace, directory, overwrite=True)

        assert save_dir == directory

        trace2 = pm.load_trace(directory, model=TestSaveLoad.model())

        for var in ("x", "z"):
            assert (self.trace[var] == trace2[var]).all()

        assert self.trace.stat_names == trace2.stat_names
        for stat in self.trace.stat_names:
            assert all(self.trace[stat] == trace2[stat]), (
                "Restored value of statistic %s does not match stored value" %
                stat)
Example #18
    def test_sample_ppc(self, tmpdir_factory):
        directory = str(tmpdir_factory.mktemp('data'))
        save_dir = pm.save_trace(self.trace, directory, overwrite=True)

        assert save_dir == directory

        seed = 10
        np.random.seed(seed)
        with TestSaveLoad.model():
            ppc = pm.sample_ppc(self.trace)

        seed = 10
        np.random.seed(seed)
        with TestSaveLoad.model():
            trace2 = pm.load_trace(directory)
            ppc2 = pm.sample_ppc(trace2)

        for key, value in ppc.items():
            assert (value == ppc2[key]).all()
Example #19
    def test_sample_posterior_predictive(self, tmpdir_factory):
        directory = str(tmpdir_factory.mktemp("data"))
        save_dir = pm.save_trace(self.trace, directory, overwrite=True)

        assert save_dir == directory

        rng = np.random.RandomState(10)

        with TestSaveLoad.model(rng_seeder=rng):
            ppc = pm.sample_posterior_predictive(self.trace)

        rng = np.random.RandomState(10)

        with TestSaveLoad.model(rng_seeder=rng):
            trace2 = pm.load_trace(directory)
            ppc2 = pm.sample_posterior_predictive(trace2)

        for key, value in ppc.items():
            assert (value == ppc2[key]).all()
Example #21
model = BaseModel(tspan,
                  county_info, [
                      "../data/ia_effect_samples/{}_{}.pkl".format(disease, i)
                      for i in range(100)
                  ],
                  include_eastwest=use_eastwest,
                  include_demographics=use_age)

print("Sampling parameters on the training set.")
trace = model.sample_parameters(target_train,
                                samples=num_samples,
                                tune=100,
                                target_accept=0.95,
                                max_treedepth=15,
                                chains=num_chains,
                                cores=num_cores)

with open(filename_model, "wb") as f:
    pkl.dump(model.model, f)

with model.model:
    pm.save_trace(trace, filename_params, overwrite=True)

print("Sampling predictions on the testing set.")
pred = model.sample_predictions(target_test.index, target_test.columns, trace)
with open(filename_pred, 'wb') as f:
    pkl.dump(pred, f)

# for file in [filename_params, filename_pred]:
#     set_file_permissions(file, uid=46836, gid=10033)

Example #22
annInput = theano.shared(XsTrain)
annTarget = theano.shared(YsTrain)
errAnnInput = theano.shared(errXsTrain)
errAnnTarget = theano.shared(errYsTrain)

neural_network = construct_nn(annInput, errAnnInput, annTarget, errAnnTarget)

print("Starting the training of the BNN...")

if not os.path.exists(cache_file_bnn):

    with neural_network:
        #fit model
        trace = pm.sample(draws=nsamples,
                          init='advi+adapt_diag',
                          n_init=ninit,
                          tune=ninit // 2,
                          chains=nchains,
                          cores=ncores,
                          nuts_kwargs={'target_accept': 0.90},
                          discard_tuned_samples=True,
                          compute_convergence_checks=True,
                          progressbar=False)
    pm.save_trace(trace, directory=cache_file_bnn)
else:
    trace = pm.load_trace(cache_file_bnn, model=neural_network)

print("Done...")
Example #23
def get_SIR(x, y, y0, country, forecast_len=0, load_post=False):
    '''
    If 'forecast_len' is nonzero, attempts to load a trace corresponding to the
    country of interest from the directory 'traces' and retrieves predicted numbers
    of infected and susceptible patients 'forecast_len' days into the future after the 
    1st case is detected in the country.
    '''

    # If in 'prediction mode', modify x, y to reflect forecast length
    if forecast_len != 0:
        ext = np.arange(1, forecast_len + 1).astype(float)
        ext += x[-1]
        x = np.append(x, ext)
        y = np.empty((x.shape[0], y.shape[1]))

    # SIR Model
    # p[0]: beta, p[1]: lambda
    def SIR(y, t, p):
        ds = -p[0] * y[0] * y[1]  # Susceptible differential
        di = p[0] * y[0] * y[1] - p[1] * y[1]  # Infected differential
        return [ds, di]

    # Initialize ODE
    sir_ode = DifferentialEquation(func=SIR,
                                   times=x,
                                   n_states=2,
                                   n_theta=2,
                                   t0=0)

    load_dir = osp.join('traces', country.lower())

    with pm.Model() as model:
        sigma = pm.HalfNormal('sigma', 3, shape=2)

        # R0 prior centered above 1, since we can see an epidemic has occurred
        R0 = pm.Normal('R0', 2, 3)

        lmbda = pm.Normal('lambda', 0.1, 0.1)

        beta = pm.Deterministic('beta', lmbda * R0)

        print('Setting up model for ' + country)
        sir_curves = sir_ode(y0=y0, theta=[beta, lmbda])

        y_obs = pm.Normal('y_obs', mu=sir_curves, sigma=sigma, observed=y)

        if forecast_len == 0:
            trace = pm.sample(2000,
                              tune=1000,
                              cores=2,
                              chains=2,
                              progressbar=True)

            # Save trace
            pm.save_trace(trace, load_dir, overwrite=True)

            # Get the posterior
            post = pm.sample_posterior_predictive(trace, progressbar=True)

            out_post = post
        else:
            # Load trace
            print('Loading trace')
            trace = pm.load_trace(load_dir)

            print('Computing posterior')
            #Get posterior
            if not load_post:
                post = pm.sample_posterior_predictive(trace[500:],
                                                      progressbar=True)
                out_post = post
                with open(country + '_post.pkl', 'wb') as buff:
                    pickle.dump({'post': post}, buff)

            else:
                with open(country + '_post.pkl', 'rb') as buff:
                    data = pickle.load(buff)
                out_post = data['post']

    print('Done')

    return trace, out_post, x
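
A hypothetical two-phase use of get_SIR: the first call fits and saves the trace under traces/<country>, the second reloads it and forecasts 30 days ahead. Here x, y, and y0 stand for the observed time axis, case data, and initial conditions prepared elsewhere:

# Hypothetical calls; x, y and y0 are placeholders for real data.
trace, post, x_fit = get_SIR(x, y, y0, 'Italy')
trace, post, x_ext = get_SIR(x, y, y0, 'Italy', forecast_len=30)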
Example #24
with pm.Model() as model:
    #Switchpoint
    tau = pm.DiscreteUniform("tau", lower=0, upper=len(data) - 1)
    #Prior when t <= tau
    mu_1 = pm.Normal("mu_1", mu=280, sd=20)
    sd_1 = pm.HalfNormal("sd_1", sigma=40)
    #Prior when t > tau
    mu_2 = pm.Normal("mu_2", mu=280, sd=20)
    sd_2 = pm.HalfNormal("sd_2", sigma=40)
    #Observations
    idx = np.arange(len(data))
    mu_t = pm.math.switch(tau > idx, mu_1, mu_2)
    sd_t = pm.math.switch(tau > idx, sd_1, sd_2)
    observations = pm.Normal("observations", mu=mu_t, sd=sd_t, observed=data)

#Perform inference
with model:
    step = pm.NUTS()
    trace = pm.sample(50000, tune=5000, step=step)

#Save summary
summary = pm.summary(trace)
with open('sp.summary', 'w') as f:
    summary.to_string(f)

#Plot and save posterior traces
pm.save_trace(trace, 'switchpoint.trace', overwrite=True)
az.plot_trace(trace)
plt.savefig(
    '/home/gerardo/Desktop/Projects/PGA-Analysis/reports/figures/driving-distance-pymc3-posteriors.png'
)
plt.show()
Example #25
def main():

    # load the data
    df = pd.read_csv("../../assets/data/HS.csv", index_col=0)

    # define items to keep
    item_names = [
        "visual",
        "cubes",
        "paper",
        "flags",
        "general",
        "paragrap",
        "sentence",
        "wordc",
        "wordm",
        "addition",
        "code",
        "counting",
        "straight",
        "wordr",
        "numberr",
        "figurer",
        "object",
        "numberf",
        "figurew",
    ]

    # define the factor structure
    factors = np.array([
        [1, 0, 0, 0, 0],
        [1, 0, 0, 0, 0],
        [1, 0, 0, 0, 0],
        [1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 1, 0],
    ])

    paths = np.array([
        [0, 0, 0, 0, 1],
        [0, 0, 0, 0, 1],
        [0, 0, 0, 0, 1],
        [0, 0, 0, 0, 1],
        [0, 0, 0, 0, 0],
    ])

    # iterate over the two schools
    for school, sdf in df.groupby("school"):

        # define the path to save results
        f = f"../data/BSEM examples/{school}"

        # select the 19 commonly used variables
        items = sdf[item_names]

        # for numerical convenience, standardize the data
        items = (items - items.mean()) / items.std()

        with pm.Model():

            # construct the model
            bsem(items, factors, paths)

            if not exists(f):

                # sample and save
                trace = pm.sample(chains=2)  # 19000, tune=1000,
                pm.save_trace(trace, f)

            else:

                trace = pm.load_trace(f)

        pm.traceplot(trace, compact=True)
        rcParams["font.size"] = 14
        plt.savefig(f"{f}/traceplot.png")

        # create a nice summary table
        loadings = pd.DataFrame(
            trace[r"$\Lambda$"].mean(axis=0).round(3),
            index=[v.title() for v in item_names],
            columns=["Spatial", "Verbal", "Speed", "Memory", "g"],
        )
        loadings.to_csv(f"{f}/loadings.csv")
        print(tabulate(loadings, tablefmt="pipe", headers="keys"))
        #
        # # correlations = pd.DataFrame(
        # #     trace[r"$\Psi$"].mean(axis=0).round(3),
        # #     index=["Spatial", "Verbal", "Speed", "Memory", "g"],
        # #     columns=["Spatial", "Verbal", "Speed", "Memory", "g"],
        # # )
        # # correlations.to_csv(f"{f}/factor_correlations.csv")
        #
        _paths = pd.DataFrame(
            trace[r"$\Gamma$"].mean(axis=0).round(3),
            index=["Spatial", "Verbal", "Speed", "Memory", "g"],
            columns=["Spatial", "Verbal", "Speed", "Memory", "g"],
        )
        _paths.to_csv(f"{f}/factor_paths.csv")
        print(tabulate(_paths, tablefmt="pipe", headers="keys"))
Example #26
with pm.Model():
    params = []
    starts = {}
    starts_arr = []

    for i in range(one_d_size):
        # param = one_rho[i] + np.random.randn()
        # params.append(pm.Uniform('d_{}'.format(i), lower=-1, upper=3, ))
        params.append(pm.Bound(pm.Normal, lower=-1.0)('d_{}'.format(i), mu=0.0, sigma=4.0))
        start = 0
        if one_Tb[i] == 0:
            start = one_rho[i] + np.random.randn()
            starts['d_{}'.format(i)] = start
        starts_arr.append(start)

    np.save("/Users/sabrinaberger/Library/Mobile Documents/com~apple~CloudDocs/CosmicDawn/T2D2 Model/STAT_DATA/CORR_DATA/inital_{}_{}.npy".format(z, one_d_size), starts_arr)

    prm = tt.as_tensor_variable(params)

    # use a DensityDist (use a lamdba function to "call" the Op)
    pm.DensityDist('likelihood', lambda v: logl(v), observed={'v': prm})

    trace = pm.sample(ndraws, tune=nburn, cores=4, start=starts)
    pm.save_trace(trace, directory="/Users/sabrinaberger/Library/Mobile Documents/com~apple~CloudDocs/CosmicDawn/T2D2 Model/STAT_DATA/TRACES/z_{}_{}.trace".format(z, one_d_size), overwrite=True)

# samples_pymc3 = np.vstack((trace['d_0'], trace['d_1'], trace['d_2'], trace['d_3'], trace['d_4'], trace['d_5'], trace['d_6'], trace['d_7'])).T
# fig = corner.corner(samples_pymc3, labels=["d_0", "d_1", "d_2", "d_3", "d_4", "d_5", "d_6", "d_7"])
# plt.show()

Example #27
	m1 = pm.Uniform('m1',lower=1, upper=7)
	m2 = pm.Uniform('m2',lower=1, upper=6)
	m3 = pm.Uniform('m3',lower=1, upper=6)
	p1 = pm.Normal('p1',mu=0.2,sd=10)
	p2 = pm.Uniform('p2',lower=1, upper=6)
	p3 = pm.Uniform('p3',lower=1, upper=6)		
	trace = pm.sample(100)	
	'''
    #pm.traceplot(trace)
    alpha = pm.Normal('alpha', mu=0, sd=10)
    beta = pm.Normal('beta', mu=0, sd=10)
    sigma = pm.Uniform('sigma', lower=0, upper=4)
    pInfect = pm.Uniform('pInfect', 0.01, 0.5, testval=0.05)
    params = [2, 4, 3, 0.2, 0.6, 0.3, 0.025, 0.88, 8, 0.56]
    yhat = pm.Deterministic(
        'yhat', alpha + beta * step(24 * 7, 6, params, pdf, gdf, pInfect))
    #func = lambda a,b,ps,d1,d2,pI: a+b*step(24*7,6,ps,d1,d2,pI)
    #yhat = func(a=alpha,b=beta,ps=params,d1=pdf,d2=gdf,pI=pInfect)
    likelihood = pm.Normal('y', mu=yhat, sd=sigma, observed=y)

    start = find_MAP()
    #step=NUTS()
    step = Metropolis()

    #trace = pm.sample(100,njobs=4,start=start, progressbar=True, verbose=False) #tune=1000
    trace = pm.sample(100, njobs=4)  #tune=1000

pm.traceplot(trace)
#print(pm.summary(trace))
pm.save_trace(trace, r'f:/_MU/model.trace')
Example #28
def run_simultaneous_hierarchial(recalculate=False,
                                 sample_size=None,
                                 max_ngals=None,
                                 outfolder='hierarchical-model'):
    enc = OrdinalEncoder(dtype=np.int32)
    sid_list = pd.read_csv('lib/subject-id-list.csv').values.T[0]

    if os.path.isfile('Xall.npy') and not recalculate:
        X_all = np.load('Xall.npy')
    else:
        X_all = make_X_all(sid_list)
        np.save('Xall.npy', X_all)

    # remove all points with weight of zero (or less..?)
    all_gal_idx, all_arm_idx = enc.fit_transform(X_all[:, [3, 4]]).T
    if max_ngals is not None and max_ngals <= all_gal_idx.max():
        gals = np.random.choice(np.arange(all_gal_idx.max() + 1),
                                max_ngals,
                                replace=False)
    else:
        gals = np.arange(all_gal_idx.max() + 1)
    X_masked = X_all[(X_all.T[2] > 0) & np.isin(all_gal_idx, gals)]
    sample = np.random.choice(len(X_masked), size=sample_size,
                              replace=False) if sample_size else np.arange(
                                  len(X_masked))
    X = X_masked[sample]

    t, R, point_weights = X.T[:3]
    # encode categorical variables into an index
    enc = OrdinalEncoder(dtype=np.int32)
    gal_idx, arm_idx = enc.fit_transform(X[:, [3, 4]]).T
    n_gals = len(np.unique(gal_idx))
    n_unique_arms = len(np.unique(arm_idx))

    print('{} galaxies, {} spiral arms, {} points'.format(
        n_gals, n_unique_arms, len(X)))

    with pm.Model() as hierarchical_model:
        print('Defining model')
        # Hyperpriors (informative for now)
        mu_psi = pm.Uniform('mu_psi', lower=0, upper=80, testval=15)
        # sigma_psi = pm.Gamma('sigma_psi', alpha=2, beta=10)
        sigma_psi = pm.HalfCauchy('sigma_psi', beta=1)
        psi_offset = pm.Normal('psi_offset', mu=0, sd=1, shape=n_gals)
        psi = pm.Deterministic('psi', mu_psi + sigma_psi * psi_offset)
        psi_radians = psi * np.pi / 180

        a = pm.Uniform('a', lower=0, upper=200, testval=1, shape=n_unique_arms)

        # define our equation for mu_r
        r_est = (a[arm_idx] / 100 * tt.exp(tt.tan(psi_radians[gal_idx]) * t))

        # define our expected error on r here we assume this sigma is the
        # same for all galaxies (not necessarily true)
        base_sigma = pm.HalfCauchy('sigma', beta=1, testval=0.02)
        sigma_y = theano.shared(np.asarray(np.sqrt(point_weights),
                                           dtype=theano.config.floatX),
                                name='sigma_y')
        sigmas = base_sigma / sigma_y

        # define our likelihood function
        likelihood = pm.Normal('R_like', mu=r_est, sd=sigmas, observed=R)

    with hierarchical_model:
        trace = pm.sample(2000, tune=1000, cores=2, target_accept=0.95)
    if outfolder is not None:
        traces_dir = os.path.join('uniform-traces', outfolder)
        try:
            os.mkdir(traces_dir)
        except FileExistsError:
            shutil.rmtree(traces_dir)
        pm.save_trace(trace, directory=traces_dir, overwrite=True)
    pm.traceplot(trace, varnames=['mu_psi', 'sigma_psi', 'sigma'])
    plt.show()
Example #29
def main(input_dir, output_dir, dataset, model_type, n_samples, n_tune,
         target_accept, n_cores, seed, init, profile):
    '''Fit log-parabola model to DATASET. 

    Parameters
    ----------
    input_dir : str
        input directory containing subdirs for each instrument with dl3 data
    output_dir : str
        where to save the results: traces and two plots
    dataset : string
        telescope name
    model_type : string
        whether to use the profile likelihood ('wstat' or 'profile') or not ('full')
    n_samples : int
        number of samples to draw
    n_tune : int
        number of tuning steps
    target_accept : float
        target accept fraction for the pymc sampler
    n_cores : int
        number of cpu cores to use
    seed : int
        random seed
    init : string
        pymc init string
    profile : bool
        whether to output debugging/profiling information to the console
    Raises
    ------
    NotImplementedError
        This does not yet work on the joint dataset, but that's good enough for me.
    '''
    np.random.seed(seed)

    if dataset == 'joint':
        #TODO need to calculate mu_b for each observation independently.
        raise NotImplementedError('This is not implemented for the joint dataset yet.')
        # observations, lo, hi = load_joint_spectrum_observation(input_dir)
    else:
        p = os.path.join(input_dir, dataset)
        observations, lo, hi = load_spectrum_observations(p)

    prepare_output(output_dir)

    # TODO: this has to happen for every observation independently
    exposure_ratio = observations[0].alpha[0]
    # print(exposure_ratio)
    on_data, off_data = get_observed_counts(observations)

    integrator = init_integrators(observations)

    print('On Data')
    display_data(on_data)

    print('Off Data')
    display_data(off_data)
    
    print('--' * 30)
    print(f'Fitting data for {dataset} in {len(observations)} observations.  ')
    print(f'Using {len(on_data)} bins with {on_data.sum()} counts in on region and {off_data.sum()} counts in off region.')
    print(f'Fit range is: {(lo, hi) * u.TeV}.')
    model = pm.Model(theano_config={'compute_test_value': 'ignore'})
    with model:
        # amplitude = pm.TruncatedNormal('amplitude', mu=4, sd=1, lower=0.01, testval=4)
        # alpha = pm.TruncatedNormal('alpha', mu=2.5, sd=1, lower=0.00, testval=2.5)
        # beta = pm.TruncatedNormal('beta', mu=0.5, sd=0.5, lower=0.00000, testval=0.5)
        amplitude = pm.HalfFlat('amplitude', testval=4)
        alpha = pm.HalfFlat('alpha', testval=2.5)
        beta = pm.HalfFlat('beta', testval=0.5)

        mu_s = forward_fold_log_parabola_symbolic(integrator, amplitude, alpha, beta, observations)
        # mu_s = forward_fold_log_parabola_analytic(amplitude, alpha, beta, observations)

        if model_type == 'wstat':
            print('Building profiled likelihood model')
            mu_b = pm.Deterministic('mu_b', calc_mu_b(mu_s, on_data, off_data, exposure_ratio))
        else:
            print('Building full likelihood model')
            mu_b = pm.HalfFlat('mu_b', shape=len(off_data))

        pm.Poisson('background', mu=mu_b, observed=off_data, shape=len(off_data))
        pm.Poisson('signal', mu=mu_s + exposure_ratio * mu_b, observed=on_data, shape=len(on_data))


    print('--' * 30)
    print('Model debug information:')
    for RV in model.basic_RVs:
        print(RV.name, RV.logp(model.test_point))

    if profile:
        model.profile(model.logpt).summary()

    print(model.check_test_point())

    print('--' * 30)
    print('Plotting landscape:')
    fig, _ = plot_landscape(model, off_data)
    fig.savefig(os.path.join(output_dir, 'landscape.pdf'))

    print('--' * 30)
    print('Printing  graphs:')
    theano.printing.pydotprint(mu_s, outfile=os.path.join(output_dir, 'graph_mu_s.pdf'), format='pdf', var_with_name_simple=True)  
    theano.printing.pydotprint(mu_s + exposure_ratio * mu_b, outfile=os.path.join(output_dir, 'graph_n_on.pdf'), format='pdf', var_with_name_simple=True)  


    print('--' * 30)
    print('Sampling likelihood:')
    with model:
        trace = pm.sample(n_samples, cores=n_cores, tune=n_tune, init=init, seed=[seed] * n_cores)

    print('--' * 30)
    print(f'Fit results for {dataset}')
    print(trace['amplitude'].mean(), trace['alpha'].mean(), trace['beta'].mean())
    print(np.median(trace['amplitude']), np.median(trace['alpha']), np.median(trace['beta']))

    print('--' * 30)
    # print('Plotting traces')
    # plt.figure()
    # varnames = ['amplitude', 'alpha', 'beta'] if model_type != 'full' else ['amplitude', 'alpha', 'beta', 'mu_b']
    # pm.traceplot(trace, varnames=varnames)
    # plt.savefig(os.path.join(output_dir, 'traces.pdf'))

    p = os.path.join(output_dir, 'num_samples.txt')
    with open(p, "w") as text_file:
        text_file.write(f'\\num{{{n_samples}}}')

    p = os.path.join(output_dir, 'num_chains.txt')
    with open(p, "w") as text_file:
        text_file.write(f'\\num{{{n_cores}}}')
    
    p = os.path.join(output_dir, 'num_tune.txt')
    with open(p, "w") as text_file:
        text_file.write(f'\\num{{{n_tune}}}')

    plt.figure()
    pm.energyplot(trace)
    plt.savefig(os.path.join(output_dir, 'energy.pdf'))

    # plt.figure()
    # pm.autocorrplot(trace, burn=n_tune)
    # plt.savefig(os.path.join(output_dir, 'autocorr.pdf'))
    
    plt.figure()
    pm.forestplot(trace, varnames=['amplitude', 'alpha', 'beta'])
    plt.savefig(os.path.join(output_dir, 'forest.pdf'))
    

    trace_output = os.path.join(output_dir, 'traces')
    print(f'Saving traces to {trace_output}')
    with model:
        pm.save_trace(trace, trace_output, overwrite=True)
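
A hypothetical direct call to main; the directories, dataset name, and sampler settings are placeholders:

# Hypothetical invocation; all argument values are placeholders.
main(input_dir='dl3', output_dir='results', dataset='magic',
     model_type='wstat', n_samples=3000, n_tune=1500, target_accept=0.8,
     n_cores=4, seed=42, init='auto', profile=False)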
Example #30
    with model:
        return pm.find_MAP(model=model)


def sample(model):

    with model:
        trace = pm.sample(2000,
                          tune=3800,
                          init='advi+adapt_diag',
                          nuts_kwargs={
                              'target_accept': .98,
                              'max_treedepth': 25
                          })
        #trace = pm.sample(2000, tune=5500, init = 'jitter+adapt_diag', nuts_kwargs={'target_accept': .90, 'max_treedepth': 25})

        return trace


if __name__ == '__main__':

    #model = load_multi_model_moons()
    model = load_model_align_primary()
    #model = load_model_primary()
    #model = load_multi_model_primary()
    print(model.vars, model.test_point)
    trace = sample(model)
    pm.save_trace(trace)
    pm.traceplot(trace)
    plt.show()
Example #31
if not os.path.exists(cache_file_hier):

    with neural_network:
        #fit model
        trace_hier = pm.sample(draws=nsamples_hier,
                               init='advi+adapt_diag',
                               n_init=ninit,
                               tune=ninit // 2,
                               chains=nchains_hier,
                               cores=ncores_hier,
                               nuts_kwargs={'target_accept': 0.90},
                               discard_tuned_samples=True,
                               compute_convergence_checks=True,
                               progressbar=False)
    pm.save_trace(trace_hier, directory=cache_file_hier)
else:
    trace_hier = pm.load_trace(cache_file_hier, model=neural_network)

print("Done...")

if not os.path.exists(cache_file_samples):

    samples_tmp = defaultdict(list)
    samples = {}

    for layer_name in layer_names:
        for mu, sd in zip(
                trace_hier.get_values(layer_name,
                                      burn=nsamples_hier // 2,
                                      combine=True),
Example #32
    tslx_obs = pm.Normal('tslx_obs',
                         mu=tslx_mod,
                         sigma=tilt_std,
                         observed=tiltslx)
    tsly_obs = pm.Normal('tsly_obs',
                         mu=tsly_mod,
                         sigma=tilt_std,
                         observed=tiltsly)
    tstx_obs = pm.Normal('tstx_obs',
                         mu=tstx_mod,
                         sigma=tilt_std,
                         observed=tiltstx)
    tsty_obs = pm.Normal('tsty_obs',
                         mu=tsty_mod,
                         sigma=tilt_std,
                         observed=tiltsty)

    x_obs = pm.Normal('x_obs', mu=x_mod, sigma=gps_std, observed=gps)
    stack_obs = pm.Normal('stack_obs',
                          mu=stack_mod,
                          sigma=tilt_std * 1e+6,
                          observed=stack)
    trace = pm.sample(Niter, init='advi', tune=100, target_accept=0.85)
map_estimate = pm.find_MAP(model=model)
results = {}
results['MAP'] = map_estimate
results['iterations'] = Niter
pickle.dump(results,
            open(path_results + 'res' + str(Niter) + '_UF.pickle', 'wb'))
pm.save_trace(trace,
              path_results + 'trace' + str(Niter) + '_UF',
              overwrite=True)
#pm.traceplot(trace)
Example #33
                        help='Subject id of galaxy to perform trace on')
    parser.add_argument('--outfolder',
                        '-o',
                        metavar='/path/to/directory',
                        default=False,
                        help='Output directory')
    parser.add_argument('--plot',
                        '-p',
                        action='store_true',
                        default=False,
                        help='Should plot trace and show to screen')
    args = parser.parse_args()

    arms = get_arms(args.subject_id)
    pa, sigma_pa = arms[0].get_parent().get_pitch_angle(arms)
    gal_pa_est = pa * arms[0].chirality

    trace = get_logsp_trace_from_arms(arms)

    if args.outfolder:
        traces_dir = os.path.join(str(args.outfolder), str(args.subject_id))
        try:
            os.mkdir(traces_dir)
        except FileExistsError:
            pass
        pm.save_trace(trace, directory=traces_dir, overwrite=True)

    if args.plot:
        pm.traceplot(trace, lines={'psi': gal_pa_est})
        plt.show()