Example #1
    def _build_model(self, sharpes, corr):
        """
        Build the entire author model. The model is sufficiently simple to
        specify entirely in one function.

        Parameters
        ----------
        sharpes : pd.DataFrame
            Long-format DataFrame of in-sample Sharpe ratios (from user-run
            backtests), indexed by user, algorithm and code ID.
            Note that currently, backtests are deduplicated based on code id.
            See fit_authors for more information.
        corr : np.ndarray
            Correlation matrix of returns streams (from backtests), estimated
            using Ledoit-Wolf shrinkage.
            See fit_authors for more information.
        """
        with pm.Model() as model:
            mu_global = pm.Normal('mu_global', mu=0, sd=3)

            mu_author_sd = pm.HalfNormal('mu_author_sd', sd=1)
            mu_author_raw = pm.Normal('mu_author_raw',
                                      mu=0,
                                      sd=1,
                                      shape=self.num_authors)
            mu_author = pm.Deterministic('mu_author',
                                         mu_author_sd * mu_author_raw)

            mu_algo_sd = pm.HalfNormal('mu_algo_sd', sd=1)
            mu_algo_raw = pm.Normal('mu_algo_raw',
                                    mu=0,
                                    sd=1,
                                    shape=self.num_algos)
            mu_algo = pm.Deterministic('mu_algo', mu_algo_sd * mu_algo_raw)

            mu_backtest = \
                pm.Deterministic('mu_backtest',
                                 mu_global
                                 + mu_author[self.author_to_backtest_encoding]
                                 + mu_algo[self.algo_to_backtest_encoding])

            sigma_backtest = pm.Deterministic(
                'sigma_backtest',
                tt.sqrt(APPROX_BDAYS_PER_YEAR / sharpes.meta_trading_days))

            cov = corr * sigma_backtest[:, None] * sigma_backtest[None, :]

            alpha_author = pm.Deterministic('alpha_author',
                                            mu_global + mu_author)

            alpha_algo = \
                pm.Deterministic('alpha_algo',
                                 mu_global
                                 + mu_author[self.author_to_algo_encoding]
                                 + mu_algo)

            sharpe = pm.MvNormal('sharpe',
                                 mu=mu_backtest,
                                 cov=cov,
                                 shape=self.num_backtests,
                                 observed=sharpes.sharpe_ratio)

        return model
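
    # A minimal usage sketch (hypothetical names, not from the source): assuming a
    # builder object exposing this method together with prepared `sharpes` and
    # `corr` inputs, the returned model can be sampled in the usual PyMC3 way:
    #
    #     model = builder._build_model(sharpes, corr)
    #     with model:
    #         trace = pm.sample(2000, tune=1000)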
Example #2
File: bayesian.py  Project: dxcv/Skywalker
def model_best(y1, y2, samples=1000):
    """Bayesian Estimation Supersedes the T-Test

    This model runs a Bayesian hypothesis test of whether y1 and y2 come
    from the same distribution. Returns are assumed to be T-distributed.

    In addition, computes annual volatility and Sharpe ratio of the in- and
    out-of-sample periods.

    This model replicates the example used in:
    Kruschke, John. (2012) Bayesian estimation supersedes the t
    test. Journal of Experimental Psychology: General.

    Parameters
    ----------
    y1 : array-like
        Array of returns (e.g. in-sample)
    y2 : array-like
        Array of returns (e.g. out-of-sample)
    samples : int, optional
        Number of posterior samples to draw.

    Returns
    -------
    model : pymc3.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    See Also
    --------
    plot_stoch_vol : plotting of stochastic volatility model
    """

    y = np.concatenate((y1, y2))

    mu_m = np.mean(y)
    mu_p = 0.000001 * 1 / np.std(y)**2

    sigma_low = np.std(y) / 1000
    sigma_high = np.std(y) * 1000
    with pm.Model() as model:
        group1_mean = pm.Normal('group1_mean',
                                mu=mu_m,
                                tau=mu_p,
                                testval=y1.mean())
        group2_mean = pm.Normal('group2_mean',
                                mu=mu_m,
                                tau=mu_p,
                                testval=y2.mean())
        group1_std = pm.Uniform('group1_std',
                                lower=sigma_low,
                                upper=sigma_high,
                                testval=y1.std())
        group2_std = pm.Uniform('group2_std',
                                lower=sigma_low,
                                upper=sigma_high,
                                testval=y2.std())
        nu = pm.Exponential('nu_minus_two', 1 / 29., testval=4.) + 2.

        returns_group1 = StudentT('group1',
                                  nu=nu,
                                  mu=group1_mean,
                                  lam=group1_std**-2,
                                  observed=y1)
        returns_group2 = StudentT('group2',
                                  nu=nu,
                                  mu=group2_mean,
                                  lam=group2_std**-2,
                                  observed=y2)

        diff_of_means = pm.Deterministic('difference of means',
                                         group2_mean - group1_mean)
        pm.Deterministic('difference of stds', group2_std - group1_std)
        pm.Deterministic(
            'effect size', diff_of_means / pm.math.sqrt(
                (group1_std**2 + group2_std**2) / 2))

        pm.Deterministic(
            'group1_annual_volatility',
            returns_group1.distribution.variance**.5 * np.sqrt(252))
        pm.Deterministic(
            'group2_annual_volatility',
            returns_group2.distribution.variance**.5 * np.sqrt(252))

        pm.Deterministic(
            'group1_sharpe', returns_group1.distribution.mean /
            returns_group1.distribution.variance**.5 * np.sqrt(252))
        pm.Deterministic(
            'group2_sharpe', returns_group2.distribution.mean /
            returns_group2.distribution.variance**.5 * np.sqrt(252))

        step = pm.NUTS()

        trace = pm.sample(samples, step)
    return model, trace
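
# A minimal usage sketch (the synthetic return series below are purely illustrative):
import numpy as np
import pymc3 as pm

in_sample = np.random.normal(0.001, 0.01, size=250)        # e.g. daily in-sample returns
out_of_sample = np.random.normal(0.0005, 0.012, size=120)  # e.g. daily out-of-sample returns
best_model, best_trace = model_best(in_sample, out_of_sample, samples=500)
print(pm.summary(best_trace))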
Example #3
    def build_model(self):

        base_numbers = self.data.n_safe.unique()
        choices = self.data.chose_risky.values

        safe_prior_mu = np.mean(np.log(base_numbers))
        safe_prior_sd = np.std(np.log(base_numbers))

        self.coords = {
            "subject":
            self.unique_subjects,
            "presentation": ['first', 'second'],
            "risky_prior_mu_regressors":
            self.design_matrices['risky_prior_mu'].design_info.term_names,
            "risky_prior_sd_regressors":
            self.design_matrices['risky_prior_sd'].design_info.term_names,
            "evidence_sd1_regressors":
            self.design_matrices['evidence_sd1'].design_info.term_names,
            "evidence_sd2_regressors":
            self.design_matrices['evidence_sd2'].design_info.term_names
        }

        with pm.Model(coords=self.coords) as self.model:

            inputs = self._get_model_input()
            for key, value in inputs.items():
                inputs[key] = pm.Data(key, value)

            def build_hierarchical_nodes(name, mu_intercept=0.0, sigma=.5):
                nodes = {}

                mu = np.zeros(self.design_matrices[name].shape[1])
                mu[0] = mu_intercept

                nodes[f'{name}_mu'] = pm.Normal(f"{name}_mu",
                                                mu=mu,
                                                sigma=sigma,
                                                dims=f'{name}_regressors')
                nodes[f'{name}_sd'] = pm.HalfCauchy(f'{name}_sd',
                                                    .5,
                                                    dims=f'{name}_regressors')
                nodes[f'{name}_offset'] = pm.Normal(
                    f'{name}_offset',
                    mu=0,
                    sd=1,
                    dims=('subject', f'{name}_regressors'))
                nodes[name] = pm.Deterministic(
                    name,
                    nodes[f'{name}_mu'] +
                    nodes[f'{name}_sd'] * nodes[f'{name}_offset'],
                    dims=('subject', f'{name}_regressors'))

                nodes[f'{name}_trialwise'] = softplus(tt.sum(nodes[name][inputs['subject_ix']] * \
                                                               np.asarray(self.design_matrices[name]), 1))

                return nodes

            # Hyperpriors for group nodes

            nodes = {}

            nodes.update(build_hierarchical_nodes('risky_prior_mu',
                                                  mu_intercept=np.log(20.)))
            nodes.update(build_hierarchical_nodes('risky_prior_sd',
                                                  mu_intercept=1.))
            nodes.update(build_hierarchical_nodes('evidence_sd1',
                                                  mu_intercept=1.))
            nodes.update(build_hierarchical_nodes('evidence_sd2',
                                                  mu_intercept=1.))

            evidence_sd = tt.stack((nodes['evidence_sd1_trialwise'],
                                    nodes['evidence_sd2_trialwise']), 0)

            post_risky_mu, post_risky_sd = get_posterior(
                nodes['risky_prior_mu_trialwise'],
                nodes['risky_prior_sd_trialwise'], inputs['risky_mu'],
                evidence_sd[inputs['risky_ix'],
                            np.arange(self.data.shape[0])])

            post_safe_mu, post_safe_sd = get_posterior(
                safe_prior_mu, safe_prior_sd, inputs['safe_mu'],
                evidence_sd[inputs['safe_ix'],
                            np.arange(self.data.shape[0])])

            diff_mu, diff_sd = get_diff_dist(post_risky_mu, post_risky_sd,
                                             post_safe_mu, post_safe_sd)

            p = pm.Deterministic(
                'p', cumulative_normal(tt.log(.55), diff_mu, diff_sd))

            ll = pm.Bernoulli('ll_bernoulli', p=p, observed=choices)
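
            # The helper functions used above (softplus, get_posterior, get_diff_dist,
            # cumulative_normal) are defined elsewhere in this project and are not shown
            # here. Under the usual Gaussian-conjugacy assumptions, plausible definitions
            # (a sketch, not the project's actual code) could look like:
            #
            #     def softplus(x):
            #         return tt.log(1 + tt.exp(x))
            #
            #     def get_posterior(prior_mu, prior_sd, evidence_mu, evidence_sd):
            #         post_var = 1. / (1. / prior_sd**2 + 1. / evidence_sd**2)
            #         post_mu = post_var * (prior_mu / prior_sd**2 + evidence_mu / evidence_sd**2)
            #         return post_mu, tt.sqrt(post_var)
            #
            #     def get_diff_dist(mu1, sd1, mu2, sd2):
            #         return mu1 - mu2, tt.sqrt(sd1**2 + sd2**2)
            #
            #     def cumulative_normal(x, mu, sd):
            #         return 0.5 * (1. + tt.erf((x - mu) / (sd * tt.sqrt(2.))))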
Example #4
def SIR_with_change_points(
    new_cases_obs,
    change_points_list,
    date_begin_simulation,
    num_days_sim,
    diff_data_sim,
    N,
    priors_dict=None,
    add_week_end_factor=False
):
    """
        Parameters
        ----------
        new_cases_obs : list or array
            Timeseries (day over day) of newly reported cases (not the total number)

        change_points_list : list of dicts
            List of dictionaries, each corresponding to one change point.

            Each dict can have the following key-value pairs. If a pair is not provided,
            the respective default is used.
                * pr_mean_date_begin_transient :     datetime.datetime, NO default
                * pr_median_lambda :                 number, same as default priors, below
                * pr_sigma_lambda :                  number, same as default priors, below
                * pr_sigma_date_begin_transient :    number, 3
                * pr_median_transient_len :          number, 3
                * pr_sigma_transient_len :           number, 0.3

        date_begin_simulation : datetime.datetime
            The date at which the simulation begins

        num_days_sim : integer
            Number of days to forecast into the future

        diff_data_sim : integer
            Number of days that the simulation-begin predates the first data point in
            `new_cases_obs`. This is necessary so the model can fit the reporting delay.
            Set this parameter to a value larger than what you expect to find
            for the reporting delay.

        N : number
            The population size. For Germany, we used 83e6

        priors_dict : dict
            Dictionary of the prior assumptions

            Possible key-value pairs (and default values) are:
                * pr_beta_I_begin :        number, default = 100
                * pr_median_lambda_0 :     number, default = 0.4
                * pr_sigma_lambda_0 :      number, default = 0.5
                * pr_median_mu :           number, default = 1/8
                * pr_sigma_mu :            number, default = 0.2
                * pr_median_delay :        number, default = 8
                * pr_sigma_delay :         number, default = 0.2
                * pr_beta_sigma_obs :      number, default = 10
                * week_end_days :          tuple, default = (6,7)
                * pr_mean_weekend_factor : number, default = 0.7
                * pr_sigma_weekend_factor : number, default = 0.3

        add_week_end_factor : bool
            Whether to add a prior that cases are reported less on weekends. Multiplies the new case
            numbers on weekends by a number between 0 and 1 drawn from a beta distribution, parametrised
            by pr_mean_weekend_factor and pr_sigma_weekend_factor; which days count as weekends is set by
            week_end_days, where 6 and 7 correspond to Saturday and Sunday, respectively (the default).

        Returns
        -------
        : pymc3.Model
            Returns an instance of pymc3 model with the change points

    """
    if priors_dict is None:
        priors_dict = dict()

    default_priors = dict(
        pr_beta_I_begin=100,
        pr_median_lambda_0=0.4,
        pr_sigma_lambda_0=0.5,
        pr_median_mu=1 / 8,
        pr_sigma_mu=0.2,
        pr_median_delay=8,
        pr_sigma_delay=0.2,
        pr_beta_sigma_obs=10,
        week_end_days=(6, 7),
        pr_mean_weekend_factor=0.7,
        pr_sigma_weekend_factor=0.3
    )
    default_priors_change_points = dict(
        pr_median_lambda=default_priors["pr_median_lambda_0"],
        pr_sigma_lambda=default_priors["pr_sigma_lambda_0"],
        pr_sigma_date_begin_transient=3,
        pr_median_transient_len=3,
        pr_sigma_transient_len=0.3,
        pr_mean_date_begin_transient=None,
    )

    if not add_week_end_factor:
        del default_priors['week_end_days']
        del default_priors['pr_mean_weekend_factor']
        del default_priors['pr_sigma_weekend_factor']

    for prior_name in priors_dict.keys():
        if prior_name not in default_priors:
            raise RuntimeError(f"Prior with name {prior_name} not known")
    for change_point in change_points_list:
        for prior_name in change_point.keys():
            if prior_name not in default_priors_change_points:
                raise RuntimeError(f"Prior with name {prior_name} not known")

    for prior_name, value in default_priors.items():
        if prior_name not in priors_dict:
            priors_dict[prior_name] = value
            print(f"{prior_name} was set to default value {value}")
    for prior_name, value in default_priors_change_points.items():
        for i_cp, change_point in enumerate(change_points_list):
            if prior_name not in change_point:
                change_point[prior_name] = value
                print(
                    f"{prior_name} of change point {i_cp} was set to default value {value}"
                )

    if (
        diff_data_sim
        < priors_dict["pr_median_delay"]
        + 3 * priors_dict["pr_median_delay"] * priors_dict["pr_sigma_delay"]
    ):
        raise RuntimeError("diff_data_sim is to small compared to the prior delay")
    if num_days_sim < len(new_cases_obs) + diff_data_sim:
        raise RuntimeError(
            "Simulation ends before the end of the data. Increase num_days_sim."
        )

    # ------------------------------------------------------------------------------ #
    # Model and prior implementation
    # ------------------------------------------------------------------------------ #

    with pm.Model() as model:
        # all pm functions now apply on the model instance
        # true number of cases at the beginning of the loaded data (we do not know the real number)
        I_begin = pm.HalfCauchy(name="I_begin", beta=priors_dict["pr_beta_I_begin"])

        # fraction of people that are newly infected each day
        lambda_list = []
        lambda_list.append(
            pm.Lognormal(
                name="lambda_0",
                mu=np.log(priors_dict["pr_median_lambda_0"]),
                sigma=priors_dict["pr_sigma_lambda_0"],
            )
        )
        for i, cp in enumerate(change_points_list):
            lambda_list.append(
                pm.Lognormal(
                    name=f"lambda_{i + 1}",
                    mu=np.log(cp["pr_median_lambda"]),
                    sigma=cp["pr_sigma_lambda"],
                )
            )

        # list of start dates of the transient periods of the change points
        tr_begin_list = []
        dt_before = date_begin_simulation
        for i, cp in enumerate(change_points_list):
            dt_begin_transient = cp["pr_mean_date_begin_transient"]
            if dt_before is not None and dt_before > dt_begin_transient:
                raise RuntimeError("Dates of change points are not temporally ordered")

            prior_mean = (
                dt_begin_transient - date_begin_simulation
            ).days  # convert the provided date format (argument) into days (a number)

            tr_begin = pm.Normal(
                name=f"transient_begin_{i}",
                mu=prior_mean,
                sigma=cp["pr_sigma_date_begin_transient"],
            )
            tr_begin_list.append(tr_begin)
            dt_before = dt_begin_transient

        # same for transient times
        tr_len_list = []
        for i, cp in enumerate(change_points_list):
            tr_len = pm.Lognormal(
                name=f"transient_len_{i}",
                mu=np.log(cp["pr_median_transient_len"]),
                sigma=cp["pr_sigma_transient_len"],
            )
            tr_len_list.append(tr_len)

        # build the time-dependent spreading rate
        lambda_t_list = [lambda_list[0] * tt.ones(num_days_sim)]
        lambda_before = lambda_list[0]

        for tr_begin, tr_len, lambda_after in zip(
            tr_begin_list, tr_len_list, lambda_list[1:]
        ):
            lambda_t = mh.smooth_step_function(
                start_val=0,
                end_val=1,
                t_begin=tr_begin,
                t_end=tr_begin + tr_len,
                t_total=num_days_sim,
            ) * (lambda_after - lambda_before)
            lambda_before = lambda_after
            lambda_t_list.append(lambda_t)
        lambda_t = sum(lambda_t_list)

        # fraction of people that recover each day, recovery rate mu
        mu = pm.Lognormal(
            name="mu",
            mu=np.log(priors_dict["pr_median_mu"]),
            sigma=priors_dict["pr_sigma_mu"],
        )

        # delay in days between contracting the disease and being recorded
        delay = pm.Lognormal(
            name="delay",
            mu=np.log(priors_dict["pr_median_delay"]),
            sigma=priors_dict["pr_sigma_delay"],
        )

        # prior of the error of observed cases
        sigma_obs = pm.HalfCauchy("sigma_obs", beta=priors_dict["pr_beta_sigma_obs"])

        # -------------------------------------------------------------------------- #
        # training the model with loaded data provided as argument
        # -------------------------------------------------------------------------- #

        S_begin = N - I_begin
        S, I, new_I = _SIR_model(
            lambda_t=lambda_t, mu=mu, S_begin=S_begin, I_begin=I_begin, N=N
        )

        new_cases_inferred = mh.delay_cases(
            new_I_t=new_I,
            len_new_I_t=num_days_sim,
            len_out=num_days_sim - diff_data_sim,
            delay=delay,
            delay_diff=diff_data_sim,
        )


        if add_week_end_factor:
            week_end_factor = pm.Beta('weekend_factor', mu=priors_dict['pr_mean_weekend_factor'],
                                                        sigma=priors_dict['pr_sigma_weekend_factor'])
            mask = np.zeros(num_days_sim - diff_data_sim)
            for i in range(num_days_sim - diff_data_sim):
                date_curr = date_begin_simulation + datetime.timedelta(days=i + diff_data_sim + 1)
                if date_curr.isoweekday() in priors_dict['week_end_days']:
                    mask[i] = 1
            multiplication_vec = np.ones(num_days_sim - diff_data_sim) - (1 - week_end_factor) * mask
            new_cases_inferred_eff = new_cases_inferred * multiplication_vec
        else:
            new_cases_inferred_eff = new_cases_inferred

        # likelihood of the model:
        # observed cases are distributed following studentT around the model.
        # we want to approximate a Poisson distribution of new cases.
        # we choose nu=4 to get heavy tails and robustness to outliers.
        # https://www.jstor.org/stable/2290063
        num_days_data = new_cases_obs.shape[-1]
        pm.StudentT(
            name="_new_cases_studentT",
            nu=4,
            mu=new_cases_inferred_eff[:num_days_data],
            sigma=tt.abs_(new_cases_inferred[:num_days_data] + 1) ** 0.5
            * sigma_obs,  # +1 and tt.abs to avoid nans
            observed=new_cases_obs,
        )

        # add these observables to the model so we can extract a time series of them
        # later via e.g. `model.trace['lambda_t']`
        pm.Deterministic("lambda_t", lambda_t)
        pm.Deterministic("new_cases", new_cases_inferred)
        pm.Deterministic("new_cases_eff", new_cases_inferred_eff)

    return model
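
# A minimal usage sketch (illustrative values, not the original analysis), assuming
# `new_cases` is a 1d array of daily reported case counts:
#
#     import datetime
#     change_points = [dict(pr_mean_date_begin_transient=datetime.datetime(2020, 3, 9))]
#     model = SIR_with_change_points(
#         new_cases_obs=new_cases,
#         change_points_list=change_points,
#         date_begin_simulation=datetime.datetime(2020, 2, 24),
#         num_days_sim=len(new_cases) + 16 + 14,   # data + diff_data_sim + forecast horizon
#         diff_data_sim=16,
#         N=83e6,
#     )
#     with model:
#         trace = pm.sample(500, tune=500)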
Example #5
ax[0].plot(x, y, 'C0.')
ax[0].set_xlabel('x')
ax[0].set_ylabel('y', rotation=0)
ax[0].plot(x, y_real, 'k')
az.plot_kde(y, ax=ax[1])
ax[1].set_xlabel('y')
plt.tight_layout()

# Fit posterior with MCMC instead of analytically (for simplicity and flexibility)
# This is the same as BAP code, except we fix the noise variance to a constant.

with pm.Model() as model_g:
    w0 = pm.Normal('w0', mu=0, sd=10)
    w1 = pm.Normal('w1', mu=0, sd=1)
    #ϵ = pm.HalfCauchy('ϵ', 5)
    mu = pm.Deterministic('mu', w0 + w1 * x)
    #y_pred = pm.Normal('y_pred', mu=μ, sd=ϵ, observed=y)
    y_pred = pm.Normal('y_pred', mu=mu, sd=noiseSD, observed=y)
    trace_g = pm.sample(1000, cores=1, chains=2)

az.plot_trace(trace_g, var_names=['w0', 'w1'])

az.plot_pair(trace_g, var_names=['w0', 'w1'], plot_kwargs={'alpha': 0.1})
pml.savefig('linreg_2d_bayes_post_noncentered_data.pdf')
plt.show()

# To reduce the correlation between the intercept (w0) and slope (w1), we can center the data
x = x_orig - x_orig.mean()

# or standardize the data
#x = (x - x.mean())/x.std()
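
# A sketch of refitting on the centered data (assuming x_orig, y and noiseSD from the
# data-generation step, which is not shown in this fragment, are still in scope); the
# posterior correlation between w0 and w1 should be much weaker:
#
#     with pm.Model() as model_centered:
#         w0 = pm.Normal('w0', mu=0, sd=10)
#         w1 = pm.Normal('w1', mu=0, sd=1)
#         mu = pm.Deterministic('mu', w0 + w1 * x)
#         y_pred = pm.Normal('y_pred', mu=mu, sd=noiseSD, observed=y)
#         trace_centered = pm.sample(1000, cores=1, chains=2)
#
#     az.plot_pair(trace_centered, var_names=['w0', 'w1'], plot_kwargs={'alpha': 0.1})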
Example #6
def main():

	st.title('Flu Inference')

	st.write("The full notebook can be found [here](https://github.com/benlevyx/modelling-infectious-disease/tree/master/notebooks/bayesian_model.ipynb).")

	st.write('## Gathering state-level features')

	st.write("The purpose of this section is to gather state-level features that may affect the degree to which a given state is suspectible or resistant to a virus such as the flu or Covid-19. Collecting these state-level characteristics can help us identify which features are responsible for the correlation in viral infection rates between states, and thus can also be used to quantify the correlation between states based on fundamental attributes of the states rather than just the raw wILI time series.")

	st.write("The density of a state is a natural feature to include because the denser a location, the more easily a virus can spread (look no further than NYC right now). However, it wouldn't make sense to report the density of a state because, for example, the high population density in Manhattan shouldn't be influenced by the fact that upstate New York State has a massive amount of scarsely populated land. Instead, a more sensible measure is a weighted average of the densities of each county in a given state, where the weights are the fraction of the state population that lives in the given county.")

	pred_dir = config.data / 'state_predictors'  

	# dataset that reports the land area in square miles of each county in the U.S.
	land_df = pd.read_csv(pred_dir / 'land_area.csv')

	# dataset that reports the population of each county in the U.S.
	popn_df = pd.read_csv(pred_dir / 'population.csv')


	# st.write(land_df.head())
	# st.write(popn_df.head())

	land_df = land_df[['Areaname', 'LND010190D']]
	popn_df = popn_df[['Areaname', 'PST045200D']]

	# limit analysis to Lower 48 states
	lower_48 = ["AL", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA", 
            "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", 
            "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", 
            "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", 
            "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]

	state_end = tuple(', ' + abbrev for abbrev in lower_48)

	# ignore AK and HI
	filtered_land_df = land_df[land_df.Areaname.str.endswith(state_end)]
	filtered_popn_df = popn_df[land_df.Areaname.str.endswith(state_end)]

	# There are 5 counties in Virginia that are included twice in both the land area and population datasets
	# so we need to ignore the duplicated rows
	virginia_counties_df = filtered_land_df[filtered_land_df.Areaname.str.endswith(', VA')]
	indices_to_delete = []
	counties_set = set()
	for index, row in virginia_counties_df.iterrows():
	    county = row['Areaname']
	    if county not in counties_set:
	        counties_set.add(county)
	    else:
	        indices_to_delete.append(index)
	        
	filtered_land_df = filtered_land_df[~filtered_land_df.index.isin(indices_to_delete)]
	filtered_popn_df = filtered_popn_df[~filtered_popn_df.index.isin(indices_to_delete)]

	# merge land area and population datasets
	combined_df = pd.merge(filtered_land_df, filtered_popn_df, on='Areaname', how='inner')

	# extract state from Areaname column
	combined_df['state'] = combined_df.Areaname.str[-2:]
	combined_df.head()

	# rename column names
	combined_df.rename(columns={'Areaname': 'county', 'LND010190D': 'area', 'PST045200D': 'popn'}, inplace=True)

	# fill in missing value of land area of Broomfield, CO from Wikipedia page
	combined_df.loc[combined_df.county == 'Broomfield, CO', 'area'] = 33.00

	# calculate density of each county by dividing population by land area
	combined_df['density'] = combined_df['popn'] / combined_df['area']

	st.write(combined_df.head(10))

	# calculate total population of each state across all counties
	state2pop = combined_df.groupby('state').agg({'popn': sum}).to_dict()['popn']
	combined_df['state_popn'] = [state2pop[state] for state in combined_df.state]
	combined_df.head()

	# calculate the density metric for each state by weighting the density of each county by the fraction of
	# the state population that lives in the given county
	state2density_metric = (combined_df.groupby('state').
	                        apply(lambda x: round(x['popn'] * (x['density'] ** 1) / x['state_popn'], 1))
	                        .groupby('state').sum()).to_dict()


	# sort states in order of decreasing density
	sorted_density_metrics = sorted(list(state2density_metric.values()), reverse=True)
	density_metric2state = {v: k for k, v in state2density_metric.items()}
	ordered_density_metric2state = {x: density_metric2state[x] for x in sorted_density_metrics}

	# create dataframe with this first state-level feature
	state_stats_df = pd.DataFrame(ordered_density_metric2state.keys(), columns=['density_metric'], 
	                              index=ordered_density_metric2state.values())


	st.write(state_stats_df)

	st.write("The next feature is the average latitude of each state.")

	latlong_df = pd.read_csv(pred_dir / 'statelatlong.csv')
	latlong_df.head()

	# include this latitude value in the feature dataframe
	state_stats_df1 = (pd.merge(state_stats_df, latlong_df[['Latitude', 'State']],
	                           left_index=True, right_on='State').drop(columns=['State']))
	state_stats_df1.index = ordered_density_metric2state.values()

	st.write(state_stats_df1)

	st.write("The next feature is whether each Lower 48 state borders either the Atlantic or Pacific Ocean. This can potentially be an important feature because tourists and immigrants usually fly into the country in a coastal location")

	coastal_states = set('ME NH MA RI CT NY NJ PA MD DE VA NC SC GA FL WA OR CA'.split())
	state_stats_df1['is_coastal'] = [int(state in coastal_states) for state in state_stats_df.index]

	st.write(state_stats_df1)

	st.write("A potentially important state-level feature is the number of airline passengers arriving in the state. As we've seen with Covid-19, clusters have started in particular locations because visiters have come into these places with the virus from foreigns countries. The most readily available source for this data are the 'List of airports in [state]' Wikipedia article for each state. Each of these pages contains the number of commerical passenger boardings in 2016 for each airport in the state. Although commerical passenger arrivals are not included, it's reasonable to assume that the number of boardings and arrivals are closely related to each other. The values in the dictionary below represents the sum of the number of commerical passenger arrivals for the major airports in each state. Note: the number of major airports variesby state (e.g. the only major airport in Massachusetts in Logan, there are no major airports in Delaware, and there are three major airports in Kentucky (Cincinatti, Louisville and Lexington). Finally, the number of annual boardings in each state in normalized by the population of the given state, as this metric represents the relative influence of air traffic on the given state.")

	state2passengers = {'NY': 50868391, 
	                    'PA': 15285948 + 4670954 + 636916, 
	                    'NJ': 19923009 + 589091,
	                    'MD': 13371816,
	                    'IL': round((83245472 / 2) + (22027737 / 2)),
	                    'MA': 17759044,
	                    'VA': 11470854 + 10596942 + 1777648 + 1602631,
	                    'MO': 6793076 + 5391557 + 462126,
	                    'CA': (39636042 + 25707101 + 10340164 + 5934639 + 5321603 + 5217242 
	                           + 4969366 + 2104625 + 2077892 + 1386357 + 995801 + 761298),
	                    'MI': 16847135 + 1334979 + 398508,
	                    'CO': 28267394 + 657694,
	                    'MN': 18123844,
	                    'TX': 31283579 + 20062072 + 7554596 + 6285181 + 6095545 + 4179994 + 1414376,
	                    'RI': 1803000,
	                    'GA': 50501858 + 1056265,
	                    'OH': 4083476 + 3567864 + 1019922 + 685553,
	                    'CT': 2982194,
	                    'IN': 4216766 + 360369 + 329957 + 204352,
	                    'DE': 0,
	                    'KY': 3269979 + 1631494 + 638316,
	                    'FL': (20875813 + 20283541 + 14263270 + 9194994 + 4239261 + 3100624 + 2729129 
	                           + 1321675 + 986766 + 915672 + 589860),
	                    'NE': 2127387 + 162876,
	                    'UT': 11143738,
	                    'OR': 9071154,
	                    'TN': 6338517 + 2016089 + 887103,
	                    'LA': 5569705 + 364200,
	                    'OK': 1796473 + 1342315,
	                    'NC': 21511880 + 5401714 + 848261,
	                    'KS': 781944,
	                    'WA': 21887110 + 1570652,
	                    'WI': 3496724 + 1043185 + 348026 + 314909,
	                    'NH': 995403,
	                    'AL': 1304467 + 527801 + 288209 + 173210,
	                    'NM': 2341719,
	                    'IA': 1216357 + 547786,
	                    'AZ': 20896265 + 1594594 + 705731,
	                    'SC': 1811695 + 991276 + 944849 + 553658,
	                    'AR': 958824 + 673810,
	                    'WV': 213412,
	                    'ID': 1633507,
	                    'NV': 22833267 + 1771864,
	                    'ME': 886343 + 269013,
	                    'MS': 491464 + 305157,
	                    'VT': 593311,
	                    'SD': 510105 + 272537,
	                    'ND': 402976 + 273980 + 150634 + 132557 + 68829,
	                    'MT': 553245 + 423213 + 381582 + 247816 + 176730 + 103239,
	                    'WY': 342044 + 92805}

	# population of each state according to the 2010 census
	state2popn_2010 = {
	        'AL': 4779736,
	        'AR': 2915918,
	        'AZ': 6392017,
	        'CA': 37253956,
	        'CO': 5029196,
	        'CT': 3574097,
	        'DE': 897934,
	        'FL': 18801310,
	        'GA': 9687653,
	        'IA': 3046355,
	        'ID': 1567582,
	        'IL': 12830632,
	        'IN': 6483802,
	        'KS': 2853118,
	        'KY': 4339367,
	        'LA': 4533372,
	        'MA': 6547629,
	        'MD': 5773552,
	        'ME': 1328361,
	        'MI': 9883640,
	        'MN': 5303925,
	        'MO': 5988927,
	        'MS': 2967297,
	        'MT': 989415,
	        'NC': 9535483,
	        'ND': 672591,
	        'NE': 1826341,
	        'NH': 1316470,
	        'NJ': 8791894,
	        'NM': 2059179,
	        'NV': 2700551,
	        'NY': 19378102,
	        'OH': 11536504,
	        'OK': 3751351,
	        'OR': 3831074,
	        'PA': 12702379,
	        'RI': 1052567,
	        'SC': 4625364,
	        'SD': 814180,
	        'TN': 6346105,
	        'TX': 25145561,
	        'UT': 2763885,
	        'VA': 8001024,
	        'VT': 625741,
	        'WA': 6724540,
	        'WI': 5686986,
	        'WV': 1852994,
	        'WY': 563626
	}

	state_stats_df1['airport_boardings'] = [state2passengers[state] / state2popn_2010[state]
	                                        for state in state_stats_df.index]


	st.write(state_stats_df1)

	abbrev2state = {
        'AK': 'Alaska',
        'AL': 'Alabama',
        'AR': 'Arkansas',
        'AS': 'American Samoa',
        'AZ': 'Arizona',
        'CA': 'California',
        'CO': 'Colorado',
        'CT': 'Connecticut',
        'DC': 'District of Columbia',
        'DE': 'Delaware',
        'FL': 'Florida',
        'GA': 'Georgia',
        'GU': 'Guam',
        'HI': 'Hawaii',
        'IA': 'Iowa',
        'ID': 'Idaho',
        'IL': 'Illinois',
        'IN': 'Indiana',
        'KS': 'Kansas',
        'KY': 'Kentucky',
        'LA': 'Louisiana',
        'MA': 'Massachusetts',
        'MD': 'Maryland',
        'ME': 'Maine',
        'MI': 'Michigan',
        'MN': 'Minnesota',
        'MO': 'Missouri',
        'MP': 'Northern Mariana Islands',
        'MS': 'Mississippi',
        'MT': 'Montana',
        'NA': 'National',
        'NC': 'North Carolina',
        'ND': 'North Dakota',
        'NE': 'Nebraska',
        'NH': 'New Hampshire',
        'NJ': 'New Jersey',
        'NM': 'New Mexico',
        'NV': 'Nevada',
        'NY': 'New York',
        'OH': 'Ohio',
        'OK': 'Oklahoma',
        'OR': 'Oregon',
        'PA': 'Pennsylvania',
        'PR': 'Puerto Rico',
        'RI': 'Rhode Island',
        'SC': 'South Carolina',
        'SD': 'South Dakota',
        'TN': 'Tennessee',
        'TX': 'Texas',
        'UT': 'Utah',
        'VA': 'Virginia',
        'VI': 'Virgin Islands',
        'VT': 'Vermont',
        'WA': 'Washington',
        'WI': 'Wisconsin',
        'WV': 'West Virginia',
        'WY': 'Wyoming'
}

	state2abbrev = {v: k for k, v in abbrev2state.items()}

	st.write("The next feature is the fraction of each state's population that falls into a set of age categories")

	age_df = pd.read_csv(pred_dir / 'age.csv')

	# merge age dataframe with dataframe that contains the rest of the features
	age_df['Location'] = [state2abbrev[state] for state in age_df.Location]
	state_stats_df2 = (pd.merge(state_stats_df1, age_df, left_index=True, right_on='Location')
	                  .drop(columns=['Location']))
	state_stats_df2.index = ordered_density_metric2state.values()

	st.write(state_stats_df2)

	st.write("The next feature is average temperature of each state during each of the four seasons of the year.")

	temps_df = pd.read_csv(pred_dir / 'temps.csv')

	temps_df['State'] = [state2abbrev[state] for state in temps_df.State]

	# merge temperature dataframe with dataframe that contains the rest of the features
	state_stats_df3 = (pd.merge(state_stats_df2, temps_df, left_index=True, right_on='State')
	                  .drop(columns=['State']))

	state_stats_df3.index = ordered_density_metric2state.values()

	st.write(state_stats_df3)

	st.write("It's possible that state-level political policies have an impact on the proliferation of virus infections. The Cook Partisan Voting Index taken from Wikipedia assigns a number to each state that indicates how strongly the state leads toward the Republican or Democratic Party based on recent state and federal elections. In our convention, a positive value signifies leaning Republican, while a negative value signifies leading Democratic.")

	state2partisan_score = {
	        'AL': 14,
	        'AR': 15,
	        'AZ': 5,
	        'CA': -12,
	        'CO': 1,
	        'CT': -6,
	        'DE': -6,
	        'FL': 2,
	        'GA': 5,
	        'IA': 3,
	        'ID': 19,
	        'IL': -7,
	        'IN': 9,
	        'KS': 13,
	        'KY': 15,
	        'LA': 11,
	        'MA': -12,
	        'MD': -12,
	        'ME': -3,
	        'MI': -1,
	        'MN': -1,
	        'MO': 9,
	        'MS': 9,
	        'MT': 11,
	        'NC': 3,
	        'ND': 17,
	        'NE': 14,
	        'NH': 0,
	        'NJ': -7,
	        'NM': -3,
	        'NV': -1,
	        'NY': -12,
	        'OH': 3,
	        'OK': 20,
	        'OR': -5,
	        'PA': 0,
	        'RI': -10,
	        'SC': 8,
	        'SD': 15,
	        'TN': 14,
	        'TX': 8,
	        'UT': 20,
	        'VA': -1,
	        'VT': -15,
	        'WA': -7,
	        'WI': 0,
	        'WV': 19,
	        'WY': 25
	}

	state_stats_df3['partisan_score'] = [state2partisan_score[state] for state in state_stats_df3.index]

	st.write(state_stats_df3)

	st.write("The following dataset was taken from a Stat139 problem set last semester and contains a range of socioeconomic, demographic and health indicators. These include:\n\n Cancer: prevalence of cancer per 100,000 individuals\n\n Hispanic: percent of adults that are hispanic \n\n Minority: percent of adults that are nonwhite\n\n Female: percent of adults that are female\n\n Income: median income\n\n Nodegree: percent of adults who have not completed high school\n\n Bachelor: percent of adults with a bachelor’s degree\n\nInactive: percent of adults who do not exercise in their leisure time\n\nObesity: percent of individuals with BMI > 30\n\n Cancer: prevalence of cancer per 100,000 individuals\n\n  We're not considering unemployment rate, as these rates are likely no longer accurate for many states.\n\nJust as with the density metric, the state-level value for each of these features is determined by calculating a weighted average of the measurements for each county, where the weights are the fraction of the state population that lives in the given county.")

	county_metrics_df = pd.read_csv(pred_dir / 'county_metrics.csv')


	county_metrics_df['state'] = [state2abbrev[state] for state in county_metrics_df.state]

	county_metrics_df = county_metrics_df[county_metrics_df.state.isin(lower_48)]

	st.write(county_metrics_df.head())

	state2pop_ = county_metrics_df.groupby('state').agg({'population': sum}).to_dict()['population']
	county_metrics_df['state_popn'] = [state2pop_[state] for state in county_metrics_df.state]

	metrics = ['hispanic', 'minority', 'female', 'unemployed', 'income', 'nodegree', 'bachelor', 'inactivity',
	          'obesity', 'cancer']

	for metric in metrics:
	    state2metric = (county_metrics_df.groupby('state').
	                    apply(lambda x: round((x['population'] * x[metric]) / x['state_popn'], 3))
	                    .groupby('state').sum()).to_dict()
	    
	    denom = 1000 if metric == 'income' else 1
	    state_stats_df3[metric] = [state2metric[state] / denom for state in state_stats_df3.index]

	st.write(state_stats_df3)

	st.write("The more people travel between states, the more closely related the states should be in terms of rate of virus infections. The Census Bureau Journey to Work datset reports the number of people that commute from any given county in the county to any other county in the country. This means we can aggregate these county to county commuting flows to determine the number of people that commute between any two states. From this data, we can create a symmetric matrix where the $i,j$ and $j,i$ elements represent the number of people that commute from state $i$ to state $j$ plus the number of people that commute from state $j$ to state $i$. However, just as with the number of annual boardings in each state, the final value of the number of people who commute between two states in normalized by the popualation of the given state. This means that this commuting matrix is no longer symmetric because the populations of state $i$ and state $j$ are different.")

	commuting_df_complete = pd.read_csv(pred_dir / 'commuting.csv')

	commuting_df = commuting_df_complete[['State Name', 'State Name.1', 'Workers in Commuting Flow']]

	commuting_df.rename(columns={'State Name': 'home_state', 
                             'State Name.1': 'work_state', 
                             'Workers in Commuting Flow': 'commuters'}, 
                   inplace=True)

	lower_48_full_name = [abbrev2state[abbrev] for abbrev in lower_48]
	commuting_df = commuting_df[commuting_df.work_state.isin(lower_48_full_name)]

	commuting_df['home_state'] = [state2abbrev[state] for state in commuting_df.home_state]
	commuting_df['work_state'] = [state2abbrev[state] for state in commuting_df.work_state]

	st.write(commuting_df.head(10))

	commuting_df['commuters'] = commuting_df['commuters'].apply(lambda x: int(''.join([y for y in x if y.isdigit()])))

	commuting_groupby_df = (commuting_df.groupby(['work_state', 'home_state'], as_index=False)
	                       .agg({'commuters': 'sum'}))

	# calculate the number of commuters between two states for all pairs of states
	for work_state in state_stats_df3.index:
	    vals = []
	    for home_state in state_stats_df3.index:
	        try:
	            num1 = int((commuting_groupby_df[(commuting_groupby_df.work_state == work_state)
	                       & (commuting_groupby_df.home_state == home_state)].commuters))
	            num2 = int((commuting_groupby_df[(commuting_groupby_df.work_state == home_state)
	                       & (commuting_groupby_df.home_state == work_state)].commuters))
	            num = num1 + num2
	            
	            num /= state2popn_2010[work_state]
	            
	        except TypeError:
	            num = 0

	        vals.append(num)

	    state_stats_df3[work_state + '_dest'] = vals

	st.write(state_stats_df3)

	st.write("States that are in close proximity may be similarly affected by viruses. Therefore, we include a column for each state in the design matrix that denotes whether that given states borders each of the other states.")

	# dictionary that maps each state in the Lower 48 to the states that directly border it or are not contiguous
	# but are very close (e.g. NJ and CT)
	state2neighbors = {'AL': {'AL', 'MS', 'TN', 'FL', 'GA', 'NC', 'SC'},
	                  'GA': {'GA', 'TN', 'FL', 'AL', 'SC', 'NC', 'MS'},
	                  'FL': {'FL', 'GA', 'AL', 'MS', 'SC'},
	                  'MS': {'MS', 'AL', 'TN', 'FL', 'LA', 'AR', 'GA'},
	                  'LA': {'LA', 'TX', 'AR', 'MS', 'OK', 'AL'},
	                  'SC': {'SC', 'FL', 'GA', 'NC', 'TN'},
	                  'NC': {'NC', 'SC', 'GA', 'TN', 'VA', 'KY'},
	                  'AR': {'AR', 'LA', 'TX', 'MS', 'TN', 'OK', 'MO', 'KY'},
	                  'VA': {'VA', 'NC', 'KY', 'WV', 'TN', 'DC', 'MD', 'DE'},
	                  'MD': {'MD', 'DC', 'VA', 'WV', 'DE', 'NJ', 'PA'},
	                  'DE': {'DE', 'MD', 'DC', 'NJ', 'PA'},
	                  'NJ': {'NJ', 'DE', 'MD', 'PA', 'NY', 'NJ', 'CT'},
	                  'NY': {'NY', 'NJ', 'PA', 'CT', 'MA', 'VT'},
	                  'CT': {'CT', 'NY', 'RI', 'MA', 'NJ'},
	                  'RI': {'RI', 'CT', 'MA'},
	                  'MA': {'MA', 'CT', 'RI', 'NH', 'VT', 'NY'},
	                  'NH': {'NH', 'VT', 'ME', 'MA'},
	                  'ME': {'ME', 'NH', 'MA', 'VT'},
	                  'VT': {'VT', 'NH', 'NY', 'MA'},
	                  'PA': {'PA', 'NY', 'NJ', 'MD', 'WV', 'OH', 'DE'},
	                  'WV': {'WV', 'DC', 'MD', 'PA', 'OH', 'KY', 'VA'},
	                  'OH': {'OH', 'PA', 'WV', 'MI', 'IN', 'KY'},
	                  'MI': {'MI', 'OH', 'WI', 'IN', 'IL'},
	                  'KY': {'KY', 'WV', 'OH', 'IN', 'IL', 'MO', 'TN', 'VA', 'AR', 'NC'},
	                  'TN': {'TN', 'KY', 'VA', 'NC', 'SC', 'GA', 'AL', 'MS', 'AR', 'MO', 'IL'},
	                  'IN': {'IN', 'KY', 'OH', 'MI', 'IL', 'WI'},
	                  'IL': {'IL', 'IN', 'MI', 'WI', 'IA', 'MO', 'KY', 'TN'},
	                  'WI': {'WI', 'IL', 'MN', 'MI', 'IA'},
	                  'MN': {'MN', 'MI', 'WI', 'IA', 'ND', 'SD', 'NE', 'IL'},
	                  'IA': {'IA', 'WI', 'MN', 'IL', 'MO', 'KS', 'NE', 'SD'},
	                  'MO': {'MO', 'IA', 'IL', 'KY', 'TN', 'AR', 'OK', 'KS', 'NE'},
	                  'ND': {'ND', 'SD', 'MN', 'MT', 'WY'},
	                  'SD': {'SD', 'ND', 'MN', 'IA', 'NE', 'MT', 'WY'},
	                  'NE': {'NE', 'SD', 'IA', 'MO', 'KS', 'WY', 'CO'},
	                  'KS': {'KS', 'NE', 'IA', 'MO', 'AR', 'OK', 'CO', 'TX', 'NM'},
	                  'OK': {'OK', 'KS', 'MO', 'AR', 'TX', 'NM', 'CO', 'LA'},
	                  'TX': {'TX', 'LA', 'AR', 'OK', 'NM', 'CO'},
	                  'MT': {'MT', 'ND', 'SD', 'WY', 'ID'},
	                  'WY': {'WY', 'MT', 'ND', 'SD', 'NE', 'CO', 'UT', 'ID'},
	                  'CO': {'CO', 'WY', 'NE', 'KS', 'OK', 'TX', 'NM', 'UT', 'AZ'},
	                  'NM': {'NM', 'CO', 'KS', 'OK', 'TX', 'AZ', 'UT'},
	                  'ID': {'ID', 'MT', 'WY', 'UT', 'NV', 'WA', 'OR'},
	                  'UT': {'UT', 'ID', 'WY', 'CO', 'NM', 'AZ', 'NV'},
	                  'AZ': {'AZ', 'NM', 'CO', 'UT', 'NV', 'CA'},
	                  'WA': {'WA', 'ID', 'OR'},
	                  'OR': {'OR', 'WA', 'ID', 'NV', 'CA'},
	                  'NV': {'NV', 'ID', 'OR', 'UT', 'AZ', 'CA'},
	                  'CA': {'CA', 'OR', 'NV', 'AZ'}
	                 }

	     
	for neighboring_state in state_stats_df3.index:
	    states = [int(neighboring_state in state2neighbors[state]) for state in state_stats_df3.index]
	    state_stats_df3[neighboring_state + '_is_neighbor'] = states  
	
	st.write(state_stats_df3)

	st.write("The proportion of each state that is vaccinated may affect the number of people who are infected with the flu. Therefore, we include information on the adult and child vaccination rate for each state.")
	flu_df = pd.read_csv(pred_dir / 'flu.csv')
	flu_df['State'] = [state2abbrev[state] for state in flu_df.State]

	state_stats_df4 = (pd.merge(state_stats_df3, flu_df, left_index=True, right_on='State').drop(columns=['State']))
	state_stats_df4.index = state_stats_df3.index

	st.write(state_stats_df4)

	st.write("Smoking may also affect suspectibility to viruses such as the flu and Covid-19, so we include a feature that reports the fraction of adults who smoke in each state.")

	state2smoking_rate = {
        'AL': 20.9,
        'AR': 22.3,
        'AZ': 15.6,
        'CA': 11.3,
        'CO': 14.6,
        'CT': 12.7,
        'DE': 17.0,
        'FL': 16.1,
        'GA': 17.5,
        'IA': 17.1,
        'ID': 14.3,
        'IL': 15.5,
        'IN': 21.8,
        'KS': 17.4,
        'KY': 24.6,
        'LA': 23.1,
        'MA': 13.7,
        'MD': 13.8,
        'ME': 17.3,
        'MI': 19.3,
        'MN': 14.5,
        'MO': 20.8,
        'MS': 22.2,
        'MT': 17.2,
        'NC': 17.2,
        'ND': 18.3,
        'NE': 15.4,
        'NH': 15.7,
        'NJ': 13.7,
        'NM': 17.5,
        'NV': 17.6,
        'NY': 14.1,
        'OH': 21.1,
        'OK': 20.1,
        'OR': 16.1,
        'PA': 18.7,
        'RI': 14.9,
        'SC': 18.8,
        'SD': 19.3,
        'TN': 22.6,
        'TX': 15.7,
        'UT': 8.9,
        'VA': 16.4,
        'VT': 15.8,
        'WA': 13.5,
        'WI': 16,
        'WV': 26,
        'WY': 18.7
	}

	state_stats_df4['smoking_rate'] = [state2smoking_rate[state] / 100 for state in state_stats_df4.index]

	st.write(state_stats_df4)

	st.write("## Bayesian Model")

	st.write("### Motivation")

	st.write("Before describing the model, it's important to first discuss the motivation behind it in the first place. The wILI time series clearly show that the states are affected differently by the flu. Therefore, we wanted to determine whether there are any state-level features that account for the disrepencies between the states. If we could identify these particular features, then we'd also be able to figure out which states are intrinsically linked based on their attributes.") 
	st.write("This information would then allow us to transfer this knowledge about the flu to Covid-19. Because both the flu and Covid are viruses, we'd expect some of the underlying risk factors of flu to generalize to Covid as well. We could then take one of two routes: first, we could assess if the interstate correlations discovered from the flu data apply in the case of Covid by comparing the number of Covid cases among different states. And second, we could assume that the flu relationships apply in the case of Covid and use these insights to look deeper than just the raw Covid numbers. For example, if the flu analysis reveals that two states share many similar characteristics, and one of these states has more Covid cases per 1000 people but also has more testing, then we may believe that the second state has more case of Covid than are reported. Alternatively, we can identify states that, based on their characteristics (e.g. high density, high obesity rate), are more susceptible to a major spike in Covid cases and thus should take additional precautions when opening up their states.")

	st.write("### Model Formulation")

	st.write("If the state wILI rates are correlated with each other, then we should, in theory, be able to predict the wILI rate in a given state and for a given week from the wILI rates of all the other states for the same week. Because correlated states may have similar flu trajectories but have different raw wILI rates, it's more robust to predict the weekly percent change in wILI rather than the absolute change in wILI. This means that we want to predict the trend in the number of flu cases for each state based on the trends of all the other states at the same time.")

	st.write("The big question is obviously how to use the percent change in the wILI rate of every other state to predict the percent change in the wILI rate for a single state. Because some states are more closely correlated with a given state than others, it makes sense to predict the percent change for a given state to be a weighted average of the percent changes of the other weeks, where the weights should ideally be proportional to the underlying correlation between the two states. For example, if we were trying to predict the trend in New York, we'd take into account the trend of every other state (except for Alaska and Hawaii), but the influence of each of these states on our overall prediction for New York would vary (e.g. the influence of New Jersey and Connecticut may be high, while the influenced of Idaho and Nebraska may be low).")

	st.write("Converting this into formal notation, let's define $\\delta_i$ to be the percent change in the wILI rate between two consecutive weeks for state $i$, and define $\\alpha_{ij}$ to be the weight coefficient of state $j$ on state $i$. We predict each $\\delta_i$ as:")

	st.latex("\\delta_i \\sim N\\left(\\frac{\\sum_{j=1}^{48}\\alpha_{ij}\\delta_jI(j \\neq i)}{\\sum_{j=1}^{48}\\alpha_{ij}I(j \\neq i)}, {\\sigma_{i}}^2\\right)")

	st.write("where ${\\sigma_{i}}^2$ is a state-specific variance. Intuitively, the lower the value of ${\\sigma}^2$ for a given state, the more the variation in the state's wILI trend can be explained by the wILI trends of the other states, and vice versa.")

	st.write("Next, we want to link the $\\alpha_{ij}$ weights to the features associated with each state such that states with more similar characteristics and high rates of interstate travel have higher $\\alpha_{ij}$ and $\\alpha_{ji}$ values and vice versa. Additionally, we only want a few of the $\\alpha_{ij}$s corresponding to state $i$ to be large, and the rest to be small (in a similar spirit to regularization). We can accomplish both of these features as follows: first, each $\\alpha_{ij}$ is modelled as being distributed according to an exponential distribution with a scale (i.e. inverse rate) parameter of $\\lambda_{ij}$. Because an exponential distribution is right skewed and has most of its mass near zero, this ensures that most of the $\\alpha_{ij}$ that are drawn from exponential distributions will take on relatively small values, while only a few will take on relatively large values. Next, we link the scale parameter ($\\lambda_{ij}$) of this exponential distribution to the state-level features by setting the log of $\\lambda_{ij}$ equal to the linear predictor function (taking the log is necessary to map the domain of the scale parameter (all positive real numbers) to the domain of the linear prediction function (the entire real line)).")

	st.write("Translating this into formal notation:")

	st.latex("\\alpha_{ij} \\sim Expo(\\lambda_{ij})")

	st.latex("log(\\lambda_{ij}) = \\beta_0 + \\beta_1X_1 + ... + \\beta_kX_k")

	st.write("In this case the linear predictor function is a little different that usual. Two of the predictors (normalized number of commuters between states $i$ and $j$ and the indicator of whether state $j$ borders state $i$) are included in the usual form of $\\beta_iX_i$, where a unit increase in $X_i$ corresponds to a $\\beta_i$ increase in the linear predictor. However, the rest of the predictors are state-level features such as obesity rate and density. This means that we don't care about the raw values of these features; instead, we only care about the difference between the values for state $i$ and state $j$. Therefore, each of the predictors is defined to be $|X_i - X_j|$, such that the predictor value is 0 when the two states have the same feature value, and increases as the difference between the two states grows.")

	st.write("Finally, because this is a Bayesian model, we need to define a prior distribution for the model parameters, which in this case are the $\\beta$ coefficient associated with each predictor variable and the ${\\sigma}^2$ parameter associated with each state. Because we have no substantial prior domain knowledge, we placed relatively uninformative priors on these parameters. Putting all of these components together produces the following generative model:")

	st.latex("\\delta_i \\sim N\\left(\\frac{\\sum_{j=1}^{48}\\alpha_{ij}\\delta_jI(j \\neq i)}{\\sum_{j=1}^{48}\\alpha_{ij}I(j \\neq i)}, {\\sigma_{i}}^2\\right)")

	st.latex("\\sigma_{i}^{2} \\sim Inv-Gamma(2, 2)")

	st.latex("\\alpha_{ij} \\sim Expo(\\lambda_{ij})")

	st.latex("log(\\lambda_{ij}) = \\beta_0 + \\beta_1X_1 + ... + \\beta_kX_k")

	st.latex("\\beta_i \\sim N(0, 5^2) ")

	st.write("Performing inference for this model yields the posterior distribution of the $\\beta$s and the ${\\sigma}^2$, but we only really care about the $\\beta$s. Because the exponential distribution is parameterized by a scale parameter rather than the usual rate parameter, the expected value of the distribution is equal to the scale parameter. This means that a larger $\\lambda_{ij}$ value corresponds, on average, to a higher $\\alpha_{ij}$ coefficient, and because the linear predictor function is defined to be the log of $\\lambda_{ij}$, this in turn means that a larger linear predictor corresponds, on average, to a higher $\\alpha_{ij}$ coefficient. For the two predictors that are not differences between the two given states, this means that a positive $\\beta$ coefficent indicates that a unit increase in the predictor value produces a stronger correlation between the two given states and vice versa. On the other hand, for the rest of the predictors that are included as differences between certain features of the two states, a strong correlation between two given states is signified by a negative $\\beta$ coefficient. This is the case because the predictor value represents the absolute differences between the features of the states, so a larger predictor value corresponds to a larger discrepancy between the states. Thus, the corresponding $\\beta$ coefficient can be interpreted as a penalty parameter, such that states that are less similar in terms of the given feature are less correlated with each other (assuming the $\\beta$ coefficient value is negative).")

	st.write("Overall, the model provides us with two interpretative results. First, the $\\beta$ coefficients indicate which features contribute to the correlation between the wILI time series of different states. And second, the $\\beta$ coefficients tell us about the $\\alpha_{ij}$ weights, which, in turn, inform us about which states are highly correlated with each other based on the fundamental characteristics of the states.")

	st.write("Finally, one major advantage of this model is that the observations (i.e. the percent change in the wILI rate for a given week) are independent of each other conditioned on the percent changes of the other states for the same week. This means that unlike in a classic time seris model, the past wILI rates of a state are irrelevant to predicting the percent change in the wILI rate at any given time. This greatly simplifies things, as it's much easier to deal with independent observations than it is to handle observations that are correlated with previous observations.")

	# index by state abbreviation so rows/columns can be selected and dropped by state below
	predictor_df = pd.read_csv(pred_dir / 'state_stats.csv', index_col=0)
	predictor_df.drop(index='FL', inplace=True, errors='ignore')
	flu_percent_change_df = pd.read_csv(pred_dir / 'flu_percent_change_imputed_48.csv')
	week_nums = flu_percent_change_df.week_num
	flu_percent_change_df.drop(columns='week_num', inplace=True)

	flu_percent_change_df = flu_percent_change_df[predictor_df.index]

	st.write("Weekly percent change in wILI rate by state:")
	st.write(flu_percent_change_df.head())

	# predictors that are compared between states
	comparison_predictors = ['density_metric', 'Latitude', 'is_coastal', 'airport_boardings', 'Children 0-18', 
	                          'Adults 19-25', 'Adults 26-34', 'Adults 35-54', 'Adults 55-64', '65+', 
	                         'partisan_score', 'hispanic', 'minority', 'female', 
	                         'income', 'nodegree', 'bachelor', 'inactivity', 'obesity', 'cancer',
	                         'overall_vacc_rate', 'child_vacc_rate', 'smoking_rate']
	season_predictors = ['spring', 'fall', 'winter']

	# predictors that are not compared between states
	no_comparison_predictors = ['commuters', 'is_neighbor']

	st.write("An important preprocessing step is to standardize each of the predictors (except for `is_coastal` and `is_neighbor` as these variables only take on the values 0 and 1. This ensures that the $\\beta$ coefficients associated with each predictor are all on the same scale and thus are easily comparable to each other. Additionally, ensuring the the $\\beta$ parameters lie in a similar range may help with the MCMC sampling.")


	predictors_to_standardize = [x for x in comparison_predictors if x != 'is_coastal'] + season_predictors

	# there are no observations during the summer so we don't need the summer weather predictor
	predictor_df_standardized = predictor_df.drop(columns='summer')
	for predictor in predictors_to_standardize:
	    data = predictor_df_standardized[predictor]
	    mean = np.mean(data)
	    std = np.std(data)
	    predictor_df_standardized[predictor] = [(x - mean) / std for x in data]

	commute_columns = [column for column in predictor_df_standardized if column.endswith('_dest')]
	commute_vals = predictor_df_standardized[commute_columns].to_numpy().flatten()
	commute_mean = np.mean(commute_vals)
	commute_std = np.std(commute_vals)

	for commute_column in commute_columns:
	    predictor_df_standardized[commute_column] = [(x - commute_mean) / commute_std 
	                                                 for x in predictor_df_standardized[commute_column]]
	    
	comparison_preds_df = predictor_df_standardized[comparison_predictors + season_predictors]

	st.write("Resulting state feature dataframe:")
	st.write(predictor_df_standardized)

	# determine season from week of the year
	def get_season(week):
	    if week >= 52 or week < 13:
	        return np.array([0, 0, 1])
	    if 13 <= week < 26:
	        return np.array([1, 0, 0])
	    if 39 <= week < 52:
	        return np.array([0, 1, 0])
	    raise ValueError("Unexpected week number {}: no summer observations are included".format(week))


	predictor_num = len(comparison_predictors) + len(season_predictors) + len(no_comparison_predictors)
	state_num = flu_percent_change_df.shape[1]
	comparison_preds_num = len(comparison_predictors)
	obs_num = len(flu_percent_change_df)

	# indicate which season each observation falls into
	season_indictor_array = np.zeros((obs_num, state_num - 1, len(season_predictors)))
	for i, week_num in enumerate(week_nums[1:]):
	    season_indictor_array[i, :, :] = np.repeat(get_season(week_num)[np.newaxis, :], state_num - 1, axis=0)

	st.write("`Y_target` is a 1D array that contains the percent change of each state for each week of the time series that is included in the analysis. This is the variable we want to predict for each observation. Because there are 47 states (Lower 48 except for Florida) and 217 observations for each state, this array has a length of $47*217=10199$. \n\n`Y_state_idx` is a 1D array of the same length as `Y_target` that represents the specific state associated with each `Y_target` value. Therefore, it takes on values between 0 and 46. This is necessary to pick out the variance parameter corresponding to the given state. \n\n`X` is a 3D design matrix. The first axis has a length equal to the total number of observations (10199). The second axis has a length of 46, which represents the $47-1=46$ other states from which we're trying to predict the final state. And the first axis has a length of 29, which contain the 28 predictors in addition to an intercept term, which is simply the value of 1. Therefore, this `X` matrix contains all the predictors for each state for each observation.\n\n`X_flu` is a 2D array. The first axis has a length equal to the total number of observations (10199), while the second axis has a length of 46 and represents the percent change in wILI rate for all the $47-1=46$ other states from which we're trying to predict the final state. Therefore, this array is contains all the $\\delta_jI(j \\neq i)$ values for each observation.")

	Y_target = np.zeros(state_num * obs_num)
	X = np.zeros((Y_target.shape[0], state_num - 1, predictor_num + 1))
	Y_state_idx = np.zeros(Y_target.shape[0], dtype=int)
	X_flu = np.zeros((Y_target.shape[0], state_num - 1))

	for idx, state in enumerate(predictor_df_standardized.index):
    
	    # response variable
	    Y_target[obs_num * idx: obs_num * idx + obs_num] = flu_percent_change_df[state]
	    
	    # percent change of other states
	    X_flu[obs_num * idx: obs_num * idx + obs_num, :] = flu_percent_change_df.drop(columns=state).to_numpy()
	    
	    # index of response state
	    Y_state_idx[obs_num * idx: obs_num * idx + obs_num] = [idx] * obs_num
	    
	    state_comparison_preds = np.array(comparison_preds_df.loc[state])
	    
	    constant_design_matrix = np.zeros((X.shape[1], X.shape[2]))
	    constant_design_matrix[:, 0] = np.ones(state_num - 1)
	    
	    # two predictors that aren't differences between states: neighboring state and number of commuters
	    other_states_preds_df = predictor_df_standardized.drop(index=state)
	    not_difference_matrix = other_states_preds_df[[state + '_is_neighbor', state + '_dest']].to_numpy()
	    constant_design_matrix[:, 1: 1 + len(no_comparison_predictors)] = not_difference_matrix
	    
	    # the rest of the predictors are differences between two states
	    other_states_comparison_preds_array = comparison_preds_df.drop(index=state).to_numpy()
	    difference_matrix = abs(other_states_comparison_preds_array - state_comparison_preds)
	    constant_design_matrix[:, 1 + len(no_comparison_predictors):] = difference_matrix
	    
	    constant_design_matrix_3D = np.repeat(constant_design_matrix[np.newaxis, :, :], repeats=obs_num, axis=0)
	    
	    # pick out appropriate season and set the rest of the temperature predictors to zero
	    constant_design_matrix_3D[:, :, -len(season_predictors):] *= season_indictor_array 
	    
	    X[obs_num * idx: obs_num * idx + obs_num, :, :] = constant_design_matrix_3D 
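	# Illustrative sanity check (added for clarity, assuming the dimensions described above):
	# 47 states x 217 weeks of observations, 46 "other" states, and 28 predictors plus an intercept.
	assert Y_target.shape == (state_num * obs_num,)
	assert X.shape == (state_num * obs_num, state_num - 1, predictor_num + 1)
	assert X_flu.shape == (state_num * obs_num, state_num - 1)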
	
	st.write("The observations are shuffled before they are inputted to the pymc3 model.")

	# randomly shuffle the observations
	np.random.seed(109)
	indices = np.arange(len(Y_target))
	np.random.shuffle(indices)
	Y_target_random = Y_target[indices]
	X_flu_random = X_flu[indices]
	X_random = X[indices]
	Y_state_idx_random = Y_state_idx[indices]

	st.write("See bottom of document for model specification.")

	st.write("Just as we did in HW3, it's important to first check whether the generative model is correctly specified. This can be done by hardcoding the  values for the parameters, generating response variables from these parameters and then trying to infer the parameters using MCMC.")

	st.write("The sampling took a whopping 13 hours to sample just 500 times for each chain (with a 500 burn-in sample). However, as shown below the results confirm that the model was correctly specified, as the majority of the true $\\beta$ values lie within the corresponding 94 percent credible interval. Therefore, performance inference for the actual data should yield reliable results.\n\nHowever, carrying out inference on this synthetic data reveals several issues. First, many of the r_hat values are significantly larger than 1.0, which means that more than 500 samples are needed for the chains to converge to the posterior distribution. And second, the fact that the sampling took so long may indicate that the uninformative priors are too flat and make it difficult for the NUTS sampler to sample points from the true posterior distribution. To address these issues, the number of samples is increased from 500 to 1000 and a semi-informative prior is placed on the $\\beta$ and $\\sigma^2$ parameters ($N(0, 25)$ for each of the $\\beta$s and $Inv-Gamma(2, 2)$ for each $\\sigma^2$.")

	sim_trace_df = pd.read_csv(pred_dir / 'sim_trace.csv')
	st.write(sim_trace_df)

	st.write("Unfortunately we ran into major issues running MCMC for the actual data. A burn-in of 500 and a sample of 1000 should have taken around 18 hours to finish. However, the first time we ran it, it was 80 percent complete after 14 hours and then the screen saver didn't turn off and the notebook shut down. We then tried running in a second time, and this time it again was 80 percent done after another 14 hours and then encountered a memory failure issue that terminated the notebook. Therefore, the third time we only asked for 500 samples, even though we knew this likely wouldn't be large enough for the sampler to converge. It took 14 hours to run but finished successfully. Even so, the model was so unwieldy that it took an additional three hours just to save the model and create a summary dataframe.")

	st.write("Results of MCMC sampling:")

	trace_df = pd.read_csv(pred_dir / 'trace.csv')
	st.write(trace_df)

	st.write("Unfortunately, most of the r_hat values of the $\\beta$ coefficients are extremely inflated (the average r_hat value is just under 2.0). This means that the sampler hasn't come close to converging and means that it's pointless to try to interpret the sign or the magnitude of the coefficients. At this point, we ran out of time. However, if we had more time, we'd randomly select a subset of the observations and get more samples for these observations, as it's better to have trustworthy results on less data than it is to have unreliable results on the entire datset.")

	st.write("While the results of the inference were unreliable, it's still worthwhile to discuss what the next steps would have been in the analysis. First, we would check the sign and 94 percent credible interval of each of the $\\beta$ coefficients to see if the majority of them make intuitive sense (i.e. negative coefficients for the difference predictors and positive coefficients for the non-difference predictors.) Next, we would evaluate the predictive power of the model and test the model assumptions at the same time. This could be done by first calculating the predictive power of a baseline naive model where the average of all the other states is used to predict for the percent change in the final state (in other words, where the weights associated with each state are the same). Because the likelihood function is modelled as a normal distribution, the optimal loss function is the mean squared error. The predictions would be performed for each state separately. \n\nAfter calculating the MSE for the naive model, we'd evaluate the Bayesian model as follows: first, we'd sample hundreds of times from the posterior distribution of each of the $\\beta$ coefficients. Then, for each sample, we'd work our way up the model (i.e. sample an $\\alpha$ for each state) and calculate the mean of the prediction. We'd then plot the residuals by subtracting the predicted percent change from the true percent change. Calculating the average of the square of the residuals would give us the MSE, which we'd compare to the baseline model to see if this model has any increased predictive power. Meanwhile, we'd plot these residuals to assess the assumption that the observations are normally distributed about the weighted average of the percent change of each of the other states. If this is the case, then we'd expect the distribution to being normally distributed around 0.0. Finally, we can calculate the variance of the residuals for each state and compare this sample variance to the posterior distribution of $\\sigma^2$ for each state to check if they are consistent with each other.")

	st.write("Model specification in pymc3:")

	with st.echo():
		model = pm.Model()

		with model:
		    # define prior distribution for beta parameters 
		    beta = pm.Normal('beta', mu=0, sigma=5, shape=predictor_num + 1)
		    
		    # define prior distribution for state-specific variance parameter
		    sigma_sq = pm.InverseGamma('sigma_sq', alpha=2, beta=2, shape=state_num)
		    
		    # calculate the linear predictor for each state by multiplying the 3D X design matrix with the vector
		    # of beta parameters
		    nu = pm.Deterministic('nu', pm.math.dot(X_random, beta))
		    
		    # calculate the lambda parameter for each state by exponentiating the linear predictor
		    lambda_ = pm.Deterministic('lambda', pm.math.exp(nu))
		    
		    # sample an alpha random variable for each state from an exponential distribution with the
		    # corresponding scale parameter (pymc3's Exponential takes a rate, hence lam=1/lambda_)
		    alpha = pm.Exponential('alpha', lam=1/lambda_, shape=(X_random.shape[0], state_num - 1))
		    
		    # calculate the mean of each response variable by taking the dot product between the alpha vector
		    # and the vector of the percent change in the wILI rates of the other 46 states and dividing by the 
		    # sum of the alpha weights
		    mu = pm.Deterministic('mu', pm.math.sum(alpha * X_flu_random, axis=1) / pm.math.sum(alpha, axis=1))
		    
		    # define the response variable to be normally distributed about the mean and with a standard deviation that
		    # is the square root of the variance parameter associated with the given state
		    Y_obs = pm.Normal('Y_obs', mu=mu, sigma=pm.math.sqrt(sigma_sq[Y_state_idx_random]), observed=Y_target_random)
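		    # Hedged sketch (assumed, not shown in the original app): the sampling call, left
		    # commented out here because a full run reportedly took on the order of 14 hours.
		    # trace = pm.sample(draws=500, tune=500, random_seed=109)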
Example #7
0
def find_ism_params(grid,
                    dustlaw,
                    obs,
                    pca_result,
                    line_ls,
                    drpall_row,
                    Zsol=.0142,
                    nrad=30,
                    m_at_rad=5,
                    rlim=None):
    '''
    run a pymc3 fit on a whole galaxy

    - grid: pre-GP-trained photoionization grid (supplies covs and alphas)
    - dustlaw:
    - obs: emission-line observations (flux, uncertainty, and mask)
    - line_ls:
    - drpall_row:
    '''
    # access results from pca to get priors on tauV*mu and tauV*(1-mu)
    pca_results_good = ~np.logical_or(pca_result.mask, pca_result.badPDF())
    tauV_mu_loc, tauV_mu_sd = pca_result.to_normaldist('tau_V mu')
    tauV_1mmu_loc, tauV_1mmu_sd = pca_result.to_normaldist('tau_V (1 - mu)')
    logQH_loc, logQH_sd = pca_result.to_normaldist('logQH')

    # good spaxels must be good in both PCA results and emlines measurements
    goodspax = np.logical_and(obs.spaxels_good_to_run(), pca_results_good)
    print(goodspax.sum(), 'spaxels')
    # access emission-line measurements, and pick the good ones
    f = np.column_stack([obs.line_flux[k][goodspax] for k in obs.lines_used])
    unc = np.column_stack(
        [obs.line_unc[k].array[goodspax] for k in obs.lines_used])

    # filter PCA measurements of tauV mu and tauV (1 - mu)
    tauV_mu_loc, tauV_mu_sd = \
        tauV_mu_loc[goodspax].astype(np.float32), tauV_mu_sd[goodspax].astype(np.float32)
    tauV_1mmu_loc, tauV_1mmu_sd = \
        tauV_1mmu_loc[goodspax].astype(np.float32), tauV_1mmu_sd[goodspax].astype(np.float32)
    logQH_loc, logQH_sd = \
        logQH_loc[goodspax].astype(np.float32), logQH_sd[goodspax].astype(np.float32)

    # radius in Re units
    Rreff = obs.hdulist['SPX_ELLCOO'].data[1, ...][goodspax].astype(np.float32)

    #'''
    if type(rlim) is list:
        Rtargets = np.linspace(rlim[0], rlim[1], nrad)
    else:
        Rtargets = np.linspace(Rreff.min(), Rreff.max(), nrad)
    meas_ixs = np.unique(
        np.argsort(np.abs(Rreff[None, :] - Rtargets[:, None]),
                   axis=1)[:, :m_at_rad])
    print(meas_ixs)

    Rreff, f, unc = Rreff[meas_ixs], f[meas_ixs], unc[meas_ixs]
    tauV_mu_loc, tauV_mu_sd = tauV_mu_loc[meas_ixs], tauV_mu_sd[meas_ixs]
    tauV_1mmu_loc, tauV_1mmu_sd = tauV_1mmu_loc[meas_ixs], tauV_1mmu_sd[
        meas_ixs]
    logQH_loc, logQH_sd = logQH_loc[meas_ixs], logQH_sd[meas_ixs]
    #'''

    # distance, for absolute-scaling purposes
    zdist = drpall_row['nsa_zdist']
    four_pi_r2 = (4. * np.pi * cosmo.luminosity_distance(zdist)**2.).to(
        units.cm**2).value

    *obs_shape_, nlines = f.shape
    obs_shape = tuple(obs_shape_)
    print('in galaxy: {} measurements of {} lines'.format(obs_shape, nlines))

    with pymc3.Model() as model:
        #'''
        # gaussian process on radius determines logZ
        ls_logZ = pymc3.Gamma('ls-logZ', alpha=3., beta=3.,
                              testval=1.)  # effectively [0.5, 3] Re
        gp_eta = pymc3.HalfCauchy('eta', beta=.5, testval=.25)
        cov_r = gp_eta**2. * pymc3.gp.cov.ExpQuad(input_dim=1, ls=ls_logZ)
        logZ_gp = pymc3.gp.Latent(cov_func=cov_r)

        # draw from GP
        logZ_rad = logZ_gp.prior('logZ-r', X=Rreff[:, None])
        logZ_gp_rad_sigma = pymc3.HalfCauchy('logZ-rad-sigma', beta=.2)
        logZ = pymc3.Bound(pymc3.Normal,
                           *grid.range('logZ'))('logZ',
                                                mu=logZ_rad,
                                                sd=logZ_gp_rad_sigma,
                                                shape=obs_shape,
                                                testval=-.1)
        #'''

        # priors
        ## first on photoionization model
        #logZ = pymc3.Uniform('logZ', *grid.range('logZ'), shape=obs_shape, testval=0.)
        Z = Zsol * 10.**logZ
        logU = pymc3.Bound(pymc3.Normal, *grid.range('logU'))('logU',
                                                              mu=-2.,
                                                              sd=5.,
                                                              shape=obs_shape,
                                                              testval=-2.)
        age = pymc3.Bound(pymc3.Normal, *grid.range('Age'))('age',
                                                            mu=5.,
                                                            sd=10.,
                                                            shape=obs_shape,
                                                            testval=2.5)
        #xid = theano.shared(0.46)

        # dust laws come from PCA fits
        tauV_mu_norm = pymc3.Bound(pymc3.Normal,
                                   lower=-tauV_mu_loc / tauV_mu_sd)(
                                       'tauV mu norm',
                                       mu=0,
                                       sd=1.,
                                       shape=obs_shape,
                                       testval=0.)
        tauV_mu = pymc3.Deterministic('tauV mu',
                                      tauV_mu_loc + tauV_mu_sd * tauV_mu_norm)
        tauV_1mmu_norm = pymc3.Bound(pymc3.Normal,
                                     lower=-tauV_1mmu_loc / tauV_1mmu_sd)(
                                         'tauV 1mmu norm',
                                         mu=0,
                                         sd=1.,
                                         shape=obs_shape,
                                         testval=0.)
        tauV_1mmu = pymc3.Deterministic(
            'tauV 1mmu', tauV_1mmu_loc + tauV_1mmu_sd * tauV_1mmu_norm)

        #tauV = tauV_mu + tauV_1mmu
        #logGMSD = pymc3.Deterministic(
        #    'logGMSD', theano.tensor.log10(0.2 * tauV / (xid * Z)))

        grid_params = theano.tensor.stack([logZ, logU, age], axis=0)

        # the attenuation power-laws
        dense_powerlaw = theano.shared(
            (line_ls.quantity.value.astype('float32') / 5500)**-1.3)
        diffuse_powerlaw = theano.shared(
            (line_ls.quantity.value.astype('float32') / 5500)**-0.7)

        transmission = pymc3.math.exp(
            -(theano.tensor.outer(tauV_1mmu, dense_powerlaw) + \
              theano.tensor.outer(tauV_mu, diffuse_powerlaw)))

        # dim lines based on distance
        distmod = theano.shared(four_pi_r2)
        one_e17 = theano.shared(1.0e17)
        obsnorm = one_e17 / distmod

        # next on normalization of emission line strengths
        logQHnorm = pymc3.Normal('logQHnorm',
                                 mu=0.,
                                 sd=1.,
                                 testval=0.,
                                 shape=obs_shape)
        logQH = pymc3.Deterministic('logQH', logQH_loc + logQH_sd * logQHnorm)

        eff_QH = pymc3.Kumaraswamy('effQH',
                                   a=3.,
                                   b=3.,
                                   shape=obs_shape,
                                   testval=0.66)

        linelumnorm = theano.tensor.outer(
            eff_QH * 10**logQH, grid.observable_norms_t.astype('float32'))

        norm = obsnorm * linelumnorm * transmission

        for i, (name, alpha, cov) in enumerate(
                zip(grid.observable_names, grid.alphas, grid.covs)):
            pymc3.StudentT(
                '-'.join(('obsflux', name)),
                nu=1.,
                mu=((gp_grid.gp_predictt(cov, alpha, grid.X0, grid_params) +
                     1.) * norm[:, i]),
                sd=unc[:, i],
                observed=f[:, i])

        model_graph = pymc3.model_to_graphviz()
        model_graph.format = 'svg'
        model_graph.render()

        step, start = densemass_sample(model,
                                       cores=1,
                                       chains=1,
                                       nstart=200,
                                       nburn=200,
                                       ntune=5000)

        try:
            nchains = 10
            trace = pymc3.sample(step=step,
                                 start=start * nchains,
                                 draws=500,
                                 tune=500,
                                 burn=500,
                                 cores=1,
                                 chains=nchains,
                                 nuts_kwargs=dict(target_accept=.95),
                                 init='adapt_diag')
        except Exception as e:
            print(e)
            trace = None

    return model, trace, f, unc, Rreff
Example #8
0
X_masked = np.ma.masked_invalid(X)

# model
with pm.Model() as model:
    # priors
    intercept = pm.Normal('intercept', mu=0, sigma=100)
    beta = pm.Normal('beta', mu=0, sigma=100, shape=X_masked.shape[1])
    alpha = pm.HalfCauchy('alpha', beta=5)

    # impute missing X
    chol, stds, corr = pm.LKJCholeskyCov('chol',
                                         n=X_masked.shape[1],
                                         eta=2,
                                         sd_dist=pm.Exponential.dist(1),
                                         compute_corr=True)
    cov = pm.Deterministic('cov', chol.dot(chol.T))
    X_mu = pm.Normal('X_mu',
                     mu=0,
                     sigma=100,
                     shape=X_masked.shape[1],
                     testval=X_masked.mean(axis=0))
    X_modeled = pm.MvNormal('X', mu=X_mu, chol=chol, observed=X_masked)

    # observation
    mu_ = intercept + tt.dot(X_modeled, beta)

    # likelihood
    mu = tt.exp(mu_)
    likelihood = pm.Gamma('y', alpha=alpha, beta=alpha / mu, observed=y)

    # sample
    trace = pm.sample(1000, tune=1000)
Example #9
0
    def gp_modeling(self,
                    time=None,
                    flux=None,
                    flux_err=None,
                    mask=None,
                    sigma=3,
                    niters=8,
                    iterative=False):
        """Applies GP model to trend normalized light curve.
        """
        if flux is None:
            flux = self.norm_flux
        if time is None:
            time = self.time
        if flux_err is None:
            flux_err = self.flux_err
        if mask is None:
            mask = np.zeros(len(time), dtype=bool)

        if (len(time) != len(flux)) or (len(time) != len(flux_err)):
            raise ValueError(
                "Please ensure you're passing in arrays of the same length.")

        self.mask = mask

        x = np.array(time)
        y = np.array(flux)
        yerr = np.array(flux_err)

        x = np.array(x[~mask])
        y = np.array(y[~mask])
        yerr = np.array(yerr[~mask])

        x = np.ascontiguousarray(x, dtype=np.float64)
        y = np.ascontiguousarray(y, dtype=np.float64)
        yerr = np.ascontiguousarray(yerr, dtype=np.float64)
        time = np.ascontiguousarray(self.time, dtype=np.float64)

        mu = np.nanmean(y)
        y = (y / mu - 1) * 1e3
        yerr = yerr * 1e3 / mu

        results = xo.estimators.lomb_scargle_estimator(
            x, y, min_period=self.p_rot * 0.5, max_period=self.p_rot * 2)
        peak_per = results['peaks'][0]['period']
        per_uncert = results['peaks'][0]['period_uncert']
        self.xo_LS_results = results

        peak = results["peaks"][0]
        freq, power = results["periodogram"]

        with pm.Model() as model:
            mean = pm.Normal("mean", mu=0.0, sd=5.0)

            # white noise
            logs2 = pm.Normal("logs2",
                              mu=np.log(np.nanmin(yerr) / 2.0),
                              sd=10.0)

            # The parameters of the RotationTerm kernel
            logamp = pm.Normal("logamp", mu=np.log(np.var(y) / 2.0), sd=20.0)

            # Bounds on period
            #            BoundedNormal = pm.Bound(pm.Normal, lower=np.log(peak_per*0.5),
            #                                     upper=np.log(peak_per*3))
            #            logperiod = BoundedNormal("logperiod", mu=np.log(2*peak["period"]), sd=per_uncert)

            # Q from simple harmonic oscillator
            logQ0 = pm.Normal("logQ0", mu=1.0, sd=10.0)
            logdeltaQ = pm.Normal("logdeltaQ", mu=2.0, sd=10.0)

            # TRY WITH NORMAL MU 0.5 SD LOW
            mix = pm.Uniform("mix", lower=0, upper=1.0)

            # Track the period as a deterministic
            #            period = pm.Deterministic("period", tt.exp(logperiod))

            # Set up the Gaussian Process model

            # TRY WITH SHOTERM INSTEAD OF ROTATIONTERM
            kernel = xo.gp.terms.RotationTerm(log_amp=logamp,
                                              period=peak_per,
                                              log_Q0=logQ0,
                                              log_deltaQ=logdeltaQ,
                                              mix=mix)
            gp = xo.gp.GP(kernel, x, yerr**2 + tt.exp(logs2), J=4)

            # Compute the Gaussian Process likelihood and add it into the
            # the PyMC3 model as a "potential"
            pm.Potential("loglike", gp.log_likelihood(y - mean))

            # Compute the mean model prediction for plotting purposes
            pm.Deterministic("pred", gp.predict())

            # Fit mean model first
            # Fit period and amplitude together
            # Fit over Q
            # Fit over mean
            # Fit period and amplitude together again
            map_soln = xo.optimize(start=model.test_point)
            map_soln = xo.optimize(start=model.test_point, vars=[mean])
            map_soln = xo.optimize(start=map_soln, vars=[logamp])
            #            map_soln = xo.optimize(start=map_soln, vars=[logperiod])
            map_soln = xo.optimize(start=map_soln, vars=[logQ0])
            map_soln = xo.optimize(start=map_soln, vars=[logdeltaQ])
            map_soln = xo.optimize(start=map_soln, vars=[logs2])
            map_soln = xo.optimize(start=map_soln, vars=[mix])
            map_soln = xo.optimize(start=map_soln, vars=[mean])
            map_soln = xo.optimize(start=map_soln,
                                   vars=[logamp])  #, logperiod])
            map_soln = xo.optimize(start=map_soln, vars=[mix])

            map_soln = xo.optimize(start=map_soln)

        with model:
            mu, var = xo.eval_in_model(gp.predict(time, return_var=True),
                                       map_soln)

        if iterative is False:
            self.gp_soln = map_soln
            self.gp_model = mu
            self.gp_flux = self.norm_flux - (mu + 1)

        else:
            self.gp_it_soln = map_soln
            self.gp_it_model = mu
            self.gp_it_flux = self.norm_flux - (mu + 1)
def calibration_main(locator, config):
    # INITIALIZE TIMER
    t0 = time.clock()

    # Local variables
    building_name = config.single_calibration.building
    building_load = config.single_calibration.load
    iteration_pymc3 = config.single_calibration.iterations
    with open(locator.get_calibration_problem(building_name, building_load), 'r') as input_file:
        problem = pickle.load(input_file)
    emulator = joblib.load(locator.get_calibration_gaussian_emulator(building_name, building_load))
    distributions = problem['probabiltiy_vars']
    variables = problem['variables']

    # Create function to call predictions (mu)
    @as_op(itypes=[tt.dscalar, tt.dscalar, tt.dscalar, tt.dscalar, tt.dscalar, tt.dscalar], otypes=[tt.dvector])
    def predict_y(var1, var2, var3, var4, var5, var6):
        input_sample = np.array([var1, var2, var3, var4, var5, var6]).reshape(1, -1)
        prediction = emulator.predict(input_sample)
        return prediction

    # Create function to call predictions (sigma)
    @as_op(itypes=[tt.dscalar, tt.dscalar, tt.dscalar, tt.dscalar, tt.dscalar, tt.dscalar], otypes=[tt.dvector])
    def predict_sigma(var1, var2, var3, var4, var5, var6):
        input_sample = np.array([var1, var2, var3, var4, var5, var6]).reshape(1, -1)
        _, sigma = emulator.predict(input_sample, return_std=True)
        return sigma

    with pymc3.Model() as basic_model:

        # DECLARE PRIORS
        for i, variable in enumerate(variables):
            arguments = np.array([distributions.loc[variable, 'min'], distributions.loc[variable, 'max'],
                                  distributions.loc[variable, 'mu']]).reshape(-1, 1)
            min_max_scaler = preprocessing.MinMaxScaler(copy=True, feature_range=(0, 1))
            arguments_norm = min_max_scaler.fit_transform(arguments)
            globals()['var' + str(i + 1)] = pymc3.Triangular('var' + str(i + 1), lower=arguments_norm[0][0],
                                                             upper=arguments_norm[1][0], c=arguments_norm[2][0])

        # DECLARE OBJECTIVE FUNCTION
        mu = pymc3.Deterministic('mu', predict_y(var1, var2, var3, var4, var5, var6))
        sigma = pymc3.HalfNormal('sigma', 0.15)
        # sigma = pm.Deterministic('sigma', predict_sigma(var1, var2, var3, var4, var5, var6))
        y_obs = pymc3.Normal('y_obs', mu=mu, sd=sigma, observed=0)

        # RUN MODEL, SAVE TO DISC AND PLOT RESULTS
        with basic_model:
            # Running
            step = pymc3.Metropolis()
            trace = pymc3.sample(iteration_pymc3, tune=1000, njobs=1, step=step)
            # Saving
            df_trace = pymc3.trace_to_dataframe(trace)

            #CREATE GRAPHS AND SAVE TO DISC
            df_trace.to_csv(locator.get_calibration_posteriors(building_name, building_load))
            pymc3.traceplot(trace)

            columns = ["var1", "var2", "var3", "var4", "var5", "var6"]
            seaborn.pairplot(df_trace[columns])

            if config.single_calibration.show_plots:
                plt.show()


    #SAVING POSTERIORS IN PROBLEM
    problem['posterior_norm'] = df_trace.as_matrix(columns=columns)
    pickle.dump(problem, open(locator.get_calibration_problem(building_name, building_load), 'w'))

    return
    def __init__(
            self,
            cell_state_mat: np.ndarray,
            X_data: np.ndarray,
            n_comb: int = 50,
            data_type: str = 'float32',
            n_iter=20000,
            learning_rate=0.005,
            total_grad_norm_constraint=200,
            verbose=True,
            var_names=None, var_names_read=None,
            obs_names=None, fact_names=None, sample_id=None,
            gene_level_prior={'mean': 1 / 2, 'sd': 1 / 4},
            gene_level_var_prior={'mean_var_ratio': 1},
            cell_number_prior={'cells_per_spot': 8,
                               'factors_per_spot': 7,
                               'combs_per_spot': 2.5},
            cell_number_var_prior={'cells_mean_var_ratio': 1,
                                   'factors_mean_var_ratio': 1,
                                   'combs_mean_var_ratio': 1},
            phi_hyp_prior={'mean': 3, 'sd': 1},
            spot_fact_mean_var_ratio=0.5
    ):

        ############# Initialise parameters ################
        super().__init__(cell_state_mat, X_data,
                         data_type, n_iter,
                         learning_rate, total_grad_norm_constraint,
                         verbose, var_names, var_names_read,
                         obs_names, fact_names, sample_id)

        for k in gene_level_var_prior.keys():
            gene_level_prior[k] = gene_level_var_prior[k]

        self.gene_level_prior = gene_level_prior
        self.phi_hyp_prior = phi_hyp_prior
        self.n_comb = n_comb
        self.spot_fact_mean_var_ratio = spot_fact_mean_var_ratio

        cell_number_prior['factors_per_combs'] = (cell_number_prior['factors_per_spot'] /
                                                  cell_number_prior['combs_per_spot'])
        for k in cell_number_var_prior.keys():
            cell_number_prior[k] = cell_number_var_prior[k]
        self.cell_number_prior = cell_number_prior

        ############# Define the model ################
        self.model = pm.Model()

        with self.model:

            # =====================Gene expression level scaling======================= #
            # Explains difference in expression between genes and 
            # how it differs in single cell and spatial technology
            # compute hyperparameters from mean and sd
            shape = gene_level_prior['mean'] ** 2 / gene_level_prior['sd'] ** 2
            rate = gene_level_prior['mean'] / gene_level_prior['sd'] ** 2
            shape_var = shape / gene_level_prior['mean_var_ratio']
            rate_var = rate / gene_level_prior['mean_var_ratio']
            self.gene_level_alpha_hyp = pm.Gamma('gene_level_alpha_hyp',
                                                 mu=shape, sigma=np.sqrt(shape_var),
                                                 shape=(1, 1))
            self.gene_level_beta_hyp = pm.Gamma('gene_level_beta_hyp',
                                                mu=rate, sigma=np.sqrt(rate_var),
                                                shape=(1, 1))

            self.gene_level = pm.Gamma('gene_level', self.gene_level_alpha_hyp,
                                       self.gene_level_beta_hyp, shape=(self.n_genes, 1))

            # scale cell state factors by gene_level
            self.gene_factors = pm.Deterministic('gene_factors', self.cell_state)
            # tt.printing.Print('gene_factors sum')(gene_factors.sum(0).shape)
            # tt.printing.Print('gene_factors sum')(gene_factors.sum(0))

            # =====================Spot factors======================= #
            # prior on spot factors reflects the number of cells, fraction of their cytoplasm captured, 
            # times heterogeneity in the total number of mRNA between individual cells within each cell type
            self.cells_per_spot = pm.Gamma('cells_per_spot',
                                           mu=cell_number_prior['cells_per_spot'],
                                           sigma=np.sqrt(cell_number_prior['cells_per_spot'] \
                                                         / cell_number_prior['cells_mean_var_ratio']),
                                           shape=(self.n_cells, 1))
            self.comb_per_spot = pm.Gamma('combs_per_spot',
                                          mu=cell_number_prior['combs_per_spot'],
                                          sigma=np.sqrt(cell_number_prior['combs_per_spot'] \
                                                        / cell_number_prior['combs_mean_var_ratio']),
                                          shape=(self.n_cells, 1))

            shape = self.comb_per_spot / np.array(self.n_comb).reshape((1, 1))
            rate = tt.ones((1, 1)) / self.cells_per_spot * self.comb_per_spot
            self.combs_factors = pm.Gamma('combs_factors', alpha=shape, beta=rate,
                                          shape=(self.n_cells, self.n_comb))

            self.factors_per_combs = pm.Gamma('factors_per_combs',
                                              mu=cell_number_prior['factors_per_combs'],
                                              sigma=np.sqrt(cell_number_prior['factors_per_combs'] \
                                                            / cell_number_prior['factors_mean_var_ratio']),
                                              shape=(self.n_comb, 1))
            c2f_shape = self.factors_per_combs / np.array(self.n_fact).reshape((1, 1))
            self.comb2fact = pm.Gamma('comb2fact', alpha=c2f_shape, beta=self.factors_per_combs,
                                      shape=(self.n_comb, self.n_fact))

            self.spot_factors = pm.Gamma('spot_factors', mu=pm.math.dot(self.combs_factors, self.comb2fact),
                                         sigma=pm.math.sqrt(pm.math.dot(self.combs_factors, self.comb2fact) \
                                                            / self.spot_fact_mean_var_ratio),
                                         shape=(self.n_cells, self.n_fact))

            # =====================Spot-specific additive component======================= #
            # molecule contribution that cannot be explained by cell state signatures
            # these counts are distributed between all genes not just expressed genes
            self.spot_add_hyp = pm.Gamma('spot_add_hyp', 1, 1, shape=2)
            self.spot_add = pm.Gamma('spot_add', self.spot_add_hyp[0],
                                     self.spot_add_hyp[1], shape=(self.n_cells, 1))

            # =====================Gene-specific additive component ======================= #
            # per gene molecule contribution that cannot be explained by cell state signatures
            # these counts are distributed equally between all spots (e.g. background, free-floating RNA)
            self.gene_add_hyp = pm.Gamma('gene_add_hyp', 1, 1, shape=2)
            self.gene_add = pm.Gamma('gene_add', self.gene_add_hyp[0],
                                     self.gene_add_hyp[1], shape=(self.n_genes, 1))

            # =====================Gene-specific overdispersion ======================= #
            self.phi_hyp = pm.Gamma('phi_hyp', mu=phi_hyp_prior['mean'],
                                    sigma=phi_hyp_prior['sd'], shape=(1, 1))
            self.gene_E = pm.Exponential('gene_E', self.phi_hyp, shape=(self.n_genes, 1))

            # =====================Expected expression ======================= #
            # expected expression
            self.mu_biol = pm.math.dot(self.spot_factors, self.gene_factors.T) * self.gene_level.T \
                           + self.gene_add.T + self.spot_add
            # tt.printing.Print('mu_biol')(self.mu_biol.shape)

            # =====================DATA likelihood ======================= #
            # Likelihood (sampling distribution) of observations & add overdispersion via NegativeBinomial / Poisson
            self.data_target = pm.NegativeBinomial('data_target', mu=self.mu_biol,
                                                   alpha=1 / (self.gene_E.T * self.gene_E.T),
                                                   observed=self.x_data,
                                                   total_size=self.X_data.shape)

            # =====================Compute nUMI from each factor in spots  ======================= #                          
            self.nUMI_factors = pm.Deterministic('nUMI_factors',
                                                 (self.spot_factors * (self.gene_factors * self.gene_level).sum(0)))
Example #12
0
# plt.show()
# ======================================================================
# unpooled_model
# ======================================================================
with pm.Model() as unpooled_model:
    # define priors
    sigma = pm.HalfCauchy('sigma', beta=10, testval=1.)

    # mu = pm.Uniform('mu', 0, 10)
    beta = pm.Normal('beta', 0, 20, shape=companiesABC)
    beta1 = pm.Normal('beta1', 0, 20, shape=companiesABC)
    beta2 = pm.Normal('beta2', 0, 10)
    # theta = pm.Uniform('theta', lower=0, upper=10)

    mu = pm.Deterministic(
        'mu',
        tt.exp(beta[companyABC] + beta1[companyABC] * elec_year +
               beta2 * elec_tem))
    # mu = tt.exp(beta + beta1 * elec_year + beta2 * elec_tem)
    # mu = pm.math.exp(theta)
    Observed_pred = pm.NegativeBinomial("Observed_pred",
                                        mu=mu,
                                        alpha=sigma,
                                        shape=elec_faults.shape)  # observed values
    Observed = pm.NegativeBinomial("Observed",
                                   mu=mu,
                                   alpha=sigma,
                                   observed=elec_faults)  # observed values

    start = pm.find_MAP()
    # step1 = pm.Slice([beta, beta1, beta2])
    # step = pm.Metropolis()
# we use a shared variable from theano to feed the x values into the model
# this is needed for PPC
# when using the model for predictions we can set this shared variable to x_test
shared_x = shared(x_train)

# training the model
# model specifications in PyMC3 are wrapped in a with-statement
with pm.Model() as model:
    # Define priors
    x_coeff = pm.Normal('x', 0, sd=20)  # prior for coefficient of x
    intercept = pm.Normal('Intercept', 0, sd=20)  # prior for the intercept
    sigma = pm.HalfCauchy('sigma', beta=10)  # prior for the error term of due to the noise

    reg = intercept + tt.dot(shared_x, x_coeff)
    p = pm.Deterministic("p", invlogit(reg))  # represent the logistic regression relationship

    # Define likelihood
    likelihood = pm.Bernoulli('y', p=p, observed=y_train)

    # Inference!
    trace = pm.sample(1000)  # draw 1000 posterior samples using NUTS sampling

# predicting the unseen y values
# uses posterior predictive checks (PPC)
shared_x.set_value(x_test)  # let's set the shared x to the test dataset
ppc = pm.sample_ppc(trace, model=model, samples=1000)  # performs PPC
predictions = ppc['y'].mean(axis=0)  # compute the mean of the samples draws from each new y

predictions = predictions >= 0.5
# now you can check the error
Example #14
0
from pandas_datareader import data
import pymc3 as pm
import matplotlib.pyplot as plt
import numpy as np

returns = data.get_data_google('SPY', start='2008-5-1',
                               end='2009-12-1')['Close'].pct_change()
print(returns)

with pm.Model() as sp500_model:
    nu = pm.Exponential('nu', 1. / 10, testval=5.)
    sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
    s = pm.GaussianRandomWalk('s', sigma**-2, shape=len(returns))
    volatility_process = pm.Deterministic('volatility_process',
                                          pm.math.exp(-2 * s))
    r = pm.StudentT('r', nu, lam=volatility_process, observed=returns)

with sp500_model:
    trace = pm.sample(2000)
pm.traceplot(trace, [nu, sigma])

fig, ax = plt.subplots(figsize=(15, 8))
returns.plot(ax=ax)
ax.plot(returns.index, 1 / np.exp(trace['s', ::5].T), 'r', alpha=.03)
ax.set(title='volatility_process', xlabel='time', ylabel='volatility')
ax.legend(['S&P500', 'stochastic volatility process'])
plt.show()
Example #15
0
def main(args):
    print("Begin", file=sys.stderr)
    check_args(args)
    #######################################
    ##
    ## Import Data, remove missing guides
    ##
    #######################################
    print("Import Data, remove missing guides", file=sys.stderr)
    data = pd.read_table(args.input_data, sep="\t", header=0)
    hs_zero = data['HS_reads'] > 0
    ls_zero = data['LS_reads'] > 0
    rm_zero = hs_zero & ls_zero
    data = data[rm_zero]
    #######################################
    ##
    ## Downsample larger lib to comparable
    ##
    #######################################
    print("Downsample", file=sys.stderr)
    ## Rescale to floats
    rescale = min(data['LS_reads'].sum(),
                  data['HS_reads'].sum()) / data.loc[:,
                                                     ('HS_reads',
                                                      'LS_reads')].sum(axis=0)
    data.loc[:, ('HS_reads', 'LS_reads')] *= rescale
    ## Sample downsized library
    runif = np.random.uniform(size=data.loc[:, ('HS_reads', 'LS_reads')].shape)
    int_part, sample_p = np.divmod(data.loc[:, ('HS_reads', 'LS_reads')], 1)
    data.loc[:, ('HS_reads', 'LS_reads')] = int_part + (runif < sample_p)
    ## Return as int
    data.loc[:,
             ('HS_reads',
              'LS_reads')] = data.loc[:,
                                      ('HS_reads', 'LS_reads')].astype(int) + 1
    #######################################
    ##
    ## Calc. simple data representations
    ##
    #######################################
    data['beta_mean'] = data['LS_reads'] / (data['LS_reads'] +
                                            data['HS_reads'])
    data['log(LS/HS)'] = np.log(data['LS_reads'] / data['HS_reads'])
    #######################################
    ##
    ## Organize positional information
    ##
    #######################################
    print("Parse positional information", file=sys.stderr)
    ## Line guide effects up to genome
    targ_data = data[ (~data['Coordinates'].str.contains("NT")) &\
                      (~data['Coordinates'].str.contains('CTRL')) &\
                      (~data['Coordinates'].str.contains('FILLER-LV2')) &\
                      (~data['Coordinates'].str.contains('FILLER-SgO')) ]
    if args.no_offsets:
        plus_offsets = [0, 0]
        minus_offsets = [0, 0]
    else:
        plus_offsets = [152, 147]
        minus_offsets = [146, 153]
    uniq_chrom = np.unique(
        [coord.split(':')[0] for coord in targ_data['Coordinates']])
    chrom2idx = OrderedDict([(x, i) for i, x in enumerate(uniq_chrom)])
    idx2chrom = OrderedDict([(i, x) for i, x in enumerate(uniq_chrom)])
    pos_array = np.array([
        (chrom2idx[coord.split(':')[0]],
         int(coord.split(':')[1].split('-')[1]) - plus_offsets[0],
         int(coord.split(':')[1].split('-')[1]) +
         plus_offsets[1]) if coord.split(':')[2] == '+' else
        (chrom2idx[coord.split(':')[0]],
         int(coord.split(':')[1].split('-')[1]) - minus_offsets[0],
         int(coord.split(':')[1].split('-')[1]) + minus_offsets[1])
        for coord in targ_data['Coordinates']
    ])
    ## Get genomic windows
    genome_lims = OrderedDict([
        (idx, (pos_array[pos_array[:, 0] == idx,
                         1].min(), pos_array[pos_array[:, 0] == idx, 2].max()))
        for idx, chrom in idx2chrom.items()
    ])
    sliding_window = [
        (idx,
         np.vstack((np.arange(*lims, args.step_size),
                    np.minimum(
                        np.arange(*lims, args.step_size) + args.window_size,
                        lims[1]))).T) for idx, lims in genome_lims.items()
    ]
    sliding_window = np.concatenate([
        np.concatenate((np.tile([[idx]], (a_window.shape[0], 1)), a_window),
                       axis=1) for idx, a_window in sliding_window
    ])
    sliding_window = sliding_window[[
        np.any(check_overlap_bed(interval, pos_array))
        for interval in sliding_window
    ]]
    ## Get chromosome
    chrom = targ_data['Coordinates'].iloc[0].split(':')[0]
    #######################################
    ##
    ## Process guide data
    ##
    #######################################
    print("Process guide data", file=sys.stderr)
    ovl_array = np.stack([
        check_overlap_bed(guide_interval, sliding_window)
        for guide_interval in pos_array
    ],
                         axis=0).astype(int)
    ovl_array = np.concatenate((np.zeros_like(ovl_array[:, 0:1]), ovl_array),
                               axis=1)
    ovl_dex = pd.DataFrame(
        ovl_array,
        columns=["wnd_{}".format(i) for i in np.arange(ovl_array.shape[1])])

    NT_count = data.loc[(data['Coordinates'].str.contains("NT")
                         | data['Coordinates'].str.contains("CTRL")),
                        ('Coordinates', 'HS_reads', 'LS_reads')].shape[0]
    NT_hold = np.zeros((NT_count, ovl_array.shape[1])).astype(int)
    NT_hold[:, 0] = 1
    NT_dex = pd.DataFrame(
        NT_hold,
        columns=["wnd_{}".format(i) for i in np.arange(ovl_array.shape[1])])

    wind_data = pd.concat(
        (pd.concat(
            (data.loc[(data['Coordinates'].str.contains("NT")
                       | data['Coordinates'].str.contains("CTRL")),
                      ('Coordinates', 'HS_reads', 'LS_reads')].reset_index(
                          drop=True), NT_dex.reset_index(drop=True)),
            axis=1).reset_index(drop=True),
         pd.concat((targ_data.loc[:, ('Coordinates', 'HS_reads',
                                      'LS_reads')].reset_index(drop=True),
                    ovl_dex.reset_index(drop=True)),
                   axis=1).reset_index(drop=True)),
        axis=0,
        ignore_index=True)
    max_idx = max([
        int(item.replace('wnd_', '')) for item in wind_data.columns
        if 'wnd' in item
    ])
    #######################################
    ##
    ## Call peaks on chunk
    ##
    #######################################
    print("Call peaks", file=sys.stderr)
    chunk_size = math.ceil(float(max_idx) / args.job_range)
    start_idx = 1 + (chunk_size * args.job_index)
    end_idx = start_idx + chunk_size

    peak_calls = []
    diff_hdr = []

    for i in range(start_idx, min(max_idx, end_idx)):
        print("Starting wnd_{}".format(i))
        group0 = (wind_data['wnd_0'] == 1).astype(int)
        group1 = (wind_data['wnd_{}'.format(i)] == 1).astype(int)
        slicer = np.vstack([group0, group1]).T
        use_data = wind_data[np.sum(slicer, axis=1) == 1]
        slicer = slicer[np.sum(slicer, axis=1) == 1]
        slicer = np.argmax(slicer, axis=1)
        e_mean = np.mean(np.log(wind_data['LS_reads'] / wind_data['HS_reads']))
        e_sd = np.std(np.log(wind_data['LS_reads'] / wind_data['HS_reads']))
        ct_mean = np.mean(wind_data['LS_reads'].values +
                          wind_data['HS_reads'].values)
        ct_sd = np.std(wind_data['LS_reads'].values +
                       wind_data['HS_reads'].values)
        g_var = (ct_sd**2) - ct_mean
        if g_var <= 0:
            g_sigma = ct_sd
            print(
                "Warning! Count data is underdispersed, results may be inaccurate."
            )
        else:
            g_sigma = np.sqrt(g_var)

        with pm.Model() as model:
            g = pm.Gamma('guide_intensity',
                         mu=ct_mean,
                         sigma=g_sigma,
                         shape=slicer.shape[0])

            e = pm.Normal('enhancer_activity', mu=e_mean, sigma=e_sd, shape=2)
            p = pm.Deterministic('bin_bias', tt.nnet.sigmoid(e))

            l = pm.Deterministic('low_bin_theta', g * p[slicer])
            h = pm.Deterministic('high_bin_theta', g * (1 - p[slicer]))

            diff = pm.Deterministic('enhancer_boost', e[1] - e[0])

            l_ct = pm.Poisson('low_reads', mu=l, observed=use_data['LS_reads'])
            h_ct = pm.Poisson('high_reads',
                              mu=h,
                              observed=use_data['HS_reads'])

        with model:
            trace = pm.sample(1000, tune=4000, cores=8)

        hdr = pm.stats.hpd(trace['enhancer_boost'], alpha=0.001)
        thresh = [-args.rope_threshold, args.rope_threshold]
        the_call = check_overlap(np.array(thresh), np.expand_dims(hdr,
                                                                  axis=0))[0]

        peak_calls.append(the_call)
        diff_hdr.append(hdr)

    with open(args.output_data, 'w') as f:
        for i, j in enumerate(range(start_idx, min(max_idx, end_idx))):
            peak_position = sliding_window[j - 1]
            region_hdr = diff_hdr[i]
            region_call = peak_calls[i] == False
            interval_info = [
                idx2chrom[peak_position[0]], peak_position[1],
                peak_position[2], "{},{}".format(*region_hdr), region_call, '.'
            ]
            print("{}\t{}\t{}\t{}\t{}\t{}".format(*interval_info), file=f)

    print("Done.", file=sys.stderr)
Example #16
0
#Setup inversion
pi = 3.14
Niter = 300000
conds_mod = 3.5

path_results = '../../../results/'
with pm.Model() as model:
    gpsconst = pm.Uniform('gpsconst', lower=-15, upper=15)
    A_mod = pm.Uniform('A_mod', lower=0, upper=1000)
    B_mod = pm.Uniform('B_mod', lower=0, upper=1000)
    E_mod = pm.Uniform('E_mod', lower=0, upper=1000)

    Vd_exp = pm.Uniform('Vd_exp', lower=8, upper=11)
    Vs_exp = pm.Uniform('Vs_exp', lower=8, upper=12)
    kd_exp = pm.Uniform('kd_exp', lower=7, upper=10)
    Vd_mod = pm.Deterministic('Vd_mod', 10**Vd_exp)
    Vs_mod = pm.Deterministic('Vs_mod', 10**Vs_exp)
    #ratio = pm.Uniform('ratio',lower = 0.1,upper = 5e+3)
    kd_mod = pm.Deterministic('kd_mod', 10**kd_exp)
    pspd_mod = pm.Uniform('pspd_mod', lower=1e+5, upper=1e+7)
    #conds_mod = pm.Uniform('conds_mod',lower=1,upper=10)
    condd_mod = pm.Uniform('condd_mod', lower=1, upper=30)
    dsh_mod = pm.Normal('dsh_mod', mu=dsh, sigma=dshErr)
    xsh_mod = pm.Normal('xsh_mod', mu=xsh, sigma=xshErr)
    ysh_mod = pm.Normal('ysh_mod', mu=ysh, sigma=yshErr)
    coeffx = cs * dsh_mod * (x - xsh_mod) / (dsh_mod**2 + (x - xsh_mod)**2 +
                                             (y - ysh_mod)**2)**(5. /
                                                                 2) * Vd_mod
    coeffy = cs * dsh_mod * (y - ysh_mod) / (dsh_mod**2 + (x - xsh_mod)**2 +
                                             (y - ysh_mod)**2)**(5. /
                                                                 2) * Vd_mod
Example #17
0
# pm.traceplot(trace1)
# plt.show()
with pm.Model() as unpooled_model:
    # define priors
    alpha = pm.HalfCauchy('alpha', 10, testval=.9)

    beta = pm.Normal('beta', 0, 100, shape=companiesABC, testval=-3.)
    # beta1 = pm.Normal('beta1', 0, 10, shape=companiesABC, testval=.3)
    # beta2 = pm.Normal('beta2', 0, 100, testval=0.01)
    # beta3 = pm.Normal('beta3', 0, 100)

    theta = pm.Normal('theta', 0, 100, shape=companiesABC)
    theta1 = pm.Normal('theta1', 0, 20, shape=companiesABC)
    beta1 = theta[companyABC] + theta1[companyABC] * x_shared1
    # mu = tt.exp(beta[companyABC] + beta1[companyABC]*elec_year + beta2*elec_tem)
    beta_mu = pm.Deterministic(
        'beta_mu', tt.exp(beta[companyABC] + beta1 * x_shared))

    # Observed_pred = pm.Weibull("Observed_pred",  alpha=mu, beta=sigma, shape=elec_faults.shape)  # observed values
    Observed = pm.Weibull("Observed",
                          alpha=alpha,
                          beta=beta_mu,
                          observed=y_shared)  # 观测值

    start = pm.find_MAP()
    # step = pm.Slice([beta1, u])
    trace2 = pm.sample(2000, start=start)
chain2 = trace2[1000:]
varnames1 = ['alpha', 'beta_mu']
# varnames2 = ['beta', 'beta1', 'beta2', 'alpha', 'beta3']
# pm.plot_posterior(chain2, varnames2, ref_val=0)
pm.traceplot(chain2)
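# A short hedged side note (a sketch, not part of the script above): in PyMC3's
# parameterization, Weibull(alpha, beta) has mean beta * gamma(1 + 1/alpha), so
# modelling beta_mu = exp(linear predictor) keeps the scale, and hence the mean,
# strictly positive. Quick numeric check with hypothetical posterior means:
from scipy.special import gamma

alpha_hat, beta_hat = 2.0, 1.5   # hypothetical posterior means of alpha and beta_mu
weibull_mean = beta_hat * gamma(1.0 + 1.0 / alpha_hat)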
예제 #18
0
def density_bhm_harmonic_dht(data,
                             omega,
                             use_mcmc=True,
                             nchains=2,
                             ncores=2,
                             tune=1500):
    # Full model
    tdays = shared(data['tdays'])
    nparams = 6
    nt = data['n_times']
    nomega = len(omega)

    with pm.Model() as rho_model:
        ###
        # Create priors for each of our means
        BoundedNormal = pm.Bound(pm.Normal, lower=0.0)
        aa = pm.Normal('aa', mu=0, sd=2, shape=4)
        # Order the mid-points
        aa_mid = pm.Normal('aa_mid',
                           mu=np.array([1, 2]),
                           sd=np.array([0.25, 0.25]),
                           shape=2,
                           transform=pm.distributions.transforms.ordered,
                           testval=np.array([0.5, 1.2]))

        Aa = pm.Normal('Aa', mu=0, sd=1, shape=(nomega, nparams))
        Ba = pm.Normal('Ba', mu=0, sd=1, shape=(nomega, nparams))

        mu_beta_0 = pm.Deterministic(
            'mu_beta_0', harmonic_beta(aa[0], Aa[:, 0], Ba[:, 0], omega,
                                       tdays))
        mu_beta_1 = pm.Deterministic(
            'mu_beta_1', harmonic_beta(aa[1], Aa[:, 1], Ba[:, 1], omega,
                                       tdays))
        mu_beta_2 = pm.Deterministic(
            'mu_beta_2',
            harmonic_beta(aa_mid[0], Aa[:, 2], Ba[:, 2], omega, tdays))
        mu_beta_3 = pm.Deterministic(
            'mu_beta_3', harmonic_beta(aa[2], Aa[:, 3], Ba[:, 3], omega,
                                       tdays))
        mu_beta_4 = pm.Deterministic(
            'mu_beta_4',
            harmonic_beta(aa_mid[1], Aa[:, 4], Ba[:, 4], omega, tdays))
        mu_beta_5 = pm.Deterministic(
            'mu_beta_5', harmonic_beta(aa[3], Aa[:, 5], Ba[:, 5], omega,
                                       tdays))

        # Half-normal priors
        #sigma_beta = pm.HalfNormal('sigma_beta', sd=1.0, shape=(nparams,))
        #sigma_curve = pm.HalfNormal('sigma_curve', sd=2.0 )

        # Inverse Gamma priors
        sigma_beta = pm.InverseGamma('sigma_beta', 1, 1, shape=(nparams, ))
        sigma_curve = pm.InverseGamma('sigma_curve', 1, 1)

        beta_0 = pm.Normal('beta_0', mu=mu_beta_0, sd=sigma_beta[0], shape=nt)
        beta_1 = BoundedNormal('beta_1',
                               mu=mu_beta_1,
                               sd=sigma_beta[1],
                               shape=nt)
        beta_3 = BoundedNormal('beta_3',
                               mu=mu_beta_3,
                               sd=sigma_beta[3],
                               shape=nt)
        beta_5 = BoundedNormal('beta_5',
                               mu=mu_beta_5,
                               sd=sigma_beta[5],
                               shape=nt)

        # This is a trick for ordering along the last axis of a multivariate distribution
        # (it seems to work; see the standalone toy sketch after this function)
        beta_mid = BoundedNormal('beta_mid',
                                 mu=tt.stack([mu_beta_2, mu_beta_4]).T,
                                 sd=tt.stack([sigma_beta[2], sigma_beta[4]]).T,
                                 shape=(nt, 2),
                                 transform=pm.distributions.transforms.ordered)

        beta_s = [
            beta_0,
            beta_1,
            beta_mid[..., 0],
            beta_3,
            beta_mid[..., 1],
            beta_5,
        ]

        ###
        # Generate the likelihood function using the deterministic variable as the mean
        mu_x = double_tanh_pm(beta_s, data['timeidx'], data['z'])

        # shape parameter not required, as shape is specified in the priors...
        rho_out = pm.Normal('rho',
                            mu=mu_x,
                            sd=sigma_curve,
                            observed=data['rho'])

        ###
        # Inference step
        #trace = pm.sample(500)
        if use_mcmc:
            trace = pm.sample(500,
                              tune=tune,
                              step=pm.NUTS(),
                              cores=ncores,
                              chains=nchains)
        else:
            # Use variational inference
            inference = pm.ADVI()
            approx = pm.fit(n=20000, method=inference)
            trace = approx.sample(draws=500)

    return trace, rho_model, tdays
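# A standalone toy sketch of the "ordering along the last axis" trick referenced
# inside density_bhm_harmonic_dht above (hypothetical toy model, not the author's
# code): the ordered transform applied to a two-column variable keeps
# column 0 <= column 1 row-wise.
import numpy as np
import pymc3 as pm

with pm.Model() as toy_ordered:
    pair = pm.Normal('pair',
                     mu=np.array([0.0, 1.0]),
                     sd=1.0,
                     shape=(10, 2),
                     transform=pm.distributions.transforms.ordered,
                     testval=np.tile(np.array([0.0, 1.0]), (10, 1)))
    # after sampling, trace['pair'][..., 0] <= trace['pair'][..., 1] holds row-wise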
outcome=df['deposit']
data = df[['age', 'job', 'marital', 'education', 'default', 'housing', 'loan', 
           'contact', 'month', 'day_of_week', 'duration', 'campaign', 'pdays', 'previous', 'poutcome', 'euribor3m']]
data['outcome'] = outcome
data.corr()['outcome'].sort_values(ascending=False)
#%%  this is really slow convergence
y_simple = data['outcome']
x_n = 'duration' 
x_0 = data[x_n].values
x_c = x_0 - x_0.mean()

with pm.Model() as model_simple:
    α = pm.Normal('α', mu=0, sd=10)
    β = pm.Normal('β', mu=0, sd=10)
    μ = α + pm.math.dot(x_c, β)    
    θ = pm.Deterministic('θ', pm.math.sigmoid(μ))
    bd = pm.Deterministic('bd', -α/β)
    y_1 = pm.Bernoulli('y_1', p=θ, observed=y_simple)
    trace_simple = pm.sample(1000, tune=1000)
#%% this is slow convergence
theta = trace_simple['θ'].mean(axis=0)
idx = np.argsort(x_c)
plt.plot(x_c[idx], theta[idx], color='C2', lw=3)
plt.vlines(trace_simple['bd'].mean(), 0, 1, color='k')
bd_hpd = az.hpd(trace_simple['bd'])
plt.fill_betweenx([0, 1], bd_hpd[0], bd_hpd[1], color='k', alpha=0.5)
plt.scatter(x_c, np.random.normal(y_simple, 0.02),
            marker='.', color=[f'C{x}' for x in y_simple])
az.plot_hpd(x_c, trace_simple['θ'], color='C2')
plt.xlabel(x_n)
plt.ylabel('θ', rotation=0)
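# Why bd = -α/β is the decision boundary (a short derivation in the notation of
# the model above, not part of the original cell): θ = sigmoid(α + β·x_c) equals
# 0.5 exactly when the linear predictor is zero, i.e. α + β·x_c = 0, hence
# x_c = -α/β. A quick numeric check with the posterior means:
import numpy as np

alpha_hat = trace_simple['α'].mean()
beta_hat = trace_simple['β'].mean()
bd_hat = -alpha_hat / beta_hat
theta_at_bd = 1.0 / (1.0 + np.exp(-(alpha_hat + beta_hat * bd_hat)))  # == 0.5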
    p2 = 1 - p1
    p = tt.stack([p1, p2])
    assignment = pm.Categorical("assignment", p,
                                shape = data.shape[0],
                                testval = np.random.randint(0, 2, data.shape[0]))

print("prior assignment, with p = %.2f:" % p1.tag.test_value)
print(assignment.tag.test_value[:10])

with model:
    sds = pm.Uniform("sds", 0, 100, shape =2)
    centers = pm.Normal("centers",
                        mu = np.array([120, 190]),
                        sd = np.array([10, 10]),
                        shape = 2)
    center_i = pm.Deterministic('center_i', centers[assignment])
    sd_i = pm.Deterministic('sd_i', sds[assignment])

    # and to combine it with observations:
    observations = pm.Normal("obs", mu=center_i, sd=sd_i, observed=data)

print("Random assignments: ", assignment.tag.test_value[:4], "...")
print("Assigned center: ", center_i.tag.test_value[:4], "...")
print("Assigned standard deviation: ", sd_i.tag.test_value[:4])

with model:
    step1 = pm.Metropolis(vars=[p, sds, centers])
    step2 = pm.ElemwiseCategorical(vars=[assignment])
    trace = pm.sample(25000, step=[step1, step2])

figsize(12.5, 9)
import pymc3 as pm
from numpy import ones, array

# Samples for each dose level
n = 5 * ones(4, dtype=int)
# Log-dose
dose = array([-.86, -.3, -.05, .73])

with pm.Model() as model:

    # Logit-linear model parameters
    alpha = pm.Normal('alpha', 0, sd=100.)
    beta = pm.Normal('beta', 0, sd=1.)

    # Calculate probabilities of death
    theta = pm.Deterministic('theta', pm.math.invlogit(alpha + beta * dose))

    # Data likelihood
    deaths = pm.Binomial('deaths', n=n, p=theta, observed=[0, 1, 3, 5])


def run(n=1000):
    if n == "short":
        n = 50
    with model:
        pm.sample(n, tune=1000)


if __name__ == '__main__':
    run()
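# A hedged follow-up sketch (not in the original example): in this classic
# bioassay setup, the log-dose at which the death probability equals 0.5 (the
# LD50 on the log-dose scale) is -alpha/beta, since invlogit(alpha + beta*dose)
# crosses 0.5 exactly where alpha + beta*dose = 0. If added before sampling:
with model:
    ld50 = pm.Deterministic('ld50', -alpha / beta)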
예제 #22
0
    def learn_bayesian_linear_model(self,
                                    encoded_plans,
                                    prior_weights,
                                    number_of_dimensions,
                                    sd=1,
                                    sampling_count=2000,
                                    num_chains=2,
                                    bias_preference=0.0):

        #the encoded plans contains a list of [<encoding>,<rating>]
        input_dataset = np.array([x[0] for x in encoded_plans], dtype=np.float)
        output_dataset = np.array([x[1] for x in encoded_plans],
                                  dtype=np.float)

        #TODO USE SAME MODEL AND TEST ON DUMMY DATA WITH CLEARLY KNOWN FUNCTION
        # maybe it is ok that it does not converge but still works with Metropolis sampling; is this expected in the early stages?
        bias_preference = tt.constant(bias_preference)
        #todo Make bias A  learnable parameter
        with pm.Model() as linear_model:
            # Intercept
            # alpha = pm.Normal('alpha', mu=0.5, sd=sd)
            alpha = pm.Deterministic('alpha', bias_preference)

            cov = np.diag(np.full((number_of_dimensions, ),
                                  sd))  #for both mu and beta (slope)
            #todo note: may consider making mu and cov as parameters sampled from distributions too
            # mu = pm.MvNormal('mu', mu=prior_weights, cov=cov, shape=(number_of_dimensions,))

            # Slope
            prior_weights = np.random.rand(number_of_dimensions)  # NOTE: overwrites the prior_weights argument with random values
            betas = pm.MvNormal('betas',
                                mu=prior_weights,
                                cov=cov,
                                shape=(number_of_dimensions, ))

            # Standard deviation

            sigma = pm.HalfNormal('sigma', sd=sd)
            # sigma = sd #seems to work better

            # Estimate of mean
            mean = alpha + tt.dot(input_dataset, betas)

            # Observed values
            Y_obs = pm.Normal('Y_obs',
                              mu=mean,
                              sd=sigma,
                              observed=output_dataset)

            # Sampler
            step = pm.NUTS()
            # step = pm.Metropolis()
            # step = pm.HamiltonianMC()

            # Posterior distribution
            linear_params_trace = pm.sample(
                sampling_count, step, chains=num_chains, cores=num_chains
            )  # TODO NOTE: do not add tuning if deterministic; fails spectacularly, not its intended use.
        #end with
        # todo look into the alpha values that were sampled, because they didn't appear in the plot
        self.full_param_trace = linear_params_trace  # keep the full trace as well
        self.linear_params_values = linear_params_trace[
            -2000:]  # we only take the last 2000, and assume it is after sufficient mixing and good values.
        self.set_normal_distr_params()
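# A minimal usage sketch under stated assumptions (standalone, not part of the
# class above): given the trace returned by pm.sample in
# learn_bayesian_linear_model (stored there as self.linear_params_values), the
# posterior-mean weights score a new plan encoding with a dot product.
# `trace`, `new_encoding` and `bias` are hypothetical placeholders here.
import numpy as np

betas_mean = trace['betas'].mean(axis=0)             # posterior mean slope vector
bias = 0.0                                           # matches the bias_preference default
new_encoding = np.random.rand(betas_mean.shape[0])   # hypothetical plan encoding
predicted_rating = bias + float(np.dot(new_encoding, betas_mean))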
예제 #23
0
def SEIR_with_extensions(
    new_cases_obs,
    change_points_list,
    date_begin_simulation,
    num_days_sim,
    diff_data_sim,
    N,
    priors_dict=None,
    with_random_walk=True,
):
    """
        This model includes 3 extensions to the `SIR_model_with_change_points`:
            1.  The SIR model now includes an incubation period during which infected
                people are not infectious, in the spirit of an SEIR model.
                In contrast to the SEIR model, the length of the incubation period is not
                exponentially distributed but follows a lognormal distribution.
            2.  People that are infectious are observed with a delay that is now
                lognormal distributed. In the `SIR_model_with_change_points` we assume
                a fixed delay between infection and observation.
            3.  `lambda_t` has an additive term given by a Gaussian random walk.
                Thereby, we want to fit any deviation in `lambda_t` that is not
                captured by the change points. If the change points are wisely
                chosen, and the rest of the model captures the dynamics well, one
                would expect that the amplitude of the random walk is small.
                In this case, the posterior distribution of `sigma_random_walk`
                will be small.

        Parameters
        ----------
        new_cases_obs : list or array
            Timeseries (day over day) of newly reported cases (not the total number)

        change_points_list : list of dicts
            List of dictionaries, each corresponding to one change point

            Each dict can have the following key-value pairs. If a pair is not provided,
            the respective default is used.
                * pr_mean_date_begin_transient: datetime.datetime, NO default
                * pr_median_lambda:             float, default: 0.4
                * pr_sigma_lambda:              float, default: 0.5
                * pr_sigma_begin_transient:     float, default: 3
                * pr_median_transient_len:      float, default: 3
                * pr_sigma_transient_len:       float, default: 0.3

        date_begin_simulation: datetime.datetime.
            The begin of the simulation data

        num_days_sim : integer
            Number of days to forecast into the future

        diff_data_sim : integer
            Number of days that the simulation-begin predates the first data point in
            `new_cases_obs`. This is necessary so the model can fit the reporting delay.
            Set this parameter to a value larger than what you expect to find for
            the reporting delay.

        N : number
            The population size. For Germany, we used 83e6

        priors_dict : dict
            Dictionary of the prior assumptions

            Possible key-value pairs (and default values) are:
                * pr_beta_I_begin :               number, default: 100
                * pr_beta_E_begin_scale :         number, default: 10
                * pr_median_lambda_0 :            number, default: 2
                * pr_sigma_lambda_0 :             number, default: 0.7
                * pr_median_mu :                  number, default: 1/3
                * pr_sigma_mu :                   number, default: 0.3
                * pr_median_delay :               number, default: 5
                * pr_sigma_delay :                number, default: 0.2
                * scale_delay :                   number, default: 0.3
                * pr_beta_sigma_obs :             number, default: 10
                * pr_sigma_random_walk :          number, default: 0.05
                * pr_mean_median_incubation :     number, default: 5
                    https://www.ncbi.nlm.nih.gov/pubmed/32150748
                    https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7014672/
                    about -1 day compared to the value reported in the sources, because persons likely become infectious earlier.
                * pr_sigma_median_incubation :    number, default: 1
                    The error reported in the sources above is smaller, but since the -1 day shift is a rough estimate, a larger error is used here.
                * sigma_incubation :              number, default: 0.418
                    https://www.ncbi.nlm.nih.gov/pubmed/32150748

        with_random_walk: boolean
            Whether to add a Gaussian random walk to `lambda_t`. Computationally expensive.

        Returns
        -------
        : pymc3.Model
            Returns an instance of the pymc3 model with the change points applied; see the usage sketch after this function.

    """
    if priors_dict is None:
        priors_dict = dict()

    default_priors = dict(
        pr_beta_I_begin=100,
        pr_beta_E_begin_scale=10,
        pr_median_lambda_0=2,
        pr_sigma_lambda_0=0.7,
        pr_median_mu=1 / 3,
        pr_sigma_mu=0.3,
        pr_median_delay=5,
        pr_sigma_delay=0.2,
        scale_delay=0.3,
        pr_beta_sigma_obs=10,
        pr_sigma_random_walk=0.05,
        pr_mean_median_incubation=5,
        # https://www.ncbi.nlm.nih.gov/pubmed/32150748
        # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7014672/
        # about -1 day because persons likely become infectious earlier
        pr_sigma_median_incubation=1,
        sigma_incubation=0.418,
        #  https://www.ncbi.nlm.nih.gov/pubmed/32150748
    )
    if not with_random_walk:
        del default_priors["pr_sigma_random_walk"]

    default_priors_change_points = dict(
        pr_median_lambda=default_priors["pr_median_lambda_0"],
        pr_sigma_lambda=default_priors["pr_sigma_lambda_0"],
        pr_sigma_date_begin_transient=3,
        pr_median_transient_len=3,
        pr_sigma_transient_len=0.3,
        pr_mean_date_begin_transient=None,
    )

    for prior_name in priors_dict.keys():
        if prior_name not in default_priors:
            raise RuntimeError(f"Prior with name {prior_name} not known")
    for change_point in change_points_list:
        for prior_name in change_point.keys():
            if prior_name not in default_priors_change_points:
                raise RuntimeError(f"Prior with name {prior_name} not known")

    for prior_name, value in default_priors.items():
        if prior_name not in priors_dict:
            priors_dict[prior_name] = value
            print(f"{prior_name} was set to default value {value}")
    for prior_name, value in default_priors_change_points.items():
        for i_cp, change_point in enumerate(change_points_list):
            if prior_name not in change_point:
                change_point[prior_name] = value
                print(
                    f"{prior_name} of change point {i_cp} was set to default value {value}"
                )

    if (
        diff_data_sim
        < priors_dict["pr_median_delay"]
        + 3 * priors_dict["pr_median_delay"] * priors_dict["pr_sigma_delay"]
    ):
        raise RuntimeError("diff_data_sim is to small compared to the prior delay")
    if num_days_sim < len(new_cases_obs) + diff_data_sim:
        raise RuntimeError(
            "Simulation ends before the end of the data. Increase num_days_sim."
        )

    with pm.Model() as model:
        # all pm functions now apply on the model instance
        # true cases at begin of loaded data but we do not know the real number
        I_begin = pm.HalfCauchy(name="I_begin", beta=priors_dict["pr_beta_I_begin"])
        E_begin_scale = pm.HalfCauchy(
            name="E_begin_scale", beta=priors_dict["pr_beta_E_begin_scale"]
        )
        new_E_begin = pm.HalfCauchy("E_begin", beta=E_begin_scale, shape=9)

        # fraction of people that are newly infected each day
        lambda_list = []
        lambda_list.append(
            pm.Lognormal(
                name="lambda_0",
                mu=np.log(priors_dict["pr_median_lambda_0"]),
                sigma=priors_dict["pr_sigma_lambda_0"],
            )
        )
        for i, cp in enumerate(change_points_list):
            lambda_list.append(
                pm.Lognormal(
                    name="lambda_{}".format(i + 1),
                    mu=np.log(cp["pr_median_lambda"]),
                    sigma=cp["pr_sigma_lambda"],
                )
            )

        # set the start dates of the two periods
        tr_begin_list = []
        dt_before = None
        for i, cp in enumerate(change_points_list):
            date_begin_transient = cp["pr_mean_date_begin_transient"]
            if dt_before is not None and dt_before > date_begin_transient:
                raise RuntimeError("Dates of change points are not temporally ordered")
            prior = (date_begin_transient - date_begin_simulation).days
            tr_begin = pm.Normal(
                name="transient_begin_{}".format(i),
                mu=prior,
                sigma=cp["pr_sigma_date_begin_transient"],
            )
            tr_begin_list.append(tr_begin)
            dt_before = date_begin_transient

        # transient time
        tr_len_list = []
        for i, cp in enumerate(change_points_list):
            transient_len = pm.Lognormal(
                name="transient_len_{}".format(i),
                mu=np.log(cp["pr_median_transient_len"]),
                sigma=cp["pr_sigma_transient_len"],
            )
            tr_len_list.append(transient_len)

        # build the time-dependent spreading rate
        if with_random_walk:
            sigma_random_walk = pm.HalfNormal(
                name="sigma_random_walk", sigma=priors_dict["pr_sigma_random_walk"]
            )
            lambda_t_random_walk = pm.distributions.timeseries.GaussianRandomWalk(
                name="lambda_t_random_walk",
                mu=0,
                sigma=sigma_random_walk,
                shape=num_days_sim,
                init=pm.Normal.dist(sigma=priors_dict["pr_sigma_random_walk"]),
            )
            lambda_base = lambda_t_random_walk + lambda_list[0]
        else:
            lambda_base = lambda_list[0] * tt.ones(num_days_sim)

        lambda_t_list = [lambda_base]
        lambda_step_before = lambda_list[0]
        for tr_begin, transient_len, lambda_step in zip(
            tr_begin_list, tr_len_list, lambda_list[1:]
        ):
            lambda_t = mh.smooth_step_function(
                start_val=0,
                end_val=1,
                t_begin=tr_begin,
                t_end=tr_begin + transient_len,
                t_total=num_days_sim,
            ) * (lambda_step - lambda_step_before)
            lambda_step_before = lambda_step
            lambda_t_list.append(lambda_t)

        lambda_t = sum(lambda_t_list)

        # fraction of people that recover each day, recovery rate mu
        mu = pm.Lognormal(
            name="mu",
            mu=np.log(priors_dict["pr_median_mu"]),
            sigma=priors_dict["pr_sigma_mu"],
        )

        # delay in days between contracting the disease and being recorded
        delay = pm.Lognormal(
            name="delay",
            mu=np.log(priors_dict["pr_median_delay"]),
            sigma=priors_dict["pr_sigma_delay"],
        )

        # prior of the error of observed cases
        sigma_obs = pm.HalfCauchy(
            name="sigma_obs", beta=priors_dict["pr_beta_sigma_obs"]
        )

        # -------------------------------------------------------------------------- #
        # training the model with loaded data provided as argument
        # -------------------------------------------------------------------------- #

        median_incubation = pm.Normal(
            name="median_incubation",
            mu=priors_dict["pr_mean_median_incubation"],
            sigma=priors_dict["pr_sigma_median_incubation"],
        )
        # sources: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7014672/
        #

        S_begin = N - I_begin
        S_t, new_E_t, I_t, new_I_t = _SEIR_model_with_delay(
            lambda_t=lambda_t,
            mu=mu,
            S_begin=S_begin,
            new_E_begin=new_E_begin,
            I_begin=I_begin,
            N=N,
            median_incubation=median_incubation,
            sigma_incubation=0.418,
            # https://www.ncbi.nlm.nih.gov/pubmed/32150748
        )

        new_cases_inferred = mh.delay_cases_lognormal(
            input_arr=new_I_t,
            len_input_arr=num_days_sim,
            len_output_arr=num_days_sim - diff_data_sim,
            median_delay=delay,
            scale_delay=priors_dict["scale_delay"],
            delay_betw_input_output=diff_data_sim,
        )
        num_days_data = new_cases_obs.shape[-1]

        # likelihood of the model:
        # observed cases are distributed following studentT around the model.
        # we want to approximate a Poisson distribution of new cases.
        # we choose nu=4 to get heavy tails and robustness to outliers.
        # https://www.jstor.org/stable/2290063
        pm.StudentT(
            name="_new_cases_studentT",
            nu=4,
            mu=new_cases_inferred[:num_days_data],
            sigma=tt.abs_(new_cases_inferred[:num_days_data] + 1) ** 0.5
            * sigma_obs,  # +1 and tt.abs to avoid nans
            observed=new_cases_obs,
        )

        # add these observables to the model so we can extract a time series of them
        # later via e.g. `model.trace['lambda_t']`
        pm.Deterministic("lambda_t", lambda_t)
        pm.Deterministic("new_cases", new_cases_inferred)

    return model
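# A minimal usage sketch for SEIR_with_extensions (hedged: the case counts, dates
# and change point below are hypothetical placeholders; the helper module `mh`
# and `_SEIR_model_with_delay` must be importable as in the module above):
import datetime
import numpy as np
import pymc3 as pm

new_cases_obs = np.random.randint(0, 500, size=60)   # hypothetical daily case counts
change_points = [
    dict(pr_mean_date_begin_transient=datetime.datetime(2020, 3, 16),
         pr_median_lambda=0.2)
]

model = SEIR_with_extensions(
    new_cases_obs=new_cases_obs,
    change_points_list=change_points,
    date_begin_simulation=datetime.datetime(2020, 3, 1),
    num_days_sim=90,
    diff_data_sim=16,
    N=83e6,
    with_random_walk=True,
)
with model:
    trace = pm.sample(draws=500, tune=500, init="advi+adapt_diag")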
예제 #24
0
    log_like1 = - 0.5 * n * tt.log(2 * np.pi) \
                - 0.5 * tt.log(dsigma) \
                - 0.5 * (x - mu1).T.dot(isigma).dot(x - mu1)
    log_like2 = - 0.5 * n * tt.log(2 * np.pi) \
                - 0.5 * tt.log(dsigma) \
                - 0.5 * (x - mu2).T.dot(isigma).dot(x - mu2)
    return tt.log(w1 * tt.exp(log_like1) + w2 * tt.exp(log_like2))

with pm.Model() as ATMIP_test:
    X = pm.Uniform('X',
                   shape=n,
                   lower=-2. * np.ones_like(mu1),
                   upper=2. * np.ones_like(mu1),
                   testval=-1. * np.ones_like(mu1),
                   transform=None)
    like = pm.Deterministic('like', two_gaussians(X))
    llk = pm.Potential('like', like)

with ATMIP_test:
    step = atmcmc.ATMCMC(n_chains=n_chains, tune_interval=tune_interval,
                         likelihood_name=ATMIP_test.deterministics[0].name)

trcs = atmcmc.ATMIP_sample(
                        n_steps=n_steps,
                        step=step,
                        njobs=njobs,
                        progressbar=True,
                        trace=test_folder,
                        model=ATMIP_test)

pm.summary(trcs)
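# A hedged numerical-stability note (standalone sketch, not the author's code):
# the return expression in two_gaussians above exponentiates both component
# log-likelihoods, which can underflow when they are very negative. The
# log-sum-exp identity gives an equivalent but stabler form (pm.math.logsumexp
# is the in-graph analogue in PyMC3). A plain-numpy illustration:
import numpy as np
from scipy.special import logsumexp

w = np.array([0.1, 0.9])                      # hypothetical mixture weights
log_like = np.array([-1200.0, -1210.0])       # hypothetical component log-likelihoods
naive = np.log(np.sum(w * np.exp(log_like)))  # underflows to -inf
stable = logsumexp(np.log(w) + log_like)      # finite, correct value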
예제 #25
0
with pm.Model() as unpooled_model:
    # define priors
    alpha = pm.HalfCauchy('alpha', 10, testval=.9)

    switch = pm.DiscreteUniform('switch',
                                lower=x_shared.min() + 3,
                                upper=x_shared.max() - 0.5)
    early_rate = pm.Normal('early_rate', 0, 100)
    late_rate = pm.Normal('late_rate', 0, 100)
    beta1 = pm.math.switch(x_shared <= switch, early_rate, late_rate)
    beta = pm.Normal('beta', 0, 100, shape=companiesABC)
    u = pm.Normal('u', 0, 0.0001)

    # mu = tt.exp(beta[companyABC] + beta1[companyABC]*elec_year + beta2*elec_tem)
    beta_mu = pm.Deterministic('beta_mu',
                               tt.exp(beta[Num_shared] + beta1 * x_shared + u))

    # Observed_pred = pm.Weibull("Observed_pred",  alpha=mu, beta=sigma, shape=elec_faults.shape)  # observed values
    Observed = pm.Weibull("Observed",
                          alpha=alpha,
                          beta=beta_mu,
                          observed=train_faults)  # observed values

    start = pm.find_MAP()
    # step = pm.Slice([beta1, u])
    trace2 = pm.sample(3000, start=start, tune=1000)
chain2 = trace2[1000:]
varnames1 = ['alpha', 'beta_mu', 'switch']
print(pm.df_summary(trace2, varnames1))
varnames2 = ['beta', 'early_rate', 'late_rate', 'alpha', 'u']
# pm.plot_posterior(chain2, varnames2, ref_val=0)
print("Obs from Site B: ", observations_B[:30], "...")

# In[25]:

print(np.mean(observations_A))
print(np.mean(observations_B))

# In[26]:

# Set up the pymc3 model. Again assume Uniform priors for p_A and p_B.
with pm.Model() as model:
    p_A = pm.Uniform("p_A", 0, 1)
    p_B = pm.Uniform("p_B", 0, 1)

    # Define the deterministic delta function. This is our unknown of interest.
    delta = pm.Deterministic("delta", p_A - p_B)

    # Set of observations, in this case we have two observation datasets.
    obs_A = pm.Bernoulli("obs_A", p_A, observed=observations_A)
    obs_B = pm.Bernoulli("obs_B", p_B, observed=observations_B)

    # To be explained in chapter 3.
    step = pm.Metropolis()
    trace = pm.sample(20000, step=step)
    burned_trace = trace[1000:]

# Below we plot the posterior distributions for the three unknowns:

# In[27]:

p_A_samples = burned_trace["p_A"]
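# A short hedged follow-up (not part of the original notebook cell): with the
# burned trace above, the posterior probability that site A converts better than
# site B is simply the fraction of delta samples greater than zero.
import numpy as np

delta_samples = burned_trace["delta"]
prob_A_better = np.mean(delta_samples > 0)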
예제 #27
0
    def build_model(self):

        base_numbers = self.data.n_safe.unique()
        choices = self.data.chose_risky.values

        mean_safe = np.mean(np.log(base_numbers))
        std_safe = np.std(np.log(base_numbers))

        self.coords = {
            "subject": self.unique_subjects,
            "presentation": ['first', 'second'],
        }

        with pm.Model(coords=self.coords) as self.model:

            inputs = self._get_model_input()
            for key, value in inputs.items():
                inputs[key] = pm.Data(key, value)

            # Hyperpriors for group nodes
            risky_prior_mu_mu = pm.HalfNormal("risky_prior_mu_mu",
                                              sigma=np.log(20.))
            risky_prior_mu_sd = pm.HalfCauchy('risky_prior_mu_sd', .5)
            risky_prior_mu_offset = pm.Normal(
                'risky_prior_mu_offset', mu=0, sd=1,
                dims='subject')  #shape=n_subjects)
            risky_prior_mu = pm.Deterministic(
                'risky_prior_mu',
                risky_prior_mu_mu + risky_prior_mu_sd * risky_prior_mu_offset,
                dims='subject')

            risky_prior_sd_mu = pm.HalfNormal("risky_prior_sd_mu", sigma=1.25)
            risky_prior_sd_sd = pm.HalfCauchy('risky_prior_sd_sd', .5)

            risky_prior_sd = pm.TruncatedNormal('risky_prior_sd',
                                                mu=risky_prior_sd_mu,
                                                sigma=risky_prior_sd_sd,
                                                lower=0,
                                                dims='subject')

            safe_prior_mu = mean_safe
            safe_prior_sd = std_safe

            # ix0 = first presented, ix1=later presented
            evidence_sd_mu = pm.HalfNormal("evidence_sd_mu",
                                           sigma=1.,
                                           dims=('presentation'))
            evidence_sd_sd = pm.HalfCauchy("evidence_sd_sd",
                                           1.,
                                           dims=('presentation'))
            evidence_sd = pm.TruncatedNormal('evidence_sd',
                                             mu=evidence_sd_mu,
                                             sigma=evidence_sd_sd,
                                             lower=0,
                                             dims=('subject', 'presentation'))

            post_risky_mu, post_risky_sd = get_posterior(
                risky_prior_mu[inputs['subject_ix']],
                risky_prior_sd[inputs['subject_ix']], inputs['risky_mu'],
                evidence_sd[inputs['subject_ix'], inputs['risky_ix']])

            post_safe_mu, post_safe_sd = get_posterior(
                safe_prior_mu, safe_prior_sd, inputs['safe_mu'],
                evidence_sd[inputs['subject_ix'], inputs['safe_ix']])

            diff_mu, diff_sd = get_diff_dist(post_risky_mu, post_risky_sd,
                                             post_safe_mu, post_safe_sd)

            p = pm.Deterministic(
                'p', cumulative_normal(tt.log(.55), diff_mu, diff_sd))

            ll = pm.Bernoulli('ll_bernoulli', p=p, observed=choices)
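# The helper cumulative_normal used above is not defined in this snippet; a
# plausible stand-in (an assumption, not the author's definition) is the
# standard normal CDF evaluated in the theano graph:
import theano.tensor as tt

def cumulative_normal_sketch(x, mu, sd):
    # P(X <= x) for X ~ Normal(mu, sd), written via the error function
    return 0.5 * (1.0 + tt.erf((x - mu) / (sd * tt.sqrt(2.0))))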
예제 #28
0
    def recreate_mod(self):
        '''
        Rebuild the PyMC3/exoplanet model from the stored optimization solution (self.soln).
        '''
        with pm.Model() as self.model:

            # Parameters for the stellar properties
            mean = pm.Normal("mean", mu=self.soln['mean'], sd=10.0)
            u_star = xo.distributions.QuadLimbDark("u_star")
            # Stellar parameters from Huang et al (2018)
            M_star_huang = 1.094, 0.039
            R_star_huang = 1.10, 0.023
            BoundedNormal = pm.Bound(pm.Normal, lower=0, upper=3)

            if not self.do_even_odd:
                logP = pm.Normal("logP", mu=self.soln['logP'], sd=1)
                t0 = pm.Normal("t0", mu=self.soln['t0'], sd=1)
                period = pm.Deterministic("period", tt.exp(logP))
                m_star = BoundedNormal("m_star", mu=self.soln['m_star'], sd=M_star_huang[1])
                r_star = BoundedNormal("r_star", mu=self.soln['r_star'], sd=R_star_huang[1])
                b = pm.Uniform("b", lower=0, upper=0.9, testval=self.soln['b'])
                BoundedNormal_logr = pm.Bound(pm.Normal, lower=-5, upper=0)
                logr = BoundedNormal_logr('logr', mu=self.soln['logr'], sd=1.0)
                r_pl = pm.Deterministic("r_pl", tt.exp(logr))
                ror = pm.Deterministic("ror", r_pl / r_star)
                BoundedBeta = pm.Bound(pm.Beta, lower=0, upper=1-1e-5)
                ecc = BoundedBeta("ecc", alpha=0.867, beta=3.03, testval=self.soln['ecc'])
                omega = xo.distributions.Angle("omega")

            # Even-Odd Test
            else:
                logP_even = pm.Normal("logP_even", mu=self.soln['logP_even'], sd=1)
                t0_even = pm.Normal("t0_even", mu=self.soln['t0_even'], sd=1)
                period_even = pm.Deterministic("period_even", tt.exp(logP_even))
                m_star_even = BoundedNormal("m_star_even", mu=self.soln['m_star_even'], sd=M_star_huang[1])
                r_star_even = BoundedNormal("r_star_even", mu=self.soln['r_star_even'], sd=R_star_huang[1])
                b_even = pm.Uniform("b_even", lower=0, upper=0.9, testval=self.soln['b_even'])
                BoundedNormal_logr = pm.Bound(pm.Normal, lower=-5, upper=0)
                logr_even = BoundedNormal_logr('logr_even', mu=self.soln['logr_even'], sd=1.0)
                r_pl_even = pm.Deterministic("r_pl_even", tt.exp(logr_even))
                ror_even = pm.Deterministic("ror_even", r_pl_even / r_star_even)
                BoundedBeta = pm.Bound(pm.Beta, lower=0, upper=1-1e-5)
                ecc_even = BoundedBeta("ecc_even", alpha=0.867, beta=3.03, testval=self.soln['ecc_even'])
                omega_even = xo.distributions.Angle("omega_even")

                logP_odd = pm.Normal("logP_odd", mu=self.soln['logP_odd'], sd=1)
                t0_odd = pm.Normal("t0_odd", mu=self.soln['t0_odd'], sd=1)
                period_odd = pm.Deterministic("period_odd", tt.exp(logP_odd))
                m_star_odd = BoundedNormal("m_star_odd", mu=self.soln['m_star_odd'], sd=M_star_huang[1])
                r_star_odd = BoundedNormal("r_star_odd", mu=self.soln['r_star_odd'], sd=R_star_huang[1])
                b_odd = pm.Uniform("b_odd", lower=0, upper=0.9, testval=self.soln['b_odd'])
                logr_odd = BoundedNormal_logr('logr_odd', mu=self.soln['logr_odd'], sd=1.0)
                r_pl_odd = pm.Deterministic("r_pl_odd", tt.exp(logr_odd))
                ror_odd = pm.Deterministic("ror_odd", r_pl_odd / r_star_odd)
                ecc_odd = BoundedBeta("ecc_odd", alpha=0.867, beta=3.03, testval=self.soln['ecc_odd'])
                omega_odd = xo.distributions.Angle("omega_odd")

            # The parameters of the RotationTerm kernel
            logamp = pm.Normal("logamp", mu=self.soln['logamp'], sd=5.0)
            logrotperiod = pm.Normal("logrotperiod", mu=self.soln['logrotperiod'], sd=5.0)
            logQ0 = pm.Normal("logQ0", mu=self.soln['logQ0'], sd=10.0)
            logdeltaQ = pm.Normal("logdeltaQ", mu=self.soln['logdeltaQ'], sd=10.0)
            mix = pm.Uniform("mix", lower=0, upper=1.0, testval=self.soln['mix'])

            # Transit jitter & GP parameters
            logs2 = pm.Normal("logs2", mu=self.soln['logs2'], sd=5.0)

            # Track the rotation period as a deterministic
            rotperiod = pm.Deterministic("rotation_period", tt.exp(logrotperiod))

            # GP model for the light curve
            kernel = xo.gp.terms.RotationTerm(log_amp=logamp, period=rotperiod, log_Q0=logQ0, log_deltaQ=logdeltaQ, mix=mix)
            gp = xo.gp.GP(kernel, self.time[self.mask], ((self.flux_err[self.mask])**2 + tt.exp(logs2)), J=4)


            if not self.do_even_odd:
                # Orbit model
                orbit = xo.orbits.KeplerianOrbit(r_star=r_star, m_star=m_star, period=period, t0=t0, b=b, ecc=ecc, omega=omega)
                light_curves = xo.StarryLightCurve(u_star).get_light_curve(orbit=orbit, r=r_pl, t=self.time[self.mask], texp=0.021)

                light_curve = pm.math.sum(light_curves, axis=-1)
                pm.Deterministic("light_curves", light_curves)

                # Compute the Gaussian Process likelihood and add it into the
                # PyMC3 model as a "potential"
                pm.Potential("loglike", gp.log_likelihood(self.flux[self.mask] - mean - light_curve))

                # Compute the mean model prediction for plotting purposes
                pm.Deterministic("pred", gp.predict())
                pm.Deterministic("loglikelihood", gp.log_likelihood(self.flux[self.mask] - mean - light_curve))


            else:
                orbit_even = xo.orbits.KeplerianOrbit(r_star=r_star_even, m_star=m_star_even, period=period_even, t0=t0_even, b=b_even, ecc=ecc_even, omega=omega_even)

                orbit_odd = xo.orbits.KeplerianOrbit(r_star=r_star_odd, m_star=m_star_odd, period=period_odd, t0=t0_odd, b=b_odd, ecc=ecc_odd, omega=omega_odd)

                light_curves_even = xo.StarryLightCurve(u_star).get_light_curve(orbit=orbit_even, r=r_pl_even, t=self.time[self.mask], texp=0.021)
                light_curves_odd = xo.StarryLightCurve(u_star).get_light_curve(orbit=orbit_odd, r=r_pl_odd, t=self.time[self.mask], texp=0.021)

                light_curve_even = pm.math.sum(light_curves_even, axis=-1)
                light_curve_odd = pm.math.sum(light_curves_odd, axis=-1)

                pm.Deterministic("light_curves_even", light_curves_even)
                pm.Deterministic("light_curves_odd", light_curves_odd)

                # Compute the Gaussian Process likelihood and add it into the
                # PyMC3 model as a "potential"
                pm.Potential("loglike", gp.log_likelihood(self.flux[self.mask] - mean - (light_curve_even + light_curve_odd)))

                # Compute the mean model prediction for plotting purposes
                pm.Deterministic("pred", gp.predict())
                pm.Deterministic("loglikelihood", gp.log_likelihood(self.flux[self.mask] - mean - (light_curve_even + light_curve_odd)))
예제 #29
0
# This reparameterization is implemented in *exoplanet* as a custom *PyMC3* distribution :class:`exoplanet.distributions.QuadLimbDark`.

# +
import pymc3 as pm

with pm.Model() as model:

    # The baseline flux
    mean = pm.Normal("mean", mu=0.0, sd=1.0)

    # The time of a reference transit for each planet
    t0 = pm.Normal("t0", mu=t0s, sd=1.0, shape=2)

    # The log period; also tracking the period itself
    logP = pm.Normal("logP", mu=np.log(periods), sd=0.1, shape=2)
    period = pm.Deterministic("period", pm.math.exp(logP))

    # The Kipping (2013) parameterization for quadratic limb darkening parameters
    u = xo.distributions.QuadLimbDark("u", testval=np.array([0.3, 0.2]))

    r = pm.Uniform("r",
                   lower=0.01,
                   upper=0.1,
                   shape=2,
                   testval=np.array([0.04, 0.06]))
    b = xo.distributions.ImpactParameter("b",
                                         ror=r,
                                         shape=2,
                                         testval=np.random.rand(2))

    # Set up a Keplerian orbit for the planets
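    # The snippet is truncated here; a hedged continuation (a sketch assuming the
    # same exoplanet API used in the previous example, not the original code)
    # would build the orbit and the transit model roughly like this, where `t`
    # and `texp` are assumed to be the observation times and exposure time:
    # orbit = xo.orbits.KeplerianOrbit(period=period, t0=t0, b=b)
    # light_curves = xo.StarryLightCurve(u).get_light_curve(orbit=orbit, r=r,
    #                                                       t=t, texp=texp)
    # light_curve = pm.math.sum(light_curves, axis=-1)
    # pm.Deterministic("light_curves", light_curves)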
예제 #30
0
Bx = basis_funcs(elec_year)  # spline basis function values evaluated at x
# shared: a symbolic variable; it is called a shared variable because its value is shared consistently across all compiled functions that use it
Bx_ = shared(Bx)

# Spline model
with pm.Model() as partial_model:
    # define priors
    sigma = pm.HalfCauchy('sigma', 5)

    σ_a = pm.HalfCauchy('σ_a', 5.)
    a0 = pm.Normal('a0', 0., 10.)
    Δ_a = pm.Normal('Δ_a', 0., 10., shape=Num_5)
    δ_1 = pm.Gamma('δ_1', alpha=5, beta=1)
    δ = pm.Normal('δ', 0, sd=(δ_1 * δ_1))
    # δ = pm.Normal('δ', 0, sd=100)  # use this prior for δ instead if the model converges poorly
    theta1 = pm.Deterministic('theta1', a0 + (σ_a * Δ_a).cumsum())
    # theta1 = a0 + (σ_a * Δ_a).cumsum()

    theta = Bx_.dot(theta1) + δ
    Observed = pm.Normal('Observed',
                         mu=theta,
                         sd=sigma,
                         observed=elec_faults_miss)  # observed values

    start = pm.find_MAP()
    # step = pm.Metropolis()
    # trace2 = pm.sample(nuts_kwargs={'target_accept': 0.95})
    trace2 = pm.sample(3000, tune=1000, start=start)
chain2 = trace2
varnames1 = ['σ_a', 'a0', 'Δ_a', 'δ', 'theta1']
pm.traceplot(chain2, varnames1)
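# A short hedged follow-up (not part of the original script): the posterior-mean
# spline fit at the training points can be reconstructed from the trace using the
# basis matrix Bx and the spline coefficients theta1 defined above.
import numpy as np

theta1_mean = chain2['theta1'].mean(axis=0)      # posterior mean spline coefficients
delta_mean = chain2['δ'].mean()                  # posterior mean offset δ
fitted_curve = Bx.dot(theta1_mean) + delta_mean  # posterior mean of theta at the data points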