Example #1
def build_model(historic_prices: pd.DataFrame, base_days: int, vault: Vault, config: Config) -> pd.DataFrame:
    pct_changes = historic_prices.price.pct_change().dropna().values[-base_days * 24:]
    with pm.Model() as model:
        mu = pm.Normal("mu", mu=0, sigma=0.1)
        sd = pm.HalfNormal("sd", sigma=0.1)
        obs = pm.Normal("obs", mu=mu, sigma=sd, observed=pct_changes)
        trace = pm.sample(5000, cores=config.SAMPLING_CORES, tune=5000)
    mus = np.random.choice(trace.get_values('mu'), size=config.N_POSTERIOR_SAMPLES, replace=True)
    sds = np.random.choice(trace.get_values('sd'), size=config.N_POSTERIOR_SAMPLES, replace=True)
    posterior_samples = np.random.normal(mus, sds, size=(config.MODEL_HORIZON_DAYS*24, config.N_POSTERIOR_SAMPLES))
    posterior_samples = np.transpose(posterior_samples)
    posterior_growths = np.cumsum(posterior_samples, axis=1)
    latest_known_price = historic_prices.loc[historic_prices.index.max()].price
    price_projections = to_float((1 + posterior_growths) * latest_known_price, vault.decimals)
    hpd_95 = pm.hpd(price_projections, hdi_prob=0.95)
    hpd_50 = pm.hpd(price_projections, hdi_prob=0.5)
    model = pd.DataFrame.from_dict(
        {
            'hpd_95_lower': hpd_95[:,0],
            'hpd_95_upper': hpd_95[:,1],
            'hpd_50_lower': hpd_50[:,0],
            'hpd_50_upper': hpd_50[:,1]
        }
    )
    index = pd.DatetimeIndex(pd.date_range(historic_prices.index.max() + pd.DateOffset(hours=1), periods=config.MODEL_HORIZON_DAYS*24, freq='H'))
    model = model.set_index(index).resample('1D').nearest()
    model['vault'] = vault.name
    model['base_days'] = base_days
    return model
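A minimal, self-contained sketch of the projection step above, with synthetic posterior draws standing in for the real trace and config (all names and sizes here are made up; it assumes a PyMC3 version whose pm.hpd accepts hdi_prob, as this example itself does):

import numpy as np
import pymc3 as pm

rng = np.random.default_rng(0)
n_samples, horizon_hours = 1000, 24 * 7                 # stand-ins for the config values
mus = rng.normal(0.0, 0.001, size=n_samples)            # posterior draws of mu
sds = np.abs(rng.normal(0.01, 0.002, size=n_samples))   # posterior draws of sd

# one simulated hourly-return path per posterior draw: shape (n_samples, horizon)
paths = rng.normal(mus, sds, size=(horizon_hours, n_samples)).T
projections = (1 + np.cumsum(paths, axis=1)) * 100.0    # last known price = 100

hpd_95 = pm.hpd(projections, hdi_prob=0.95)             # shape (horizon_hours, 2)
print(hpd_95[-1])                                       # 95% band at the final hour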
Example #2
def generate_ebbinghaus_data_figure_3():
    fig, (ax1, ax2) = plt.subplots(ncols=1, nrows=2)
    fig.set_size_inches(5.5, 4.5)
    # plot 1
    ax1.plot(delay, savings, marker='o', linestyle='--')
    ax1.plot(delay, np.median(np.exp(mu), axis=0), color='red', linestyle='-')
    ax1.set_title('c. Log-log plot (blue) and power law model estimates (red)')
    ax1.set_xlabel('Delay (log hours)')
    ax1.set_xscale('log', basex=10)
    ax1.set_ylabel('Savings (log \\%)')
    ax1.set_yscale('log', basey=10)
    ax1.grid(b=True, which='minor', color='w', linewidth=1.0)
    # plot 2
    yerr = [
        np.median(np.exp(mu), axis=0) - pm.hpd(np.exp(mu))[:, 0],
        pm.hpd(np.exp(mu))[:, 1] - np.median(np.exp(mu), axis=0)
    ]
    ax2.errorbar(savings,
                 np.median(np.exp(mu), axis=0),
                 yerr=yerr,
                 marker='o',
                 linestyle='')
    ax2.plot(np.linspace(0, 100, 10),
             np.linspace(0, 100, 10),
             color='red',
             linestyle=':')
    ax2.set_title('Power law model: Observed vs. predicted savings')
    ax2.set_xlabel('Observed savings (\\%)')
    ax2.set_ylabel('Predicted savings (\\%)')
    ax2.grid(b=True, which='minor', color='w', linewidth=1.0)
    # clean up and save
    plt.tight_layout(pad=0.5, w_pad=0.2, h_pad=0.7)
    plt.savefig('./figures/ebbinghaus_data_3.eps')
    plt.savefig('./figures/ebbinghaus_data_3.png')
    plt.savefig('./figures/ebbinghaus_data_3.pdf')
Example #3
def generate_log_freq_figure():
    fig, (ax1, ax2) = plt.subplots(ncols=1, nrows=2)
    fig.set_size_inches(5.5, 5.5)
    # plot 1
    ax1.plot(freq, rt, marker='o', linestyle='')
    ax1.plot(freq, mu.mean(axis=0), color='red', linestyle='-')
    ax1.set_title('Observed (blue) \& predicted (red) RTs '
                  'against log frequency')
    ax1.set_xlabel('Log frequency (log of \# tokens/1 million words)')
    ax1.set_xscale('log', basex=10)
    ax1.set_ylabel('RTs (s)')
    ax1.grid(b=True, which='minor', color='w', linewidth=1.0)
    # plot 2
    yerr = [mu.mean(axis=0) - pm.hpd(mu)[:, 0],
            pm.hpd(mu)[:, 1] - mu.mean(axis=0)]
    ax2.errorbar(rt, mu.mean(axis=0), yerr=yerr,
                 marker='o', linestyle='')
    ax2.plot(np.linspace(0.5, 0.7, 10),
             np.linspace(0.5, 0.7, 10),
             color='red',
             linestyle=':')
    ax2.set_title('Log frequency model: Observed vs. predicted RTs')
    ax2.set_xlabel('Observed RTs (s)')
    ax2.set_ylabel('Predicted RTs (s)')
    ax2.grid(b=True, which='minor', color='w', linewidth=1.0)
    # clean up and save
    plt.tight_layout(pad=0.5, w_pad=0.2, h_pad=1.9)
    plt.savefig('./figures/log_freq_model_figure.eps')
    plt.savefig('./figures/log_freq_model_figure.png')
    plt.savefig('./figures/log_freq_model_figure.pdf')
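Both figures build asymmetric error bars by subtracting the HPD limits from a central estimate. The same pattern in isolation, assuming mu is a (samples x points) posterior array as in the functions above (PyMC3 3.x, where pm.hpd defaults to a 95% interval and returns shape (n_points, 2)):

import numpy as np
import pymc3 as pm

mu = np.random.randn(4000, 10) * 0.05 + 0.6   # fake posterior: samples x points
center = mu.mean(axis=0)
hpd = pm.hpd(mu)                              # shape (10, 2)
yerr = [center - hpd[:, 0],                   # distance down to the lower limit
        hpd[:, 1] - center]                   # distance up to the upper limit
# ax.errorbar(x, center, yerr=yerr, marker='o', linestyle='')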
Example #4
def plot_fits_w_estimates(y_obs, ppc, ax=None, legend=False):
    """ Plot Fits with Uncertainty Estimates"""
    iy = np.argsort(y_obs)
    ix = np.arange(iy.size)
    lik_mean = ppc.mean(axis=0)
    lik_hpd = pm.hpd(ppc)
    lik_hpd_05 = pm.hpd(ppc, alpha=0.5)
    r2 = r2_score(y_obs, lik_mean)
    mae = mean_absolute_error(y_obs, lik_mean)
    if ax is None:
        _, ax = pl.subplots(figsize=(12, 8))
    ax.scatter(ix,
               y_obs.values[iy],
               label='observed',
               edgecolor='k',
               s=40,
               color='w',
               marker='d',
               zorder=2)
    ax.scatter(ix,
               lik_mean[iy],
               label='model mean -- $r^2$=%.2f -- mae=%.2f' % (r2, mae),
               edgecolor='k',
               s=40,
               color='w',
               zorder=3)

    ax.fill_between(ix,
                    y1=lik_hpd_05[iy, 0],
                    y2=lik_hpd_05[iy, 1],
                    color='gray',
                    label='model output 50%CI',
                    zorder=1,
                    linestyle='-',
                    lw=2,
                    edgecolor='k')
    ax.fill_between(
        ix,
        y1=lik_hpd[iy, 0],
        y2=lik_hpd[iy, 1],
        color='k',
        alpha=0.75,
        label='model output 95%CI',
        zorder=0,
    )
    if legend:
        ax.legend(loc='upper left')
    return ax
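A hypothetical call to plot_fits_w_estimates with synthetic data; the function body assumes pm, pl (matplotlib.pylab) and the sklearn.metrics imports are in scope, and a PyMC3 version whose pm.hpd takes alpha:

import numpy as np
import pandas as pd
import matplotlib.pylab as pl
from sklearn.metrics import r2_score, mean_absolute_error  # used inside the function

y_obs = pd.Series(np.random.randn(50))                  # the function reads .values
ppc = y_obs.values + np.random.randn(2000, 50) * 0.3    # samples x data points
ax = plot_fits_w_estimates(y_obs, ppc, legend=True)
pl.show()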
Example #5
def mcmc_stats(runs, burnin, prob, batch):
    """
        入力
        runs:   モンテカルロ標本
        burnin: バーンインの回数
        prob:   区間確率 (0 < prob < 1)
        batch:  乱数系列の分割数
        出力
        事後統計量のデータフレーム
    """
    traces = runs[burnin:, :]
    n = traces.shape[0] // batch
    k = traces.shape[1]
    alpha = 100 * (1.0 - prob)
    post_mean = np.mean(traces, axis=0)
    post_median = np.median(traces, axis=0)
    post_sd = np.std(traces, axis=0)
    mc_err = pm.mc_error(traces, batches=batch)
    ci_lower = np.percentile(traces, 0.5 * alpha, axis=0)
    ci_upper = np.percentile(traces, 100 - 0.5 * alpha, axis=0)
    hpdi = pm.hpd(traces, 1.0 - prob)
    rhat = [pm.gelman_rubin(traces[:, i].reshape((n, batch), order='F')) \
            for i in range(k)]
    stats = np.vstack((post_mean, post_median, post_sd, mc_err,
                       ci_lower, ci_upper, hpdi.T, rhat)).T
    stats_string = ['mean', 'median', 'std. dev.', 'MC error',
                    'CI (lower)', 'CI (upper)',
                    'HPDI (lower)', 'HPDI (upper)', '$\\hat R$']
    param_string = ['mean $\\mu$', 'variance $\\sigma^2$']
    return pd.DataFrame(stats, index=param_string, columns=stats_string)
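A hypothetical call with two fake chains of draws for (mu, sigma^2); it relies on an older PyMC3 whose pm.mc_error and pm.gelman_rubin accept raw NumPy arrays, which is what the function itself assumes:

import numpy as np

runs = np.column_stack([np.random.normal(0.0, 1.0, 12000),   # draws of the mean
                        np.random.normal(1.0, 0.5, 12000)])  # draws of the variance
print(mcmc_stats(runs, burnin=2000, prob=0.95, batch=4))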
Example #6
    def plot_model_ppc_stats(self,
                             ppc,
                             y_obs,
                             alpha_level1=0.05,
                             alpha_level2=0.5,
                             ax=None):
        if ax is None:
            _, ax = pl.subplots()
        iy = np.argsort(y_obs)
        ix = np.arange(iy.size)
        ppc_mean = ppc.mean(axis=0)
        ax.scatter(ix,
                   y_obs.values[iy],
                   label='observed',
                   edgecolor='k',
                   s=50,
                   color='steelblue')
        ax.scatter(ix,
                   ppc_mean[iy],
                   label='prediction mean',
                   edgecolor='k',
                   s=50,
                   color='red')

        if alpha_level2:
            lik_hpd_2 = pm.hpd(ppc, alpha=alpha_level2)
            ax.fill_between(
                ix,
                y1=lik_hpd_2[iy, 0],
                y2=lik_hpd_2[iy, 1],
                alpha=0.5,
                color='k',
                label=f'prediction {1 - alpha_level2:.0%} CI',
            )
        if alpha_level1:
            lik_hpd_1 = pm.hpd(ppc, alpha=alpha_level1)
            ax.fill_between(
                ix,
                y1=lik_hpd_1[iy, 0],
                y2=lik_hpd_1[iy, 1],
                alpha=0.5,
                color='k',
                label=f'prediction {1 - alpha_level1:.0%} CI',
            )
        ax.legend(loc='best')
        return ax
Example #7
def compare_parameters_hierarchical(model,
                                    parameters=['v', 'gamma', 's', 'tau'],
                                    comparisons=None):
    """Compute comparisons of group level parameters between groups / conditions.
    
    Args:
        model (glambox.GLAM): Fitted GLAM instance
        parameters (list, optional): List of parameters. Defaults to ['v', 'gamma', 's', 'tau'].
        comparisons (list of tuples, optional): List of comparisons to perform. Must be a list of tuples, e.g., `[('A', 'B'), ('A', 'C')]`. Defaults to None.
    
    Returns:
        pandas.DataFrame: Distribution statistics of group level parameter differences.
    """

    if comparisons is None:
        comparisons = []
    n_params = len(parameters)
    n_comps = len(comparisons)

    comparison_df = []

    for p, parameter in enumerate(parameters):

        # Comparisons
        for c, comparison in enumerate(comparisons):
            comparison_string = '{}-{}'.format(*comparison)
            df_pc = pd.DataFrame(dict(parameter=parameter, comparison=comparison_string),
                                      index=[0])
            # Check if parameter has dependence
            if model.design[parameter]['dependence'] is not None:
                # Then, if both conditions are present, compute posterior of the difference
                c0_present = (comparison[0] in model.design[parameter]['conditions'])
                c1_present = (comparison[1] in model.design[parameter]['conditions'])
                if c0_present & c1_present:
                    difference = (
                        model.trace[0].get_values(parameter + '_' +
                                                  comparison[0] + '_mu') -
                        model.trace[0].get_values(parameter + '_' +
                                                  comparison[1] + '_mu'))
                    
                    hpd_lower, hpd_upper = hpd(difference, alpha=0.05)
                    df_pc['hpd_2.5'] = hpd_lower
                    df_pc['hpd_97.5'] = hpd_upper
                    df_pc['mean'] = np.mean(difference)
                    df_pc['p>0'] = np.mean(difference > 0)
                else:
                    # Otherwise, state that at least one condition is not present.
                    df_pc['warning'] = 'At least one condition is missing.'
            else:
                # Or that the parameter has no dependencies.
                df_pc['warning'] = 'Parameter has no dependencies.'
            
            comparison_df.append(df_pc)
    
    comparison_df = pd.concat(comparison_df, sort=False).reset_index(drop=True)

    return comparison_df
Example #8
def getAngelRate(data, n_sample=10000, n_chain=3, ax=None):
    # Organize the data
    data_0 = data.query('campaign != 1')
    data_1 = data.query('campaign == 1')
    d = np.array([[sum(data_0['angel'] == 0),
                   sum(data_0['angel'] == 1),
                   sum(data_0['angel'] == 2)],
                  [sum(data_1['angel'] == 0),
                   sum(data_1['angel'] == 1),
                   sum(data_1['angel'] == 2)]])
    weight = np.array([[1.0, 1.0, 1.0], [1.0, 0.0, 2.0]])
    # Parameter estimation
    with pm.Model() as model:
        alpha = [1., 1., 1.]  # hyper-parameter of DirichletDist.
        pi = pm.Dirichlet('pi', a=np.array(alpha))
        for i in np.arange(d.shape[0]):
            piw = pi * weight[i]
            m = pm.Multinomial('m_%s' % (i),
                               n=np.sum(d[i]),
                               p=piw,
                               observed=d[i])
        trace = pm.sample(n_sample, chains=n_chain)
    np.savetxt('trace_pi.csv', trace['pi'], delimiter=',')
    # Silver
    hpd_l, hpd_u = pm.hpd(trace['pi'][:, 1])
    print('Silver : 95% HPD : {}-{}'.format(hpd_l, hpd_u))
    print('Silver ExpectedValue : {}'.format(trace['pi'][:, 1].mean()))
    # Gold
    hpd_l, hpd_u = pm.hpd(trace['pi'][:, 2])
    print('Gold : 95% HPD : {}-{}'.format(hpd_l, hpd_u))
    print('Gold ExpectedValue : {}'.format(trace['pi'][:, 2].mean()))
    # plot posteriors, if axes were supplied
    if ax is not None:
        pm.plot_posterior(trace['pi'][:, 0], ax=ax[0])
        pm.plot_posterior(trace['pi'][:, 1], ax=ax[1])
        pm.plot_posterior(trace['pi'][:, 2], ax=ax[2])
        ax[0].set_title('Nothing')
        ax[1].set_title('SilverAngel')
        ax[2].set_title('GoldAngel')
    return trace
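A hypothetical input for getAngelRate: a purchase log with a campaign flag and an angel outcome (0 = none, 1 = silver, 2 = gold), which is what the two queries above imply. Silver angels are kept out of the campaign rows because the weight matrix assigns them probability zero there:

import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
campaign = rng.integers(0, 2, size=500)
angel = np.where(campaign == 1,
                 rng.choice([0, 2], size=500, p=[0.9, 0.1]),
                 rng.choice([0, 1, 2], size=500, p=[0.9, 0.08, 0.02]))
data = pd.DataFrame({'campaign': campaign, 'angel': angel})
trace = getAngelRate(data, n_sample=2000, n_chain=2)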
Example #9
def compare_parameters_individual(model,
                                  parameters,
                                  comparisons=None):

    if comparisons is None:
        comparisons = []
    n_params = len(parameters)
    n_comps = len(comparisons)

    subjects = model.data['subject'].unique().astype(int)
    summaries = [summary(trace) for trace in model.trace]

    comparison_df = []
    
    for p, parameter in enumerate(parameters):

        # Comparisons
        for c, comparison in enumerate(comparisons):
            comparison_string = '{}-{}'.format(*comparison)
            df_pc = pd.DataFrame(dict(subject=subjects, parameter=parameter, comparison=comparison_string),
                                 index=subjects)

            # Check if parameter has dependence
            if model.design[parameter]['dependence'] is not None:
                # Then, if both conditions are present, plot posterior of the difference
                c0_present = (
                    comparison[0] in model.design[parameter]['conditions'])
                c1_present = (
                    comparison[1] in model.design[parameter]['conditions'])
                if c0_present & c1_present:
                    differences = np.array([(model.trace[i].get_values(parameter + '_' + comparison[0]) -
                                             model.trace[i].get_values(parameter + '_' + comparison[1]))
                                            for i in subjects])[:, :, 0, 0]

                    means = np.mean(differences, axis=1)
                    hpdlower, hpdupper = hpd(differences.T, alpha=0.05).T
                    plarger0 = np.mean(differences > 0, axis=1)
                    df_pc['mean'] = means
                    df_pc['hpd_2.5'] = hpdlower
                    df_pc['hpd_97.5'] = hpdupper
                    df_pc['p>0'] = plarger0

                else:
                    # Otherwise, state that at least one condition is not present.
                    df_pc['warning'] = 'At least one condition is missing.'
            else:
                # Or that the parameter has no dependencies.
                df_pc['warning'] = 'Parameter has no dependencies.'

            comparison_df.append(df_pc)
    
    comparison_df = pd.concat(comparison_df, sort=False).sort_values('subject').reset_index(drop=True)

    return comparison_df
Example #10
def visualize_posteriors(samples, param_names):
    '''
    Visualizes the approximate posterior for each parameter based on the generated samples.
    Returns the posterior mean and HDI for every parameter.
    '''
    fig, axes = plt.subplots(1, samples.shape[-1], figsize=(15, 6))
    hdi = []
    means = []
    for c, vals in enumerate(samples.reshape(-1, samples.shape[-1]).T):
        # calculate mean and HDI of each parameter
        mean = np.round(np.mean(vals), 3)
        hdi_low, hdi_high = np.round(pymc3.hpd(vals), 3)
        means.append(mean)
        hdi.append([hdi_low, hdi_high])
        axes[c].axvline(x=hdi_low, c='r')
        axes[c].axvline(x=hdi_high, c='r')
        # histogram with approximate density plot for each parameter
        sns.distplot(vals, bins=50, ax=axes[c])
        axes[c].set_title("{0} mean: {1} \n HDI_low: {2} - HDI_high: {3}".format(
            param_names[c], mean, hdi_low, hdi_high))
    fig.suptitle('Posterior Distributions', fontsize=20)
    fig.tight_layout(rect=[0, 0.03, 1, 0.90])
    return np.asarray(means), np.asarray(hdi)
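A hypothetical call: samples may be (chains, draws, n_params), since the function flattens everything except the last axis; plt, sns and pymc3 must already be imported under those names, as the function body assumes:

import numpy as np

samples = np.random.randn(4, 1000, 3) * [0.1, 0.5, 1.0] + [0.0, 1.0, 2.0]
means, hdi = visualize_posteriors(samples, ['alpha', 'beta', 'sigma'])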
Example #11
def plot_counterfactual(data,
                        trace,
                        variables,
                        parameters,
                        intercept='a',
                        hpdi=0.10,
                        xlab='',
                        ylab=''):
    """
    x = predictor of interest
    intercept = The string used to denote the intercept (i.e., alpha, or beta_0). Defaults to 'a'
    variables and parameters are lists that must be in the same order (corresponding data column + coefficient). 
    The first value in each list should be the predictor of interest. Do not include the intercept term.
    """
    # Calculate value of x term (predictor of interest)
    x = data[variables[0]]
    x_coef = parameters[0]
    x_space = np.linspace(x.min(), x.max(), 50)
    x_value = trace[x_coef] * x_space[:, None]

    # Calculate value of other variables, holding them to the mean value.
    controls = np.array(data[variables[1]].mean())
    for item in variables[2:]:
        controls = np.hstack((controls, [data[item].mean()]))

    control_coefficients = []
    for item in parameters[1:]:
        control_coefficients.append(trace[item])

    control_values = np.dot(controls, control_coefficients)

    # Calculate the predicted mean.
    mu_predicted = trace[intercept] + x_value + control_values

    mu_hpd = pm.hpd(mu_predicted.T, alpha=hpdi)

    plt.plot(x_space, mu_predicted.mean(1), 'k')
    plt.plot(x_space, mu_hpd[:, 0], 'k--')
    plt.plot(x_space, mu_hpd[:, 1], 'k--')

    plt.xlabel(xlab)
    plt.ylabel(ylab)
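A hypothetical call to plot_counterfactual: a plain dict of posterior draws works anywhere the function indexes trace[...], so no real MultiTrace is needed for a quick check (pm and plt must be in scope, as the function assumes; all names below are invented):

import numpy as np
import pandas as pd

n_draws = 2000
data = pd.DataFrame(np.random.randn(100, 3), columns=['x', 'z1', 'z2'])
trace = {'a': np.random.normal(1.0, 0.10, n_draws),
         'b_x': np.random.normal(0.5, 0.05, n_draws),
         'b_z1': np.random.normal(-0.2, 0.05, n_draws),
         'b_z2': np.random.normal(0.1, 0.05, n_draws)}
plot_counterfactual(data, trace,
                    variables=['x', 'z1', 'z2'],
                    parameters=['b_x', 'b_z1', 'b_z2'],
                    xlab='x', ylab='predicted mu')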
Example #12
 def plot_model_fits2(self,
                      y_obs,
                      y_pred=None,
                      title=None,
                      ax=None,
                      ci=0.95):
     if y_pred is None:
         y_pred = self.trace_.get_values('mu')
     y_obs = y_obs.values
     mask = np.logical_not(np.isnan(y_obs))
     y_obs = y_obs[mask]
     y_pred_mean = np.mean(y_pred, axis=0)[mask]
     y_pred_hpd = pm.hpd(y_pred, alpha=1 - ci)[mask]
     xi = np.arange(y_obs.size)
     iy = np.argsort(y_obs)
     if ax is None:
         _, ax = pl.subplots(figsize=(12, 8), )
     ax.set_title(title)
     ax.plot(xi,
             y_obs[iy],
             marker='.',
             ls='',
             markeredgecolor='darkblue',
             markersize=13,
             label='observed')
     ax.plot(xi,
             y_pred_mean[iy],
             marker='o',
             color='indigo',
             ls='',
             markeredgecolor='k',
             alpha=0.5,
             label='predicted avg.')
     ax.fill_between(xi,
                     y_pred_hpd[iy, 0],
                     y_pred_hpd[iy, 1],
                     color='k',
                     alpha=0.5,
                     label=f'{ci*100}%CI on pred.')
     ax.legend(loc='best')
     return ax
Example #13
    def _summarize_one_variable(ppc_samples, variable):
        """
        Provide mean and hpd summaries of given variable.

        :param ppc_samples: pymc3 ppc samples
        :param variable: key of dict ppc_samples
        :return: DataFrame, (variable_mean, variable_hpd_lower, variable_hpd_upper), n rows = n columns in ppcs_samples[variable]
        which should correspond to number of input data points
        """
        # row = sample, column = original data point
        sample_array = ppc_samples[variable]
        hpds = pm.hpd(sample_array, alpha=.3)

        d = dict()
        # collect all samples into one field per input row
        # more elegant way?
        d[f'{variable}_samples'] = list(sample_array.T)
        d[f'{variable}_hpd_lower'] = hpds[:, 0]
        d[f'{variable}_hpd_upper'] = hpds[:, 1]
        d[f'{variable}_mean'] = np.mean(sample_array, axis=0)

        return (pd.DataFrame(d))
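A hypothetical check with a fake PPC dict of 500 posterior-predictive draws for 20 input rows, assuming the helper is reachable as a standalone function (it takes no self). Note that alpha=.3 in this PyMC3 API means a 70% HPD interval:

import numpy as np

ppc_samples = {'y': np.random.randn(500, 20)}
summary = _summarize_one_variable(ppc_samples, 'y')
print(summary[['y_mean', 'y_hpd_lower', 'y_hpd_upper']].head())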
Example #14
File: model.py  Project: pindash/best
    def hdi(self, var_name: str, credible_mass: float = 0.95):
        """Calculate the highest posterior density interval (HDI)

        This function calculates a *credible interval* which contains the
        ``credible_mass`` most likely values of the parameter, given the data.
        Also known as an HPD interval.

        Parameters
        ----------
        var_name : str
            Name of variable.
        credible_mass : float
            The HDI will cover credible_mass * 100% of the probability mass.
            Default: 0.95, i.e. a 95% HDI.

        Returns
        -------
        (float, float)
            The endpoints of the HPD
        """
        check_credible_mass(credible_mass)

        return tuple(pm.hpd(self.trace[var_name], alpha=(1 - credible_mass)))
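The alpha/credible-mass bookkeeping above in one line: on this PyMC3 version, pm.hpd(x, alpha=1 - credible_mass) returns the endpoints of the credible_mass HDI.

import numpy as np
import pymc3 as pm

draws = np.random.normal(0, 1, 50000)
print(tuple(pm.hpd(draws, alpha=0.05)))  # roughly (-1.96, 1.96) for a standard normal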
Example #15
def plot_regression_line(x, y, mu, hdpi=0.1, xlab='', ylab=''):
    '''
    x: The predictor variable
    y: The response variable
    mu: The mu values from the PyMC3 model trace (e.g., trace['mu'])
    hdpi: The alpha value for the HPD interval; 0.1 corresponds to the 90% interval

    Plots a scatter plot of the data, then the regression line with the HPD interval shaded.
    '''
    mu_hpd = pm.hpd(mu, alpha=hdpi)

    plt.scatter(x, y, alpha=0.5)
    plt.plot(x, mu.mean(0), 'C2')  # MAP line (column-wise mean of mu)

    # HPDI fill-in
    index = np.argsort(x)
    plt.fill_between(x[index],
                     mu_hpd[:, 0][index],
                     mu_hpd[:, 1][index],
                     color='C2',
                     alpha=0.25)
    plt.xlabel(str(xlab))
    plt.ylabel(str(ylab))
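A hypothetical call with a fake linear-regression posterior: mu holds one regression line per posterior draw (samples x data points), the shape trace['mu'] would have (pm and plt must be in scope, as the function assumes):

import numpy as np

x = np.linspace(0, 10, 40)
y = 2.0 + 0.5 * x + np.random.randn(40)
intercepts = np.random.normal(2.0, 0.20, size=(1000, 1))
slopes = np.random.normal(0.5, 0.05, size=(1000, 1))
mu = intercepts + slopes * x          # shape (1000, 40)
plot_regression_line(x, y, mu, hdpi=0.1, xlab='x', ylab='y')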
Example #16
File: result.py  Project: ncjllld/byase
    def stats(self) -> pd.DataFrame:
        """Stats task result."""
        import pymc3 as pm

        idxes = [col for col in self.trace.columns if not col.startswith('RAW')]

        means = []
        sds = []
        hpd_2_5s = []
        hpd_97_5s = []

        cols = ['mean', 'sd', 'hpd_2.5', 'hpd_97.5']
        data = [means, sds, hpd_2_5s, hpd_97_5s]

        for var_name in idxes:
            d_i = self.trace[var_name]
            means.append(np.mean(d_i))
            sds.append(np.std(d_i))
            hpd_2_5, hpd_97_5 = pm.hpd(d_i)
            hpd_2_5s.append(hpd_2_5)
            hpd_97_5s.append(hpd_97_5)

        df = pd.DataFrame({col: data[i] for i, col in enumerate(cols)}, index=idxes, columns=cols)
        return df
Example #17
    # out_pai = pm.Deterministic('out_pai', mu + beta * dag_time + gamma1 * z1 + gamma2 * z2 + gamma3 * z3 + gamma4 * z4)

    Observed = pm.Binomial("Observed", dag_sum, out_pai,
                           observed=dag_fault)  # observed values

    # start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(10000, step=step)
chain = trace
# logistic(chain, locals())
varnames = ['gamma1', 'mu', 'beta', 'out_pai']
varnames1 = ['out_pai']
pm.traceplot(chain, varnames)
plt.show()

out_pai_hpd = pm.hpd(trace['out_pai'], alpha=0.6)  # compute once: row i holds the 40% HPD of point i
sig0 = out_pai_hpd[0]
sig = out_pai_hpd[1]
sig1 = out_pai_hpd[2]
sig2 = out_pai_hpd[3]
sig3 = out_pai_hpd[4]
sig4 = out_pai_hpd[5]

plt.figure()
ax = sns.distplot(sig0)
ax = sns.distplot(sig1)
ax = sns.distplot(sig2)
ax = sns.distplot(sig3)
ax = sns.distplot(sig4)
ax = sns.distplot(sig)
plt.show()
Example #18
# Make a summary dataframe
# Because this is pretty gaussian, median is pretty much the same
# as the mean, although we might as well compute it since we have the distribution
# one standard deviation covers 68% of the distribution, might be better to
# keep 95% HPD

# get strain names first
strains = [*df_mcmc.columns]
# dataframe to store summary statistics
df_summary = pd.DataFrame(index=['median', '_hpd', 'hpd_', 'mean','std'],
        columns=strains)
for strain in strains:
    # median
    df_summary.loc['median', strain] = np.median(df_mcmc[strain])
    # 95% highest posterior density
    df_summary.loc[['_hpd', 'hpd_'], strain] = pm.hpd(df_mcmc[strain], alpha=0.05)
    # mean
    df_summary.loc['mean', strain] = np.mean(df_mcmc[strain])
    # standard deviation
    df_summary.loc['std', strain] = np.std(df_mcmc[strain])

# plot sample histograms
for strain in strains:
     plt.hist(df_mcmc[strain], bins=100, density=True, histtype='step', linewidth=2)
plt.xlabel('prob. of dauer, $p$')
plt.ylabel(r'$P(p\mid d, n)$')
plt.legend(strains, loc='upper center');
sns.despine()
plt.tight_layout()
#plt.savefig('./output/probdistrib_dauer.pdf', transparent=True, bbox_inches='tight')
plt.close('all')
Example #19
    trace_0 = pm.sample(5000)

chain_0 = trace_0[1000:]
varnames = ['alpha', 'beta', 'bd']
pm.traceplot(chain_0, varnames)
plt.savefig('img505.png', dpi=300, figsize=(5.5, 5.5))

plt.figure()

pm.summary(chain_0, varnames)

theta = chain_0['theta'].mean(axis=0)
idx = np.argsort(x_0)
plt.plot(x_0[idx], theta[idx], color='b', lw=3)
plt.axvline(chain_0['bd'].mean(), ymax=1, color='r')
bd_hpd = pm.hpd(chain_0['bd'])
plt.fill_betweenx([0, 1], bd_hpd[0], bd_hpd[1], color='r', alpha=0.5)

plt.plot(x_0, y_0, 'o', color='k')
theta_hpd = pm.hpd(chain_0['theta'])[idx]
plt.fill_between(x_0[idx],
                 theta_hpd[:, 0],
                 theta_hpd[:, 1],
                 color='b',
                 alpha=0.5)

plt.xlabel(x_n, fontsize=16)
plt.ylabel(r'$\theta$', rotation=0, fontsize=16)
plt.savefig('img506.png', dpi=300, figsize=(5.5, 5.5))

plt.figure()
Example #20
plt.savefig('img408.png')

plt.clf()
plt.plot(x, alpha_m + beta_m * x, c='k', label='y ={:.2f} + {:.2f} * x'.format(alpha_m, beta_m))
idx = np.argsort(x)
x_ord = x[idx]
sig = pm.hpd(trace_n['mu'], alpha=0.02)[idx]
plt.fill_between(x_ord, sig[:, 0], sig[:, 1], color='gray')
plt.xlabel('$x$', fontsize=16)
plt.ylabel('$y$', fontsize=16, rotation=0)
plt.savefig('img409.png')

"""

ppc = pm.sample_ppc(trace_n, samples=100000, model=model)

idx = np.argsort(x)
x_ord = x[idx]
#plt.plot(x, y, 'b.')
plt.plot(x,
         alpha_m + beta_m * x,
         c='k',
         label='y = {:.2f} + {:.2f} * x'.format(alpha_m, beta_m))
sig0 = pm.hpd(ppc['y_pred'], alpha=0.5)[idx]
sig1 = pm.hpd(ppc['y_pred'], alpha=0.05)[idx]
plt.fill_between(x_ord, sig0[:, 0], sig0[:, 1], color='gray', alpha=1)
plt.fill_between(x_ord, sig1[:, 0], sig1[:, 1], color='gray', alpha=0.5)
plt.xlabel('$x$', fontsize=16)
plt.ylabel('$y$', fontsize=16, rotation=0)
plt.savefig('img410.png')
Example #21
def hpd_range(x):
    hpd = pm.hpd(x)
    return hpd[1] - hpd[0]
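Quick check of hpd_range: for a standard normal, pm.hpd's default 95% interval is roughly (-1.96, 1.96), so the returned width should be close to 3.92.

import numpy as np

print(hpd_range(np.random.normal(0, 1, 100000)))  # ~3.92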
Example #22
        num_already_found = np.mean(
            [num_true_hits - i.shape[0] for i in normalized_ranks_holder[idx]])

        num_remaining = n_hits_pulled - num_already_found
        num_needed = desired_num_hits - num_already_found
        fraction_required = num_needed / num_remaining

        #expected performance on undocked ligands:
        trace = estimate_holder[idx][0]
        mu = trace['mu']
        nu = trace['nu']
        sig = trace['sig']
        samples = t(nu, mu, sig).ppf(fraction_required)

        #this is the fraction of remaining ligands we need to dock to reach the goal.
        hpd = expit(pm.hpd(samples))

        time_hpd = hpd * (n_ligands_to_pull - size) + size
        time_days = time_hpd / 60 / 60 / 24
        time_mean = expit(samples.mean()) * (n_ligands_to_pull - size) + size
        time_mean = time_mean / 60 / 60 / 24

        print(time_mean)

        df.loc[count] = [
            time_mean, time_hpd[0], time_hpd[1], size, desired_num_hits
        ]
        count += 1

import altair as alt
#now plot :)
Example #23
    yl = pm.Bernoulli('yl', p=theta, observed=y_0)
    trace_0 = pm.sample(5000)

chain_0 = trace_0[1000:]
varnames = ['alpha', 'beta', 'bd']
pm.traceplot(chain_0, varnames)
plt.savefig('img505b.png')

pm.summary(trace_0, varnames)

#print(chain_0['theta'])
plt.figure()

theta = chain_0['theta'].mean(axis=0)
idx = np.argsort(x_0)
plt.plot(x_0[idx], theta[idx], color='b', lw=3)
plt.axvline(chain_0['bd'].mean(), ymax=1, color='r')
bd_hpd = pm.hpd(chain_0['bd'])
plt.fill_betweenx([0, 1], bd_hpd[0], bd_hpd[1], color='r', alpha=0.5)

plt.plot(x_0, y_0, 'o', color='k')
theta_hpd = pm.hpd(chain_0['theta'])[idx]
plt.fill_between(x_0[idx],
                 theta_hpd[:, 0],
                 theta_hpd[:, 1],
                 color='b',
                 alpha=0.5)
plt.xlabel(x_n, fontsize=16)
plt.ylabel(r'$\theta$', rotation=0, fontsize=16)
plt.savefig('img506b.png')
Example #24
    alpha = pm.Normal(name='alpha', mu=mean_data, sd=std_data)
    beta = pm.Normal(name='beta', mu=0, sd=10, shape=4)
    sigma = pm.Uniform(name='sigma', lower=0, upper=std_data)
    mu = pm.Deterministic('mu', alpha + beta[0] * data_s + beta[1] * data_s2 + beta[2] * data_s3 + beta[3] * data_s4)
    ret = pm.Normal(name='returns', mu=mu, sd=sigma, observed=ror)
    trace_model = pm.sample(1000, tune=2000)

print(pm.summary(trace_model, ['alpha', 'beta', 'sigma']))

pm.traceplot(trace_model, varnames=['alpha', 'beta', 'sigma'])
plt.title('model parameters')
plt.show()

mu_pred = trace_model['mu']
idx = np.argsort(data_s)
mu_hpd = pm.hpd(mu_pred, alpha=0.11)[idx]
ret_pred = pm.sample_ppc(trace_model, 10000, model)
ret_pred_hpd = pm.hpd(ret_pred['returns'], alpha=0.11)[idx]

for r in ror:
    plt.plot(r)
plt.plot(ret_pred_hpd)
plt.show()

for r in ror:
    # plt.scatter(data_s, r, c='C0', alpha=0.3)
    plt.plot(data_s[idx], r, c='C0', alpha=0.3)
plt.fill_between(data_s[idx], mu_hpd[:, 0], mu_hpd[:, 1], color='C2', alpha=0.25)
plt.fill_between(data_s[idx], ret_pred_hpd[:, 0], ret_pred_hpd[:, 1], color='C2', alpha=0.25)
plt.show()
Example #25
f.close()

# Save the trace to the output folder as a numpy array, for later reference
# Save every 10th sample from the trace, to avoid any autocorrelation issues
np.save("palatability_regression_trace.npy", tr[::10]["coeff_pal"])

# Convert the trace to a dataframe, and save that too
# Save every 10th sample from the trace, to avoid any autocorrelation issues
tr_df = pm.trace_to_dataframe(tr[::10])
tr_df.to_csv("palatability_regression_trace.csv")

# Plot the results of the palatability regression analysis
# First just plot the mean regression coefficients for every laser condition, across time
fig = plt.figure()
mean_coeff = np.mean(tr[::10]["coeff_pal"], axis = 0)
hpd_coeff = pm.hpd(tr[::10]["coeff_pal"], alpha = 0.05)
for condition in range(unique_lasers[0].shape[0]):
	plt.plot(x[analyze_indices], mean_coeff[:, condition], linewidth = 3.0, label = "Dur:{}ms, Lag:{}ms".format(unique_lasers[0][condition][0], unique_lasers[0][condition][1]))
plt.legend()
plt.xlabel("Time post taste delivery (ms)")
plt.ylabel("Mean posterior regression coefficient")
fig.savefig("palatability_regression_coefficients_mean.png", bbox_inches = "tight")
plt.close("all")
# Now plot the mean and SD of the regression coefficients for every laser condition, across time
fig = plt.figure()
for condition in range(unique_lasers[0].shape[0]):
	plt.plot(x[analyze_indices], np.mean(tr[::10]["coeff_pal"], axis = 0)[:, condition], linewidth = 3.0, label = "Dur:{}ms, Lag:{}ms".format(unique_lasers[0][condition][0], unique_lasers[0][condition][1]))
	plt.fill_between(x[analyze_indices], hpd_coeff[:, condition, 0], hpd_coeff[:, condition, 1], alpha = 0.5)
plt.legend()
plt.xlabel("Time post taste delivery (ms)")
plt.ylabel("Mean posterior regression coefficient")
Example #26
from sklearn.model_selection import train_test_split
import os
import pandas as pd
# assumed imports for pm, data_reader and StandardScaler, which this snippet uses but did not show:
import pymc3 as pm
from pandas_datareader import data as data_reader
from sklearn.preprocessing import StandardScaler

data_file = 'btc.data.csv'

if not os.path.isfile(data_file):
    data = data_reader.get_data_yahoo('BTC-USD')
    data.to_csv(data_file)

data = pd.read_csv(data_file)

print(data.Open.head())
print(data.Open.tail())
print(data.Open.iloc[-1])
hpd = pm.hpd(data.Open, alpha=0.05)
print('[%f %f]' % (hpd[0], hpd[1]))

# _, (ax0, ax1) = plt.subplots(2, 1)
# sns.kdeplot(data.Open, ax=ax0)
# ax1.plot(data.Open)
# plt.show()

data = data[['Open']].to_numpy()  # DataFrame.as_matrix() was removed from pandas
print(data.shape)

standard_scaler = StandardScaler()
standard_scaler.fit(data)
d = standard_scaler.transform(data)
# d = standard_scaler.inverse_transform(d)
Example #27
f.close()

# Save the trace to the output folder as a numpy array, for later reference
# Save every 10th sample from the trace, to avoid any autocorrelation issues
np.save("palatability_regression_trace.npy", tr[::10]["coeff_pal"])

# Convert the trace to a dataframe, and save that too
# Save every 10th sample from the trace, to avoid any autocorrelation issues
tr_df = pm.trace_to_dataframe(tr[::10])
tr_df.to_csv("palatability_regression_trace.csv")

# Plot the results of the palatability regression analysis
# First just plot the mean regression coefficients for every laser condition, across time
fig = plt.figure()
mean_coeff = np.mean(tr[::10]["coeff_pal"], axis=0)
hpd_coeff = pm.hpd(tr[::10]["coeff_pal"], alpha=0.05)
for condition in range(unique_lasers[0].shape[0]):
    plt.plot(x[analyze_indices],
             mean_coeff[:, condition],
             linewidth=3.0,
             label="Dur:{}ms, Lag:{}ms".format(unique_lasers[0][condition][0],
                                               unique_lasers[0][condition][1]))
plt.legend()
plt.xlabel("Time post taste delivery (ms)")
plt.ylabel("Mean posterior regression coefficient")
fig.savefig("palatability_regression_coefficients_mean.png",
            bbox_inches="tight")
plt.close("all")
# Now plot the mean and SD of the regression coefficients for every laser condition, across time
fig = plt.figure()
for condition in range(unique_lasers[0].shape[0]):
    plt.plot(x[analyze_indices],
             mean_coeff[:, condition],
             linewidth=3.0,
             label="Dur:{}ms, Lag:{}ms".format(unique_lasers[0][condition][0],
                                               unique_lasers[0][condition][1]))
    plt.fill_between(x[analyze_indices],
                     hpd_coeff[:, condition, 0],
                     hpd_coeff[:, condition, 1],
                     alpha=0.5)
plt.legend()
plt.xlabel("Time post taste delivery (ms)")
plt.ylabel("Mean posterior regression coefficient")
Example #28
    start = pm.find_MAP()
    step = pm.NUTS(scaling=start)
    trace_1 = pm.sample(5000, step=step, start=start)

chain_1 = trace_1[100:]
varnames = ['alpha', 'beta']
pm.traceplot(chain_1)
plt.show()

idx = np.argsort(x_1[:, 0])
bd = chain_1['bd'].mean(0)[idx]
plt.scatter(x_1[:, 0], x_1[:, 1], c=y_1)
plt.plot(x_1[:, 0][idx], bd, color='r')

bd_hpd = pm.hpd(chain_1['bd'])[idx]
plt.fill_between(x_1[:, 0][idx],
                 bd_hpd[:, 0],
                 bd_hpd[:, 1],
                 color='r',
                 alpha=0.5)

plt.xlabel(x_n[0], fontsize=16)
plt.ylabel(x_n[1], fontsize=16)
plt.show()

corr = iris[iris['species'] != 'virginica'].corr()
mask = np.tri(*corr.shape).T
sns.heatmap(corr.abs(), mask=mask, annot=True)
plt.show()
Example #29
    ax2.set_xlabel('Observed probabilities')
    ax2.set_ylabel('Predicted probabilities')
    ax2.grid(b=True, which='minor', color='w', linewidth=1.0)
    # clean up and save
    plt.tight_layout(pad=0.5, w_pad=0.2, h_pad=0.7)
    plt.savefig('../figures/lex_dec_model_pyactr_no_imaginal.eps')
    plt.savefig('../figures/lex_dec_model_pyactr_no_imaginal.png')
    plt.savefig('../figures/lex_dec_model_pyactr_no_imaginal.pdf')
    #plt.show()


generate_lex_dec_pyactr_no_imaginal_figure()

decay_posterior = trace["decay"]
decay_posterior.mean()
pm.hpd(decay_posterior)

threshold_posterior = trace["threshold"]
threshold_posterior.mean()
pm.hpd(threshold_posterior)

noise_posterior = trace["noise"]
noise_posterior.mean()
pm.hpd(noise_posterior)

latency_factor_posterior = trace["lf"]
latency_factor_posterior.mean()
pm.hpd(latency_factor_posterior)

latency_exponent_posterior = trace["le"]
latency_exponent_posterior.mean()
Example #30
	# Save these findings in a table specific to this unit
	unit_table = hf5.create_table('/laser_effects_bayesian/unit_summaries', 'unit{:d}'.format(chosen_units[unit]), description = laser_effects)

	# Now run through the tastes and laser conditions
	for laser in range(diff.shape[0]):
		for taste in range(diff.shape[1]):	
			# Get a new row for this taste and laser condition		
			this_condition_results = unit_table.row

			# Fill in the taste and laser conditions
			this_condition_results['laser'] = laser + 1		
			this_condition_results['taste'] = taste + 1
		
		
			# First check if the control firing was close to zero for this taste/laser combo (comparing it to a sufficiently small number because the control firing rate is always > 0 by definition)
			if pm.hpd(bayesian_results[laser, taste, :, 0], alpha = sig_level)[0] <= 1e-4:
				this_condition_results['control_zero'] = 1.0
			else:
				this_condition_results['control_zero'] = 0.0
			
			# Then check if the laser condition has no effect on firing (the diff HPD will overlap zero)
			diff_hpd = pm.hpd(diff[laser, taste, :], alpha = sig_level)
			if diff_hpd[0] * diff_hpd[1] < 0:
				this_condition_results['unchanged'] = 1.0
				this_condition_results['enhanced'] = 0.0
				this_condition_results['suppressed'] = 0.0
			# Firing is enhanced if the diff (control-laser) lies consistently below zero
			elif diff_hpd[0] < 0 and diff_hpd[1] < 0:
				this_condition_results['unchanged'] = 0.0
				this_condition_results['enhanced'] = 1.0
				this_condition_results['suppressed'] = 0.0
Example #31
org_beta111 = post_beta111 * faults_sd / year_std
org_beta00 = post_beta00 * faults_sd + faults_m - (
    post_beta111 * faults_sd * year_m / year_std) - (post_beta2 * faults_sd *
                                                     tem_m / tem_std)

#
beta_plot = chain2['beta'][:, 0]
beta1_plot = chain2['beta1'][:, 0]
beta2_plot = chain2['beta2']
# # posterior
plt.figure(figsize=(10, 10))
idx = np.argsort(elec_year)
x_ord = elec_year[idx]

ppc = pm.sample_ppc(chain2, samples=500, model=unpooled_model)
sig_y = pm.hpd(ppc['Observed'][0:42], alpha=0.05)[idx]
sig_y1 = pm.hpd(ppc['Observed'][42:91], alpha=0.05)[idx]
# plt.fill_between(x_ord, sig_y[:, 0], sig_y[:, 1], color='gray', alpha=0.4)
# plt.fill_between(x_ord, sig_y1[:, 0], sig_y1[:, 1], color='red', alpha=0.3)
#
# # sig_y0 = pm.hpd(ppc['Observed'][1], alpha=0.5)[idx]
# # sig_y11 = pm.hpd(ppc['Observed'][1], alpha=0.05)[idx]
# # plt.fill_between(x_ord, sig_y[:, 0], sig_y[:, 1], color='gray', alpha=1)
# # plt.fill_between(x_ord, sig_y1[:, 0], sig_y1[:, 1], color='gray', alpha=0.5)
# idd = range(0, len(chain2['beta2']), 100)

plt.figure(figsize=(5, 3), facecolor=(1, 1, 1))
ax = plt.subplot(1, 1, 1)
# j, k1 = 0, 6
# for jx in range(7):
#     k1 = 6
Example #32
def process_one_etf(top, result_df):
    print(top)
    bah_investor = compute_bah([top],period,cash_sum)
    print('invested:' + str(bah_investor.invested_history[-1]))
    print('value gained:' + str(bah_investor.history[-1]))
    print('returns:' + str(bah_investor.ror_history[-1]))
    investors = []
    while len(investors) < MAX_RUNS:
        investor = run_bah_sim([top],period,cash_sum)
        if len(investor.ror_history) == 0:
            continue
        investors.append(investor)
        print('%d:%f:%f:%f' % (len(investors), investor.invested, investor.history[-1], investor.ror_history[-1]))
    returns_bah = [investor.ror_history[-1] for investor in investors]
    returns_bah = np.array(returns_bah)
    # returns_bah = np.sort(returns_bah)
    print('original:%f' % bah_investor.ror_history[-1])
    print('observed:%f +/- %f' % (np.mean(returns_bah), np.std(returns_bah)))
    with pm.Model() as model:
        mu = pm.Normal('mu', mu=np.mean(returns_bah), sd=np.std(returns_bah))
        sigma = pm.Uniform('sigma', lower=0., upper=np.std(returns_bah))
        mean_returns = pm.Normal('mean_returns', mu=mu, sd=sigma, observed=np.array(returns_bah))
        trace_model = pm.sample(1000, tune=2000)
    samples_bah = pm.sample_ppc(trace_model, size=10000, model=model)
    hpd89_bah = pm.hpd(samples_bah['mean_returns'], alpha=0.11)
    print('mean 89 percentile:' + str(np.mean(hpd89_bah)))
    investors = []
    while len(investors) < MAX_RUNS:
        investor = compute_one_etf([top],period,cash_sum)
        if investor.cash == investor.invested:
            continue
        if len(investor.ror_history) == 0:
            continue
        investors.append(investor)
        print('%d:%f:%f:%f' % (len(investors), investor.invested, investor.history[-1], investor.ror_history[-1]))
    returns_chaos = [investor.ror_history[-1] for investor in investors]
    returns_chaos = np.array(returns_chaos)
    # returns_chaos = np.sort(returns_chaos)
    print('original:%f' % (bah_investor.ror_history[-1]))
    print('observed:%f +/- %f' % (np.mean(returns_chaos), np.std(returns_chaos)))
    with pm.Model() as model:
        mu = pm.Normal('mu', mu=np.mean(returns_chaos), sd=np.std(returns_chaos))
        sigma = pm.Uniform('sigma', lower=0., upper=np.std(returns_chaos))
        mean_returns = pm.Normal('mean_returns', mu=mu, sd=sigma, observed=np.array(returns_chaos))
        trace_model = pm.sample(1000, tune=2000)
    samples_chaos = pm.sample_ppc(trace_model, size=10000, model=model)
    hpd89_chaos = pm.hpd(samples_chaos['mean_returns'], alpha=0.11)
    print('mean 89 percentile:' + str(np.mean(hpd89_chaos)))
    validity_chaos = np.count_nonzero(np.abs(returns_chaos - bah_investor.ror_history[-1]) < 0.05) / len(
        returns_chaos) * 100.
    validity_bah = np.count_nonzero(np.abs(returns_bah - bah_investor.ror_history[-1]) < 0.05) / len(returns_bah) * 100.
    result_df = pd.concat([result_df, pd.DataFrame([{
        'ticket': top,
        'original_returns': bah_investor.ror_history[-1],
        'hpd89_bah': np.mean(hpd89_bah),
        'hpd89_chaos': np.mean(hpd89_chaos),
        'validity_bah': validity_bah,
        'validity_chaos': validity_chaos}])], ignore_index=True)
    report = Report(top,
                    bah_investor,
                    returns_bah,
                    hpd89_bah,
                    samples_bah['mean_returns'],
                    returns_chaos,
                    hpd89_chaos,
                    samples_chaos['mean_returns'])
    report.gen_report()
    result_df.to_csv(result_csv, index=False)
    return result_df