def compute_max_corr(strace):
    df = trace_to_dataframe(strace)
    # For each flattened variable, the largest |correlation| with any earlier
    # variable (the strict lower triangle excludes the diagonal of ones).
    max_corr = np.max(np.tril(np.abs(np.corrcoef(df.values, rowvar=False)), k=-1), axis=1)
    # redundant = max_corr > 0.99
    # red_names = df.columns.values[redundant]
    # print(red_names)
    return max_corr
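
A minimal usage sketch, assuming PyMC3 3.x; the toy model and the 0.99 cutoff are illustrative assumptions, not from the original:

import numpy as np
import pymc3 as pm
from pymc3.backends.tracetab import trace_to_dataframe

with pm.Model():
    pm.Normal('x', mu=0.0, sigma=1.0, shape=3)
    trace = pm.sample(500, chains=1, progressbar=False)

max_corr = compute_max_corr(trace)           # one entry per flattened column
df = trace_to_dataframe(trace)
print(df.columns.values[max_corr > 0.99])    # names of near-redundant dimensions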
Example #2
    def run(self) -> Tuple[int, pd.DataFrame]:
        """Bayesian inference."""
        observed_data = self._extract_observed_data()
        self.mc.log_debug('{} reads (read pairs) are used for Bayesian inference.'.format(len(observed_data)))

        p_count = self.isoforms_count * self.ploidy

        with pm.Model():
            p = pm.Dirichlet(name='p', a=tt.stack([1 for _ in range(p_count)]), shape=p_count)
            c = np.tile(self.isoform_lens, self.ploidy)
            p_rescaled = pm.Deterministic(name='p_rescaled', var=(p * c) / tt.dot(p, c))
            _ObservedDistribution(name='observed', _p=p_rescaled, observed=observed_data)

            # Inference.
            trace = pm.sample(self.mcmc_samples, tune=self.tune_samples, chains=1, progressbar=False)

        # Convert trace to data frame.
        trace = trace_to_dataframe(trace)

        # Remove rescaled variables.
        cols = [col for col in trace.columns if not col.startswith('p_rescaled')]
        assert len(cols) == p_count
        trace = trace[cols]

        # Rename trace columns.
        rename_map = {}
        for i in range(self.isoforms_count):
            for j in range(self.ploidy):
                n = i + j * self.isoforms_count
                rename_map['p__{}'.format(n)] = 'RAW_I{}_A{}'.format(i, j)
        trace = trace.rename(columns=rename_map)

        return len(observed_data), trace
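
For concreteness, a sketch of the column layout the rename map produces, assuming isoforms_count=2 and ploidy=2 (illustrative values):

rename_map = {}
for i in range(2):          # isoform index
    for j in range(2):      # allele (ploidy) index
        n = i + j * 2       # flat Dirichlet index: isoform index varies fastest
        rename_map['p__{}'.format(n)] = 'RAW_I{}_A{}'.format(i, j)
# rename_map == {'p__0': 'RAW_I0_A0', 'p__2': 'RAW_I0_A1',
#                'p__1': 'RAW_I1_A0', 'p__3': 'RAW_I1_A1'}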
Example #3
def dump(name, trace, chains=None):
    """
    Store values from NDArray trace as CSV files.

    Parameters
    ----------
    name : str
        Name of directory to store CSV files in
    trace : :class:`pymc3.backends.base.MultiTrace` of NDArray traces
        Result of MCMC run with default NDArray backend
    chains : list
        Chains to dump. If None, all chains are dumped.
    """

    if not os.path.exists(name):
        os.mkdir(name)
    if chains is None:
        chains = trace.chains

    var_shapes = trace._straces[chains[0]].var_shapes
    flat_names = {
        v: ttab.create_flat_names(v, shape)
        for v, shape in var_shapes.items()
    }

    for chain in chains:
        filename = os.path.join(name, 'chain-{}.csv'.format(chain))
        df = ttab.trace_to_dataframe(trace,
                                     chains=chain,
                                     flat_names=flat_names)
        df.to_csv(filename, index=False)
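
A minimal usage sketch, assuming PyMC3 3.x with the default NDArray backend; the model and directory names are illustrative:

import pymc3 as pm

with pm.Model():
    pm.Normal('x', mu=0.0, sigma=1.0)
    trace = pm.sample(100, chains=2, progressbar=False)

dump('csv_out', trace)                # writes csv_out/chain-0.csv and chain-1.csv
dump('csv_out_0', trace, chains=[0])  # dump only chain 0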
Example #4
def format_trace(trace, to_df=True):
    """
    Convert the trace into the required format: a pandas DataFrame if
    `to_df` is True, otherwise a numpy array.
    """
    df = trace_to_dataframe(trace)
    if to_df:
        return df
    else:
        # `DataFrame.as_matrix` was removed in pandas 1.0; `.values` is the
        # equivalent numpy-array accessor.
        return df.values
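
Usage, for illustration (`trace` is any PyMC3 MultiTrace):

df = format_trace(trace)                # pandas DataFrame
arr = format_trace(trace, to_df=False)  # numpy array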
Example #5
def plot_corner(trace, outpath):
    # corner plot of posterior samples
    plt.close('all')
    # Keep only untransformed variables (auto-transformed names end with '_').
    corvars = [x for x in trace.varnames if x[-1] != '_']
    trace_df = trace_to_dataframe(trace, varnames=corvars)
    fig = corner.corner(trace_df,
                        quantiles=[0.16, 0.5, 0.84],
                        show_titles=True,
                        title_kwargs={"fontsize": 12},
                        title_fmt='.2g')
    savefig(fig, outpath, writepdf=0, dpi=100)
Example #6
    def test_trace_to_dataframe_chain_arg(self):
        mtrace = self.mtrace
        df = ttab.trace_to_dataframe(mtrace, chains=0)
        self.assertEqual(len(mtrace), df.shape[0])

        checked = False
        for varname in self.test_point.keys():
            vararr = mtrace.get_values(varname, chains=0)
            # Given the `shape` used in the test setup, only one variable
            # has that shape; skip the others.
            if vararr.shape[1:] != self.shape:
                continue
            npt.assert_equal(vararr[:, 0, 0], df[varname + '__0_0'].values)
            npt.assert_equal(vararr[:, 1, 0], df[varname + '__1_0'].values)
            npt.assert_equal(vararr[:, 1, 2], df[varname + '__1_2'].values)
            checked = True
        self.assertTrue(checked)
Example #7
    def test_trace_to_dataframe_chain_arg(self):
        mtrace = self.mtrace
        df = ttab.trace_to_dataframe(mtrace, chains=0)
        assert len(mtrace) == df.shape[0]

        checked = False
        for varname in self.test_point.keys():
            vararr = mtrace.get_values(varname, chains=0)
            # Given the `shape` used in the test setup, only one variable
            # has that shape; skip the others.
            if vararr.shape[1:] != self.shape:
                continue
            npt.assert_equal(vararr[:, 0, 0], df[varname + '__0_0'].values)
            npt.assert_equal(vararr[:, 1, 0], df[varname + '__1_0'].values)
            npt.assert_equal(vararr[:, 1, 2], df[varname + '__1_2'].values)
            checked = True
        assert checked
Example #8
    def test_trace_to_dataframe(self):
        mtrace = self.mtrace
        df = ttab.trace_to_dataframe(mtrace)
        assert len(mtrace) * mtrace.nchains == df.shape[0]

        checked = False
        for varname in self.test_point.keys():
            vararr = mtrace.get_values(varname)
            # Given the `shape` used in the test setup, only one variable
            # has that shape; skip the others.
            if vararr.shape[1:] != self.shape:
                continue
            npt.assert_equal(vararr[:, 0, 0], df[varname + "__0_0"].values)
            npt.assert_equal(vararr[:, 1, 0], df[varname + "__1_0"].values)
            npt.assert_equal(vararr[:, 1, 2], df[varname + "__1_2"].values)
            checked = True
        assert checked
Example #9
def compute_max_fisher_scale(strace, model):
    epsilon = 1e-10
    n_checks = 20

    df = trace_to_dataframe(strace)

    idx_list = np.linspace(0, len(strace) - 1, n_checks, dtype=int)
    f = model.fastd2logp()

    # max_scale records, per flattened variable, the largest Fisher-information
    # scale (|diag(R)| from the QR decomposition) seen across the checked samples.
    max_scale = np.zeros(df.shape[1])
    for idx in idx_list:
        FI = f(strace[idx])      # Hessian of the log-probability at this draw
        FS = 0.5 * (FI + FI.T)   # symmetrize to guard against numerical asymmetry
        Q, R = np.linalg.qr(FS)
        max_scale = np.fmax(max_scale, np.abs(np.diag(R)))
    # redundant = max_scale < epsilon
    # red_names = df.columns.values[redundant]
    # print(red_names)
    return max_scale
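
A minimal usage sketch, assuming PyMC3 3.x; the toy model is an illustrative assumption (model.fastd2logp() compiles the Hessian of the model log-probability):

import pymc3 as pm

with pm.Model() as model:
    pm.Normal('x', mu=0.0, sigma=1.0, shape=2)
    trace = pm.sample(200, chains=1, progressbar=False)

max_scale = compute_max_fisher_scale(trace, model)  # one scale per flattened variable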
Example #10
    # Priors
    theta_list = []
    for label in NN.labels:
        if label in labels_to_fit:
            theta_list.append(pm.Uniform(label, lower=-0.5, upper=0.5))
        else:
            theta_list.append(0.0)
    theta = tt.stack(theta_list)
    # Model
    inside = sigmoid(tt.tensordot(a=w0, b=theta, axes=1) + b0)
    middle = sigmoid(tt.tensordot(a=w1, b=inside, axes=1) + b1)
    outside = tt.tensordot(a=w2, b=middle, axes=1) + b2
    model_spec = pm.Deterministic('model_spec', outside)
    # Likelihood
    spec = pm.Normal('spec', mu=model_spec, sd=1 / SNR, observed=spec_true)
    # Sampling
    backend = HDF5('../NN_data/trace.h5')
    trace = pm.sample(nsamples,
                      tune=ntune,
                      chains=chains,
                      cores=cores,
                      trace=backend)

samples = pd.DataFrame(columns=NN.labels)
samples[labels_to_fit] = trace_to_dataframe(trace, varnames=labels_to_fit)
samples = NN.rescale_labels(samples)
samples.to_hdf(samples_file, f'SNR={SNR}')

fig = corner(samples[labels_to_fit], labels=labels_to_fit, show_titles=True)
plt.savefig(f'{nn_file[:-4]}_{SNR}.png')
Example #11
    with open(pklpath, 'wb') as buff:
        pickle.dump(
            {
                'model': model,
                'trace': trace,
                'map_estimate': map_estimate
            }, buff)
else:
    d = pickle.load(open(pklpath, 'rb'))
    model, trace, map_estimate = d['model'], d['trace'], d['map_estimate']

##################
# analyze output #
##################
# trace plot from PyMC3; discard the first 100 draws as burn-in
plt.figure(figsize=(7, 7))
pm.traceplot(trace[100:])
plt.tight_layout()
plt.savefig('../results/test_results/test_{}_traceplot.png'.format(modelid))
plt.close('all')

# corner
trace_df = trace_to_dataframe(trace)
truths = [true_d[k] for k in list(trace_df.columns)]
fig = corner.corner(trace_df,
                    quantiles=[0.16, 0.5, 0.84],
                    show_titles=True,
                    title_kwargs={"fontsize": 12},
                    truths=truths)
fig.savefig('../results/test_results/test_{}_corner.png'.format(modelid))
Example #12
def get_abs_m(overwrite=0):
    # overwrite: whether to overwrite the pickle (~20s sampling)

    datestr = '20200624'
    cleanrvpath = os.path.join(DATADIR, 'spectra',
                               'RVs_{}_clean.csv'.format(datestr))
    df = pd.read_csv(cleanrvpath)

    # NOTE: the FEROS data contain all the information about the long-term
    # trend.
    sel = (df.tel == "FEROS")
    df = df[sel]
    df = df.sort_values(by='time')

    delta_t = (df.time.max() - df.time.min())
    t0 = df.time.min() + delta_t / 2
    df['x'] = df['time'] - t0  # np.nanmean(df['time'])
    df['y'] = df['mnvel'] - np.nanmean(df['mnvel'])
    df['y_err'] = df['errvel']

    force_err = 100
    print('WRN: inflating error bars to account for rot jitter')
    print(f'{df.y_err.median()} to {force_err}')
    df.y_err = force_err

    pklpath = os.path.join(rdir, 'rvlim_method2.pkl')
    if os.path.exists(pklpath) and overwrite:
        os.remove(pklpath)

    # y = mx + c
    if not os.path.exists(pklpath):
        with pm.Model() as model:
            # Define priors
            c = pm.Uniform('c', lower=-100, upper=100)
            m = pm.Uniform('m', lower=-100, upper=100)

            abs_m = pm.Deterministic("abs_m", pm.math.abs_(m))

            # Define likelihood
            # Here Y ~ N(Xβ, σ^2), where β are the coefficients of the model.
            # Note, though, that the error bars are not _observed_ in this
            # case; they are part of the model!
            likelihood = pm.Normal('y',
                                   mu=m * nparr(df.x) + c,
                                   sigma=nparr(df.y_err),
                                   observed=nparr(df.y))

            # Inference: draw 6000 posterior samples using NUTS sampling.
            n_samples = 6000
            trace = pm.sample(n_samples, cores=16)

        with open(pklpath, 'wb') as buff:
            pickle.dump({'model': model, 'trace': trace}, buff)

    else:
        d = pickle.load(open(pklpath, 'rb'))
        model, trace = d['model'], d['trace']

    # corner
    trace_df = trace_to_dataframe(trace)
    fig = corner.corner(trace_df,
                        quantiles=[0.16, 0.5, 0.84],
                        show_titles=True)
    fig.savefig(os.path.join(rdir, 'corner.png'))

    # data + model
    plt.close('all')
    plt.figure(figsize=(7, 7))
    plt.scatter(df.x, df.y, label='data', zorder=2, color='k')
    plt.errorbar(df.x,
                 df.y,
                 yerr=df.y_err,
                 ecolor='k',
                 elinewidth=1,
                 capsize=2,
                 zorder=2,
                 ls='none')

    N_samples = 100
    lm = lambda x, sample: sample['m'] * x + sample['c']
    for rand_loc in np.random.randint(0, len(trace), N_samples):
        rand_sample = trace[rand_loc]
        plt.plot(nparr(df.x),
                 lm(nparr(df.x), rand_sample),
                 zorder=1,
                 alpha=0.5,
                 color='C0')

    plt.legend(loc=0)
    plt.xlabel('time [d]')
    plt.ylabel('rv [m/s]')
    plt.savefig(os.path.join(rdir, 'datamodel.png'))
    plt.close('all')

    printparams = ['c', 'm', 'abs_m']
    print(42 * '-')
    for p in printparams:
        med = np.percentile(trace[p], 50)
        up = np.percentile(trace[p], 84)
        low = np.percentile(trace[p], 16)
        threesig = np.percentile(trace[p], 99.7)
        print(f'{p} : {med:.3f} +{up-med:.3f} -{med-low:.3f}')
        print(f'{p} 99.7: {threesig:.3f}')
    print(42 * '-')

    # `threesig` carries over from the final loop iteration above, i.e. it is
    # the 99.7th percentile of 'abs_m'.
    absm_threesig = threesig * u.m / u.s / u.day
    delta_time = df.time.max() - df.time.min()

    return absm_threesig, delta_time
Example #13
def format_trace(trace):
    df = trace_to_dataframe(trace)
    return df.values
Example #14
    sampled = 1

else:
    d = pickle.load(open(pklpath, 'rb'))
    model, trace, map_estimate = d['model'], d['trace'], d['map_estimate']

print(pm.summary(trace, varnames=["period", "t0", "r", "b", "u", "mean"]))

true_d = OrderedDict(
    {'mean':1, 't0__0':t0s[0], 't0__1':t0s[1], 'period__0':periods[0],
     'period__1':periods[1], 'u__0':us[0], 'u__1':us[1], 'r__0':rs[0],
     'r__1':rs[1], 'b__0':bs[0], 'b__1':bs[1]}
)

trace_df = trace_to_dataframe(trace, varnames=["period", "t0", "r", "b", "u", "mean"])
truths = [true_d[k] for k in true_d.keys()]
fig = corner.corner(trace_df, quantiles=[0.16, 0.5, 0.84], show_titles=True,
                    title_kwargs={"fontsize": 12}, truths=truths)
fig.savefig('../results/test_results/test_{}_corner.png'.format(modelid))

# phase plots
if sampled:
    for n, letter in enumerate("bc"):
        plt.figure()

        # Get the posterior median orbital parameters
        p = np.median(trace["period"][:, n])
        t0 = np.median(trace["t0"][:, n])

        # Compute the median of posterior estimate of the contribution from
Example #15
def write_output(trace, uri, filepath, client):
    trace_dataframe = trace_to_dataframe(trace)
    trace_dataframe.to_parquet(filepath,
                               engine="pyarrow",
                               compression="snappy")
    client.file(uri).putFile(filepath)