Example #1
0
def prepare_model(rv_single_epoch_variance, target=0.01, model_path="model.stan",
    S=None, design_matrix_function=None, mask=None,
    **source_params):
    """
    Load the Stan model and assemble the data and initialisation dictionaries
    needed for optimization and sampling.

    :param rv_single_epoch_variance:
        The variance in single epoch measurements of radial velocity.

    :param target: [optional]
        The target radial velocity variance to use when initialising the model
        coefficients.

    :param model_path: [optional]
        The local path of the Stan model.

    :param S: [optional]
        If not `None`, draw a random `S` valid sources from the data rather than
        using the full data set. Forwarded unchanged to `prepare_data`.

    :param design_matrix_function: [optional]
        Forwarded unchanged to `prepare_data`.

    :param mask: [optional]
        Forwarded unchanged to `prepare_data`.

    :Keyword Arguments:
        * *source_params* (``dict``) These are forwarded to `prepare_data`, so
        they should include all of the source labels needed to construct the
        design matrix (e.g., `phot_rp_mean_flux`). The array length for each
        source label should match that of `rv_single_epoch_variance`.

    :returns:
        A four-length tuple containing the loaded Stan model, the data
        dictionary, the initialisation dictionary, and the second value
        returned by `prepare_data` (identifying which sources were used to
        construct the data dictionary).
    """

    data_dict, used_indices = prepare_data(
        rv_single_epoch_variance,
        S=S,
        mask=mask,
        design_matrix_function=design_matrix_function,
        **source_params)

    # The same initial guess seeds both the mean and the scatter of the
    # coefficients.
    initial_coefficients = _rvf_initial_coefficients(
        data_dict["design_matrix"].T, target=target)
    init_dict = {
        "theta": 0.1,
        "mu_coefficients": initial_coefficients,
        "sigma_coefficients": initial_coefficients,
    }

    compiled_model = stan.load_stan_model(model_path)

    return compiled_model, data_dict, init_dict, used_indices
Example #2
0
    def _hierarchically_fit_waveform_parameters(self, waveform_parameters,
                                                **kwargs):
        r"""
        Optimize the hierarchical Stan model (`hpm.stan`) given the waveform
        parameters.

        :param waveform_parameters:
            A table of parameters for each of the numerical relativity waveforms
            that has [n_waveforms, ] rows. It must provide a column for every
            name in `self.hierarchical_parameter_names`, the `M1` column, and
            the column named by `self.eos_parameter_name`. Typical columns are:

            The compactness `C`,
            the calculated :math:`\kappa` value `Kappa_calc`,
            the mass of the primary `M1`,
            the frequency of the high-frequency peak `f2`,
            the dimensionless tidal deformability `Lambda`

            .. math::

                \Lambda = \left(\frac{\lambda}{M_{1}^{5}}\right)^{1/5}

            where

            .. math::

                \lambda = \frac{2}{3}\bar{\kappa}_{2}^{T}\bar{R}^{5}

        :param kwargs:
            Keyword arguments passed to the model's `optimizing` call. These
            take precedence over the default optimizer settings defined here.

        :returns:
            A two-length tuple containing the optimization result and the data
            dictionary that was given to the model.
        """

        # Stack one entry per hierarchical parameter, then transpose so that
        # the parameters run along the second axis.
        observed = np.array([
            waveform_parameters[name]
            for name in self.hierarchical_parameter_names
        ]).T
        n_rows, n_columns = observed.shape

        data_dict = {
            "M1": waveform_parameters["M1"],
            "eos_param": waveform_parameters[self.eos_parameter_name],
            "N": n_rows,
            "D": n_columns,
            "y": observed,
        }

        # Defaults first; anything supplied by the caller overrides them.
        optimizer_kwds = {
            "iter": 10000,
            "tol_param": 1e-12,
            "tol_obj": 1e-12,
            "tol_grad": 1e-12,
            "tol_rel_grad": 1e8,
            **kwargs,
        }

        hierarchical_model = stan.load_stan_model("hpm.stan")
        p_opt = hierarchical_model.optimizing(data_dict, **optimizer_kwds)

        return p_opt, data_dict
Example #3
0
# Problem dimensions: X has N rows and D columns, F is the number of entries
# in `finite_indices`, and L is the number of values stored per row of
# `results`.
N, D = X.shape
F = finite_indices.size
L = 4 * len(config["predictor_label_names"]) + 1 # + 1 if using mu_multiple_uv
C = config["share_optimised_result_with_nearest"]

# Build the k-d tree from the `finite` selection of X. The returned scales and
# offsets are stored in `kdt_kwds` below.
kdt, scales, offsets = npm.build_kdtree(
    X[finite], relative_scales=config["kdtree_relative_scales"])

# Keyword arguments for the k-d tree: fixed entries first, then the settings
# read from the configuration (only the minimum density is optional).
kdt_kwds = dict(offsets=offsets, scales=scales, full_output=False)
kdt_kwds.update(
    minimum_radius=config["kdtree_minimum_radius"],
    minimum_points=config["kdtree_minimum_points"],
    maximum_points=config["kdtree_maximum_points"],
    minimum_density=config.get("kdtree_minimum_density", None))

model = stan.load_stan_model(config["model_path"], verbose=False)

# NOTE: when the key exists this is the very dictionary held by `config`, so
# the float conversion below also updates `config` in place.
default_opt_kwds = config.get("optimisation_kwds", {})

# Make sure that the tolerance entries are floats (presumably they can arrive
# as strings from the configuration file -- TODO confirm).
for key in ("tol_obj", "tol_grad", "tol_rel_grad", "tol_rel_obj"):
    if key in default_opt_kwds:
        default_opt_kwds[key] = float(default_opt_kwds[key])

logging.info("k-d tree keywords: {}".format(kdt_kwds))
logging.info("optimization keywords: {}".format(default_opt_kwds))


# Per-row bookkeeping flags, and an (N, L) results array pre-filled with NaN.
done = np.zeros(N, dtype=bool)
queued = np.zeros(N, dtype=bool)
results = np.nan * np.ones((N, L), dtype=float)
Example #4
0
import numpy as np
import matplotlib.pyplot as plt

import stan_utils as stan

# Unpacking iterates over the first axis of the loaded array, so "data.txt"
# must load as exactly four rows: x, y, x_err, y_err.
x, y, x_err, y_err = np.loadtxt("data.txt")

# Replace the loaded uncertainties by the square root of their absolute value
# (presumably the file stores variances -- TODO confirm).
x_err, y_err = (np.sqrt(np.abs(x_err)), np.sqrt(np.abs(y_err)))

model = stan.load_stan_model("line.stan")

data_dict = dict(x=x, y=y, x_err=x_err, y_err=y_err, N=x.size)

# An initialisation is not strictly required, but a weighted linear
# least-squares fit (weights from y_err only) gives a sensible starting point:
# DM is the design matrix [1, x], C the diagonal covariance of y, and
# (c, m) = (DM^T C^-1 DM)^-1 DM^T C^-1 y are the intercept and slope.
DM = np.vstack((np.ones_like(x), x)).T
C = np.diag(y_err**2)
cov = np.linalg.inv(DM.T @ np.linalg.solve(C, DM))
c, m = cov @ (DM.T @ np.linalg.solve(C, y))

# The `x_t` entry is initialised at the observed x values.
init_dict = dict(c=c, m=m, x_t=x)

# Optimize
p_opt = model.optimizing(data=data_dict, init=init_dict, iter=2000)

# Sample, starting every chain from the optimized point.
samples = model.sampling(
    **stan.sampling_kwds(data=data_dict, chains=2, init=p_opt, iter=2000))

# Save the trace plot of the samples to disk.
fig = samples.traceplot()
fig.savefig("trace.png")
Example #5
0
    binary_sigma_mta[i] = v_std
    binary_sigma_rv[i] = sigma_mta_to_sigma_rv(v_std, O)

    assert np.isfinite(v_std)

# Report how many entries of each sigma_rv array are finite.
print(
    f"Number of single stars with finite values: {np.isfinite(single_sigma_rv).sum()}"
)
print(
    f"Number of binary stars with finite values: {np.isfinite(binary_sigma_rv).sum()}"
)

# Now let us imagine that we were fitting these data with a mixture model in order to estimate the
# intrinsic uncertainty.
model = stan.load_stan_model("nlnmm-fixed.stan")

# Pool the single and binary values into one array and drop non-finite entries.
y_rv = np.hstack([single_sigma_rv, binary_sigma_rv])
y_rv = y_rv[np.isfinite(y_rv)]

# Entries in `data_kwds` are applied last, so they override `y` and `N` if
# they share a key.
data_dict = dict(y=y_rv, N=y_rv.size)
data_dict.update(data_kwds)

# Start `mu_single` at the median of the data, capped at 10. Entries in
# `init_kwds` are applied last and take precedence.
init_dict = dict(mu_single=np.min([np.median(y_rv, axis=0), 10]))
init_dict.update(init_kwds)

p_opt_rv = model.optimizing(data=data_dict, init=init_dict)

# Histogram of the pooled values using 100 bin edges between 0 and 10.
fig, ax = plt.subplots()
ax.hist(y_rv, bins=np.linspace(0, 10, 100))
Example #6
0
# Restrict to the selected subset, then record the peak-to-peak range and the
# mean along axis 0 for (de)normalisation.
X_kdt = X_kdt[subset]
X_scale = np.ptp(X_kdt, axis=0)
X_mean = np.mean(X_kdt, axis=0)


def _scale(a):
    """Subtract `X_mean` and divide by `X_scale`."""
    return (a - X_mean) / X_scale


def _descale(a):
    """Invert `_scale`: multiply by `X_scale` and add `X_mean`."""
    return a * X_scale + X_mean


# The KD-tree is built from the normalised array.
X_norm = _scale(X_kdt)
kdt = npm_utils.build_kdt(X_norm)

# Apply the same subset selection to the data.
data = data[subset]

model = stan.load_stan_model("npm.stan")

# Total number of parameters: M rows, each holding K = 1 + 4 * L values,
# where L is the number of predictor labels.
M, L = len(data), len(predictor_label_names)
K = 1 + 4 * L

opt_params = np.empty((M, K))

#subset_points = np.random.choice(M, size=1000, replace=False)
#for i in range(M):

#for j, i in enumerate(subset_points):
for j, i in enumerate(range(M)):

    print("At point {}/{}: {}".format(j, M, i))
Example #7
0
    def fit(self, waveform_parameters, frequencies, amplitudes, **kwargs):
        r"""
        Fit the model given the parameters of the waveforms, the common
        frequencies that those waveforms are calculated on, and the amplitudes
        of the Fourier spectrum for those waveforms.

        :param waveform_parameters:
            A table of parameters for each of the numerical relativity waveforms
            that has [n_waveforms, ] rows and may contain the following
            properties (as columns):

            The compactness `C`,
            the calculated :math:`\kappa` value `Kappa_calc`,
            the mass of the primary `M1`,
            the frequency of the high-frequency peak `f2`,
            the dimensionless tidal deformability `Lambda`

            .. math::

                \Lambda = \left(\frac{\lambda}{M_{1}^{5}}\right)^{1/5}

            where

            .. math::

                \lambda = \frac{2}{3}\bar{\kappa}_{2}^{T}\bar{R}^{5}

        :param frequencies:
            The common frequencies (in kHz) that each numerical relativity
            waveform is calculated on. This should be an array of shape
            [n_frequencies, ].

        :param amplitudes:
            The amplitudes of the Fourier spectrum for the numerical relativity
            waveforms. This should be an array of shape
            [n_waveforms, n_frequencies].

        :param kwargs:
            Keyword arguments passed to the `optimizing` call of the waveform
            model (`waveform.stan`); they take precedence over the defaults
            defined here. They are not passed to the hierarchical fit.

        :returns:
            This instance, with the optimized quantities stored as attributes.
        """

        self._frequencies = frequencies
        self._amplitudes = amplitudes
        self._waveform_parameters = waveform_parameters

        # Step 1: hierarchical fit of the waveform parameters.
        hpm_result, hpm_data = self._hierarchically_fit_waveform_parameters(
            waveform_parameters)

        # Step 2: frequency-shift the waveforms so that they are aligned at
        # the mean f2.
        aligned_amplitudes, f2_coeff, f2_mean \
            = self._frequency_shift_waveforms(
                waveform_parameters, frequencies, amplitudes)

        # Step 3: fit the waveforms with a linear model of the whitened labels.
        # The labels are the mass, the equation-of-state parameter, and the
        # quantities used in the hierarchical fit.
        label_rows = [
            waveform_parameters["M1"],
            waveform_parameters[self.eos_parameter_name],
            hpm_data["y"].T,
        ]
        labels = np.vstack(label_rows).T

        label_mean = np.mean(labels, axis=0)
        label_std = np.std(labels, axis=0)
        whitened_labels = (labels - label_mean) / label_std

        waveform_model = stan.load_stan_model("waveform.stan")

        n_waveforms, n_labels = whitened_labels.shape
        waveform_data = {
            "F": frequencies.size,
            "N": n_waveforms,
            "P": n_labels,
            "y": aligned_amplitudes,
            "whitened_labels": whitened_labels,
        }

        # TODO: move the default optimizer keywords somewhere else.
        optimizer_kwds = {
            "iter": 100000,
            "tol_param": 1e-12,
            "tol_obj": 1e-12,
            "tol_grad": 1e-12,
            "tol_rel_grad": 1e8,
            **kwargs,
        }

        waveform_result = waveform_model.optimizing(data=waveform_data,
                                                    **optimizer_kwds)

        # Keep everything required to make predictions later.
        self._p_opt_waveform = waveform_result
        self._p_opt_hpm = hpm_result
        self._p_opt_f2 = (f2_mean, f2_coeff)
        self._p_opt_label_whiten = (label_mean, label_std)

        return self
# Finish labelling the grid of scatter plots; the upper-right panel is hidden.
axes[0, 0].set_ylabel(r"$J_{{{0}}}$".format(2))

axes[0, 1].set_visible(False)
axes[1, 1].scatter(truths["theta"].T[2], truths["theta"].T[1])
axes[1, 1].set_xlabel(r"$J_{{{0}}}$".format(2))
axes[1, 1].set_ylabel(r"$J_{{{0}}}$".format(1))

fig.tight_layout()

# NOTE(review): this first dictionary is discarded by the reassignment that
# follows, so `init_alpha` and the tolerances never reach the optimizer.
# Elsewhere the pattern is `dict(...)` followed by `.update(...)`, so this may
# have been meant as `op_kwds.update(data=data, seed=seed)`. Confirm the intent
# before changing it: merging would alter the optimizer settings.
op_kwds = dict(init_alpha=1,
               tol_obj=1e-16,
               tol_rel_grad=1e-16,
               tol_rel_obj=1e-16)
op_kwds = dict(data=data, seed=seed)

model = stan.load_stan_model("mlf.stan")

s_opt = model.optimizing(**op_kwds)

# Compare the optimized `psi` against the diagonal of the true `psi`.
fig, ax = plt.subplots()
#ax.scatter(np.diag(psi), s_opt["psi"], facecolor="b")
ax.scatter(np.diag(truths["psi"]), s_opt["psi"])
ax.set_title(r"$\psi$")
# Draw a one-to-one line across the combined extent of both axes, then apply
# the same limits to x and y.
limits = np.hstack([ax.get_xlim(), ax.get_ylim()])
limits = np.array([limits.min(), limits.max()])
ax.plot(limits, limits, c="#666666", zorder=-1)
ax.set_xlim(limits)
ax.set_ylim(limits)

# One stacked panel per entry along the first axis of the true `L`.
fig, axes = plt.subplots(truths["L"].shape[0], figsize=(4, 12))
for i, ax in enumerate(axes):
Example #9
0
    # Require finite entries for all predictors across all models.
    sources = data["sources"]
    M = config["number_of_sources_for_gaussian_process"]
    N = sources["source_id"].size

    # Create results file. Opening in "w" mode truncates any existing file at
    # `results_path`.
    with h5.File(results_path, "w") as results:

        # Add config. `np.bytes_` is used instead of the `np.string_` alias,
        # which was removed in NumPy 2.0; both name the same type on NumPy 1.x.
        results.attrs.create("config", np.bytes_(yaml.dump(config)))
        results.attrs.create("config_path", np.bytes_(config_path))

    # Load model and check optimization keywords
    model_path = os.path.join(pwd, config["model_path"])
    model = stan.load_stan_model(model_path, verbose=False)

    # Make sure that the tolerance entries are floats. When the key exists this
    # is the dictionary held by `config`, so it is updated in place.
    default_opt_kwds = config.get("optimisation_kwds", {})
    for key in ("tol_obj", "tol_grad", "tol_rel_grad", "tol_rel_obj"):
        if key in default_opt_kwds:
            default_opt_kwds[key] = float(default_opt_kwds[key])

    logger.info(f"Optimization keywords:\n{utils.repr_dict(default_opt_kwds)}")

    # Default [lower, upper] pair for each named parameter.
    default_bounds = dict(theta=[0.5, 1],
                          mu_single=[0.5, 15],
                          sigma_single=[0.05, 10],
                          sigma_multiple=[0.2, 1.6])

    # Plotting
Example #10
0
# Per-point y uncertainties: absolute values of zero-mean normal draws with
# standard deviation `y_err_intrinsic`.
y_err_intrinsic = 20
y_err = np.abs(np.random.normal(0, y_err_intrinsic, size=N))

# Noise-free abscissae, and their noisy observations.
x_true = np.random.uniform(0, 30, N)
x = x_true + np.random.normal(0, 1, size=N) * x_err

# The noise-free ordinates must follow the line evaluated at the noise-free
# abscissae. Using the noisy `x` here would generate data that are
# inconsistent with a model that treats `x_true` as latent.
y_true = m_true * x_true + b_true
y = y_true + np.random.normal(0, 1, size=N) * y_err


# Quick look at the generated data.
fig, ax = plt.subplots()
ax.scatter(x, y)
ax.errorbar(x, y, yerr=y_err)



data_dict = dict(N=N, x=x, y=y, y_err=y_err, x_err=x_err)
# Start the latent `x_true` values at the observed x.
init_dict = dict(m=0, b=0, x_true=x)

sm = stan.load_stan_model("advanced_line.stan")

# NOTE: the optimization does not receive `init_dict`; only the sampling does.
opt = sm.optimizing(data=data_dict)

sampling = sm.sampling(**stan.sampling_kwds(init=init_dict, data=data_dict, chains=2, iter=1000))

chains = sampling.extract()

# Arrange the posterior draws of the slope and intercept as columns.
X = np.array([chains["m"], chains["b"]]).T

fig = corner(X, truths=truths)
plt.show()
plt.show()