def rate(self):
        """Calculates the rate parameter"""
        state = compute_state(self._initial_state,
                              self._events,
                              self._stoichiometry,
                              closed=True)
        tms_state = tf.transpose(state, perm=(1, 0, 2))

        def fn(elems):
            return tf.stack(self._transition_rate_fn(*elems), axis=-1)

        rates = tf.vectorized_map(
            fn=fn,
            elems=(
                self._initial_step + tf.range(tms_state.shape[0]),
                tms_state,
            ),
        )

        def integrated_rate_fn():
            """Use mid-point integration to estimate the constant rate
            over time.
            """
            integrated_rates = tms_state[..., :-1] * rates
            return (integrated_rates[:-1, ...] +
                    integrated_rates[1:, ...]) / 2.0

        return (tf.reduce_sum(integrated_rate_fn(), axis=(-3, -2)) +
                self._baseline_hazard_rate_priors["rate"])
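
Every example on this page revolves around gemlib's compute_state. As a reading aid, here is a
minimal NumPy sketch of the semantics assumed throughout: the state timeseries is the initial
state plus the cumulative effect of the events mapped through the stoichiometry matrix. This is
an illustration only, not gemlib's actual implementation, and it ignores the `closed` argument.

import numpy as np

def compute_state_sketch(init_state, events, stoichiometry):
    """Illustrative only: init_state [M, S], events [M, T, X],
    stoichiometry [X, S] -> state timeseries [M, T, S]."""
    # Each transition event moves individuals between states according to its
    # row of the stoichiometry matrix; accumulate those increments over time
    # and add the initial state.
    increments = np.einsum("mtx,xs->mts", events, stoichiometry)
    return init_state[:, np.newaxis, :] + np.cumsum(increments, axis=1)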
Example 2
def within_between(input_files, output_file):
    """Calculates PAF for within- and between-location infection.

    :param input_files: a list of [data pickle, posterior samples pickle]
    :param output_file: a csv with within/between summary
    """

    with open(input_files[0], "rb") as f:
        covar_data = pkl.load(f)

    with open(input_files[1], "rb") as f:
        samples = pkl.load(f)

    beta2 = samples["beta2"]
    events = samples["seir"]
    init_state = samples["init_state"]
    state_timeseries = compute_state(init_state, events,
                                     model_spec.STOICHIOMETRY)

    within, between = calc_pressure_components(covar_data, beta2,
                                               state_timeseries[..., -1, :])

    df = pd.DataFrame(
        dict(
            within_mean=np.mean(within, axis=0),
            between_mean=np.mean(between, axis=0),
            p_within_gt_between=np.mean(within > between, axis=0),
        ),
        index=pd.Index(covar_data["locations"]["lad19cd"], name="location"),
    )
    df.to_csv(output_file)
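
A hypothetical invocation of the function above; the file names are placeholders, not from the
source.

within_between(
    input_files=["pipeline_data.pkl", "posterior_samples.pkl"],
    output_file="within_between_summary.csv",
)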
Example 3
    def r_fn(args):
        beta1_, beta2_, beta3_, sigma_, xi_, gamma0_, events_ = args
        t = events_.shape[-2] - 1
        state = compute_state(init_state, events_, model_spec.STOICHIOMETRY)
        state = tf.gather(state, t, axis=-2)  # State on final inference day

        model = model_spec.CovidUK(
            covariates=covar_data,
            initial_state=init_state,
            initial_step=0,
            num_steps=events_.shape[-2],
            priors=priors,
        )

        xi_pred = model_spec.conditional_gp(
            model.model["xi"](beta1_, sigma_),
            xi_,
            tf.constant([events.shape[-2] + model_spec.XI_FREQ],
                        dtype=model_spec.DTYPE)[:, tf.newaxis],
        )

        par = dict(
            beta1=beta1_,
            beta2=beta2_,
            beta3=beta3_,
            sigma=sigma_,
            gamma0=gamma0_,
            xi=xi_,
        )
        print("xi shape:", par["xi"].shape)
        ngm_fn = model_spec.next_generation_matrix_fn(covar_data, par)
        ngm = ngm_fn(t, state)
        return ngm
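
This r_fn is evidently meant to be mapped over posterior draws. A sketch of the driver, mirroring
the tf.vectorized_map pattern used in Example 20; the keys of `samples` are assumptions here.

ngms = tf.vectorized_map(
    r_fn,
    elems=(
        samples["beta1"], samples["beta2"], samples["beta3"],
        samples["sigma"], samples["xi"], samples["gamma0"],
        samples["seir"],
    ),
)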
Example 4
def predicted_incidence(
    posterior_samples,
    init_state,
    covar_data,
    init_step,
    num_steps,
    out_of_sample=False,
):
    """Runs the simulation forward in time from `init_state` at time `init_time`
       for `num_steps`.
    :param param: a dictionary of model parameters
    :covar_data: a dictionary of model covariate data
    :param init_step: the initial time step
    :param num_steps: the number of steps to simulate
    :returns: a tensor of srt_quhape [B, M, num_steps, X] where X is the number of state
              transitions
    """

    posterior_state = compute_state(
        init_state,
        posterior_samples["seir"],
        model_spec.STOICHIOMETRY,
    )
    posterior_samples["new_init_state"] = posterior_state[..., init_step, :]
    del posterior_samples["seir"]

    # For out-of-sample prediction, we have to re-simulate the
    # alpha_t trajectory given the starting point.
    if out_of_sample is True:
        alpha_t = posterior_samples["alpha_0"][:, tf.newaxis] + tf.cumsum(
            posterior_samples["alpha_t"], axis=-1)
        if init_step > 0:
            posterior_samples["alpha_0"] = alpha_t[:, init_step - 1]

        # Remove alpha_t from the posterior to make TFP re-simulate it.
        del posterior_samples["alpha_t"]

    @tf.function
    def do_sim():
        def sim_fn(args):
            par = tf.nest.pack_sequence_as(posterior_samples, args)
            init_ = par["new_init_state"]
            del par["new_init_state"]

            model = model_spec.CovidUK(
                covar_data,
                initial_state=init_,
                initial_step=init_step,
                num_steps=num_steps,
            )
            sim = model.sample(**par)
            return sim["seir"]

        return tf.map_fn(
            sim_fn,
            elems=tf.nest.flatten(posterior_samples),
            fn_output_signature=(tf.float64),
        )

    return posterior_samples["new_init_state"], do_sim()
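
A hypothetical call of the function above; the argument values are placeholders. The returned
incidence tensor has the [B, M, num_steps, X] shape described in the docstring.

new_init_state, incidence = predicted_incidence(
    posterior_samples,
    init_state,
    covar_data,
    init_step=0,
    num_steps=56,
    out_of_sample=True,
)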
Example 5
    def body(state_, occults_):
        state_t1 = tf.roll(state_, shift=-1, axis=-2)
        neg_state_idx = tf.where(state_t1 < 0)

        first_neg_state_idx = tf.gather(
            neg_state_idx,
            tf.concat(
                [
                    [[0]],
                    tf.where(neg_state_idx[:-1, 0] - neg_state_idx[1:, 0]) + 1,
                ],
                axis=0,
            ),
        )

        mask = tf.scatter_nd(
            first_neg_state_idx,
            tf.ones([first_neg_state_idx.shape[0], 1], dtype=state_t1.dtype),
            state_t1.shape,
        )
        delta_occults = tf.einsum("mts,xs->mtx", state_t1 * mask, stoichiometry)
        new_occults = tf.clip_by_value(
            occults_ - delta_occults, clip_value_min=0.0, clip_value_max=1.0e6
        )
        new_state = compute_state(
            init_state, events + new_occults, stoichiometry
        )
        return new_state, new_occults
Example 6
def prevalence(input_files, output_file):
    """Reconstruct predicted prevalence from
       original data and projection.

    :param input_files: a list of [data pickle, samples pickle, prediction pickle]
    :param output_file: a csv containing prevalence summary
    """
    offset = 4  # Account for recording lag
    timepoints = np.array([0, 7, 14, 28, 56], np.int32) + offset

    with open(input_files[0], "rb") as f:
        data = pkl.load(f)

    with open(input_files[1], "rb") as f:
        samples = pkl.load(f)

    with open(input_files[2], "rb") as f:
        prediction = pkl.load(f)

    insample_state = compute_state(samples["init_state"], samples["seir"],
                                   STOICHIOMETRY)
    predicted_state = compute_state(insample_state[..., -1, :], prediction,
                                    STOICHIOMETRY)

    def calc_prev(state, name=None):
        prev = np.sum(state[..., 1:3], axis=-1) / np.squeeze(data["N"])
        return mean_and_ci(prev, name=name)

    idx = prediction.coords["location"]
    prev = pd.DataFrame(
        calc_prev(predicted_state[..., timepoints[0], :], name="prev"),
        index=idx,
    )
    for t in timepoints[1:]:
        tmp = pd.DataFrame(
            calc_prev(predicted_state[..., t, :], name=f"prev{t-offset}"),
            index=idx,
        )
        prev = pd.concat([prev, tmp], axis="columns")

    prev.to_csv(output_file)
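
mean_and_ci is not shown on this page. A minimal sketch of the behaviour assumed here (posterior
mean plus a central 95% credible interval over the sample axis); this is an assumption, not the
project's actual implementation.

import numpy as np

def mean_and_ci_sketch(arr, q=(0.025, 0.975), axis=0, name=""):
    # Summarise posterior samples along `axis`: mean and a central 95% interval.
    return {
        name + "_mean": np.mean(arr, axis=axis),
        name + "_025": np.quantile(arr, q[0], axis=axis),
        name + "_975": np.quantile(arr, q[1], axis=axis),
    }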
Example 7
def predicted_incidence(posterior_samples, covar_data, init_step, num_steps):
    """Runs the simulation forward in time from `init_state` at time `init_time`
       for `num_steps`.
    :param param: a dictionary of model parameters
    :covar_data: a dictionary of model covariate data
    :param init_step: the initial time step
    :param num_steps: the number of steps to simulate
    :returns: a tensor of srt_quhape [B, M, num_steps, X] where X is the number of state
              transitions
    """

    @tf.function
    def sim_fn(args):
        beta1_, beta2_, sigma_, xi_, gamma0_, gamma1_, init_ = args

        par = dict(
            beta1=beta1_,
            beta2=beta2_,
            sigma=sigma_,
            xi=xi_,
            gamma0=gamma0_,
            gamma1=gamma1_,
        )
        model = model_spec.CovidUK(
            covar_data,
            initial_state=init_,
            initial_step=init_step,
            num_steps=num_steps,
        )
        sim = model.sample(**par)
        return sim["seir"]

    posterior_state = compute_state(
        posterior_samples["init_state"],
        posterior_samples["seir"],
        model_spec.STOICHIOMETRY,
    )
    init_state = posterior_state[..., init_step, :]

    events = tf.map_fn(
        sim_fn,
        elems=(
            posterior_samples["beta1"],
            posterior_samples["beta2"],
            posterior_samples["sigma"],
            posterior_samples["xi"],
            posterior_samples["gamma0"],
            posterior_samples["gamma1"],
            init_state,
        ),
        fn_output_signature=(tf.float64),
    )
    return init_state, events
Example 8
def regularize_occults(events, occults, init_state, stoichiometry):
    """Regularizes an occult matrix such that counting
    processes are valid

    :param events: a [M, T, X] events tensor
    :param occults: a [M, T, X] occults tensor
    :param init_state: a [M, S] initial state tensor
    :param stoichiometry: a [X, S] stoichiometry matrix
    :returns: a tuple containing updated (state, occults) tensors
    """

    from gemlib.util import compute_state

    def body(state_, occults_):
        state_t1 = tf.roll(state_, shift=-1, axis=-2)
        neg_state_idx = tf.where(state_t1 < 0)

        first_neg_state_idx = tf.gather(
            neg_state_idx,
            tf.concat(
                [
                    [[0]],
                    tf.where(neg_state_idx[:-1, 0] - neg_state_idx[1:, 0]) + 1,
                ],
                axis=0,
            ),
        )

        mask = tf.scatter_nd(
            first_neg_state_idx,
            tf.ones([first_neg_state_idx.shape[0], 1], dtype=state_t1.dtype),
            state_t1.shape,
        )
        delta_occults = tf.einsum("mts,xs->mtx", state_t1 * mask, stoichiometry)
        new_occults = tf.clip_by_value(
            occults_ - delta_occults, clip_value_min=0.0, clip_value_max=1.0e6
        )
        new_state = compute_state(
            init_state, events + new_occults, stoichiometry
        )
        return new_state, new_occults

    def cond(state_, _):
        return tf.reduce_any(state_ < 0)

    state = compute_state(init_state, events + occults, stoichiometry)
    new_state, new_occults = tf.while_loop(cond, body, (state, occults))

    return new_state, new_occults
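
The tf.while_loop above repeats `body` until `cond` reports no negative state counts, i.e. until
the occult events have been trimmed enough for the counting process to be valid. A hypothetical
call, with shapes following the docstring:

new_state, new_occults = regularize_occults(
    events,         # [M, T, X] observed events
    occults,        # [M, T, X] proposed occult events
    init_state,     # [M, S] initial state
    stoichiometry,  # [X, S] stoichiometry matrix
)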
Example 9
    def _log_prob(self, y, **kwargs):
        """Calculates the log probability of observing epidemic events y
        :param y: a list of tensors.  The first is of shape [n_times] containing times,
                  the second is of shape [n_times, n_states, n_states] containing event
                  matrices.
        :param param: a list of parameters
        :returns: a scalar giving the log probability of the epidemic
        """
        dtype = dtype_util.common_dtype([y, self.initial_state],
                                        dtype_hint=self.dtype)
        events = tf.convert_to_tensor(y, dtype)
        with tf.name_scope("DiscreteApproxContStateTransitionModel.log_prob"):

            state_timeseries = compute_state(
                self.initial_state,
                events,
                self.stoichiometry,
                closed=True,
            )

            tms_timeseries = tf.transpose(state_timeseries, perm=(1, 0, 2))
            tmr_events = tf.transpose(events, perm=(1, 0, 2))

            def fn(elems):
                return tf.stack(self.transition_rates(*elems), axis=-1)

            rates = tf.vectorized_map(
                fn=fn,
                elems=(
                    self.initial_step + tf.range(tms_timeseries.shape[0]),
                    tms_timeseries,
                ),
            )

            def integrated_rates():
                """Use mid-point integration to estimate the constant rate 
                   over time
                """
                integrated_rates = tms_timeseries[..., :-1] * rates
                return (integrated_rates[:-1, ...] +
                        integrated_rates[1:, ...]) / 2.0

            log_hazard_rate = tf.reduce_sum(tmr_events *
                                            tf.math.log(integrated_rates()))
            log_survival = tf.reduce_sum(integrated_rates()) * self.time_delta
            log_denom = tf.reduce_sum(tf.math.lgamma(tmr_events + 1.0))

            return log_hazard_rate - log_survival - log_denom
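
As a quick illustration of the mid-point rule used by integrated_rates above, the rate over each
interval is taken as the average of the rates evaluated at its endpoints. A toy NumPy check, not
part of the model code:

import numpy as np

rates = np.array([0.2, 0.4, 0.8])         # rate evaluated at successive time points
midpoints = (rates[:-1] + rates[1:]) / 2  # assumed-constant rate per interval
print(midpoints)                          # [0.3 0.6]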
Example 10
def prevalence(prediction, popsize):
    prev = compute_state(prediction["initial_state"], prediction["events"],
                         STOICHIOMETRY)
    prev = xarray.DataArray(
        prev.numpy(),
        coords=[
            np.arange(prev.shape[0]),
            prediction.coords["location"],
            prediction.coords["time"],
            np.arange(prev.shape[-1]),
        ],
        dims=["iteration", "location", "time", "state"],
    )
    prev_per_1e5 = (prev[..., 1:3].sum(dim="state").reset_coords(drop=True) /
                    np.array(popsize)[np.newaxis, :, np.newaxis] * 100000)
    return xarray2summarydf(prev_per_1e5)
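
A hypothetical call of the function above: popsize is a per-location population vector aligned
with prediction.coords["location"], and the result is a summary DataFrame of prevalence per
100,000 people (built by xarray2summarydf, which is not shown on this page).

prev_df = prevalence(prediction, popsize=covar_data["N"])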
Example 11
    def r_fn(args):

        par = tf.nest.pack_sequence_as(samples, args)

        state = compute_state(initial_state, par["seir"],
                              model_spec.STOICHIOMETRY)
        del par["seir"]

        def fn(t):
            state_ = tf.gather(state, t,
                               axis=-2)  # State on final inference day
            ngm_fn = model_spec.next_generation_matrix_fn(covar_data, par)
            ngm = ngm_fn(t, state_)
            return ngm

        ngm = tf.vectorized_map(fn, elems=times)
        return tf.reduce_sum(ngm, axis=-2)  # sum over destinations
Example 12
def discrete_markov_log_prob(events, init_state, init_step, time_delta,
                             hazard_fn, stoichiometry):
    """Calculates an unnormalised log_prob function for a discrete time epidemic model.
    :param events: a `[M, T, X]` batch of transition events for metapopulation M,
                   times `T`, and transitions `X`.
    :param init_state: a vector of shape `[M, S]` the initial state of the epidemic for
                       `M` metapopulations and `S` states
    :param init_step: the initial time step, as an offset to `range(events.shape[-2])`
    :param time_delta: the size of the time step.
    :param hazard_fn: a function that takes a state and returns a matrix of transition
                      rates.
    :param stoichiometry: a `[X, S]` matrix describing the state update for each
                          transition.
    :return: a scalar log probability for the epidemic.
    """
    num_meta = events.shape[-3]
    num_times = events.shape[-2]
    num_states = stoichiometry.shape[-1]
    state_timeseries = compute_state(init_state, events,
                                     stoichiometry)  # MxTxS

    tms_timeseries = tf.transpose(state_timeseries, perm=(1, 0, 2))

    def fn(elems):
        return hazard_fn(*elems)

    tx_coords = transition_coords(stoichiometry)
    rates = tf.vectorized_map(fn=fn,
                              elems=[tf.range(num_times), tms_timeseries])
    rate_matrix = make_transition_matrix(rates, tx_coords,
                                         tms_timeseries.shape)
    probs = approx_expm(rate_matrix * time_delta)

    # [T, M, S, S] to [M, T, S, S]
    probs = tf.transpose(probs, perm=(1, 0, 2, 3))
    event_matrix = make_transition_matrix(events, tx_coords,
                                          [num_meta, num_times, num_states])
    event_matrix = tf.linalg.set_diag(
        event_matrix, state_timeseries - tf.reduce_sum(event_matrix, axis=-1))
    logp = tfd.Multinomial(
        tf.cast(state_timeseries, dtype=tf.float32),
        probs=tf.cast(probs, dtype=tf.float32),
        name="log_prob",
    ).log_prob(tf.cast(event_matrix, dtype=tf.float32))

    return tf.cast(tf.reduce_sum(logp), dtype=events.dtype)
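
A hypothetical call of the function above; the tensors and hazard_fn are placeholders with the
shapes given in the docstring.

logp = discrete_markov_log_prob(
    events,         # [M, T, X]
    init_state,     # [M, S]
    init_step=0,
    time_delta=1.0,
    hazard_fn=hazard_fn,
    stoichiometry=stoichiometry,
)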
Example 13
    def r_fn(args):
        beta1_, beta2_, beta3_, sigma_, xi_, gamma0_, events_ = args
        t = events_.shape[-2] - 1
        state = compute_state(samples["init_state"], events_,
                              model_spec.STOICHIOMETRY)
        state = tf.gather(state, t, axis=-2)  # State on final inference day

        par = dict(
            beta1=beta1_,
            beta2=beta2_,
            beta3=beta3_,
            sigma=sigma_,
            gamma0=gamma0_,
            xi=xi_,
        )
        ngm_fn = model_spec.next_generation_matrix_fn(covar_data, par)
        ngm = ngm_fn(t, state)
        return ngm
Example 14
def predicted_incidence(posterior_samples, init_state, covar_data, init_step,
                        num_steps):
    """Runs the simulation forward in time from `init_state` at time `init_time`
       for `num_steps`.
    :param param: a dictionary of model parameters
    :covar_data: a dictionary of model covariate data
    :param init_step: the initial time step
    :param num_steps: the number of steps to simulate
    :returns: a tensor of srt_quhape [B, M, num_steps, X] where X is the number of state
              transitions
    """

    posterior_state = compute_state(
        init_state,
        posterior_samples["seir"],
        model_spec.STOICHIOMETRY,
    )
    posterior_samples["new_init_state"] = posterior_state[..., init_step, :]
    del posterior_samples["seir"]

    @tf.function
    def do_sim():
        def sim_fn(args):
            par = tf.nest.pack_sequence_as(posterior_samples, args)
            init_ = par["new_init_state"]
            del par["new_init_state"]

            model = model_spec.CovidUK(
                covar_data,
                initial_state=init_,
                initial_step=init_step,
                num_steps=num_steps,
            )
            sim = model.sample(**par)
            return sim["seir"]

        return tf.map_fn(
            sim_fn,
            elems=tf.nest.flatten(posterior_samples),
            fn_output_signature=(tf.float64),
        )

    return posterior_samples["new_init_state"], do_sim()
Example 15
        def r_fn(args):
            beta1_, beta2_, beta_3, sigma_, xi_, gamma0_, events_ = args
            t = events_.shape[-2] - 1
            state = compute_state(init_state, events_,
                                  model_spec.STOICHIOMETRY)
            state = tf.gather(state, t,
                              axis=-2)  # State on final inference day

            model = model_spec.CovidUK(
                covariates=covar_data,
                initial_state=init_state,
                initial_step=0,
                num_steps=events_.shape[-2],
                priors=priors,
            )

            xi_pred = model_spec.conditional_gp(
                model.model["xi"](beta1_, sigma_),
                xi_,
                tf.constant([events.shape[-2] + model_spec.XI_FREQ],
                            dtype=model_spec.DTYPE)[:, tf.newaxis],
            )

            # FNC NOTE:
            # adding another 0.0 to beta3 as TF complains of dimension mismatch otherwise
            par = dict(
                beta1=beta1_,
                beta2=beta2_,
                beta3=tf.concat([beta_3, [0.0, 0.0]], axis=-1),
                sigma=sigma_,
                gamma0=gamma0_,
                xi=xi_,  # tf.reshape(xi_pred.sample(), [1]),
            )
            print("xi shape:", par["xi"].shape)
            ngm_fn = model_spec.next_generation_matrix_fn(covar_data, par)
            ngm = ngm_fn(t, state)
            return ngm
Example 16
def mcmc(data_file, output_file, config, use_autograph=False, use_xla=True):
    """Constructs and runs the MCMC"""

    if tf.test.gpu_device_name():
        print("Using GPU")
    else:
        print("Using CPU")

    data = xarray.open_dataset(data_file, group="constant_data")
    cases = xarray.open_dataset(data_file, group="observations")[
        "cases"
    ].astype(DTYPE)
    dates = cases.coords["time"]

    # Impute censored events, return cases
    # Take the last week of data, and repeat a further 3 times
    # to get a better occult initialisation.
    extra_cases = tf.tile(cases[:, -7:], [1, 3])
    cases = tf.concat([cases, extra_cases], axis=-1)
    events = model_spec.impute_censored_events(cases).numpy()

    # Initial conditions are obtained by computing the state
    # at the beginning of the inference period
    #
    # Imputed censored events that pre-date the first I-R events
    # in the cases dataset are discarded.  They are only used to
    # set up a sensible initial state.
    state = compute_state(
        initial_state=tf.concat(
            [
                tf.constant(data["N"], DTYPE)[:, tf.newaxis],
                tf.zeros_like(events[:, 0, :]),
            ],
            axis=-1,
        ),
        events=events,
        stoichiometry=model_spec.STOICHIOMETRY,
    )
    start_time = state.shape[1] - cases.shape[1]
    initial_state = state[:, start_time, :]
    events = events[:, start_time:-21, :]  # Clip off the "extra" events

    ########################################################
    # Construct the MCMC kernels #
    ########################################################
    model = model_spec.CovidUK(
        covariates=data,
        initial_state=initial_state,
        initial_step=0,
        num_steps=events.shape[1],
    )

    param_bij = tfb.Invert(  # Forward transform unconstrains params
        tfb.Blockwise(
            [
                tfb.Softplus(low=dtype_util.eps(DTYPE)),
                tfb.Identity(),
                tfb.Identity(),
            ],
            block_sizes=[1, 3, events.shape[1]],
        )
    )

    def joint_log_prob(unconstrained_params, events):
        params = param_bij.inverse(unconstrained_params)
        return model.log_prob(
            dict(
                psi=params[0],
                beta_area=params[1],
                gamma0=params[2],
                gamma1=params[3],
                alpha_0=params[4],
                alpha_t=params[5:],
                seir=events,
            )
        ) + param_bij.inverse_log_det_jacobian(
            unconstrained_params, event_ndims=1
        )

    # MCMC tracing functions
    ###############################
    # Construct bursted MCMC loop #
    ###############################
    current_chain_state = [
        tf.concat(
            [
                np.array([0.1, 0.0, 0.0, 0.0], dtype=DTYPE),
                np.full(events.shape[1], -1.75, dtype=DTYPE,),
            ],
            axis=0,
        ),
        events,
    ]
    print("Num time steps:", events.shape[1], flush=True)
    print("alpha_t shape", model.event_shape["alpha_t"], flush=True)
    print("Initial chain state:", current_chain_state[0], flush=True)
    print("Initial logpi:", joint_log_prob(*current_chain_state), flush=True)

    # Output file
    posterior = run_mcmc(
        joint_log_prob_fn=joint_log_prob,
        current_state=current_chain_state,
        param_bijector=param_bij,
        initial_conditions=initial_state,
        config=config,
        output_file=output_file,
    )
    posterior._file.create_dataset("initial_state", data=initial_state)
    posterior._file.create_dataset(
        "time", data=np.array(dates).astype(str).astype(h5py.string_dtype()),
    )

    print(f"Acceptance theta: {posterior['results/hmc/is_accepted'][:].mean()}")
    print(
        f"Acceptance move S->E: {posterior['results/move/S->E/is_accepted'][:].mean()}"
    )
    print(
        f"Acceptance move E->I: {posterior['results/move/E->I/is_accepted'][:].mean()}"
    )
    print(
        f"Acceptance occult S->E: {posterior['results/occult/S->E/is_accepted'][:].mean()}"
    )
    print(
        f"Acceptance occult E->I: {posterior['results/occult/E->I/is_accepted'][:].mean()}"
    )

    del posterior
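
A small self-contained check of the param_bij construction used in this example: because it is
tfb.Invert of a constraining Blockwise bijector, its inverse maps the unconstrained MCMC state
back to constrained parameters (Softplus for the first block, identity for the rest), which is
what joint_log_prob relies on. The values and the number of time steps are placeholders.

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

tfb = tfp.bijectors
DTYPE = np.float64
T = 10  # placeholder number of time steps

param_bij = tfb.Invert(  # Forward transform unconstrains params
    tfb.Blockwise(
        [tfb.Softplus(low=1e-9), tfb.Identity(), tfb.Identity()],
        block_sizes=[1, 3, T],
    )
)

unconstrained = tf.concat(
    [tf.constant([-2.0, 0.5, -0.3, 1.2], dtype=DTYPE), tf.zeros(T, dtype=DTYPE)],
    axis=0,
)
constrained = param_bij.inverse(unconstrained)  # first entry made positive via Softplus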
Example 17
    )

    # Pre-determined thinning of posterior (better done in MCMC?)
    idx = range(6000, 10000, 10)
    param = dict(
        beta1=posterior["samples/beta1"][idx],
        beta2=posterior["samples/beta2"][idx],
        beta3=posterior["samples/beta3"][idx, ],
        sigma=posterior["samples/sigma"][idx, ],
        xi=posterior["samples/xi"][idx],
        gamma0=posterior["samples/gamma0"][idx],
        gamma1=posterior["samples/gamma1"][idx],
    )
    events = posterior["samples/events"][idx]
    init_state = posterior["initial_state"][:]
    state_timeseries = compute_state(init_state, events,
                                     model_spec.STOICHIOMETRY)

    # Build model
    model = model_spec.CovidUK(
        covar_data,
        initial_state=init_state,
        initial_step=0,
        num_steps=events.shape[1],
        priors=config["mcmc"]["prior"],
    )

    ngms = calc_R_it(param, events, init_state, covar_data,
                     config["mcmc"]["prior"])
    b, _ = power_iteration(ngms)
    rt = rayleigh_quotient(ngms, b)
    q = np.arange(0.05, 1.0, 0.05)
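
power_iteration and rayleigh_quotient are not shown on this page; the names describe the standard
technique for estimating the dominant eigenvalue of the next-generation matrices, which is the
usual reproduction-number summary. A minimal NumPy sketch of that technique for a single [M, M]
matrix (an assumption about what the helpers do, not the project's code):

import numpy as np

def power_iteration_sketch(ngm, n_iter=100):
    # Repeatedly apply the matrix and renormalise to approximate the dominant eigenvector.
    b = np.ones(ngm.shape[-1])
    for _ in range(n_iter):
        b = ngm @ b
        b = b / np.linalg.norm(b)
    return b

def rayleigh_quotient_sketch(ngm, b):
    # Eigenvalue estimate associated with the eigenvector b, i.e. the R_t summary.
    return (b @ ngm @ b) / (b @ b)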
Example 18
def mcmc(data_file, output_file, config, use_autograph=False, use_xla=True):
    """Constructs and runs the MCMC"""

    if tf.test.gpu_device_name():
        print("Using GPU")
    else:
        print("Using CPU")

    data = xarray.open_dataset(data_file, group="constant_data")
    cases = xarray.open_dataset(data_file, group="observations")["cases"]

    # We load in cases and impute missing infections first, since this sets the
    # time epoch which we are analysing.
    # Impute censored events, return cases
    print("Data shape:", cases.shape)
    events = model_spec.impute_censored_events(cases.astype(DTYPE))

    # Initial conditions are obtained by computing the state
    # at the beginning of the inference period
    #
    # Imputed censored events that pre-date the first I-R events
    # in the cases dataset are discarded.  They are only used to
    # set up a sensible initial state.
    state = compute_state(
        initial_state=tf.concat(
            [
                tf.constant(data["N"], DTYPE)[:, tf.newaxis],
                tf.zeros_like(events[:, 0, :]),
            ],
            axis=-1,
        ),
        events=events,
        stoichiometry=model_spec.STOICHIOMETRY,
    )
    start_time = state.shape[1] - cases.shape[1]
    initial_state = state[:, start_time, :]
    events = events[:, start_time:, :]

    ########################################################
    # Construct the MCMC kernels #
    ########################################################
    model = model_spec.CovidUK(
        covariates=data,
        initial_state=initial_state,
        initial_step=0,
        num_steps=events.shape[1],
    )

    def joint_log_prob(block0, block1, events):
        return model.log_prob(
            dict(
                beta2=block0[0],
                gamma0=block0[1],
                gamma1=block0[2],
                sigma=block0[3],
                beta1=block1[0],
                xi=block1[1:],
                seir=events,
            ))

    # Build Metropolis within Gibbs sampler
    def make_blk0_kernel(shape, name):
        def fn(target_log_prob_fn, _):
            return tfp.mcmc.TransformedTransitionKernel(
                inner_kernel=AdaptiveRandomWalkMetropolis(
                    target_log_prob_fn=target_log_prob_fn,
                    initial_covariance=np.eye(shape[0], dtype=model_spec.DTYPE)
                    * 1e-1,
                    covariance_burnin=200,
                ),
                bijector=tfp.bijectors.Blockwise(
                    bijectors=[
                        tfp.bijectors.Exp(),
                        tfp.bijectors.Identity(),
                        tfp.bijectors.Exp(),
                        # tfp.bijectors.Identity(),
                    ],
                    block_sizes=[1, 2, 1],  # , 5],
                ),
                name=name,
            )

        return fn

    def make_blk1_kernel(shape, name):
        def fn(target_log_prob_fn, _):
            return AdaptiveRandomWalkMetropolis(
                target_log_prob_fn=target_log_prob_fn,
                initial_covariance=np.eye(shape[0], dtype=model_spec.DTYPE) *
                1e-1,
                covariance_burnin=200,
                name=name,
            )

        return fn

    def make_partially_observed_step(target_event_id,
                                     prev_event_id=None,
                                     next_event_id=None,
                                     name=None):
        def fn(target_log_prob_fn, _):
            return tfp.mcmc.MetropolisHastings(
                inner_kernel=UncalibratedEventTimesUpdate(
                    target_log_prob_fn=target_log_prob_fn,
                    target_event_id=target_event_id,
                    prev_event_id=prev_event_id,
                    next_event_id=next_event_id,
                    initial_state=initial_state,
                    dmax=config["dmax"],
                    mmax=config["m"],
                    nmax=config["nmax"],
                ),
                name=name,
            )

        return fn

    def make_occults_step(prev_event_id, target_event_id, next_event_id, name):
        def fn(target_log_prob_fn, _):
            return tfp.mcmc.MetropolisHastings(
                inner_kernel=UncalibratedOccultUpdate(
                    target_log_prob_fn=target_log_prob_fn,
                    topology=TransitionTopology(prev_event_id, target_event_id,
                                                next_event_id),
                    cumulative_event_offset=initial_state,
                    nmax=config["occult_nmax"],
                    t_range=(events.shape[1] - 21, events.shape[1]),
                    name=name,
                ),
                name=name,
            )

        return fn

    def make_event_multiscan_kernel(target_log_prob_fn, _):
        return MultiScanKernel(
            config["num_event_time_updates"],
            GibbsKernel(
                target_log_prob_fn=target_log_prob_fn,
                kernel_list=[
                    (0, make_partially_observed_step(0, None, 1, "se_events")),
                    (0, make_partially_observed_step(1, 0, 2, "ei_events")),
                    (0, make_occults_step(None, 0, 1, "se_occults")),
                    (0, make_occults_step(0, 1, 2, "ei_occults")),
                ],
                name="gibbs1",
            ),
        )

    # MCMC tracing functions
    def trace_results_fn(_, results):
        """Packs results into a dictionary"""
        results_dict = {}
        res0 = results.inner_results

        results_dict["block0"] = {
            "is_accepted":
            res0[0].inner_results.is_accepted,
            "target_log_prob":
            res0[0].inner_results.accepted_results.target_log_prob,
        }
        results_dict["block1"] = {
            "is_accepted": res0[1].is_accepted,
            "target_log_prob": res0[1].accepted_results.target_log_prob,
        }

        def get_move_results(results):
            return {
                "is_accepted":
                results.is_accepted,
                "target_log_prob":
                results.accepted_results.target_log_prob,
                "proposed_delta":
                tf.stack([
                    results.accepted_results.m,
                    results.accepted_results.t,
                    results.accepted_results.delta_t,
                    results.accepted_results.x_star,
                ]),
            }

        res1 = res0[2].inner_results
        results_dict["move/S->E"] = get_move_results(res1[0])
        results_dict["move/E->I"] = get_move_results(res1[1])
        results_dict["occult/S->E"] = get_move_results(res1[2])
        results_dict["occult/E->I"] = get_move_results(res1[3])

        return results_dict

    # Build MCMC algorithm here.  This will be run in bursts for memory economy
    @tf.function(autograph=use_autograph, experimental_compile=use_xla)
    def sample(n_samples, init_state, thin=0, previous_results=None):
        with tf.name_scope("main_mcmc_sample_loop"):

            init_state = init_state.copy()

            gibbs_schema = GibbsKernel(
                target_log_prob_fn=joint_log_prob,
                kernel_list=[
                    (0, make_blk0_kernel(init_state[0].shape, "block0")),
                    (1, make_blk1_kernel(init_state[1].shape, "block1")),
                    (2, make_event_multiscan_kernel),
                ],
                name="gibbs0",
            )

            samples, results, final_results = tfp.mcmc.sample_chain(
                n_samples,
                init_state,
                kernel=gibbs_schema,
                num_steps_between_results=thin,
                previous_kernel_results=previous_results,
                return_final_kernel_results=True,
                trace_fn=trace_results_fn,
            )

            return samples, results, final_results

    ###############################
    # Construct bursted MCMC loop #
    ###############################
    NUM_BURSTS = int(config["num_bursts"])
    NUM_BURST_SAMPLES = int(config["num_burst_samples"])
    NUM_SAVED_SAMPLES = NUM_BURST_SAMPLES * NUM_BURSTS

    # RNG stuff
    tf.random.set_seed(2)

    current_state = [
        tf.constant([0.6, 0.0, 0.0, 0.1],
                    dtype=DTYPE),  # , 0.0, 0.0, 0.0, 0.0, 0.0], dtype=DTYPE),
        tf.zeros(
            model.model["xi"](0.0, 0.1).event_shape[-1] + 1,
            dtype=DTYPE,
        ),
        events,
    ]
    print("Initial logpi:", joint_log_prob(*current_state))

    # Output file
    samples, results, _ = sample(1, current_state)
    posterior = Posterior(
        output_file,
        sample_dict={
            "beta2": samples[0][:, 0],
            "gamma0": samples[0][:, 1],
            "gamma1": samples[0][:, 2],
            "sigma": samples[0][:, 3],
            "beta1": samples[1][:, 0],
            "xi": samples[1][:, 1:],
            "seir": samples[2],
        },
        results_dict=results,
        num_samples=NUM_SAVED_SAMPLES,
    )
    posterior._file.create_dataset("initial_state", data=initial_state)

    # We loop over successive calls to sample because we have to dump results
    #   to disc, or else we run out of memory (even on a 32GB system).
    # with tf.profiler.experimental.Profile("/tmp/tf_logdir"):
    final_results = None
    for i in tqdm.tqdm(range(NUM_BURSTS),
                       unit_scale=NUM_BURST_SAMPLES * config["thin"]):
        samples, results, final_results = sample(
            NUM_BURST_SAMPLES,
            init_state=current_state,
            thin=config["thin"] - 1,
            previous_results=final_results,
        )
        current_state = [s[-1] for s in samples]
        print(current_state[0].numpy(), flush=True)

        start = perf_counter()
        posterior.write_samples(
            {
                "beta2": samples[0][:, 0],
                "gamma0": samples[0][:, 1],
                "gamma1": samples[0][:, 2],
                "sigma": samples[0][:, 3],
                "beta1": samples[1][:, 0],
                "xi": samples[1][:, 1:],
                "seir": samples[2],
            },
            first_dim_offset=i * NUM_BURST_SAMPLES,
        )
        posterior.write_results(results,
                                first_dim_offset=i * NUM_BURST_SAMPLES)
        end = perf_counter()

        print("Storage time:", end - start, "seconds")
        for k, v in results.items():
            print(
                f"Acceptance {k}:",
                tf.reduce_mean(tf.cast(v["is_accepted"], tf.float32)),
            )

    print(
        f"Acceptance theta: {posterior['results/block0/is_accepted'][:].mean()}"
    )
    print(
        f"Acceptance xi: {posterior['results/block1/is_accepted'][:].mean()}")
    print(
        f"Acceptance move S->E: {posterior['results/move/S->E/is_accepted'][:].mean()}"
    )
    print(
        f"Acceptance move E->I: {posterior['results/move/E->I/is_accepted'][:].mean()}"
    )
    print(
        f"Acceptance occult S->E: {posterior['results/occult/S->E/is_accepted'][:].mean()}"
    )
    print(
        f"Acceptance occult E->I: {posterior['results/occult/E->I/is_accepted'][:].mean()}"
    )

    del posterior
Example 19
def runInference(pipelineData):

    # Read in settings
    config = pipelineData['config']

    # Extract data
    if 'covar_data' in pipelineData:
        covar_data = pipelineData['covar_data']
    else:
        covar_data, data = GetData.CovarData(config)
        pipelineData['covar_data'] = covar_data
        pipelineData['data'] = data
    # inference_period = config['dates']['inference_period']
    # date_low = config['dates']['low']
    # date_high = config['dates']['high']
    # weekday = config['dates']['weekday']

    # We load in cases and impute missing infections first, since this sets the
    # time epoch which we are analysing.
    cases = pipelineData['data']['cases_wide']

    # Impute censored events, return cases
    events = model_spec.impute_censored_events(cases)

    # Initial conditions are obtained by computing the state
    # at the beginning of the inference period
    #
    # Imputed censored events that pre-date the first I-R events
    # in the cases dataset are discarded.  They are only used to
    # set up a sensible initial state.
    _initial_state = tf.concat(
        [covar_data["N"], tf.zeros_like(events[:, 0, :])], axis=-1)
    state = compute_state(
        initial_state=_initial_state,
        events=events,
        stoichiometry=model_spec.STOICHIOMETRY,
    )
    start_time = state.shape[1] - cases.shape[1]
    initial_state = state[:, start_time, :]
    events = events[:, start_time:, :]
    num_metapop = covar_data["N"].shape[0]

    ########################################################
    # Build the model, and then construct the MCMC kernels #
    ########################################################
    def convert_priors(node):
        if isinstance(node, dict):
            for k, v in node.items():
                node[k] = convert_priors(v)
            return node
        return float(node)

    model = model_spec.CovidUK(
        covariates=covar_data,
        initial_state=initial_state,
        initial_step=0,
        num_steps=events.shape[1],
        priors=convert_priors(config["mcmc"]["prior"]),
    )

    # Full joint log posterior distribution
    # $\pi(\theta, \xi, y^{se}, y^{ei} | y^{ir})$
    # FNC NOTE:
    # adding another 0.0 to beta3 as TF complains of dimension mismatch otherwise
    def logp(block0, block1, events):
        return model.log_prob(
            dict(
                beta2=block0[0],
                gamma0=block0[1],
                gamma1=block0[2],
                sigma=block0[3],
                beta3=tf.concat([block0[4:6], [0.0, 0.0]], axis=-1),
                beta1=block1[0],
                xi=block1[1:],
                seir=events,
            ))

    # Build Metropolis within Gibbs sampler
    #
    # Kernels are:
    #     Q(\theta, \theta^\prime)
    #     Q(\xi, \xi^\prime)
    #     Q(Z^{se}, Z^{se\prime}) (partially-censored)
    #     Q(Z^{ei}, Z^{ei\prime}) (partially-censored)
    #     Q(Z^{se}, Z^{se\prime}) (occult)
    #     Q(Z^{ei}, Z^{ei\prime}) (occult)
    def make_blk0_kernel(shape, name):
        def fn(target_log_prob_fn, _):
            return tfp.mcmc.TransformedTransitionKernel(
                inner_kernel=AdaptiveRandomWalkMetropolis(
                    target_log_prob_fn=target_log_prob_fn,
                    initial_covariance=np.eye(shape[0], dtype=model_spec.DTYPE)
                    * 1e-1,
                    covariance_burnin=200,
                ),
                bijector=tfp.bijectors.Blockwise(
                    bijectors=[
                        tfp.bijectors.Exp(),
                        tfp.bijectors.Identity(),
                        tfp.bijectors.Exp(),
                        tfp.bijectors.Identity(),
                    ],
                    block_sizes=[1, 2, 1, 2],
                ),
                name=name,
            )

        return fn

    def make_blk1_kernel(shape, name):
        def fn(target_log_prob_fn, _):
            return AdaptiveRandomWalkMetropolis(
                target_log_prob_fn=target_log_prob_fn,
                initial_covariance=np.eye(shape[0], dtype=model_spec.DTYPE) *
                1e-1,
                covariance_burnin=200,
                name=name,
            )

        return fn

    def make_partially_observed_step(target_event_id,
                                     prev_event_id=None,
                                     next_event_id=None,
                                     name=None):
        def fn(target_log_prob_fn, _):
            return tfp.mcmc.MetropolisHastings(
                inner_kernel=UncalibratedEventTimesUpdate(
                    target_log_prob_fn=target_log_prob_fn,
                    target_event_id=target_event_id,
                    prev_event_id=prev_event_id,
                    next_event_id=next_event_id,
                    initial_state=initial_state,
                    dmax=config["mcmc"]["dmax"],
                    mmax=config["mcmc"]["m"],
                    nmax=config["mcmc"]["nmax"],
                ),
                name=name,
            )

        return fn

    def make_occults_step(prev_event_id, target_event_id, next_event_id, name):
        def fn(target_log_prob_fn, _):
            return tfp.mcmc.MetropolisHastings(
                inner_kernel=UncalibratedOccultUpdate(
                    target_log_prob_fn=target_log_prob_fn,
                    topology=TransitionTopology(prev_event_id, target_event_id,
                                                next_event_id),
                    cumulative_event_offset=initial_state,
                    nmax=config["mcmc"]["occult_nmax"],
                    t_range=(events.shape[1] - 21, events.shape[1]),
                    name=name,
                ),
                name=name,
            )

        return fn

    def make_event_multiscan_kernel(target_log_prob_fn, _):
        return MultiScanKernel(
            config["mcmc"]["num_event_time_updates"],
            GibbsKernel(
                target_log_prob_fn=target_log_prob_fn,
                kernel_list=[
                    (0, make_partially_observed_step(0, None, 1, "se_events")),
                    (0, make_partially_observed_step(1, 0, 2, "ei_events")),
                    (0, make_occults_step(None, 0, 1, "se_occults")),
                    (0, make_occults_step(0, 1, 2, "ei_occults")),
                ],
                name="gibbs1",
            ),
        )

    # MCMC tracing functions
    def trace_results_fn(_, results):
        """Packs results into a dictionary"""
        results_dict = {}
        res0 = results.inner_results

        results_dict["block0"] = {
            "is_accepted":
            res0[0].inner_results.is_accepted,
            "target_log_prob":
            res0[0].inner_results.accepted_results.target_log_prob,
        }
        results_dict["block1"] = {
            "is_accepted": res0[1].is_accepted,
            "target_log_prob": res0[1].accepted_results.target_log_prob,
        }

        def get_move_results(results):
            return {
                "is_accepted":
                results.is_accepted,
                "target_log_prob":
                results.accepted_results.target_log_prob,
                "proposed_delta":
                tf.stack([
                    results.accepted_results.m,
                    results.accepted_results.t,
                    results.accepted_results.delta_t,
                    results.accepted_results.x_star,
                ]),
            }

        res1 = res0[2].inner_results
        results_dict["move/S->E"] = get_move_results(res1[0])
        results_dict["move/E->I"] = get_move_results(res1[1])
        results_dict["occult/S->E"] = get_move_results(res1[2])
        results_dict["occult/E->I"] = get_move_results(res1[3])

        return results_dict

    # Build MCMC algorithm here.  This will be run in bursts for memory economy
    @tf.function(autograph=False, experimental_compile=True)
    def sample(n_samples, init_state, thin=0, previous_results=None):
        with tf.name_scope("main_mcmc_sample_loop"):

            init_state = init_state.copy()

            gibbs_schema = GibbsKernel(
                target_log_prob_fn=logp,
                kernel_list=[
                    (0, make_blk0_kernel(init_state[0].shape, "block0")),
                    (1, make_blk1_kernel(init_state[1].shape, "block1")),
                    (2, make_event_multiscan_kernel),
                ],
                name="gibbs0",
            )

            samples, results, final_results = tfp.mcmc.sample_chain(
                n_samples,
                init_state,
                kernel=gibbs_schema,
                num_steps_between_results=thin,
                previous_kernel_results=previous_results,
                return_final_kernel_results=True,
                trace_fn=trace_results_fn,
            )

            return samples, results, final_results

    ####################################
    # Construct bursted MCMC loop here #
    ####################################

    # MCMC Control
    NUM_BURSTS = int(config["mcmc"]["num_bursts"])
    NUM_BURST_SAMPLES = int(config["mcmc"]["num_burst_samples"])
    NUM_EVENT_TIME_UPDATES = int(config["mcmc"]["num_event_time_updates"])
    NUM_SAVED_SAMPLES = NUM_BURST_SAMPLES * NUM_BURSTS

    # RNG stuff
    tf.random.set_seed(2)

    current_state = [
        np.array([0.2, 0.0, 0.0, 0.1, 0.0, 0.0], dtype=DTYPE),
        np.zeros(
            model.model["xi"](0.0, 0.1).event_shape[-1]
            # + model.model["beta3"]().event_shape[-1]
            + 1,
            dtype=DTYPE,
        ),
        events,
    ]
    print("Initial logpi:", logp(*current_state))

    # Output file
    samples, results, _ = sample(1, current_state)
    print('Storing posterior data at', config["PosteriorData"]["address"])
    posterior = Posterior(
        config["PosteriorData"]["address"],
        sample_dict={
            "beta2": (samples[0][:, 0], (NUM_BURST_SAMPLES, )),
            "gamma0": (samples[0][:, 1], (NUM_BURST_SAMPLES, )),
            "gamma1": (samples[0][:, 2], (NUM_BURST_SAMPLES, )),
            "sigma": (samples[0][:, 3], (NUM_BURST_SAMPLES, )),
            "beta3": (samples[0][:, 4:], (NUM_BURST_SAMPLES, 2)),
            "beta1": (samples[1][:, 0], (NUM_BURST_SAMPLES, )),
            "xi": (
                samples[1][:, 1:],
                (NUM_BURST_SAMPLES, samples[1].shape[1] - 1),
            ),
            "events": (samples[2], (NUM_BURST_SAMPLES, 43, 43,
                                    1)),  # Change so it adapts size correctly
        },
        results_dict=results,
        num_samples=NUM_SAVED_SAMPLES,
    )
    posterior._file.create_dataset("initial_state", data=initial_state)
    posterior._file.create_dataset("config", data=yaml.dump(config))

    # We loop over successive calls to sample because we have to dump results
    #   to disc, or else we run out of memory (even on a 32GB system).
    # with tf.profiler.experimental.Profile("/tmp/tf_logdir"):
    final_results = None
    for i in tqdm.tqdm(range(NUM_BURSTS),
                       unit_scale=NUM_BURST_SAMPLES * config["mcmc"]["thin"]):
        samples, results, final_results = sample(
            NUM_BURST_SAMPLES,
            init_state=current_state,
            thin=config["mcmc"]["thin"] - 1,
            previous_results=final_results,
        )
        current_state = [s[-1] for s in samples]
        print(current_state[0].numpy(), flush=True)

        start = perf_counter()
        posterior.write_samples(
            {
                "beta2": samples[0][:, 0],
                "gamma0": samples[0][:, 1],
                "gamma1": samples[0][:, 2],
                "sigma": samples[0][:, 3],
                "beta3": samples[0][:, 4:],
                "beta1": samples[1][:, 0],
                "xi": samples[1][:, 1:],
                "events": samples[2],
            },
            first_dim_offset=i * NUM_BURST_SAMPLES,
        )
        posterior.write_results(results,
                                first_dim_offset=i * NUM_BURST_SAMPLES)
        end = perf_counter()

        print("Storage time:", end - start, "seconds")
        print(
            "Acceptance theta:",
            tf.reduce_mean(
                tf.cast(results["block0"]["is_accepted"], tf.float32)),
        )
        print(
            "Acceptance xi:",
            tf.reduce_mean(
                tf.cast(results["block1"]["is_accepted"], tf.float32), ),
        )
        print(
            "Acceptance move S->E:",
            tf.reduce_mean(
                tf.cast(results["move/S->E"]["is_accepted"], tf.float32)),
        )
        print(
            "Acceptance move E->I:",
            tf.reduce_mean(
                tf.cast(results["move/E->I"]["is_accepted"], tf.float32)),
        )
        print(
            "Acceptance occult S->E:",
            tf.reduce_mean(
                tf.cast(results["occult/S->E"]["is_accepted"], tf.float32)),
        )
        print(
            "Acceptance occult E->I:",
            tf.reduce_mean(
                tf.cast(results["occult/E->I"]["is_accepted"], tf.float32)),
        )

    print(
        f"Acceptance theta: {posterior['results/block0/is_accepted'][:].mean()}"
    )
    print(
        f"Acceptance xi: {posterior['results/block1/is_accepted'][:].mean()}")
    print(
        f"Acceptance move S->E: {posterior['results/move/S->E/is_accepted'][:].mean()}"
    )
    print(
        f"Acceptance move E->I: {posterior['results/move/E->I/is_accepted'][:].mean()}"
    )
    print(
        f"Acceptance occult S->E: {posterior['results/occult/S->E/is_accepted'][:].mean()}"
    )
    print(
        f"Acceptance occult E->I: {posterior['results/occult/E->I/is_accepted'][:].mean()}"
    )

    del posterior
Example 20
def runSummary(pipelineData):
    # Pipeline data should contain config at a minimum
    config = pipelineData['config']
    settings = config['SummaryData']

    if settings['input'] == 'processed':
        summaryData = GetData.SummaryData.process(config)
        pipelineData['summary'] = summaryData
        return pipelineData

    # As we're running in a function, we need to assign covar_data before defining
    # the functions that call it, so that it is in scope.
    # Previously, covar_dict was defined in the __name__ == '__main__' portion of this
    # script; moving to a pipeline necessitates this change.
    # Grab all data from dicts.
    # inference_period = config['dates']['inference_period']
    # date_low = config['dates']['low']
    # date_high = config['dates']['high']
    # weekday = config['dates']['weekday']

    if 'covar_data' in pipelineData:
        covar_data = pipelineData['covar_data']
    else:
        covar_data, tmp = GetData.CovarData(config)

    # Reproduction number calculation
    def calc_R_it(param, events, init_state, covar_data, priors):
        """Calculates effective reproduction number for batches of metapopulations
        :param theta: a tensor of batched theta parameters [B] + theta.shape
        :param xi: a tensor of batched xi parameters [B] + xi.shape
        :param events: a [B, M, T, X] batched events tensor
        :param init_state: the initial state of the epidemic at earliest inference date
        :param covar_data: the covariate data
        :return a batched vector of R_it estimates
        """
        def r_fn(args):
            beta1_, beta2_, beta_3, sigma_, xi_, gamma0_, events_ = args
            t = events_.shape[-2] - 1
            state = compute_state(init_state, events_,
                                  model_spec.STOICHIOMETRY)
            state = tf.gather(state, t,
                              axis=-2)  # State on final inference day

            model = model_spec.CovidUK(
                covariates=covar_data,
                initial_state=init_state,
                initial_step=0,
                num_steps=events_.shape[-2],
                priors=priors,
            )

            xi_pred = model_spec.conditional_gp(
                model.model["xi"](beta1_, sigma_),
                xi_,
                tf.constant([events.shape[-2] + model_spec.XI_FREQ],
                            dtype=model_spec.DTYPE)[:, tf.newaxis],
            )

            # FNC NOTE:
            # adding another 0.0 to beta3 as TF complains of dimension mismatch otherwise
            par = dict(
                beta1=beta1_,
                beta2=beta2_,
                beta3=tf.concat([beta_3, [0.0, 0.0]], axis=-1),
                sigma=sigma_,
                gamma0=gamma0_,
                xi=xi_,  # tf.reshape(xi_pred.sample(), [1]),
            )
            print("xi shape:", par["xi"].shape)
            ngm_fn = model_spec.next_generation_matrix_fn(covar_data, par)
            ngm = ngm_fn(t, state)
            return ngm

        return tf.vectorized_map(
            r_fn,
            elems=(
                param["beta1"],
                param["beta2"],
                param["beta3"],
                param["sigma"],
                param["xi"],
                param["gamma0"],
                events,
            ),
        )

    @tf.function
    def predicted_incidence(param, init_state, init_step, num_steps, priors):
        """Runs the simulation forward in time from `init_state` at time `init_time`
        for `num_steps`.
        :param theta: a tensor of batched theta parameters [B] + theta.shape
        :param xi: a tensor of batched xi parameters [B] + xi.shape
        :param events: a [B, M, S] batched state tensor
        :param init_step: the initial time step
        :param num_steps: the number of steps to simulate
        :param priors: the priors for gamma
        :returns: a tensor of srt_quhape [B, M, num_steps, X] where X is the number of state
                transitions
        """
        def sim_fn(args):
            beta1_, beta2_, beta3_, sigma_, xi_, gamma0_, gamma1_, init_ = args

            # FNC NOTE:
            # adding another 0.0 to beta3 as TF complains of dimension mismatch otherwise
            par = dict(
                beta1=beta1_,
                beta2=beta2_,
                beta3=tf.concat([beta3_, [0.0, 0.0]], axis=-1),
                gamma0=gamma0_,
                gamma1=gamma1_,
                xi=xi_,
            )

            model = model_spec.CovidUK(
                covar_data,
                initial_state=init_,
                initial_step=init_step,
                num_steps=num_steps,
                priors=priors,
            )
            sim = model.sample(**par)
            return sim["seir"]

        events = tf.map_fn(
            sim_fn,
            elems=(
                param["beta1"],
                param["beta2"],
                param["beta3"],
                param["sigma"],
                param["xi"],
                param["gamma0"],
                param["gamma1"],
                init_state,
            ),
            fn_output_signature=(tf.float64),
        )
        return events

    # Today's prevalence
    def prevalence(predicted_state, population_size, name=None):
        """Computes prevalence of E and I individuals

        :param state: the state at a particular timepoint [batch, M, S]
        :param population_size: the size of the population
        :returns: a dict of mean and 95% credibility intervals for prevalence
                in units of infections per person
        """
        prev = tf.reduce_sum(predicted_state[:, :, 1:3],
                             axis=-1) / tf.squeeze(population_size)
        return mean_and_ci(prev, name=name)

    def predicted_events(events, name=None):
        num_events = tf.reduce_sum(events, axis=-1)
        return mean_and_ci(num_events, name=name)

    # Load posterior file
    posterior_path = config['PosteriorData']['address']
    print("Using posterior:", posterior_path)
    posterior = h5py.File(
        os.path.expandvars(posterior_path),
        "r",
        rdcc_nbytes=1024**3,
        rdcc_nslots=1e6,
    )

    # Pre-determined thinning of posterior (better done in MCMC?)
    if posterior["samples/beta1"].size >= 10000:
        idx = range(6000, 10000, 10)
    else:
        print('Using smaller MCMC sample range')
        print('Size of posterior["samples/beta1"] is',
              posterior["samples/beta1"].size)
        idx = range(600, 1000, 10)
    param = dict(
        beta1=posterior["samples/beta1"][idx],
        beta2=posterior["samples/beta2"][idx],
        beta3=posterior["samples/beta3"][idx, ],
        sigma=posterior["samples/sigma"][idx, ],
        xi=posterior["samples/xi"][idx],
        gamma0=posterior["samples/gamma0"][idx],
        gamma1=posterior["samples/gamma1"][idx],
    )
    events = posterior["samples/events"][idx]
    init_state = posterior["initial_state"][:]
    state_timeseries = compute_state(init_state, events,
                                     model_spec.STOICHIOMETRY)

    # Build model
    model = model_spec.CovidUK(
        covar_data,
        initial_state=init_state,
        initial_step=0,
        num_steps=events.shape[1],
        priors=config["mcmc"]["prior"],
    )

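    # Reproduction number per posterior sample: the dominant eigenvalue of each
    # next-generation matrix, estimated by power iteration followed by a
    # Rayleigh quotient.  `power_iteration` and `rayleigh_quotient` are imported
    # elsewhere in this pipeline; a minimal sketch of the standard technique
    # (an assumption, not the pipeline's actual implementation) would be:
    #
    #     def power_iteration(A, num_iters=50):
    #         # A: batched matrices [..., M, M]; returns an estimate of each
    #         # matrix's dominant eigenvector plus the iteration count
    #         b = tf.ones(A.shape[:-1], dtype=A.dtype)[..., tf.newaxis]
    #         for _ in range(num_iters):
    #             b = tf.linalg.matmul(A, b)
    #             b = b / tf.norm(b, axis=-2, keepdims=True)
    #         return b[..., 0], num_iters
    #
    #     def rayleigh_quotient(A, b):
    #         # Dominant eigenvalue estimate b'Ab / b'b for each batched matrix
    #         b = b[..., tf.newaxis]
    #         num = tf.linalg.matmul(b, tf.linalg.matmul(A, b), transpose_a=True)
    #         den = tf.linalg.matmul(b, b, transpose_a=True)
    #         return (num / den)[..., 0, 0]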
    ngms = calc_R_it(param, events, init_state, covar_data,
                     config["mcmc"]["prior"])
    b, _ = power_iteration(ngms)
    rt = rayleigh_quotient(ngms, b)
    q = np.arange(0.05, 1.0, 0.05)

    # FNC Note: removed dict from this and
    # instead added Rt as a sheet name in the excel writer
    rt_quantiles = pd.DataFrame(np.quantile(rt, q, axis=-1), index=q).T
    rt_quantiles.to_excel(config['RtQuantileData']['address'], sheet_name='Rt')

    # Prediction requires simulation forward from the last available timepoint.
    # A 4-day recording lag in the case timeseries means that "now" is 4 steps
    # into the simulation, so forecasting out to 56 days ahead requires at
    # least 4 + 56 steps (70 are simulated here).
    # now = state_timeseries.shape[-2] + 4
    prediction = predicted_incidence(
        param,
        init_state=state_timeseries[..., -1, :],
        init_step=state_timeseries.shape[-2] - 1,
        num_steps=70,
        priors=config["mcmc"]["prior"],
    )
    predicted_state = compute_state(state_timeseries[..., -1, :], prediction,
                                    model_spec.STOICHIOMETRY)

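    # Time indices below are offsets from the start of the simulation: index 4
    # corresponds to "now" after the 4-day recording lag, 11 to now+7, 18 to
    # now+14, 25 to now+21, 32 to now+28 and 60 to now+56.  Event column 2 is
    # assumed to be the transition that produces detected cases.
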
    # Prevalence now
    prev_now = prevalence(predicted_state[..., 4, :],
                          covar_data["N"],
                          name="prev")

    # Incidence of detections now
    cases_now = predicted_events(prediction[..., 4:5, 2], name="cases")

    # Incidence from now to now+7
    cases_7 = predicted_events(prediction[..., 4:11, 2], name="cases7")
    cases_14 = predicted_events(prediction[..., 4:18, 2], name="cases14")
    cases_21 = predicted_events(prediction[..., 4:25, 2], name="cases21")
    cases_28 = predicted_events(prediction[..., 4:32, 2], name="cases28")
    cases_56 = predicted_events(prediction[..., 4:60, 2], name="cases56")

    # Prevalence at day 7
    prev_7 = prevalence(predicted_state[..., 11, :],
                        covar_data["N"],
                        name="prev7")
    prev_14 = prevalence(predicted_state[..., 18, :],
                         covar_data["N"],
                         name="prev14")
    prev_21 = prevalence(predicted_state[..., 25, :],
                         covar_data["N"],
                         name="prev21")
    prev_28 = prevalence(predicted_state[..., 32, :],
                         covar_data["N"],
                         name="prev28")
    prev_56 = prevalence(predicted_state[..., 60, :],
                         covar_data["N"],
                         name="prev56")

    # Package up summary data
    # this will be saved into a pickle
    # Add LADs in for later reference
    summaryData = {
        'cases': {
            'now': cases_now,
            '7': cases_7,
            '14': cases_14,
            '21': cases_21,
            '28': cases_28,
            '56': cases_56
        },
        'prev': {
            'now': prev_now,
            '7': prev_7,
            '14': prev_14,
            '21': prev_21,
            '28': prev_28,
            '56': prev_56
        },
        'metrics': {
            'ngms': ngms,
            'b': b,
            'rt': rt,
            'q': q
        },
        'LADs': config['lad19cds']
    }

    # Save and pass on the output data
    if config['GenerateOutput']['summary']:
        settings = config['SummaryData']
        if settings['format'] == 'pickle':
            fn = settings['address']
            with open(fn, 'wb') as file:
                pickle.dump(summaryData, file)
    pipelineData['summary'] = summaryData
    return pipelineData
    def _log_prob(self, y, **kwargs):
        """Calculates the log probability of observing epidemic events y
        :param y: a tensor of transition events, indexed by location, time step
                  and transition type
        :param kwargs: additional keyword arguments (unused)
        :returns: a scalar giving the log probability of the epidemic
        """
        dtype = dtype_util.common_dtype([y, self.initial_state],
                                        dtype_hint=self.dtype)
        events = tf.convert_to_tensor(y, dtype)
        with tf.name_scope("StateTransitionMarginalModel.log_prob"):

            state_timeseries = compute_state(
                initial_state=self.initial_state,
                events=events,
                stoichiometry=self.stoichiometry,
                closed=True,
            )

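            # Re-order to time-major [T, M, ...] so rates for all locations can
            # be computed at each time step in a single vectorized pass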
            tms_timeseries = tf.transpose(state_timeseries, perm=(1, 0, 2))
            tmr_events = tf.transpose(events, perm=(1, 0, 2))

            def fn(elems):
                return tf.stack(self.transition_rates(*elems), axis=-1)

            rates = tf.vectorized_map(
                fn=fn,
                elems=(
                    self._initial_step + tf.range(tms_timeseries.shape[0]),
                    tms_timeseries,
                ),
            )

            def integrated_rate_fn():
                """Use mid-point integration to estimate the constant rate
                over time.
                """
                integrated_rates = tms_timeseries[..., :-1] * rates
                return (integrated_rates[:-1, ...] +
                        integrated_rates[1:, ...]) / 2.0

            integrated_rates = integrated_rate_fn()

            log_norm_constant = tf.reduce_sum(
                tf.math.multiply_no_nan(tf.math.log(integrated_rates),
                                        tmr_events) -
                tf.math.lgamma(tmr_events + 1.0),
                axis=(0, 1),
            )
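            # Posterior Gamma(concentration, rate) parameters for the baseline
            # hazard rate, which is marginalised analytically below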
            pi_concentration = (
                tf.reduce_sum(tmr_events, axis=(0, 1)) +
                self.baseline_hazard_rate_priors["concentration"])
            pi_rate = (tf.reduce_sum(integrated_rates * self.time_delta,
                                     axis=(0, 1)) +
                       self.baseline_hazard_rate_priors["rate"])

            log_prob = (log_norm_constant + tf.math.lgamma(pi_concentration) -
                        (pi_concentration) * tf.math.log(pi_rate))

            return tf.reduce_sum(log_prob)
Ejemplo n.º 22
0
    )

    # Pre-determined thinning of posterior (better done in MCMC?)
    idx = range(6000, 10000, 10)
    param = dict(
        beta1=posterior["samples/beta1"][idx],
        beta2=posterior["samples/beta2"][idx],
        beta3=posterior["samples/beta3"][idx],
        sigma=posterior["samples/sigma"][idx, ],
        xi=posterior["samples/xi"][idx],
        gamma0=posterior["samples/gamma0"][idx],
        gamma1=posterior["samples/gamma1"][idx],
    )
    events = posterior["samples/events"][idx]
    init_state = posterior["initial_state"][:]
    state_timeseries = compute_state(init_state, events,
                                     model_spec.STOICHIOMETRY)

    # Prediction requires simulation from 2 weeks ago
    prediction = predicted_incidence(
        param,
        init_state=state_timeseries[..., -14, :],
        init_step=state_timeseries.shape[-2] - 14,
        num_steps=56,
        priors=config["mcmc"]["prior"],
    )

    # Prediction quantiles: how the observed case counts compare with the
    # predictive distribution
    q_obs7 = quantile_observed(events[0, :, -7:, 2], prediction[..., 7:14, 2])
    q_obs14 = quantile_observed(events[0, :, -14:, 2], prediction[..., :14, 2])
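
    # `quantile_observed` is defined elsewhere in this pipeline; a plausible
    # sketch (an assumption, not the actual implementation) returns, for each
    # location, the proportion of predictive samples falling below the observed
    # case count over the window:
    #
    #     def quantile_observed(observed, predicted):
    #         obs = tf.reduce_sum(observed, axis=-1)    # [M]
    #         pred = tf.reduce_sum(predicted, axis=-1)  # [B, M]
    #         return tf.reduce_mean(
    #             tf.cast(pred < obs[tf.newaxis, :], pred.dtype), axis=0)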

    geo["Pr(pred<obs)_7"] = q_obs7.numpy()