Example #1
def test_find_MAP_discrete():
    tol = 2.0**-11
    alpha = 4
    beta = 4
    n = 20
    yes = 15

    with Model() as model:
        p = Beta('p', alpha, beta, transform=None)
        ss = Binomial('ss', n=n, p=p, transform=None)
        s = Binomial('s', n=n, p=p, observed=yes)

        map_est1 = starting.find_MAP()
        map_est2 = starting.find_MAP(vars=model.vars)

    close_to(map_est1['p'], 0.6086956533498806, tol)

    close_to(map_est2['p'], 0.695642178810167, tol)
    assert map_est2['ss'] == 14
Example #2
def test_find_MAP_discrete():
    tol = 2.0**-11
    alpha = 4
    beta = 4
    n = 20
    yes = 15

    with Model() as model:
        p = Beta("p", alpha, beta)
        Binomial("ss", n=n, p=p)
        Binomial("s", n=n, p=p, observed=yes)

        map_est1 = starting.find_MAP()
        map_est2 = starting.find_MAP(vars=model.vars)

    close_to(map_est1["p"], 0.6086956533498806, tol)

    close_to(map_est2["p"], 0.695642178810167, tol)
    assert map_est2["ss"] == 14
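Both versions of the test rely on a close_to helper that the snippets do not define. A minimal stand-in, assuming it simply asserts an absolute tolerance (the real helper in the pymc3 test suite may differ):

import numpy as np

def close_to(x, v, bound):
    # Assert that x lies within `bound` of the expected value v (elementwise).
    assert np.all(np.abs(np.asarray(x) - np.asarray(v)) < bound)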
Example #3
def hmetad_groupLevel(data: dict, sample_model: bool = True, **kwargs):
    """Compute hierachical meta-d' at the subject level.

    This is an internal function. The group level model must be
    called using :py:func:`metadPy.hierarchical.hmetad`.

    Parameters
    ----------
    data : dict
        Response data.
    sample_model : boolean
        If `False`, only the model is returned without sampling.
    **kwargs : keyword arguments
        All keyword arguments are passed to :py:func:`pymc3.sampling.sample`.

    Returns
    -------
    model : :py:class:`pymc3.Model` instance
        The pymc3 model. Encapsulates the variables and likelihood factors.
    trace : :py:class:`pymc3.backends.base.MultiTrace` or
        :py:class:`arviz.InferenceData`
        A `MultiTrace` or `ArviZ InferenceData` object that contains the
        samples.

    References
    ----------
    .. [#] Fleming, S.M. (2017) HMeta-d: hierarchical Bayesian estimation
       of metacognitive efficiency from confidence ratings, Neuroscience of
       Consciousness, 3(1) nix007, https://doi.org/10.1093/nc/nix007
    """
    nSubj = data["nSubj"]
    hits = data["hits"]
    falsealarms = data["falsealarms"]
    s = data["s"]
    n = data["n"]
    counts = data["counts"]
    nRatings = data["nRatings"]
    Tol = data["Tol"]
    cr = data["cr"]
    m = data["m"]

    with Model() as model:

        # hyperpriors on d, c and c2
        mu_c1 = Normal(
            "mu_c1", mu=0, tau=0.01, shape=(1), testval=np.random.rand() * 0.1
        )
        mu_c2 = Normal(
            "mu_c2", mu=0, tau=0.01, shape=(1, 1), testval=np.random.rand() * 0.1
        )
        mu_d1 = Normal(
            "mu_d1", mu=0, tau=0.01, shape=(1), testval=np.random.rand() * 0.1
        )

        sigma_c1 = HalfNormal(
            "sigma_c1", tau=0.01, shape=(1), testval=np.random.rand() * 0.1
        )
        sigma_c2 = HalfNormal(
            "sigma_c2", tau=0.01, shape=(1, 1), testval=np.random.rand() * 0.1
        )
        sigma_d1 = HalfNormal(
            "sigma_d1", tau=0.01, shape=(1), testval=np.random.rand() * 0.1
        )

        # Type 1 priors
        c1_tilde = Normal("c1_tilde", mu=0, sigma=1, shape=(nSubj, 1))
        c1 = Deterministic("c1", mu_c1 + sigma_c1 * c1_tilde)

        d1_tilde = Normal("d1_tilde", mu=0, sigma=1, shape=(nSubj, 1))
        d1 = Deterministic("d1", mu_d1 + sigma_d1 * d1_tilde)

        # TYPE 1 SDT BINOMIAL MODEL
        h = cumulative_normal(d1 / 2 - c1)
        f = cumulative_normal(-d1 / 2 - c1)
        H = Binomial("H", n=s, p=h, observed=hits)
        FA = Binomial("FA", n=n, p=f, observed=falsealarms)

        # Hyperpriors on mRatio
        mu_logMratio = Normal(
            "mu_logMratio", mu=0, tau=1, shape=(1), testval=np.random.rand() * 0.1
        )
        sigma_delta = HalfNormal("sigma_delta", tau=1, shape=(1))

        delta_tilde = Normal("delta_tilde", mu=0, sigma=1, shape=(nSubj, 1))
        delta = Deterministic("delta", sigma_delta * delta_tilde)

        epsilon_logMratio = Beta("epsilon_logMratio", 1, 1, shape=(1))
        logMratio = Deterministic("logMratio", mu_logMratio + epsilon_logMratio * delta)
        mRatio = Deterministic("mRatio", math.exp(logMratio))

        # Type 2 priors
        meta_d = Deterministic("meta_d", mRatio * d1)

        # Specify ordered prior on criteria
        # bounded above and below by Type 1 c1
        cS1_hn = Normal(
            "cS1_hn",
            mu=0,
            sigma=1,
            shape=(nSubj, nRatings - 1),
            testval=np.linspace(-1.5, -0.5, nRatings - 1)
            .reshape(1, nRatings - 1)
            .repeat(nSubj, axis=0),
        )
        cS1 = Deterministic("cS1", -mu_c2 + (cS1_hn * sigma_c2))

        cS2_hn = Normal(
            "cS2_hn",
            mu=0,
            sigma=1,
            shape=(nSubj, nRatings - 1),
            testval=np.linspace(0.5, 1.5, nRatings - 1)
            .reshape(1, nRatings - 1)
            .repeat(nSubj, axis=0),
        )
        cS2 = Deterministic("cS2", mu_c2 + (cS2_hn * sigma_c2))

        # Means of SDT distributions
        S2mu = meta_d / 2
        S1mu = -meta_d / 2

        # Calculate normalisation constants
        C_area_rS1 = cumulative_normal(c1 - S1mu)
        I_area_rS1 = cumulative_normal(c1 - S2mu)
        C_area_rS2 = 1 - cumulative_normal(c1 - S2mu)
        I_area_rS2 = 1 - cumulative_normal(c1 - S1mu)

        # Get nC_rS1 probs
        nC_rS1 = cumulative_normal(cS1 - S1mu) / C_area_rS1
        nC_rS1 = Deterministic(
            "nC_rS1",
            math.concatenate(
                (
                    [
                        cumulative_normal(cS1[:, 0].reshape((nSubj, 1)) - S1mu)
                        / C_area_rS1,
                        nC_rS1[:, 1:] - nC_rS1[:, :-1],
                        (
                            (
                                cumulative_normal(c1 - S1mu)
                                - cumulative_normal(
                                    cS1[:, nRatings - 2].reshape((nSubj, 1)) - S1mu
                                )
                            )
                            / C_area_rS1
                        ),
                    ]
                ),
                axis=1,
            ),
        )

        # Get nI_rS2 probs
        nI_rS2 = (1 - cumulative_normal(cS2 - S1mu)) / I_area_rS2
        nI_rS2 = Deterministic(
            "nI_rS2",
            math.concatenate(
                (
                    [
                        (
                            (1 - cumulative_normal(c1 - S1mu))
                            - (
                                1
                                - cumulative_normal(
                                    cS2[:, 0].reshape((nSubj, 1)) - S1mu
                                )
                            )
                        )
                        / I_area_rS2,
                        nI_rS2[:, :-1]
                        - (1 - cumulative_normal(cS2[:, 1:] - S1mu)) / I_area_rS2,
                        (
                            1
                            - cumulative_normal(
                                cS2[:, nRatings - 2].reshape((nSubj, 1)) - S1mu
                            )
                        )
                        / I_area_rS2,
                    ]
                ),
                axis=1,
            ),
        )

        # Get nI_rS1 probs
        nI_rS1 = (-cumulative_normal(cS1 - S2mu)) / I_area_rS1
        nI_rS1 = Deterministic(
            "nI_rS1",
            math.concatenate(
                (
                    [
                        cumulative_normal(cS1[:, 0].reshape((nSubj, 1)) - S2mu)
                        / I_area_rS1,
                        nI_rS1[:, :-1]
                        + (cumulative_normal(cS1[:, 1:] - S2mu)) / I_area_rS1,
                        (
                            cumulative_normal(c1 - S2mu)
                            - cumulative_normal(
                                cS1[:, nRatings - 2].reshape((nSubj, 1)) - S2mu
                            )
                        )
                        / I_area_rS1,
                    ]
                ),
                axis=1,
            ),
        )

        # Get nC_rS2 probs
        nC_rS2 = (1 - cumulative_normal(cS2 - S2mu)) / C_area_rS2
        nC_rS2 = Deterministic(
            "nC_rS2",
            math.concatenate(
                (
                    [
                        (
                            (1 - cumulative_normal(c1 - S2mu))
                            - (
                                1
                                - cumulative_normal(
                                    cS2[:, 0].reshape((nSubj, 1)) - S2mu
                                )
                            )
                        )
                        / C_area_rS2,
                        nC_rS2[:, :-1]
                        - ((1 - cumulative_normal(cS2[:, 1:] - S2mu)) / C_area_rS2),
                        (
                            1
                            - cumulative_normal(
                                cS2[:, nRatings - 2].reshape((nSubj, 1)) - S2mu
                            )
                        )
                        / C_area_rS2,
                    ]
                ),
                axis=1,
            ),
        )

        # Avoid underflow of probabilities
        nC_rS1 = math.switch(nC_rS1 < Tol, Tol, nC_rS1)
        nI_rS2 = math.switch(nI_rS2 < Tol, Tol, nI_rS2)
        nI_rS1 = math.switch(nI_rS1 < Tol, Tol, nI_rS1)
        nC_rS2 = math.switch(nC_rS2 < Tol, Tol, nC_rS2)

        # TYPE 2 SDT MODEL (META-D)
        # Multinomial likelihood for response counts ordered as c(nR_S1,nR_S2)
        Multinomial(
            "CR_counts",
            cr,
            nC_rS1,
            shape=(nSubj, nRatings),
            observed=counts[:, :nRatings],
        )
        Multinomial(
            "FA_counts",
            FA,
            nI_rS2,
            shape=(nSubj, nRatings),
            observed=counts[:, nRatings : nRatings * 2],
        )
        Multinomial(
            "M_counts",
            m,
            nI_rS1,
            shape=(nSubj, nRatings),
            observed=counts[:, nRatings * 2 : nRatings * 3],
        )
        Multinomial(
            "H_counts",
            H,
            nC_rS2,
            shape=(nSubj, nRatings),
            observed=counts[:, nRatings * 3 : nRatings * 4],
        )

        if sample_model is True:

            trace = sample(return_inferencedata=True, **kwargs)

            return model, trace

        else:
            return model
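The function assumes a cumulative_normal helper and a data dictionary carrying the keys unpacked at the top. A minimal sketch, assuming cumulative_normal is the standard normal CDF written with pymc3.math (the actual helper is not shown in this excerpt):

from pymc3 import math

def cumulative_normal(x):
    # Standard normal CDF: Phi(x) = 0.5 * (1 + erf(x / sqrt(2))).
    return 0.5 + 0.5 * math.erf(x / math.sqrt(2))

# Hypothetical call; `data` must provide nSubj, hits, falsealarms, s, n,
# counts, nRatings, Tol, cr and m, and extra kwargs go to pymc3.sample:
# model, trace = hmetad_groupLevel(data, sample_model=True, draws=1000, tune=1000)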
Example #4
def get_mixbml_model(xs, hparams, verbose=0):
    u"""Returns a PyMC3 probabilistic model of mixture BML.

    This function should be invoked within a PyMC3 ``Model`` context.

    :param xs: Observation data.
    :type xs: ndarray, shape=(n_samples, 2), dtype=float
    :param hparams: Hyperparameters for inference.
    :type hparams: MixBMLParams
    :return: Probabilistic model
    :rtype: pymc3.Model
    """
    # Standardize samples
    floatX = 'float32'  # TODO: remove literal
    n_samples = xs.shape[0]
    xs = xs.astype(floatX)
    xs = standardize_samples(xs, True) if hparams.standardize else xs

    # Common scaling parameters
    std_x = np.std(xs, axis=0).astype(floatX)
    max_c = 1.0  # TODO: remove literal
    tau_cmmn = np.array([(std_x[0] * max_c)**2,
                         (std_x[1] * max_c)**2]).astype(floatX)

    # Prior of individual specific effects (\tilde{\mu}_{l}^{(i)})
    L_cov = _get_L_cov(hparams)
    mu1s, mu2s = _indvdl_t(hparams, std_x, n_samples, L_cov)

    # Noise variance
    h1, h2 = _noise_variance(hparams, tau_cmmn)

    # Common interceptions
    mu1, mu2 = _common_interceptions(hparams, tau_cmmn)

    # Noise model
    # obs1 (obs2) is a log likelihood function, not RV
    obs1, obs2 = _noise_model(hparams, h1, h2)

    # Pair of causal models
    v1_params = [mu1, mu1s, obs1]
    v2_params = [mu2, mu2s, obs2]

    # lp_m1: x1 -> x2 (b_21 is non-zero)
    # lp_m2: x2 -> x1 (b_12 is non-zero)
    lp_m1 = _causal_model(hparams, v1_params, v2_params, tau_cmmn, '21')
    lp_m2 = _causal_model(hparams, v2_params, v1_params, tau_cmmn, '12')

    # Prior of mixing proportions for causal models
    z = Beta('z', alpha=1, beta=1)

    # Mixture of potentials of causal models
    def lp_mix(xs):
        def flip(xs):
            # Flip the 1st and 2nd features
            return tt.stack([xs[:, 1], xs[:, 0]], axis=0).T

        return pm.logsumexp(
            tt.stack([tt.log(z) + lp_m1(xs),
                      tt.log(1 - z) + lp_m2(flip(xs))],
                     axis=0))

    DensityDist('dist', lp_mix, observed=xs)
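The DensityDist receives lp_mix, which combines the two causal directions in log space. A quick numeric check of the identity it relies on, log(z*p1 + (1-z)*p2) = logsumexp(log z + lp_m1, log(1-z) + lp_m2), using scipy rather than the theano/pymc3 ops above:

import numpy as np
from scipy.special import logsumexp

z, lp1, lp2 = 0.3, np.log(0.02), np.log(0.05)  # toy mixing weight and log densities
direct = np.log(z * np.exp(lp1) + (1 - z) * np.exp(lp2))
stable = logsumexp([np.log(z) + lp1, np.log(1 - z) + lp2])
assert np.isclose(direct, stable)  # identical value; logsumexp avoids underflow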
Example #5
    Q = DiscreteObsMJP_unif_prior('Q', M=M, lower=0.0, upper=1.0, shape=(M, M))

    #S = DiscreteObsMJP('S', pi=pi, Q=Q, M=M, nObs=nObs, observed_jumps=obs_jumps, T=T, shape=(nObs), testval=np.ones(nObs,dtype='int32'))
    S = DiscreteObsMJP('S',
                       pi=pi,
                       Q=Q,
                       M=M,
                       nObs=nObs,
                       observed_jumps=obs_jumps,
                       T=T,
                       shape=(nObs))

    #B0 = Beta('B0', alpha = 1., beta = 1., shape=(K,M), testval=0.2*np.ones((K,M)))
    #B = Beta('B', alpha = 1., beta = 1., shape=(K,M), testval=0.2*np.ones((K,M)))
    B0 = Beta('B0', alpha=1., beta=1., shape=(K, M))
    B = Beta('B', alpha=1., beta=1., shape=(K, M))

    #X = Comorbidities('X', S=S, B0=B0,B=B, T=T, shape=(nObs, K), testval=np.ones((nObs,K),dtype='int8'))
    X = Comorbidities('X', S=S, B0=B0, B=B, T=T, shape=(nObs, K))

    #Z = Beta('Z', alpha = 0.1, beta = 1., shape=(K,D), testval=0.5*np.ones((K,D)))
    #L = Beta('L', alpha = 1., beta = 1., shape=D, testval=0.5*np.ones(D))
    Z = Beta('Z', alpha=0.1, beta=1., shape=(K, D))
    L = Beta('L', alpha=1., beta=1., shape=D)
    O_obs = Claims('O_obs',
                   X=X,
                   Z=Z,
                   L=L,
                   T=T,
                   D=D,
                   O_input=O,
                   shape=(nObs, Dd),
                   observed=O)  # completed to match the Claims calls in the later examples
Example #6
if FAKE_DATA:
    faker = gen.Test_Generator()
    data = faker.gen('soft1')
else:
    data = read.get_all_sets()

pathways, features, path_dict, reverse_path_dict, evidence, metfrag_evidence = data
print("num_pathways:", len(pathways))
print("num_features:", len(features))
print("num_evidence:", len(evidence))
print("num_metfrag: ", len(metfrag_evidence))
rate_prior = 0.5
model = Model()
with model:
    eps = Beta('eps', 0.005, 1)
    ap = {p: Gamma('p_' + p, rate_prior, 1) for p in pathways}
    bmp = {
        p: {
            feat: Gamma('b_{' + p + ',' + feat + '}', ap[p], 1)
            for feat in path_dict[p]
        }
        for p in pathways
    }
    y_bmp = {}
    g = {}

    def logp_f(f, b, eps):
        if f in evidence:
            return math.log(1 - math.e**(-1 * b) + epsilon)
        if f in metfrag_evidence:
            ...  # the remainder of this example is truncated in the source
Example #7
    def setUp(self):
        #test Claims
        N = 100  # Number of patients
        M = 6  # Number of hidden states
        K = 10  # Number of comorbidities
        D = 721  # Number of claims
        Dd = 80  # Maximum number of claims that can occur at once
        min_obs = 10  # Minimum number of observed claims per patient
        max_obs = 30  # Maximum number of observed claims per patient
        self.M = M
        self.N = N
        self.K = K
        # Load pre-generated data
        from pickle import load

        T = load(open('../../data/X_layer_100_patients_old/T.pkl', 'rb'))
        self.T = T
        obs_jumps = load(
            open('../../data/X_layer_100_patients_old/obs_jumps.pkl', 'rb'))
        S_start = load(open('../../data/X_layer_100_patients_old/S.pkl', 'rb'))
        X_start = load(open('../../data/X_layer_100_patients_old/X.pkl', 'rb'))
        Z_start = load(open('../../data/X_layer_100_patients_old/Z.pkl', 'rb'))
        L_start = load(open('../../data/X_layer_100_patients_old/L.pkl', 'rb'))
        O = load(open('../../data/X_layer_100_patients_old/O_input.pkl', 'rb'))

        self.nObs = nObs = T.sum()
        self.zeroIndices = np.roll(self.T.cumsum(), 1)
        self.zeroIndices[0] = 0
        obs_jumps = np.hstack([np.zeros((N, 1), dtype='int8'), obs_jumps])
        obs_jumps = np.concatenate([obs_jumps[i, 0:T[i]] for i in range(N)])
        O = np.concatenate([O[:, 0:T[i], i].T for i in range(N)])
        S_start = np.concatenate([S_start[i, 0:T[i]] for i in range(N)])
        X_start = np.concatenate([X_start[:, 0:T[i], i].T for i in range(N)])
        anchors = []
        self.Z_original = Z_start  # assumed assignment; the source has only a bare expression here
        mask = np.ones((K, D))
        for anchor in anchors:
            for hold in anchor[1]:
                mask[:, hold] = 0
                mask[anchor[0], hold] = 1
        Z_start = Z_start[mask.nonzero()]

        with Model() as self.model:
            self.pi = Dirichlet('pi',
                                a=as_tensor_variable(
                                    [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]),
                                shape=M)
            pi_min_potential = Potential(
                'pi_min_potential',
                TT.switch(TT.min(self.pi) < .1, -np.inf, 0))
            self.Q = DiscreteObsMJP_unif_prior('Q',
                                               M=M,
                                               lower=0.0,
                                               upper=1.0,
                                               shape=(M, M))
            self.S = DiscreteObsMJP('S',
                                    pi=self.pi,
                                    Q=self.Q,
                                    M=M,
                                    nObs=nObs,
                                    observed_jumps=obs_jumps,
                                    T=T,
                                    shape=(nObs))
            self.B0 = Beta('B0', alpha=1., beta=1., shape=(K, M))
            self.B = Beta('B', alpha=1., beta=1., shape=(K, M))
            self.X = Comorbidities('X',
                                   S=self.S,
                                   B0=self.B0,
                                   B=self.B,
                                   T=T,
                                   shape=(nObs, K))
            #self.Z = Beta('Z', alpha = 0.1, beta = 1., shape=(K,D))
            self.Z = Beta_with_anchors('Z',
                                       anchors=anchors,
                                       K=K,
                                       D=D,
                                       alpha=0.1,
                                       beta=1.,
                                       shape=(K, D))
            self.L = Beta('L', alpha=1., beta=1., shape=D)
            self.testClaims = Claims('O_obs',
                                     X=self.X,
                                     Z=self.Z,
                                     L=self.L,
                                     T=T,
                                     D=D,
                                     O_input=O,
                                     shape=(nObs, Dd),
                                     observed=O)

            self.forS = ForwardS(vars=[self.S],
                                 N=N,
                                 T=T,
                                 nObs=nObs,
                                 observed_jumps=obs_jumps)
            self.forX = ForwardX(vars=[self.X],
                                 N=N,
                                 T=T,
                                 K=K,
                                 D=D,
                                 Dd=Dd,
                                 O=O,
                                 nObs=nObs)

        from scipy.special import logit

        self.Q_raw_log = logit(
            np.array([0.631921, 0.229485, 0.450538, 0.206042, 0.609582]))

        B_lo = logit(
            np.array(
                [[0.000001, 0.760000, 0.720000, 0.570000, 0.700000, 0.610000],
                 [0.000001, 0.460000, 0.390000, 0.220000, 0.200000, 0.140000],
                 [0.000001, 0.620000, 0.620000, 0.440000, 0.390000, 0.240000],
                 [0.000001, 0.270000, 0.210000, 0.170000, 0.190000, 0.070000],
                 [0.000001, 0.490000, 0.340000, 0.220000, 0.160000, 0.090000],
                 [0.000001, 0.620000, 0.340000, 0.320000, 0.240000, 0.120000],
                 [0.000001, 0.550000, 0.390000, 0.320000, 0.290000, 0.150000],
                 [0.000001, 0.420000, 0.240000, 0.170000, 0.170000, 0.110000],
                 [0.000001, 0.310000, 0.300000, 0.230000, 0.190000, 0.110000],
                 [0.000001, 0.470000, 0.340000, 0.190000, 0.190000,
                  0.110000]]))

        B0_lo = logit(
            np.array(
                [[0.410412, 0.410412, 0.418293, 0.418293, 0.429890, 0.429890],
                 [0.240983, 0.240983, 0.240983, 0.240983, 0.240983, 0.240983],
                 [0.339714, 0.339714, 0.339714, 0.339714, 0.339714, 0.339714],
                 [0.130415, 0.130415, 0.130415, 0.130415, 0.130415, 0.130415],
                 [0.143260, 0.143260, 0.143260, 0.143260, 0.143260, 0.143260],
                 [0.211465, 0.211465, 0.211465, 0.211465, 0.211465, 0.211465],
                 [0.194187, 0.194187, 0.194187, 0.194187, 0.194187, 0.194187],
                 [0.185422, 0.185422, 0.185422, 0.185422, 0.185422, 0.185422],
                 [0.171973, 0.171973, 0.171973, 0.171973, 0.171973, 0.171973],
                 [0.152277, 0.152277, 0.152277, 0.152277, 0.152277,
                  0.152277]]))

        Z_lo = logit(Z_start)
        L_lo = logit(L_start)
        #import pdb; pdb.set_trace()
        self.myTestPoint = {
            'Q_ratematrixoneway': self.Q_raw_log,
            'B_logodds': B_lo,
            'B0_logodds': B0_lo,
            'S': S_start,
            'X': X_start,
            'Z_anchoredbeta': Z_lo,
            'L_logodds': L_lo,
            'pi_stickbreaking': np.array([0.5, 0.5, 0.5, 0.5, 0.5, 0.5])
        }
Example #8
    # exp(t*Q)[m,m']: probability of transitioning from disease state m to m' after a period of time t
    Q = DiscreteObsMJP_unif_prior('Q', M=M, lower=0.0, upper=1.0, shape=(M, M))

    # S[o]: disease state (between 0 and M-1) at observation o
    S = DiscreteObsMJP('S',
                       pi=pi,
                       Q=Q,
                       M=M,
                       nObs=nObs,
                       observed_jumps=obs_jumps,
                       T=T,
                       shape=(nObs))

    B0 = Beta('B0', alpha=1., beta=1., shape=(K, M))
    B0_monotonicity_constraint = Potential(
        'B0_monotonicity_constraint',
        TT.switch(TT.min(DES_diff(B0)) < 0., 100.0 * TT.min(DES_diff(B0)), 0))

    B = Beta('B', alpha=1., beta=1., shape=(K, M))

    X = Comorbidities('X', S=S, B0=B0, B=B, T=T, shape=(nObs, K))

    #Z = Beta('Z', alpha = 0.1, beta = 1., shape=(K,D))
    Z = Beta_with_anchors('Z',
                          anchors=anchors,
                          K=K,
                          D=D,
                          alpha=0.1,
                          beta=1.,
                          shape=(K, D))  # completed to match the other Beta_with_anchors calls in this listing
Example #9
X = np.array([[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
              [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
              [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
              [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
              [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
              [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]],
             dtype=np.int8)

model = Model()

Q_test = np.array([[-6, 2, 2, 1, 1], [1, -4, 0, 1, 2], [1, 0, -4, 2, 1],
                   [2, 1, 0, -3, 0], [1, 1, 1, 1, -4]])

with model:
    Q = DiscreteObsMJP_unif_prior('Q', M=M, shape=(M, M))

    S = DiscreteObsMJP('S', Q=Q, observed_jumps=observed_jumps, shape=(Tn))

    B0 = Beta('B0', alpha=1, beta=1, shape=(K, M))
    B = Beta('B', alpha=1, beta=1, shape=(K, M))
    Xobs = Comorbidities('Xobs', S=S, B0=B0, B=B, shape=(K, Tn), observed=X)

    step2 = ForwardS(vars=[S], X=X, observed_jumps=observed_jumps)
    step2.step_sizes = np.array([0.1, 1, 100])
    pS = step2.compute_pS(Q_test, 5)
    print(pS)
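Q_test is a valid continuous-time Markov chain generator (each row sums to zero), so the transition probabilities after time t are given by the matrix exponential exp(t*Q), which is presumably what compute_pS evaluates. A standalone check with scipy:

import numpy as np
from scipy.linalg import expm

Q_test = np.array([[-6, 2, 2, 1, 1], [1, -4, 0, 1, 2], [1, 0, -4, 2, 1],
                   [2, 1, 0, -3, 0], [1, 1, 1, 1, -4]], dtype=float)
pS = expm(0.1 * Q_test)                  # transition matrix after t = 0.1
assert np.allclose(pS.sum(axis=1), 1.0)  # every row is a probability distribution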
Example #10
    def setUp(self):
        #test Claims
        N = 5  # Number of patients
        self.N = N
        M = 3  # Number of hidden states
        self.M = M
        K = 2  # Number of comorbidities
        D = 20  # Number of claims
        Dd = 4  # Maximum number of claims that can occur at once
        min_obs = 2  # Minimum number of observed claims per patient
        max_obs = 4  # Maximum number of observed claims per patient
        #obs_jumps = np.ones((N,max_obs-1))
        obs_jumps = np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1],
                              [1, 1, 1]])
        T = np.array([4, 2, 3, 4, 2])
        self.T = T
        nObs = T.sum()
        obs_jumps = np.hstack([np.zeros((N, 1), dtype='int8'), obs_jumps])
        obs_jumps = np.concatenate([obs_jumps[i, 0:T[i]] for i in range(N)])

        #O(4,4,5)
        #O = np.zeros((nObs,Dd),dtype='int8')
        O = np.zeros((Dd, max_obs, N), dtype='int8')
        #import pdb; pdb.set_trace()
        O[[0, 1, 3, 2, 3, 3], [0, 1, 3, 2, 3, 3], [0, 1, 4, 3, 3, 4]] = 1
        #O[[0,5,11,12],[0,1,2,3]] = 1
        O = np.concatenate([O[:, 0:T[i], i].T for i in range(N)])

        Z_lo = np.array([[
            -2.30258509, -2.30258509, -2.30258509, -2.30258509, -2.30258509,
            -2.30258509, -2.30258509, -2.30258509, -2.30258509, -2.30258509,
            -2.30258509, -2.30258509, -2.30258509, -2.30258509, -2.30258509,
            -2.30258509, -2.30258509, -2.30258509, -2.30258509, -2.30258509
        ],
                         [
                             -2.30258509, -2.30258509, -2.30258509,
                             -2.30258509, -2.30258509, -2.30258509,
                             -2.30258509, -2.30258509, -2.30258509,
                             -2.30258509, -2.30258509, -2.30258509,
                             -2.30258509, -2.30258509, -2.30258509,
                             -2.30258509, -2.30258509, -2.30258509,
                             -2.30258509, -2.30258509
                         ]])

        anchors = []
        mask = np.ones((K, D))
        for anchor in anchors:
            for hold in anchor[1]:
                mask[:, hold] = 0
                mask[anchor[0], hold] = 1
        Z_lo = Z_lo[mask.nonzero()]

        with Model() as self.model:
            self.pi = Dirichlet('pi',
                                a=as_tensor_variable([0.5, 0.5, 0.5]),
                                shape=M)
            pi_min_potential = Potential(
                'pi_min_potential',
                TT.switch(TT.min(self.pi) < .1, -np.inf, 0))
            self.Q = DiscreteObsMJP_unif_prior('Q',
                                               M=M,
                                               lower=0.0,
                                               upper=1.0,
                                               shape=(M, M))
            self.S = DiscreteObsMJP('S',
                                    pi=self.pi,
                                    Q=self.Q,
                                    M=M,
                                    nObs=nObs,
                                    observed_jumps=obs_jumps,
                                    T=T,
                                    shape=(nObs))
            self.B0 = Beta('B0', alpha=1., beta=1., shape=(K, M))
            self.B = Beta('B', alpha=1., beta=1., shape=(K, M))
            self.X = Comorbidities('X',
                                   S=self.S,
                                   B0=self.B0,
                                   B=self.B,
                                   T=T,
                                   shape=(nObs, K))
            #self.Z = Beta('Z', alpha = 0.1, beta = 1., shape=(K,D))
            self.Z = Beta_with_anchors('Z',
                                       anchors=anchors,
                                       K=K,
                                       D=D,
                                       alpha=0.1,
                                       beta=1.,
                                       shape=(K, D))
            self.L = Beta('L', alpha=1., beta=1., shape=D)
            #L = Beta('L', alpha = 0.1, beta = 1, shape=D, transform=None)
            #L = Uniform('L', left = 0.0, right = 1.0, shape=D, transform=None)
            #L = Uniform('L', lower = 0.0, upper = 1.0, shape=D)
            self.testClaims = Claims('O_obs',
                                     X=self.X,
                                     Z=self.Z,
                                     L=self.L,
                                     T=T,
                                     D=D,
                                     O_input=O,
                                     shape=(nObs, Dd),
                                     observed=O)

            self.forS = ForwardS(vars=[self.S],
                                 N=N,
                                 T=T,
                                 nObs=nObs,
                                 observed_jumps=obs_jumps)
            self.forX = ForwardX(vars=[self.X],
                                 N=N,
                                 T=T,
                                 K=K,
                                 D=D,
                                 Dd=Dd,
                                 O=O,
                                 nObs=nObs)

        self.myTestPoint = {
            'Z_anchoredbeta':
            Z_lo,
            'Q_ratematrixoneway':
            np.array([0.1, 0.1]),
            'pi_stickbreaking':
            np.array([0.2, 0.1]),
            'S':
            np.array([[0, 0, 1, 1], [1, 1, 1, 1], [1, 1, 2, 2], [0, 2, 2, 2],
                      [0, 0, 0, 1]],
                     dtype=np.int32),
            'B0_logodds':
            np.array([[0., 1., 0.], [0., 0., 1.]]),
            'X':
            np.array([[[0, 1, 1, 1, 1], [0, 1, 1, 1, 1], [1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1]],
                      [[1, 1, 0, 0, 1], [1, 1, 0, 1, 1], [1, 1, 1, 1, 1],
                       [1, 1, 1, 1, 1]]],
                     dtype=np.int8),
            'L_logodds':
            np.array([
                0.1, 0.1, 0.1, 0.1, 0.01, 0.01, 0.01, 0.01, 0.0011, 0.0011,
                0.0011, 0.0011, 0.0011, 0., 0.0101, 0.0101, 0.0101, 0.01, 0.01,
                0.01
            ]),
            'B_logodds':
            np.array([[1., 0., 1.], [0., 1., 0.]])
        }
        self.myTestPoint['S'] = np.concatenate(
            [self.myTestPoint['S'][i, 0:T[i]] for i in range(N)])
        self.myTestPoint['X'] = np.concatenate(
            [self.myTestPoint['X'][:, 0:T[i], i].T for i in range(N)])
        stepX_Correct = np.array([[[0, 0, 0, 0, 0], [0, 0, 0, 0, 0],
                                   [0, 0, 0, 0, 0], [0, 0, 0, 0, 1]],
                                  [[0, 0, 0, 0, 0], [0, 0, 0, 0, 0],
                                   [1, 0, 0, 0, 0], [1, 0, 0, 0, 0]]],
                                 dtype=np.int8)

        stepX_Correct = np.array([[[0, 0, 0, 0, 0], [0, 0, 0, 1, 0],
                                   [0, 0, 0, 1, 0], [0, 0, 0, 1, 0]],
                                  [[0, 1, 0, 0, 0], [0, 1, 0, 0, 0],
                                   [0, 1, 0, 0, 0], [0, 1, 0, 0, 1]]],
                                 dtype=np.int8)
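The test point addresses variables by pymc3's transformed names ('B_logodds', 'pi_stickbreaking', 'Z_anchoredbeta'): free values live on an unconstrained scale, so Beta-distributed parameters are supplied as log-odds, which is why Example #7 runs its start values through scipy.special.logit. A round-trip sketch:

import numpy as np
from scipy.special import logit, expit

p = np.array([0.1, 0.5, 0.9])     # values on the (0, 1) probability scale
lo = logit(p)                     # unconstrained log-odds, log(p / (1 - p))
assert np.allclose(expit(lo), p)  # expit is the exact inverse of logit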
Example #11
def run_phi(data, **kwargs):
    if isinstance(data, str):
        data = csv(data)
    data = np.array(data)

    # Check limits in **kwargs
    if kwargs.get("limits") is not None:
        limits = kwargs.get("limits")
    else:
        limits = (np.nanmin(list(itertools.chain.from_iterable(data))),
                  np.nanmax(list(itertools.chain.from_iterable(data))))

    if kwargs.get("verbose") is not None:
        verbose = kwargs.get("verbose")
    else:
        verbose = False

    if (kwargs.get("binning") is not None) and not kwargs.get("binning"):
        print("removing binning on borders")
        binning_multiplier = 2
    else:
        binning_multiplier = 1

    if kwargs.get("seed") is not None:
        seed = kwargs.get("seed")
    else:
        seed = 123

    if kwargs.get("table") is not None:
        table = kwargs.get("table")
    else:
        table = False

    if kwargs.get("N") is not None:
        N = kwargs.get("N")
    else:
        N = 1000

    if kwargs.get("keep_missing") is not None:
        keep_missing = kwargs.get("keep_missing")
    else:
        keep_missing = None  #AUTO
        #keep_missing = True

    if kwargs.get("fast") is not None:
        fast = kwargs.get("fast")
    else:
        fast = True

    if kwargs.get("njobs") is not None:
        njobs = kwargs.get("njobs")
    else:
        njobs = 2

    if kwargs.get("sd") is not None:
        sd = kwargs.get("sd")
    else:
        sd = 1000000

    # Check gt in **kwargs
    if kwargs.get("gt") is not None:
        gt = kwargs.get("gt")
    else:
        gt = [None] * len(data)

    if verbose: print("Computing Phi")
    idx_of_gt = np.array([x is not None for x in gt])
    idx_of_not_gt = np.array([x is None for x in gt])
    num_of_gt = np.sum(idx_of_gt)

    basic_model = Model()

    for i, g in enumerate(gt):
        if g is not None:
            gt[i] = scale_mat(np.array([[gt[i]] * len(data[i])]),
                              limits,
                              binning_multiplier=binning_multiplier)[0][0]

    num_of_docs = len(data)  # number of documents

    rectangular = True
    sparse = False
    if np.isnan(data).any():
        sparse = True
        data = np.ma.masked_invalid(data)
        data = minimal_matrix(data)

    scaled = scale_mat(data, limits, binning_multiplier=binning_multiplier)

    if (np.count_nonzero(np.isnan(scaled)) /
            scaled.size) > 0.2:  # a lot of nans
        if verbose:
            print(
                "WARNING: a lot of missing values: we are going to set keep_missing=False to improve convergence (if not manually overridden)"
            )
        if keep_missing is None:
            keep_missing = False

    if (sparse and keep_missing == False):
        rectangular = False
        scaled = [doc[~np.isnan(doc)].tolist()
                  for doc in scaled]  #make data a list of lists

    NUM_OF_ITERATIONS = N

    with basic_model:
        precision = Normal('precision', mu=2, sd=sd)
        #precision = Gamma('precision',mu=2,sd=1)

        if num_of_docs - num_of_gt == 1:
            mu = Normal('mu', mu=1 / 2, sd=sd)
        else:
            mu = Normal('mu', mu=1 / 2, sd=sd, shape=num_of_docs - num_of_gt)
        alpha = mu * precision
        beta = precision * (1 - mu)

        if rectangular:
            masked = pd.DataFrame(
                scaled[idx_of_not_gt])  #needed to keep nan working
            if num_of_docs - num_of_gt == 1:
                Beta('beta_obs', observed=masked, alpha=alpha, beta=beta)
            else:
                Beta('beta_obs',
                     observed=masked.T,
                     alpha=alpha,
                     beta=beta,
                     shape=num_of_docs - num_of_gt)
        else:
            for i, doc in enumerate(scaled):
                Beta('beta_obs' + str(i),
                     observed=doc,
                     alpha=alpha[i],
                     beta=beta[i])

        for i, g in enumerate(gt):
            if g is not None:
                mu = Normal('mu' + str(i), mu=gt[i], sd=1)
                alpha = mu * precision
                beta = precision * (1 - mu)
                Beta('beta_obs_g' + str(i),
                     observed=scaled[i],
                     alpha=alpha,
                     beta=beta)  #alpha=a,beta=b,observed=beta)

        try:
            if fast:
                # Fast mode: deliberately fail this assertion so control jumps
                # straight to the Metropolis fallback in the except branch below.
                assert False
            stds = np.ones(basic_model.ndim)
            for _ in range(5):
                args = {'scaling': stds**2, 'is_cov': True}
                trace = pm.sample(round(NUM_OF_ITERATIONS / 10),
                                  tune=round(NUM_OF_ITERATIONS / 10),
                                  init=None,
                                  nuts_kwargs=args,
                                  chains=10,
                                  progressbar=verbose,
                                  random_seed=seed)
                samples = [basic_model.dict_to_array(p) for p in trace]
                stds = np.array(samples).std(axis=0)

            step = pm.NUTS(scaling=stds**2, is_cov=True, target_accept=0.9)
            start = trace[0]
            trace = sample(NUM_OF_ITERATIONS,
                           tune=round(NUM_OF_ITERATIONS / 2),
                           njobs=njobs,
                           chains=8,
                           init=None,
                           step=step,
                           start=start,
                           progressbar=verbose,
                           random_seed=seed)
            # Statistical inference
            beg = time()
            #start = find_MAP()
            bef_slice = time()
            #step = NUTS()# Metropolis()
            #step = Metropolis()
            aft_slice = time()
            bef_trace = time()
            #trace = sample(NUM_OF_ITERATIONS, progressbar=verbose,random_seed=123, njobs=njobs,start=start,step=step)
    #        trace = sample(NUM_OF_ITERATIONS, progressbar=verbose,random_seed=123, njobs=njobs,init=None,tune=100)
        except Exception:
            beg = time()
            step = Metropolis()
            #start = find_MAP()
            trace = sample(NUM_OF_ITERATIONS,
                           progressbar=verbose,
                           random_seed=seed,
                           njobs=njobs,
                           step=step)  #,start=start)
        #pm.summary(trace,include_transformed=True)
        # Compare major/minor version numerically (np.float fails on e.g. "3.11.2")
        if tuple(int(v) for v in pymc3.__version__.split(".")[:2]) <= (3, 3):
            res = pm.stats.df_summary(trace, include_transformed=True)
        else:
            res = pm.summary(trace, include_transformed=True)
        res.drop(["sd", "mc_error"], axis=1, inplace=True)
        res = res.transpose()
        res["agreement"] = agreement(res['precision'])
        # ----

        #sub_res = res.copy()

        # Mu rescaling

        col_agreement = res["agreement"]
        col_precision = res["precision"]

        res.drop("agreement", inplace=True, axis=1)
        res.drop("precision", inplace=True, axis=1)

        if table:
            col_names = res.columns[0:len(data) - 1]
            for i, name in enumerate(col_names):
                l = len(scaled[i]) * binning_multiplier
                for j in range(3):

                    b = res[name].iloc[j]
                    mu_res = (b * l - 0.5) / (l - 1)
                    res[name].iloc[j] = np.clip(mu_res, 0,
                                                1) * (limits[1] - limits[0])

        res["agreement"] = col_agreement
        res.insert(0, "precision", col_precision)
        aft_trace = time()
    computation_time = time() - beg
    if verbose: print("Elapsed time for computation: ", computation_time)

    convergence = True
    if np.isnan(res.loc['Rhat']['precision']
                ) or np.abs(res.loc['Rhat']['precision'] - 1) > 1e-1:
        print("Warning! You need more iterations!")
        convergence = False
    if table:
        return {
            'agreement': col_agreement['mean'],
            'interval': col_agreement[['hpd_2.5', 'hpd_97.5']].values,
            "computation_time": computation_time,
            "convergence_test": convergence,
            'table': res
        }
    else:
        return {
            'agreement': col_agreement['mean'],
            'interval': col_agreement[['hpd_2.5', 'hpd_97.5']].values,
            "computation_time": computation_time,
            "convergence_test": convergence
        }
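A hypothetical call, assuming a ratings matrix with one row per document and NaN for missing judgments (the keyword names are the ones parsed at the top of run_phi):

import numpy as np

ratings = np.array([[0.8, 0.7, np.nan, 0.9],
                    [0.2, 0.3, 0.25, np.nan]])  # toy data: 2 documents, 4 raters
result = run_phi(ratings, limits=(0, 1), N=2000, verbose=True, table=True)
print(result["agreement"], result["interval"])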