Example #1
    def logPrior(self):
        prior = 0.0
        prior += gamma.logpdf(self.par[0], 2.0, scale=6.0)
        prior += beta.logpdf(0.5 * (self.par[1] + 1.0), 2.0, 2.0)
        prior += beta.logpdf(0.5 * (self.par[2] + 1.0), 2.0, 2.0)
        prior += beta.logpdf(0.5 * (self.par[3] + 1.0), 2.0, 2.0)

        return(prior)
Example #2
    def logPrior(self):
        prior = 0.0
        prior += gamma.logpdf(self.par[0], 2.0, scale=6.0)
        prior += beta.logpdf(0.5 * (self.par[1] + 1.0), 2.0, 2.0)
        prior += beta.logpdf(0.5 * (self.par[2] + 1.0), 2.0, 2.0)
        prior += beta.logpdf(0.5 * (self.par[3] + 1.0), 2.0, 2.0)

        return (prior)
Example #3
def credible_interval(k, n, confidence_level=.95, tolerance=1e-6):
    # Function for estimating width of credible interval.
    # Find the highest posterior density interval using binary search.
    p_min_lower = float(0)
    p_middle = p_min_upper = p_max_lower = k / float(n)
    p_max = p_max_upper = float(1)
    p_min_middle = (p_min_lower + p_middle) / 2  # == 0 if k == 0.
    p_max_middle = (p_middle + p_max) / 2  # == 1 if k == n.
    if (k == 0):  # Exception handling
        # p_min_middle = 0  # Per definition... it's the peak.
        while (abs(beta.cdf(p_max_middle, 1, n + 1) - confidence_level) >
               tolerance):
            if (beta.cdf(p_max_middle, 1, n + 1) > confidence_level):
                p_max_upper = p_max_middle
            else:
                p_max_lower = p_max_middle
            p_max_middle = (p_max_lower + p_max_upper) / 2
    elif (k == n):  # Exception handling
        while (abs(1 - beta.cdf(p_min_middle, k + 1, 1) - confidence_level) >
               tolerance):
            if (1 - beta.cdf(p_min_middle, k + 1, 1) > confidence_level):
                p_min_lower = p_min_middle
            else:
                p_min_upper = p_min_middle
            p_min_middle = (p_min_lower + p_min_upper) / 2
    else:  # Main case
        while (abs(
                beta.cdf(p_max_middle, k + 1, n - k + 1) -
                beta.cdf(p_min_middle, k + 1, n - k + 1) - confidence_level) >
               tolerance / 2):
            # Binary search
            # Reset p-max values for new iteration:
            p_max_lower = p_middle
            p_max_upper = p_max
            p_max_middle = (p_max_lower + p_max_upper) / 2
            while (abs(
                    beta.logpdf(p_min_middle, k + 1, n - k + 1) -
                    beta.logpdf(p_max_middle, k + 1, n - k + 1)) >
                   tolerance / 2):
                # Binary search to find p_max corresponding to p_min (same value in pdf).
                if (k * np.log(p_min_middle) +
                    (n - k) * np.log(1 - p_min_middle) >
                        k * np.log(p_max_middle) +
                    (n - k) * np.log(1 - p_max_middle)):
                    p_max_upper = p_max_middle
                else:
                    p_max_lower = p_max_middle
                p_max_middle = (p_max_lower + p_max_upper) / 2
            if (beta.cdf(p_max_middle, k + 1, n - k + 1) -
                    beta.cdf(p_min_middle, k + 1, n - k + 1) >
                    confidence_level):
                p_min_lower = p_min_middle
            else:
                p_min_upper = p_min_middle
            p_min_middle = (p_min_lower + p_min_upper) / 2
    return (dict([('p_min', p_min_middle), ('p_max', p_max_middle)]))
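A minimal usage sketch (not from the source above; it assumes `credible_interval` and `scipy.stats.beta` are in scope as shown):

# Approximate 95% highest-posterior-density interval for 7 successes in 20 trials.
interval = credible_interval(7, 20, confidence_level=0.95)
print(interval['p_min'], interval['p_max'])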
Example #4
def get_outliers(data, filter, plotting):
    if plotting:
        for x, r in [("x1", (0, 1)), ("x2", (0, 30)), ("x3", (0, 1))]:
            plt.violinplot(data[x], vert=False)
            plt.xlim(r)
            plt.savefig("plots/violin/%s.png" % x)
            plt.clf()

    if filter:
        data_fl = data[data["class"] == 0]
    else:
        data_fl = data

    pdf = pd.DataFrame({})

    a, b, loc, scale = beta.fit(data_fl["x1"])
    pdf["x1"] = beta.logpdf(data["x1"], a, b, loc=loc, scale=scale)

    a, loc, scale = gamma.fit(data_fl["x2"])
    pdf["x2"] = gamma.logpdf(data["x2"], a, loc=loc, scale=scale)

    a, b, loc, scale = beta.fit(data_fl["x3"])
    pdf["x3"] = beta.logpdf(data["x3"], a, b, loc=loc, scale=scale)

    pdfs = pdf["x1"] + pdf["x2"] + pdf["x3"]

    if plotting:
        sns.boxplot(y=pdfs, x="class", data=data)
        plt.savefig("plots/boxplot.png")
        plt.clf()

    if plotting:
        sorted_pdfs = np.sort(pdfs)
        plt.plot(sorted_pdfs)
        # Draw a horizontal line at each candidate threshold rank.
        splits = [40, 45, 50, 60]
        for split in splits:
            threshold = sorted_pdfs[split]
            plt.plot((0, 1000), (threshold, threshold), 'k--', lw=0.5)

        plt.savefig("plots/thresholds.png")
        plt.clf()

    outliers = np.argsort(pdfs)

    final = []
    for outlier in outliers:
        if data["class"][outlier] == -1:
            final.append(outlier)

    return np.array(final[:100])
Example #5
def ComputeLRT(adjmatrix, ncr, ntr, nruns):
    #print(adjmatrix)
    adjmatrix[adjmatrix == 0] = [1]
    adjmatrix[adjmatrix == 100] = [99]
    #print(adjmatrix)
    intra_cluster = []
    for i in range(0, ncr):
        for j in range(i + 1, ncr):
            intra_cluster.append(adjmatrix[(i, j)])

    for i in range(ncr, ntr + ncr):
        for j in range(i + 1, ncr + ntr):
            intra_cluster.append(adjmatrix[(i, j)])

    inter_cluster = []
    for i in range(0, ncr):
        for j in range(ncr, ncr + ntr):
            inter_cluster.append(adjmatrix[(i, j)])

    intra_cluster = np.array(intra_cluster) / nruns
    inter_cluster = np.array(inter_cluster) / nruns
    Stability = np.sum(intra_cluster) / (np.sum(intra_cluster) +
                                         np.sum(inter_cluster))
    intra_cluster = [0.01 if x == 0 else x for x in intra_cluster]
    intra_cluster = [0.99 if x == 1 else x for x in intra_cluster]
    inter_cluster = [0.01 if x == 0 else x for x in inter_cluster]
    inter_cluster = [0.99 if x == 1 else x for x in inter_cluster]

    #print(np.var(intra_cluster),np.var(inter_cluster))
    if float(np.var(intra_cluster)) <= 0.0000001:
        #print(intra_cluster)
        for i in range(0, len(intra_cluster)):
            intra_cluster[i] = intra_cluster[i] + (i + 1) / 1000
        #print(intra_cluster)
    if float(np.var(inter_cluster)) <= 0.0000001:
        for i in range(0, len(inter_cluster)):
            inter_cluster[i] = inter_cluster[i] + (i + 1) / 1000

    a1, b1 = estBetaParams(intra_cluster)
    l1 = beta.logpdf(intra_cluster, a1, b1)
    a2, b2 = estBetaParams(inter_cluster)
    l2 = beta.logpdf(inter_cluster, a2, b2)
    a3, b3 = estBetaParams(np.append(intra_cluster, inter_cluster))
    l0 = beta.logpdf(np.append(intra_cluster, inter_cluster), a3, b3)
    LR = 2 * ((np.sum(l1) + np.sum(l2)) - np.sum(l0))
    if math.isnan(LR):
        exit()
    p = chi2.sf(LR, 2)
    #print(a1,b1,a2,b2,a3,b3)
    return (LR, p, Stability)
Example #6
    def hyper_param_inf(self, corpus, params, score):
        if self.verbose >= 1:
            print "\n****** HP INFERENCE *******"

        for i in range(params.n_hypermoves):
            if self.verbose > 1:
                print "\n--- current params ---"
                params.show()
                print "hyper param score:" + str(score)
                print "    a_nr: " + str(
                    gamma.logpdf(params.alpha_r, params.alpha_r_hp))
                print "    a_r: " + str(
                    gamma.logpdf(params.alpha_nr, params.alpha_nr_hp))
                print "    empty_i: " + str(
                    beta.logpdf(params.empty_intent, params.intent_hp_a,
                                params.intent_hp_b))

            new_params = Params()
            new_params.propose_hyper_params(params)
            new_score = self.score_full_lex(corpus, new_params)
            # print "* scoring"
            # params.show()

            if self.verbose > 1:
                print "--- new params ---"
                new_params.show()
                print "hyper param score:" + str(new_score)
                print "    a_nr: " + str(
                    gamma.logpdf(new_params.alpha_r, new_params.alpha_r_hp))
                print "    a_r: " + str(
                    gamma.logpdf(new_params.alpha_nr, new_params.alpha_nr_hp))
                print "    empty_i: " + str(
                    beta.logpdf(new_params.empty_intent,
                                new_params.intent_hp_a,
                                new_params.intent_hp_b))

            if new_score - score > 0:
                params = new_params
            elif random() < exp(new_score - score):
                params = new_params

                if self.verbose >= 1:
                    print "    hp change! - old = %2.2f, new = %2.2f" % (
                        score, new_score)

        # now rescore with the new parameters - redundant if you didn't swap, FIXME
        self.score_full_lex(corpus, params)

        return params
Example #7
def log_lk(newly_exposed, newly_infected, unobserved, exposed_p,
           external_sources_p, infected_p, tested_p, tested_contact_p, dead_p,
           immune_p, susceptible_p, dead_alpha, dead_beta, test_alpha,
           test_beta, contact_alpha, contact_beta):
    if (exposed_p <= 0 or external_sources_p <= 0 or infected_p <= 0
            or (tested_p <= 0).any() or (tested_contact_p <= 0).any()
            or (dead_p <= 0).any() or immune_p <= 0 or susceptible_p <= 0 or
            # exposed_p>=1 or
            external_sources_p >= 1 or infected_p >= 1
            or (tested_p >= 1).any() or (tested_contact_p >= 1).any()
            or (dead_p >= 1).any() or immune_p >= 1 or susceptible_p >= 1):
        return -numpy.inf, None
    cum_exposed = numpy.hstack((numpy.zeros_like(populations),
                                cumsum(newly_exposed - newly_infected,
                                       axis=1)))
    cum_unobserved = numpy.hstack(
        (numpy.zeros_like(populations), cumsum(unobserved, axis=1)))
    cum_unknown_infected = numpy.hstack(
        (numpy.zeros_like(populations),
         cumsum(newly_infected - unobserved - confirmed, axis=1)))
    cum_susceptible = populations - cum_exposed - cum_unknown_infected - cum_confirmed - cum_deaths - cum_recovered - cum_unobserved
    log_lk = numpy.vstack((
        # Susceptible people get exposed to infected or tested
        binom.logpmf(
            newly_exposed, cum_susceptible[:, :-1],
            exposed_p * (cum_unknown_infected[:, :-1] +
                         tested_contact_p[:, None] * cum_confirmed[:, :-1]) +
            (external_sources_p)),
        # Exposed people become infected
        binom.logpmf(newly_infected, cum_exposed[:, :-1], infected_p),
        # People might recover or die before they are tested
        binom.logpmf(unobserved, cum_unknown_infected[:, :-1],
                     dead_p[:, None] + immune_p),
        # Infected people become tested
        binom.logpmf(confirmed, cum_unknown_infected[:, :-1] - unobserved,
                     tested_p[:, None]),
        # Tested people recover
        binom.logpmf(recovered, cum_confirmed[:, :-1], immune_p),
        # or they die
        binom.logpmf(deaths, cum_confirmed[:, :-1], dead_p[:, None]),
    ))
    errors = ~numpy.isfinite(log_lk)
    log_lk[errors] = 0
    prior = (
        beta.logpdf(dead_p, dead_alpha, dead_beta).sum(),
        beta.logpdf(tested_p, test_alpha, test_beta).sum(),
        beta.logpdf(tested_contact_p, contact_alpha, contact_beta).sum(),
    )
    return numpy.sum(log_lk) + sum(prior), errors
Example #8
def sample_trunc_beta(a, b, lower, upper):
    """
    Samples from a truncated beta distribution in log space

    Parameters
    ----------
    a, b: float
        Canonical parameters of the beta distribution
    lower, upper: float
        Lower and upper truncations of the beta distribution

    Returns
    -------
    s: float
        Sampled value from the truncated beta distribution in log space
    """
    # Check boundaries are correct
    if upper < lower:
        return

    # If a=1 and b=1, then we're sampling truncated uniform distribution
    # (i.e. peak formula below is not valid, but also not needed)
    if a == 1 and b == 1:
        s = np.random.uniform(low=lower, high=upper)
        return s

    # Get location of peak of distribution to determine type of sampling
    peak = (a-1) / (a+b-2)
    # If peak of beta dist is outside truncation, use uniform rejection sampling
    if peak < lower or peak > upper:
        # Sample a proposal
        s = np.random.uniform(low=lower, high=upper)
        # Get components of rejection sampling
        log_f_s = beta.logpdf(s, a, b)
        log_g_s = -1*np.log(upper-lower)
        log_M = max(beta.logpdf(lower,a,b), beta.logpdf(upper,a,b))\
                + np.log(upper-lower)
        # Keep sampling until proposal is accepted
        while np.log(np.random.random()) > log_f_s - (log_M + log_g_s):
            s = np.random.uniform(low=lower, high=upper)
            log_f_s = beta.logpdf(s, a, b)
    # If peak of beta is inside truncation, sample from beta directly
    else:
        s = beta.rvs(a, b)
        # Keep sampling until proposal falls inside truncation boundaries
        while s < lower or s > upper:
            s = beta.rvs(a,b)

    return s
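A short usage sketch (illustrative values, not part of the source): sampling from Beta(2, 5) truncated to [0.5, 0.9]. Because the Beta(2, 5) peak at 0.2 lies below the lower bound, this exercises the uniform rejection-sampling branch.

# Assumes numpy and scipy.stats.beta are imported as the function requires.
samples = [sample_trunc_beta(2.0, 5.0, 0.5, 0.9) for _ in range(5)]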
Example #9
def compute_pdfs(_VS, _AS, _BS, _BB=None, check=False):
    M = np.array([v is not False for v in _VS])  # boolean mask of usable entries
    if _BB is None:
        VS, AS, BS = map(np.array, [_VS, _AS, _BS])
    else:
        VS, AS, BS, BB = map(np.array, [_VS, _AS, _BS, _BB])
    VS[VS < SEQERROR] = SEQERROR
    VS[VS > (1.0 - SEQERROR)] = 1.0 - SEQERROR
    RS = np.full(M.shape[0], np.NINF)
    if _BB is None:
        RS[M] = beta.logpdf(VS[M], AS[M], BS[M])
    else:
        NS = AS[M] + BS[M]
        KS = AS[M]
        CS = gammaln(NS + 1) - gammaln(NS - KS + 1) - gammaln(
            KS + 1)  #comb(NS, KS)
        NSminusKS = NS - KS
        BAFS = VS[M]
        minusBAFS = 1 - BAFS
        SS = BB[M]
        RS[M] = CS + betaln(KS + SS * BAFS, NSminusKS +
                            SS * minusBAFS) - betaln(SS * BAFS, SS * minusBAFS)
    RS[M & (RS < EPSILON)] = EPSILON
    if check:
        assert np.NINF not in RS[M]
    return RS
Example #10
    def fit_beta(cls, X):
        N = X.shape[0]
        D = X.shape[1]

        Xsafe = np.clip(X, 0.01, 1-0.01)
        
        P = 20 
        #params = np.asarray([(2+a, 52-a) for a in np.linspace(0, 50, P)])
        #params = np.asarray([(b+a, b+c-a) for b in np.linspace(1, 2, 5) for c in np.linspace(1, 50, 10) for a in np.linspace(0, b, P)]) # The buggy one
        params = np.asarray([(b+a, b+c-a) for b in np.linspace(1, 2, 5) for c in np.linspace(1, 50, 10) for a in np.linspace(0, c, P)])
        #dists = np.asarray([beta.pdf(x, a, b) for a, b in params]) 

        theta = np.zeros((D, 2))
        
        scores = np.zeros(len(params))
        
        for d in range(D):
            # Check likelihood of the dists
            for p in range(len(params)):
                scores[p] = beta.logpdf(Xsafe[:,d], *params[p]).sum()

            ii = scores.argmax()

            theta[d] = params[ii]

        return theta
Example #11
    def fit_beta_atleast_std(cls, X, std):
        variance = std**2
        N = X.shape[0]
        D = X.shape[1]

        Xsafe = np.clip(X, 0.01, 1-0.01)
        
        P = 20 
        #params = np.asarray([(2+a, 52-a) for a in np.linspace(0, 50, P)])
        #params = np.asarray([(b+a, b+c-a) for b in np.linspace(1, 2, 5) for c in np.linspace(1, 50, 10) for a in np.linspace(0, b, P)]) # The buggy one
        params = np.asarray([(b+a, b+c-a) for b in np.linspace(1, 2, 5) for c in np.linspace(1, 50, 10) for a in np.linspace(0, c, P)])

        a, b = params.T
        variances = a * b / ((a + b)**2 * (a + b + 1))

        II = np.where(variances >= variance)[0]

        params = params[II]
        #dists = np.asarray([beta.pdf(x, a, b) for a, b in params]) 

        theta = np.zeros((D, 2))
        
        scores = np.zeros(len(params))
        
        for d in range(D):
            # Check likelihood of the dists
            for p in range(len(params)):
                scores[p] = beta.logpdf(Xsafe[:,d], *params[p]).sum()

            ii = scores.argmax()

            theta[d] = params[ii]

        return theta
Example #12
    def fit_beta(cls, X):
        N = X.shape[0]
        D = X.shape[1]

        Xsafe = np.clip(X, 0.01, 1 - 0.01)

        P = 20
        #params = np.asarray([(2+a, 52-a) for a in np.linspace(0, 50, P)])
        #params = np.asarray([(b+a, b+c-a) for b in np.linspace(1, 2, 5) for c in np.linspace(1, 50, 10) for a in np.linspace(0, b, P)]) # The buggy one
        params = np.asarray([(b + a, b + c - a) for b in np.linspace(1, 2, 5)
                             for c in np.linspace(1, 50, 10)
                             for a in np.linspace(0, c, P)])
        #dists = np.asarray([beta.pdf(x, a, b) for a, b in params])

        theta = np.zeros((D, 2))

        scores = np.zeros(len(params))

        for d in range(D):
            # Check likelihood of the dists
            for p in range(len(params)):
                scores[p] = beta.logpdf(Xsafe[:, d], *params[p]).sum()

            ii = scores.argmax()

            theta[d] = params[ii]

        return theta
Example #13
    def __init__(self, ez_nk, B1=None, B2=None, P_lim=[2, 65536]):

        # These are the fixed, assumed beta distributions we use for
        # short-period and long-period, respectively
        if B1 is None:
            B1 = beta(1.5, 50.)
        if B2 is None:
            B2 = beta(1, 1.8)

        self.ez = ez_nk  # (2, N, K)
        self.K = np.isfinite(self.ez[0]).sum(axis=-1)  # (N, )
        self.P_lim = P_lim

        # Used priors from The Joker:
        ln_e_p0 = beta.logpdf(self.ez[0], a=0.867, b=3.03)
        ln_z_p0 = np.full_like(self.ez[1],
                               -np.log(np.log(P_lim[1]) - np.log(P_lim[0])))
        self.ln_p0 = np.stack((ln_e_p0, ln_z_p0))  # (2, N, K)

        self.B1 = B1
        self.B2 = B2
        self._lnp1e = B1.logpdf(self.ez[0])
        self._lnp2e = B2.logpdf(self.ez[0])

        self._zlim = np.log(P_lim)
Example #14
    def pdf(self, u: Array, log=False):
        assert self.smoothing == "beta", "Empirical Copula only has density (PDF) for smoothing = 'beta'"
        assert isinstance(
            self.data,
            np.ndarray), "data is still undefined for EmpiricalCopula"
        u = self.pobs(u, self._ties)

        data_rank = rank_data(self.data, 1, self._ties)
        n = len(self.data)

        if log:
            return np.array([
                log_sum(
                    np.array([
                        sum(beta.logpdf(row, a=row_rank, b=n + 1 - row_rank))
                        for row_rank in data_rank
                    ])) for row in u
            ]) - np.log(n + self._offset)
        else:
            return np.array([
                sum([
                    np.prod(beta.pdf(row, a=row_rank, b=n + 1 - row_rank))
                    for row_rank in data_rank
                ]) for row in u
            ]) / (n + self._offset)
Example #15
 def prior_p(self, p0, p1):
     '''
     Joint log-prior for pi0 and pi1. Uses two independent betas
     '''
     return beta.logpdf(x=p0, a=self.ap0,
                        b=self.bp0) + sp.stats.beta.logpdf(
                            x=p1, a=self.ap1, b=self.bp1)
Example #16
 def get_particle_from_state(self, state, obs):
     """
     Returns a particle from this state, as well as the log_density of this particle
     """
     sample_means = beta.rvs(state['successes'], state['failures'])
     log_density = np.sum(beta.logpdf(sample_means, state['successes'], state['failures']))
     return sample_means, log_density
Example #17
 def _py_log_prob(self, xs, zs):
     n_samples = zs.shape[0]
     lp = np.zeros(n_samples, dtype=np.float32)
     for s in range(n_samples):
         lp[s] = beta.logpdf(zs[s, :], a=1.0, b=1.0)
         for n in range(len(xs)):
             lp[s] += bernoulli.logpmf(xs[n], p=zs[s, :])
     return lp
Example #18
    def dBE(y: np.ndarray, location: np.ndarray, scale: np.ndarray):
        """Density function.

        """
        a = location * (1 - scale**2) / (scale**2)
        b = a * (1 - location) / location
        fy = beta.logpdf(x=y, a=a, b=b)
        return fy
Example #19
 def test_beta_log_pdf(self):
     from scipy.stats import beta
     a = 3.0
     b = 2.0
     for x in np.linspace(0.001,0.999,25):
         expected = beta.logpdf(x,a,b)
         got = beta_log_pdf(x,a,b)
         # print x,got,expected
         self.assertAlmostEqual(got,expected,places=6)
Example #20
        def f0(x):
            mu, a, th = x[0], x[1], x[2]
            res = uv_exp_ll(t, mu, a, th, T)

            res += gamma.logpdf(mu, mu_hyp[0], scale=mu_hyp[1]) \
                + gamma.logpdf(th, theta_hyp[0], scale=theta_hyp[1]) \
                + beta.logpdf(a, alpha_hyp[0], alpha_hyp[1])

            return res
Example #21
def _test(model, xs, zs):
  val_true = beta.logpdf(zs['p'], 1.0, 1.0)
  val_true += np.sum([bernoulli.logpmf(x, zs['p'])
                      for x in xs['x']])
  val_ed = model.log_prob(xs, zs)
  assert np.allclose(val_ed.eval(), val_true)
  zs_tf = {key: tf.cast(value, dtype=tf.float32)
           for key, value in six.iteritems(zs)}
  val_ed = model.log_prob(xs, zs_tf)
  assert np.allclose(val_ed.eval(), val_true)
Example #22
def _test(model, xs, zs):
  val_true = beta.logpdf(zs['p'], 1.0, 1.0)
  val_true += np.sum([bernoulli.logpmf(x, zs['p'])
                      for x in list(six.itervalues(xs))[0]])
  val_ed = model.log_prob(xs, zs)
  assert np.allclose(val_ed.eval(), val_true)
  zs_tf = {key: tf.cast(value, dtype=tf.float32)
           for key, value in six.iteritems(zs)}
  val_ed = model.log_prob(xs, zs_tf)
  assert np.allclose(val_ed.eval(), val_true)
Example #23
    def test_dummy_posterior_correct(self):
        A = self.arr
        logpost = self.bhp.log_posterior_with_params(A, 5., .2, .1, A[-1])

        check = self.bhp.log_likelihood_with_params(A, 5, .2, .1, A[-1]) + \
                gamma.logpdf(5, self.bhp.mu_hyp[0], scale=self.bhp.mu_hyp[1]) + \
                gamma.logpdf(.1, self.bhp.theta_hyp[0], scale=self.bhp.theta_hyp[1]) + \
                beta.logpdf(.2, self.bhp.alpha_hyp[0], self.bhp.alpha_hyp[1])

        self.assertAlmostEqual(logpost, check)
Example #24
        def hyper_param_inf(self,
                            corpus,
                            params,
                            score):
            if self.verbose >= 1:
                print "\n****** HP INFERENCE *******"

            for i in range(params.n_hypermoves):
                if self.verbose > 1:
                    print "\n--- current params ---"
                    params.show()
                    print "hyper param score:" + str(score)
                    print "    a_nr: " + str(gamma.logpdf(params.alpha_r, params.alpha_r_hp))
                    print "    a_r: " + str(gamma.logpdf(params.alpha_nr, params.alpha_nr_hp))
                    print "    empty_i: " + str(beta.logpdf(params.empty_intent, params.intent_hp_a, params.intent_hp_b))

                new_params = Params()
                new_params.propose_hyper_params(params)
                new_score = self.score_full_lex(corpus, new_params)
                # print "* scoring"
                # params.show()

                if self.verbose > 1:
                    print "--- new params ---"
                    new_params.show()
                    print "hyper param score:" + str(new_score)
                    print "    a_nr: " + str(gamma.logpdf(new_params.alpha_r, new_params.alpha_r_hp))
                    print "    a_r: " + str(gamma.logpdf(new_params.alpha_nr, new_params.alpha_nr_hp))
                    print "    empty_i: " + str(beta.logpdf(new_params.empty_intent, new_params.intent_hp_a, new_params.intent_hp_b))

                if new_score - score > 0:
                    params = new_params
                elif random() < exp(new_score - score):
                    params = new_params

                    if self.verbose >= 1:
                        print "    hp change! - old = %2.2f, new = %2.2f" % (score, new_score)

            # now rescore with the new parameters - redundant if you didn't swap, FIXME
            self.score_full_lex(corpus, params)

            return params
Example #25
def binBounds2(alpha, a, b, t, kb):

    # possible p values
    p_vals = np.linspace(0, 1, num=int(1 / 0.001) + 1)
    indices = np.arange(len(p_vals))

    # computation of prior
    log_prior_0 = beta.logpdf(p_vals, a, b)

    # computation of posterior
    log_posterior_0 = beta.logpdf(p_vals, a + kb, b + t - kb)

    # martingale computation
    log_martingale_0 = log_prior_0 - log_posterior_0

    # Confidence intervals
    ci_condition_0 = log_martingale_0 < np.log(1 / alpha)
    
    ci_indices_0 = np.copy(indices[ci_condition_0])
    return [p_vals[np.min(ci_indices_0)], p_vals[np.max(ci_indices_0)]]
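A hypothetical call (assumes numpy and scipy.stats.beta are imported as above): a 95% interval for 30 successes in 100 trials under a Beta(1, 1) prior.

lower, upper = binBounds2(0.05, 1.0, 1.0, 100, 30)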
Example #26
    def _py_log_prob(self, zs):
        # This example is written for pedagogy. We recommend
        # vectorizing operations in practice.
        n_minibatch = zs.shape[0]
        lp = np.zeros(n_minibatch, dtype=np.float32)
        for b in range(n_minibatch):
            lp[b] = beta.logpdf(zs[b, :], a=1.0, b=1.0)
            for n in range(len(self.data)):
                lp[b] += bernoulli.logpmf(self.data[n], p=zs[b, :])

        return lp
Example #27
    def _py_log_prob(self, xs, zs):
        # This example is written for pedagogy. We recommend
        # vectorizing operations in practice.
        n_minibatch = zs.shape[0]
        lp = np.zeros(n_minibatch, dtype=np.float32)
        for b in range(n_minibatch):
            lp[b] = beta.logpdf(zs[b, :], a=1.0, b=1.0)
            for n in range(xs['x'].shape[0]):
                lp[b] += bernoulli.logpmf(xs['x'][n], p=zs[b, :])

        return lp
Example #28
    def _py_log_prob(self, xs, zs):
        # This example is written for pedagogy. We recommend
        # vectorizing operations in practice.
        n_samples = zs.shape[0]
        lp = np.zeros(n_samples, dtype=np.float32)
        for b in range(n_samples):
            lp[b] = beta.logpdf(zs[b, :], a=1.0, b=1.0)
            for n in range(xs['x'].shape[0]):
                lp[b] += bernoulli.logpmf(xs['x'][n], p=zs[b, :])

        return lp
Example #29
    def weight(self, sym, start, end, theta):
        state = (sym, start, end)
        try:
            u = self.slice_variables[state]
        except KeyError:
            raise ValueError('I do not expect to reweight a rule for an unseen state: %s' % str(state))

        if theta > u:
            return - beta.logpdf(math.exp(u), self.a, self.b)

        else:
            raise ValueError('I do not expect to reweight rules scoring less than the threshold')
Example #30
def player_beta(params, games, date, day_span, decay):
    """
    Likelihood function to determine player beta distribution parameters
    :return: Likelihood
    """

    likelihood = beta.logpdf(games['pts'] / games['team_pts'], params[0],
                             params[1])
    weight = np.exp(-decay * np.ceil(
        ((date - games['date']).dt.days) / day_span))

    return -np.dot(likelihood, weight)
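A sketch of how this negative log-likelihood might be fed to an optimizer; the data frame, dates, and hyperparameters below are illustrative assumptions, not part of the source.

import pandas as pd
from scipy.optimize import minimize

games = pd.DataFrame({
    'pts': [20, 25, 18],
    'team_pts': [100, 110, 95],
    'date': pd.to_datetime(['2020-01-01', '2020-01-08', '2020-01-15']),
})
# Minimize the weighted negative log-likelihood over the two beta parameters.
result = minimize(player_beta, x0=[2.0, 8.0],
                  args=(games, pd.Timestamp('2020-02-01'), 7, 0.05),
                  bounds=[(1e-3, None), (1e-3, None)], method='L-BFGS-B')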
Example #31
    def test_diffuse_prior_posterior_correct(self):
        A = self.arr
        bhp2 = BayesianUVExpHawkesProcess((1, 10000), (1, 1), (1, 1e5))

        logpost = bhp2.log_posterior_with_params(A, 5., .2, .1, A[-1])

        check = bhp2.log_likelihood_with_params(A, 5, .2, .1, A[-1]) + \
                gamma.logpdf(5, bhp2.mu_hyp[0], scale=bhp2.mu_hyp[1]) + \
                gamma.logpdf(.1, bhp2.theta_hyp[0], scale=bhp2.theta_hyp[1]) + \
                beta.logpdf(.2, bhp2.alpha_hyp[0], bhp2.alpha_hyp[1])

        self.assertAlmostEqual(logpost, check)
Example #32
def admixture_proportion_proposal(graph):
    '''
    return proposed admixture fraction 
    and "log q(x'|x) - log q(x|x')"
    '''
    # beta proposal
    admixture_proportions = graph.get_admixture_proportions()
    proportions = dict()
    qs = 0
    for i, e in enumerate(admixture_proportions):
        mu = admixture_proportions[e]
        admixture_edge_name = f'{e[0]}_{e[1]}_proportion'
        a = mu * 30
        b = (1 - mu) * 30
        p = np.random.beta(a, b, 1)[0]
        proportions[admixture_edge_name] = p
        q_forward = beta.logpdf(p, a, b)
        a = p * 30
        b = (1 - p) * 30
        q_backward = beta.logpdf(mu, a, b)
        qs += q_backward - q_forward
    return proportions, qs
Example #33
def _test(model, xs, zs):
    n_samples = zs.shape[0]
    val_true = np.zeros(n_samples, dtype=np.float32)
    for s in range(n_samples):
        p = np.squeeze(zs[s, :])
        val_true[s] = beta.logpdf(p, 1, 1)
        val_true[s] += np.sum([bernoulli.logpmf(x, p) for x in xs['x']])

    val_ed = model.log_prob(xs, zs)
    assert np.allclose(val_ed.eval(), val_true)
    zs_tf = tf.cast(zs, dtype=tf.float32)
    val_ed = model.log_prob(xs, zs_tf)
    assert np.allclose(val_ed.eval(), val_true)
Example #34
def get_beta_prior():
    '''
     (2) Beta(1+eps, 1+eps)
        we need to add this prior to the mll probs in the case where 
        we have a flat mll curve('i.e. high regularization')
    '''
    #prior (2)
    eps = 0.0001
    a, b = 1.0 + eps, 1.0 + eps
    gridpoints = np.linspace(0.001, 0.999, 999)
    log_prior = beta.logpdf(gridpoints, a, b)
    assert len(log_prior) == len(infer.trange)
    return log_prior
Example #35
def _test(model, xs, zs):
    n_samples = zs.shape[0]
    val_true = np.zeros(n_samples, dtype=np.float32)
    for s in range(n_samples):
        p = np.squeeze(zs[s, :])
        val_true[s] = beta.logpdf(p, 1, 1)
        val_true[s] += np.sum([bernoulli.logpmf(x, p)
                               for x in xs['x']])

    val_ed = model.log_prob(xs, zs)
    assert np.allclose(val_ed.eval(), val_true)
    zs_tf = tf.cast(zs, dtype=tf.float32)
    val_ed = model.log_prob(xs, zs_tf)
    assert np.allclose(val_ed.eval(), val_true)
Example #36
    def test_logprob(self):
        # Beta(1,b) = Kumaraswamy(1,b)
        b = torch.exp(Variable(torch.randn(10)))
        a = Variable(torch.ones(10))
        value = Variable(torch.randn(10))
        dist = Kumaraswamy(a, b)

        # test log probability
        res1 = dist.log_prob(value).data
        res2 = beta.logpdf(value.data.numpy(), a.data.numpy(), b.data.numpy())
        res2[np.isinf(res2)] = dist.LOG_0
        self.assertEqual(res1, res2)

        # Beta(a,1) = Kumaraswamy(a,1)
        a = torch.exp(Variable(torch.randn(100)))
        b = Variable(torch.ones(100))
        value = Variable(torch.randn(100))
        dist = Kumaraswamy(a, b)

        # test log probability
        res1 = dist.log_prob(value).data
        res2 = beta.logpdf(value.data.numpy(), a.data.numpy(), b.data.numpy())
        res2[np.isinf(res2)] = dist.LOG_0
        self.assertEqual(res1, res2)
Example #37
    def weight(self, sym, start, end, theta):
        state = (sym, start, end)
        try:
            u = self.slice_variables[state]
        except KeyError:
            raise ValueError(
                'I do not expect to reweight a rule for an unseen state: %s' %
                str(state))

        if theta > u:
            return -beta.logpdf(math.exp(u), self.a, self.b)

        else:
            raise ValueError(
                'I do not expect to reweight rules scoring less than the threshold'
            )
Example #38
    def _compute_loglikelihoods(self, X):
        llh = 0.0
        M = self._n_clusters
        D = X.shape[1]
        #print self.theta_.min(), self.theta_.max()
        for m in range(M):
            for d in range(D):
                #print self.theta_[m,d,0], self.theta_[m,d,1]
                #print X[:,d].min(), X[:,d].max(), self.theta_[m,d]
                llh += np.sum((self.labels_ == m) * \
                    beta.logpdf(X[:,d], self.theta_[m,d,0], self.theta_[m,d,1]))

        #print X.min(), X.max()
        #print self.theta_.min(), self.theta_.max()
        #print 'LLH:::::::', llh

        return llh
Example #39
    def _compute_loglikelihoods(self, X):
        llh = 0.0
        M = self._n_clusters
        D = X.shape[1]
        #print self.theta_.min(), self.theta_.max()
        for m in range(M):
            for d in range(D):
                #print self.theta_[m,d,0], self.theta_[m,d,1]
                #print X[:,d].min(), X[:,d].max(), self.theta_[m,d]
                llh += np.sum((self.labels_ == m) * \
                    beta.logpdf(X[:,d], self.theta_[m,d,0], self.theta_[m,d,1]))
        
        #print X.min(), X.max()
        #print self.theta_.min(), self.theta_.max()
        #print 'LLH:::::::', llh

        return llh
Example #40
def _classify(neg_feats, pos_feats, mixture_params):
    from scipy.stats import beta
    M = len(mixture_params)
    collapsed_feats = np.apply_over_axes(np.mean, neg_feats, [0, 1]).ravel()
    collapsed_feats = np.clip(collapsed_feats, 0.01, 1-0.01)
    D = collapsed_feats.shape[0]
    
    qlogs = np.zeros(M)
    for m in range(M):
        #v = qlogs[m]
        v = 0.0
        for d in range(D):
            v += beta.logpdf(collapsed_feats[d], mixture_params[m,d,0], mixture_params[m,d,1])
        qlogs[m] = v

    #bkg_id = qlogs.argmax()
    #return bkg_id
    return qlogs
Example #41
    def lnpriorfn(self, p):
        lnfs, lnrs, lnms, q1, q2 = p[:5]
        lp = 0.0
        if not ((0 < q1 < 1) and (0 < q2 < 1)):
            return -np.inf
        lp -= 0.5 * (((lnrs - ln_rstar) / ln_rstar_err) ** 2 +
                     ((lnms - ln_mstar) / ln_mstar_err) ** 2)

        lnr, lnp, t0, b, sesn, secs = p[5:]
        if not 0 <= b < 2.0:
            return -np.inf
        if np.exp(lnp) < min_period:
            return -np.inf
        e = sesn**2 + secs**2
        if not 0 <= e < 1.0:
            return -np.inf

        lp += beta.logpdf(e, 1.12, 3.09)
        return lp  # + lnp
Example #42
 def _logp(self, value, a, b):
     
     if value < 0 or value > 1:
         raise ValueError("Domain Error.")
     
     return np.sum(beta.logpdf(value, a, b))
Example #43
 def logL(ab):
     a0, b0, a1, b1 = ab
     LL = beta.logpdf(p[Y == 0], a0, b0).sum() + beta.logpdf(p[Y == 1], a1, b1).sum()
     return -LL
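A hedged fitting sketch: the arrays `p` (predicted probabilities) and `Y` (binary labels) that `logL` uses are placeholders here, and numpy, scipy.stats.beta, and scipy.optimize are assumed to be imported.

from scipy.optimize import minimize

p = np.clip(np.random.rand(200), 1e-3, 1 - 1e-3)  # placeholder probabilities
Y = (np.random.rand(200) < 0.5).astype(int)        # placeholder labels
fit = minimize(logL, x0=[1.0, 1.0, 1.0, 1.0],
               bounds=[(1e-3, None)] * 4, method='L-BFGS-B')
a0, b0, a1, b1 = fit.x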
Example #44
 def _py_log_prob(self, xs, zs):
   log_prior = beta.logpdf(zs['p'], a=1.0, b=1.0)
   log_lik = np.sum(bernoulli.logpmf(xs['x'], p=zs['p']))
   return log_lik + log_prior
Example #45
    
    new = rs.normal(old, 0.05) # generate a sample from normal distribution
    
    if new < 0:
        att_symp[i] = old # reject
        ll[i] = -1e10
    else:
        simp_old = exp(-old*tps[:5]) - exp(-old*tps[1:])
        simp_new = exp(-new*tps[:5]) - exp(-new*tps[1:])

        if sum(simp_new > 0) != len(tps) - 1:
            att_symp[i] = old # reject
            ll[i] = -1e10
        else:
            # simulate probabilities corresponding to the data
            log_ratio = sum(beta.logpdf(simp_new, a, b, loc=0, scale=1)) - sum(beta.logpdf(simp_old, a, b, loc=0, scale=1))
    
            if log(rs.uniform(0,1)) <  log_ratio:
                att_symp[i] = new # accept
                ll[i] = sum(beta.logpdf(simp_new, a, b, loc=0, scale=1))
                old = new
                acc = acc+1
            else:
                att_symp[i] = old # reject
                ll[i] = sum(beta.logpdf(simp_old, a, b, loc=0, scale=1))
    
    props[i] = simp_old
    i = i+1
    
att_symp = att_symp[1000:] # remove burn-in samples
ll = ll[1000:] # log-likelihood
Example #46
def lnprior(p,ID,Tarr,fmll,minWL,maxWL,minLWL,maxLWL,verbose=False):

    # apply non-informative prior on wavelength to make sure
    # line is not shifted outside working segment
    for ii,pp in enumerate(zip(p['DWL'][Tarr[...,0] != -1],fmll['WL'][Tarr[...,0] != -1])):
        if (np.abs(pp[0]) > 0.0) & (pp[1] > minWL) & (pp[1] < maxWL):
            wlshift = pp[1]+pp[0]
            if (wlshift < minLWL-0.05) or (wlshift > maxLWL+0.05):
                if verbose:
                    print('Pro: {0} --> CAUGHT A WAVELENGTH SHIFT OUTSIDE SPECTRUM BOUNDS {1}-{2}, {3} LINE SHIFTED TO: {4}'.format(ID,minLWL-0.025,maxLWL+0.025,pp[1],wlshift))
                return -np.inf

    # Prior on gamma using beta function
    mingamma = -1.5
    maxgamma = 0.65
    rangegamma = maxgamma-mingamma
    gammawprior = beta.logpdf(p['DGAMMAW'][Tarr[...,2] != -1],1.0,1.0,loc=mingamma,scale=rangegamma)
    gammarprior = beta.logpdf(p['DGAMMAR'][Tarr[...,3] != -1],1.0,1.0,loc=mingamma,scale=rangegamma)
    gammasprior = beta.logpdf(p['DGAMMAS'][Tarr[...,4] != -1],1.0,1.0,loc=mingamma,scale=rangegamma)

    # check to see if it returns any priors outside uniform prior
    if (any(np.isinf(gammawprior)) or 
        any(np.isinf(gammarprior)) or 
        any(np.isinf(gammasprior))) :
        if verbose:
            print('Pro: {0} --> CAUGHT A GAMMA SHIFT OUTSIDE THE PRIORS'.format(ID))
        return -np.inf

    # Prior on gf using beta function
    mingflog = -10.0
    maxgflog = 1.5
    rangegflog = maxgflog-mingflog
    gfprior = beta.logpdf(p['DGFLOG'][Tarr[...,1] != -1],1.0,1.0,loc=mingflog,scale=rangegflog)

    # check to see if it returns any priors outside uniform prior
    if any(np.isinf(gfprior)):
        if verbose:
            print('Pro: {0} --> CAUGHT A LOG(GF) SHIFT OUTSIDE THE PRIORS'.format(ID))
        return -np.inf

    velshift = 50.0 #km/s
    wsh = fmll['WL'][Tarr[...,0] != -1]*(velshift/speedoflight)
    wsh_max = max(wsh)
    minwll = -1.0*wsh_max
    maxwll = wsh_max
    rangewll = maxwll-minwll
    wlprior = beta.logpdf(p['DWL'][Tarr[...,0] != -1],1.0,1.0,loc=minwll,scale=rangewll)

    # check to see if it returns any priors outside uniform prior
    if any(np.isinf(wlprior)):
        if verbose:
            for ii,wlp in enumerate(wlprior):
                if np.isinf(wlp):
                    print('Pro: {0} --> CAUGHT A WAVELENGTH SHIFT OUTSIDE THE PRIORS: line {2} shifted by {1} nm'.format(ID,p['DWL'][ii],fmll['WL'][ii]))
        return -np.inf

    # check to see if arrays are empty, if so add 0.0 so that the summation works
    if len(gammawprior) == 0:
        gammawprior = [0.0]
    if len(gammarprior) == 0:
        gammarprior = [0.0]
    if len(gammasprior) == 0:
        gammasprior = [0.0]

    # 2-D gaussian prior on delta(log(gf)) and delta(lambda)
    # reparameterize such that they move on space evenly
    sig_dgflog = 0.5
    sig_dWL = 0.05
    gf_wl_prior = (-0.5*((p['DGFLOG'][Tarr[...,1] != -1]/sig_dgflog)**2.0)*((p['DWL'][Tarr[...,0] != -1]/sig_dWL)**2.0)) #- 0.5*np.log(2.0*np.pi*(sig_coup**2.0))

    # RETURN WITH COUPLED PRIOR
    return np.sum(np.hstack([gammawprior,gammarprior,gammasprior,gfprior,wlprior,gf_wl_prior]))
Example #47
        def score_full_lex(self,
                           corpus,
                           params,
                           init=False):

            # set up the intent caching
            for i in range(corpus.n_sents):

                # cache word and object probabilities uniformly
                # 1 x o matrix with [uniform ... empty]
                # and 1 x w matrix again with [uniform ... empty]
                n_os = len(corpus.sents[i][0])
                if n_os > 0:
                    unif_o = log((1 - params.empty_intent) / n_os)
                else:
                    unif_o = [None] # protects against zero objects

                self.intent_obj_probs[i] = [unif_o] * n_os + [log(params.empty_intent)]

                if init:
                    # update lexicon dirichlets based on random init
                    io = self.oi[i] == self.intent_obj[i]
                    rw = self.wi[i] == self.ref_word[i]

                    if io.any():  # protect against nulls
                        self.ref[corpus.sents[i][0][io],corpus.sents[i][1][rw]] += 1

                    # includes all words that are not the referential word
                    self.non_ref[corpus.sents[i][1][self.wi[i] != self.ref_word[i]]] += 1

                    # now add the referential words for null objects
                    if not io.any():
                        self.non_ref[corpus.sents[i][1][self.wi[i] == self.ref_word[i]]] += 1


                # now set up the quick scoring probability caches
                self.intent_obj_prob[i] = self.intent_obj_probs[i][self.intent_obj[i]]

            # cache DM scores for lexicon
            for i in range(corpus.world.n_objs):
                self.ref_score[i] = score_dm(self.ref[i, :], params.alpha_r)

            # cache non-ref DM score also
            self.nr_score = score_dm(self.non_ref, params.alpha_nr)

            # score hyperparameters (via hyper-hyperparameters)
            empty_intent_score = beta.logpdf(params.empty_intent, params.intent_hp_a, params.intent_hp_b)
            alpha_score = gamma.logpdf(params.alpha_r, params.alpha_r_hp) + gamma.logpdf(params.alpha_nr,
                                                                                         params.alpha_nr_hp)
            self.param_score = empty_intent_score + alpha_score
            score = self.update_score(corpus.n_sents)

            # debugging stuff
            if self.verbose >= 1:
                print "\n--- score full lex ---"
                print self.ref
                print " " + str(self.non_ref)

                if self.verbose > 1:
                    print "counts: %d" % (sum(self.non_ref) + sum(self.ref))
                    print "    intent obj: " + str(self.intent_obj)
                    print "    ref word: " + str(self.ref_word)
                    print "    intent obj prob: " + str(self.intent_obj_prob.round(1))

                print "full score: r %2.1f, nr %2.1f, i %2.1f, " \
                          "p %2.1f,  total: %2.1f" % (sum(self.ref_score),
                                                      self.nr_score,
                                                      sum(self.intent_obj_prob),
                                                      self.param_score,
                                                      score)


            return score
Example #48
    def fit(self, X, tol=0.00001, min_probability=0.01, min_q=0.01):
        Xsafe = np.clip(X, min_probability, 1 - min_probability)
        self._init(X, seed=0)
        M = self._n_clusters
        N = X.shape[0]
        D = X.shape[1]


        qlogs = np.zeros((M, N))
        q = np.zeros((M, N))
        v = np.zeros(N)

        loglikelihood = -np.inf
        new_loglikelihood = self._compute_loglikelihoods(Xsafe)
         
        self.iterations = 0
        while self.iterations < 200 and (np.isinf(loglikelihood) or self.iterations < 2 or np.fabs((new_loglikelihood - loglikelihood)/loglikelihood) > tol):
            loglikelihood = new_loglikelihood
            ag.info("Iteration {0}: loglikelihood {1}".format(self.iterations, loglikelihood))
            for m in range(M):
                v = qlogs[m] 
                v[:] = 0.0
                for d in range(D):
                    #print beta.logpdf(Xsafe[:,d], self.theta_[m,d,0], self.theta_[m,d,1])
                    v += beta.logpdf(Xsafe[:,d], self.theta_[m,d,0], self.theta_[m,d,1])
                qlogs[m] = v
                #print v.min(), v.max()
                
            #try:
            try:
                q[:] = np.exp(np.maximum(np.log(min_q), qlogs - logsumexp(qlogs, axis=0)))
            except:
                pass
            #except:
            #    pass
            # Clip it, for saftey
            #print q.min(), q.max()
            q[:] = np.clip(q, min_q, 1 - min_q)

            # Update labels from these responsibilities
            self.labels_ = q.argmax(axis=0) 
            
            # Update thetas with the new labels
            if 0:
                for m in range(M):
                    for d in range(D):
                        Xsafem = Xsafe[self.labels_ == m, d]
                        sm, sv = weighted_avg_and_var(Xsafe[:,d], q[m])
                        #sm = np.mean(Xsafem)
                        #sv = np.var(Xsafem)
                        self.theta_[m,d,0] = sm * (sm * (1 - sm) / sv - 1)
                        self.theta_[m,d,1] = (1 - sm) * (sm * (1 - sm) / sv - 1)
                        if np.isnan(self.theta_[m,d,0]) or np.isnan(self.theta_[m,d,1]):
                            import pdb; pdb.set_trace()
                            raise Exception()

            else:
                for m in range(M):
                    for d in range(D):
                        #from scipy.optimize import newton_krylov, nonlin
                        from scipy.special import psi

                        Ca = np.average(np.log(Xsafe[:,d]), weights=q[m])
                        Cb = np.average(np.log(1-Xsafe[:,d]), weights=q[m])
                        a, b = self.theta_[m,d]

                            #self.theta_[m,d,0] = newton_krylov(lambda x: (psi(x) - psi(x+b)) - Ca, 1.0)
        
                        self.theta_[m,d,0] = binary_search(lambda x: (psi(x) - psi(x+b)) - Ca, 0.0001, 10000.0, maxiter=20)
    

                        self.theta_[m,d,1] = binary_search(lambda x: (psi(x) - psi(x+a)) - Cb, 0.0001, 10000.0, maxiter=20)

                        # Make sure the alpha and the beta don't get too extreme. If one needs adjusting, we need
                        # adjust both, to preserve its mean
                            
                        #C = np.average(    

            #self.theta_ = np.clip(self.theta_, 0.1, 100.0)
        
            # Calculate log-likelihood
            new_loglikelihood = self._compute_loglikelihoods(Xsafe)
            self.iterations += 1

        ag.info("Iteration DONE: loglikelihood {}".format(new_loglikelihood))

        if 1:
            # Now fit constrained betas using this distribution
            #params = np.asarray([(b+a, b+c-a) for b in np.linspace(1, 2, 5) for c in np.linspace(1, 50, 10) for a in np.linspace(0, c, P)])

            #def fit_beta(cls, X):
        
            #import pdb; pdb.set_trace()
    
            for m in range(M):
                Xm = Xsafe[self.labels_ == m]
                self.theta_[m] = self.fit_beta_atleast_std(Xm, 0.225)

            #for m in xrange(M):
            #    for d in xrange(D):
                    

        if 0:
            for m in range(M):
                for d in range(D):
                    if self.theta_[m,d].max() > 50:
                        self.theta_[m,d] /= self.theta_[m,d].max() / 50