Example 1
def getTrueDistn(true_probs):
    if true_probs is None:
        football_data = pd.read_csv(
            os.path.join(os.environ['HOME'],
                         'Bayesian_Inference/csv/EPL20172018.csv'))
        naive_probs = football_data['FTR'].value_counts(normalize=True)
        true_distn = multinomial(1, naive_probs)
    else:
        true_distn = multinomial(1, true_probs)
    return true_distn
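A minimal usage sketch (scipy's multinomial plus the pandas/os imports assumed by the CSV branch); passing explicit probabilities skips the CSV fallback:

import os
import pandas as pd
from scipy.stats import multinomial

# Explicit probabilities: no CSV read is triggered.
true_distn = getTrueDistn([0.45, 0.30, 0.25])
print(true_distn.rvs(3))  # three one-hot draws over the three outcomes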
Example 2
async def generate_event_2(users, producer):
    m = {0: "good", 1: "neutral", 2: "bad"}

    n11 = norm(10, 2)
    n12 = norm(20, 5)
    rv = multinomial(1, [0.3, 0.2, 0.5])

    def gen_event(user):
        return (
            "user_%s" % user,
            {
                "userId": "user_%s" % user,
                "userValue3": round(n11.rvs() if user % 4 == 0 else n12.rvs(), 2),
                "userValue4": m[int(np.argmax(rv.rvs()))],
                "timestamp": int((datetime.utcnow() - datetime(1970, 1, 1)).total_seconds() * 1000)
            }
        )

    while True:
        size = random.randint(10, 20)
        print("Event 2", size)

        for user in random.sample(users, size):
            user_id, user_event = gen_event(user)
            producer.produce(topic="dev-v1-avro-event2", key={"user": user_id}, value=user_event)

        await asyncio.sleep(5)
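The categorical draw used for userValue4, shown in isolation: each rvs() row is one-hot, and argmax maps it back to a label.

import numpy as np
from scipy.stats import multinomial

m = {0: "good", 1: "neutral", 2: "bad"}
rv = multinomial(1, [0.3, 0.2, 0.5])
print([m[int(np.argmax(rv.rvs()))] for _ in range(5)])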
Example 3
def data_generating_process(N,
                            sigma_0,
                            p_domain,
                            gamma,
                            V,
                            theta,
                            coef,
                            beta=None,
                            random_state=None):
    """

    """
    ## Set Random State
    if random_state is not None:
        np.random.seed(random_state)
    ## Update Beta
    if beta is None:
        beta = 1 / V
    ## Convert Data Types
    theta = np.array(theta)
    coef = np.array(coef)
    ## Normalization of Parameters
    theta = theta / theta.sum(axis=1, keepdims=True)
    ## Update Document Topic Concentration
    theta = theta * sigma_0
    ## Generate Topic-Word Distributions
    phi = stats.dirichlet([beta] * V).rvs(theta.shape[1])
    ## Data Storage
    X_latent = np.zeros((N, coef.shape[1]), dtype=float)
    X = np.zeros((N, phi.shape[1]), dtype=int)
    D = np.zeros(N, dtype=int)
    ## Sample Procedure
    for n in tqdm(range(N), "Sampling"):
        ## Sample Domain
        D[n] = int(np.random.rand() < p_domain)
        ## Sample Document Topic Mixture (Conditioned on Domain)
        X_latent[n] = stats.dirichlet(theta[D[n]]).rvs()
        ## Sample Number of Words
        n_d = stats.poisson(gamma).rvs()
        ## Create Document
        for _ in range(n_d):
            ## Sample Topic
            z = np.where(stats.multinomial(1, X_latent[n]).rvs()[0] > 0)[0][0]
            ## Sample Word
            w = np.random.choice(phi.shape[1], p=phi[z])
            ## Cache
            X[n, w] += 1
    ## Standardize
    X_latent_normed = standardize(X_latent, D)
    ## Compute P(y)
    py = np.zeros(N)
    py[D == 0] = (1 /
                  (1 + np.exp(-coef[[0]].dot(X_latent_normed[D == 0].T))))[0]
    py[D == 1] = (1 /
                  (1 + np.exp(-coef[[1]].dot(X_latent_normed[D == 1].T))))[0]
    ## Sample Y
    y = np.zeros(N)
    y[D == 0] = (np.random.rand((D == 0).sum()) < py[D == 0]).astype(int)
    y[D == 1] = (np.random.rand((D == 1).sum()) < py[D == 1]).astype(int)
    return X_latent, X, y, D, theta, phi
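A call sketch with the snippet's assumed imports and a hypothetical per-domain z-scoring stand-in for the undefined standardize helper:

import numpy as np
from scipy import stats
from tqdm import tqdm

def standardize(X, D):
    # Hypothetical stand-in: z-score the latent mixtures within each domain.
    X = X.copy()
    for d in (0, 1):
        mask = D == d
        X[mask] = (X[mask] - X[mask].mean(axis=0)) / X[mask].std(axis=0)
    return X

X_latent, X, y, D, theta, phi = data_generating_process(
    N=200, sigma_0=10.0, p_domain=0.5, gamma=50, V=30,
    theta=[[2.0, 1.0, 1.0], [1.0, 1.0, 2.0]],   # 2 domains x 3 topics
    coef=[[1.0, -1.0, 0.5], [0.5, 1.0, -1.0]],  # per-domain logistic weights
    random_state=42)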
Example 4
def compare_case_control(case_counts, control_counts):
    results = []
    for pos in case_counts:
        this_case_counts = case_counts[pos]
        case_A = this_case_counts.A
        case_T = this_case_counts.T
        case_G = this_case_counts.G
        case_C = this_case_counts.C
        case_total = case_A + case_T + case_G + case_C
        if pos in control_counts:
            this_control_counts = control_counts[pos]
            control_A = this_control_counts['A']
            control_T = this_control_counts['T']
            control_G = this_control_counts['G']
            control_C = this_control_counts['C']
            control_total = control_A + control_T + control_G + control_C
            control_A_proportion = control_A / control_total
            control_T_proportion = control_T / control_total
            control_G_proportion = control_G / control_total
            control_C_proportion = control_C / control_total
            rv = multinomial(case_total, [
                control_A_proportion, control_T_proportion,
                control_G_proportion, control_C_proportion
            ])
            probability = rv.pmf([case_A, case_T, case_G, case_C])
            if probability <= 0.001:
                results.append(
                    (probability, pos, case_A, case_T, case_G, case_C,
                     control_A, control_T, control_G, control_C))
    return results
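A toy call; the hypothetical Counts namedtuple stands in for whatever attribute-style counter object the surrounding code passes as case counts (control counts are plain dicts):

from collections import namedtuple
from scipy.stats import multinomial

Counts = namedtuple("Counts", ["A", "T", "G", "C"])
case = {101: Counts(A=30, T=1, G=0, C=0)}
control = {101: {"A": 5000, "T": 4000, "G": 500, "C": 500}}
print(compare_case_control(case, control))  # position 101 passes the 0.001 cutoff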
Example 5
    def sample(self, batch_size, buckets=False):
        """Samples a batch of datapoints.

        Args:
            batch_size (int): Number of datapoints to sample.
            buckets (bool): Indicates whether bucket indices should be returned.

        Returns:
            Datapoint object with sampled datapoints stacked along the 0 axis.

        Raises:
            ValueError: If the buffer is empty.
        """
        if self._hierarchy_depth > 1:
            samples = [self._sample_one() for _ in range(batch_size)]
        else:
            p = np.ones((len(self._buffer_hierarchy), ), dtype=np.float32)
            p = np.atleast_1d(p) / p.sum()
            samples = []
            distribution = multinomial(batch_size, p=p)
            rvs = distribution.rvs(1).squeeze(axis=0)
            for bucket, n_samples in zip(self._buffer_hierarchy, rvs):
                if self._hierarchy_depth > 0:
                    buffer = self._buffer_hierarchy[bucket]
                else:
                    buffer = self._buffer_hierarchy
                samples_b = buffer.sample(n_samples)
                if buckets:
                    samples_b = samples_b + (np.full(n_samples, bucket), )
                samples.append(samples_b)

        return data.nested_concatenate(samples)
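The bucket-splitting idea in isolation: one multinomial draw decides how many samples to pull from each equally weighted bucket.

import numpy as np
from scipy.stats import multinomial

n_buckets, batch_size = 4, 32
p = np.ones(n_buckets) / n_buckets
per_bucket = multinomial(batch_size, p=p).rvs(1).squeeze(axis=0)
print(per_bucket, per_bucket.sum())  # allocation always sums to batch_size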
Example 6
    def proba(self, s):
        """
        Given a state observation :math:`s`, return a probability distribution
        over all possible actions.

        Parameters
        ----------
        s : state observation
            Depending on the observation space, `s` may be an integer or an
            array of floats.

        Returns
        -------
        dist : scipy.stats probability distribution
            Depending on the action space, this may be a discrete distribution
            (typically a categorical/multinomial distribution) or a continuous
            distribution (typically a normal distribution).

        """
        X_s = self.X_next(s)
        P = self.batch_eval(X_s)
        if isinstance(self.env.action_space, Discrete):
            return st.multinomial(n=1, p=P[0])
        else:
            raise NotImplementedError(
                "I haven't yet implemented continuous action spaces; "
                "please send me a message to let me know if this is holding "
                "you back. -kris")
Example 7
def getMargin(comps):
    # this implementation: only Beta, Weibull and Multinomial (cat)
    mtype = comps['mtype']
    params = comps['params']
    c_nr = comps['comps_nr']
    if mtype == 'Beta':
        dists = []
        for c_id in range(c_nr):
            #labels = ['a','b']
            param = params[c_id][:2]
            dist = stt.beta(a=param[0], b=param[1])
            dists.append(dist)
    elif mtype == 'Weibull':
        dists = []
        for c_id in range(comps['comps_nr']):
            #labels = ['c','scale']
            param = [params[(p_id * c_nr) + c_id] for p_id in range(2)]
            dist = stt.weibull_min(param[0], 0., param[1])
            dists.append(dist)
    elif mtype == 'Multinomial':
        # TODO: implement
        dists = [stt.multinomial(1, params)]
    try:
        dists[0].interval(1.)
    except Exception:
        domain = []
    else:
        domains = np.array(
            [dists[d_id].interval(1.) for d_id in range(len(dists))])
        domain_raw = [domains[:, 0].max(), domains[:, 1].min()]
        delta = 0.001 * (domain_raw[1] - domain_raw[0])
        domain = [domain_raw[0] + delta, domain_raw[1] - delta]
    return dists, domain
Example 8
def calc_full_log_likelihood(count_matrix,
                             node_membership,
                             duration,
                             bp_lambda,
                             num_classes,
                             add_com_assig_log_prob=True):
    """
    Calculates the full log likelihood of the Poisson baseline model.

    :param count_matrix: n_classes x n_classes where entry ij denotes the number of events in block pair ij
    :param node_membership: (list) membership of every node to one of K classes
    :param duration: (int) duration of the network
    :param bp_lambda: n_classes x n_classes where entry ij is the lambda of the block pair ij
    :param num_classes: (int) number of blocks / classes
    :param add_com_assig_log_prob: if True, adds the likelihood of the community assignment to the total log-likelihood.

    :return: log-likelihood of the Poisson baseline model
    """
    log_likelihood = 0

    bp_size = utils.calc_block_pair_size(node_membership, num_classes)
    bp_ll = count_matrix * np.log(bp_lambda) - (bp_lambda * duration * bp_size)
    log_likelihood += np.sum(bp_ll)

    if add_com_assig_log_prob:
        # Adding the log probability of the community assignments to the full log likelihood
        n_nodes = len(node_membership)
        _, block_count = np.unique(node_membership, return_counts=True)
        class_prob_mle = block_count / sum(block_count)
        rv_multi = multinomial(n_nodes, class_prob_mle)
        log_prob_community_assignment = rv_multi.logpmf(block_count)

        log_likelihood += log_prob_community_assignment

    return log_likelihood
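The community-assignment term on its own, as a quick sanity check with a hand-built membership vector:

import numpy as np
from scipy.stats import multinomial

node_membership = np.array([0, 0, 1, 2, 2, 2])
_, block_count = np.unique(node_membership, return_counts=True)
class_prob_mle = block_count / block_count.sum()
print(multinomial(len(node_membership), class_prob_mle).logpmf(block_count))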
Example 9
def initial_logp(states, p_transition):
    initial_state = states[0]
    states_oh = np.eye(len(p_transition))
    eq_p = equilibrium_distribution(p_transition)
    return (
        multinomial(n=1, p=eq_p)
        .logpmf(states_oh[initial_state].squeeze())
    )
Example 10
def BRIE_base_lik(psi, counts, lengths):
    """Base likelihood function of BRIE model
    """
    size_vect = np.array([psi, (1 - psi), 1]) * lengths
    prob_vect = size_vect / np.sum(size_vect)

    rv = multinomial(np.sum(counts), prob_vect)
    return rv.pmf(counts)
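A usage sketch with toy read counts and matching effective lengths:

import numpy as np
from scipy.stats import multinomial

counts = np.array([30, 50, 20])
lengths = np.array([100.0, 100.0, 200.0])
print(BRIE_base_lik(0.4, counts, lengths))  # likelihood of counts at psi = 0.4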
Example 11
def main():
    N = 1000
    s1 = norm(0, 1).rvs(size=N)
    s2 = binom(100, 0.1).rvs(size=N)
    s3 = gamma(1, 1).rvs(size=N)
    s4 = beta(1, 2).rvs(size=N)
    s5 = multinomial(200, [1/3, 1/3, 1/3]).rvs(size=N)
    return s1[-1]+s2[-1]+s3[-1]+s4[-1]+s5[-1]
Example 12
def MixedNormalDistribution(weights, means, covariances, size):
    cases = multinomial(1, weights).rvs(size)
    rvs = []
    for case in cases:
        k = case.tolist().index(1)
        rvs.append(multivariate_normal.rvs(means[k], covariances[k]))
    rvs = np.asarray(rvs)
    return [rvs[:, k] for k in range(len(rvs[0]))]
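A usage sketch: a two-component 2-D mixture, drawn 70/30 between components centred at the origin and at (5, 5):

import numpy as np
from scipy.stats import multinomial, multivariate_normal

xs, ys = MixedNormalDistribution(
    weights=[0.7, 0.3],
    means=[[0.0, 0.0], [5.0, 5.0]],
    covariances=[np.eye(2), 0.5 * np.eye(2)],
    size=1000)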
Example 13
def state_logp(states, p_transition):
    logp = 0

    # states are 0, 1, 2, but we model them as [1, 0, 0], [0, 1, 0], [0, 0, 1]
    states_oh = np.eye(len(p_transition))
    for curr_state, next_state in zip(states[:-1], states[1:]):
        p_tr = p_transition[curr_state]
        logp += multinomial(n=1, p=p_tr).logpmf(states_oh[next_state])
    return logp
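Together with initial_logp above, this scores a whole state sequence. A sketch assuming a hypothetical equilibrium_distribution (the stationary distribution, read off the unit eigenvector of the transposed transition matrix):

import numpy as np
from scipy.stats import multinomial

def equilibrium_distribution(p_transition):
    # Hypothetical stand-in: left eigenvector for eigenvalue 1, normalized.
    evals, evecs = np.linalg.eig(p_transition.T)
    stationary = np.real(evecs[:, np.argmin(np.abs(evals - 1.0))])
    return stationary / stationary.sum()

p_transition = np.array([[0.9, 0.1, 0.0],
                         [0.1, 0.8, 0.1],
                         [0.0, 0.1, 0.9]])
states = [0, 0, 1, 2, 2]
print(initial_logp(states, p_transition) + state_logp(states, p_transition))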
Example 14
    def GeneratePoint(self, probs):
        mb = ss.multinomial(
            1, probs)  # set n = 1 in the multinomial to simulate a multi-Bernoulli
        state = np.flatnonzero(
            mb.rvs(1)[0])[0] + 1  # should always be 1 in this sample
        self.states.append(state)
        gaussian = self.rvs[state - 1]
        point = gaussian.rvs(1)
        self.points.append(point)
Example 15
def resample(observation_states,
             observation_actions,
             support_states,
             support_policy,
             prior=None,
             alpha=1.,
             punishment=0.,
             support_feature_ids=None,
             support_feature_state_dict=None,
             observation_goal_actions=None,
             T=1.,
             return_best=False,
             **kwargs):
    """
    \sum p(O_i | g_z_i) x p(g_j)
    """
    # prob_vector over support_states or feature ids
    prob_vector = likelihood_vector(observation_states, observation_actions,
                                    support_policy, support_states,
                                    alpha=alpha, punishment=punishment,
                                    support_feature_ids=support_feature_ids,
                                    support_feature_state_dict=support_feature_state_dict,
                                    observation_goal_actions=observation_goal_actions)

    if prior is not None: prob_vector *= prior

    if return_best:
        chosen = np.argmax(prob_vector)
    else:
        prob_vector /= (np.sum(prob_vector) + eps)
        prob_vector = prob_vector**T
        prob_vector /= (np.sum(prob_vector) + eps)
        prob_sum = np.sum(prob_vector)

        while prob_sum < 0.99:
            prob_vector /= (np.sum(prob_vector) + eps)
            prob_sum = np.sum(prob_vector)
        try:
            assert prob_sum >= 0.95, "prob sum {} is lower than 1.".format(
                prob_sum)
        except:
            from IPython import embed
            embed()
            sys.exit()
        rv = multinomial(n=1, p=prob_vector)
        chosen = np.argmax(rv.rvs(1))

    if support_feature_ids is None:
        goal_chosen = support_states[chosen]
        policy_chosen = support_policy[goal_chosen]
        return [goal_chosen, policy_chosen]
    else:
        # chosen is a feature id
        goal_chosen = support_feature_state_dict[chosen]
        policy_chosen = support_policy[goal_chosen[0]]
        return [goal_chosen[0], policy_chosen, chosen]
Example 16
def test_entropy():
    """
    Test entropy.
    """
    cat_benchmark = stats.multinomial(n=1, p=[0.7, 0.3])
    expect_entropy = cat_benchmark.entropy().astype(np.float32)
    entropy = EntropyH()
    output = entropy()
    tol = 1e-6
    assert (np.abs(output.asnumpy() - expect_entropy) < tol).all()
Example 17
    def test_entropy_scalar(self):
        # The TFP Multinomial does not implement `entropy`, so we use scipy for
        # the tests.
        probs = np.asarray([0.1, 0.5, 0.4])
        total_count = 5
        scipy_entropy = stats.multinomial(n=total_count, p=probs).entropy()
        distrax_entropy_fn = self.variant(
            lambda x, y: multinomial.Multinomial._entropy_scalar(
                total_count, x, y))
        self.assertion_fn(distrax_entropy_fn(probs, np.log(probs)),
                          scipy_entropy)
Example 18
def mnll(true_counts, logits=None, probs=None):
    """
        Compute the multinomial negative log-likelihood between true
        counts and predicted values of a BPNet-like profile model
        
        One of `logits` or `probs` must be given. If both are
        given `logits` takes preference.

        Args:
            true_counts (numpy.array): observed counts values
            
            logits (numpy.array): predicted logits values
            
            probs (numpy.array): predicted values as probabilities
          
        Returns:
            float: average multinomial negative log-likelihood
    
    """

    if logits is not None:

        # check for length mismatch
        if len(logits) != len(true_counts):
            raise quietexception.QuietException(
                "Length of logits does not match length of true_counts")

        # convert logits to softmax probabilities
        probs = logits - logsumexp(logits)
        probs = np.exp(probs)

    elif probs is not None:

        # check for length mismatch
        if len(probs) != len(true_counts):
            raise quietexception.QuietException(
                "Length of probs does not match length of true_counts")

        # check if probs sums to 1
        if abs(1.0 - np.sum(probs)) > 1e-3:
            raise quietexception.QuietException(
                "'probs' array does not sum to 1")

    else:

        # both 'probs' and 'logits' are None
        raise quietexception.QuietException(
            "At least one of probs or logits must be provided. "
            "Both are None.")

    # compute the multinomial distribution
    mnom = multinomial(np.sum(true_counts), probs)
    return -(mnom.logpmf(true_counts) / len(true_counts))
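A quick sanity check, assuming the snippet's own imports (numpy plus scipy's logsumexp and multinomial) are in scope:

import numpy as np
from scipy.special import logsumexp
from scipy.stats import multinomial

true_counts = np.array([12, 30, 8])
logits = np.array([0.1, 1.2, -0.4])
print(mnll(true_counts, logits=logits))  # average per-position NLL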
Example 19
    def weighted_random_sampling(qnodes, coeffs, shots, argnums, *args,
                                 **kwargs):
        """Returns an array of length ``shots`` containing single-shot estimates
        of the Hamiltonian gradient. The shots are distributed randomly over
        the terms in the Hamiltonian, as per a multinomial distribution.

        Args:
            qnodes (Sequence[.QNode]): Sequence of QNodes, each one when evaluated
                returning the corresponding expectation value of a term in the Hamiltonian.
            coeffs (Sequence[float]): Sequences of coefficients corresponding to
                each term in the Hamiltonian. Must be the same length as ``qnodes``.
            shots (int): The number of shots used to estimate the Hamiltonian expectation
                value. These shots are distributed over the terms in the Hamiltonian,
                as per a Multinomial distribution.
            argnums (Sequence[int]): the QNode argument indices which are trainable
            *args: Arguments to the QNodes
            **kwargs: Keyword arguments to the QNodes

        Returns:
            array[float]: the single-shot gradients of the Hamiltonian expectation value
        """

        # determine the shot probability per term
        prob_shots = np.abs(coeffs) / np.sum(np.abs(coeffs))

        # construct the multinomial distribution, and sample
        # from it to determine how many shots to apply per term
        si = multinomial(n=shots, p=prob_shots)
        shots_per_term = si.rvs()[0]

        grads = []

        for h, c, p, s in zip(qnodes, coeffs, prob_shots, shots_per_term):

            # if the number of shots is 0, do nothing
            if s == 0:
                continue

            # set the QNode device shots
            h.device.shots = [(1, s)]

            jacs = []
            for i in argnums:
                j = qml.jacobian(h, argnum=i)(*args, **kwargs)

                if s == 1:
                    j = np.expand_dims(j, 0)

                # Divide each term by the probability per shot. This is
                # because we are sampling one at a time.
                jacs.append(c * j / p)

            grads.append(jacs)

        return [np.concatenate(i) for i in zip(*grads)]
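The shot-allocation step in isolation: the multinomial splits a fixed shot budget across Hamiltonian terms in proportion to the coefficient magnitudes.

import numpy as np
from scipy.stats import multinomial

coeffs = np.array([0.6, -0.3, 0.1])
prob_shots = np.abs(coeffs) / np.sum(np.abs(coeffs))
shots_per_term = multinomial(n=100, p=prob_shots).rvs()[0]
print(shots_per_term, shots_per_term.sum())  # sums to the 100-shot budget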
Example 20
    def test_entropy(self, dist_params):
        # The TFP Multinomial does not implement `entropy`, so we use scipy for
        # the tests.
        dist_params.update({
            'total_count': np.asarray([3, 10]),
        })
        dist = self.distrax_cls(**dist_params)
        entropy = list()
        for probs, counts in zip(dist.probs, dist.total_count):
            entropy.append(stats.multinomial(n=counts, p=probs).entropy())
        self.assertion_fn(self.variant(dist.entropy)(), np.asarray(entropy))
Example 21
def calc_full_log_likelihood(block_pair_events,
                             node_membership,
                             bp_mu,
                             bp_alpha,
                             bp_beta,
                             duration,
                             num_classes,
                             add_com_assig_log_prob=True):
    """
    Calculates the full log likelihood of the CHIP model.

    :param block_pair_events: (list) n_classes x n_classes where entry ij is a list of event lists from nodes in
                              block i to nodes in block j.
    :param node_membership: (list) membership of every node to one of K classes.
    :param bp_mu: n_classes x n_classes where entry ij is the mu of the block pair ij
    :param bp_alpha: n_classes x n_classes where entry ij is the alpha of the block pair ij
    :param bp_beta: n_classes x n_classes where entry ij is the beta of the block pair ij
    :param duration: (int) duration of the network
    :param num_classes: (int) number of blocks / classes
    :param add_com_assig_log_prob: if True, adds the likelihood of the community assignment to the total log-likelihood.

    :return: log-likelihood of the CHIP model
    """

    log_likelihood = 0
    for b_i in range(num_classes):
        for b_j in range(num_classes):
            bp_size = len(np.where(node_membership == b_i)[0]) * len(
                np.where(node_membership == b_j)[0])
            if b_i == b_j:
                bp_size -= len(np.where(node_membership == b_i)[0])

            log_likelihood += estimate_utils.block_pair_full_hawkes_log_likelihood(
                block_pair_events[b_i][b_j],
                bp_mu[b_i, b_j],
                bp_alpha[b_i, b_j],
                bp_beta[b_i, b_j],
                duration,
                block_pair_size=bp_size)

    if add_com_assig_log_prob:
        # Adding the log probability of the community assignments to the full log likelihood
        n_nodes = len(node_membership)
        _, block_count = np.unique(node_membership, return_counts=True)
        class_prob_mle = block_count / sum(block_count)
        rv_multi = multinomial(n_nodes, class_prob_mle)
        log_prob_community_assignment = rv_multi.logpmf(block_count)

        log_likelihood += log_prob_community_assignment

    return log_likelihood
Example 22
    def makeRGB(self, s):
        #random.seed(10)
        N = random.randint(1, self._max_block)
        mu = [1 / 3, 1 / 3, 1 / 3]
        rv = multinomial(N, mu, seed=s)
        X = rv.rvs(1)

        self._RED = X[0][0]
        self._GREEN = X[0][1]
        self._BLUE = X[0][2]
        self._N = self._RED + self._GREEN + self._BLUE
        self._orderdate = int(s / 10) + 1  # set the time arbitrarily?  # datetime.now()
        #print(self._fill_date)
        self.print_info()
Example 23
    def posterior_mean(self, data):
        N = np.sum(data)
        p_hat = np.zeros(self.data_model.get_params_count())
        margin = 0
        for p, bscc_dist in zip(self.traces, self.traces_bscc_dist):
            # P = self.data_model.eval_bscc(p)
            prior = 1
            for p_i in p:
                prior *= beta(self.hyperparams['alpha'],
                              self.hyperparams['beta']).pdf(p_i)
            # llh = multinomial(N, P).pmf(data)
            llh = multinomial(N, bscc_dist).pmf(data)
            margin += llh * prior
            p_hat += np.array(p) * llh * prior
        p_hat = p_hat / margin
        log_llh = self.np_llh(self.data_model.eval_bscc(p_hat), data)
        return p_hat, log_llh
Example 24
    def __init__(self, n, p):
        if n >= 0 and isinstance(n, numbers.Integral):
            self.n = n
        #elif n == 0:
        #raise NotImplementedError
        #TODO
        else:
            raise Exception("n must be a non-negative integer")

        if sum(p) == 1 and min(p) >= 0:
            self.p = p
        else:
            raise Exception("Elements of p must be non-negative" +
                            " and sum to 1.")

        self.discrete = False
        self.pdf = lambda x: stats.multinomial(n, p).pmf(x)
Example 25
    def posterior_mean(self, data):
        N = np.sum(data)
        p_hat = np.zeros(self.data_model.get_params_count())
        margin = 0
        for p in self.traces:
            P = self.data_model.sample_run_chain(p, max_trials=self.max_trials)
            prior = 1
            for p_i in p:
                prior *= beta(self.hyperparams['alpha'],
                              self.hyperparams['beta']).pdf(p_i)
            llh = multinomial(N, P).pmf(data)
            margin += llh * prior
            p_hat = p_hat + np.array(p) * llh * prior
        p_hat = p_hat / margin
        log_llh = self.np_llh(self.data_model.sample_run_chain(
            p_hat, max_trials=self.max_trials * 10), data)
        return p_hat, log_llh
Example 26
def generate_data(n_gen):
    x_gen = stats.skewnorm.rvs(scale=0.1, size=n_gen * x_dim, a=2)
    x_gen = x_gen.reshape((n_gen, x_dim))

    mu_gen = np.apply_along_axis(func, 1, x_gen)

    y_gen = stats.skewnorm.rvs(loc=beta0, scale=sigma, size=n_gen, a=4)
    y_gen = mu_gen + y_gen

    rv = stats.multinomial(1, [0.4, 0.3, 0.2, 0.1])
    y_gen += (rv.rvs(n_gen) * [1.4, -.2, 0, -1.8]).sum(1)

    y_gen = np.array(y_gen, dtype='f4')
    y_gen = torch.from_numpy(y_gen)
    y_gen = torch.sigmoid(y_gen).numpy()

    return x_gen, y_gen[:, None]
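The label-offset trick in isolation: each rvs() row is one-hot, so the row-wise product with the offset vector applies exactly one shift per sample.

import numpy as np
from scipy import stats

rv = stats.multinomial(1, [0.4, 0.3, 0.2, 0.1])
offsets = (rv.rvs(5) * [1.4, -0.2, 0.0, -1.8]).sum(1)
print(offsets)  # one offset drawn per sample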
Example 27
def Sample_Mixture(f, num):
    """
    Inputs:
        f: the mixture to be sampled
        num: the number of points to sample from the mixture
    Outputs:
        m_points: the model points that have been selected (Model_Points)
    """
    #so we need a multinomial distribution for component selection
    m_all = []
    q_pick = multinomial(num, f.w).rvs(1)[0]
    for j in range(f.n):
        fj = multivariate_normal(mean=f.m[j, :], cov=f.cov[j, :, :])
        m_j = fj.rvs(q_pick[j]).reshape([q_pick[j], f.d])
        m_all.append(m_j)
    m_points = Model_Points(np.concatenate(m_all))
    return m_points
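A call sketch with hypothetical stand-ins for the mixture container and the Model_Points wrapper, neither of which is defined in the snippet:

import numpy as np
from scipy.stats import multinomial, multivariate_normal
from types import SimpleNamespace

class Model_Points:
    # Hypothetical stand-in: just wraps the sampled point array.
    def __init__(self, points):
        self.points = points

f = SimpleNamespace(w=np.array([0.6, 0.4]),                # component weights
                    n=2,                                   # number of components
                    m=np.array([[0.0, 0.0], [4.0, 4.0]]),  # means, n x d
                    cov=np.stack([np.eye(2), np.eye(2)]),  # covariances, n x d x d
                    d=2)                                   # dimension
m_points = Sample_Mixture(f, 500)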
Example 28
    def _fit_multinomial(self, X, col_idx, y):
        """
        Fits classwise multinomial distributions to `X[:, col_idx]`
        using the sample parameter MLEs.

        Parameters
        ----------
        X : np.ndarray
            Matrix of features.

        col_idx : int
            The column index for the column to fit the multinomial to.

        y : np.ndarray
            Vector of target classes
        """
        fitted_distributions = {}
        all_X_values = list(range(int(X[:, col_idx].max()) + 1))
        # For each class...
        for val in sorted(set(y)):
            n = np.sum(y == val)  # Number of instances in the class
            # Rows in X belonging to the class
            relevant_subset = X[y == val, col_idx]
            # Counts of the values in X within the class
            value_counts = Counter(relevant_subset)
            # Laplace smoothing across all values in the column: alpha if a
            # value never occurs in the class, count + alpha otherwise
            # (Counter returns 0 for missing keys)
            all_x_value_counts_smoothed = OrderedDict({
                x_val: value_counts[x_val] + self.alpha
                for x_val in all_X_values
            })
            # normalizer = n + alpha * m
            normalizer = n + self.alpha * len(all_X_values)

            # Create the distribution for each class.
            fitted_distributions[val] = stats.multinomial(
                n=n,
                p=np.array(list(all_x_value_counts_smoothed.values())) /
                normalizer)

        if self.verbose:
            logger.info(f"Fitted multinomials for column {col_idx}")
            for k, v in fitted_distributions.items():
                logger.info(f"Class: {k} p: {np.round(v.p, 2)}")
        return fitted_distributions
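The smoothing arithmetic in isolation for a single class, relying on the standard Counter semantics (missing keys count as zero):

import numpy as np
from collections import Counter
from scipy.stats import multinomial

alpha = 1.0
column = np.array([0, 1, 1, 2, 2, 2])  # one feature column within a class
value_counts = Counter(column)
m = int(column.max()) + 1
n = len(column)
p = np.array([value_counts[v] + alpha for v in range(m)]) / (n + alpha * m)
dist = multinomial(n=n, p=p)
print(np.round(p, 3))  # [0.222, 0.333, 0.444], sums to 1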
Example 29
def P_tot(l, n_array, pr1, pr2):
    p0, p1, p2, p3 = Ps(pr1, pr2)
    va = multinomial(l, [p0, p1, p2, p3])
    lista_soma = []
    for h in range(0, n_array.size):
        n = n_array[h]
        soma = 0
        for k in range(0, int(n / 3) + 1):
            for j in range(0, int(n / 2) + 1):
                i = 0
                while i + 2 * j + 3 * k <= n:
                    if i + 2 * j + 3 * k == n:
                        soma += va.pmf([l - (i + j + k), i, j, k])
                    i += 1
        lista_soma.append(soma)
    n_soma = np.array(lista_soma)
    return n_soma
Example 30
    def __compute_B(self, data):
        self.multinomial = [multinomial(1, self.eta[i, :]) for i in range(self.K)]
        self.b = np.zeros((self.T, self.K))
        for t in range(self.T):
            self.b[t, :] = [self.eta[y, int(data[t, y])] for y in range(self.K)]
        '''
        T = len(data)
        self.b = np.zeros((T, self.K))
        for t in range(T):
            print(data[t,:])
            self.b[t, :] = [self.eta[y, data[t, :]] for y in range(self.K)]
        '''

        # other computation for log-scale
        if self.hmm_type == 'log-scale':
            self.log_b = np.zeros((self.T, self.K))
            for t in range(self.T):
                self.log_b[t, :] = np.log(self.b[t, :])
        return