def merge_final_log_probs(self, source1_decoder_attention_score,
                              source2_decoder_attention_score,
                              source1_local_words_ids,
                              source2_local_words_ids,
                              gate_score):
        """
        Fold the two gated attention distributions into one log-probability table.

        Source 1 scores are scaled by ``gate_score`` and source 2 scores by
        ``1 - gate_score``. Both are moved to log space (with a 1e-45 floor)
        and accumulated, one time step at a time, into a table with
        ``2 * self.max_seq_len`` columns. Steps that point at the same column
        are combined with ``logsumexp``.

        Args:
            source1_decoder_attention_score: 2-D scores, (group_size, seq_max_len_1).
            source2_decoder_attention_score: 2-D scores, (group_size, seq_max_len_2).
            source1_local_words_ids: integer column index for every source-1
                position, same shape as the source-1 scores.
            source2_local_words_ids: integer column index for every source-2
                position, same shape as the source-2 scores.
            gate_score: weight given to source 1; presumably one value per
                group, shape (group_size,) -- TODO confirm with the caller.

        Returns:
            Tensor of shape (group_size, 2 * self.max_seq_len) holding the
            merged log-probabilities.
        """
        # group_size ends up being the leading dimension of the second source.
        _, len_1 = source1_decoder_attention_score.size()
        group_size, len_2 = source2_decoder_attention_score.size()

        # Broadcast each gate share across the time steps of its source:
        # (group_size, len_1) and (group_size, len_2).
        weight_1 = gate_score.expand(len_1, -1).t()
        weight_2 = (1 - gate_score).expand(len_2, -1).t()

        gated_1 = source1_decoder_attention_score * weight_1
        gated_2 = source2_decoder_attention_score * weight_2

        log_probs_1 = (gated_1 + 1e-45).log()
        log_probs_2 = (gated_2 + 1e-45).log()

        # Start from an (almost) all-zero probability table, in log space.
        table = (gated_1.new_zeros((group_size, 2 * self.max_seq_len)) + 1e-45).log()

        def _fold(acc, step_log_probs, column_ids, steps):
            # Add every time step's probability mass to the column it points at.
            for step in range(steps):
                # shape: (group_size, 1)
                incoming = step_log_probs[:, step].unsqueeze(-1)
                columns = column_ids[:, step].unsqueeze(-1)
                # Mass already stored in the targeted columns.
                current = acc.gather(-1, columns)
                # log(exp(current) + exp(incoming)), shape: (group_size, 1)
                merged = logsumexp(torch.cat((current, incoming), dim=-1)).unsqueeze(-1)
                acc = acc.scatter(-1, columns, merged)
            return acc

        table = _fold(table, log_probs_1, source1_local_words_ids, len_1)
        table = _fold(table, log_probs_2, source2_local_words_ids, len_2)
        return table
Example #2
0
    def loss(self, input, target, mask=None):
        '''
        Negative log-likelihood of the target label sequence.

        The energies returned by ``self.forward`` are scanned over time to
        accumulate (a) the log partition function and (b) the energy of the
        target path; the result is their difference.

        Args:
            input: Tensor
                the input tensor with shape = [batch, length, input_size]
            target: Tensor
                the tensor of target labels with shape [batch, length]
            mask:Tensor or None
                the mask tensor with shape = [batch, length]

        Returns: Tensor
                A 1D tensor for minus log likelihood loss
        '''
        batch, length, _ = input.size()
        energy = self.forward(input, mask=mask)
        # shape = [length, batch, num_label, num_label]
        energy_transpose = energy.transpose(0, 1)
        # shape = [length, batch]
        target_transpose = target.transpose(0, 1)
        # shape = [length, batch, 1]
        mask_transpose = None
        if mask is not None:
            mask_transpose = mask.unsqueeze(2).transpose(0, 1)

        # shape = [batch, num_label]
        partition = None

        # Allocate the helper tensors on the device of `input` itself.
        # Calling `.cuda()` would place them on the *current* CUDA device,
        # which breaks when the input lives on another GPU.
        device = input.device
        # shape = [batch]
        batch_index = torch.arange(0, batch, device=device).long()
        # The last label index is used as the "previous label" of step 0.
        prev_label = torch.full((batch,), self.num_labels - 1,
                                dtype=torch.long, device=device)
        tgt_energy = torch.zeros(batch, device=device)

        for t in range(length):
            # shape = [batch, num_label, num_label]
            curr_energy = energy_transpose[t]
            if t == 0:
                partition = curr_energy[:, -1, :]
            else:
                # shape = [batch, num_label]
                partition_new = utils.logsumexp(curr_energy + partition.unsqueeze(2), dim=1)
                if mask_transpose is None:
                    partition = partition_new
                else:
                    # Keep the old partition wherever the mask is 0.
                    mask_t = mask_transpose[t]
                    partition = partition + (partition_new - partition) * mask_t
            tgt_energy += curr_energy[batch_index, prev_label, target_transpose[t].data]
            prev_label = target_transpose[t].data

        return utils.logsumexp(partition, dim=1) - tgt_energy
Example #3
0
    def gammaKsi(self, logB):
        """ Compute gamma (posterior distribution) and Ksi (joint successive
            posterior distribution) values.

        gamma [i,n] =  conditional probability of the event state 'i'
        at time 'n', given the complete observation sequence.

        ksi[n,i,j]  = joint posterior probability of two successive hidden
        states 'i' and 'j' at time 'n'.


        Parameters
        ----------
        logB : ndarray
            The observation probability matrix in logarithmic space.


        Returns
        -------
        llh : float
            The normalized log-likelihood.
        logGamma : ndarray
            The log posterior distribution.
        logKsi : ndarray
            The log joint posterior probability distribution.
        logAlpha : ndarray
            The log scaled alpha distribution.
        logBeta : ndarray
            The log scaled beta distribution.


        """

        K = self.K
        N = logB.shape[1]
        # Builtin `float` replaces the `np.float` alias removed from NumPy.
        logKsi = np.zeros((N - 1, K, K), dtype=float)
        logAlpha = self.alpha(logB)
        logBeta = self.beta(logB)
        loglikelihood = logsumexp(logAlpha[:, -1])
        # compute gamma: normalise alpha + beta over the state axis
        logGamma = logAlpha + logBeta
        logGamma -= logsumexp(logGamma, 0)
        # compute ksi; `range` works on both Python 2 and Python 3
        for n in range(N - 1):
            temp = logB[:, n + 1] + logBeta[:, n + 1]
            # rows: state at time n, columns: state at time n + 1;
            # the last row of logA is excluded from the transition block
            logKsi[n, :, :] = (logAlpha[:, n][:, np.newaxis] +
                               self.logA[:-1][:, :] + temp)
            logKsi[n, :, :] -= logsumexp(logKsi[n, :, :].flatten())
        return (loglikelihood, logGamma, logKsi, logAlpha, logBeta)
Example #4
0
    def compute_estimator(self, log_p_all, log_q_all):
        """Build the multi-sample likelihood estimate and its gradients.

        ``log_p_all`` and ``log_q_all`` are symbolic (batch_size, n_samples)
        log-densities. Returns ``(likelihood, gradients)``: the bound averaged
        over ``self.batch_size``, and the gradients of the surrogate estimator
        with respect to ``self.params.values()``.
        """
        sample_count = tt.shape(log_p_all)[1]

        # Log importance weights, (batch_size, n_samples).
        log_ratio = log_p_all - log_q_all
        # Log of the summed weights per datapoint, (batch_size,).
        log_total = logsumexp(log_ratio, axis=1)
        log_total_col = log_total.dimshuffle(0, 'x')
        # Per-datapoint bound: log of the mean importance weight.
        bound = log_total - tt.log(sample_count)

        # Leave-one-out baseline: drop sample i from the sum, then average
        # over the remaining n_samples - 1 terms (equation 10 in the original
        # comments).
        without_i = logsubexp(log_total_col, log_ratio)
        log_remaining = np.log(sample_count - 1).astype(theano.config.floatX)
        baseline = without_i - log_remaining
        learning_signal = bound.dimshuffle(0, 'x') - baseline

        # First term of the estimator: learning signal times log q.
        score_term = learning_signal * log_q_all

        # Second term: normalised importance weights times the log ratio.
        normalized_weights = tt.exp(log_ratio - log_total_col)
        weighted_term = normalized_weights * log_ratio

        estimator = (score_term + weighted_term).sum() / self.batch_size

        # The weights and the learning signal are treated as constants when
        # differentiating.
        gradients = tt.grad(
            estimator,
            self.params.values(),
            consider_constant=[normalized_weights, learning_signal])

        likelihood = bound.sum() / self.batch_size

        return likelihood, gradients
Example #5
0
 def log_cond_prob(self, y, y_pred):
     """Return ``sum(y * y_pred, dim=2) - logsumexp(y_pred, dim=2)``.

     With one-hot ``y`` this is the log-softmax of ``y_pred`` at the target
     class. Both inputs are reduced over dimension 2.
     """
     target_score = (y * y_pred).sum(2)
     log_normalizer = logsumexp(y_pred, 2)
     return target_score - log_normalizer
Example #6
0
File: VIMCO.py Project: y0ast/VIMCO
    def compute_estimator(self, log_p_all, log_q_all):
        """Return the averaged multi-sample bound and the estimator gradients.

        Inputs are symbolic (batch_size, n_samples) log-densities of the
        model (``log_p_all``) and of the proposal (``log_q_all``). The output
        is ``(likelihood, gradients)``, where the gradients are taken with
        respect to ``self.params.values()``.
        """
        k = tt.shape(log_p_all)[1]

        # log(p / q) for every sample, (batch_size, n_samples)
        log_w = log_p_all - log_q_all
        # log sum_i w_i, (batch_size,)
        log_sum_w = logsumexp(log_w, axis=1)
        log_sum_w_col = log_sum_w.dimshuffle(0, 'x')
        # log of the average weight, (batch_size,)
        L = log_sum_w - tt.log(k)

        # Sum with sample i removed, normalised by the k - 1 samples left
        # (equation 10 in the original comments).
        log_sum_without_i = logsubexp(log_sum_w_col, log_w)
        log_k_minus_1 = np.log(k - 1).astype(theano.config.floatX)
        signal = L.dimshuffle(0, 'x') - (log_sum_without_i - log_k_minus_1)

        # Normalised importance weights.
        w_tilde = tt.exp(log_w - log_sum_w_col)

        # The estimator is the sum of the score-function part and the
        # weighted part.
        surrogate = signal * log_q_all + w_tilde * log_w
        estimator = surrogate.sum() / self.batch_size

        # Both multipliers are held constant during differentiation.
        gradients = tt.grad(estimator,
                            self.params.values(),
                            consider_constant=[w_tilde, signal])

        likelihood = L.sum() / self.batch_size

        return likelihood, gradients
Example #7
0
 def logProbability(self, x):
     """Average the flow density over ``x`` and every symmetry image of ``x``.

     Computes ``logsumexp`` of the flow log-probabilities of ``x`` and of
     ``op(x)`` for each ``op`` in ``self.symmetryList``, flattens it, and
     subtracts the log of the number of terms.
     """
     terms = [self.flow.logProbability(x)]
     terms.extend(self.flow.logProbability(op(x)) for op in self.symmetryList)
     term_count = len(self.symmetryList) + 1
     return logsumexp(terms).view(-1) - math.log(term_count)
Example #8
0
def dp_inside_batch(batch_size,sentence_len,tags_dim,weights):
    """Fill the inside (log-space) chart for a batch and return it with the
    per-sentence log partition value.

    Args:
        batch_size: number of sentences in the batch.
        sentence_len: length shared by all sentences in the batch.
        tags_dim: size of the tag dimension of the chart.
        weights: 5-D score tensor (it is permuted with five axes below);
            presumably laid out as (batch, position, tag, position, tag)
            -- TODO confirm with the caller.

    Returns:
        inside_table: DoubleTensor of shape
            (batch_size, sentence_len * sentence_len * 8, tags_dim, tags_dim).
        ll: ``logaddexp`` of the two complete-sentence scores, each reduced
            over the last tag axis.
    """
    # All cells start at log(0); the table is float64.
    inside_table = torch.DoubleTensor(batch_size, sentence_len * sentence_len * 8, tags_dim, tags_dim)
    inside_table.fill_(-np.inf)
    # NOTE(review): moved to GPU whenever CUDA exists, regardless of where
    # `weights` lives -- confirm the caller always matches.
    if torch.cuda.is_available():
        inside_table = inside_table.cuda()
    m = sentence_len
    # Span index structures come from a project helper; their exact
    # semantics are not visible here.
    seed_spans, base_left_spans, base_right_spans, left_spans, right_spans, ijss, ikss, kjss, id_span_map, span_id_map = test_constituent_indexes(
            m, False)

    # Seed spans get log-probability 0 (probability 1).
    for ii in seed_spans:
        inside_table[:, ii, :, :] = 0.0

    # Base right spans read the weights with the two tag axes swapped.
    for ii in base_right_spans:
        (l, r, c) = id_span_map[ii]
        swap_weights = weights.permute(0, 1, 4, 3, 2)
        inside_table[:, ii, :, :] = swap_weights[:, r, :, l, :]

    # Base left spans read the weights directly.
    for ii in base_left_spans:
        (l, r, c) = id_span_map[ii]
        inside_table[:, ii, :, :] = weights[:, l, :, r, :]

    # Main DP sweep; `ijss` is iterated in the order the helper returned it.
    for ij in ijss:
        (l, r, c) = id_span_map[ij]
        if ij in left_spans:
            # NOTE(review): a missing key falls back to -1, which silently
            # indexes the last chart row -- confirm this cannot happen.
            ids = span_id_map.get((l, r, get_state_code(0, 0, 0)), -1)
            prob = inside_table[:, ids, :, :] + weights[:, l, :, r, :]
            inside_table[:, ij, :, :] = utils.logaddexp(inside_table[:, ij, :, :], prob)
        elif ij in right_spans:
            ids = span_id_map.get((l, r, get_state_code(0, 0, 0)), -1)
            swap_weights = weights.permute(0, 1, 4, 3, 2)
            prob = inside_table[:, ids, :, :] + swap_weights[:, r, :, l, :]
            inside_table[:, ij, :, :] = utils.logaddexp(inside_table[:, ij, :, :], prob)
        else:
            # Combine two sub-spans over every split point k.
            num_k = len(ikss[ij])
            beta_ik, beta_kj = inside_table[:, ikss[ij], :, :], inside_table[:, kjss[ij], :, :]
            # Broadcast to (batch, num_k, tag, tag, tag); the middle tag axis
            # is shared by the two halves.
            probs = beta_ik.contiguous().view(batch_size, num_k, tags_dim, tags_dim, 1) +\
                        beta_kj.contiguous().view(batch_size, num_k, 1, tags_dim, tags_dim)
            # Reduce over the split points (axis 1) and the shared tag (axis 3).
            probs = utils.logsumexp(probs, axis=(1, 3))
            inside_table[:, ij, :, :] = utils.logaddexp(inside_table[:, ij, :, :], probs)

    # The two chart cells that cover the whole sentence (0 .. m - 1).
    id1 = span_id_map.get((0, m - 1, get_state_code(0, 1, 0)), -1)
    id2 = span_id_map.get((0, m - 1, get_state_code(0, 1, 1)), -1)

    # Only tag index 0 is read on the first tag axis.
    score1 = inside_table[:, id1, 0, :].contiguous().view(batch_size, 1, tags_dim)
    score2 = inside_table[:, id2, 0, :].contiguous().view(batch_size, 1, tags_dim)
    ll = utils.logaddexp(utils.logsumexp(score1, axis=2), utils.logsumexp(score2, axis=2))
    return inside_table, ll
Example #9
0
    def predict(self, data, mc_test):
        """Run the graph on ``data`` and return ``(mean prediction, NLL)``.

        The negative log-likelihood is the negated sum of
        ``logsumexp(log_cond_prob, axis 0) - log(mc_test)``. The prediction
        is averaged over its first axis.
        """
        out = self.likelihood.predict(self.layer_out)

        cond_log_prob = self.likelihood.log_cond_prob(self.Y, self.layer_out)
        log_mean_prob = -np.log(mc_test) + utils.logsumexp(cond_log_prob, 0)
        nll = - tf.reduce_sum(log_mean_prob)

        feed = {self.X: data.X, self.Y: data.Y, self.mc: mc_test}
        pred, neg_ll = self.session.run([out, nll], feed_dict=feed)
        return np.mean(pred, 0), neg_ll
Example #10
0
 def _calculate_log_p(self, W, b):
     """Log-density of ``W`` and ``b`` under a two-component Gaussian mixture.

     Each component is zero-mean with scale ``self.sigma1`` / ``self.sigma2``
     and mixing weights ``self.pi`` / ``1 - self.pi``. The element-wise
     mixture log-densities are summed over both tensors.
     """
     def _mixture_log_density(values):
         # Stack the two weighted component log-densities and merge them.
         components = torch.stack([
             float(np.log(self.pi)) +
             self._calculate_log_gaussian(values, 0, self.sigma1),
             float(np.log(1 - self.pi)) +
             self._calculate_log_gaussian(values, 0, self.sigma2),
         ])
         return logsumexp(components).sum()

     total = 0.
     for tensor in (W, b):
         total += _mixture_log_density(tensor)
     return total
Example #11
0
 def estimatepostduration(self, logalpha, logbeta, logB, rankn, g, llh):
     """Estimate state durations based on the posterior distribution.

     Since the durations are truncated by the timeout parameter, we use
     a distribution free method.

     Parameters
     -----------
     logalpha : ndarray
         Log scaled alpha distribution.
     logbeta : ndarray
         Log scaled beta values.
     logB : ndarray
         Observation probability distribution in log-space.
     rankn : ndarray
         the top ranked 'n' for each state 'k', used to estimate state durations.
     g : ndarray
         log scaled posterior distribution ('logGamma')
     llh : float
         the normalized log-likelihood.

     Returns
     --------
     ndarray of int
         The estimated duration in each state (row-wise maximum of the
         expected values, truncated to int).
     ndarray
         The expected value of the state duration at the 'rankn'.

     Notes
     ------
     The QDHMM EM algorithm requires good initial estimates of the model
     parameters in order to converge to a good solution. We propose a
     distribution free method to find the expected value of state durations
     in a standard HMM model, which is then used to initialize the QDHMM
     'tau' parameters.

     """
     sub = len(rankn[0])
     N = logalpha.shape[1]
     K = self.K
     res = np.zeros((K, sub))
     for k in range(K):
         # every state except k
         inotk = set(range(K)) - {k}
         for idx, n in enumerate(rankn[k]):
             const = np.zeros(len(inotk))
             # Base Case
             tmp = (N - 1 - n) * self.logA[k, k] - np.log(1 - np.exp(self.logA[k, k]))
             # Induction, walking backwards from N - 2 down to n + 1.
             # `range` works on both Python 2 and Python 3.
             for t in range(N - 2, n, -1):
                 tmp = np.logaddexp(logbeta[k, t],
                                    logB[k, t + 1] + self.logA[k, k] + tmp)
             # Contribution of entering state k from each other state i.
             for x, i in enumerate(inotk):
                 const[x] = (logalpha[i, n] + self.logA[i, k] + logB[k, n + 1]
                             - (g[i, n] + llh))
             res[k, idx] = logsumexp(const) + tmp
     expected = np.exp(res)
     durations = np.max(expected, 1)
     return durations.astype(int), expected
Example #12
0
 def predict(self, latent_val):
     """
     Return the probability for all the samples, datapoints and classes.

     Applies a softmax over axis 2 of ``latent_val``: the log-normaliser is
     subtracted and the result is exponentiated.

     :param latent_val: tensor of latent values, normalised along axis 2
     :return: tensor of probabilities with the shape of ``latent_val``
     """
     log_normalizer = utils.logsumexp(latent_val, 2)
     normalized = latent_val - tf.expand_dims(log_normalizer, 2)
     return tf.exp(normalized)
Example #13
0
 def predict(self, y_pred):
     """
     Return the probability for all the samples, datapoints and classes.

     Softmax over dimension 2 of ``y_pred``. The log-normaliser is expanded
     to (self.mc, self.batch_size, self.classes) before it is subtracted.

     param: y_pred
     return: tensor of probabilities
     """
     log_normalizer = logsumexp(y_pred, 2).unsqueeze(2)
     log_normalizer = log_normalizer.expand(
         self.mc, self.batch_size, self.classes)
     return torch.exp(y_pred - log_normalizer)
def log_likelihood(X_apps, theta_unconstrained, pi_unconstrained):
    """
    Total log-likelihood of ``X_apps`` under a mixture of Bernoulli products.

    X_apps.shape = [n, 1, d]
    theta_unconstrained is a real [k, d] matrix of Bernoulli logits, where d
    is the number of app cats
    pi_unconstrained holds the unnormalised mixture logits (softmax over the
    last dim)
    """
    mixture_log_weights = logsoftmax(pi_unconstrained, dim=-1)
    components = Bernoulli(logits=theta_unconstrained)
    # Sum the per-feature log-probabilities for every (example, component).
    per_component = components.log_prob(X_apps).sum(dim=-1)
    # Marginalise the component out, then add up the examples.
    per_example = logsumexp(mixture_log_weights + per_component, dim=-1)
    return per_example.sum()
Example #15
0
    def logposterior(self, data):
        """
        Calculate log-posterior over scales given the data points.

        The log-joint from ``self.logjoint`` is normalised along axis 0.

        @type  data: array_like
        @param data: data stored in columns
        """
        log_joint = self.logjoint(data)
        log_evidence = logsumexp(log_joint, 0)
        return log_joint - log_evidence
Example #16
0
    def predict_log_proba(self, X):
        """
        Estimate log-probability

        The decision values are shifted by their row maximum and then
        normalised with a row-wise logsumexp (a log-softmax over axis 1).

        :param X: array-like of shape (n_samples, n_features) -- Input data
        :return: ndarray with one row per sample and one column per decision
            value -- Estimated log-probabilities
        """
        scores = self._decision_function(X)
        row_max = scores.max(axis=1)[:, np.newaxis]
        shifted = scores - row_max
        log_normalizer = logsumexp(shifted, axis=1)[:, np.newaxis]
        return shifted - log_normalizer
Example #17
0
def get_const(log_lambdas, desired_k):
    """Search for the multiplier that makes the expected sample size hit ``desired_k``.

    A log-space shift ``x`` is found by minimising the squared gap between
    ``desired_k`` and
    ``expected_k(log_lambdas + x) + desired_k * remaining_prob``, where
    ``remaining_prob`` is the probability mass missing from ``log_lambdas``.
    The function returns ``exp(x)``.
    """
    scaled_log_probs = np.log(desired_k) + log_lambdas
    remaining_prob = 1 - np.exp(utils.logsumexp(log_lambdas))
    # Starting point: correct desired_k by how far the naive scaling is off.
    correction = desired_k / expected_k(scaled_log_probs)
    initial_guess = np.log(correction * desired_k)

    def squared_gap(x):
        reached = expected_k(log_lambdas + x) + desired_k * remaining_prob
        return (desired_k - reached)**2

    solution = opt.minimize(squared_gap, initial_guess)
    return np.exp(solution.x[0])
Example #18
0
 def step(obs, prev):
     """Advance the log-probabilities by one observation.

     ``outer_sum`` pairs the previous log-probabilities with the observation
     log-probabilities, and the transition log-probabilities are added on
     top. The result is reduced with ``logsumexp`` along ``axis`` (a sum in
     probability space), which gives the log-probability of each state after
     the step. ``batch``, ``transitions`` and ``axis`` come from the
     enclosing scope.
     """
     paired = outer_sum(prev, obs, batch)
     return logsumexp(paired + transitions, axis=axis)
Example #19
0
def get_agg_kl(data, model, meta_optimizer):
    """Estimate the KL between the aggregate posterior and the prior.

    Every batch in ``data`` is encoded (and, for ``args.model == 'savae'``,
    refined through ``meta_optimizer``). The posterior parameters and one
    sample per datapoint are collected. For each sample, ``log q(z)`` is the
    log of the mean of ``q(z|x)`` over all collected posteriors, and
    ``log p(z)`` uses a zero-mean prior with zero log-variance.

    Args:
        data: indexable collection of ``(sents, length, batch_size)`` batches.
        model: exposes ``_enc_forward``, ``_reparameterize``, ``_dec_forward``.
        meta_optimizer: its ``forward`` refines the variational parameters
            in the 'savae' branch.

    Returns:
        The average of ``log q(z) - log p(z)`` over all samples.

    Side effects:
        Puts the model in eval mode and prints the number of "active units"
        (posterior-mean variance above 0.02) and the variances themselves.
    """
    model.eval()
    criterion = nn.NLLLoss().cuda()
    means = []
    logvars = []
    all_z = []
    for i in range(len(data)):
        sents, length, batch_size = data[i]
        if args.gpu >= 0:
            sents = sents.cuda()
        mean, logvar = model._enc_forward(sents)
        z_samples = model._reparameterize(mean, logvar)
        if args.model == 'savae':
            # Refine the amortised parameters before sampling again.
            mean_svi = Variable(mean.data, requires_grad=True)
            logvar_svi = Variable(logvar.data, requires_grad=True)
            var_params_svi = meta_optimizer.forward([mean_svi, logvar_svi],
                                                    sents)
            mean_svi_final, logvar_svi_final = var_params_svi
            z_samples = model._reparameterize(mean_svi_final, logvar_svi_final)
            preds = model._dec_forward(sents, z_samples)
            # NOTE(review): nll_svi and kl_svi are computed but never read in
            # this function; kept so the decoder forward pass still runs.
            nll_svi = sum([
                criterion(preds[:, l], sents[:, l + 1]) for l in range(length)
            ])
            kl_svi = utils.kl_loss_diag(mean_svi_final, logvar_svi_final)
            mean, logvar = mean_svi_final, logvar_svi_final
        means.append(mean.data)
        logvars.append(logvar.data)
        all_z.append(z_samples.data)
    means = torch.cat(means, 0)
    logvars = torch.cat(logvars, 0)
    all_z = torch.cat(all_z, 0)
    N = float(means.size(0))
    mean_prior = torch.zeros(1, means.size(1)).cuda()
    logvar_prior = torch.zeros(1, means.size(1)).cuda()
    agg_kl = 0.
    count = 0.
    for i in range(all_z.size(0)):
        z_i = all_z[i].unsqueeze(0).expand_as(means)
        log_agg_density = utils.log_gaussian(z_i, means,
                                             logvars)  # log q(z|x) for all x
        log_q = utils.logsumexp(log_agg_density, 0)
        log_q = -np.log(N) + log_q
        log_p = utils.log_gaussian(all_z[i].unsqueeze(0), mean_prior,
                                   logvar_prior)
        agg_kl += log_q.sum() - log_p.sum()
        count += 1
    # Variance of the posterior means over the WHOLE dataset. The loop
    # variable `mean` only holds the last batch.
    mean_var = means.var(0)
    print('active units', (mean_var > 0.02).float().sum())
    print(mean_var)

    return agg_kl / count
Example #20
0
def log_sample_poisson(log_lambdas, k=1, normalize=True, seed=0):
    """Draw an index set by independent inclusion trials in log space.

    The inclusion log-probability of item ``i`` is
    ``log(k) + log_lambdas[i]``, minus ``logsumexp(log_lambdas)`` when
    ``normalize`` is true. Item ``i`` is selected when the log of a fresh
    uniform draw falls below it.

    The global NumPy random state is re-seeded with ``seed`` on every call.

    Returns:
        (J, inc_probs): the list of selected indices and the inclusion
        log-probabilities.
    """
    np.random.seed(seed=seed)

    inc_probs = np.log(k) + log_lambdas
    if normalize:
        inc_probs -= utils.logsumexp(log_lambdas)

    # One uniform draw per item, in order.
    selected = [
        index for index, log_prob in enumerate(inc_probs)
        if np.log(np.random.uniform()) < log_prob
    ]
    return selected, inc_probs
		def logdrcdf(norm):
			"""
			Logarithm of the derivative of the radial CDF, mixed over all
			scales of ``self.gsm``.
			"""

			gsm = self.gsm

			# one row per scale, one column per entry of `norm`
			per_scale = zeros([gsm.num_scales, len(norm)])

			root_scales = sqrt(gsm.scales)

			for idx in range(gsm.num_scales):
				root = root_scales[idx]
				per_scale[idx, :] = log(gsm.priors[idx]) + logdgrcdf(root * norm, gsm.dim) + log(root)

			# marginalize over the scales
			return logsumexp(per_scale, 0)
Example #22
0
    def posterior(self, data):
        """
        Compute posterior over component indices.

        Each row holds one component's expected log-likelihood of the data,
        offset by ``psi(gamma) - psi(sum(gamma))``. The rows are normalised
        along axis 0 and exponentiated.
        """

        log_post = empty([len(self), data.shape[1]])

        for idx, component in enumerate(self.components):
            log_post[idx] = component.expected_log_likelihood(data)

        prior_term = psi(self.gamma) - psi(sum(self.gamma))
        log_post += prior_term
        log_post -= logsumexp(log_post, 0)

        return exp(log_post)
Example #23
0
        def logdrcdf(norm):
            """
            Logarithm of the derivative of the radial CDF, combined over the
            scales of ``self.gsm`` with logsumexp.
            """

            model = self.gsm

            # rows index the scales, columns index the entries of `norm`
            terms = zeros([model.num_scales, len(norm)])

            sqrt_scales = sqrt(model.scales)

            for j in range(model.num_scales):
                s = sqrt_scales[j]
                log_density = logdgrcdf(s * norm, model.dim)
                terms[j, :] = log(model.priors[j]) + log_density + log(s)

            return logsumexp(terms, 0)
Example #24
0
def compute_log_model_probability(scores, ranking, gpu_id=None):
    """
    Log-probability of ``ranking`` under sequential softmax sampling.

    At each rank the chosen item's score is normalised against all items
    that are still available. Items already placed are pushed out of the
    softmax by subtracting ``score + 1e6`` from their entry.

    scores: 1-D tensor of item scores.
    ranking: item indices in ranked order; the first ``scores.size(0)``
        entries are used.
    gpu_id: when given, the work buffers are moved with
        ``convert_vars_to_gpu``.
    """
    subtracts = torch.zeros_like(scores)
    log_probs = torch.zeros_like(scores)
    if gpu_id is not None:
        subtracts, log_probs = convert_vars_to_gpu([subtracts, log_probs],
                                                   gpu_id)
    num_items = scores.size()[0]
    for rank in range(num_items):
        item = ranking[rank]
        chosen_score = scores[item]
        # log-normaliser over the items that are not placed yet
        log_normalizer = logsumexp(scores - subtracts, dim=0)
        log_probs[rank] = chosen_score - log_normalizer
        # mask the item out of all later normalisers
        subtracts[item] = chosen_score + 1e6
    return torch.sum(log_probs)
    def sample_nav_article_nav_assignments(self, article_id):
        """Sample a topic assignment for every nav of ``article_id``.

        For each nav, the log of the latest article proportions is added to
        the per-topic nav log-probabilities, the sum is normalised, and one
        topic index is drawn from that distribution.

        Returns a list with one sampled topic index per nav.
        """
        log_proportions = np.log(
            self.nav_article_proportions[-1][article_id])

        def _draw_topic(nav_id):
            log_weights = log_proportions + np.array([
                self.calculate_topic_nav_logprob(topic_id, nav_id)
                for topic_id in range(self.num_nav_topics)
            ])
            probabilities = np.exp(log_weights - logsumexp(log_weights))
            return choice(self.num_nav_topics, p=probabilities)

        navs = self.training_data.article_navs[article_id]
        return [_draw_topic(nav_id) for nav_id in navs]
Example #26
0
def xe(z, targets, predict=False, error=False, addon=0):
    """
    Cross entropy error.

    With ``predict`` set, only the row-wise argmax of ``z`` is returned.
    Otherwise the mean negative log-softmax at ``targets`` plus ``addon`` is
    returned. With ``error`` set, the result is a pair that also holds
    ``(softmax - one_hot) / n``.
    """
    if predict:
        return gpu.argmax(z, axis=1)

    log_softmax = z - logsumexp(z, axis=1)
    n_rows, _ = log_softmax.shape
    loss = -gpu.mean(log_softmax[np.arange(n_rows), targets])
    if not error:
        return loss + addon

    delta = gpu.exp(log_softmax)
    delta[np.arange(n_rows), targets] -= 1
    return loss + addon, delta / n_rows
def calc_marg_log_prob(X, obs_rvs, w, all_rvs_params=None, g=None):
    """
    Calculate marginal log probabilities of observations under a mixture.

    :param X: M x N_o matrix of (partial) observations, where N_o is the number of obs nodes; alternatively a N_o vector
    :param obs_rvs: length N_o list of observed rvs
    :param w: mixture weights; their log is added to the per-component log probabilities
    :param all_rvs_params: forwarded unchanged to calc_marg_comp_log_prob
    :param g: forwarded unchanged to calc_marg_comp_log_prob
    :return: logsumexp over the last (component) dimension of the weighted
        per-component log probabilities
    """
    # M x K per-component log probabilities
    per_component = calc_marg_comp_log_prob(
        X, obs_rvs, all_rvs_params=all_rvs_params, g=g)
    weighted = np.log(w) + per_component
    # reduce along the last dimension
    return utils.logsumexp(weighted, axis=-1)
    def logLikelihood(self, X, Xcov):
        """
        Compute the log-likelihood of data given the model

        The per-component log-probabilities from ``self.logprob_a`` are
        combined over the last axis with logsumexp, then summed.

        Parameters
        ----------
        X: array_like
            data, shape = (n_samples, n_features)
        Xcov: array_like
            covariances, shape = (n_samples, n_features, n_features)

        Returns
        -------
        logL : float
            log-likelihood
        """
        component_logprob = self.logprob_a(X, Xcov)
        per_sample = logsumexp(component_logprob, -1)
        return np.sum(per_sample)
Example #29
0
    def logLikelihood(self, X, Xcov):
        """
        Compute the log-likelihood of data given the model

        Parameters
        ----------
        X: array_like
            data, shape = (n_samples, n_features)
        Xcov: array_like
            covariances, shape = (n_samples, n_features, n_features)

        Returns
        -------
        logL : float
            log-likelihood: the sum of the logsumexp, over the last axis, of
            the values returned by ``self.logprob_a``
        """
        log_terms = self.logprob_a(X, Xcov)
        # marginalise over the last axis, then add everything up
        marginal = logsumexp(log_terms, -1)
        total = np.sum(marginal)
        return total
Example #30
0
	def logposterior(self, data):
		"""
		Computes the log-posterior distribution over components.

		@type  data: array_like
		@param data: data points stored in columns

		@rtype: ndarray
		@return: a posterior distribution for each data point
		"""

		# compute unnormalized log-posterior, one entry per component; a real
		# list is required because `vstack` rejects the lazy `map` object
		# that Python 3 produces
		logpost = vstack([
			self[i].loglikelihood(data) + log(self.priors[i])
			for i in range(len(self))])

		# normalize posterior
		return asarray(logpost) - logsumexp(logpost, 0)
Example #31
0
	def loglikelihood(self, data):
		"""
		Computes the log-likelihood of the model for the given data.

		@type  data: array_like
		@param data: data points stored in columns

		@rtype: ndarray
		@return: a log-likelihood for each data point
		"""

		# compute joint density over components and data points; a real list
		# is required because `vstack` rejects the lazy `map` object that
		# Python 3 produces
		logjoint = vstack([
			self[i].loglikelihood(data) + log(self.priors[i])
			for i in range(len(self))])

		# marginalize
		return logsumexp(logjoint, 0).flatten()
    def get_negative_log_likelihood(self, source1_decoder_attention_score, source2_decoder_attention_score,
                                    source1_token_mask, source2_token_mask, target_to_source1,
                                    target_to_source2, target_tokens, gate_score):
        """
        Log-likelihood of the target under the gated two-source copy mixture.

        Computes ``log(gate * p1 + (1 - gate) * p2)``, where ``p_k`` is the
        attention mass of source ``k`` on the positions flagged by
        ``target_to_source_k``. A 1e-20 floor guards every logarithm.

        NOTE(review): despite the name, the value is returned WITHOUT being
        negated -- confirm that the caller negates it. The two token masks
        and ``target_tokens`` are accepted but not used here.
        """
        eps = 1e-20

        def _matched_log_mass(attention_score, target_to_source):
            # Attention mass on the matching positions, summed over the last
            # (sequence) dimension, in log space.
            return ((attention_score * target_to_source.float()).sum(-1) + eps).log()

        log_copy_1 = _matched_log_mass(source1_decoder_attention_score, target_to_source1)
        log_copy_2 = _matched_log_mass(source2_decoder_attention_score, target_to_source2)

        # log of the gate share of each source
        log_gate_1 = (gate_score + eps).log()
        log_gate_2 = (1 - gate_score + eps).log()

        # Stack the two weighted branches on a new last dimension and merge.
        branches = torch.cat(((log_gate_1 + log_copy_1).unsqueeze(-1),
                              (log_gate_2 + log_copy_2).unsqueeze(-1)), -1)
        return logsumexp(branches)
Example #33
0
    def alpha(self, logB):
        """ Compute alpha (forward) distribution.

            alpha [i,n] =  joint probability of being in state i,
            after observing 1..N observations.

        Parameters
        ----------
        logB : ndarray
            The observation probability matrix in logarithmic space,
            shape (K, N).

        Returns
        -------
        logalpha : ndarray
            The log scaled alpha distribution, shape (K, N).

        Notes
        -----
        Refer to Tobias Mann's paper [1]_ for the motivation behind the
        scaling factors used here. Note that this scaling method is suitable
        when the dynamics of the system is not highly sparse. Adaptation of
        log-scaling in the QDHMM would require the user to construct a new
        sparse data structure

        References
        ----------
        .. [1] Mann, T. P. Numerically Stable Hidden Markov Model
               Implementation 2006.

        """
        K = self.K
        N = logB.shape[1]
        assert logB.shape == (K, N)
        # Builtin `float` replaces the `np.float` alias removed from NumPy.
        logAlpha = np.zeros((K, N), dtype=float)
        # Base case, when n=0: the last row of logA is used as the initial
        # state distribution.
        logAlpha[:, 0] = self.logA[-1] + logB[:, 0]
        # induction; `range` works on both Python 2 and Python 3
        for n in range(1, N):
            logAlpha[:, n] = logsumexp(self.logA[:-1][:, :].T +
                                       logAlpha[:, n - 1], 1) + logB[:, n]
        return logAlpha
Example #34
0
    def beta(self, logB):
        """ Compute beta (backward) distribution.

            beta [i,n] =  conditional probability generating observations
            Y_n+1..Y_N, given Z_n.

        Parameters
        ----------
        logB : ndarray
            The observation probability matrix in logarithmic space.

        Returns
        -------
        logbeta : ndarray
            The log scaled beta distribution, shape (K, N).

        Notes
        -----
        Refer to Tobias Mann's paper [1]_ for the motivation behind the
        scaling factors used here. Note that this scaling method is suitable
        when the dynamics of the system is not highly sparse. Adaptation of
        log-scaling in the QDHMM would require the user to construct a new
        sparse data structure

        References
        ----------
        .. [1] Mann, T. P. Numerically Stable Hidden Markov Model
               Implementation 2006.

        """
        K = self.K
        N = logB.shape[1]
        # Builtin `float` replaces the `np.float` alias removed from NumPy.
        logBeta = np.zeros((K, N), dtype=float)
        # Base case when n = N
        logBeta[:, -1] = 0.0
        # Induction, backwards in time; `range` works on both Python 2 and
        # Python 3. The last row of logA is excluded from the transitions.
        for n in range(N - 2, -1, -1):
            logBeta[:, n] = logsumexp(logBeta[:, n + 1] +
                                      self.logA[:-1][:, :] +
                                      logB[:, n + 1], 1)
        return logBeta
Example #35
0
def get_agg_kl(q_data, test_data, model):
    """Average, over test samples, log q(z) - log p(z) for sampled codes z.

    The encoder outputs for every batch in ``q_data`` are pooled; the
    aggregate density q(z) of a sampled test code is the log-mean-exp of
    ``utils.log_gaussian(z, means, logvars)`` over that pool.  The prior is
    passed to ``utils.log_gaussian`` as an all-zero mean and all-zero
    log-variance.  NOTE(review): ``utils.log_gaussian`` and
    ``utils.logsumexp`` are defined elsewhere; their exact reductions are
    assumed, not visible here.

    Args:
        q_data: iterable of ``(img, _)`` batches used to build the aggregate
            posterior.
        test_data: iterable of ``(img, _)`` batches whose sampled codes are
            scored.
        model: object exposing ``eval()``, ``_enc_forward(img)`` returning
            ``(mean, logvar)`` and ``_reparameterize(mean, logvar)``.

    Returns:
        The accumulated ``log_q.sum() - log_p.sum()`` divided by the number
        of scored test samples.  Raises ZeroDivisionError when ``test_data``
        yields no samples.

    Side effects:
        Moves data to CUDA, switches the model to eval mode and prints the
        per-dimension variance of the pooled means together with the count
        of dimensions whose variance exceeds 0.02.
    """
    model.eval()

    # Pass 1: encode q_data and pool the posterior parameters.
    mean_chunks, logvar_chunks = [], []
    for img, _ in q_data:
        mean, logvar = model._enc_forward(Variable(img.cuda()))
        # The drawn sample is not used afterwards; the call is kept so any
        # random state consumed by the model is unchanged.
        model._reparameterize(mean, logvar)
        mean_chunks.append(mean.data)
        logvar_chunks.append(logvar.data)
    means = torch.cat(mean_chunks, 0)
    logvars = torch.cat(logvar_chunks, 0)

    N = float(means.size(0))
    latent_dim = means.size(1)
    mean_prior = torch.zeros(1, latent_dim).cuda()
    logvar_prior = torch.zeros(1, latent_dim).cuda()

    # Pass 2: score one sampled code per test example.
    agg_kl = 0.
    count = 0.
    for img, _ in test_data:
        mean, logvar = model._enc_forward(Variable(img.cuda()))
        z_samples = model._reparameterize(mean, logvar).data
        for idx in range(z_samples.size(0)):
            z_row = z_samples[idx].unsqueeze(0)
            # log q(z|x) for all x in the pool
            log_agg_density = utils.log_gaussian(
                z_row.expand_as(means), means, logvars)
            log_q = -np.log(N) + utils.logsumexp(log_agg_density, 0)
            log_p = utils.log_gaussian(z_row, mean_prior, logvar_prior)
            agg_kl += log_q.sum() - log_p.sum()
            count += 1

    mean_var = means.var(0)
    print('active units', (mean_var > 0.02).float().sum())
    print(mean_var)
    return agg_kl / count
Example #36
0
 def fit(self, obs, logweights):
     """Re-estimate the per-state Gaussian parameters from weighted data.

     Parameters
     ----------
     obs : ndarray
         Observation matrix; axis 1 must line up with the concatenated
         weights (one column per time step).
     logweights : sequence of ndarray
         The weights attached to each state (posterior distribution), in
         log-space, one array per sequence.  They are concatenated along
         axis 1.

     Notes
     -----
     Updates ``self.mu[:, k]`` and ``self.covar[k]`` in place for every
     state ``k`` in ``range(self.K)``; nothing is returned.
     """
     log_gamma = np.concatenate(logweights, 1)
     # Normalise each state's weights over time so every row sums to one.
     row_log_total = logsumexp(log_gamma, 1)
     weights = np.exp(log_gamma - row_log_total[:, np.newaxis])
     for state in range(self.K):
         w = weights[state, :]
         # Weighted mean of the observations for this state.
         self.mu[:, state] = np.dot(w[np.newaxis, :], obs.T)
         centered = obs - self.mu[:, state][:, np.newaxis]
         # Weighted scatter of the centred observations.
         self.covar[state, :, :] = np.dot(centered * w, centered.T)
Example #37
0
def forward(observations, transitions, sequence_len, batch=False):
    """Run the forward algorithm and return the total log probability.

    The first observation seeds the recursion; the remaining ones are fed
    through ``scan`` with the step function built by
    ``make_forward_step``.  The last scan output is then reduced with
    ``logsumexp`` over the state axis.  All probabilities are in
    logarithmic space.

    See e.g. https://en.wikipedia.org/wiki/Forward_algorithm .

    Args:
        observations (tensor): Observation log probabilities, shape
            (sequence_len, num_states) if batch is False,
            (batch_size, sequence_len, num_states) otherwise.
        transitions (tensor): A (num_states, num_states) tensor of the
            transition weights (log probabilities).
        sequence_len (int): The number of steps in the sequence.  This
            must be given because unrolling scan() requires a definite
            (not tensor) value.
        batch (bool): Whether to run in batchwise mode.  If True, the
            first dimension of observations corresponds to the batch.

    Returns:
        Total log probability if batch is False, or a vector of log
        probabilities otherwise.
    """
    step = make_forward_step(transitions, batch)

    if batch:
        first, rest = observations[:, 0, :], observations[:, 1:, :]
    else:
        first, rest = observations[0, :], observations[1:, :]

    # The first observation is the initial state, so scan runs one step fewer.
    n_steps = sequence_len - 1
    outputs, _ = scan(step, rest, first, n_steps=n_steps, batch=batch)

    if batch:
        return logsumexp(outputs[:, n_steps - 1], axis=1)
    return logsumexp(outputs[n_steps - 1], axis=0)
Example #38
0
    def log_likelihood(self, reward_hypothesis, q_values, demonstrations):
        """Return the log-likelihood of the demonstrations under q_values.

        Args:
            reward_hypothesis: iterable of reward weights; only inspected
                when ``self.prior == "non-pos"``.
            q_values: flat sequence laid out as
                [q(s0,a0), q(s1,a0), ..., q(sn,am)], i.e. the entry for
                (s, a) lives at index ``s + self.num_states * a``.
            demonstrations: list of (state, action) pairs.

        Returns:
            ``-np.inf`` if the non-positive prior is active and any weight
            is positive; otherwise the summed per-demonstration
            log-likelihood.

        Raises:
            NotImplementedError: if ``self.likelihood`` is neither "birl"
                nor "uniform" and at least one demonstration is scored.
        """
        # A "non-pos" prior rules out any hypothesis with a positive weight.
        if self.prior == "non-pos" and any(r > 0 for r in reward_hypothesis):
            return -np.inf

        total = 0.0
        for s, a in demonstrations:
            # There are no counterfactuals in a terminal state, and pairs
            # without an action carry no information.
            if s in self.mdp_env.terminals or a is None:
                continue

            demo_idx = s + self.num_states * a
            if self.likelihood == "birl":
                # Softmax over actions with inverse temperature beta.
                scaled_qs = [
                    self.beta * q_values[s + self.num_states * b]
                    for b in range(self.num_actions)
                ]
                total += (self.beta * q_values[demo_idx]
                          - utils.logsumexp(scaled_qs))
            elif self.likelihood == "uniform":
                # Penalise every action whose Q-value beats the demonstrated one.
                penalty = 0.0
                for b in range(self.num_actions):
                    gap = (q_values[s + self.num_states * b] -
                           q_values[demo_idx])
                    penalty += max(gap, 0.0)
                total += -self.beta * penalty
            else:
                raise NotImplementedError
        return total
Example #39
0
def runSmc(args):
    """Initialise and run SMC, print diagnostics and evaluate predictions.

    The print statements are written as single-argument calls and the
    per-island particle count uses floor division, so the function behaves
    the same on Python 2 and Python 3.

    Parameters
    ----------
    args : tuple
        ``(smcData, settings, do_metrics)``.  ``smcData`` must provide the
        keys ``'x_train'``, ``'y_train'``, ``'x_test'`` and ``'y_test'``;
        ``settings`` must provide ``debug``, ``n_particles``, ``n_islands``,
        ``ess_threshold``, ``proposal``, ``weight_islands`` and ``tag``.

    Returns
    -------
    tuple
        A one-element tuple holding the overall predictive probabilities on
        the test data.

    Raises
    ------
    AssertionError
        In debug mode when a weight consistency check fails, or when
        ``settings.weight_islands == 1`` and ``'islandv1'`` is not in
        ``settings.tag``.
    """
    smcData, settings, do_metrics = args
    print('\nInitializing SMC\n')
    # precomputation
    (particles, param, log_weights, cache, cache_tmp) = \
        bdtsmc.init_smc(smcData, settings)

    # Run smc
    print('\nRunning SMC')
    (particles, ess_itr, log_weights_itr, log_pd, particle_stats_itr_d,
     particles_itr_d, log_pd_islands) = \
        bdtsmc.run_smc(particles, smcData, settings, param, log_weights, cache)

    # Printing some diagnostics
    print('')
    print('Estimate of log marginal probability i.e. log p(Y|X) = %s ' % log_pd)
    print('Estimate of log marginal probability for different islands = %s' % log_pd_islands)
    print('logsumexp(log_pd_islands) - np.max(log_pd_islands) = %s\n' %
          (logsumexp(log_pd_islands) - np.max(log_pd_islands)))
    if settings.debug == 1:
        print('log_weights_itr = \n%s' % log_weights_itr)
        # check if log_weights are computed correctly
        for i_, p in enumerate(particles):
            log_w = log_weights_itr[-1, i_] + np.log(settings.n_particles) - np.log(settings.n_islands)
            logprior_p = p.compute_logprior()
            loglik_p = p.compute_loglik()
            logprob_p = p.compute_logprob()
            if (np.abs(settings.ess_threshold) < 1e-15) and (settings.proposal == 'prior'):
                # for the criterion above, only loglik should contribute to the weight update
                try:
                    check_if_zero(log_w - loglik_p)
                except AssertionError:
                    print('Incorrect weight computation: log_w (smc) = %s, loglik_p = %s' % (log_w, loglik_p))
                    # Re-raise the original error so its traceback is kept.
                    raise
            try:
                check_if_zero(logprob_p - loglik_p - logprior_p)
            except AssertionError:
                print('Incorrect weight computation')
                print('check if 0: %s, logprior_p = %s, loglik_p = %s' % (logprob_p - loglik_p - logprior_p, logprior_p, loglik_p))
                raise

    # Evaluate
    print('Results on training data (log predictive prob is bogus)')
    # log_predictive on training data is bogus ... you are computing something like \int_{\theta} p(data|\theta) p(\theta|data)
    if settings.weight_islands == 1:
        # each island's prediction is weighted by its marginal likelihood estimate which is equivalent to micro-averaging globally
        weights_prediction = softmax(log_weights_itr[-1, :])
        assert('islandv1' in settings.tag)
    else:
        # correction for macro-averaging predictions across islands
        weights_prediction = np.ones(settings.n_particles) / settings.n_islands
        # Floor division keeps the per-island count an integer on Python 3,
        # where it is used below to build index ranges.
        n_particles_tmp = settings.n_particles // settings.n_islands
        for i_ in range(settings.n_islands):
            # Contiguous block of particle ids owned by island i_.
            island = slice(i_ * n_particles_tmp, (i_ + 1) * n_particles_tmp)
            weights_prediction[island] *= softmax(log_weights_itr[-1, island])
    (pred_prob_overall_train, metrics_train) = \
        evaluate_predictions_smc(particles, smcData, smcData['x_train'], smcData['y_train'], settings, param, weights_prediction, do_metrics)
    print('\nResults on test data')
    (pred_prob_overall_test, metrics_test) = \
        evaluate_predictions_smc(particles, smcData, smcData['x_test'], smcData['y_test'], settings, param, weights_prediction, do_metrics)

    # The result is deliberately wrapped in a one-element tuple, exactly as
    # the original trailing comma did.
    return (pred_prob_overall_test,)
Example #40
0
def s82_star_galaxy_classification(model_parms_file, epoch, Nstar,
                                   features, filters, r_pmm, figname,
                                   threshold=0., Nthreads=4):
    """
    Compare the quality of classification for a model against the s82 coadd.
    Should be a model trained on s82 single epoch data.

    Three 0/1 labelings are built: a cut ``|single[:, r_pmm]| < 0.145`` on
    the single-epoch data, the model's log-odds between the first ``Nstar``
    components and the remaining ones compared with ``threshold``, and a
    cut ``|coadd[:, r_pmm]| < 0.03`` on the coadd.  A 2x2 figure plotting
    column 0 against column ``r_pmm`` is saved to ``figname``: the top row
    is coloured by the single-epoch cut, the bottom row by the model; the
    left column shows single-epoch data, the right column coadd data with
    the 0.03 cut drawn as a line.
    """
    # get the data
    single, singlecov = fetch_matched_s82data(epoch, features=features,
                                              filters=filters)
    coadd, coaddecov = fetch_matched_s82data(epoch, features=features,
                                             filters=filters, use_single=False)
    n_obj = single.shape[0]

    # classify the single epoch data with a plain cut
    single_class = np.zeros(n_obj)
    single_class[np.abs(single[:, r_pmm]) < 0.145] = 1.

    # classify with the model: log-odds of the first Nstar components
    # against the rest
    alpha, mu, V, _, _ = load_xd_parms(model_parms_file)
    logls = log_multivariate_gaussian_Nthreads(single, mu, V, singlecov,
                                               Nthreads)
    logls += np.log(alpha)
    logodds = (logsumexp(logls[:, :Nstar], axis=1) -
               logsumexp(logls[:, Nstar:], axis=1))
    model_class = np.zeros(n_obj)
    model_class[logodds > threshold] = 1.

    # coadd-based labels; computed as in the original but not plotted
    coadd_class = np.zeros(n_obj)
    coadd_class[np.abs(coadd[:, r_pmm]) < 0.03] = 1.

    fs = 10
    f = pl.figure(figsize=(2 * fs, 2 * fs))
    class_colors = ((0, '#ff6633'), (1, '#3b5998'))
    # (subplot position, data to draw, labels to colour by, draw coadd cut?)
    panels = (
        (221, single, single_class, False),
        (222, coadd, single_class, True),
        (223, single, model_class, False),
        (224, coadd, model_class, True),
    )
    for position, data, labels, draw_cut in panels:
        pl.subplot(position)
        for label, color in class_colors:
            members = labels == label
            pl.plot(data[members, 0], data[members, r_pmm], '.',
                    color=color, alpha=0.2)
        if draw_cut:
            pl.plot([17.5, 22.], [0.03, 0.03], 'k')
        pl.ylim(-0.2, 0.5)
    f.savefig(figname, bbox_inches='tight')
Example #41
0
    def hmmFit(self, obs, maxiter=50, epsilon=0.0001, debug=False):
        """Fit the standard HMM to the given data using the (adapted
        Baum-Welch) EM algorithm.

        Parameters
        ----------
        obs : list
            The list of observation sequences where every sequence is an
            ndarray with time along axis 1. The sequences can be of
            different length, but the dimension of the features needs to be
            identical.
        maxiter : int, optional
            The maximum number of iterations of the EM algorithm.
            Default = 50.
        epsilon : float, optional
            Convergence threshold on the variation of the log-likelihood
            between successive iterations of the EM algorithm. Once the
            absolute variation falls below 'epsilon' the algorithm is said
            to have converged. Default = 1e-4.
        debug : bool, optional
            Display verbose On/off.

        Returns
        -------
        float
            The normalized log-likelihood of the last E-step (sum over the
            sequences of log-likelihood divided by sequence length).
        list
            The list of log-likelihoods for each iteration of the EM
            algorithm. To check for monotonicity of the log-likelihoods.
        list
            Per sequence, the first value returned by
            ``self.estimatepostduration`` (the duration estimates of each
            HMM state from the posterior distribution).
        list
            Per sequence, the result of ``self.rankn`` (the top ranked 'n'
            which are used to estimate the state durations).
        list
            Per sequence, the second value returned by
            ``self.estimatepostduration`` (the expected value of the state
            durations obtained at the top ranked 'n').

        """
        if debug:
            logger.setLevel(logging.DEBUG)
        logger.debug('Running the HMM EM algorithm..')
        ll = []
        numseq = len(obs)
        lastavgloglikelihood = -np.inf
        logksilist = [None] * numseq
        logGammalist = [None] * numseq
        logAlphalist = [None] * numseq
        logBetalist = [None] * numseq
        llhlist = [None] * numseq
        obsmatrix = [None] * numseq
        logB = [None] * numseq
        duration = [None] * numseq
        res = [None] * numseq
        rankn = [None] * numseq
        N = [None] * numseq
        is_discrete = self.O.__class__.__name__ == 'Discrete'

        def estimate_durations():
            # Fill rankn/duration/res from the most recent E-step results.
            for seq in range(numseq):
                rankn[seq] = self.rankn(logksilist[seq])
                duration[seq], res[seq] = self.estimatepostduration(
                    logAlphalist[seq], logBetalist[seq], logB[seq],
                    rankn[seq], logGammalist[seq], llhlist[seq])

        for iteration in range(maxiter):
            start_time = time.time()
            logger.debug('-------------------------------------------')
            logger.debug('iter: %d' % iteration)
            logger.debug('E step..')
            for seq, obsseq in enumerate(obs):
                seqlen = obsseq.shape[1]
                N[seq] = seqlen
                if is_discrete:
                    # One-hot encode the symbols.  The length of *this*
                    # sequence is required here; the original passed the
                    # whole list ``N``, which fails.
                    obsmatrix[seq] = np.zeros((len(self.O.c), seqlen))
                    obsmatrix[seq][np.int_(obsseq), np.arange(seqlen)] = 1
                # E-step
                # calculate the posterior probability for each sequence
                logB[seq] = self.O.loglikelihood(obsseq)
                (llhlist[seq], logGammalist[seq], logksilist[seq],
                 logAlphalist[seq], logBetalist[seq]) = \
                    self.gammaKsi(logB[seq])
            # Normalise every sequence's log-likelihood by its length.
            normalizellhlist = np.divide(llhlist, N)
            loglikelihood = np.sum(normalizellhlist)
            ll.append(loglikelihood)
            logger.debug('LLH: %0.10f' % (loglikelihood))
            if abs(loglikelihood - lastavgloglikelihood) < epsilon:
                estimate_durations()
                logger.info("Convergence after %d iterations" % iteration)
                break
            lastavgloglikelihood = loglikelihood
            obsarray = np.concatenate(obs, 1)
            if is_discrete:
                obsarray = np.concatenate(obsmatrix, 1)
            # M-step
            logger.debug('M step..')
            # The last row of logA is re-estimated from the first-time-step
            # posteriors, averaged over the sequences.
            self.logA[-1] = (logsumexp(np.array([g[:, 0]
                                                 for g in logGammalist]),
                                       axis=0) -
                             np.log(np.double(numseq)))
            logKsiarray = np.concatenate(logksilist, axis=0)
            # A list (not a lazy ``map`` object) is needed by np.concatenate
            # on Python 3.  The final time step of each sequence is dropped.
            logGammasArray = np.concatenate(
                [g[:, :-1] for g in logGammalist], axis=1)
            self.logA[:-1] = (logsumexp(logKsiarray, axis=0) -
                              logsumexp(logGammasArray, axis=1)
                              [:, np.newaxis])
            self.O.fit(obsarray, logGammalist)
            end_time = time.time()
            logger.debug('Time to run iter : %.5f s' % (end_time - start_time))
            if iteration == maxiter - 1:
                # Out of iterations: report durations from the last E-step.
                estimate_durations()
                logger.info('No convergence after %d iterations' % (iteration + 1))
                break
        return (loglikelihood, ll, duration, rankn, res)
Example #42
0
 def fit(self,obs,logweights):
     """Re-estimate ``self.p`` from posterior-weighted observations.

     Parameters
     ----------
     obs : ndarray
         Observation matrix whose axis 1 lines up with the concatenated
         weights.  NOTE(review): presumably a one-hot (symbols x time)
         matrix -- confirm against the caller.
     logweights : sequence of ndarray
         Per-sequence state weights in log space; they are concatenated
         along axis 1.
     """
     # Join the per-sequence log weights along axis 1.
     logGamma = np.concatenate(logweights, 1)        
     # Subtract each row's log-sum before exponentiating, so every row of
     # ``normalizer`` sums to one.
     normalizer = np.exp(logGamma - logsumexp(logGamma, axis = 1)[:, np.newaxis])          
     for k in range(self.K):
         # Weighted sum of obs over axis 1 divided by the total weight of
         # state k; the ratio is evaluated as exp(log(a) - log(b)).
         self.p[k]=np.exp(np.log(np.sum(normalizer[k,:]*obs, 1))-\
                                         np.log(np.sum(normalizer[k,:])))