def _RIVER_likelihood(e, g, beta, phi):
    '''
    Unfinished RIVER likelihood: evaluates the four log-probability terms
    but does not combine them yet, and always returns the placeholder 1.

    e : expression labels, forwarded to nb.log_prob
    g : genomic features, forwarded to lr.log_prob
    beta : coefficients, forwarded to lr.log_prob
    phi : parameters, forwarded to nb.log_prob
    '''
    # log p(z = 1 | g)
    log_p_z_1_given_g = lr.log_prob(g, beta)
    # log p(z = 0 | g)
    log_p_z_0_given_g = np.log(1.0 - np.exp(log_p_z_1_given_g))
    # log p(e | z = 1)
    log_p_e_given_z_1 = nb.log_prob(e, 1, phi)
    # log p(e | z = 0)
    log_p_e_given_z_0 = nb.log_prob(e, 0, phi)
    # TODO: combine the terms above into the actual likelihood (the original
    # sketch hinted at a max-shifted log-sum-exp via np.maximum). The
    # interactive debugger breakpoint that used to sit here was removed so
    # that calling this function no longer halts the program.
    return 1
def eStepLocal(self, i, data, beta, phi):
    '''
    Compute the posterior p(z = 1 | g, e) for tissue i.

    i : int
        tissue index (kept for interface compatibility; not read here)
    data : pandas data frame
        core data structure containing genomic features, expression,
        updated posteriors.
    beta : numpy array : 1 x M
        coefficients for genomic features
    phi : numpy array
        either 2 x 2 numpy array for categorical distribution
        or 1 x 2 for noisy or

    Returns exp(log_q), where
        q = p(e | z = 1) * p(z = 1 | g) / sum_s p(e | z = s) * p(z = s | g)
    '''
    # log p(z | g)
    log_prob_z_1_given_g = lr.log_prob(data[self.genomic_features].values, beta)
    log_prob_z_0_given_g = np.log(1.0 - np.exp(log_prob_z_1_given_g))

    if self.e_distribution == 'noisyor':
        # noisy OR: log p(e | z, q)
        # Both z = 1 and z = 0 terms must read the same label column of
        # `data`; the z = 0 term previously indexed `data[i]` first.
        labels = data['expr_label']
        eqtl = data["eqtl"]
        log_prob_e_given_z_1 = nb.log_prob_noisyor_2_params(labels, 1, eqtl, phi)
        log_prob_e_given_z_0 = nb.log_prob_noisyor_2_params(labels, 0, eqtl, phi)
    else:
        # naive bayes: log p(e | z)
        # Use the `phi` argument (as the noisy-or branch does) instead of
        # silently ignoring it in favour of self.phi.
        labels = data['expr_label'].values
        log_prob_e_given_z_1 = nb.log_prob(labels, 1, phi)
        log_prob_e_given_z_0 = nb.log_prob(labels, 0, phi)

    # Joint log-probabilities log p(e, z = s | g) for s in {0, 1}.
    log_joint_z_1 = log_prob_e_given_z_1 + log_prob_z_1_given_g
    log_joint_z_0 = log_prob_e_given_z_0 + log_prob_z_0_given_g

    # Normalise in log space: np.logaddexp avoids the underflow to log(0)
    # that exp(.) * exp(.) + exp(.) * exp(.) suffers for very negative terms.
    log_q = log_joint_z_1 - np.logaddexp(log_joint_z_0, log_joint_z_1)
    return np.exp(log_q)
def eStepLocalTest(self, i, beta, phi):
    '''
    Compute the posterior p(z = 1 | g, e) on the test data of tissue i.

    i : int
        index into self.test_list
    beta : coefficients forwarded to lr.log_prob
    phi : parameters forwarded to nb.log_prob

    Returns exp(log_q), where
        q = p(e | z = 1) * p(z = 1 | g) / sum_s p(e | z = s) * p(z = s | g)
    '''
    test_frame = self.test_list[i]
    features = test_frame[self.genomic_features].values
    labels = test_frame[self.label].values

    # log P(Z = 1 | G)
    log_prob_z_1_given_g = lr.log_prob(features, beta)
    # log P(Z = 0 | G)
    log_prob_z_0_given_g = np.log(1.0 - np.exp(log_prob_z_1_given_g))
    # log P(E | Z = 1)
    log_prob_e_given_z_1 = nb.log_prob(labels, 1, phi)
    # log P(E | Z = 0)
    log_prob_e_given_z_0 = nb.log_prob(labels, 0, phi)

    # Joint log-probabilities log P(E, Z = s | G) for s in {0, 1}.
    log_joint_z_1 = log_prob_e_given_z_1 + log_prob_z_1_given_g
    log_joint_z_0 = log_prob_e_given_z_0 + log_prob_z_0_given_g

    # Normalise in log space: np.logaddexp avoids the underflow to log(0)
    # that exponentiating each term separately suffers for very negative
    # log-probabilities.
    log_q = log_joint_z_1 - np.logaddexp(log_joint_z_0, log_joint_z_1)
    return np.exp(log_q)
def computeLikelihood(self):
    '''
    Accumulate and return the log-likelihood of the model.

    The total is the sum of
      * self.log_p_beta(),
      * self.log_p_beta_child_given_beta(i) for every tissue i, and
      * for every tissue i, the nan-ignoring sum over rows of
        log( p(e | z = 0) p(z = 0 | g) + p(e | z = 1) p(z = 1 | g) ).

    A tissue whose per-tissue terms cannot be computed is skipped
    (best effort) rather than aborting the whole computation.
    '''
    ll = self.log_p_beta()
    # P(beta^c | beta)
    for i in range(self.num_tissues):
        ll += self.log_p_beta_child_given_beta(i)

    for i in range(self.num_tissues):
        try:
            tissue = self.train_list[i]
            labels = tissue['expr_label']
            log_prob_z_1_g = lr.log_prob(
                tissue[self.genomic_features], self.getBetaLeaf(i))
            log_prob_z_0_g = np.log(1.0 - np.exp(log_prob_z_1_g))
            # log p(e | z = 1) + log p(z = 1 | g); reuses log_prob_z_1_g
            # instead of evaluating lr.log_prob a second time.
            b = nb.log_prob(labels, 1, self.phi) + log_prob_z_1_g
        except Exception:
            # Deliberate best effort: skip tissues that fail, but no longer
            # swallow KeyboardInterrupt / SystemExit as a bare except did.
            continue
        # log p(e | z = 0) + log p(z = 0 | g)
        a = nb.log_prob(labels, 0, self.phi) + log_prob_z_0_g
        # log sum exp trick
        s = np.maximum(a, b)
        unnormalized_prob = s + np.log(np.exp(a - s) + np.exp(b - s))
        # nansum ignores rows whose term is NaN.
        ll += np.nansum(unnormalized_prob)
    # The accumulated value was previously discarded; hand it to the caller.
    return ll