def get_posterior_joint_lik_z_only(self, pos, z, z_prev):
    """Return the joint log posterior of ``z`` at ``pos`` and ``z_prev`` before it.

    The forward values of the previous position and the backward values of
    ``pos`` are rescaled out of log space, the compiled snippet
    ``loops/joint_probability.c`` is run through ``self.run_inner_loop``, and
    the scaling constants are added back in log space before subtracting
    ``self.log_data_prob``.

    NOTE: ``locals()`` is handed to ``run_inner_loop``, so the local variable
    names below are part of the contract with the C snippet (presumably it
    reads them by name -- verify against the C source before renaming or
    removing any of them, including the otherwise unused ``K``).

    Args:
        pos: position whose state ``z`` is queried.
        z: column index into ``self.bwd_values[pos]``.
        z_prev: column index into ``self.fwd_values`` at the previous position.

    Returns:
        ``log(joint_prob) + fwd_const + bwd_const - self.log_data_prob`` where
        ``joint_prob`` is the value returned by ``run_inner_loop``.
    """
    subject = self.subject
    K = self.panel.get_haplotype_num()

    # load c code:
    # The context manager closes the handle even if reading fails.
    # ``readlines`` keeps line terminators, so the join doubles every line
    # break; this is kept so the code string passed on is unchanged.
    with open(self.dir + "/loops/joint_probability.c") as code_file:
        probability_code = "\n".join(code_file.readlines())

    prev_pos = self.positions.get_previous_position(pos)

    # Normalise by the mass of the selected z columns so that the
    # exponentiated values stay in a representable range.
    fwd_const = logsumexp(self.fwd_values[prev_pos][:, z_prev])
    bwd_const = logsumexp(self.bwd_values[pos][:, z])
    fwd_prev = np.exp(self.fwd_values[prev_pos] - fwd_const)
    bwd = np.exp(self.bwd_values[pos] - bwd_const)

    panel = self.panel.get_matrix_at_position(pos)
    sample = subject.haplotype_at_position(pos)
    log_prob_equal, log_prob_nequal = self.get_transition_liks(pos)
    prob_equal, prob_nequal = exp(log_prob_equal), exp(log_prob_nequal)

    joint_prob = self.run_inner_loop(pos, probability_code, locals())

    # Undo the two rescalings in log space.
    log_joint_prob = log(joint_prob) + fwd_const + bwd_const
    return log_joint_prob - self.log_data_prob
def run_forwards_factorial(self):
    """Run the forwards pass and store the log forward values per position.

    For each position (in iteration order of ``self.positions``) a fresh
    ``(K ** 2, 2)`` array is allocated and filled by the compiled snippet
    ``loops/forwards_factorial.c`` via ``self.run_inner_loop``.  For every
    position except the start position the result is converted back to log
    space and the normalisation constant of the previous position is re-added.

    NOTE: ``locals()`` is handed to ``run_inner_loop``, so the local variable
    names below are part of the contract with the C snippet (presumably it
    reads them by name -- verify against the C source before renaming or
    removing any of them).

    Side effects:
        self.fwd_values: dict mapping position -> array of forward values.
        self.fwd_log_data_prob: ``logsumexp`` of the last position's values.
    """
    positions = self.positions
    subject = self.subject
    is_start_position = positions.is_start_position
    K = self.panel.get_haplotype_num()
    fwd_values = dict()

    # load c code:
    # The context manager closes the handle even if reading fails.
    # ``readlines`` keeps line terminators, so the join doubles every line
    # break; this is kept so the code string passed on is unchanged.
    with open(self.dir + "/loops/forwards_factorial.c") as code_file:
        forwards_code = "\n".join(code_file.readlines())

    # actual forwards algorithm starts here:
    for j, pos in enumerate(positions):
        if j % 200 == 0:
            # Parenthesised single-argument form prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print("%d/%d" % (positions.get_position_index(pos),
                             len(positions)))
            sys.stdout.flush()

        if not is_start_position(pos):
            prev_pos = positions.get_previous_position(pos)
        else:
            # Original note: the C code doesn't use the previous-position
            # variables here, they just need to be filled with something.
            prev_pos = pos

        fwd = np.zeros((K ** 2, 2))
        # Registered before the lookup below: at the start position
        # prev_pos == pos, so fwd_values[prev_pos] must already exist.
        fwd_values[pos] = fwd

        # Rescale the previous forward values out of log space.
        log_norm_const = logsumexp(fwd_values[prev_pos])
        fwd_prev = np.exp(fwd_values[prev_pos] - log_norm_const)

        log_prob_equal, log_prob_nequal = self.get_transition_liks(pos)
        prob_equal, prob_nequal = exp(log_prob_equal), exp(log_prob_nequal)
        block_start_position = int(subject.is_block_start(pos))
        start_position = int(positions.is_start_position(pos))
        panel = self.panel.get_matrix_at_position(pos)
        sample = subject.haplotype_at_position(pos)

        self.run_inner_loop(pos, forwards_code, locals())

        if not is_start_position(pos):
            # Back to log space, restoring the scale removed above.
            fwd_values[pos] = np.log(fwd_values[pos]) + log_norm_const

    last_pos = positions.last_position
    log_data_prob = logsumexp(fwd_values[last_pos])
    self.fwd_values = fwd_values
    self.fwd_log_data_prob = log_data_prob
def get_posterior_lik_z_only(self, pos, z_index):
    """Return the log-space sum over rows for column ``z_index`` at ``pos``.

    Adds column ``z_index`` of the forward and backward values stored for
    ``pos``, subtracts ``self.log_data_prob`` from every entry and collapses
    the result with ``logsumexp``.

    Args:
        pos: key into ``self.fwd_values`` and ``self.bwd_values``.
        z_index: column to select from both arrays.

    Returns:
        The ``logsumexp`` of the combined column.
    """
    forward_column = self.fwd_values[pos][:, z_index]
    backward_column = self.bwd_values[pos][:, z_index]
    # Same left-to-right arithmetic as a single expression would give.
    log_terms = forward_column + backward_column - self.log_data_prob
    return logsumexp(log_terms)
def run_backwards_factorial(self):
    """Run the backwards pass and compute the log data probability.

    Positions are visited in reverse order.  For each one the compiled
    snippet ``loops/backwards_factorial.c`` is run via
    ``self.run_inner_loop`` and writes into the ``(K ** 2, 2)`` array
    pre-allocated for that position.  For every position except the end
    position the result is converted back to log space and the normalisation
    constant of the next position is re-added.

    Afterwards the log data probability is accumulated at the first position
    over all ``K ** 2`` haplotype pairs and both ``z`` orderings.

    NOTE: ``locals()`` is handed to ``run_inner_loop``, so the local variable
    names used inside the main loop are part of the contract with the C
    snippet (presumably it reads them by name -- verify against the C source
    before renaming or removing any of them).

    Side effects:
        self.bwd_values: dict mapping position -> array of backward values.
        self.bwd_log_data_prob, self.log_data_prob: the log data probability.

    Raises:
        SystemExit: if the accumulated log data probability becomes NaN.
    """
    positions = self.positions
    subject = self.subject
    is_end_position = positions.is_end_position
    K = self.panel.get_haplotype_num()

    bwd_values = dict()
    for pos in positions:
        bwd_values[pos] = np.zeros((K ** 2, 2))

    # load c code:
    # The context manager closes the handle even if reading fails.
    # ``readlines`` keeps line terminators, so the join doubles every line
    # break; this is kept so the code string passed on is unchanged.
    with open(self.dir + "/loops/backwards_factorial.c") as code_file:
        backwards_code = "\n".join(code_file.readlines())

    # actual backwards algorithm starts here:
    for j, pos in enumerate(reversed(positions)):
        if j % 200 == 0:
            # Parenthesised single-argument form prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print("%d/%d" % (positions.get_position_index(pos),
                             len(positions)))
            sys.stdout.flush()

        if not is_end_position(pos):
            next_pos = positions.get_next_position(pos)
        else:
            # Original note: the C code doesn't use the next-position
            # variables here, they just need to be filled with something.
            next_pos = pos

        block_start_position = int(subject.is_block_start(next_pos))
        start_position = int(positions.is_start_position(next_pos))
        # TODO: rename panel to matrix here and in C code
        panel = self.panel.get_matrix_at_position(next_pos)
        sample = subject.haplotype_at_position(next_pos)
        log_prob_equal, log_prob_nequal = self.get_transition_liks(next_pos)
        prob_equal, prob_nequal = exp(log_prob_equal), exp(log_prob_nequal)

        # Rescale the next position's backward values out of log space.
        log_norm_const = logsumexp(bwd_values[next_pos])
        bwd_next = np.exp(bwd_values[next_pos] - log_norm_const)
        bwd = bwd_values[pos]
        end_position = int(is_end_position(pos))

        self.run_inner_loop(pos, backwards_code, locals())

        if not is_end_position(pos):
            # Back to log space, restoring the scale removed above.
            bwd_values[pos] = np.log(bwd_values[pos]) + log_norm_const

    # log_data_prob:
    log_data_prob = -np.inf
    y_states = [(i, j) for i in range(K) for j in range(K)]
    z_states = (0, 1), (1, 0)
    start_pos = positions.first_position
    sample = subject.haplotype_at_position(start_pos)
    panel = self.panel.get_matrix_at_position(start_pos)
    lam = self.common_arguments["lam"]

    # Loop-invariant constant terms.  They are kept as two separate addends
    # so the floating-point summation order matches the inline expression.
    log_z_weight = log(0.5)
    log_y_weight = log(1.0 / (K ** 2))

    for y_index, y in enumerate(y_states):
        h = panel[y[0]], panel[y[1]]
        for z_index, z in enumerate(z_states):
            # compute emission probability:
            # log(lam) / log(1 - lam) stay inside the branches so they are
            # only evaluated for the case that actually occurs.
            log_emission_prob = 0
            for x in (0, 1):
                if sample[x] == h[z[x]]:
                    log_emission_prob += log(1 - lam)
                else:
                    log_emission_prob += log(lam)

            log_data_prob = logaddexp(
                log_data_prob,
                bwd_values[start_pos][y_index, z_index]
                + log_emission_prob
                + log_z_weight
                + log_y_weight
                # + log(hap_counts[y[0]]/T)
                # + log(hap_counts[y[1]]/T)
            )
            if np.isnan(log_data_prob):
                # sys.exit raises the same SystemExit as the interactive
                # ``exit`` helper but does not depend on the site module.
                sys.exit("ERROR: log_data_prob=NaN")

    self.bwd_values = bwd_values
    self.bwd_log_data_prob = log_data_prob
    self.log_data_prob = log_data_prob