Example #1
0
    def get_posterior_joint_lik_z_only(self, pos, z, z_prev):
        """Return a log-scale joint value for ``z`` at ``pos`` and ``z_prev`` one position earlier.

        The C source in ``loops/joint_probability.c`` (below ``self.dir``) is
        loaded and executed through ``self.run_inner_loop``.  Before that, the
        stored forward values of the previous position and the backward
        values of ``pos`` are shifted by a constant and exponentiated; the
        constants are added back to the log of the C loop's result, and
        ``self.log_data_prob`` is subtracted.

        Presumably the result is the posterior log-probability of the
        ``(z_prev, z)`` pair -- the C loop is not visible from here, confirm
        against ``joint_probability.c``.

        Args:
            pos: Key into ``self.bwd_values``.  ``get_previous_position`` is
                called on it unconditionally.
            z: Column index into ``self.bwd_values[pos]``.
            z_prev: Column index into ``self.fwd_values[prev_pos]``.

        Returns:
            ``log(joint_prob) + fwd_const + bwd_const - self.log_data_prob``.
        """
        # NOTE: locals() is handed to run_inner_loop below, so the C code can
        # presumably refer to any local by name.  Do not rename or remove
        # locals (including the seemingly unused ``subject`` and ``K``)
        # without checking the C source.
        subject = self.subject
        K = self.panel.get_haplotype_num()

        # load c code:
        # The context manager closes the file even when reading fails.
        # NOTE(review): readlines() keeps the line endings, so joining with
        # "\n" doubles every newline; kept as is so the text handed to
        # run_inner_loop does not change.
        with open(self.dir + "/loops/joint_probability.c") as code_file:
            probability_code = "\n".join(code_file.readlines())

        prev_pos = self.positions.get_previous_position(pos)

        # Shift constants: log-sum-exp over the selected z column only.
        fwd_const = logsumexp(self.fwd_values[prev_pos][:, z_prev])
        bwd_const = logsumexp(self.bwd_values[pos][:, z])

        # The whole matrices (all columns) are shifted and exponentiated.
        fwd_prev = np.exp(self.fwd_values[prev_pos] - fwd_const)
        bwd = np.exp(self.bwd_values[pos] - bwd_const)

        panel = self.panel.get_matrix_at_position(pos)
        sample = subject.haplotype_at_position(pos)

        log_prob_equal, log_prob_nequal = self.get_transition_liks(pos)
        prob_equal, prob_nequal = exp(log_prob_equal), exp(log_prob_nequal)

        joint_prob = self.run_inner_loop(pos, probability_code, locals())

        # Undo the two shifts applied before exponentiation.
        log_joint_prob = log(joint_prob) + fwd_const + bwd_const
        return log_joint_prob - self.log_data_prob
Example #2
0
    def run_forwards_factorial(self):
        """Run the forward pass over all positions and store the result on ``self``.

        For every position a ``(K ** 2, 2)`` array is allocated, where ``K``
        is ``self.panel.get_haplotype_num()``, and the C source in
        ``loops/forwards_factorial.c`` (below ``self.dir``) is executed
        through ``self.run_inner_loop``.  The previous position's values are
        shifted by their log-sum-exp and exponentiated before the call; for
        every position except the start position the result is converted
        back with ``np.log`` and the shift is re-added.

        Progress is printed every 200 positions.

        Side effects:
            Sets ``self.fwd_values`` (dict: position -> array) and
            ``self.fwd_log_data_prob`` (log-sum-exp of the values stored for
            ``positions.last_position``).
        """
        # NOTE: locals() is handed to run_inner_loop below, so the C code can
        # presumably refer to any local by name.  Do not rename or remove
        # locals without checking the C source.
        positions = self.positions
        subject = self.subject
        is_start_position = positions.is_start_position
        K = self.panel.get_haplotype_num()

        fwd_values = dict()

        # load c code:
        # The context manager closes the file even when reading fails.
        # NOTE(review): readlines() keeps the line endings, so joining with
        # "\n" doubles every newline; kept as is so the text handed to
        # run_inner_loop does not change.
        with open(self.dir + "/loops/forwards_factorial.c") as code_file:
            forwards_code = "\n".join(code_file.readlines())

        # actual forwards algorithm starts here:
        for j, pos in enumerate(positions):
            if j % 200 == 0:
                # Single-argument print(...) prints the same text on Python 2.
                print("%d/%d" % (positions.get_position_index(pos), len(positions)))
            sys.stdout.flush()

            if not is_start_position(pos):
                prev_pos = positions.get_previous_position(pos)
            else:
                # the C code doesn't use any variables, we just need to fill them with something
                prev_pos = pos

            # Must be stored before the lookup below: at the start position
            # prev_pos == pos, so fwd_values[prev_pos] is this fresh array.
            fwd = np.zeros((K ** 2, 2))
            fwd_values[pos] = fwd

            log_norm_const = logsumexp(fwd_values[prev_pos])
            fwd_prev = np.exp(fwd_values[prev_pos] - log_norm_const)

            log_prob_equal, log_prob_nequal = self.get_transition_liks(pos)
            prob_equal, prob_nequal = exp(log_prob_equal), exp(log_prob_nequal)

            block_start_position = int(subject.is_block_start(pos))
            start_position = int(positions.is_start_position(pos))
            panel = self.panel.get_matrix_at_position(pos)
            sample = subject.haplotype_at_position(pos)

            self.run_inner_loop(pos, forwards_code, locals())

            # Start-position values are stored as left by the C loop; all
            # others are converted to log scale and un-shifted.
            if not is_start_position(pos):
                fwd_values[pos] = np.log(fwd_values[pos]) + log_norm_const

        last_pos = positions.last_position
        log_data_prob = logsumexp(fwd_values[last_pos])

        self.fwd_values = fwd_values
        self.fwd_log_data_prob = log_data_prob
Example #3
0
 def get_posterior_lik_z_only(self, pos, z_index):
     """Return the log-sum-exp over rows of forward + backward - log data prob for one z column.

     Args:
         pos: Key into ``self.fwd_values`` and ``self.bwd_values``.
         z_index: Column selected from both stored arrays.

     Returns:
         ``logsumexp`` of the element-wise sum of the selected forward and
         backward columns, each entry reduced by ``self.log_data_prob``.
     """
     forward_column = self.fwd_values[pos][:, z_index]
     backward_column = self.bwd_values[pos][:, z_index]
     # Same evaluation order as (forward + backward) - log_data_prob.
     log_terms = forward_column + backward_column - self.log_data_prob
     return logsumexp(log_terms)
Example #4
0
    def run_backwards_factorial(self):
        """Run the backward pass over all positions and derive the log data probability.

        A ``(K ** 2, 2)`` zero array is allocated for every position, where
        ``K`` is ``self.panel.get_haplotype_num()``.  Positions are visited
        in reverse order and the C source in ``loops/backwards_factorial.c``
        (below ``self.dir``) is executed through ``self.run_inner_loop``.
        The next position's values are shifted by their log-sum-exp and
        exponentiated before the call; for every position except the end
        position the result is converted back with ``np.log`` and the shift
        is re-added.

        Afterwards the log data probability is accumulated at the first
        position over all ``K ** 2`` ordered haplotype pairs ``y`` and both
        ``z`` states, adding an emission term and the constants ``log(0.5)``
        and ``log(1.0 / K ** 2)`` to the backward value.

        Progress is printed every 200 positions.

        Side effects:
            Sets ``self.bwd_values`` (dict: position -> array),
            ``self.bwd_log_data_prob`` and ``self.log_data_prob``.

        Raises:
            SystemExit: If the accumulated log data probability becomes NaN.
        """
        # NOTE: locals() is handed to run_inner_loop below, so the C code can
        # presumably refer to any local by name.  Do not rename or remove
        # locals without checking the C source.
        positions = self.positions
        subject = self.subject
        is_end_position = positions.is_end_position
        K = self.panel.get_haplotype_num()

        bwd_values = dict()
        for pos in positions:
            bwd_values[pos] = np.zeros((K ** 2, 2))

        # load c code:
        # The context manager closes the file even when reading fails.
        # NOTE(review): readlines() keeps the line endings, so joining with
        # "\n" doubles every newline; kept as is so the text handed to
        # run_inner_loop does not change.
        with open(self.dir + "/loops/backwards_factorial.c") as code_file:
            backwards_code = "\n".join(code_file.readlines())

        # actual backwards algorithm starts here:
        for j, pos in enumerate(reversed(positions)):
            if j % 200 == 0:
                # Single-argument print(...) prints the same text on Python 2.
                print("%d/%d" % (positions.get_position_index(pos), len(positions)))
            sys.stdout.flush()

            if not is_end_position(pos):
                next_pos = positions.get_next_position(pos)
            else:
                # the C code doesn't use any variables, we just need to fill them with something
                next_pos = pos

            # Flags, panel, sample and transition terms all refer to next_pos.
            block_start_position = int(subject.is_block_start(next_pos))
            start_position = int(positions.is_start_position(next_pos))
            panel = self.panel.get_matrix_at_position(next_pos)  # TODO: rename panel to matrix here and in C code
            sample = subject.haplotype_at_position(next_pos)

            log_prob_equal, log_prob_nequal = self.get_transition_liks(next_pos)
            prob_equal, prob_nequal = exp(log_prob_equal), exp(log_prob_nequal)

            log_norm_const = logsumexp(bwd_values[next_pos])
            bwd_next = np.exp(bwd_values[next_pos] - log_norm_const)

            bwd = bwd_values[pos]
            end_position = int(is_end_position(pos))

            self.run_inner_loop(pos, backwards_code, locals())

            # End-position values are stored as left by the C loop; all
            # others are converted to log scale and un-shifted.
            if not is_end_position(pos):
                bwd_values[pos] = np.log(bwd_values[pos]) + log_norm_const

        # log_data_prob:
        log_data_prob = -np.inf
        y_states = [(i, j) for i in range(K) for j in range(K)]
        z_states = (0, 1), (1, 0)

        start_pos = positions.first_position
        sample = subject.haplotype_at_position(start_pos)
        panel = self.panel.get_matrix_at_position(start_pos)
        lam = self.common_arguments["lam"]

        for y_index, y in enumerate(y_states):
            for z_index, z in enumerate(z_states):

                # compute emission probability:
                h = panel[y[0]], panel[y[1]]
                log_emission_prob = 0

                # z selects which of the two panel entries each sample
                # entry is compared against.
                for x in (0, 1):
                    if sample[x] == h[z[x]]:
                        log_emission_prob += log(1 - lam)
                    else:
                        log_emission_prob += log(lam)

                log_data_prob = logaddexp(
                    log_data_prob,
                    bwd_values[start_pos][y_index, z_index] + log_emission_prob + log(0.5) + log(1.0 / (K ** 2))
                    # + log(hap_counts[y[0]]/T)
                    # + log(hap_counts[y[1]]/T)
                )

                if np.isnan(log_data_prob):
                    # sys.exit instead of the site-provided exit(), which is
                    # intended for interactive use and may be absent.
                    sys.exit("ERROR: log_data_prob=NaN")

        self.bwd_values = bwd_values
        self.bwd_log_data_prob = log_data_prob
        self.log_data_prob = log_data_prob