예제 #1
0
    def seq_err(self, member):
        """
        Compute the sequencing error probabilities for one family member.

        Every chromosome is assumed to be equally likely to be sequenced.

        The probabilities come from a Dirichlet multinomial distribution,
        which is a point of divergence from the Cartwright et al. paper
        mentioned in the other functions. The distribution is evaluated once
        per row of the alpha matrix against the member's read counts.

        Side effect: the max element returned by ut.normalspace is appended
        to self.max_elems, so that the scaling of the probability matrix can
        be manipulated later.

        Args:
            member: Integer index into self.reads selecting the read counts
                of a family member in the trio model.

        Returns:
            Rescaled probability vector with ut.GENOTYPE_COUNT entries
            (described as 1 x 16) that needs to be multiplied by a
            transition matrix.
        """
        # TODO: add bias when alpha freq are added
        alphas = ut.get_alphas(self.seq_err_rate) * self.dm_disp

        # One slot per genotype; slots are filled row by row from alphas.
        raw_probs = np.zeros(ut.GENOTYPE_COUNT)
        for idx, alpha_row in enumerate(alphas):
            raw_probs[idx] = ut.dirichlet_multinomial(
                alpha_row, self.reads[member]
            )

        # Keep the max element so callers can adjust the scaling later.
        rescaled, scale_max = ut.normalspace(raw_probs)
        self.max_elems.append(scale_max)

        return rescaled
예제 #2
0
    def seq_err(self, member):
        """
        Compute the sequencing error probabilities for one family member.

        Every chromosome is assumed to be equally likely to be sequenced.

        The probabilities come from a Dirichlet multinomial distribution,
        which is a point of divergence from the Cartwright et al. paper
        mentioned in the other functions. The distribution is evaluated once
        per row of the alpha matrix against the member's read counts.

        Side effect: the max element returned by ut.normalspace is appended
        to self.max_elems, so that the scaling of the probability matrix can
        be manipulated later.

        Args:
            member: Integer index into self.reads selecting the read counts
                of a family member in the trio model.

        Returns:
            Rescaled probability vector with ut.GENOTYPE_COUNT entries
            (described as 1 x 16) that needs to be multiplied by a
            transition matrix.
        """
        # TODO: add bias when alpha freq are added
        alphas = ut.get_alphas(self.seq_err_rate) * self.dm_disp

        # One slot per genotype; slots are filled row by row from alphas.
        raw_probs = np.zeros(ut.GENOTYPE_COUNT)
        for idx, alpha_row in enumerate(alphas):
            raw_probs[idx] = ut.dirichlet_multinomial(
                alpha_row, self.reads[member]
            )

        # Keep the max element so callers can adjust the scaling later.
        rescaled, scale_max = ut.normalspace(raw_probs)
        self.max_elems.append(scale_max)

        return rescaled
예제 #3
0
    def dm_sample(self, soma_idx):
        """
        Draw sequencing read counts from a Dirichlet multinomial.

        The alpha frequencies belonging to the given somatic genotype are
        used to sample nucleotide frequencies, and those frequencies are then
        used to sample reads at the specified coverage (self.cov).

        Args:
            soma_idx: Index of the somatic genotype, used to pick the
                matching row of alpha frequencies.

        Returns:
            Array containing read counts [#A, #C, #G, #T].
        """
        model = self.trio_model
        alphas = ut.get_alphas(model.seq_err_rate) * model.dm_disp

        # First draw nucleotide frequencies, then draw reads from them.
        nt_freqs = np.random.dirichlet(alphas[soma_idx])
        return np.random.multinomial(self.cov, nt_freqs)
예제 #4
0
    def dm_sample(self, soma_idx):
        """
        Draw sequencing read counts from a Dirichlet multinomial.

        The alpha frequencies belonging to the given somatic genotype are
        used to sample nucleotide frequencies, and those frequencies are then
        used to sample reads at the specified coverage (self.cov).

        Args:
            soma_idx: Index of the somatic genotype, used to pick the
                matching row of alpha frequencies.

        Returns:
            Array containing read counts [#A, #C, #G, #T].
        """
        model = self.trio_model
        alphas = ut.get_alphas(model.seq_err_rate) * model.dm_disp

        # First draw nucleotide frequencies, then draw reads from them.
        nt_freqs = np.random.dirichlet(alphas[soma_idx])
        return np.random.multinomial(self.cov, nt_freqs)