def test_get_de_novo_counts_loss_of_function(self):
        """ loss-of-function consequences are tallied as lof_snv or lof_indel

        Each consequence is written to the "consequence" column of
        self.de_novos in turn, and the table from get_de_novo_counts() is
        compared against self.expected.
        """

        # SNV consequences which should be counted in the "lof_snv" column
        lof_snv_consequences = (
            "stop_gained",
            "splice_acceptor_variant",
            "splice_donor_variant",
            "initiator_codon_variant",
            "start_lost",
            "conserved_exon_terminus_variant",
        )

        # indel consequences which should be counted in the "lof_indel" column
        lof_indel_consequences = ("frameshift_variant",)

        self.expected["lof_snv"] = 1
        for consequence in lof_snv_consequences:
            self.de_novos["consequence"] = consequence
            self.compare_tables(get_de_novo_counts(self.de_novos),
                self.expected)

        # use a two-base ref allele (presumably so the variant is classed as
        # an indel) and move the expected count from the SNV to indel column
        self.de_novos["ref"] = "GG"
        self.expected["lof_snv"] = 0
        self.expected["lof_indel"] = 1
        for consequence in lof_indel_consequences:
            self.de_novos["consequence"] = consequence
            self.compare_tables(get_de_novo_counts(self.de_novos),
                self.expected)
 def test_get_de_novo_counts_loss_of_function(self):
     """ loss-of-function consequences are tallied as lof_snv or lof_indel

     The SNV consequences are checked first, then the fixture is switched to
     a two-base ref allele and the indel consequence is checked.
     """

     def check_each(consequences):
         """ count the de novos under each consequence, compare to expected
         """
         for consequence in consequences:
             self.de_novos["consequence"] = consequence
             counts = get_de_novo_counts(self.de_novos)
             self.compare_tables(counts, self.expected)

     # loss-of-function-equivalent SNV VEP consequences
     self.expected["lof_snv"] = 1
     check_each([
         "stop_gained",
         "splice_acceptor_variant",
         "splice_donor_variant",
         "initiator_codon_variant",
         "start_lost",
         "conserved_exon_terminus_variant",
     ])

     # loss-of-function-equivalent indel VEP consequences; the ref allele is
     # lengthened (presumably so the variant is classed as an indel)
     self.de_novos["ref"] = "GG"
     self.expected["lof_snv"] = 0
     self.expected["lof_indel"] = 1
     check_each(["frameshift_variant"])
    def test_get_de_novo_counts_missense(self):
        """ missense-equivalent consequences are tallied as missense_snv or
        missense_indel

        Each consequence is written to the "consequence" column of
        self.de_novos in turn, and the table from get_de_novo_counts() is
        compared against self.expected.
        """

        # SNV consequences which should be counted in "missense_snv"
        missense_snv_consequences = (
            "missense_variant",
            "stop_lost",
            "coding_sequence_variant",
            "protein_altering_variant",
        )

        # indel consequences which should be counted in "missense_indel"
        missense_indel_consequences = ("inframe_deletion", "inframe_insertion")

        self.expected["missense_snv"] = 1
        for consequence in missense_snv_consequences:
            self.de_novos["consequence"] = consequence
            self.compare_tables(get_de_novo_counts(self.de_novos),
                self.expected)

        # use a two-base ref allele (presumably so the variant is classed as
        # an indel) and move the expected count from the SNV to indel column
        self.de_novos["ref"] = "GG"
        self.expected["missense_snv"] = 0
        self.expected["missense_indel"] = 1
        for consequence in missense_indel_consequences:
            self.de_novos["consequence"] = consequence
            self.compare_tables(get_de_novo_counts(self.de_novos),
                self.expected)
    def test_get_de_novo_counts_multiple(self):
        """ check the de novo counts for multiple variants

        Three scenarios are checked in sequence, each building on the state
        left by the previous one:
          1. two identical missense variants give a missense_snv count of 2
          2. one missense plus one stop_gained give one count in each column
          3. the two variants placed in different genes give one row per gene
        """

        # DataFrame.append() was removed in pandas 2.0; concat() is the
        # replacement. Imported here so the method does not rely on a
        # module-level name.
        import pandas

        # add another variant to the dataframe, by duplicating the rows
        self.de_novos = pandas.concat([self.de_novos, self.de_novos.copy()],
            ignore_index=True)

        # counting two of the same variant give a count of 2
        self.expected["missense_snv"] = 2
        self.de_novos["consequence"] = "missense_variant"
        computed = get_de_novo_counts(self.de_novos)
        self.compare_tables(computed, self.expected)

        # counting two different variant types gives two counts of 1
        self.expected["missense_snv"] = 1
        self.expected["lof_snv"] = 1
        self.de_novos.loc[0, "consequence"] = "stop_gained"
        computed = get_de_novo_counts(self.de_novos)
        self.compare_tables(computed, self.expected)

        # now check when we have two variants in different genes. The expected
        # table is duplicated, then row 0 is made the ARID1B row (holding only
        # the stop_gained count) while row 1 keeps only the missense count.
        self.expected = pandas.concat([self.expected, self.expected.copy()],
            ignore_index=True)
        self.de_novos.loc[0, "hgnc"] = "ARID1B"
        self.expected.loc[0, "hgnc"] = "ARID1B"
        self.expected.loc[0, "missense_snv"] = 0
        self.expected.loc[1, "lof_snv"] = 0
        computed = get_de_novo_counts(self.de_novos)
        self.compare_tables(computed, self.expected)
 def test_get_de_novo_counts_multiple(self):
     """ check the de novo counts for multiple variants

     Three scenarios are checked in sequence, each building on the state
     left by the previous one:
       1. two identical missense variants give a missense_snv count of 2
       2. one missense plus one stop_gained give one count in each column
       3. the two variants placed in different genes give one row per gene
     """

     # DataFrame.append() was removed in pandas 2.0; concat() is the
     # replacement. Imported here so the method does not rely on a
     # module-level name.
     import pandas

     # add another variant to the dataframe, by duplicating the rows
     self.de_novos = pandas.concat([self.de_novos, self.de_novos.copy()],
         ignore_index=True)

     # counting two of the same variant give a count of 2
     self.expected["missense_snv"] = 2
     self.de_novos["consequence"] = "missense_variant"
     computed = get_de_novo_counts(self.de_novos)
     self.compare_tables(computed, self.expected)

     # counting two different variant types gives two counts of 1
     self.expected["missense_snv"] = 1
     self.expected["lof_snv"] = 1
     self.de_novos.loc[0, "consequence"] = "stop_gained"
     computed = get_de_novo_counts(self.de_novos)
     self.compare_tables(computed, self.expected)

     # now check when we have two variants in different genes. The expected
     # table is duplicated, then row 0 is made the ARID1B row (holding only
     # the stop_gained count) while row 1 keeps only the missense count.
     self.expected = pandas.concat([self.expected, self.expected.copy()],
         ignore_index=True)
     self.de_novos.loc[0, "hgnc"] = "ARID1B"
     self.expected.loc[0, "hgnc"] = "ARID1B"
     self.expected.loc[0, "missense_snv"] = 0
     self.expected.loc[1, "lof_snv"] = 0
     computed = get_de_novo_counts(self.de_novos)
     self.compare_tables(computed, self.expected)
 def test_get_de_novo_counts_missense(self):
     """ missense-equivalent consequences are tallied as missense_snv or
     missense_indel

     The SNV consequences are checked first, then the fixture is switched to
     a two-base ref allele and the inframe indel consequences are checked.
     """

     def check_each(consequences):
         """ count the de novos under each consequence, compare to expected
         """
         for consequence in consequences:
             self.de_novos["consequence"] = consequence
             counts = get_de_novo_counts(self.de_novos)
             self.compare_tables(counts, self.expected)

     # missense-equivalent SNV VEP consequences
     self.expected["missense_snv"] = 1
     check_each([
         "missense_variant",
         "stop_lost",
         "coding_sequence_variant",
         "protein_altering_variant",
     ])

     # missense-equivalent inframe indel VEP consequences; the ref allele is
     # lengthened (presumably so the variant is classed as an indel)
     self.de_novos["ref"] = "GG"
     self.expected["missense_snv"] = 0
     self.expected["missense_indel"] = 1
     check_each(["inframe_deletion", "inframe_insertion"])
def check_enrichment(constraint, de_novos, cache_dir, male, female, threshold,
                     ratio):
    """ contrast de novo enrichment inside and outside constrained regions

    Args:
        constraint: forwarded to get_rates_by_constraint() and
            classify_de_novos_by_constraint()
        de_novos: observed de novos; must support boolean-mask indexing
            (presumably a pandas DataFrame - confirm against callers)
        cache_dir: forwarded alongside constraint
        male: forwarded to get_expected_mutations() (presumably the number
            of male probands - confirm)
        female: forwarded to get_expected_mutations() (presumably the number
            of female probands - confirm)
        threshold: forwarded alongside constraint
        ratio: forwarded alongside constraint

    Returns:
        dict holding the enrichment() result for each region class
        ('constrained_enrich', 'unconstrained_enrich') and the
        compare_regions() results for 'PTV' and 'PAV' ('PTV diff',
        'PAV diff').
    """
    rates = get_rates_by_constraint(constraint, cache_dir, threshold, ratio)

    # expected mutations per region class, constrained first
    expected = {}
    for region in ('constrained', 'unconstrained'):
        expected[region] = get_expected_mutations(
            prepare_rates(rates[region]), male, female)

    # per-variant flags marking which de novos fall in constrained regions
    inside = classify_de_novos_by_constraint(
        constraint, de_novos, cache_dir, threshold, ratio)

    observed_inside = get_de_novo_counts(de_novos[inside])
    outside = [not flag for flag in inside]
    observed_outside = get_de_novo_counts(de_novos[outside])

    result = {
        'constrained_enrich': enrichment(observed_inside,
                                         expected['constrained']),
        'unconstrained_enrich': enrichment(observed_outside,
                                           expected['unconstrained']),
    }
    result['PTV diff'] = compare_regions(
        result['constrained_enrich'], result['unconstrained_enrich'], 'PTV')
    result['PAV diff'] = compare_regions(
        result['constrained_enrich'], result['unconstrained_enrich'], 'PAV')

    return result
# Example #8
# 0
def analyse_enrichment(de_novos, trios, rates=None, plot_path=None):
    """ test each gene for an excess of de novo mutations

    Args:
        de_novos: data frame containing all the observed de novos for all
            the genes
        trios: dictionary with "male" and "female" entries, giving the
            proband counts in the population
        rates: gene-based mutation rates data frame, or None; handed to
            get_expected_mutations() as given
        plot_path: path to save the enrichment plot to, or None to skip
            plotting

    Returns:
        data frame of enrichment results from gene_enrichment(), with the
        "start_pos" column removed.
    """

    counts = get_de_novo_counts(de_novos)
    predicted = get_expected_mutations(rates, trios["male"], trios["female"])

    # per-gene P values, from the observed counts and the mutation rates
    results = gene_enrichment(predicted, counts)

    # optionally draw a manhattan plot of the enrichment P values
    if plot_path is not None:
        n_tests = 18500
        plot_enrichment(results, n_tests, plot_path,
                        p_columns=["p_lof", "p_func"])

    # the position column only exists so genes can be placed along the
    # chromosome in the Manhattan plot, so it is dropped from the output
    del results["start_pos"]

    return results
# Example #9
# 0
def analyse_enrichment(de_novos, trios, rates=None, plot_path=None):
    """ look for genes carrying more de novo mutations than expected

    Args:
        de_novos: data frame containing all the observed de novos for all
            the genes
        trios: dictionary with "male" and "female" entries, giving the
            proband counts in the population
        rates: gene-based mutation rates data frame, or None; handed to
            get_expected_mutations() as given
        plot_path: path to save the enrichment plot to, or None to skip
            plotting

    Returns:
        data frame of enrichment results from gene_enrichment(), with the
        "start_pos" column removed.
    """

    per_gene_observed = get_de_novo_counts(de_novos)
    per_gene_expected = get_expected_mutations(
        rates, trios["male"], trios["female"])

    # P values for each gene, calculated using the mutation rates
    tested = gene_enrichment(per_gene_expected, per_gene_observed)

    wants_plot = plot_path is not None
    if wants_plot:
        # manhattan plot of the enrichment P values
        test_count = 18500
        p_value_columns = ["p_lof", "p_func"]
        plot_enrichment(tested, test_count, plot_path,
            p_columns=p_value_columns)

    # "start_pos" is only needed to locate each gene on a chromosome for the
    # Manhattan plot, so it is stripped before returning
    del tested["start_pos"]

    return tested
    def test_get_de_novo_counts_synonymous(self):
        """ a synonymous variant is tallied in the synonymous_snv column
        """

        # give the de novos a synonymous consequence, and expect one count
        self.de_novos["consequence"] = 'synonymous_variant'
        self.expected["synonymous_snv"] = 1

        self.compare_tables(get_de_novo_counts(self.de_novos), self.expected)
 def test_get_de_novo_counts_synonymous(self):
     """ a synonymous variant is tallied in the synonymous_snv column
     """

     # give the de novos a synonymous consequence, and expect one count
     self.de_novos["consequence"] = 'synonymous_variant'
     self.expected["synonymous_snv"] = 1

     counts = get_de_novo_counts(self.de_novos)
     self.compare_tables(counts, self.expected)
    def test_get_de_novo_counts_type_included(self):
        """ counting still works when the table already has a "type" column
        """

        # one missense SNV is expected once the type has been set explicitly
        self.expected["missense_snv"] = 1
        self.de_novos["type"] = "snv"

        self.compare_tables(get_de_novo_counts(self.de_novos), self.expected)
 def test_get_de_novo_counts_type_included(self):
     """ counting still works when the table already has a "type" column
     """

     # one missense SNV is expected once the type has been set explicitly
     self.expected["missense_snv"] = 1
     self.de_novos["type"] = "snv"

     counts = get_de_novo_counts(self.de_novos)
     self.compare_tables(counts, self.expected)
    def test_get_de_novo_counts_nonfunctional(self):
        """ check the de novo counts for nonfunctional consequence types

        A table holding only nonfunctional consequences should raise a
        KeyError on pandas older than 0.18, and give an empty table otherwise.
        When a functional (missense) variant is also present, only that
        variant should be counted.
        """

        import re

        snv = ["transcript_ablation", "transcript_amplification",
            "incomplete_terminal_codon_variant", "stop_retained_variant",
            "mature_miRNA_variant",
            "5_prime_UTR_variant", "3_prime_UTR_variant",
            "non_coding_transcript_exon_variant", "intron_variant",
            "NMD_transcript_variant", "non_coding_transcript_variant",
            "upstream_gene_variant", "downstream_gene_variant",
            "TFBS_ablation", "TFBS_amplification", "TF_binding_site_variant",
            "regulatory_region_ablation", "regulatory_region_amplification",
            "feature_elongation", "regulatory_region_variant",
            "feature_truncation", "intergenic_variant"]

        # compare the version numerically: as plain strings '0.9.0' sorts
        # after '0.18.0'. An unparseable version is treated as modern pandas.
        version = re.match(r"(\d+)\.(\d+)", pandas.__version__)
        is_old_pandas = version is not None and \
            tuple(int(x) for x in version.groups()) < (0, 18)

        if is_old_pandas:
            # a dataframe with only nonfunctional consequences should raise an
            # error, due to difficulties in counting rows
            for cq in snv:
                self.de_novos["consequence"] = cq
                with self.assertRaises(KeyError):
                    get_de_novo_counts(self.de_novos)
        else:
            # newer pandas gives an empty table with the standard columns
            expected = DataFrame(columns=[
                'hgnc', 'chrom', 'start_pos', 'lof_indel', 'lof_snv',
                'missense_indel', 'missense_snv', 'synonymous_snv'
            ])
            for cq in snv:
                self.de_novos["consequence"] = cq
                computed = get_de_novo_counts(self.de_novos)
                self.compare_tables(computed, expected)

        # add a functional variant to the dataframe, so that the counting
        # doesn't raise an error. concat() is used since DataFrame.append()
        # was removed in pandas 2.0.
        new_row = self.de_novos.copy()
        new_row["consequence"] = "missense_variant"
        self.de_novos = pandas.concat([self.de_novos, new_row],
            ignore_index=True)
        self.expected["missense_snv"] = 1

        # only row 0 changes consequence, so the appended missense variant
        # should be the sole variant counted each time
        for cq in snv:
            self.de_novos.loc[0, "consequence"] = cq
            computed = get_de_novo_counts(self.de_novos)
            self.compare_tables(computed, self.expected)
 def test_get_de_novo_counts_nonfunctional(self):
     """ check the de novo counts for nonfunctional consequence types

     A table holding only nonfunctional consequences should raise a
     KeyError on pandas older than 0.18, and give an empty table otherwise.
     When a functional (missense) variant is also present, only that
     variant should be counted.
     """

     import re

     snv = ["transcript_ablation", "transcript_amplification",
         "incomplete_terminal_codon_variant", "stop_retained_variant",
         "mature_miRNA_variant",
         "5_prime_UTR_variant", "3_prime_UTR_variant",
         "non_coding_transcript_exon_variant", "intron_variant",
         "NMD_transcript_variant", "non_coding_transcript_variant",
         "upstream_gene_variant", "downstream_gene_variant",
         "TFBS_ablation", "TFBS_amplification", "TF_binding_site_variant",
         "regulatory_region_ablation", "regulatory_region_amplification",
         "feature_elongation", "regulatory_region_variant",
         "feature_truncation", "intergenic_variant"]

     # compare the version numerically: as plain strings '0.9.0' sorts
     # after '0.18.0'. An unparseable version is treated as modern pandas.
     version = re.match(r"(\d+)\.(\d+)", pandas.__version__)
     is_old_pandas = version is not None and \
         tuple(int(x) for x in version.groups()) < (0, 18)

     if is_old_pandas:
         # a dataframe with only nonfunctional consequences should raise an
         # error, due to difficulties in counting rows
         for cq in snv:
             self.de_novos["consequence"] = cq
             with self.assertRaises(KeyError):
                 get_de_novo_counts(self.de_novos)
     else:
         # newer pandas gives an empty table with the standard columns
         expected = DataFrame(columns=['hgnc', 'chrom', 'start_pos',
             'lof_indel', 'lof_snv', 'missense_indel', 'missense_snv',
             'synonymous_snv'])
         for cq in snv:
             self.de_novos["consequence"] = cq
             computed = get_de_novo_counts(self.de_novos)
             self.compare_tables(computed, expected)

     # add a functional variant to the dataframe, so that the counting
     # doesn't raise an error. concat() is used since DataFrame.append()
     # was removed in pandas 2.0.
     new_row = self.de_novos.copy()
     new_row["consequence"] = "missense_variant"
     self.de_novos = pandas.concat([self.de_novos, new_row],
         ignore_index=True)
     self.expected["missense_snv"] = 1

     # only row 0 changes consequence, so the appended missense variant
     # should be the sole variant counted each time
     for cq in snv:
         self.de_novos.loc[0, "consequence"] = cq
         computed = get_de_novo_counts(self.de_novos)
         self.compare_tables(computed, self.expected)