def analyse_gene(ensembl, mut_dict, cadd, symbol, de_novos, constraint,
                 weights, iterations=1000000):
    ''' analyse the severity of de novos found in a gene
    
    Args:
        ensembl: EnsemblRequest object, for transcript coordinates and sequence
        mut_dict: list of sequence-context mutation probabilities.
        cadd: pysam.TabixFile object for CADD scores (SNVs only)
        symbol: HGNC symbol for current gene
        de_novos: list of de novo mutations observed in current gene. Each entry
            is a dict. The 'pos' key is read here to find the mutated sites;
            the full entries are also handed to get_severity(), which
            presumably reads the 'ref', 'alt', and 'consequence' keys
            (TODO confirm against get_severity).
        constraint: constraint data for the gene, passed unchanged to
            get_constrained_positions() along with the gene symbol (the
            expected format is not visible from this function).
        weights: dictionary of objects to weight CADD severity scores. We have
            different weights for protein-truncating and protein-altering
            variants, and within the protein-altering variants, different
            weights for variants in constrained and unconstrained regions.
        iterations: value forwarded as the final argument to analyse(),
            presumably the number of simulations used to build the null
            distribution. Defaults to 1000000, the previously hard-coded value.
    
    Returns:
        'NA' if load_gene() raises an IndexError for the gene, otherwise the
        result of analyse(): the p-value for the observed total severity with
        respect to a null distribution of severities for the gene.
    '''

    # NOTE: the position key is 'pos' (not 'position'); this is the only key
    # of the de novo entries that this function reads directly.
    sites = [de_novo['pos'] for de_novo in de_novos]
    try:
        # create gene/transcript for de novo mutations
        transcripts = load_gene(ensembl, symbol, sites)
    except IndexError:
        # callers receive the 'NA' string rather than an exception when the
        # gene cannot be loaded
        return 'NA'

    # get per site/allele mutation rates
    rates_by_cq = get_site_sampler(transcripts, mut_dict)

    chrom = transcripts[0].get_chrom()

    # get per site/allele severity scores, weighted by enrichment of missense
    # in known dominant at different severity thresholds
    constrained = get_constrained_positions(ensembl, constraint, symbol)
    severity = get_severity(cadd, chrom, rates_by_cq, weights, constrained)

    # convert the rates per site per consequence to rates per site. The
    # consequences are visited in sorted order so the samplers are always
    # appended in the same sequence.
    rates = WeightedChoice()
    for cq in sorted(rates_by_cq):
        rates.append(rates_by_cq[cq])

    # get summed score for observed de novos
    observed = sum(get_severity(cadd, chrom, de_novos, weights, constrained))

    # simulate distribution of summed scores within transcript
    return analyse(rates, severity, observed, len(de_novos), iterations)
Example #2
0
    def test_append(self):
        """ check the summed rates after appending one WeightedChoice to another
        """

        # the sampler that receives the extra choices
        receiver = WeightedChoice()
        receiver.add_choice(1, 0.5)

        # the sampler whose choices get appended
        donor = WeightedChoice()
        donor.add_choice(2, 1)

        receiver.append(donor)

        # the receiver should now total both rates, while the donor keeps
        # only its own rate
        self.assertEqual(receiver.get_summed_rate(), 1.5)
        self.assertEqual(donor.get_summed_rate(), 1.0)
 def test_append(self):
     """ check the summed rates after appending one WeightedChoice to another
     """

     # the sampler that receives the extra choices
     receiver = WeightedChoice()
     receiver.add_choice(1, 0.5)

     # the sampler whose choices get appended
     donor = WeightedChoice()
     donor.add_choice(2, 1)

     receiver.append(donor)

     # the receiver should now total both rates, while the donor keeps
     # only its own rate
     self.assertEqual(receiver.get_summed_rate(), 1.5)
     self.assertEqual(donor.get_summed_rate(), 1.0)