Ejemplo n.º 1
0
def filter_missing_indels(candidates, *args):
    """ flag which candidate missing indels pass the filtering criteria.
    
    The candidates are sites called in the child but not in the parents, which
    makes them possible de novo mutations. Sites that denovogear has checked
    go through a separate filtering process; because denovogear has reduced
    sensitivity for indels, this function only deals with the candidate indel
    sites that denovogear did not examine.
    
    Args:
        candidates: pandas dataframe of de novo indel sites
        *args: accepted but never used in this function
    
    Returns:
        the five threshold checks below combined with "&", i.e. a boolean
        mask that is True where a site passes every check (the sites
        themselves are not subsetted here).
    """
    
    counts = extract_alt_and_ref_counts(candidates)
    depths = get_depths_and_proportions(counts)
    
    # summarise the two parents per site: the lower of their depths and the
    # higher of their proportions, stored as extra columns on the depths table
    parental_depths = depths[["mom_depth", "dad_depth"]]
    depths["min_parent_depth"] = parental_depths.min(axis=1)
    parental_proportions = depths[["mom_prp", "dad_prp"]]
    depths["max_parent_proportion"] = parental_proportions.max(axis=1)
    
    # child-side thresholds
    child_has_alts = depths["child_alts"] > 2
    child_prp_ok = depths["child_prp"] > 0.2
    
    # parent-side thresholds
    parents_lack_alts = counts["min_parent_alt"] < 2
    parents_deep_enough = depths["min_parent_depth"] > 7
    parents_prp_ok = depths["max_parent_proportion"] < 0.1
    
    # a site passes only when every individual check is true
    passes = (
        child_has_alts
        & parents_lack_alts
        & parents_deep_enough
        & parents_prp_ok
        & child_prp_ok
    )
    return passes
Ejemplo n.º 2
0
 def test_get_depths_and_proportions(self):
     ''' get_depths_and_proportions() returns the expected table for the
     fixture counts
     '''
     
     # expected columns, two rows each; the *_prp values are written out as
     # the matching *_alts value divided by the matching *_depth value
     columns = {
         'child_depth': [90, 60],
         'dad_depth': [61, 61],
         'mom_depth': [36, 40],
         'child_alts': [55, 30],
         'dad_alts': [1, 1],
         'mom_alts': [1, 0],
         'child_prp': [55/90.0, 30/60.0],
         'dad_prp': [1/61.0, 1/61.0],
         'mom_prp': [1/36.0, 0/40.0],
     }
     
     # share the fixture's index so the rows of both tables line up
     expected = DataFrame(columns, index=self.counts.index)
     observed = get_depths_and_proportions(self.counts)
     
     self.compare_tables(observed, expected)
Ejemplo n.º 3
0
    def test_get_depths_and_proportions(self):
        ''' the table from get_depths_and_proportions() matches fixed values
        '''

        # literal expected values, two rows per column
        child_depth, dad_depth, mom_depth = [90, 60], [61, 61], [36, 40]
        child_alts, dad_alts, mom_alts = [55, 30], [1, 1], [1, 0]
        child_prp = [55 / 90.0, 30 / 60.0]
        dad_prp = [1 / 61.0, 1 / 61.0]
        mom_prp = [1 / 36.0, 0 / 40.0]

        # reuse the index of the fixture counts for the expected table
        expected = DataFrame(
            {
                'child_depth': child_depth,
                'dad_depth': dad_depth,
                'mom_depth': mom_depth,
                'child_alts': child_alts,
                'dad_alts': dad_alts,
                'mom_alts': mom_alts,
                'child_prp': child_prp,
                'dad_prp': dad_prp,
                'mom_prp': mom_prp
            },
            index=self.counts.index)

        result = get_depths_and_proportions(self.counts)
        self.compare_tables(result, expected)