def filter_missing_indels(candidates, *args):
    """ assess candidate indels that were not examined by denovogear.

    The candidates are sites called in the child but not in either parent,
    which makes them possible de novo mutations. Sites checked by denovogear
    go through a separate filtering process; because denovogear is less
    sensitive for indels, this function covers the candidate indel sites
    that denovogear did not examine.

    Args:
        candidates: pandas dataframe of de novo indel sites
        *args: extra positional arguments are accepted, but never used.

    Returns:
        the five criteria below combined with "&", i.e. pass/fail values for
        the sites rather than a subset of the candidates (presumably a
        boolean pandas Series with one entry per candidate - confirm against
        the callers).
    """

    counts = extract_alt_and_ref_counts(candidates)
    depths = get_depths_and_proportions(counts)

    # summarise the two parents by the lower of their depths and the higher
    # of their alt proportions
    parent_depths = depths[["mom_depth", "dad_depth"]]
    depths["min_parent_depth"] = parent_depths.min(axis=1)
    parent_proportions = depths[["mom_prp", "dad_prp"]]
    depths["max_parent_proportion"] = parent_proportions.max(axis=1)

    # the criteria for the missing indels, in the order they are combined.
    # Note that the parental alt count is read from the counts table, whereas
    # all of the other criteria are read from the depths table.
    criteria = [
        depths["child_alts"] > 2,
        counts["min_parent_alt"] < 2,
        depths["min_parent_depth"] > 7,
        depths["max_parent_proportion"] < 0.1,
        depths["child_prp"] > 0.2,
    ]

    # a site has to meet every one of the criteria
    passing = criteria[0]
    for criterion in criteria[1:]:
        passing = passing & criterion

    return passing
def test_get_depths_and_proportions(self):
    ''' check get_depths_and_proportions against a hand-written table
    '''

    # one entry per row of self.counts: depths, then alt counts, then the
    # alt count divided by the depth, for each of the child, dad and mom
    columns = {
        'child_depth': [90, 60],
        'dad_depth': [61, 61],
        'mom_depth': [36, 40],
        'child_alts': [55, 30],
        'dad_alts': [1, 1],
        'mom_alts': [1, 0],
        'child_prp': [55/90.0, 30/60.0],
        'dad_prp': [1/61.0, 1/61.0],
        'mom_prp': [1/36.0, 0/40.0],
    }
    row_index = self.counts.index
    expected = DataFrame(columns, index=row_index)

    observed = get_depths_and_proportions(self.counts)
    self.compare_tables(observed, expected)
def test_get_depths_and_proportions(self):
    ''' check that the depths, alt counts and proportions match expectations
    '''

    # total depth for each family member, one entry per row of self.counts
    table = {
        'child_depth': [90, 60],
        'dad_depth': [61, 61],
        'mom_depth': [36, 40],
    }

    # counts for the alt allele
    table.update({
        'child_alts': [55, 30],
        'dad_alts': [1, 1],
        'mom_alts': [1, 0],
    })

    # the alt count as a fraction of the depth
    table.update({
        'child_prp': [55 / 90.0, 30 / 60.0],
        'dad_prp': [1 / 61.0, 1 / 61.0],
        'mom_prp': [1 / 36.0, 0 / 40.0],
    })

    expected = DataFrame(table, index=self.counts.index)

    result = get_depths_and_proportions(self.counts)
    self.compare_tables(result, expected)