def test_check_coding(self):
    ''' check_coding flags coding sites, accepts an alternative consequence
    column via cq_name, and raises KeyError for an absent column
    '''

    consequences = ['synonymous_variant', 'intergenic_variant']
    expected = Series([True, False])

    # default column name: first consequence is coding, second is not
    self.variants['consequence'] = consequences
    coding = check_coding(self.variants)
    self.assertTrue((coding == expected).all())

    # same data under a differently named column, selected with cq_name
    self.variants = self.variants.drop('consequence', axis=1)
    self.variants['cq'] = consequences
    coding = check_coding(self.variants, cq_name='cq')
    self.assertTrue((coding == expected).all())

    # a column that does not exist must raise rather than pass silently
    with self.assertRaises(KeyError):
        check_coding(self.variants, cq_name='UNKNOWN')
# Example #2
# 0
def main():
    ''' screen de novo candidates, combine them, and write the result table

    Options come from get_options(). Two candidate sets (args.de_novos and
    args.de_novos_indels) are screened with screen_candidates(), combined
    into one table, optionally restricted to coding sites and to independent
    (non-recurrent) events, annotated with sex, and written to args.output
    as a tab-separated file with missing values shown as 'NA'.
    '''
    args = get_options()

    # set a blank dataframe with the expected columns
    de_novos = pandas.DataFrame(columns=["person_stable_id", "chrom", "pos",
        "ref", "alt", "symbol", "consequence", "max_af", "pp_dnm"])
    de_novos['pos'] = de_novos['pos'].astype(int)

    denovogear = screen_candidates(args.de_novos, args.sample_fails,
        filter_denovogear_sites, maf=0.01, fix_symbols=args.fix_missing_genes,
        annotate_only=args.annotate_only, build=args.build)

    indels = screen_candidates(args.de_novos_indels, args.sample_fails_indels,
        filter_missing_indels, maf=0.0001, fix_symbols=args.fix_missing_genes,
        annotate_only=args.annotate_only, build=args.build)

    # DataFrame.append was removed in pandas 2.0; pandas.concat performs the
    # same row-wise combination and is available in old and new versions.
    de_novos = pandas.concat([de_novos, denovogear, indels], ignore_index=True)

    # in annotate-only mode nothing is dropped here
    if not args.include_noncoding and not args.annotate_only:
        de_novos = de_novos[check_coding(de_novos)]

    if args.last_base_sites is not None:
        de_novos = change_conserved_last_base_consequence(de_novos, args.last_base_sites)

    # include sex, to later check if chrX candidates are likely pathogenic.
    families = pandas.read_table(args.families, sep='\t')
    sex = dict(zip(families['individual_id'], families['sex']))
    de_novos['sex'] = de_novos['person_stable_id'].map(sex)

    if not args.include_recurrent:
        family_ids = dict(zip(families['individual_id'], families['family_id']))
        independent = check_independence(de_novos, family_ids)

        # annotate-only keeps every row and folds the result into 'pass';
        # otherwise the non-independent rows are removed.
        if args.annotate_only:
            de_novos['pass'] = de_novos['pass'] & independent
        else:
            de_novos = de_novos[independent]

    # NOTE(review): hard-coded sample IDs are always excluded from the output;
    # the reason is not visible in this function - confirm they are still needed.
    ids = ['DDDP123847', 'DDDP138759', 'DDDP135949', 'DDDP100238', 'DDDP125725', 'DDDP118316']
    de_novos = de_novos[~de_novos.person_stable_id.isin(ids)]

    de_novos.to_csv(args.output, sep="\t", index=False, na_rep='NA')
def main():
    ''' screen de novo candidates, combine them, and write the result table

    Options come from get_options(). Two candidate sets (args.de_novos and
    args.de_novos_indels) are screened with screen_candidates(), combined
    into one table, optionally restricted to coding sites and to independent
    (non-recurrent) events, annotated with sex, and written to args.output
    as a tab-separated file with missing values shown as 'NA'.
    '''
    args = get_options()

    # set a blank dataframe with the expected columns
    de_novos = pandas.DataFrame(columns=["person_stable_id", "chrom", "pos",
        "ref", "alt", "symbol", "consequence", "max_af", "pp_dnm"])

    denovogear = screen_candidates(args.de_novos, args.sample_fails,
        filter_denovogear_sites, maf=0.01, fix_symbols=args.fix_missing_genes,
        annotate_only=args.annotate_only)

    indels = screen_candidates(args.de_novos_indels, args.sample_fails_indels,
        filter_missing_indels, maf=0.0, fix_symbols=args.fix_missing_genes,
        annotate_only=args.annotate_only)

    # DataFrame.append was removed in pandas 2.0; pandas.concat performs the
    # same row-wise combination and is available in old and new versions.
    de_novos = pandas.concat([de_novos, denovogear, indels], ignore_index=True)

    # in annotate-only mode nothing is dropped here
    if not args.include_noncoding and not args.annotate_only:
        de_novos = de_novos[check_coding(de_novos)]

    if args.last_base_sites is not None:
        de_novos = change_conserved_last_base_consequence(de_novos, args.last_base_sites)

    # include sex, to later check if chrX candidates are likely pathogenic.
    families = pandas.read_table(args.families, sep='\t')
    sex = dict(zip(families['individual_id'], families['sex']))
    de_novos['sex'] = de_novos['person_stable_id'].map(sex)

    if not args.include_recurrent:
        family_ids = dict(zip(families['individual_id'], families['family_id']))
        independent = check_independence(de_novos, family_ids)

        # annotate-only keeps every row and folds the result into 'pass';
        # otherwise the non-independent rows are removed.
        if args.annotate_only:
            de_novos['pass'] = de_novos['pass'] & independent
        else:
            de_novos = de_novos[independent]

    # to_csv has no 'na_value' keyword (it raises TypeError); the parameter
    # that controls the missing-value text is 'na_rep'.
    de_novos.to_csv(args.output, sep="\t", index=False, na_rep='NA')