def run(args):
    '''Main wrapper function for filtering VarScan2 files.

    Logs the active filter thresholds, builds a new VCF v4.1 header, loads
    the InDel and SNV records through ``load_indel`` and ``load_snv`` and
    writes header, ``#CHROM`` line and records to ``args.output_vcf``.
    Records are emitted contig by contig in the order of the lines returned
    by ``load_contigs`` and, within a contig, in sorted key order.

    Args:
        args: parsed options. This function reads ``min_normal_depth``,
            ``min_tumor_depth``, ``max_alt_freq_normal``,
            ``min_alt_freq_tumor``, ``pval_cutoff``, ``reference``,
            ``normal_name``, ``tumor_name`` and ``output_vcf``; the whole
            object is also handed to the loader helpers.
    '''
    # Print the filters to the log
    logger.info('<FILTER>LowDPN=Normal DP < {0.min_normal_depth}'.format(args))
    logger.info('<FILTER>LowDPT=Tumor DP < {0.min_tumor_depth}'.format(args))
    logger.info('<FILTER>NAF=Normal FREQ >= {0.max_alt_freq_normal:.2f}'.format(args))
    logger.info('<FILTER>TAF=Tumor FREQ < {0.min_alt_freq_tumor:.2f}'.format(args))
    logger.info('<FILTER>SPV=SPV >= {0.pval_cutoff:.4e}'.format(args))
    logger.info('<FILTER>GPV=GPV >= {0.pval_cutoff:.4e}'.format(args))

    # Chromosome order list: first capture group of ``pattern`` per contig line
    contigs = load_contigs(args)
    chrm_order = [pattern.match(line).groups()[0] for line in contigs]

    # New header, assembled line by line so that an empty contig list cannot
    # leave a blank line in the middle of the meta-information block.
    header_lines = [
        '##fileformat=VCFv4.1',
        '##fileDate={0}'.format(datetime.date.today()),
        '##source=VarScan2',
        '##reference=file://{0}'.format(os.path.abspath(args.reference)),
    ]
    header_lines.extend(contigs)
    header_lines.extend([
        '##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status call SOMATIC=somatic, LOH=loss of het, GERM=germline">',
        '##INFO=<ID=GPV,Number=1,Type=Float,Description="Variant p-value for germline events">',
        '##INFO=<ID=SPV,Number=1,Type=Float,Description="Somatic p-value for Somatic/LOH events">',
        '##INFO=<ID=INDEL,Number=0,Type=Flag,Description="If this is an InDel">',
        '##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Reads supporting reference in sample">',
        '##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Reads supporting variant in sample">',
        '##FORMAT=<ID=FREQ,Number=1,Type=Float,Description="Variant allele frequency in sample">',
        '##FORMAT=<ID=GT,Number=1,Type=String,Description="Concensus genotype call">',
        # NOTE(review): DP is declared with Number=4 (per-strand counts);
        # confirm this matches the records produced by the loaders.
        '##FORMAT=<ID=DP,Number=4,Type=Integer,Description="REF reads +,REF read -, ALT reads +, ALT reads -">',
    ])
    filter_templates = [
        '##FILTER=<ID=LowDPN,Description="Normal DP < {0.min_normal_depth}">',
        '##FILTER=<ID=LowDPT,Description="Tumor DP < {0.min_tumor_depth}">',
        '##FILTER=<ID=NAF,Description="(SS==SOM) && (Normal FREQ >= {0.max_alt_freq_normal:.2f})">',
        '##FILTER=<ID=TAF,Description="(SS==SOM) && (Tumor FREQ < {0.min_alt_freq_tumor:.2f})">',
        # The closing parenthesis after the cutoff was missing on these two.
        '##FILTER=<ID=SPV,Description="(SS==SOM) && (SPV >= {0.pval_cutoff:.4e})">',
        '##FILTER=<ID=GPV,Description="(SS==GERM) && (GPV >= {0.pval_cutoff:.4e})">',
    ]
    header_lines.extend(template.format(args) for template in filter_templates)
    new_header = '\n'.join(header_lines) + '\n'

    # CHROM header line
    chromline = '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{0}\t{1}\n'.format(
        args.normal_name, args.tumor_name)

    # Load InDels and SNVs; each loader receives and returns the dictionary
    var_dict = {}
    var_dict = load_indel(args, var_dict)
    var_dict = load_snv(args, var_dict)

    # Write the new VCF file. Text mode: everything written below is str,
    # which a binary-mode handle rejects on Python 3.
    with open(args.output_vcf, 'w') as out:
        out.write(new_header)
        out.write(chromline)
        for chrom in chrm_order:
            if chrom in var_dict:
                records = var_dict[chrom]
                for pos in sorted(records):
                    out.write(records[pos] + '\n')
def run(args):
    '''Main wrapper function for filtering SomaticSniper VCF files.

    Logs the active filter thresholds, prepares the extra ``##INFO`` and
    ``##FILTER`` header lines, then opens ``args.output_vcf`` and hands the
    output handle to a ``VcfReader`` built on ``args.input_vcf``, which
    writes the new header and applies the filters with ``SniperRecord``.

    Args:
        args: parsed options. This function reads ``min_normal_depth``,
            ``min_tumor_depth``, ``min_mapq_tumor``, ``min_mapq_normal``,
            ``min_gq_tumor``, ``min_gq_normal``, ``min_somatic_score``,
            ``choice``, ``tumor_name``, ``normal_name``, ``input_vcf`` and
            ``output_vcf``; the whole object is also passed to
            ``load_contigs`` and ``apply_filters``.
    '''
    # (filter ID, description) pairs; the log lines and the header lines are
    # both derived from this table so the two can never disagree.
    thresholds = [
        ('LowDPN', 'Normal DP < {0.min_normal_depth}'),
        ('LowDPT', 'Tumor DP < {0.min_tumor_depth}'),
        ('LowMQT', 'Tumor MQ < {0.min_mapq_tumor}'),
        ('LowMQN', 'Normal MQ < {0.min_mapq_normal}'),
        ('LowGQT', 'Tumor GQ < {0.min_gq_tumor}'),
        ('LowGQN', 'Normal GQ < {0.min_gq_normal}'),
        ('LowScore', 'Somatic score < {0.min_somatic_score}'),
    ]
    thresholds = [(fid, text.format(args)) for fid, text in thresholds]

    # Print the filters to the log
    for fid, text in thresholds:
        logger.info('<FILTER>{0}="{1}"'.format(fid, text))

    # New info and filter lines
    info = [
        '##INFO=<ID=NTYPE,Number=1,Type=String,Description="Normal type, can be REF,GERM,SOMATIC,LOH,UK">',
        '##INFO=<ID=TTYPE,Number=1,Type=String,Description="Tumor type REF,GERM,SOMATIC,LOH,UK">',
    ]
    filters = ['##FILTER=<ID=PASS,Description="Accept as a confident somatic mutation">']
    filters.extend(
        '##FILTER=<ID={0},Description="{1}">'.format(fid, text)
        for fid, text in thresholds)
    filters.append('##FILTER=<ID=UK,Description="Unknown variant type">')

    # Load the contigs
    contigs = load_contigs(args)

    # Process the file.
    # NOTE(review): binary mode is kept because every write happens inside
    # VcfReader, which is not visible here; confirm it matches what the
    # reader actually writes.
    with open(args.output_vcf, 'wb') as o:
        sniper_reader = VcfReader(args.choice, args.tumor_name,
                                  args.normal_name, args.input_vcf)
        sniper_reader.Open()
        try:
            sniper_reader.get_header()
            sniper_reader.write_new_header(o, filters=filters, info=info,
                                           contigs=contigs)
            sniper_reader.apply_filters(o, args, SniperRecord)
        finally:
            # Close the reader even when header parsing or filtering fails.
            sniper_reader.Close()

    logger.info('Filtered and formatted VCF file: {0}'.format(
        os.path.abspath(args.output_vcf)))
def __init__(self, args):
    '''Build the three input readers and the empty header bookkeeping.

    Args:
        args: parsed options. Reads ``choice``, ``tumor_name``,
            ``normal_name``, ``input_all_germline``, ``input_all_loh`` and
            ``input_all_somatic``; the whole object is also passed to
            ``load_contigs``.
    '''
    tumor, normal = args.tumor_name, args.normal_name

    def make_reader(source):
        # All three readers share the same choice and sample names.
        return VcfReader(args.choice, tumor, normal, source)

    # One reader per input, constructed germline -> LOH -> somatic
    self.germline = make_reader(args.input_all_germline)
    self.loh = make_reader(args.input_all_loh)
    self.somatic = make_reader(args.input_all_somatic)

    # Containers that start out empty, plus the contigs from load_contigs
    self.info, self.flt = [], []
    self.contigs = load_contigs(args)
    self.other, self.chrom_order = [], []

    # Sample names
    self.tumor_name = tumor
    self.normal_name = normal
def run(args):
    '''Main wrapper function for filtering Shimmer VCF files.

    Logs the active filter thresholds, prepares the extra ``##FORMAT`` and
    ``##FILTER`` header lines, loads the VarSifter dictionary and the
    contigs, then opens ``args.output_vcf`` and hands the output handle to a
    ``VcfReader`` built on ``args.input_vcf``, which writes the new header
    and applies the filters with ``ShimmerRecord``.

    Args:
        args: parsed options. This function reads ``min_normal_depth``,
            ``min_tumor_depth``, ``min_alt_freq_tumor``,
            ``max_alt_freq_normal``, ``min_qual``, ``reference``, ``choice``,
            ``tumor_name``, ``normal_name``, ``input_vcf`` and
            ``output_vcf``; the whole object is also passed to
            ``load_varsifter``, ``load_contigs`` and ``apply_filters``.
    '''
    # (filter ID, description) pairs; the log lines and the header lines are
    # both derived from this table. The TAF/NAF/LowQual log messages used to
    # carry a stray trailing '>' copied from the header template.
    thresholds = [
        ('LowDPN', 'Normal DP < {0.min_normal_depth}'),
        ('LowDPT', 'Tumor DP < {0.min_tumor_depth}'),
        ('TAF', 'Tumor AF < {0.min_alt_freq_tumor:.3f}'),
        ('NAF', 'Normal AF >= {0.max_alt_freq_normal:.3f}'),
        ('LowQual', 'QUAL < {0.min_qual}'),
    ]
    thresholds = [(fid, text.format(args)) for fid, text in thresholds]

    # Print the filters to the log
    for fid, text in thresholds:
        logger.info('<FILTER>{0}={1}'.format(fid, text))

    # New filter and format lines
    formats = ['##FORMAT=<ID=AF,Number=1,Type=Float,Description="Ratio of reads with alternate base">']
    filters = ['##FILTER=<ID=PASS,Description="Accept as a confident somatic mutation">']
    filters.extend(
        '##FILTER=<ID={0},Description="{1}">'.format(fid, text)
        for fid, text in thresholds)

    # Load the VarSifter dictionary
    varsifter = load_varsifter(args)

    # Load the contigs
    reffile = '##reference=file://' + os.path.abspath(args.reference)
    contigs = load_contigs(args)

    # Process the file.
    # NOTE(review): binary mode is kept because every write happens inside
    # VcfReader, which is not visible here; confirm it matches what the
    # reader actually writes.
    with open(args.output_vcf, 'wb') as o:
        shimmer_reader = VcfReader(args.choice, args.tumor_name,
                                   args.normal_name, args.input_vcf)
        shimmer_reader.Open()
        try:
            shimmer_reader.get_header()
            shimmer_reader.write_new_header(o, filters=filters,
                                            formats=formats, refpath=reffile,
                                            contigs=contigs)
            shimmer_reader.apply_filters(o, args, ShimmerRecord,
                                         vsdict=varsifter)
        finally:
            # Close the reader even when header parsing or filtering fails.
            shimmer_reader.Close()