Esempio n. 1
0
def run(args):
    '''Main wrapper function for filtering VarScan2 files.

    Logs the configured filter thresholds, builds a new VCF header, loads the
    InDel and SNV records through ``load_indel`` and ``load_snv`` and writes
    everything to ``args.output_vcf``, emitting chromosomes in the order in
    which they appear in the contig lines.

    Attributes read from ``args`` in this function: ``min_normal_depth``,
    ``min_tumor_depth``, ``max_alt_freq_normal``, ``min_alt_freq_tumor``,
    ``pval_cutoff``, ``reference``, ``normal_name``, ``tumor_name`` and
    ``output_vcf``. ``args`` is also handed to the loader helpers.
    '''
    # Print the filters to the log
    logger.info('<FILTER>LowDPN=Normal DP < {0.min_normal_depth}'.format(args))
    logger.info('<FILTER>LowDPT=Tumor DP < {0.min_tumor_depth}'.format(args))
    logger.info('<FILTER>NAF=Normal FREQ >= {0.max_alt_freq_normal:.2f}'.format(args))
    logger.info('<FILTER>TAF=Tumor FREQ < {0.min_alt_freq_tumor:.2f}'.format(args))
    logger.info('<FILTER>SPV=SPV >= {0.pval_cutoff:.4e}'.format(args))
    logger.info('<FILTER>GPV=GPV >= {0.pval_cutoff:.4e}'.format(args))

    # Chromosome order list: the first capture group of `pattern` on each
    # contig line is used as the chromosome key into the variant dictionary.
    contigs = load_contigs(args)
    chrm_order = [pattern.match(line).groups()[0] for line in contigs]

    # New header with the INFO/FORMAT/FILTER definitions.  The SPV and GPV
    # descriptions close their second parenthesis so the expressions balance.
    new_header = """##fileformat=VCFv4.1
##fileDate={fdate}
##source=VarScan2
##reference=file://{ref}
{contigs}
##INFO=<ID=SS,Number=1,Type=String,Description="Somatic status call SOMATIC=somatic, LOH=loss of het, GERM=germline">
##INFO=<ID=GPV,Number=1,Type=Float,Description="Variant p-value for germline events">
##INFO=<ID=SPV,Number=1,Type=Float,Description="Somatic p-value for Somatic/LOH events">
##INFO=<ID=INDEL,Number=0,Type=Flag,Description="If this is an InDel">
##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Reads supporting reference in sample">
##FORMAT=<ID=AD,Number=1,Type=Integer,Description="Reads supporting variant in sample">
##FORMAT=<ID=FREQ,Number=1,Type=Float,Description="Variant allele frequency in sample">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Concensus genotype call">
##FORMAT=<ID=DP,Number=4,Type=Integer,Description="REF reads +,REF read -, ALT reads +, ALT reads -">
##FILTER=<ID=LowDPN,Description="Normal DP < {0.min_normal_depth}">
##FILTER=<ID=LowDPT,Description="Tumor DP < {0.min_tumor_depth}">
##FILTER=<ID=NAF,Description="(SS==SOM) && (Normal FREQ >= {0.max_alt_freq_normal:.2f})">
##FILTER=<ID=TAF,Description="(SS==SOM) && (Tumor FREQ < {0.min_alt_freq_tumor:.2f})">
##FILTER=<ID=SPV,Description="(SS==SOM) && (SPV >= {0.pval_cutoff:.4e})">
##FILTER=<ID=GPV,Description="(SS==GERM) && (GPV >= {0.pval_cutoff:.4e})">
""".format(args, fdate=datetime.date.today(), ref=os.path.abspath(args.reference),
           contigs="\n".join(contigs))

    # CHROM header line
    chromline = '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{0}\t{1}\n'.format(
                 args.normal_name, args.tumor_name)

    # Load InDels and SNVs; each loader receives the dictionary and returns it
    var_dict = {}
    var_dict = load_indel(args, var_dict)
    var_dict = load_snv(args, var_dict)

    # Write the new VCF file.  Everything written below is text, so the file
    # is opened in text mode; a binary-mode handle rejects str on Python 3.
    with open(args.output_vcf, 'w') as o:
        # Write header
        o.write(new_header)
        # Write CHROM line
        o.write(chromline)
        # Write results, chromosome by chromosome, keys sorted within each
        for chrom in chrm_order:
            if chrom not in var_dict:
                continue
            records = var_dict[chrom]
            for key in sorted(records):
                o.write(records[key] + '\n')
Esempio n. 2
0
def run(args):
    '''Main wrapper function for filtering SomaticSniper VCF files.

    Logs the configured thresholds, prepares the extra ``##INFO`` and
    ``##FILTER`` header lines, then drives a ``VcfReader`` over
    ``args.input_vcf`` to write the new header and the filtered records to
    ``args.output_vcf``.  The reader is closed even when header or filter
    processing raises.
    '''
    # Print the filters to the log
    logger.info(
        '<FILTER>LowDPN="Normal DP < {0.min_normal_depth}"'.format(args))
    logger.info('<FILTER>LowDPT="Tumor DP < {0.min_tumor_depth}"'.format(args))
    logger.info('<FILTER>LowMQT="Tumor MQ < {0.min_mapq_tumor}"'.format(args))
    logger.info(
        '<FILTER>LowMQN="Normal MQ < {0.min_mapq_normal}"'.format(args))
    logger.info('<FILTER>LowGQT="Tumor GQ < {0.min_gq_tumor}"'.format(args))
    logger.info('<FILTER>LowGQN="Normal GQ < {0.min_gq_normal}"'.format(args))
    logger.info(
        '<FILTER>LowScore="Somatic score < {0.min_somatic_score}"'.format(
            args))

    # New info and filter lines
    info = [
        '##INFO=<ID=NTYPE,Number=1,Type=String,Description="Normal type, can be REF,GERM,SOMATIC,LOH,UK">',
        '##INFO=<ID=TTYPE,Number=1,Type=String,Description="Tumor type REF,GERM,SOMATIC,LOH,UK">',
    ]
    filters = [
        '##FILTER=<ID=PASS,Description="Accept as a confident somatic mutation">',
        '##FILTER=<ID=LowDPN,Description="Normal DP < {0.min_normal_depth}">'.format(args),
        '##FILTER=<ID=LowDPT,Description="Tumor DP < {0.min_tumor_depth}">'.format(args),
        '##FILTER=<ID=LowMQT,Description="Tumor MQ < {0.min_mapq_tumor}">'.format(args),
        '##FILTER=<ID=LowMQN,Description="Normal MQ < {0.min_mapq_normal}">'.format(args),
        '##FILTER=<ID=LowGQT,Description="Tumor GQ < {0.min_gq_tumor}">'.format(args),
        '##FILTER=<ID=LowGQN,Description="Normal GQ < {0.min_gq_normal}">'.format(args),
        '##FILTER=<ID=LowScore,Description="Somatic score < {0.min_somatic_score}">'.format(args),
        '##FILTER=<ID=UK,Description="Unknown variant type">',
    ]

    # Load the contigs
    contigs = load_contigs(args)

    # Process the file
    # NOTE(review): the handle is binary ('wb'); whether that is right depends
    # on what VcfReader writes to it, which is not visible here - confirm
    # before running on Python 3.
    with open(args.output_vcf, 'wb') as o:
        sniper_reader = VcfReader(args.choice, args.tumor_name,
                                  args.normal_name, args.input_vcf)
        sniper_reader.Open()
        try:
            sniper_reader.get_header()
            sniper_reader.write_new_header(o,
                                           filters=filters,
                                           info=info,
                                           contigs=contigs)
            sniper_reader.apply_filters(o, args, SniperRecord)
        finally:
            # Release the input reader on both the success and error paths
            sniper_reader.Close()
    logger.info('Filtered and formatted VCF file: {0}'.format(
        os.path.abspath(args.output_vcf)))
Esempio n. 3
0
 def __init__(self, args):
     '''Create the germline, LOH and somatic readers and empty header state.

     One ``VcfReader`` is built per input file (``args.input_all_germline``,
     ``args.input_all_loh``, ``args.input_all_somatic``), each with the same
     ``args.choice`` and tumor/normal names.  The contigs come from
     ``load_contigs(args)``; the remaining containers start out empty.
     '''
     def make_reader(vcf_path):
         # All three readers share the same choice and sample names
         return VcfReader(args.choice, args.tumor_name, args.normal_name,
                          vcf_path)

     # Readers, created in the order germline -> LOH -> somatic
     self.germline = make_reader(args.input_all_germline)
     self.loh = make_reader(args.input_all_loh)
     self.somatic = make_reader(args.input_all_somatic)

     # Header bookkeeping: four independent empty lists plus the contigs
     self.info, self.flt = [], []
     self.contigs = load_contigs(args)
     self.other, self.chrom_order = [], []

     # Sample names
     self.tumor_name = args.tumor_name
     self.normal_name = args.normal_name
Esempio n. 4
0
def run(args):
    '''Main wrapper function for filtering Shimmer VCF files.

    Logs the configured thresholds, prepares the extra ``##FORMAT`` and
    ``##FILTER`` header lines, loads the VarSifter dictionary and the contigs,
    then drives a ``VcfReader`` over ``args.input_vcf`` to write the new
    header and the filtered records to ``args.output_vcf``.  The reader is
    closed even when header or filter processing raises.
    '''
    # Print the filters to the log.  The messages carry no trailing '>';
    # that character belongs only to the ##FILTER header-line syntax below.
    logger.info('<FILTER>LowDPN=Normal DP < {0.min_normal_depth}'.format(args))
    logger.info('<FILTER>LowDPT=Tumor DP < {0.min_tumor_depth}'.format(args))
    logger.info('<FILTER>TAF=Tumor AF < {0.min_alt_freq_tumor:.3f}'.format(args))
    logger.info('<FILTER>NAF=Normal AF >= {0.max_alt_freq_normal:.3f}'.format(args))
    logger.info('<FILTER>LowQual=QUAL < {0.min_qual}'.format(args))

    # New filter and format lines
    formats = ['##FORMAT=<ID=AF,Number=1,Type=Float,Description="Ratio of reads with alternate base">']
    filters = [
        '##FILTER=<ID=PASS,Description="Accept as a confident somatic mutation">',
        '##FILTER=<ID=LowDPN,Description="Normal DP < {0.min_normal_depth}">'.format(args),
        '##FILTER=<ID=LowDPT,Description="Tumor DP < {0.min_tumor_depth}">'.format(args),
        '##FILTER=<ID=TAF,Description="Tumor AF < {0.min_alt_freq_tumor:.3f}">'.format(args),
        '##FILTER=<ID=NAF,Description="Normal AF >= {0.max_alt_freq_normal:.3f}">'.format(args),
        '##FILTER=<ID=LowQual,Description="QUAL < {0.min_qual}">'.format(args),
    ]

    # Load the VarSifter dictionary
    varsifter = load_varsifter(args)

    # Reference header line and contigs
    reffile = '##reference=file://' + os.path.abspath(args.reference)
    contigs = load_contigs(args)

    # Process the file
    # NOTE(review): the handle is binary ('wb'); whether that is right depends
    # on what VcfReader writes to it, which is not visible here - confirm
    # before running on Python 3.
    with open(args.output_vcf, 'wb') as o:
        shimmer_reader = VcfReader(args.choice, args.tumor_name, args.normal_name, args.input_vcf)
        shimmer_reader.Open()
        try:
            shimmer_reader.get_header()
            shimmer_reader.write_new_header(o, filters=filters, formats=formats,
                                            refpath=reffile, contigs=contigs)
            shimmer_reader.apply_filters(o, args, ShimmerRecord, vsdict=varsifter)
        finally:
            # Release the input reader on both the success and error paths
            shimmer_reader.Close()