Beispiel #1
0
def main():
    """Convert the data lines of a VCF file into BED records on stdout.

    The VCF path is the first positional command-line argument.  Every data
    line that survives the optional ``--chr`` and ``--filter`` selections is
    printed as a tab-separated record ``chrom, pos-1, pos, id``.  With
    ``--dump`` a fifth column is appended that holds REF, ALT, QUAL, FILTER,
    INFO and the comma-joined remaining columns.

    Exits with a usage error (status 2) when no input file is given.
    """
    usage = "usage: %prog [options] arg"
    parser = OptionParser(usage)
    parser.add_option("--filter", type="string", dest="filter", help="extract records matching filter (default is None)", default=None)
    parser.add_option("--addchr", action="store_true", dest="addchr",  help="pre-pend 'chr' to chrom column ", default=False)
    parser.add_option("--siteinfo", action="store_true", dest="siteinfo", help="use if vcf only has site information and lacks FORMAT column")
    parser.add_option("--dump", action="store_true", dest="dump", help="dump everything after teh ID column in the 4th bed column")
    parser.add_option("--chr", type="string", dest="chr", default=None, help="restrct to chromosome number specified by --chr")
    (options, args) = parser.parse_args()

    # Previously a missing argument surfaced as a bare IndexError on args[0].
    if not args:
        parser.error("a VCF file argument is required")

    vcfilename = args[0]
    # The context manager guarantees the handle is closed, even on errors.
    with open(vcfilename, 'r') as vcfh:
        # instantiate a VcfFile object
        vcfobj = VcfFile(vcfilename)
        # parse its metainfo lines (ones that begin with ##) and the header
        vcfobj.parseMetaAndHeaderLines(vcfh)

        for dataline in vcfobj.yieldVcfDataLine(vcfh):
            fields = dataline.strip().split('\t')
            if options.siteinfo:
                (chrom, pos, var_id, ref, alt, qual, filtercode, info) = fields[0:8]
                extra_fields = fields[8:]
            else:
                # Unpacking nine names also enforces that a ninth (FORMAT)
                # column is present; shorter lines raise ValueError.
                (chrom, pos, var_id, ref, alt, qual, filtercode, info, _vcf_format) = fields[0:9]
                extra_fields = fields[9:]

            # --chr is compared against the chromosome name as written in
            # the file, i.e. before any 'chr' prefix is added.
            if options.chr is not None and chrom != options.chr:
                continue
            if options.addchr:
                chrom = 'chr' + chrom
            if options.filter is not None and filtercode != options.filter:
                continue

            # BED intervals are 0-based and half-open; a VCF position is 1-based.
            (start, end) = (int(pos) - 1, int(pos))
            bed_columns = [chrom, str(start), str(end), var_id]
            if options.dump:
                gstrings = ",".join(extra_fields)
                # NOTE(review): the dumped fields are concatenated without any
                # separator, exactly as before -- confirm whether a delimiter
                # was intended here.
                bed_columns.append("".join([ref, alt, qual, filtercode, info, gstrings]))

            print("\t".join(bed_columns))
Beispiel #2
0
def main():
    """Print records of a VCF file that overlap (or, with --v, do not overlap)
    the regions of a second VCF or BED file.

    Positional arguments: the VCF file to scan, then the VCF/BED file that
    supplies the regions.  The second file's type is chosen from its file
    extension (``bed`` or ``vcf``); any other extension is a usage error.
    Unless ``--noheader`` is given, the header of the first file is parsed
    and printed before the selected data lines.

    Exits with a usage error (status 2) when fewer than two files are given
    or the second file has an unsupported extension.
    """
    usage = "usage: %prog [options] vcf_file_one vcf|bed_file_two\n\nFind regions in the first vcf file that overlap regions of the second vcf or bed file\n"
    parser = OptionParser(usage)
    parser.add_option("--minCols", type="int", dest="mincols", default=1, help="mininum basepair overlap (default is one)")
    parser.add_option("--v", action="store_true", dest="reverse",  help="Print regions in first vcf  that DO NOT overlap second vcf|bed file")
    parser.add_option("--filter", type="string", dest="filter", default=None, help="intersect records only set with filter (default is None")
    parser.add_option("--info", type="string", dest="infotag", help="INFO tag id that annotates what type of variant the VCF record is", default="TYPE")
    parser.add_option("--type", type="string", dest="variantype", help="type of variant (SNP INS DEL)", default=None)
    parser.add_option("--noheader", action="store_true", dest="noheader", help="VCF file one  has no header line", default=False)
    parser.add_option("--nochrprefix", action="store_false", dest="chrprefix", help="use if the bed  doesn't  have chr prefix in chrom column", default=True)

    (options, args) = parser.parse_args()

    # Previously missing arguments surfaced as a bare IndexError.
    if len(args) < 2:
        parser.error("two input files are required: vcf_file_one and a vcf|bed file")

    sys.stderr.write("intersecting two files ...\n")

    vcf_file_one = args[0]
    in2_fname = args[1]
    in2_fname_ext = os.path.splitext(in2_fname)[1][1:]

    if in2_fname_ext == "bed":
        # assumes binned_bitsets_from_file consumes the whole file before
        # returning its chrom -> bitset mapping, so the handle can be closed.
        with open(in2_fname) as bed_fh:
            bitsets = binned_bitsets_from_file(bed_fh)
    elif in2_fname_ext == "vcf":
        bitsets = binned_bitsets_from_vcffile(in2_fname, options.filter)
    else:
        # Without this branch `bitsets` stayed unbound and the first data
        # line died with a NameError.
        parser.error("second file must end in .bed or .vcf (got '%s')" % in2_fname)

    # The pattern does not depend on the record, so build it once.
    type_regex = None
    if options.variantype is not None:
        type_regex = re.compile(options.infotag + '=(' + options.variantype + ')')

    vcfobj = VcfFile(vcf_file_one)
    with open(vcf_file_one, 'r') as vcfh:
        if not options.noheader:
            vcfobj.parseMetaAndHeaderLines(vcfh)
            header = vcfobj.returnHeader()
            print(header)

        for dataline in vcfobj.yieldVcfDataLine(vcfh):
            fields = dataline.strip().split('\t')
            (chrom, pos, var_id, ref, alt, qual, filtercode, info) = fields[0:8]
            # 0-based, half-open one-base interval for the 1-based VCF position
            (start, end) = (int(pos) - 1, int(pos))

            # pass the filter code
            if options.filter is not None and filtercode != options.filter:
                continue

            # check to see if record is the correct variant TYPE
            if type_regex is not None and type_regex.search(info) is None:
                continue

            if options.chrprefix:
                chrom = "chr" + chrom

            # The queried window is always end - start == 1 wide.
            overlaps = (chrom in bitsets and
                        bitsets[chrom].count_range(start, end - start) >= options.mincols)

            # Default mode prints overlapping records; --v prints the others.
            if bool(overlaps) != bool(options.reverse):
                print(dataline)
Beispiel #3
0
def main():
    """Print VCF records whose allele frequency lies within [--geq, --leq].

    The single positional argument is the VCF file.  For every data line the
    INFO column is searched for the variant-type tag (``--variantag``) and
    the allele-frequency tag (``--maftag``); records lacking either are
    skipped.  Selected records are printed to stdout (unless ``--quiet``)
    and a tab-separated summary line is written to ``freq.log`` in the
    current directory.

    Exits with status 1 when the argument count is wrong or, unless
    ``--noheader`` is given, when either tag is missing from the ##INFO
    header lines.
    """
    usage = "usage: %prog [options] maf file.vcf"
    parser = OptionParser(usage)

    parser.add_option("--maftag", type="string", dest="maftag", help="INFO tag id that annotates the allele freq of the record", default="AF")
    parser.add_option("--variantag", type="string", dest="vtag", help="INFO tag that annotates the type of variant type", default="VT")
    parser.add_option("--variantype", type="string", dest="variantype", help="type of variant (SNP INS DEL)", default=None)
    parser.add_option("--filter", type="string", dest="filter", help="extract records matching filter (default is None)", default=None)
    parser.add_option("--noheader", action="store_true", dest="noheader", help="VCF file  has no header file", default=False)
    parser.add_option("--quiet", action="store_true", dest="quiet", help="don't print vcf output to stdout", default=False)
    parser.add_option("--leq", type="float", dest="leq", default=1.0, help="keep variants with AF <= (default 1)")
    parser.add_option("--geq", type="float", dest="geq", default=0.0, help="keep variants with AF >= (default 0)")
    (options, args) = parser.parse_args()

    if len(args) != 1:
        sys.stderr.write(usage + "\n")
        sys.exit(1)
    vcfilename = args[0]

    # INFO is a ';'-separated list of KEY=VALUE entries.  Anchoring on the
    # entry boundaries keeps e.g. "AF" from matching inside "AMR_AF", and
    # lets a tag match when it is the last entry (no trailing ';').
    entry_start = r'(?:^|;)'
    entry_end = r'(?:;|$)'
    if options.variantype is None:
        variant_value = r'(\w+)'
    else:
        # --variantype is inserted verbatim, as before.
        variant_value = '(' + options.variantype + ')'
    variant_regex = re.compile(
        entry_start + re.escape(options.vtag) + '=' + variant_value + entry_end)
    # Accept any plain or exponent-form number ("1", "1.0", "0", "1e-05"),
    # not only values written as "0.<digits>".  For comma-separated lists
    # the first value is used.
    maf_regex = re.compile(
        entry_start + re.escape(options.maftag)
        + r'=([0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?)')

    # Both handles are closed on every exit path, including sys.exit().
    with open('freq.log', 'w') as freqfh:
        with open(vcfilename, 'r') as vcfh:
            # instantiate a VcfFile object
            vcfobj = VcfFile(vcfilename)
            # parse its metainfo lines (ones that begin with ##)
            if not options.noheader:
                vcfobj.parseMetaLines(vcfh)
            descriptors = vcfobj.getMetaInfoDescription()
            infoids = [tag for (tag, _description) in descriptors]

            if not options.noheader:
                # Both tags must be declared in the header ('QUAL' is exempt).
                for tag in (options.maftag, options.vtag):
                    if tag not in infoids and tag != 'QUAL':
                        sys.stderr.write(tag + " tag not in ##INFO headers!\n")
                        sys.exit(1)
                vcfobj.parseHeaderLine(vcfh)

            for dataline in vcfobj.yieldVcfDataLine(vcfh):
                fields = dataline.strip().split('\t')
                (chrom, pos, var_id, ref, alt, qual, filtercode, info) = fields[0:8]

                # --filter was accepted but never applied; honour it.
                if options.filter is not None and filtercode != options.filter:
                    continue

                variant_match = variant_regex.search(info)
                if variant_match is None:
                    continue
                maf_match = maf_regex.search(info)
                if maf_match is None:
                    continue

                variant_type = variant_match.group(1)
                maf_value = maf_match.group(1)

                if options.geq <= float(maf_value) <= options.leq:
                    if not options.quiet:
                        print(dataline)
                    logstring = "\t".join([chrom, pos, var_id, ref, alt, variant_type, options.maftag, maf_value])
                    freqfh.write(logstring + '\n')
def main():
    """Print the records of a VCF file that belong to one variant class.

    The VCF path is the first positional argument.  The meta/header lines are
    parsed and printed, then every data line that passes the optional
    ``--filter`` check is printed; when ``--type`` is given, only lines whose
    INFO column matches ``<--info tag>=(<--type>)`` are printed.

    Exits with status 1 when ``--info`` or ``--type`` is an empty string or
    the ``--info`` tag is not declared in the ##INFO headers (``QUAL`` is
    exempt), and with a usage error (status 2) when no file is given.
    ``--noheader`` is accepted but not consulted by this function.
    """
    usage = "usage: %prog [options] file.vcf\n print records belonging to a certain type of variant class (e.g. SNP) in a VCF file\n\n"
    parser = OptionParser(usage)
    parser.add_option(
        "--info",
        type="string",
        dest="infotag",
        help="INFO tag id that annotates what type of variant the VCF record is",
        default="TYPE",
    )
    parser.add_option("--type", type="string", dest="variantype", help="type of variant (SNP INS DEL)", default=None)
    parser.add_option(
        "--filter", type="string", dest="filter", help="extract records matching filter (default is None)", default=None
    )
    parser.add_option("--noheader", action="store_true", dest="noheader", help="VCF file  has no header file")

    (options, args) = parser.parse_args()
    if options.infotag == "":
        sys.stderr.write("provide a value for --info parameter!\n")
        sys.exit(1)
    if options.variantype == "":
        sys.stderr.write("provide a value of --type parameter!\n")
        sys.exit(1)

    # Previously a missing argument surfaced as a bare IndexError on args[0].
    if not args:
        parser.error("a VCF file argument is required")

    # Build the INFO pattern once; None means "no type restriction".
    type_regex = None
    if options.variantype is not None:
        type_regex = re.compile(options.infotag + "=(" + options.variantype + ")")

    vcfilename = args[0]
    # The context manager closes the handle on every exit path.
    with open(vcfilename, "r") as vcfh:
        # instantiate a VcfFile object
        vcfobj = VcfFile(vcfilename)
        # parse its metainfo lines (ones that begin with ##)
        vcfobj.parseMetaAndHeaderLines(vcfh)
        vcfobj.printMetaAndHeaderLines()

        descriptors = vcfobj.getMetaInfoDescription()
        infoids = [tag for (tag, _description) in descriptors]

        if options.infotag not in infoids and options.infotag != "QUAL":
            sys.stderr.write(options.infotag + " tag not in ##INFO headers!\n")
            sys.exit(1)

        for dataline in vcfobj.yieldVcfDataLine(vcfh):
            fields = dataline.strip().split("\t")
            (chrom, pos, var_id, ref, alt, qual, filtercode, info) = fields[0:8]
            if options.filter is not None and filtercode != options.filter:
                continue
            if type_regex is not None and type_regex.search(info) is None:
                continue
            print(dataline)