def main():

    """ prints the description of ##INFO metalines in a VCF  """
    
    usage = "usage: %prog [options] file.vcf"
    parser = OptionParser(usage)
    parser.add_option("--infotag", type="string", dest="infotag", help="prints the  description for the INFO id infotag")
    parser.add_option("--all", action="store_true", dest="all",  help="prints  the  description for  *every* INFO  tag in VCF")
    parser.add_option
    (options, args)=parser.parse_args()
    
    vcfilename=args[0]
    vcfh=open(vcfilename, 'r')
    #instantiate a VcfFile object
    vcfobj=VcfFile(vcfilename)

    #parse its metainfo lines (ones that begin with ##)
    vcfobj.parseMetaLines(vcfh)

    descriptors = vcfobj.getMetaInfoDescription()
    found_tag=0
    for (id, description) in descriptors:
        if options.all==True:
            print id, description
            found_tag=1
            continue
        if id == options.infotag:
            print id, "\t", description
            found_tag=1
    if found_tag  ==0  : sys.stderr.write(options.infotag + " not in ##INFO headers\n")
def main():
    usage = "usage: %prog [options] arg"
    parser = OptionParser(usage)
    parser.add_option("--info", type="string", dest="infotag", help="INFO tag id that annotates what type of variant the VCF record is", default="TYPE")
    parser.add_option("--filter", type="string", dest="filter", help="only analyze records with matching filter (default is None)", default=None)

    (options, args)=parser.parse_args()
    if options.infotag == "":
        sys.stderr.write("provide a value for --info parameter!\n")
        exit(1)


    variant_dict={} #key variant type value VcfRecord object

    vcfilename=args[0]
    vcfh=open(vcfilename,'r')

    #instantiate a VcfFile object
    vcfobj=VcfFile(vcfilename)
    #parse its metainfo lines (ones that begin with ##)
    vcfobj.parseMetaAndHeaderLines(vcfh)
    
    descriptors = vcfobj.getMetaInfoDescription()
    infoids=[]
    for (tag, description) in descriptors:
        tag
        infoids.append(tag)

    if options.infotag  not in infoids and options.infotag != 'QUAL':
        sys.stderr.write(options.infotag + " tag not in ##INFO headers!\n")
        exit(1)

    

    pattern=options.infotag+'=(\S+)'
    
    for vrec in vcfobj.yieldVcfRecord(vcfh):
        if vrec.getFilter() != options.filter and options.filter != None: continue
        
        searchresult=re.search(pattern, vrec.getInfo() )
        if re.search(pattern, vrec.getInfo() ) == None:
            continue
        else:
            value=re.search(pattern, vrec.getInfo() ).groups()[0]
            #rint value
            if value not in variant_dict.keys():
                variant_dict[value]=[]
                variant_dict[value].append( vrec )
            else:
                variant_dict[value].append( vrec )


    
    sum=0
    sys.stderr.write("types and count of different variant classes in " + vcfilename + "\n")
    for k in variant_dict.keys():
        print k, len( variant_dict[k] )
        sum+=len( variant_dict[k] )
    print "TOTAL:", sum
def main():
    usage = "usage: %prog [options] arg"
    parser = OptionParser(usage)
    usage = "usage: %prog [options] file.vcf \n print summary information about site depth in records of a VCF file\n"
    parser = OptionParser(usage)
    parser.add_option("--max", type="int", dest="max", help="skip records that are greater than or equal to max (default sys.maxint)", default=sys.maxint)
    #parser.add_option("--v", action="store_true", dest="snp",  help="restrict analysis to SNPs (must have INFO ID SNP in header")

    (options, args)=parser.parse_args()

    vcfilename=args[0]
    fileName, fileExtension = os.path.splitext(vcfilename)
    #nuller.12:80717441..80717681.vcf
    regionpattern='nuller.(\d+):(\d+)..(\d+)'
    results=re.search(regionpattern,fileName ).groups()
    regionstr="\t".join(list(results))
    vcfh=open(vcfilename,'r')

    #instantiate a VcfFile object
    vcfobj=VcfFile(vcfilename)
    #parse its metainfo lines (ones that begin with ##)
    vcfobj.parseMetaLines(vcfh)
    descriptors = vcfobj.getMetaInfoDescription()
    infoids=[]
    for (tag, description) in descriptors:
        infoids.append(tag)

    if 'DP' not in infoids:
        sys.stderr.write("DP tag not in ##INFO headers!")
        exit(1)

    vcfh.seek(0)
    vcfobj.parseHeaderLine(vcfh)

    pattern='DP=(\d+)'
    depth_list=[]
    for vrec in vcfobj.yieldVcfRecord(vcfh):

        dp=re.search(pattern, vrec.getInfo() ).groups()[0]
        if dp == None:
            sys.stderr.write("unable to parse DP value from INFO field\n")
            continue
        else:
            if int(dp) >= options.max: continue
            depth_list.append(int(dp))

    maxDP=max( array (depth_list))
    minDP= min (array (depth_list))
    medianDP=median (array (depth_list))
    meanDP=mean( array(depth_list))
    length=len(depth_list)

    outstr="\t".join([regionstr, str(maxDP), str(minDP), str(medianDP), str(meanDP), str(length)])
    print outstr
# Example #4
# 0
def main():
    usage = "usage: %prog [options] maf file.vcf"
    parser = OptionParser(usage)
  
    parser.add_option("--maftag", type="string", dest="maftag", help="INFO tag id that annotates the allele freq of the record", default="AF")
    parser.add_option("--variantag", type="string", dest="vtag", help="INFO tag that annotates the type of variant type", default="VT")
    parser.add_option("--variantype", type="string", dest="variantype", help="type of variant (SNP INS DEL)", default=None)
    parser.add_option("--filter", type="string", dest="filter", help="extract records matching filter (default is None)", default=None)
    parser.add_option("--noheader", action="store_true", dest="noheader", help="VCF file  has no header file", default=False)
    parser.add_option("--quiet", action="store_true", dest="quiet", help="don't print vcf output to stdout", default=False)
    parser.add_option("--leq", type="float", dest="leq", default=1.0, help="keep variants with AF <= (default 1)")
    parser.add_option("--geq", type="float", dest="geq", default=0.0, help="keep variants with AF >= (default 0)")
    (options, args)=parser.parse_args()

    

    if len(args)!=1:
        sys.stderr.write(usage+"\n")
        exit(1)
    vcfilename=args[0]
    #maf=float(args[0])

    freqfh=open('freq.log', 'w')

    vcfh=open(vcfilename,'r')

    #instantiate a VcfFile object
    vcfobj=VcfFile(vcfilename)
    #parse its metainfo lines (ones that begin with ##)
    if options.noheader == False:
        vcfobj.parseMetaLines(vcfh)
    #vcfobj.printMetaLines()
    descriptors = vcfobj.getMetaInfoDescription()
    infoids=[]
    for (tag, description) in descriptors:
        infoids.append(tag)

    if options.maftag  not in infoids and options.maftag != 'QUAL' and options.noheader == False:
        sys.stderr.write(options.maftag + " tag not in ##INFO headers!\n")
        exit(1)

    if options.vtag  not in infoids and options.vtag != 'QUAL' and options.noheader==False:
        sys.stderr.write(options.vtag + " tag not in ##INFO headers!\n")
        exit(1)

   
    #vcfh.seek(0)
    if options.noheader == False:
        vcfobj.parseHeaderLine(vcfh)
  


    if options.variantype==None:
        variantpattern=options.vtag+'=(\w+);'
    else:
        variantpattern=options.vtag+'=('+options.variantype+');'
    mafpattern=options.maftag+'=(0.\d+)'

    #print mafpattern, variantpattern


    for dataline in vcfobj.yieldVcfDataLine(vcfh):
        #print dataline
        fields=dataline.strip().split('\t')

        (chrom,pos,id,ref,alt,qual,filtercode,info)=fields[0:8]
        #if filtercode != options.filter and options.filter != None : continue

        
        if re.search(variantpattern, info ) == None:
            #sys.stderr.write("no variant pattern\n")
            continue
        
        variant_type=re.search(variantpattern, info ).groups()[0]
        
        
        if re.search(mafpattern, info ) == None:
            #sys.stderr.write("No mafpattern!\n")
            #sys.stderr.write(dataline+"\n")
            continue
        
        maf_value=re.search(mafpattern, info ).groups()[0]
        
        if float(maf_value) <= options.leq and float(maf_value) >= options.geq:

            if options.quiet == False:
                print dataline
            logstring="\t".join([chrom,pos,id,ref,alt,variant_type, options.maftag, maf_value])
            freqfh.write(logstring+'\n')
def main():
    usage = "usage: %prog [options] file.vcf\n print records belonging to a certain type of variant class (e.g. SNP) in a VCF file\n\n"
    parser = OptionParser(usage)
    parser.add_option(
        "--info",
        type="string",
        dest="infotag",
        help="INFO tag id that annotates what type of variant the VCF record is",
        default="TYPE",
    )
    parser.add_option("--type", type="string", dest="variantype", help="type of variant (SNP INS DEL)", default=None)
    parser.add_option(
        "--filter", type="string", dest="filter", help="extract records matching filter (default is None)", default=None
    )
    parser.add_option("--noheader", action="store_true", dest="noheader", help="VCF file  has no header file")

    (options, args) = parser.parse_args()
    if options.infotag == "":
        sys.stderr.write("provide a value for --info parameter!\n")
        exit(1)
    if options.variantype == "":
        sys.stderr.write("provide a value of --type parameter!\n")
        exit(1)

    variant_dict = {}

    vcfilename = args[0]
    vcfh = open(vcfilename, "r")

    # instantiate a VcfFile object
    vcfobj = VcfFile(vcfilename)
    # parse its metainfo lines (ones that begin with ##)
    vcfobj.parseMetaAndHeaderLines(vcfh)
    vcfobj.printMetaAndHeaderLines()

    descriptors = vcfobj.getMetaInfoDescription()
    infoids = []
    for (tag, description) in descriptors:
        infoids.append(tag)

    if options.infotag not in infoids and options.infotag != "QUAL":
        sys.stderr.write(options.infotag + " tag not in ##INFO headers!\n")
        exit(1)

    if options.variantype != None:
        pattern = options.infotag + "=(" + options.variantype + ")"

    for dataline in vcfobj.yieldVcfDataLine(vcfh):
        fields = dataline.strip().split("\t")
        (chrom, pos, id, ref, alt, qual, filtercode, info) = fields[0:8]
        if filtercode != options.filter and options.filter != None:
            continue

        if options.variantype != None:
            if re.search(pattern, info) == None:
                continue
            else:
                value = re.search(pattern, info).groups()[0]
                print dataline
        else:
            print dataline