def main():
    """Print the description of ##INFO meta-lines in a VCF file.

    Usage: prog [options] file.vcf

    Options:
        --infotag ID  print the description of the INFO tag ID
        --all         print the description of every INFO tag in the VCF

    Matching (id, description) pairs are written to stdout. When nothing
    was printed, a diagnostic is written to stderr instead.
    """
    usage = "usage: %prog [options] file.vcf"
    parser = OptionParser(usage)
    parser.add_option("--infotag", type="string", dest="infotag", help="prints the description for the INFO id infotag")
    parser.add_option("--all", action="store_true", dest="all", help="prints the description for *every* INFO tag in VCF")
    (options, args) = parser.parse_args()

    if not args:
        parser.error("no VCF file given")
    if not options.all and options.infotag is None:
        # With neither option nothing can match, and the "not found"
        # message below would fail trying to concatenate None.
        parser.error("provide --infotag or --all")

    vcfilename = args[0]
    with open(vcfilename, 'r') as vcfh:
        # instantiate a VcfFile object
        vcfobj = VcfFile(vcfilename)
        # parse its metainfo lines (ones that begin with ##)
        vcfobj.parseMetaLines(vcfh)
        descriptors = vcfobj.getMetaInfoDescription()

    found_tag = False
    for (tag_id, description) in descriptors:
        if options.all:
            print("%s %s" % (tag_id, description))
            found_tag = True
        elif tag_id == options.infotag:
            # Separator is space + tab: that is what the Python 2 print
            # statement emitted for `print id, "\t", description`.
            print("%s \t%s" % (tag_id, description))
            found_tag = True

    if not found_tag:
        if options.infotag is not None:
            sys.stderr.write(options.infotag + " not in ##INFO headers\n")
        else:
            # --all was given but the file declares no INFO tags at all.
            sys.stderr.write("no ##INFO headers found\n")
def main():
    """Count VCF records per value of an INFO tag.

    Usage: prog [options] file.vcf

    Options:
        --info ID      INFO tag whose value names the variant class
                       (default "TYPE")
        --filter NAME  only count records whose FILTER equals NAME

    A banner goes to stderr; one "value count" line per distinct tag value
    and a final "TOTAL: n" line go to stdout. Records whose INFO field
    does not carry the tag are skipped.
    """
    usage = "usage: %prog [options] arg"
    parser = OptionParser(usage)
    parser.add_option("--info", type="string", dest="infotag", help="INFO tag id that annotates what type of variant the VCF record is", default="TYPE")
    parser.add_option("--filter", type="string", dest="filter", help="only analyze records with matching filter (default is None)", default=None)
    (options, args) = parser.parse_args()

    if options.infotag == "":
        sys.stderr.write("provide a value for --info parameter!\n")
        sys.exit(1)
    if not args:
        parser.error("no VCF file given")

    # key: tag value (variant class), value: number of records seen.
    # Only the counts are reported, so the records are not retained.
    variant_counts = {}
    vcfilename = args[0]

    with open(vcfilename, 'r') as vcfh:
        # instantiate a VcfFile object
        vcfobj = VcfFile(vcfilename)
        # parse its metainfo lines (ones that begin with ##)
        vcfobj.parseMetaAndHeaderLines(vcfh)
        descriptors = vcfobj.getMetaInfoDescription()
        infoids = [tag for (tag, description) in descriptors]

        if options.infotag not in infoids and options.infotag != 'QUAL':
            sys.stderr.write(options.infotag + " tag not in ##INFO headers!\n")
            sys.exit(1)

        # INFO is a ';'-separated list of KEY=VALUE entries. Anchor the key
        # at the start of an entry (so "XTYPE=" does not match "TYPE") and
        # stop the value at the next ';' (so trailing entries are not
        # swallowed into the class name).
        pattern = re.compile(r'(?:^|;)' + re.escape(options.infotag) + r'=([^;\s]+)')

        for vrec in vcfobj.yieldVcfRecord(vcfh):
            if options.filter is not None and vrec.getFilter() != options.filter:
                continue
            match = pattern.search(vrec.getInfo())
            if match is None:
                continue
            value = match.group(1)
            variant_counts[value] = variant_counts.get(value, 0) + 1

    total = 0
    sys.stderr.write("types and count of different variant classes in " + vcfilename + "\n")
    for value in variant_counts:
        count = variant_counts[value]
        print("%s %s" % (value, count))
        total += count
    print("TOTAL: %s" % total)
def main():
    """Print a one-line summary of site depth (DP) for a VCF file.

    Usage: prog [options] file.vcf

    Options:
        --max N  skip records whose DP is greater than or equal to N

    The region is parsed from the file name, which is expected to look like
    "nuller.12:80717441..80717681.vcf". Output is a single tab-separated
    line on stdout: the three region fields, then max, min, median and mean
    DP, then the number of records used.

    Exits with status 1 when the file name carries no region, when the DP
    tag is not declared in the ##INFO headers, or when no record
    contributed a depth value.
    """
    usage = "usage: %prog [options] file.vcf \n print summary information about site depth in records of a VCF file\n"
    parser = OptionParser(usage)
    # sys.maxint only exists on Python 2; fall back to sys.maxsize elsewhere.
    max_default = getattr(sys, "maxint", sys.maxsize)
    parser.add_option("--max", type="int", dest="max", help="skip records that are greater than or equal to max (default sys.maxint)", default=max_default)
    (options, args) = parser.parse_args()

    if not args:
        parser.error("no VCF file given")
    vcfilename = args[0]
    fileName, fileExtension = os.path.splitext(vcfilename)

    # e.g. nuller.12:80717441..80717681.vcf
    regionpattern = r'nuller.(\d+):(\d+)..(\d+)'
    regionmatch = re.search(regionpattern, fileName)
    if regionmatch is None:
        sys.stderr.write("unable to parse region from file name " + vcfilename + "\n")
        sys.exit(1)
    regionstr = "\t".join(regionmatch.groups())

    depth_list = []
    with open(vcfilename, 'r') as vcfh:
        # instantiate a VcfFile object
        vcfobj = VcfFile(vcfilename)
        # parse its metainfo lines (ones that begin with ##)
        vcfobj.parseMetaLines(vcfh)
        descriptors = vcfobj.getMetaInfoDescription()
        infoids = [tag for (tag, description) in descriptors]

        if 'DP' not in infoids:
            sys.stderr.write("DP tag not in ##INFO headers!\n")
            sys.exit(1)

        # Rewind so the header line can be parsed from the top of the file.
        vcfh.seek(0)
        vcfobj.parseHeaderLine(vcfh)

        # Anchor at the start of an INFO entry so tags that merely end in
        # "DP" are not picked up by mistake.
        dp_pattern = re.compile(r'(?:^|;)DP=(\d+)')
        for vrec in vcfobj.yieldVcfRecord(vcfh):
            dp_match = dp_pattern.search(vrec.getInfo())
            if dp_match is None:
                sys.stderr.write("unable to parse DP value from INFO field\n")
                continue
            depth = int(dp_match.group(1))
            if depth >= options.max:
                continue
            depth_list.append(depth)

    if not depth_list:
        # max()/min() of an empty sequence would raise; report it instead.
        sys.stderr.write("no DP values found in " + vcfilename + "\n")
        sys.exit(1)

    depths = array(depth_list)
    maxDP = max(depths)
    minDP = min(depths)
    medianDP = median(depths)
    meanDP = mean(depths)
    length = len(depth_list)

    outstr = "\t".join([regionstr, str(maxDP), str(minDP), str(medianDP), str(meanDP), str(length)])
    print(outstr)
def main():
    """Extract VCF records whose allele frequency lies in [--geq, --leq].

    Usage: prog [options] file.vcf

    Options:
        --maftag ID      INFO tag holding the allele frequency (default "AF")
        --variantag ID   INFO tag holding the variant type (default "VT")
        --variantype T   only keep records of this variant type
        --filter NAME    parsed, but currently not applied (see note below)
        --noheader       the VCF has no header; skip header parsing/checks
        --quiet          do not echo kept records to stdout
        --leq X          keep records with frequency <= X (default 1.0)
        --geq X          keep records with frequency >= X (default 0.0)

    Kept records are echoed to stdout unless --quiet is given, and one
    tab-separated line per kept record (chrom, pos, id, ref, alt, variant
    type, frequency tag, frequency) is written to "freq.log" in the current
    directory. Records lacking either tag, or whose frequency is not a
    number, are skipped. For a multi-valued frequency only the first value
    is used.
    """
    usage = "usage: %prog [options] file.vcf"
    parser = OptionParser(usage)
    parser.add_option("--maftag", type="string", dest="maftag", help="INFO tag id that annotates the allele freq of the record", default="AF")
    parser.add_option("--variantag", type="string", dest="vtag", help="INFO tag that annotates the type of variant type", default="VT")
    parser.add_option("--variantype", type="string", dest="variantype", help="type of variant (SNP INS DEL)", default=None)
    parser.add_option("--filter", type="string", dest="filter", help="extract records matching filter (default is None)", default=None)
    parser.add_option("--noheader", action="store_true", dest="noheader", help="VCF file has no header file", default=False)
    parser.add_option("--quiet", action="store_true", dest="quiet", help="don't print vcf output to stdout", default=False)
    parser.add_option("--leq", type="float", dest="leq", default=1.0, help="keep variants with AF <= (default 1)")
    parser.add_option("--geq", type="float", dest="geq", default=0.0, help="keep variants with AF >= (default 0)")

    (options, args) = parser.parse_args()
    if len(args) != 1:
        # print_usage expands %prog, unlike writing the raw usage string.
        parser.print_usage(sys.stderr)
        sys.exit(1)
    vcfilename = args[0]

    # INFO is a ';'-separated list of KEY=VALUE entries. Keys are anchored
    # at the start of an entry so that e.g. "EUR_AF=" is not taken for "AF=".
    entry_start = r'(?:^|;)'
    if options.variantype is None:
        type_expr = r'\w+'
    else:
        type_expr = options.variantype
    # The value may be followed by ';' or be the last entry of INFO.
    variantpattern = re.compile(entry_start + re.escape(options.vtag) + '=(' + type_expr + ')(?:;|$)')
    # Capture the first frequency value as text; float() decides validity,
    # so 0, 1, 1.0 and exponent notation are all accepted.
    mafpattern = re.compile(entry_start + re.escape(options.maftag) + '=([^;,]+)')

    with open('freq.log', 'w') as freqfh:
        with open(vcfilename, 'r') as vcfh:
            # instantiate a VcfFile object
            vcfobj = VcfFile(vcfilename)
            # parse its metainfo lines (ones that begin with ##)
            if not options.noheader:
                vcfobj.parseMetaLines(vcfh)

            descriptors = vcfobj.getMetaInfoDescription()
            infoids = [tag for (tag, description) in descriptors]

            if not options.noheader:
                if options.maftag not in infoids and options.maftag != 'QUAL':
                    sys.stderr.write(options.maftag + " tag not in ##INFO headers!\n")
                    sys.exit(1)
                if options.vtag not in infoids and options.vtag != 'QUAL':
                    sys.stderr.write(options.vtag + " tag not in ##INFO headers!\n")
                    sys.exit(1)
                vcfobj.parseHeaderLine(vcfh)

            for dataline in vcfobj.yieldVcfDataLine(vcfh):
                fields = dataline.strip().split('\t')
                (chrom, pos, varid, ref, alt, qual, filtercode, info) = fields[0:8]
                # NOTE(review): --filter is accepted but filtercode is not
                # compared against it here; confirm whether that is intended.

                type_match = variantpattern.search(info)
                if type_match is None:
                    continue
                variant_type = type_match.group(1)

                maf_match = mafpattern.search(info)
                if maf_match is None:
                    continue
                maf_value = maf_match.group(1)
                try:
                    maf = float(maf_value)
                except ValueError:
                    # e.g. a missing value written as "."
                    continue

                if options.geq <= maf <= options.leq:
                    if not options.quiet:
                        print(dataline)
                    logstring = "\t".join([chrom, pos, varid, ref, alt, variant_type, options.maftag, maf_value])
                    freqfh.write(logstring + '\n')
def main():
    """Print the VCF records that belong to one variant class.

    Usage: prog [options] file.vcf

    Options:
        --info ID      INFO tag whose value names the variant class
                       (default "TYPE")
        --type T       only print records whose tag value starts with T;
                       when omitted every record passing --filter is printed
        --filter NAME  only print records whose FILTER column equals NAME
        --noheader     the VCF has no header; skip header parsing, header
                       echo and the INFO tag check

    Unless --noheader is given, the meta and header lines are echoed first
    so the output is itself a VCF.
    """
    usage = "usage: %prog [options] file.vcf\n print records belonging to a certain type of variant class (e.g. SNP) in a VCF file\n\n"
    parser = OptionParser(usage)
    parser.add_option(
        "--info",
        type="string",
        dest="infotag",
        help="INFO tag id that annotates what type of variant the VCF record is",
        default="TYPE",
    )
    parser.add_option("--type", type="string", dest="variantype", help="type of variant (SNP INS DEL)", default=None)
    parser.add_option(
        "--filter", type="string", dest="filter", help="extract records matching filter (default is None)", default=None
    )
    parser.add_option("--noheader", action="store_true", dest="noheader", help="VCF file has no header file")

    (options, args) = parser.parse_args()
    if options.infotag == "":
        sys.stderr.write("provide a value for --info parameter!\n")
        sys.exit(1)
    if options.variantype == "":
        sys.stderr.write("provide a value of --type parameter!\n")
        sys.exit(1)
    if not args:
        parser.error("no VCF file given")

    vcfilename = args[0]

    pattern = None
    if options.variantype is not None:
        # Anchor the key at the start of a ';'-separated INFO entry so a tag
        # that merely ends in the requested name is not matched.
        pattern = re.compile(r"(?:^|;)" + re.escape(options.infotag) + "=(" + options.variantype + ")")

    with open(vcfilename, "r") as vcfh:
        # instantiate a VcfFile object
        vcfobj = VcfFile(vcfilename)

        if not options.noheader:
            # parse its metainfo lines (ones that begin with ##)
            vcfobj.parseMetaAndHeaderLines(vcfh)
            vcfobj.printMetaAndHeaderLines()

            descriptors = vcfobj.getMetaInfoDescription()
            infoids = [tag for (tag, description) in descriptors]
            if options.infotag not in infoids and options.infotag != "QUAL":
                sys.stderr.write(options.infotag + " tag not in ##INFO headers!\n")
                sys.exit(1)

        for dataline in vcfobj.yieldVcfDataLine(vcfh):
            fields = dataline.strip().split("\t")
            (chrom, pos, varid, ref, alt, qual, filtercode, info) = fields[0:8]

            if options.filter is not None and filtercode != options.filter:
                continue
            if pattern is not None and pattern.search(info) is None:
                continue
            print(dataline)