def main():
    """Parse command-line options and sort nucmer hits by reference position.

    Reads the query FASTA (-f, mandatory) and the nucmer coords file (-i),
    then passes the loaded data, the output stream (-o) and the verbosity
    flag to ``sort_hits``.
    """
    usage = "%(prog)s [options]"
    parser = argparse.ArgumentParser(usage=usage, description=desc,
                                     epilog=epilog)
    parser.add_argument("-v", dest="verbose", default=False,
                        action="store_true")
    # argparse.FileType replaces the Python-2-only ``file`` builtin and turns
    # an unreadable path into a usage error instead of a traceback.
    parser.add_argument("-i", dest="coords", default=sys.stdin,
                        type=argparse.FileType("r"),
                        help="coords file name [stdin]")
    parser.add_argument("-f", dest="fasta", required=True,
                        type=argparse.FileType("r"),
                        help="query fasta file")
    parser.add_argument("-o", dest="out", default=sys.stdout,
                        type=argparse.FileType("w"),
                        help="output base name [stdout]")

    o = parser.parse_args()
    if o.verbose:
        sys.stderr.write("Options: %s\n" % str(o))

    # Only the file names are used below, so release the handles argparse
    # opened for validation instead of leaking them.
    fasta_fn = o.fasta.name
    o.fasta.close()
    coords_fn = o.coords.name
    if o.coords is not sys.stdin:
        o.coords.close()
    # NOTE(review): with the default -i (stdin) coords_fn is "<stdin>";
    # confirm nucmer2list can cope with that value.

    # load query genome
    query2fasta = genome2dict(fasta_fn)
    # load nucmer
    matches = nucmer2list(coords_fn)
    # sort hits by ref position
    sort_hits(matches, query2fasta, o.out, o.verbose)
def gff2fasta(gff, fasta, entireGene, codonTable, verbose):
    """Report gene, CDS and peptide sequences as FASTA files.

    Three files are written next to the annotation: ``<gff.name>.gene.fa``,
    ``<gff.name>.cds.fa`` and ``<gff.name>.pep.fa``. The 1000bp flanking
    intergenic sequence is not reported.

    Args:
        gff: open annotation file; its ``.name`` is used to build the output
            paths and the object itself is handed to ``load_gtf``.
        fasta: genome FASTA, passed to ``genome2dict``.
        entireGene: accepted for interface compatibility; not used here.
        codonTable: forwarded to ``get_cds_pep``.
        verbose: accepted for interface compatibility; not used here.
    """
    # load genome
    chr2seq = genome2dict(fasta)
    # load gff
    gene2position, contig2gene = load_gtf(gff)

    # gene ids already written to the gene file
    genes = set()
    # The with-statement guarantees all three outputs are flushed and closed,
    # also when an entry fails half-way through.
    with open(gff.name + ".gene.fa", "w") as genout, \
            open(gff.name + ".cds.fa", "w") as cdsout, \
            open(gff.name + ".pep.fa", "w") as pepout:
        for ci, contig in enumerate(sorted(contig2gene), 1):
            # progress indicator, overwritten in place
            sys.stderr.write(" %s %s \r" % (ci, contig))
            for s, e, feature, gene in contig2gene[contig]:
                # the contig stored with the gene is the one used for the
                # sequence lookups below
                (gene_contig, boundaries, strand,
                 function, frames) = gene2position[gene]
                # store CDS and peptide
                cds, pep = get_cds_pep(chr2seq[gene_contig], gene, boundaries,
                                       strand, function, frames, codonTable)
                cdsout.write(cds.format('fasta'))
                pepout.write(pep.format('fasta'))
                # gene id = entry id without its last dot-separated field;
                # each gene is reported only once
                geneid = ".".join(gene.split(".")[:-1])
                if geneid in genes:
                    continue
                genes.add(geneid)
                seq = chr2seq[gene_contig][s - 1:e]
                if strand == "-":
                    seq = seq.reverse_complement()
                gen = SeqRecord(seq, id=geneid, name="", description=function)
                genout.write(gen.format('fasta'))
def main():
    """Extract sequences for listed ids from a multi-FASTA file.

    Every whitespace-separated id found in the input file (-i) is looked up
    in the multi-FASTA (-f), falling back to ``<id>_1``. Matches are written
    to ``<out>_00001.fasta``; with -s a separate, consecutively numbered file
    is written for every input line. Ids without a sequence are reported on
    stderr and skipped.
    """
    usage = "usage: %prog [options]"
    parser = OptionParser(usage=usage, version="%prog 1.0")
    parser.add_option("-i", dest="input", default="",
                      help="input file with tab-separated ids [mandatory]")
    parser.add_option("-f", dest="fasta", default="",
                      help="multifasta file [mandatory]")
    parser.add_option("-o", dest="out", default="out/out",
                      help="output fname [%default]")
    parser.add_option("-s", dest="split", default=False, action="store_true",
                      help="split fasta for ids from every line")
    parser.add_option("-v", dest="verbose", default=True, action="store_false")

    (o, args) = parser.parse_args()
    if o.verbose:
        sys.stderr.write("%s\n" % (str(o),))

    for f in (o.input, o.fasta):
        if not f:
            parser.error("Specify mandatory parameters!")
        # The old test used ``not f.isdigit`` (a bound method, always truthy),
        # so missing files were never detected here.
        if not os.path.isfile(f):
            parser.error("No such file: %s" % f)

    # create the output directory if needed; dirname is "" for a bare file
    # name and os.makedirs("") would raise
    outdir = os.path.dirname(o.out)
    if outdir and not os.path.isdir(outdir):
        os.makedirs(outdir)

    # get id2fasta
    sys.stderr.write("Loading multifasta...\n")
    id2fasta = genome2dict(o.fasta)

    sys.stderr.write("Saving fastas...\n")
    # %05i zero-pads only the counter; the previous replace(" ", "0") also
    # rewrote spaces inside the user-supplied output path.
    fname_template = "%s_%05i.fasta"
    i = 1
    out = None
    try:
        # open common output
        if not o.split:
            out = open(fname_template % (o.out, i), "w")
        with open(o.input) as handle:
            for l in handle:
                # open output for each line if requested
                if o.split:
                    if out is not None:
                        out.close()
                    out = open(fname_template % (o.out, i), "w")
                for seqid in l.split():
                    sys.stderr.write(" %s \r" % seqid)
                    if seqid in id2fasta:
                        seq = id2fasta[seqid]
                    elif seqid + "_1" in id2fasta:
                        seq = id2fasta[seqid + "_1"]
                    else:
                        sys.stderr.write(" No fasta for: %s\n" % seqid)
                        continue
                    out.write(">%s\n%s\n" % (seqid, seq))
                i += 1
    finally:
        # make sure the last output file is flushed and closed
        if out is not None:
            out.close()
def main():
    """Parse command-line options and annotate SNPs from the input stream.

    When an annotation (-g) is given, the genome FASTA (-f) is mandatory and
    both are loaded. Optional functional annotation is taken from a proteome
    FASTA (-q), a pfam tblout file (-p) and a tab-delimited file (-t).
    Everything is then handed to ``parse_snps``.
    """
    usage = "%(prog)s [options]"
    parser = argparse.ArgumentParser(
        usage=usage, description=desc, epilog=epilog,
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument("-v", "--verbose", default=False, action="store_true",
                        help="verbose")
    parser.add_argument('--version', action='version', version='1.1')
    parser.add_argument("-g", "--gtf",
                        help="genome annotation gtf/gff [requires -f]")
    parser.add_argument("-f", "--fasta",
                        help="genome fasta [can be gzipped]")
    # argparse.FileType replaces the Python-2-only ``file`` builtin.
    # NOTE(review): no default is set, so o.input is None when -i is omitted
    # although the help text advertises stdin; confirm parse_snps handles it.
    parser.add_argument("-i", "--input", type=argparse.FileType("r"),
                        help="input stream [stdin]")
    # NOTE(review): -o yields a plain string when given but a stream by
    # default; confirm parse_snps accepts both.
    parser.add_argument("-o", "--out", default=sys.stdout,
                        help="output stream [stdout]")
    parser.add_argument("-p", "--pfam", default="",
                        help="pfam tblout file")
    parser.add_argument("-q", "--faa", default="",
                        help="proteome fasta (to get protein annotation)")
    parser.add_argument("-t", "--tab", default="",
                        help="tab-delimited annotation")

    o = parser.parse_args()
    if o.verbose:
        sys.stderr.write("Options: %s\n" % str(o))

    ctg2cds, id2gene, ctg2seq = {}, {}, {}
    if o.gtf:  # if annotation
        # load genome; fasta has to be provided
        if not o.fasta:
            # was ``parser.errer``, which raised AttributeError
            parser.error("Fasta file (-f) is required!")
        elif not os.path.isfile(o.fasta):
            parser.error("No such file: %s" % o.fasta)
        ctg2seq = genome2dict(o.fasta)

        # load genome annotation
        if not os.path.isfile(o.gtf):  # check if correct file
            parser.error("No such file: %s" % o.gtf)
        # load gtf/gff
        if o.gtf.endswith(".gff"):
            id2gene, ctg2cds = load_gff(o.gtf)
        else:
            id2gene, ctg2cds = load_gtf(o.gtf)
        if o.verbose:
            sys.stderr.write("Loaded annotation of %s CDS from %s\n"
                             % (len(id2gene), o.gtf))

    # load function annotation; separate dicts so the three names never
    # alias one shared object
    trans2ann, trans2pfam, trans2tab = {}, {}, {}
    if o.faa:
        trans2ann = load_fasta_headers(o.faa)
    if o.pfam:
        trans2pfam = load_pfam(o.pfam)
    if o.tab:
        trans2tab = load_tab(o.tab)

    # parse pileup
    parse_snps(o.input, o.out, ctg2cds, id2gene, ctg2seq, trans2ann,
               trans2pfam, trans2tab, o.verbose)
def main():
    """Collapse a multi-FASTA: keep contigs not overlapping ones already kept.

    Contigs are visited from the longest to the shortest. A contig is
    appended to ``<infile>.collapsed_o<overlap>_i<identity>.fa`` when it is
    the first one or when ``overlapping`` reports no overlap with the contigs
    kept so far; overlaps come from a blat run of the input against itself.
    """
    usage = "usage: %prog [options]"
    desc = """Parse multi-fasta file and report sequences without overlap with already reported sequences. Starts from the longest."""
    epilog = ""
    parser = OptionParser(usage=usage, version="%prog 1.0",
                          description=desc, epilog=epilog)
    option_specs = (
        ("-i", dict(dest="infile", help="multi-fasta file [mandatory]")),
        ("-m", dict(dest="minIdentity", default=90, type=int,
                    help="min identity [%default]")),
        ("-o", dict(dest="overlap", default=0.3, type=float,
                    help="max overlap allowed [%default]")),
        ("-v", dict(dest="verbose", default=False, action="store_true")),
    )
    for flag, spec in option_specs:
        parser.add_option(flag, **spec)

    (o, args) = parser.parse_args()
    if o.verbose:
        sys.stderr.write("Options: %s\nArgs: %s\n" % (o, args))

    # the input file is mandatory and has to exist
    if not o.infile:
        parser.error("Provide input file!")
    if not os.path.isfile(o.infile):
        parser.error("No such file: %s" % o.infile)

    # load sequences and order contig names by descending length
    fastas = genome2dict(o.infile)
    by_length = sorted(fastas.keys(), key=lambda name: len(fastas[name]),
                       reverse=True)

    added, skipped = set(), set()
    # start from a clean output file, as entries are appended one by one
    outfn = o.infile + ".collapsed_o%s_i%s.fa" % (o.overlap, o.minIdentity)
    if os.path.isfile(outfn):
        os.unlink(outfn)

    # blat the input against itself and collect non-self matches
    pslfn = run_blat(o.infile, o.infile, o.minIdentity, o.verbose)
    matches = parse_blat(pslfn, o.verbose, header=0, skipSelfMatches=1)

    for rank, name in enumerate(by_length, 1):
        if o.verbose:
            sys.stderr.write(" %3s %20s [ %7.2f kb]\n"
                             % (rank, name, len(fastas[name]) / 1000.0))
        entry = ">%s\n%s\n" % (name, _get_formatted_seq(fastas[name]))
        # the very first contig is always kept; later ones only if they do
        # not overlap with anything kept before
        if added and overlapping(name, added, matches, o.overlap, o.verbose):
            skipped.add(name)
            continue
        added.add(name)
        with open(outfn, "a") as out:
            out.write(entry)

    kept_kb = sum(len(fastas[name]) for name in added) / 10.0**3
    total_kb = sum(len(fastas[name]) for name in fastas) / 10.0**3
    sys.stderr.write(
        "Selected %s [ %7.2f kb] out of %s [ %7.2f kb] contigs.\n"
        % (len(added), kept_kb, len(fastas), total_kb))
def main():
    """Command-line entry point: order contigs based on nucmer output.

    Validates that the coords (-i), query (-q) and reference (-r) files were
    given and exist, loads both genomes and the nucmer matches, and forwards
    everything with the numeric settings to ``sort_hits``.
    """
    usage = "usage: %prog [options]"
    desc = """Order contigs based on nucmer output."""
    epilog = """Make sure, coords file is sorted by reference (show-coords -r). Monoploid number (-x) has to be specified correctly. For details look at: http://en.wikipedia.org/wiki/Ploidy"""
    parser = OptionParser(usage=usage, version="%prog 1.0",
                          description=desc, epilog=epilog)

    # (flag, keyword arguments) in the order they appear in --help
    option_specs = (
        ("-o", dict(dest="outfn", default="out",
                    help="output base name [%default]")),
        ("-i", dict(dest="coords", default="",
                    help="coords file name [mandatory]")),
        ("-q", dict(dest="query", default="",
                    help="query file name [mandatory]")),
        ("-r", dict(dest="ref", default="",
                    help="reference file name [mandatory]")),
        ("-c", dict(dest="qOverlap", default=0.05, type=float,
                    help="fract of query aligned [%default]")),
        ("-n", dict(dest="haploid", default=2, type=int,
                    help="haploid number [%default]")),
        ("-x", dict(dest="monoploid", default=2, type=int,
                    help="monoploid number [%default]")),
        ("-v", dict(dest="verbose", default=False, action="store_true")),
    )
    for flag, spec in option_specs:
        parser.add_option(flag, **spec)

    (o, fnames) = parser.parse_args()
    if o.verbose:
        sys.stderr.write("Options: %s\nFastQ files: %s\n" % (o, fnames))

    # every input file is mandatory and has to exist
    for path in (o.coords, o.query, o.ref):
        if not path:
            parser.error("Provide input file!")
        if not os.path.isfile(path):
            parser.error("No such file: %s" % path)

    # load query and reference genomes
    query2fasta = genome2dict(o.query)
    ref2fasta = genome2dict(o.ref)
    # load nucmer matches
    matches = nucmer2list(o.coords)
    # sort hits by ref position
    sort_hits(matches, query2fasta, ref2fasta, o.outfn, o.qOverlap,
              o.haploid, o.monoploid, o.verbose)
def main():
    """Command-line entry point: count reads per annotated gene in a BAM file.

    The BAM (-i) and the annotation (-g) are mandatory. With -r the genome
    FASTA (-f) is required as well and is loaded before ``bam2counts`` is
    called.
    """
    usage = "usage: %prog [options]\nfor f in *.bam; do echo `date` $f; bam2counts.py -rv -i $f -g F.oxysporum.gtf > $f.genecounts.txt; done"
    parser = OptionParser(usage=usage, version="%prog 1.0")

    # (flag, keyword arguments) in the order they appear in --help
    option_specs = (
        ("-i", dict(dest="bam", default="", help="bam file")),
        ("-g", dict(dest="gtf", default="",
                    help="genome annotation gtf/gff")),
        ("-r", dict(dest="rpkm", default=False, action="store_true",
                    help="RPKM normalisation (reads per kb of gene per million of aligned reads)")),
        ("-f", dict(dest="fasta", default="",
                    help="genome fasta [required if -r]")),
        ("-v", dict(dest="verbose", default=False, action="store_true")),
    )
    for flag, spec in option_specs:
        parser.add_option(flag, **spec)

    (o, fnames) = parser.parse_args()
    if o.verbose:
        sys.stderr.write("Options: %s\nArgs: %s\n" % (o, fnames))

    # bam and annotation are mandatory and have to exist
    for path in (o.bam, o.gtf):
        if not path:
            parser.error("Provide input file!")
        if not os.path.isfile(path):
            parser.error("No such file: %s" % path)

    ctg2cds, id2gene, ctg2seq = {}, {}, {}
    # load gtf/gff; the loader is picked by file extension
    if o.gtf:
        loader = load_gff if o.gtf.endswith(".gff") else load_gtf
        id2gene, ctg2cds = loader(o.gtf)
        if o.verbose:
            sys.stderr.write("Loaded annotation of %s CDS from %s\n"
                             % (len(id2gene), o.gtf))

    # RPKM normalisation needs the genome sequence
    if o.rpkm:
        if not o.fasta:
            parser.error("Specify genome fasta file!")
        if not os.path.isfile(o.fasta):
            parser.error("No such file: %s" % o.fasta)
        ctg2seq = genome2dict(o.fasta)

    bam2counts(o.bam, o.rpkm, id2gene, ctg2cds, ctg2seq, o.verbose)
def main():
    """Parse command-line options and annotate SNPs from the input stream.

    If an annotation (-g) is supplied, the genome FASTA (-f) must be supplied
    too; both are loaded. Functional annotation may additionally come from a
    proteome FASTA (-q), a pfam tblout file (-p) and a tab-delimited file
    (-t). The collected data is passed to ``parse_snps``.
    """
    usage = "%(prog)s [options]"
    parser = argparse.ArgumentParser(
        usage=usage,
        description=desc,
        epilog=epilog,
        formatter_class=argparse.RawTextHelpFormatter,
    )

    parser.add_argument("-v", "--verbose", default=False, action="store_true",
                        help="verbose")
    parser.add_argument("--version", action="version", version="1.1")
    parser.add_argument("-g", "--gtf",
                        help="genome annotation gtf/gff [requires -f]")
    parser.add_argument("-f", "--fasta", help="genome fasta [can be gzipped]")
    # ``file`` exists only in Python 2; argparse.FileType opens the path the
    # same way and reports failures as usage errors.
    # NOTE(review): without a default, o.input is None when -i is missing
    # even though the help mentions stdin; verify parse_snps copes with None.
    parser.add_argument("-i", "--input", type=argparse.FileType("r"),
                        help="input stream [stdin]")
    # NOTE(review): -o is a string when given on the command line and a
    # stream otherwise; verify parse_snps accepts both.
    parser.add_argument("-o", "--out", default=sys.stdout,
                        help="output stream [stdout]")
    parser.add_argument("-p", "--pfam", default="", help="pfam tblout file")
    parser.add_argument("-q", "--faa", default="",
                        help="proteome fasta (to get protein annotation)")
    parser.add_argument("-t", "--tab", default="",
                        help="tab-delimited annotation")

    o = parser.parse_args()
    if o.verbose:
        sys.stderr.write("Options: %s\n" % str(o))

    ctg2cds, id2gene, ctg2seq = {}, {}, {}
    if o.gtf:  # if annotation
        # load genome
        if not o.fasta:  # fasta has to be provided
            # the misspelt ``parser.errer`` raised AttributeError here
            parser.error("Fasta file (-f) is required!")
        elif not os.path.isfile(o.fasta):
            parser.error("No such file: %s" % o.fasta)
        ctg2seq = genome2dict(o.fasta)

        # load genome annotation
        if not os.path.isfile(o.gtf):  # check if correct file
            parser.error("No such file: %s" % o.gtf)
        # load gtf/gff
        if o.gtf.endswith(".gff"):
            id2gene, ctg2cds = load_gff(o.gtf)
        else:
            id2gene, ctg2cds = load_gtf(o.gtf)
        if o.verbose:
            sys.stderr.write("Loaded annotation of %s CDS from %s\n"
                             % (len(id2gene), o.gtf))

    # load function annotation; each mapping gets its own dict instead of
    # three names bound to one shared object
    trans2ann = {}
    trans2pfam = {}
    trans2tab = {}
    if o.faa:
        trans2ann = load_fasta_headers(o.faa)
    if o.pfam:
        trans2pfam = load_pfam(o.pfam)
    if o.tab:
        trans2tab = load_tab(o.tab)

    # parse pileup
    parse_snps(o.input, o.out, ctg2cds, id2gene, ctg2seq, trans2ann,
               trans2pfam, trans2tab, o.verbose)
def main():
    """Parse command-line options and run ``parse_vcf`` on the input.

    When an annotation (-g) is given, the genome FASTA (-f) is mandatory;
    both are loaded and passed on together with the depth, frequency, indel
    and strand settings.
    """
    usage = "usage: %prog [options]"
    parser = OptionParser(usage=usage, version="%prog 1.0")
    parser.add_option("-g", dest="gtf",
                      help="genome annotation gtf/gff [requires -f]")
    parser.add_option("-f", dest="fasta",
                      help="genome fasta [can be gzipped]")
    parser.add_option("-i", dest="fpath", help="input file [stdin]")
    parser.add_option("-o", dest="outfn", help="output fname [stdout]")
    parser.add_option("-d", dest="minDepth", default=10, type=int,
                      help="minimal depth [%default]")
    parser.add_option("-m", dest="minFreq", default=0.8, type=float,
                      help="min frequency of alternative base [%default]")
    parser.add_option("-n", dest="indels", default=True, action="store_false",
                      help="ignore indels")
    parser.add_option("-b", dest="bothStrands", default=True,
                      action="store_false",
                      help="report events confirmed by single strand algs")
    # verbose is on by default; -v switches it off
    parser.add_option("-v", dest="verbose", default=True, action="store_false")

    (o, args) = parser.parse_args()
    if o.verbose:
        sys.stderr.write("%s\n" % (str(o),))

    ctg2cds, id2gene, ctg2seq = {}, {}, {}
    if o.gtf:  # if annotation
        # load genome; fasta has to be provided
        if not o.fasta:
            # was ``parser.errer``: AttributeError instead of a usage error
            parser.error("Fasta file (-f) is required!")
        elif not os.path.isfile(o.fasta):
            parser.error("No such file: %s" % o.fasta)
        ctg2seq = genome2dict(o.fasta)

        # load genome annotation
        if not os.path.isfile(o.gtf):  # check if correct file
            parser.error("No such file: %s" % o.gtf)
        # load gtf/gff
        if o.gtf.endswith(".gff"):
            id2gene, ctg2cds = load_gff(o.gtf)
        else:
            id2gene, ctg2cds = load_gtf(o.gtf)
        if o.verbose:
            sys.stderr.write("Loaded annotation of %s CDS from %s\n"
                             % (len(id2gene), o.gtf))

    # parse pileup
    parse_vcf(o.fpath, o.outfn, ctg2cds, id2gene, ctg2seq, o.minDepth,
              o.minFreq, o.indels, o.bothStrands)
def main():
    """Parse options, load the annotation and report per-gene read counts.

    Requires a BAM file (-i) and a gtf/gff annotation (-g). If RPKM
    normalisation (-r) is requested, the genome FASTA (-f) is required and
    loaded too. The work itself is done by ``bam2counts``.
    """
    usage = "usage: %prog [options]\nfor f in *.bam; do echo `date` $f; bam2counts.py -rv -i $f -g F.oxysporum.gtf > $f.genecounts.txt; done"
    parser = OptionParser(usage=usage, version="%prog 1.0")
    parser.add_option(
        "-i", dest="bam", default="",
        help="bam file")
    parser.add_option(
        "-g", dest="gtf", default="",
        help="genome annotation gtf/gff")
    parser.add_option(
        "-r", dest="rpkm", default=False, action="store_true",
        help="RPKM normalisation (reads per kb of gene per million of aligned reads)")
    parser.add_option(
        "-f", dest="fasta", default="",
        help="genome fasta [required if -r]")
    parser.add_option(
        "-v", dest="verbose", default=False, action="store_true")

    (o, fnames) = parser.parse_args()
    if o.verbose:
        sys.stderr.write("Options: %s\nArgs: %s\n" % (o, fnames))

    # mandatory inputs: both have to be given and exist
    for required in (o.bam, o.gtf):
        if not required:
            parser.error("Provide input file!")
        if not os.path.isfile(required):
            parser.error("No such file: %s" % required)

    id2gene = {}
    ctg2cds = {}
    ctg2seq = {}

    # load gtf/gff depending on the file extension
    if o.gtf:
        is_gff = o.gtf.endswith(".gff")
        if is_gff:
            id2gene, ctg2cds = load_gff(o.gtf)
        else:
            id2gene, ctg2cds = load_gtf(o.gtf)
        if o.verbose:
            message = "Loaded annotation of %s CDS from %s\n" % (len(id2gene), o.gtf)
            sys.stderr.write(message)

    # the genome sequence is only needed for RPKM normalisation
    if o.rpkm:
        if not o.fasta:
            parser.error("Specify genome fasta file!")
        if not os.path.isfile(o.fasta):
            parser.error("No such file: %s" % o.fasta)
        ctg2seq = genome2dict(o.fasta)

    bam2counts(o.bam, o.rpkm, id2gene, ctg2cds, ctg2seq, o.verbose)
def main():
    """Parse options, load genomes and nucmer hits, then order the contigs.

    The coords file (-i), the query FASTA (-q) and the reference FASTA (-r)
    must all be given and exist. The loaded data is handed to ``sort_hits``
    along with the output base name, the query-overlap fraction and the
    haploid / monoploid numbers.
    """
    usage = "usage: %prog [options]"
    desc = """Order contigs based on nucmer output."""
    epilog = """Make sure, coords file is sorted by reference (show-coords -r). Monoploid number (-x) has to be specified correctly. For details look at: http://en.wikipedia.org/wiki/Ploidy"""
    parser = OptionParser(usage=usage, version="%prog 1.0",
                          description=desc, epilog=epilog)

    parser.add_option(
        "-o", dest="outfn", default="out",
        help="output base name [%default]")
    parser.add_option(
        "-i", dest="coords", default="",
        help="coords file name [mandatory]")
    parser.add_option(
        "-q", dest="query", default="",
        help="query file name [mandatory]")
    parser.add_option(
        "-r", dest="ref", default="",
        help="reference file name [mandatory]")
    parser.add_option(
        "-c", dest="qOverlap", default=0.05, type=float,
        help="fract of query aligned [%default]")
    parser.add_option(
        "-n", dest="haploid", default=2, type=int,
        help="haploid number [%default]")
    parser.add_option(
        "-x", dest="monoploid", default=2, type=int,
        help="monoploid number [%default]")
    parser.add_option(
        "-v", dest="verbose", default=False, action="store_true")

    (o, fnames) = parser.parse_args()
    if o.verbose:
        report = "Options: %s\nFastQ files: %s\n" % (o, fnames)
        sys.stderr.write(report)

    # check input files: each one is mandatory and must exist
    mandatory = [o.coords, o.query, o.ref]
    for candidate in mandatory:
        if not candidate:
            parser.error("Provide input file!")
        if not os.path.isfile(candidate):
            parser.error("No such file: %s" % candidate)

    # load query and reference genomes
    query2fasta = genome2dict(o.query)
    ref2fasta = genome2dict(o.ref)

    # load nucmer
    matches = nucmer2list(o.coords)

    # sort hits by ref position
    sort_hits(
        matches, query2fasta, ref2fasta,
        o.outfn, o.qOverlap, o.haploid, o.monoploid, o.verbose,
    )
def main():
    """Parse command-line options and verify candidate SNPs from VCF files.

    At least one existing VCF file has to be given as a positional argument.
    When an annotation (-g) is given, the genome FASTA (-f) is mandatory and
    both are loaded. Candidate coordinates are read with ``load_vcf`` and
    checked against the two BAM files by ``check_snps``.
    """
    usage = "usage: %prog [options] *.vcf"
    parser = OptionParser(usage=usage, version="%prog 1.0")
    parser.add_option("-g", dest="gtf",
                      help="genome annotation gtf/gff [requires -f]")
    parser.add_option("-f", dest="fasta", help="genome fasta")
    parser.add_option("-1", dest="bam1", help="sample bam")
    parser.add_option("-2", dest="bam2", help="reference bam")
    parser.add_option("-o", dest="outfn", help="output fname [stdout]")
    parser.add_option("-d", dest="minDepth", default=5, type=int,
                      help="""minimal depth; note both samples need to have pass depth filtering [%default]""")
    parser.add_option("-m", dest="minFreq", default=0.8, type=float,
                      help="min frequency of alternative base [%default]")
    parser.add_option("-n", dest="indels", default=True, action="store_false",
                      help="ignore indels [%default]")
    parser.add_option("-b", dest="bothStrands", default=True,
                      action="store_false",
                      help="report events confirmed by single strand algs")
    # verbose is on by default; -v switches it off
    parser.add_option("-v", dest="verbose", default=True, action="store_false")

    (o, args) = parser.parse_args()
    if o.verbose:
        sys.stderr.write("%s\n" % (str(o),))

    # positional arguments are the vcf files; each has to exist
    if not args:
        parser.error("At least one vcf file has to be specified!")
    for fn in args:
        if not os.path.isfile(fn):
            parser.error("No such file: %s" % fn)

    ctg2cds, id2gene, ctg2seq = {}, {}, {}
    if o.gtf:  # if annotation
        # load genome; fasta has to be provided
        if not o.fasta:
            # was ``parser.errer``: AttributeError instead of a usage error
            parser.error("Fasta file (-f) is required!")
        elif not os.path.isfile(o.fasta):
            parser.error("No such file: %s" % o.fasta)
        ctg2seq = genome2dict(o.fasta)

        # load genome annotation
        if not os.path.isfile(o.gtf):  # check if correct file
            parser.error("No such file: %s" % o.gtf)
        # load gtf/gff
        if o.gtf.endswith(".gff"):
            id2gene, ctg2cds = load_gff(o.gtf)
        else:
            id2gene, ctg2cds = load_gtf(o.gtf)
        if o.verbose:
            sys.stderr.write("Loaded annotation of %s CDS from %s\n"
                             % (len(id2gene), o.gtf))

    # load possible SNPs coordinates
    coords = load_vcf(args, o.indels)
    # check with mpileup
    check_snps(coords, o.bam1, o.bam2, o.fasta, o.outfn, ctg2cds, id2gene,
               ctg2seq, o.minDepth, o.minFreq, o.indels, o.bothStrands)
def main():
    """Report multi-FASTA sequences that do not overlap already reported ones.

    Sequences are processed from the longest to the shortest. Overlaps are
    taken from a blat run of the input file against itself; a sequence is
    appended to ``<infile>.collapsed_o<overlap>_i<identity>.fa`` if nothing
    has been kept yet or ``overlapping`` finds no overlap with kept ones.
    A summary line is written to stderr at the end.
    """
    usage = "usage: %prog [options]"
    desc = """Parse multi-fasta file and report sequences without overlap with already reported sequences. Starts from the longest."""
    epilog = ""
    parser = OptionParser(usage=usage, version="%prog 1.0",
                          description=desc, epilog=epilog)
    parser.add_option(
        "-i", dest="infile",
        help="multi-fasta file [mandatory]")
    parser.add_option(
        "-m", dest="minIdentity", default=90, type=int,
        help="min identity [%default]")
    parser.add_option(
        "-o", dest="overlap", default=0.3, type=float,
        help="max overlap allowed [%default]")
    parser.add_option(
        "-v", dest="verbose", default=False, action="store_true")

    (o, args) = parser.parse_args()
    if o.verbose:
        sys.stderr.write("Options: %s\nArgs: %s\n" % (o, args))

    # mandatory input has to be given and exist
    for required in (o.infile,):
        if not required:
            parser.error("Provide input file!")
        if not os.path.isfile(required):
            parser.error("No such file: %s" % required)

    # load fastas
    fastas = genome2dict(o.infile)

    def seq_length(contig_id):
        return len(fastas[contig_id])

    # contigs by descending length
    contigs = sorted(fastas.keys(), key=seq_length, reverse=True)

    added = set()
    skipped = set()

    # remove outfile if exists, because entries are appended below
    suffix = ".collapsed_o%s_i%s.fa" % (o.overlap, o.minIdentity)
    outfn = o.infile + suffix
    if os.path.isfile(outfn):
        os.unlink(outfn)

    # execute blat vs itself
    pslfn = run_blat(o.infile, o.infile, o.minIdentity, o.verbose)
    matches = parse_blat(pslfn, o.verbose, header=0, skipSelfMatches=1)

    # add contigs without overlap
    for position, contig_id in enumerate(contigs, 1):
        if o.verbose:
            size_kb = seq_length(contig_id) / 1000.0
            sys.stderr.write(" %3s %20s [ %7.2f kb]\n"
                             % (position, contig_id, size_kb))
        # get fasta entry
        record = ">%s\n%s\n" % (contig_id,
                                _get_formatted_seq(fastas[contig_id]))
        # keep the contig if it is the first one or overlaps nothing kept
        keep = not added or not overlapping(contig_id, added, matches,
                                            o.overlap, o.verbose)
        if keep:
            added.add(contig_id)
            with open(outfn, "a") as out:
                out.write(record)
        else:
            skipped.add(contig_id)

    selected_kb = sum([seq_length(cid) for cid in added]) / 10.0**3
    all_kb = sum([seq_length(cid) for cid in fastas]) / 10.0**3
    summary = "Selected %s [ %7.2f kb] out of %s [ %7.2f kb] contigs.\n" % (
        len(added), selected_kb, len(fastas), all_kb)
    sys.stderr.write(summary)
def main():
    """Parse command-line options and verify candidate SNPs from VCF files.

    Positional arguments are VCF files; at least one is required and each
    must exist. If an annotation (-g) is supplied, the genome FASTA (-f) is
    required as well and both are loaded. Candidate coordinates come from
    ``load_vcf`` and are checked against the sample (-1) and reference (-2)
    BAM files by ``check_snps``.
    """
    usage = "usage: %prog [options] *.vcf"
    parser = OptionParser(usage=usage, version="%prog 1.0")
    parser.add_option("-g", dest="gtf",
                      help="genome annotation gtf/gff [requires -f]")
    parser.add_option("-f", dest="fasta", help="genome fasta")
    parser.add_option("-1", dest="bam1", help="sample bam")
    parser.add_option("-2", dest="bam2", help="reference bam")
    parser.add_option("-o", dest="outfn", help="output fname [stdout]")
    parser.add_option(
        "-d", dest="minDepth", default=5, type=int,
        help="""minimal depth; note both samples need to have pass depth filtering [%default]""")
    parser.add_option("-m", dest="minFreq", default=0.8, type=float,
                      help="min frequency of alternative base [%default]")
    parser.add_option("-n", dest="indels", default=True, action="store_false",
                      help="ignore indels [%default]")
    parser.add_option("-b", dest="bothStrands", default=True,
                      action="store_false",
                      help="report events confirmed by single strand algs")
    # verbose is enabled unless -v is passed
    parser.add_option("-v", dest="verbose", default=True, action="store_false")

    (o, args) = parser.parse_args()
    if o.verbose:
        sys.stderr.write("%s\n" % (str(o), ))

    # validate the vcf files given as positional arguments
    if not args:
        parser.error("At least one vcf file has to be specified!")
    for fn in args:
        if not os.path.isfile(fn):
            parser.error("No such file: %s" % fn)

    ctg2cds, id2gene, ctg2seq = {}, {}, {}
    if o.gtf:  # if annotation
        # load genome
        if not o.fasta:  # fasta has to be provided
            # the misspelt ``parser.errer`` raised AttributeError here
            parser.error("Fasta file (-f) is required!")
        elif not os.path.isfile(o.fasta):
            parser.error("No such file: %s" % o.fasta)
        ctg2seq = genome2dict(o.fasta)

        # load genome annotation
        if not os.path.isfile(o.gtf):  # check if correct file
            parser.error("No such file: %s" % o.gtf)
        # load gtf/gff
        if o.gtf.endswith(".gff"):
            id2gene, ctg2cds = load_gff(o.gtf)
        else:
            id2gene, ctg2cds = load_gtf(o.gtf)
        if o.verbose:
            sys.stderr.write("Loaded annotation of %s CDS from %s\n"
                             % (len(id2gene), o.gtf))

    # load possible SNPs coordinates
    coords = load_vcf(args, o.indels)
    # check with mpileup
    check_snps(coords, o.bam1, o.bam2, o.fasta, o.outfn, ctg2cds, id2gene,
               ctg2seq, o.minDepth, o.minFreq, o.indels, o.bothStrands)