def parseArgs(args):
    """
    Parse the command line of the "Honey.py force" sub-command.

    args -- list of argument strings handed to argparse

    Returns the argparse namespace. setupLogging is called with the value of
    --debug before the namespace is returned.
    """
    cli = argparse.ArgumentParser(
        prog="Honey.py force",
        description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    # (flags, keyword settings) pairs, registered in this exact order so the
    # generated help keeps its layout.
    spec = [
        (("bam",),
         dict(metavar="BAM", type=str, help="Assembled Contigs Bam")),
        (("bed",),
         dict(metavar="BED", type=str,
              help="Bed of locations to force SV Calls")),
        (("-s", "--sizebuffer"),
         dict(type=float, default=0.35,
              help="Buffer of estimated sv size to create match (%(default)s)")),
        (("-d", "--maxDelta"),
         dict(type=int, default=500,
              help="Max distance between predicted and discovered variant (%(default)s)")),
        (("-f", "--fetchbuffer"),
         dict(type=int, default=1000,
              help="Buffer for fetching reads from .bam (%(default)s)")),
        # disabled option:
        #   "-o", "--overlapbuffer", type=float, default=0.50,
        #   help="Percent overlap required from calls to tails (%(default)s)"
        (("-q", "--minMapq"),
         dict(type=int, default=100,
              help="Minimum mapping quality of a read and it's tail to consider (%(default)s)")),
        (("-m", "--minErr"),
         dict(type=int, default=5,
              help="Minimum ins/del error size to consider (%(default)s)")),
        # disabled option:
        #   "-a", "--asm", action="store_true",
        #   help="Input reads are high-quality contigs"
        (("-p", "--bedPE"),
         dict(action="store_true",
              help="Input bed file is bedPE - only tails searching will be performed")),
        (("--debug",),
         dict(action="store_true", help="Verbose logging")),
    ]
    for flags, settings in spec:
        cli.add_argument(*flags, **settings)

    parsed = cli.parse_args(args)
    setupLogging(parsed.debug)
    return parsed
def parseArgs(argv):
    """
    Parse the command line of the "Honey.py cpxres" sub-command.

    argv -- list of argument strings handed to argparse

    Returns the argparse namespace. When no --output is given it defaults to
    the tails file name with '.cpx' appended. setupLogging is called with the
    value of --debug.
    """
    cli = argparse.ArgumentParser(
        prog="Honey.py cpxres",
        description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    # (flags, keyword settings) pairs, registered in order
    spec = [
        (("tails",),
         dict(metavar="TAILS", type=str, help="Input hon.tals file")),
        (("-o", "--output"),
         dict(type=str, default=None, help="Output file (<tails>.cpx)")),
        (("-c", "--minBlock"),
         dict(type=int, default=500,
              help="To prevent 'tiny' reference bocks, remove those with a size less than (%(default)s)")),
        (("-s", "--maxSpan"),
         dict(type=int, default=100000,
              help="Max Span of a breakpoint to be considered (%(default)s)")),
        (("-l", "--maxOvl"),
         dict(type=int, default=10,
              help="Max number of overlaps in a cluster (%(default)s)")),
        (("-r", "--maxRefBlocks"),
         dict(type=int, default=10,
              help="Max number of reference blocks to consider (%(default)s)")),
        (("--debug",),
         dict(action="store_true", help="Verbose logging")),
    ]
    for flags, settings in spec:
        cli.add_argument(*flags, **settings)

    parsed = cli.parse_args(argv)
    setupLogging(parsed.debug)

    # Derive the output name from the input when the user gave none
    if parsed.output is None:
        parsed.output = parsed.tails + '.cpx'
    return parsed
def parseArgs(args):
    """
    Build the "Honey.py force" argument parser and parse args with it.

    args -- list of argument strings

    Returns the parsed namespace after setupLogging has been called with the
    --debug flag.
    """
    forceParser = argparse.ArgumentParser(
        prog="Honey.py force", description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    add = forceParser.add_argument

    # Positional inputs
    add("bam", metavar="BAM", type=str,
        help="Assembled Contigs Bam")
    add("bed", metavar="BED", type=str,
        help="Bed of locations to force SV Calls")

    # Matching thresholds
    add("-s", "--sizebuffer", type=float, default=0.35,
        help="Buffer of estimated sv size to create match (%(default)s)")
    add("-d", "--maxDelta", type=int, default=500,
        help="Max distance between predicted and discovered variant (%(default)s)")
    add("-f", "--fetchbuffer", type=int, default=1000,
        help="Buffer for fetching reads from .bam (%(default)s)")
    #add("-o", "--overlapbuffer", type=float, default=0.50,
    #    help="Percent overlap required from calls to tails (%(default)s)")
    add("-q", "--minMapq", type=int, default=100,
        help="Minimum mapping quality of a read and it's tail to consider (%(default)s)")
    add("-m", "--minErr", type=int, default=5,
        help="Minimum ins/del error size to consider (%(default)s)")
    #add("-a", "--asm", action="store_true",
    #    help="Input reads are high-quality contigs")

    # Switches
    add("-p", "--bedPE", action="store_true",
        help="Input bed file is bedPE - only tails searching will be performed")
    add("--debug", action="store_true",
        help="Verbose logging")

    namespace = forceParser.parse_args(args)
    setupLogging(namespace.debug)
    return namespace
def parseArgs(argv):
    """
    Build the "Honey.py cpxres" argument parser and parse argv with it.

    argv -- list of argument strings

    Returns the parsed namespace; output falls back to "<tails>.cpx" when it
    was not supplied. setupLogging is called with the --debug flag.
    """
    cpxParser = argparse.ArgumentParser(
        prog="Honey.py cpxres", description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    add = cpxParser.add_argument

    add("tails", metavar="TAILS", type=str,
        help="Input hon.tals file")
    add("-o", "--output", type=str, default=None,
        help="Output file (<tails>.cpx)")
    add("-c", "--minBlock", type=int, default=500,
        help="To prevent 'tiny' reference bocks, remove those with a size less than (%(default)s)")
    add("-s", "--maxSpan", type=int, default=100000,
        help="Max Span of a breakpoint to be considered (%(default)s)")
    add("-l", "--maxOvl", type=int, default=10,
        help="Max number of overlaps in a cluster (%(default)s)")
    add("-r", "--maxRefBlocks", type=int, default=10,
        help="Max number of reference blocks to consider (%(default)s)")
    add("--debug", action="store_true",
        help="Verbose logging")

    namespace = cpxParser.parse_args(argv)
    setupLogging(namespace.debug)

    if namespace.output is not None:
        return namespace
    # No explicit output: name it after the input tails file
    namespace.output = namespace.tails + '.cpx'
    return namespace
def test(argv):
    """
    Testing mode entry point (used when HSpots.py is run directly).

    Parses argv with parseArgs, opens the BAM and reference named there, and
    runs the supporting-read filter plus consensus calling on one hard-coded
    deletion spot. Resulting spots are printed, tagged with what happened:
    "seqmade", "noseq" or "filtfail".
    """
    numpy.seterr(all="ignore")
    args = parseArgs(argv)
    setupLogging(True)  # keep debug on.. you're testing!
    logging.critical("Running HSpots.py directly implements testing mode. "
                     "If you're trying to run the full, actual program, use "
                     "Honey.py spots")

    bam = pysam.Samfile(args.bam)
    reference = pysam.Fastafile(args.reference)

    try:
        sortOrder = bam.header["HD"]["SO"]
        if sortOrder != "coordinate":
            logging.warning("BAM is not sorted by coordinates! Performance may be slower")
    except KeyError:
        logging.warning("Assuming BAM is sorted by coordinate. Be sure this is correct")
    logging.info("Running in test mode")

    #do what you will.. from here
    # This is what I need to start with
    #spot = SpotResult(chrom="7", start=138402727, end=138402830, svtype="INS", size=113)
    pad = 200
    spot = SpotResult(chrom="3", start=195498264 - pad, end=195498609 + pad,
                      svtype="DEL", size=100)

    #fh = open("possible.bed")
    #for line in fh.readlines():
        #data = line.strip().split('\t')
        #spot = SpotResult(chrom=data[0], start=int(data[8]), end = int(data[9]), \
                          #size=int(data[5]), svtype=data[4])

    caller = SpotCaller('group', spot.chrom, spot.start, spot.end, args)
    if not caller.supportingReadsFilter(spot, bam, args):
        # The spot did not pass the supporting reads filter
        spot.tags["filtfail"] = True
        print(str(spot))
    else:
        consen = ConsensusCaller(spot, args)
        consen(bam, reference, 'none')
        for made in consen.newSpots:
            made.tags["seqmade"] = True
            print(made)
        if len(consen.newSpots) == 0:
            # Filter passed but consensus produced nothing
            spot.tags["noseq"] = True
            print(str(spot))
    #done with test code
    logging.info("Finished testing")
def __init__(self):
    """
    Given a protocol fn, load it up so we are ready to run.

    Steps, in order: parse the command line, configure logging from the
    parsed debug option, write the citation notice to stderr, and parse the
    protocol.
    """
    self.parseArgs()
    setupLogging(self.options.debug)

    citation = """ Please Cite: English, Adam C., Stephen Richards, Yi Han, Min Wang, Vanesa Vee, Jiaxin Qu, Xiang Qin, et al. "Mind the Gap: Upgrading Genomes with Pacific Biosciences RS Long-Read Sequencing Technology." PLoS ONE 7, no. 11 (November 21, 2012): e47768. doi:10.1371/journal.pone.0047768.\n\n"""
    sys.stderr.write(citation)

    self.parseProtocol()
def __init__(self):
    """
    Given a protocol fn, load it up so we are ready to run.

    Parses arguments, sets up logging with the debug option, prints the
    citation request on stderr and finally parses the protocol.
    """
    self.parseArgs()
    debugRequested = self.options.debug
    setupLogging(debugRequested)

    # Citation request shown on every start-up
    pleaseCite = """ Please Cite: English, Adam C., Stephen Richards, Yi Han, Min Wang, Vanesa Vee, Jiaxin Qu, Xiang Qin, et al. "Mind the Gap: Upgrading Genomes with Pacific Biosciences RS Long-Read Sequencing Technology." PLoS ONE 7, no. 11 (November 21, 2012): e47768. doi:10.1371/journal.pone.0047768.\n\n"""
    sys.stderr.write(pleaseCite)

    self.parseProtocol()
def parseArgs(self):
    """
    Parse the process command line (optparse) and store the settings on self.

    Attributes set: inputDir, outputJson, output (an open, writable file
    object or sys.stdout), tailMax, lengthMin, bestn, extends, maxEntries and
    debug. setupLogging is called with the --debug flag.

    Exits through parser.error when the input directory does not exist.
    """
    parser = OptionParser()
    parser.add_option("-i", "--inputDir", default=None,
                      help="Input directory to find chunks.m4 [DEFAULT=pwd]")
    parser.add_option("-j", "--json", action="store_true",
                      help="Output table in JSON format instead of lined [DEFAULT=False]")
    parser.add_option("-o", "--output", default=None,
                      help="Output file name [DEFAULT=stdout]")
    parser.add_option("-t", "--tailMax", type="int", default=-1,
                      help=("Use PBJelly's Support module to remove discordant "
                            "alignments with greater than specified tail length "
                            "[DEFAULT=off]"))
    parser.add_option("-l", "--lengthMin", type="int", default=0,
                      help="Ignore reads (query or target) less than specified length [DEFAULT=off]")
    parser.add_option("-b", "--bestn", type="int", default=sys.maxint,
                      help=("Report only the top bestn alignment scores for a query"
                            " [DEFAULT=all]"))
    parser.add_option("-e", "--extends", action="store_true",
                      help="Only report alignments that extend query [DEFAULT=False]")
    parser.add_option("-m", "--maxEntries", type="int", default=10000,
                      help="Max number of alignments to hold in memory from each file [DEFAULT=10000]")
    parser.add_option("--debug", action="store_true",
                      help="Verbose logging")

    # Positional arguments are not used by this tool
    opts, _ = parser.parse_args()
    setupLogging(opts.debug)

    if opts.inputDir is not None:
        self.inputDir = opts.inputDir
    else:
        self.inputDir = os.getcwd()

    if not os.path.exists(self.inputDir):
        # Report the directory that was actually checked; the bare name
        # inputDir is not defined in this scope and raised NameError.
        parser.error("Input directory (%s) does not exist" % self.inputDir)

    self.outputJson = opts.json

    # The handle stays open on purpose: it is kept on self for later use
    if opts.output is not None:
        self.output = open(opts.output, 'w')
    else:
        self.output = sys.stdout

    #Filter params
    self.tailMax = opts.tailMax
    self.lengthMin = opts.lengthMin
    self.bestn = opts.bestn
    self.extends = opts.extends
    self.maxEntries = opts.maxEntries
    self.debug = opts.debug
def parseArgs(self):
    """
    Parse the process command line (optparse) and store the settings on self.

    Attributes set: inputDir, outputJson, output (an open, writable file
    object or sys.stdout), tailMax, lengthMin, bestn, extends, maxEntries and
    debug. setupLogging is called with the --debug flag.

    Exits through parser.error when the input directory does not exist.
    """
    parser = OptionParser()
    parser.add_option("-i", "--inputDir", default=None,
                      help="Input directory to find chunks.m4 [DEFAULT=pwd]")
    parser.add_option("-j", "--json", action="store_true",
                      help="Output table in JSON format instead of lined [DEFAULT=False]")
    parser.add_option("-o", "--output", default=None,
                      help="Output file name [DEFAULT=stdout]")
    parser.add_option("-t", "--tailMax", type="int", default=-1,
                      help=("Use PBJelly's Support module to remove discordant "
                            "alignments with greater than specified tail length "
                            "[DEFAULT=off]"))
    parser.add_option("-l", "--lengthMin", type="int", default=0,
                      help="Ignore reads (query or target) less than specified length [DEFAULT=off]")
    parser.add_option("-b", "--bestn", type="int", default=sys.maxint,
                      help=("Report only the top bestn alignment scores for a query"
                            " [DEFAULT=all]"))
    parser.add_option("-e", "--extends", action="store_true",
                      help="Only report alignments that extend query [DEFAULT=False]")
    parser.add_option("-m", "--maxEntries", type="int", default=10000,
                      help="Max number of alignments to hold in memory from each file [DEFAULT=10000]")
    parser.add_option("--debug", action="store_true",
                      help="Verbose logging")

    # Positional arguments are not used by this tool
    opts, _ = parser.parse_args()
    setupLogging(opts.debug)

    if opts.inputDir is not None:
        self.inputDir = opts.inputDir
    else:
        self.inputDir = os.getcwd()

    if not os.path.exists(self.inputDir):
        # Report the directory that was actually checked; the bare name
        # inputDir is not defined in this scope and raised NameError.
        parser.error("Input directory (%s) does not exist" % self.inputDir)

    self.outputJson = opts.json

    # The handle stays open on purpose: it is kept on self for later use
    if opts.output is not None:
        self.output = open(opts.output, 'w')
    else:
        self.output = sys.stdout

    #Filter params
    self.tailMax = opts.tailMax
    self.lengthMin = opts.lengthMin
    self.bestn = opts.bestn
    self.extends = opts.extends
    self.maxEntries = opts.maxEntries
    self.debug = opts.debug
def test(argv):
    """
    Run HSpots in testing mode on a single hard-coded deletion spot.

    argv is parsed with parseArgs; the BAM and reference it names are opened
    and the spot is pushed through SpotCaller.supportingReadsFilter and
    ConsensusCaller. Every resulting spot is printed with a tag describing
    the outcome ("seqmade", "noseq" or "filtfail").
    """
    numpy.seterr(all="ignore")
    args = parseArgs(argv)
    setupLogging(True)  # keep debug on.. you're testing!
    banner = ("Running HSpots.py directly implements testing mode. "
              "If you're trying to run the full, actual program, use "
              "Honey.py spots")
    logging.critical(banner)

    bam = pysam.Samfile(args.bam)
    reference = pysam.Fastafile(args.reference)
    try:
        if bam.header["HD"]["SO"] != "coordinate":
            logging.warning("BAM is not sorted by coordinates! Performance may be slower")
    except KeyError:
        # Header carries no sort order information
        logging.warning("Assuming BAM is sorted by coordinate. Be sure this is correct")
    logging.info("Running in test mode")

    #do what you will.. from here
    # This is what I need to start with
    #spot = SpotResult(chrom="7", start=138402727, end=138402830, svtype="INS", size=113)
    rawStart, rawEnd = 195498264, 195498609
    spot = SpotResult(chrom="3", start=rawStart - 200, end=rawEnd + 200,
                      svtype="DEL", size=100)

    #fh = open("possible.bed")
    #for line in fh.readlines():
        #data = line.strip().split('\t')
        #spot = SpotResult(chrom=data[0], start=int(data[8]), end = int(data[9]), \
                          #size=int(data[5]), svtype=data[4])

    spotCaller = SpotCaller('group', spot.chrom, spot.start, spot.end, args)
    passed = spotCaller.supportingReadsFilter(spot, bam, args)
    if passed:
        consen = ConsensusCaller(spot, args)
        consen(bam, reference, 'none')
        for newSpot in consen.newSpots:
            newSpot.tags["seqmade"] = True
            print(newSpot)
        if len(consen.newSpots) == 0:
            spot.tags["noseq"] = True
            print(str(spot))
    else:
        spot.tags["filtfail"] = True
        print(str(spot))
    #done with test code
    logging.info("Finished testing")
def test(argv):
    """
    Testing mode entry point (used when HSpots.py is run directly).

    Parses argv with parseArgs, opens the BAM and reference named there, then
    reads candidate deletions from "honeymissing.bed" in the working
    directory. Each line must be tab separated: chrom, start, end and a
    fourth column whose text after the last '=' is the size. Every spot goes
    through the supporting-read filter and consensus calling; results are
    printed tagged "seqmade", "noseq" or "filtfail".
    """
    numpy.seterr(all="ignore")
    args = parseArgs(argv)
    setupLogging(True)  # keep debug on.. you're testing!
    logging.critical(("Running HSpots.py directly implements testing mode. "
                      "If you're trying to run the full, actual program, use "
                      "Honey.py spots"))

    bam = pysam.Samfile(args.bam)
    reference = pysam.Fastafile(args.reference)
    try:
        if bam.header["HD"]["SO"] != "coordinate":
            logging.warning(
                "BAM is not sorted by coordinates! Performance may be slower")
    except KeyError:
        logging.warning(
            "Assuming BAM is sorted by coordinate. Be sure this is correct")
    logging.info("Running in test mode")

    #do what you will.. from here
    #spot = SpotResult(chrom='11', start=2215290, end=2215798, svtype="DEL", size=208)
    #spot = SpotResult(chrom='22', start=45964261, end=45965596, svtype="DEL", size=-1)
    # This is what I need to start with
    #spot = SpotResult(chrom="22", start=45963975, end=45964532, svtype="DEL", size=57)

    # 'with' closes the bed file even when a line fails to parse; iterating
    # the handle avoids loading the whole file through readlines().
    with open("honeymissing.bed") as fh:
        for line in fh:
            line = line.strip()
            if not line:
                # A blank line would otherwise die with IndexError below
                continue
            data = line.split('\t')
            spot = SpotResult(chrom=data[0], start=int(data[1]),
                              end=int(data[2]),
                              size=int(data[3].split('=')[-1]), svtype="DEL")

            j = SpotCaller('group', spot.chrom, spot.start, spot.end, args)
            if j.supportingReadsFilter(spot, bam, args):
                consen = ConsensusCaller(spot, args)
                consen(bam, reference, 'none')
                for i in consen.newSpots:
                    i.tags["seqmade"] = True
                    print(i)
                if len(consen.newSpots) == 0:
                    # Filter passed but consensus produced nothing
                    spot.tags["noseq"] = True
                    print(str(spot))
            else:
                spot.tags["filtfail"] = True
                print(str(spot))
    #done with test code
    logging.info("Finished testing")
def _parseOptions(self):
    """
    Parse sys.argv (optparse) and load the input reads.

    Accepted positional inputs are either one .fastq file or a .fasta file
    followed by a .qual file; anything else ends in parser.error. Sets
    self.options and self.fastqSeq, plus self.fastqFile or
    self.fastaFile/self.qualFile depending on the input form. The output
    name is made absolute before the process changes into the work
    directory (if one is configured).
    """
    optParser = OptionParser(usage=USAGE)
    addOpt = optParser.add_option

    addOpt("--debug", action="store_true",
           help="Increases verbosity of logging")
    addOpt("--nproc", type="int",
           help="Number of processes to use.")
    addOpt("-o", "--outName", type="string",
           help="Name of the output fasta and qual files (Don't include the extension)",
           default="out")
    addOpt("--fqOut", action="store_true",
           help="Create a .fastq output file")
    addOpt("--rename", type="string",
           help="Gives the ouput contigs more descriptive names")
    addOpt("--minSubreads", type="int",
           help="Minimum number of subreads required to attempt assembly")
    addOpt("--workDir", type="string",
           help="Directory to build the bank an everything in.")
    addOpt("--workTmp", type="string",
           help="Work in a temporary directory")
    addOpt("--threshold", type="int",
           help="Threshold when determining overlaps")
    addOpt("--transmax", type="int",
           help="Max links of transitivity")
    addOpt("-e", type="str",
           help="Alignment Error% e.g. 0.15 = 15%")

    optParser.set_defaults(debug=False, nproc=1, outName="out", rename=None,
                           minSubreads=2, filtering=False, workTmp=None,
                           threshold=800, transmax=1, e="0.15")

    # sys.argv is passed explicitly, so index 0 of the leftovers is the
    # program name and the real inputs start at index 1.
    self.options, leftovers = optParser.parse_args(sys.argv)
    setupLogging(self.options.debug)
    logging.warning("This program doesn't work with SMRTAnalysis v2.1 and on")
    logging.info("Reading Input Reads")

    count = len(leftovers)
    if count == 2:
        # Single fastq input
        self.fastqFile = leftovers[1]
        if not self.fastqFile.endswith(".fastq"):
            optParser.error("Expected a Fastq File or Fasta/Qual")
        self.fastqSeq = FastqFile(self.fastqFile)
    elif count == 3:
        # fasta + qual pair
        fasta, qual = leftovers[1], leftovers[2]
        if not fasta.endswith(".fasta"):
            optParser.error("Expected First Argument To End With .fasta")
        if not qual.endswith(".qual"):
            optParser.error("Expected Second Argument To End With .qual")
        self.fastqSeq = mergeFastaQual(fasta, qual)
        self.fastaFile = fasta
        self.qualFile = qual
    else:
        optParser.error("Expected <input.fastq> or <input.fasta> <input.qual> Arguments!")

    # Resolve the output name before any chdir below changes the meaning of
    # a relative path.
    self.options.outName = os.path.abspath(self.options.outName)

    if self.options.workTmp is not None:
        self.options.workDir = tempfile.mkdtemp(dir=self.options.workTmp)

    if self.options.workDir is not None:
        os.chdir(self.options.workDir)
def test(argv):
    """
    Testing mode entry point (used when HSpots.py is run directly).

    Parses argv with parseArgs, opens the BAM and reference named there, then
    reads candidate deletions from "honeymissing.bed" in the working
    directory. Each line must be tab separated: chrom, start, end and a
    fourth column whose text after the last '=' is the size. Every spot goes
    through the supporting-read filter and consensus calling; results are
    printed tagged "seqmade", "noseq" or "filtfail".
    """
    numpy.seterr(all="ignore")
    args = parseArgs(argv)
    setupLogging(True)  # keep debug on.. you're testing!
    logging.critical(("Running HSpots.py directly implements testing mode. "
                      "If you're trying to run the full, actual program, use "
                      "Honey.py spots"))

    bam = pysam.Samfile(args.bam)
    reference = pysam.Fastafile(args.reference)
    try:
        if bam.header["HD"]["SO"] != "coordinate":
            logging.warning("BAM is not sorted by coordinates! Performance may be slower")
    except KeyError:
        logging.warning("Assuming BAM is sorted by coordinate. Be sure this is correct")
    logging.info("Running in test mode")

    #do what you will.. from here
    #spot = SpotResult(chrom='11', start=2215290, end=2215798, svtype="DEL", size=208)
    #spot = SpotResult(chrom='22', start=45964261, end=45965596, svtype="DEL", size=-1)
    # This is what I need to start with
    #spot = SpotResult(chrom="22", start=45963975, end=45964532, svtype="DEL", size=57)

    # 'with' closes the bed file even when a line fails to parse; iterating
    # the handle avoids loading the whole file through readlines().
    with open("honeymissing.bed") as fh:
        for line in fh:
            line = line.strip()
            if not line:
                # A blank line would otherwise die with IndexError below
                continue
            data = line.split('\t')
            spot = SpotResult(chrom=data[0], start=int(data[1]),
                              end=int(data[2]),
                              size=int(data[3].split('=')[-1]), svtype="DEL")

            j = SpotCaller('group', spot.chrom, spot.start, spot.end, args)
            if j.supportingReadsFilter(spot, bam, args):
                consen = ConsensusCaller(spot, args)
                consen(bam, reference, 'none')
                for i in consen.newSpots:
                    i.tags["seqmade"] = True
                    print(i)
                if len(consen.newSpots) == 0:
                    # Filter passed but consensus produced nothing
                    spot.tags["noseq"] = True
                    print(str(spot))
            else:
                spot.tags["filtfail"] = True
                print(str(spot))
    #done with test code
    logging.info("Finished testing")
def _parseOptions(self):
    """
    Read the command line from sys.argv and load the input reads.

    Input is either <input.fastq> or <input.fasta> <input.qual>; any other
    shape is rejected through parser.error. Results are stored on self:
    options, fastqSeq, and fastqFile or fastaFile/qualFile. The output name
    is turned into an absolute path, after which the process may chdir into
    the (possibly freshly created temporary) work directory.
    """
    cli = OptionParser(usage=USAGE)

    # (flags, keyword settings) registered in order
    optionTable = [
        (("--debug",), dict(action="store_true",
                            help="Increases verbosity of logging")),
        (("--nproc",), dict(type="int",
                            help="Number of processes to use.")),
        (("-o", "--outName"), dict(
            type="string",
            help="Name of the output fasta and qual files (Don't include the extension)",
            default="out")),
        (("--fqOut",), dict(action="store_true",
                            help="Create a .fastq output file")),
        (("--rename",), dict(
            type="string",
            help="Gives the ouput contigs more descriptive names")),
        (("--minSubreads",), dict(
            type="int",
            help="Minimum number of subreads required to attempt assembly")),
        (("--workDir",), dict(
            type="string",
            help="Directory to build the bank an everything in.")),
        (("--workTmp",), dict(type="string",
                              help="Work in a temporary directory")),
        (("--threshold",), dict(type="int",
                                help="Threshold when determining overlaps")),
        (("--transmax",), dict(type="int",
                               help="Max links of transitivity")),
        (("-e",), dict(type="str",
                       help="Alignment Error% e.g. 0.15 = 15%")),
    ]
    for flags, settings in optionTable:
        cli.add_option(*flags, **settings)

    cli.set_defaults(debug=False, nproc=1, outName="out", rename=None,
                     minSubreads=2, filtering=False, workTmp=None,
                     threshold=800, transmax=1, e="0.15")

    # The full sys.argv is parsed, so positional index 0 is the script name
    self.options, positional = cli.parse_args(sys.argv)
    setupLogging(self.options.debug)
    logging.warning(
        "This program doesn't work with SMRTAnalysis v2.1 and on")
    logging.info("Reading Input Reads")

    if len(positional) == 2:
        self.fastqFile = positional[1]
        if not self.fastqFile.endswith(".fastq"):
            cli.error("Expected a Fastq File or Fasta/Qual")
        self.fastqSeq = FastqFile(self.fastqFile)
    elif len(positional) == 3:
        fastaName = positional[1]
        qualName = positional[2]
        if not fastaName.endswith(".fasta"):
            cli.error("Expected First Argument To End With .fasta")
        if not qualName.endswith(".qual"):
            cli.error("Expected Second Argument To End With .qual")
        self.fastqSeq = mergeFastaQual(fastaName, qualName)
        self.fastaFile = fastaName
        self.qualFile = qualName
    else:
        cli.error(
            "Expected <input.fastq> or <input.fasta> <input.qual> Arguments!")

    # Must happen before the chdir below so a relative name keeps pointing
    # at the directory the program was started from.
    self.options.outName = os.path.abspath(self.options.outName)

    tmpParent = self.options.workTmp
    if tmpParent is not None:
        self.options.workDir = tempfile.mkdtemp(dir=tmpParent)

    if self.options.workDir is not None:
        os.chdir(self.options.workDir)
def __init__(self):
    """
    Parse the options, then turn logging on in debug mode.

    Debug logging is currently forced on regardless of the parsed options.
    """
    self.parseOpts()
    # Honouring the parsed flag is disabled for now:
    #setupLogging(self.debug)
    forceDebug = True
    setupLogging(forceDebug)
def parseArgs(argv, established=False):
    """
    Parse the command line of the "Honey.py spots" sub-command.

    argv        -- list of argument strings handed to argparse
    established -- accepted for interface compatibility; not used here

    Returns the argparse namespace with these adjustments:
      * output defaults to the --hon name without its ".h5" suffix, or to
        the BAM name minus its last four characters plus ".hon"
      * chrom is split on ',' into a list when given

    setupLogging is called with the --debug flag. The process exits with
    status 1 when maxCoverage exceeds BIGINT or when consensus calling is on
    and no --reference was supplied.
    """
    parser = argparse.ArgumentParser(
        prog="Honey.py spots", description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    ioGroup = parser.add_argument_group("I/O Arguments")
    ioGroup.add_argument("bam", metavar="BAM", type=str,
                         help="BAM containing mapped reads")
    ioGroup.add_argument("--hon", metavar="HON.H5", type=str, default=None,
                         help="HON.h5 containing Error data. Skips ErrorCouting.")
    ioGroup.add_argument("-r", "--region", type=str, default=None,
                         help="Only call spots in region.bed")
    ioGroup.add_argument("--chrom", type=str, default=None,
                         help="Only call spots on specified chromosomes (comma-separated) (%(default)s)")
    ioGroup.add_argument("-n", "--nproc", type=int, default=1,
                         help="Number of processors to use (only for consensus) (%(default)s)")
    ioGroup.add_argument("-o", "--output", type=str, default=None,
                         help="Basename for output (BAM.hon)")
    ioGroup.add_argument("--readFile", action="store_true",
                         help="Create a file with what reads support what events (%(default)s)")

    pGroup = parser.add_argument_group(
        "Spot-Calling Threshold/Filtering Arguments")
    pGroup.add_argument("-b", "--binsize", type=int, default=100,
                        help="binsize for window averaging (%(default)s)")
    pGroup.add_argument("-e", "--threshold", type=float, default=3,
                        help="Minimum Spot Threshold (%(default)s)")
    pGroup.add_argument("-c", "--minCoverage", type=int, default=2,
                        help="Minimum coverage of a region (%(default)s)")
    pGroup.add_argument("-C", "--maxCoverage", type=int, default=BIGINT,
                        help="Maximum coverage of a region (%(default)s)")
    pGroup.add_argument("-q", "--minMapQ", type=int, default=1,
                        help="Minimum map quality of reads considered (%(default)s)")
    pGroup.add_argument("-m", "--minIndelErr", type=int, default=5,
                        help="Minimum size of an indel error to be counted (%(default)s)")
    pGroup.add_argument("-i", "--minIndelSize", type=int, default=50,
                        help="Minimum indel SV size (%(default)s)")
    pGroup.add_argument("-E", "--minErrReads", type=int, default=3,
                        help="Minimum number of reads with indel (%(default)s)")
    pGroup.add_argument("--spanMax", type=int, default=2000,
                        help="Maximum Size of spot to be called (%(default)s)")
    #pGroup.add_argument("-I", "--minIndelPct", type=float, default=0.20, \
                        #help="Minimum pct of reads with indel (max(%(default)s*cov,minErrReads)")

    aGroup = parser.add_argument_group("Consensus Arguments")
    aGroup.add_argument("--noConsensus", action="store_true",
                        help="Turn off consensus calling, just report spots (False)")
    aGroup.add_argument("--buffer", default=1000, type=int,
                        help="Buffer around SV to assemble (%(default)s)")
    aGroup.add_argument("--reference", default=None, type=str,
                        help="Sample reference. Required with consensus calling (None)")
    aGroup.add_argument("--polish", type=str, default="pbdagcon",
                        choices=["pbdagcon", "pbbanana", "None"],
                        help="Method for polishing consensus. (%(default)s)")
    aGroup.add_argument("--blasr", default="blasr",
                        help="Path to blasr if it's not in the env")
    #aGroup.add_argument("--contig", default="store_false", \
                        #help="Report the full contig sequences and QVs in INFO (False)")

    parser.add_argument("--debug", action="store_true",
                        help="Verbose logging")

    args = parser.parse_args(argv)
    setupLogging(args.debug)

    if args.maxCoverage > BIGINT:
        logging.error("Max Coverge must be less than %d" % (BIGINT))
        # Non-zero status: this is a failure, not a clean exit
        exit(1)

    #check bam is bamfile
    if args.output is None:
        #args.output = args.bam.filename[:-4]+".hon"
        if args.hon is not None:
            # Remove the ".h5" suffix only. str.rstrip(".h5") strips any
            # trailing run of the characters '.', 'h' and '5', which eats
            # into names such as "run5h.h5".
            if args.hon.endswith(".h5"):
                args.output = args.hon[:-len(".h5")]
            else:
                args.output = args.hon
        else:
            # Drops the last four characters of the BAM name (".bam")
            args.output = args.bam[:-4] + ".hon"

    if not args.noConsensus:
        if args.reference is None:
            logging.error("Reference is required with consensus calling")
            exit(1)
        #Check is fastafile

    if args.chrom is not None:
        args.chrom = args.chrom.split(',')

    return args
def __init__(self):
    """
    Parse the command line, then configure logging from the debug option
    found on self.opts.
    """
    self.parseArgs()
    debugRequested = self.opts.debug
    setupLogging(debugRequested)
def parseArgs(argv, established=False):
    """
    Parse the command line of the "Honey.py spots" sub-command.

    argv        -- list of argument strings handed to argparse
    established -- accepted for interface compatibility; not used here

    Returns the argparse namespace with these adjustments:
      * output defaults to the --hon name without its ".h5" suffix, or to
        the BAM name minus its last four characters plus ".hon"
      * chrom is split on ',' into a list when given

    setupLogging is called with the --debug flag. The process exits with
    status 1 when maxCoverage exceeds BIGINT or when --consensus is not
    "None" and no --reference was supplied.
    """
    parser = argparse.ArgumentParser(
        prog="Honey.py spots", description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    ioGroup = parser.add_argument_group("I/O Arguments")
    ioGroup.add_argument("bam", metavar="BAM", type=str,
                         help="BAM containing mapped reads")
    ioGroup.add_argument("--hon", metavar="HON.H5", type=str, default=None,
                         help="HON.h5 containing Error data. Skips ErrorCouting.")
    ioGroup.add_argument("-r", "--region", type=str, default=None,
                         help="Only call spots in region.bed")
    ioGroup.add_argument("--chrom", type=str, default=None,
                         help="Only call spots on specified chromosomes (comma-separated) (%(default)s)")
    ioGroup.add_argument("-n", "--nproc", type=int, default=1,
                         help="Number of processors to use (only for consensus) (%(default)s)")
    ioGroup.add_argument("-o", "--output", type=str, default=None,
                         help="Basename for output (BAM.hon)")
    ioGroup.add_argument("--readFile", action="store_true",
                         help="Create a file with what reads support what events (%(default)s)")

    pGroup = parser.add_argument_group(
        "Spot-Calling Threshold/Filtering Arguments")
    pGroup.add_argument("-b", "--binsize", type=int, default=50,
                        help="Binsize for window averaging (%(default)s)")
    pGroup.add_argument("-e", "--threshold", type=float, default=3,
                        help="Minimum Spot Threshold (%(default)s)")
    pGroup.add_argument("-c", "--minCoverage", type=int, default=2,
                        help="Minimum coverage of a region (%(default)s)")
    pGroup.add_argument("-C", "--maxCoverage", type=int, default=BIGINT,
                        help="Maximum coverage of a region (%(default)s)")
    pGroup.add_argument("-q", "--minMapQ", type=int, default=1,
                        help="Minimum map quality of reads considered (%(default)s)")
    pGroup.add_argument("-m", "--minIndelErr", type=int, default=5,
                        help="Minimum size of an indel error to be counted (%(default)s)")
    pGroup.add_argument("-i", "--minIndelSize", type=int, default=50,
                        help="Minimum indel SV size (%(default)s)")
    pGroup.add_argument("-E", "--minErrReads", type=int, default=3,
                        help="Minimum number of reads with indel (%(default)s)")
    pGroup.add_argument("--spanMax", type=int, default=3000,
                        help="Maximum Size of spot to be called (%(default)s)")
    #pGroup.add_argument("-I", "--minIndelPct", type=float, default=0.20, \
                        #help="Minimum pct of reads with indel (max(%(default)s*cov,minErrReads)")

    aGroup = parser.add_argument_group("Consensus Arguments")
    aGroup.add_argument("--consensus", type=str, default="pbdagcon",
                        choices=["pbdagcon", "pbbanana", "None"],
                        help="Method for polishing consensus. (%(default)s)")
    aGroup.add_argument("--buffer", default=1000, type=int,
                        help="Buffer around SV to consense (%(default)s)")
    aGroup.add_argument("--reference", default=None, type=str,
                        help="Sample reference. Required with consensus calling (None)")
    aGroup.add_argument("--reportContig", action="store_true",
                        help="Report the contig created that called the spot")
    #aGroup.add_argument("--blasr", default="blasr", \
                        #help="Path to blasr if it's not in the env")
    #aGroup.add_argument("--contig", default="store_false", \
                        #help="Report the full contig sequences and QVs in INFO (False)")

    parser.add_argument("--debug", action="store_true",
                        help="Verbose logging")

    args = parser.parse_args(argv)
    setupLogging(args.debug)

    if args.maxCoverage > BIGINT:
        logging.error("Max Coverge must be less than %d" % (BIGINT))
        # Non-zero status: this is a failure, not a clean exit
        exit(1)

    #check bam is bamfile
    if args.output is None:
        #args.output = args.bam.filename[:-4]+".hon"
        if args.hon is not None:
            # Remove the ".h5" suffix only. str.rstrip(".h5") strips any
            # trailing run of the characters '.', 'h' and '5', which eats
            # into names such as "run5h.h5".
            if args.hon.endswith(".h5"):
                args.output = args.hon[:-len(".h5")]
            else:
                args.output = args.hon
        else:
            # Drops the last four characters of the BAM name (".bam")
            args.output = args.bam[:-4] + ".hon"

    # The literal string "None" is the choice that switches consensus off
    if args.consensus != "None":
        if args.reference is None:
            logging.error("Reference is required with consensus calling")
            exit(1)
        #Check is fastafile

    if args.chrom is not None:
        args.chrom = args.chrom.split(',')

    return args
def parseArgs(argv):
    """
    Parse and validate the command line of the assembly tool.

    argv -- list of argument strings handed to argparse

    Returns the argparse namespace. bam and pacBam are always lists on
    return (empty when not supplied). insertsize and insertstd are always
    present on the namespace: taken from the first --bam through insertDist
    when a BAM is available, otherwise None.

    setupLogging is called with the --debug flag. The process exits with
    status 1 when no BAM of either kind is given, the output does not end
    with ".fastq", the input bed or the reference does not exist, or
    remapping is on without a --reference.
    """
    parser = argparse.ArgumentParser(
        description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("putative", metavar="BED", type=str,
                        help="Bed of regions to assemble")
    parser.add_argument("-b", "--bam", type=str, nargs="*",
                        help="Input Bam (NonTrim)")
    parser.add_argument("-p", "--pacBam", type=str, nargs="*",
                        help="PacBio Bam")
    parser.add_argument("-a", "--assembler", type=str, default='phrap',
                        choices=["phrap", "minia", "spades"],
                        help="Assembly program to use (%(default)s)")
    parser.add_argument("-B", "--buffer", type=int, default=1000,
                        help="Amount of buffer sequence around the variant to use (%(default)s)")
    parser.add_argument("-n", "--nproc", type=int, default=1,
                        help="Number of processors to use (%(default)s)")
    parser.add_argument("-o", "--output", default="asm.fastq",
                        help="Where to write the resultant assemblies (%(default)s)")
    parser.add_argument("-r", "--reference", default=None,
                        help="Reference to map to (optional if --noRemap)")
    # store_false: args.noRemap / args.noSplitMap are True by default and
    # become False when the flag is given, i.e. True means "do it".
    parser.add_argument("--noRemap", action="store_false",
                        help="Do not remap assembly")
    parser.add_argument("--noSplitMap", action="store_false",
                        help="Do not map tails from remapped assembly (off if --noRemap)")
    parser.add_argument("--timeout", type=int, default=30,
                        help="Timeout assembly after N minutes (%(default)s)")
    parser.add_argument("--maxspan", type=int, default=100000,
                        help="Maximum Span of SV to attempt assembling (%(default)s)")
    parser.add_argument("--maxreads", type=int, default=500,
                        help="Maximum number of Illumina reads used to attempt assembling (%(default)s)")
    parser.add_argument("--temp", type=str, default=tempfile.gettempdir(),
                        help="Where to save temporary files")
    parser.add_argument("--start", type=int, default=0,
                        help="Index of the first variant to begin assembling. (%(default)s)")
    parser.add_argument("--stride", type=int, default=1,
                        help="Assemble one every N reads (%(default)s)")
    parser.add_argument("--debug", action="store_true",
                        help="Verbose Logging")
    #parser.add_argument("--insertsize", type=int, default=None, \
                        #help=("Celera - insert size for PE Illumina reads (auto_detect)"))
    #parser.add_argument("--insertstd", type=float, default=None, \
                        #help=("Celera - insert std for PE Illumina reads (auto_detect)"))

    args = parser.parse_args(argv)
    setupLogging(args.debug)

    # Parameter checks
    if args.bam is None and args.pacBam is None:
        logging.error("Expected at least one BAM argument")
        exit(1)

    if not args.output.endswith(".fastq"):
        logging.error("Output needs to end with .fastq")
        exit(1)

    if not os.path.exists(args.putative):
        logging.error("Input {inp} does not exist".format(inp=args.putative))
        exit(1)

    # args.noRemap is True when remapping is still enabled (see store_false)
    if args.noRemap and args.reference is None:
        logging.error("Cannot remap without --reference")
        exit(1)

    if args.reference and not os.path.exists(args.reference):
        logging.error("Reference {ref} does not exist".format(ref=args.reference))
        exit(1)

    if args.bam is None:
        args.bam = []

    # The --insertsize/--insertstd options are commented out above, so the
    # namespace has neither attribute; read them defensively instead of
    # raising AttributeError on every run.
    args.insertsize = getattr(args, "insertsize", None)
    args.insertstd = getattr(args, "insertstd", None)
    # Only auto-detect when a BAM is actually available (args.bam may be
    # an empty list, e.g. for PacBio-only runs).
    if args.insertsize is None and args.bam:
        j = pysam.Samfile(args.bam[0])
        try:
            mu, std = insertDist(j)
        finally:
            j.close()
        args.insertsize = mu
        args.insertstd = std if args.insertstd is None else args.insertstd

    if args.pacBam is None:
        args.pacBam = []

    return args
def __init__(self):
    """
    Run option parsing and enable debug logging.

    The parsed debug flag is not consulted at the moment; logging is always
    set up with debug switched on.
    """
    self.parseOpts()
    # Disabled: setupLogging(self.debug)
    alwaysDebug = True
    setupLogging(alwaysDebug)
def parseArgs(argv):
    """
    Parse and validate the command line of the assembly tool.

    argv -- list of argument strings handed to argparse

    Returns the argparse namespace; bam and pacBam are lists on return
    (empty when not supplied). setupLogging is called with the --debug flag.
    Validation failures are logged and end the process with status 1.
    """
    cli = argparse.ArgumentParser(
        description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    # (flags, keyword settings) registered in order.
    # Note on store_false: noRemap / noSplitMap default to True and turn
    # False when the flag is passed.
    spec = [
        (("putative",), dict(metavar="BED", type=str,
                             help="Bed of regions to assemble")),
        (("-b", "--bam"), dict(type=str, nargs="*",
                               help="Input Bam (NonTrim)")),
        (("-p", "--pacBam"), dict(type=str, nargs="*",
                                  help="PacBio Bam")),
        (("-a", "--assembler"), dict(
            type=str, default='phrap',
            choices=["phrap", "minia", "spades"],
            help="Assembly program to use (%(default)s)")),
        (("-B", "--buffer"), dict(
            type=int, default=1000,
            help="Amount of buffer sequence around the variant to use (%(default)s)")),
        (("-n", "--nproc"), dict(
            type=int, default=1,
            help="Number of processors to use (%(default)s)")),
        (("-o", "--output"), dict(
            default="asm.fastq",
            help="Where to write the resultant assemblies (%(default)s)")),
        (("-r", "--reference"), dict(
            default=None,
            help="Reference to map to (optional if --noRemap)")),
        (("--noRemap",), dict(action="store_false",
                              help="Do not remap assembly")),
        (("--noSplitMap",), dict(
            action="store_false",
            help="Do not map tails from remapped assembly (off if --noRemap)")),
        (("--timeout",), dict(
            type=int, default=30,
            help="Timeout assembly after N minutes (%(default)s)")),
        (("--maxspan",), dict(
            type=int, default=100000,
            help="Maximum Span of SV to attempt assembling (%(default)s)")),
        (("--maxreads",), dict(
            type=int, default=2500,
            help="Maximum number of Illumina reads used to attempt assembling (%(default)s)")),
        (("--temp",), dict(type=str, default=tempfile.gettempdir(),
                           help="Where to save temporary files")),
        (("--start",), dict(
            type=int, default=0,
            help="Index of the first variant to begin assembling. (%(default)s)")),
        (("--stride",), dict(
            type=int, default=1,
            help="Assemble one every N reads (%(default)s)")),
        (("--debug",), dict(action="store_true",
                            help="Verbose Logging")),
        # disabled options:
        #   "--insertsize", type=int, default=None,
        #       help=("Celera - insert size for PE Illumina reads (auto_detect)")
        #   "--insertstd", type=float, default=None,
        #       help=("Celera - insert std for PE Illumina reads (auto_detect)")
    ]
    for flags, settings in spec:
        cli.add_argument(*flags, **settings)

    args = cli.parse_args(argv)
    setupLogging(args.debug)

    # Parameter checks
    noBamGiven = args.bam is None and args.pacBam is None
    if noBamGiven:
        logging.error("Expected at least one BAM argument")
        exit(1)

    if not args.output.endswith(".fastq"):
        logging.error("Output needs to end with .fastq")
        exit(1)

    if not os.path.exists(args.putative):
        logging.error("Input {inp} does not exist".format(inp=args.putative))
        exit(1)

    # args.noRemap is True while remapping is still enabled
    if args.noRemap and args.reference is None:
        logging.error("Cannot remap without --reference")
        exit(1)

    if args.reference and not os.path.exists(args.reference):
        logging.error(
            "Reference {ref} does not exist".format(ref=args.reference))
        exit(1)

    # Normalise the optional BAM lists so callers can iterate them
    if args.bam is None:
        args.bam = []
    #if args.insertsize is None and args.bam is not None:
        #j = pysam.Samfile(args.bam[0])
        #mu,std = insertDist(j)
        #j.close()
        #args.insertsize = mu
        #args.insertstd = std if args.insertstd is None else args.insertstd
    if args.pacBam is None:
        args.pacBam = []

    return args
def __init__(self):
    """
    Parse the arguments and set up logging using the debug option stored on
    self.opts.
    """
    self.parseArgs()
    wantDebug = self.opts.debug
    setupLogging(wantDebug)