Ejemplo n.º 1
0
def parseArgs(args):
    """
    Parse the command line of the `Honey.py force` sub-command.

    args -- the argument list handed straight to argparse

    Logging is configured from the --debug flag before returning.
    Returns the argparse namespace holding every parsed option.
    """
    cli = argparse.ArgumentParser(
        prog="Honey.py force",
        description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    # Required positionals
    cli.add_argument("bam", metavar="BAM", type=str,
                     help="Assembled Contigs Bam")
    cli.add_argument("bed", metavar="BED", type=str,
                     help="Bed of locations to force SV Calls")

    # Matching / filtering thresholds
    cli.add_argument(
        "-s", "--sizebuffer", type=float, default=0.35,
        help="Buffer of estimated sv size to create match (%(default)s)")
    cli.add_argument(
        "-d", "--maxDelta", type=int, default=500,
        help="Max distance between predicted and discovered variant (%(default)s)")
    cli.add_argument(
        "-f", "--fetchbuffer", type=int, default=1000,
        help="Buffer for fetching reads from .bam (%(default)s)")
    # Disabled option, kept for reference:
    #   -o/--overlapbuffer (float, default 0.50)
    #   "Percent overlap required from calls to tails (%(default)s)"
    cli.add_argument(
        "-q", "--minMapq", type=int, default=100,
        help="Minimum mapping quality of a read and it's tail to consider (%(default)s)")
    cli.add_argument(
        "-m", "--minErr", type=int, default=5,
        help="Minimum ins/del error size to consider (%(default)s)")
    # Disabled option, kept for reference:
    #   -a/--asm (store_true) "Input reads are high-quality contigs"

    # Mode switches
    cli.add_argument(
        "-p", "--bedPE", action="store_true",
        help="Input bed file is bedPE - only tails searching will be performed")
    cli.add_argument("--debug", action="store_true",
                     help="Verbose logging")

    parsed = cli.parse_args(args)
    setupLogging(parsed.debug)
    return parsed
Ejemplo n.º 2
0
def parseArgs(argv):
    """
    Parse the command line of the `Honey.py cpxres` sub-command.

    argv -- the argument list handed straight to argparse

    Logging is configured from the --debug flag, and when no --output is
    supplied it defaults to the tails path with '.cpx' appended.
    Returns the argparse namespace.
    """
    cli = argparse.ArgumentParser(
        prog="Honey.py cpxres",
        description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    cli.add_argument("tails", metavar="TAILS", type=str,
                     help="Input hon.tals file")
    cli.add_argument("-o", "--output", type=str, default=None,
                     help="Output file (<tails>.cpx)")
    cli.add_argument(
        "-c", "--minBlock", type=int, default=500,
        help=("To prevent 'tiny' reference bocks, remove "
              "those with a size less than (%(default)s)"))
    cli.add_argument(
        "-s", "--maxSpan", type=int, default=100000,
        help="Max Span of a breakpoint to be considered (%(default)s)")
    cli.add_argument(
        "-l", "--maxOvl", type=int, default=10,
        help="Max number of overlaps in a cluster (%(default)s)")
    cli.add_argument(
        "-r", "--maxRefBlocks", type=int, default=10,
        help="Max number of reference blocks to consider (%(default)s)")
    cli.add_argument("--debug", action="store_true",
                     help="Verbose logging")

    parsed = cli.parse_args(argv)
    setupLogging(parsed.debug)

    # Derive the output name from the input when none was given
    if parsed.output is None:
        parsed.output = parsed.tails + '.cpx'
    return parsed
Ejemplo n.º 3
0
def parseArgs(args):
    """
    Build the `Honey.py force` argument parser, parse `args` with it and
    configure logging from the --debug flag.

    args -- the argument list handed straight to argparse
    Returns the populated argparse namespace.
    """
    # (flags, keyword settings), listed in the order they are registered
    spec = [
        (("bam",), dict(metavar="BAM", type=str,
                        help="Assembled Contigs Bam")),
        (("bed",), dict(metavar="BED", type=str,
                        help="Bed of locations to force SV Calls")),
        (("-s", "--sizebuffer"), dict(
            type=float, default=0.35,
            help="Buffer of estimated sv size to create match (%(default)s)")),
        (("-d", "--maxDelta"), dict(
            type=int, default=500,
            help="Max distance between predicted and discovered variant (%(default)s)")),
        (("-f", "--fetchbuffer"), dict(
            type=int, default=1000,
            help="Buffer for fetching reads from .bam (%(default)s)")),
        # Disabled: -o/--overlapbuffer (float, default 0.50),
        #   "Percent overlap required from calls to tails (%(default)s)"
        (("-q", "--minMapq"), dict(
            type=int, default=100,
            help="Minimum mapping quality of a read and it's tail to consider (%(default)s)")),
        (("-m", "--minErr"), dict(
            type=int, default=5,
            help="Minimum ins/del error size to consider (%(default)s)")),
        # Disabled: -a/--asm (store_true),
        #   "Input reads are high-quality contigs"
        (("-p", "--bedPE"), dict(
            action="store_true",
            help="Input bed file is bedPE - only tails searching will be performed")),
        (("--debug",), dict(action="store_true", help="Verbose logging")),
    ]

    forceParser = argparse.ArgumentParser(
        prog="Honey.py force", description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    for flags, settings in spec:
        forceParser.add_argument(*flags, **settings)

    namespace = forceParser.parse_args(args)
    setupLogging(namespace.debug)
    return namespace
Ejemplo n.º 4
0
def parseArgs(argv):
    """
    Build the `Honey.py cpxres` argument parser, parse `argv` with it and
    configure logging from the --debug flag.

    argv -- the argument list handed straight to argparse

    When --output is omitted it becomes the tails path plus '.cpx'.
    Returns the populated argparse namespace.
    """
    # (flags, keyword settings), listed in the order they are registered
    spec = [
        (("tails",), dict(metavar="TAILS", type=str,
                          help="Input hon.tals file")),
        (("-o", "--output"), dict(type=str, default=None,
                                  help="Output file (<tails>.cpx)")),
        (("-c", "--minBlock"), dict(
            type=int, default=500,
            help=("To prevent 'tiny' reference bocks, remove "
                  "those with a size less than (%(default)s)"))),
        (("-s", "--maxSpan"), dict(
            type=int, default=100000,
            help="Max Span of a breakpoint to be considered (%(default)s)")),
        (("-l", "--maxOvl"), dict(
            type=int, default=10,
            help="Max number of overlaps in a cluster (%(default)s)")),
        (("-r", "--maxRefBlocks"), dict(
            type=int, default=10,
            help="Max number of reference blocks to consider (%(default)s)")),
        (("--debug",), dict(action="store_true", help="Verbose logging")),
    ]

    cpxParser = argparse.ArgumentParser(
        prog="Honey.py cpxres", description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    for flags, settings in spec:
        cpxParser.add_argument(*flags, **settings)

    namespace = cpxParser.parse_args(argv)
    setupLogging(namespace.debug)
    if namespace.output is None:
        namespace.output = namespace.tails + '.cpx'
    return namespace
Ejemplo n.º 5
0
def test(argv):
    """
    Testing entry point used when HSpots.py is run directly.

    Parses `argv` with parseArgs, forces debug logging on, opens the BAM
    and the reference, then runs the supporting-reads filter and the
    consensus caller on one hard-coded deletion spot on chromosome "3".
    Each resulting spot is printed, tagged with how it was produced
    ("seqmade", "noseq" or "filtfail").
    """
    numpy.seterr(all="ignore")
    args = parseArgs(argv)
    # Debug logging stays on regardless of the command line while testing
    setupLogging(True)
    logging.critical("Running HSpots.py directly implements testing mode. "
                     "If you're trying to run the full, actual program, use "
                     "Honey.py spots")

    bam = pysam.Samfile(args.bam)
    reference = pysam.Fastafile(args.reference)
    try:
        sortOrder = bam.header["HD"]["SO"]
    except KeyError:
        # Header carries no sort-order field
        logging.warning(
            "Assuming BAM is sorted by coordinate. Be sure this is correct")
    else:
        if sortOrder != "coordinate":
            logging.warning(
                "BAM is not sorted by coordinates! Performance may be slower")
    logging.info("Running in test mode")

    # ---- scratch area: edit the spot under test here ----
    # Earlier candidate:
    #   SpotResult(chrom="7", start=138402727, end=138402830,
    #              svtype="INS", size=113)
    # Region of interest widened by 200bp on both sides
    padding = 200
    spot = SpotResult(chrom="3",
                      start=195498264 - padding,
                      end=195498609 + padding,
                      svtype="DEL",
                      size=100)

    # Alternative: build one spot per line of "possible.bed", taking
    # chrom=col 0, start=col 8, end=col 9, size=col 5, svtype=col 4.

    caller = SpotCaller('group', spot.chrom, spot.start, spot.end, args)
    if not caller.supportingReadsFilter(spot, bam, args):
        spot.tags["filtfail"] = True
        print(str(spot))
    else:
        consen = ConsensusCaller(spot, args)
        consen(bam, reference, 'none')
        for made in consen.newSpots:
            made.tags["seqmade"] = True
            print(made)
        if len(consen.newSpots) == 0:
            spot.tags["noseq"] = True
            print(str(spot))
    # ---- end of scratch area ----
    logging.info("Finished testing")
Ejemplo n.º 6
0
    def __init__(self):
        """
        Given a protocol fn, load it up so we are ready to run. 
        """
        self.parseArgs()
        setupLogging(self.options.debug)
        sys.stderr.write("""
Please Cite: English, Adam C., Stephen Richards, Yi Han, Min Wang,
             Vanesa Vee, Jiaxin Qu, Xiang Qin, et al. "Mind the
             Gap: Upgrading Genomes with Pacific Biosciences RS
             Long-Read Sequencing Technology." PLoS ONE 7, no. 11
             (November 21, 2012): e47768.
             doi:10.1371/journal.pone.0047768.\n\n""")
        self.parseProtocol()
Ejemplo n.º 7
0
    def __init__(self):
        """
        Run the start-up sequence: self.parseArgs(), logging set-up from
        self.options.debug, the citation notice on stderr, and then
        self.parseProtocol().
        """
        self.parseArgs()
        setupLogging(self.options.debug)

        # One literal per output line; the notice starts with a blank line
        # and ends with two newlines.
        notice = (
            "\n"
            "Please Cite: English, Adam C., Stephen Richards, Yi Han, Min Wang,\n"
            "             Vanesa Vee, Jiaxin Qu, Xiang Qin, et al. \"Mind the\n"
            "             Gap: Upgrading Genomes with Pacific Biosciences RS\n"
            "             Long-Read Sequencing Technology.\" PLoS ONE 7, no. 11\n"
            "             (November 21, 2012): e47768.\n"
            "             doi:10.1371/journal.pone.0047768.\n\n"
        )
        sys.stderr.write(notice)

        self.parseProtocol()
Ejemplo n.º 8
0
 def parseArgs(self):
     """
     Parse the process command line with optparse and copy the results
     onto the instance.

     Attributes set:
       inputDir   -- --inputDir, or the current working directory
       outputJson -- value of --json
       output     -- file opened for writing at --output, or sys.stdout
       tailMax, lengthMin, bestn, extends, maxEntries, debug
                  -- filter parameters copied from the options

     Logging is configured from --debug. If the input directory does not
     exist the parser reports the error and exits.
     """
     parser = OptionParser()
     parser.add_option("-i", "--inputDir", default=None,
             help="Input directory to find chunks.m4 [DEFAULT=pwd]")
     parser.add_option("-j", "--json", action="store_true",
             help="Output table in JSON format instead of lined [DEFAULT=False]")
     parser.add_option("-o", "--output", default=None,
             help="Output file name [DEFAULT=stdout]")
     parser.add_option("-t", "--tailMax", type="int", default=-1,
             help=("Use PBJelly's Support module to remove discordant "
                   "alignments with greater than specified tail length "
                   "[DEFAULT=off]"))
     parser.add_option("-l", "--lengthMin", type="int", default=0,
             help="Ignore reads (query or target) less than specified length [DEFAULT=off]")
     parser.add_option("-b", "--bestn", type="int", default=sys.maxint,
             help=("Report only the top bestn alignment scores for a query"
                   " [DEFAULT=all]"))
     parser.add_option("-e", "--extends", action="store_true",
             help="Only report alignments that extend query [DEFAULT=False]")
     parser.add_option("-m", "--maxEntries", type="int", default=10000,
             help="Max number of alignments to hold in memory from each file [DEFAULT=10000]")
     parser.add_option("--debug", action="store_true",
             help="Verbose logging")

     # Positional arguments are not used by this tool
     opts, _ = parser.parse_args()

     setupLogging(opts.debug)

     if opts.inputDir is not None:
         self.inputDir = opts.inputDir
     else:
         self.inputDir = os.getcwd()
     if not os.path.exists(self.inputDir):
         # Must format with self.inputDir: there is no local `inputDir`,
         # so the bare name raised NameError instead of reporting the path.
         parser.error("Input directory (%s) does not exist" % self.inputDir)

     self.outputJson = opts.json

     # The handle stays open on the instance for later writes
     if opts.output is not None:
         self.output = open(opts.output, 'w')
     else:
         self.output = sys.stdout

     # Filter params
     self.tailMax = opts.tailMax
     self.lengthMin = opts.lengthMin
     self.bestn = opts.bestn
     self.extends = opts.extends
     self.maxEntries = opts.maxEntries
     self.debug = opts.debug
Ejemplo n.º 9
0
    def parseArgs(self):
        """
        Parse the process command line with optparse and copy the results
        onto the instance.

        Attributes set:
          inputDir   -- --inputDir, or the current working directory
          outputJson -- value of --json
          output     -- file opened for writing at --output, or sys.stdout
          tailMax, lengthMin, bestn, extends, maxEntries, debug
                     -- filter parameters copied from the options

        Logging is configured from --debug. If the input directory does
        not exist the parser reports the error and exits.
        """
        parser = OptionParser()
        parser.add_option("-i", "--inputDir", default=None,
                help="Input directory to find chunks.m4 [DEFAULT=pwd]")
        parser.add_option("-j", "--json", action="store_true",
                help="Output table in JSON format instead of lined [DEFAULT=False]")
        parser.add_option("-o", "--output", default=None,
                help="Output file name [DEFAULT=stdout]")
        parser.add_option("-t", "--tailMax", type="int", default=-1,
                help=("Use PBJelly's Support module to remove discordant "
                      "alignments with greater than specified tail length "
                      "[DEFAULT=off]"))
        parser.add_option("-l", "--lengthMin", type="int", default=0,
                help="Ignore reads (query or target) less than specified length [DEFAULT=off]")
        parser.add_option("-b", "--bestn", type="int", default=sys.maxint,
                help=("Report only the top bestn alignment scores for a query"
                      " [DEFAULT=all]"))
        parser.add_option("-e", "--extends", action="store_true",
                help="Only report alignments that extend query [DEFAULT=False]")
        parser.add_option("-m", "--maxEntries", type="int", default=10000,
                help="Max number of alignments to hold in memory from each file [DEFAULT=10000]")
        parser.add_option("--debug", action="store_true",
                help="Verbose logging")

        # Positional arguments are not used by this tool
        opts, _ = parser.parse_args()

        setupLogging(opts.debug)

        if opts.inputDir is not None:
            self.inputDir = opts.inputDir
        else:
            self.inputDir = os.getcwd()
        if not os.path.exists(self.inputDir):
            # Must format with self.inputDir: there is no local `inputDir`,
            # so the bare name raised NameError instead of reporting the path.
            parser.error("Input directory (%s) does not exist" % self.inputDir)

        self.outputJson = opts.json

        # The handle stays open on the instance for later writes
        if opts.output is not None:
            self.output = open(opts.output, 'w')
        else:
            self.output = sys.stdout

        # Filter params
        self.tailMax = opts.tailMax
        self.lengthMin = opts.lengthMin
        self.bestn = opts.bestn
        self.extends = opts.extends
        self.maxEntries = opts.maxEntries
        self.debug = opts.debug
Ejemplo n.º 10
0
def test(argv):
    """
    Testing entry point used when HSpots.py is run directly.

    After parsing `argv` and forcing debug logging on, it opens the BAM
    and the reference and pushes a single hard-coded deletion spot
    (chromosome "3") through the supporting-reads filter and the
    consensus caller, printing the tagged result(s).
    """
    numpy.seterr(all="ignore")
    args = parseArgs(argv)
    setupLogging(True)  # testing always runs with debug output
    banner = ("Running HSpots.py directly implements testing mode. "
              "If you're trying to run the full, actual program, use "
              "Honey.py spots")
    logging.critical(banner)

    bam = pysam.Samfile(args.bam)
    reference = pysam.Fastafile(args.reference)
    try:
        coordinateSorted = bam.header["HD"]["SO"] == "coordinate"
    except KeyError:
        # No sort order recorded in the header
        logging.warning("Assuming BAM is sorted by coordinate. Be sure this is correct")
    else:
        if not coordinateSorted:
            logging.warning("BAM is not sorted by coordinates! Performance may be slower")
    logging.info("Running in test mode")

    # ---- scratch area: change the spot under test here ----
    # Earlier candidate:
    #   SpotResult(chrom="7", start=138402727, end=138402830,
    #              svtype="INS", size=113)
    regionStart, regionEnd = 195498264, 195498609
    flank = 200  # widen the window on both sides
    spot = SpotResult(chrom="3", start=regionStart - flank,
                      end=regionEnd + flank, svtype="DEL", size=100)

    # Alternative: build one spot per line of "possible.bed", taking
    # chrom=col 0, start=col 8, end=col 9, size=col 5, svtype=col 4.

    spotCaller = SpotCaller('group', spot.chrom, spot.start, spot.end, args)
    passed = spotCaller.supportingReadsFilter(spot, bam, args)
    if passed:
        consen = ConsensusCaller(spot, args)
        consen(bam, reference, 'none')
        for newSpot in consen.newSpots:
            newSpot.tags["seqmade"] = True
            print(newSpot)
        if len(consen.newSpots) == 0:
            spot.tags["noseq"] = True
            print(str(spot))
    else:
        spot.tags["filtfail"] = True
        print(str(spot))
    # ---- end of scratch area ----
    logging.info("Finished testing")
Ejemplo n.º 11
0
def test(argv):
    """
    Testing entry point used when HSpots.py is run directly.

    Parses `argv` with parseArgs, forces debug logging on, opens the BAM
    and the reference, then processes every region listed in
    "honeymissing.bed" (read from the current working directory).

    Each line is tab-separated: column 1 is the chromosome, columns 2 and
    3 the start and end, and the size is the integer following the last
    '=' in column 4. Every region is treated as a "DEL" spot, run through
    the supporting-reads filter and the consensus caller, and printed
    with a tag describing the outcome ("seqmade", "noseq" or "filtfail").
    """
    numpy.seterr(all="ignore")
    args = parseArgs(argv)
    setupLogging(True)  #keep debug on.. you're testing!
    logging.critical(("Running HSpots.py directly implements testing mode. "
                      "If you're trying to run the full, actual program, use "
                      "Honey.py spots"))

    bam = pysam.Samfile(args.bam)
    reference = pysam.Fastafile(args.reference)
    try:
        if bam.header["HD"]["SO"] != "coordinate":
            logging.warning(
                "BAM is not sorted by coordinates! Performance may be slower")
    except KeyError:
        logging.warning(
            "Assuming BAM is sorted by coordinate. Be sure this is correct")
    logging.info("Running in test mode")

    #do what you will.. from here
    # Single-spot alternatives used previously:
    #spot = SpotResult(chrom='11', start=2215290, end=2215798, svtype="DEL", size=208)
    #spot = SpotResult(chrom='22', start=45964261, end=45965596, svtype="DEL", size=-1)
    #spot = SpotResult(chrom="22", start=45963975, end=45964532, svtype="DEL", size=57)

    # The context manager closes the bed file even if a region fails, and
    # iterating the handle avoids loading the whole file into memory.
    with open("honeymissing.bed") as fh:
        for line in fh:
            line = line.strip()
            if not line:
                # A blank (e.g. trailing) line has no columns to parse
                continue
            data = line.split('\t')
            spot = SpotResult(chrom=data[0], start=int(data[1]),
                              end=int(data[2]),
                              size=int(data[3].split('=')[-1]),
                              svtype="DEL")

            j = SpotCaller('group', spot.chrom, spot.start, spot.end, args)
            if j.supportingReadsFilter(spot, bam, args):
                consen = ConsensusCaller(spot, args)
                consen(bam, reference, 'none')
                for i in consen.newSpots:
                    i.tags["seqmade"] = True
                    print(i)
                if len(consen.newSpots) == 0:
                    spot.tags["noseq"] = True
                    print(str(spot))
            else:
                spot.tags["filtfail"] = True
                print(str(spot))

    #done with test code
    logging.info("Finished testing")
Ejemplo n.º 12
0
 def _parseOptions(self):
     """
     Parse sys.argv with optparse, load the input reads and prepare the
     working directory.

     Accepted positional inputs are either one .fastq file or a .fasta
     file followed by a .qual file; anything else is reported through
     parser.error(). The parsed options end up in self.options and the
     loaded reads in self.fastqSeq. outName is made absolute, and if a
     working directory is configured the process changes into it.
     """
     parser = OptionParser(usage=USAGE)

     # (flags, keyword settings), registered in this order
     optionTable = (
         (("--debug",), dict(action="store_true",
                             help="Increases verbosity of logging")),
         (("--nproc",), dict(type="int",
                             help="Number of processes to use.")),
         (("-o", "--outName"), dict(
             type="string",
             help="Name of the output fasta and qual files (Don't include the extension)",
             default="out")),
         (("--fqOut",), dict(action="store_true",
                             help="Create a .fastq output file")),
         (("--rename",), dict(
             type="string",
             help="Gives the ouput contigs more descriptive names")),
         (("--minSubreads",), dict(
             type="int",
             help="Minimum number of subreads required to attempt assembly")),
         (("--workDir",), dict(
             type="string",
             help="Directory to build the bank an everything in.")),
         (("--workTmp",), dict(type="string",
                               help="Work in a temporary directory")),
         (("--threshold",), dict(
             type="int", help="Threshold when determining overlaps")),
         (("--transmax",), dict(type="int",
                                help="Max links of transitivity")),
         (("-e",), dict(type="str",
                        help="Alignment Error% e.g. 0.15 = 15%")),
     )
     for flags, settings in optionTable:
         parser.add_option(*flags, **settings)

     parser.set_defaults(debug=False, nproc=1, outName="out", rename=None,
                         minSubreads=2, filtering=False, workTmp=None,
                         threshold=800, transmax=1, e="0.15")

     self.options, args = parser.parse_args(sys.argv)
     setupLogging(self.options.debug)
     logging.warning("This program doesn't work with SMRTAnalysis v2.1 and on")
     logging.info("Reading Input Reads")

     # sys.argv was parsed whole, so args[0] is the program name
     inputs = args[1:]
     if len(inputs) == 1:
         self.fastqFile = inputs[0]
         if not self.fastqFile.endswith(".fastq"):
             parser.error("Expected a Fastq File or Fasta/Qual")
         self.fastqSeq = FastqFile(self.fastqFile)
     elif len(inputs) == 2:
         fasta, qual = inputs
         if not fasta.endswith(".fasta"):
             parser.error("Expected First Argument To End With .fasta")
         if not qual.endswith(".qual"):
             parser.error("Expected Second Argument To End With .qual")
         self.fastqSeq = mergeFastaQual(fasta, qual)
         self.fastaFile = fasta
         self.qualFile = qual
     else:
         parser.error("Expected <input.fastq> or <input.fasta> <input.qual> Arguments!")

     # Resolve the output name before any chdir below changes the cwd
     self.options.outName = os.path.abspath(self.options.outName)

     # --workTmp takes precedence: a fresh temp dir inside it becomes workDir
     if self.options.workTmp is not None:
         self.options.workDir = tempfile.mkdtemp(dir=self.options.workTmp)
     if self.options.workDir is not None:
         os.chdir(self.options.workDir)
Ejemplo n.º 13
0
def test(argv):
    """
    Testing entry point used when HSpots.py is run directly.

    Parses `argv` with parseArgs, forces debug logging on, opens the BAM
    and the reference, then processes every region listed in
    "honeymissing.bed" (read from the current working directory).

    Each line is tab-separated: column 1 is the chromosome, columns 2 and
    3 the start and end, and the size is the integer following the last
    '=' in column 4. Every region is treated as a "DEL" spot, run through
    the supporting-reads filter and the consensus caller, and printed
    with a tag describing the outcome ("seqmade", "noseq" or "filtfail").
    """
    numpy.seterr(all="ignore")
    args = parseArgs(argv)
    setupLogging(True)#keep debug on.. you're testing!
    logging.critical(("Running HSpots.py directly implements testing mode. "
                      "If you're trying to run the full, actual program, use "
                      "Honey.py spots"))

    bam = pysam.Samfile(args.bam)
    reference = pysam.Fastafile(args.reference)
    try:
        if bam.header["HD"]["SO"] != "coordinate":
            logging.warning("BAM is not sorted by coordinates! Performance may be slower")
    except KeyError:
        logging.warning("Assuming BAM is sorted by coordinate. Be sure this is correct")
    logging.info("Running in test mode")

    #do what you will.. from here
    # Single-spot alternatives used previously:
    #spot = SpotResult(chrom='11', start=2215290, end=2215798, svtype="DEL", size=208)
    #spot = SpotResult(chrom='22', start=45964261, end=45965596, svtype="DEL", size=-1)
    #spot = SpotResult(chrom="22", start=45963975, end=45964532, svtype="DEL", size=57)

    # The context manager closes the bed file even if a region fails, and
    # iterating the handle avoids loading the whole file into memory.
    with open("honeymissing.bed") as fh:
        for line in fh:
            line = line.strip()
            if not line:
                # A blank (e.g. trailing) line has no columns to parse
                continue
            data = line.split('\t')
            spot = SpotResult(chrom=data[0], start=int(data[1]),
                              end=int(data[2]),
                              size=int(data[3].split('=')[-1]),
                              svtype="DEL")

            j = SpotCaller('group', spot.chrom, spot.start, spot.end, args)
            if j.supportingReadsFilter(spot, bam, args):
                consen = ConsensusCaller(spot, args)
                consen(bam, reference, 'none')
                for i in consen.newSpots:
                    i.tags["seqmade"] = True
                    print(i)
                if len(consen.newSpots) == 0:
                    spot.tags["noseq"] = True
                    print(str(spot))
            else:
                spot.tags["filtfail"] = True
                print(str(spot))

    #done with test code
    logging.info("Finished testing")
Ejemplo n.º 14
0
    def _parseOptions(self):
        """
        Parse sys.argv with optparse, load the input reads and prepare
        the working directory.

        Accepted positional inputs are either one .fastq file or a .fasta
        file followed by a .qual file; anything else is reported through
        parser.error(). The parsed options end up in self.options and the
        loaded reads in self.fastqSeq. outName is made absolute, and if a
        working directory is configured the process changes into it.
        """
        parser = OptionParser(usage=USAGE)

        # (flags, keyword settings), registered in this order
        optionTable = (
            (("--debug",), dict(action="store_true",
                                help="Increases verbosity of logging")),
            (("--nproc",), dict(type="int",
                                help="Number of processes to use.")),
            (("-o", "--outName"), dict(
                type="string",
                help="Name of the output fasta and qual files (Don't include the extension)",
                default="out")),
            (("--fqOut",), dict(action="store_true",
                                help="Create a .fastq output file")),
            (("--rename",), dict(
                type="string",
                help="Gives the ouput contigs more descriptive names")),
            (("--minSubreads",), dict(
                type="int",
                help="Minimum number of subreads required to attempt assembly")),
            (("--workDir",), dict(
                type="string",
                help="Directory to build the bank an everything in.")),
            (("--workTmp",), dict(type="string",
                                  help="Work in a temporary directory")),
            (("--threshold",), dict(
                type="int", help="Threshold when determining overlaps")),
            (("--transmax",), dict(type="int",
                                   help="Max links of transitivity")),
            (("-e",), dict(type="str",
                           help="Alignment Error% e.g. 0.15 = 15%")),
        )
        for flags, settings in optionTable:
            parser.add_option(*flags, **settings)

        parser.set_defaults(debug=False, nproc=1, outName="out", rename=None,
                            minSubreads=2, filtering=False, workTmp=None,
                            threshold=800, transmax=1, e="0.15")

        self.options, args = parser.parse_args(sys.argv)
        setupLogging(self.options.debug)
        logging.warning("This program doesn't work with SMRTAnalysis v2.1 and on")
        logging.info("Reading Input Reads")

        # sys.argv was parsed whole, so args[0] is the program name
        inputs = args[1:]
        if len(inputs) == 1:
            self.fastqFile = inputs[0]
            if not self.fastqFile.endswith(".fastq"):
                parser.error("Expected a Fastq File or Fasta/Qual")
            self.fastqSeq = FastqFile(self.fastqFile)
        elif len(inputs) == 2:
            fasta, qual = inputs
            if not fasta.endswith(".fasta"):
                parser.error("Expected First Argument To End With .fasta")
            if not qual.endswith(".qual"):
                parser.error("Expected Second Argument To End With .qual")
            self.fastqSeq = mergeFastaQual(fasta, qual)
            self.fastaFile = fasta
            self.qualFile = qual
        else:
            parser.error("Expected <input.fastq> or <input.fasta> <input.qual> Arguments!")

        # Resolve the output name before any chdir below changes the cwd
        self.options.outName = os.path.abspath(self.options.outName)

        # --workTmp takes precedence: a fresh temp dir inside it becomes workDir
        if self.options.workTmp is not None:
            self.options.workDir = tempfile.mkdtemp(dir=self.options.workTmp)
        if self.options.workDir is not None:
            os.chdir(self.options.workDir)
Ejemplo n.º 15
0
 def __init__(self):
     """
     Parse the options via self.parseOpts() and set up logging.
     """
     self.parseOpts()
     # Logging is currently forced on with a literal True; the
     # option-driven call below is left disabled.
     #setupLogging(self.debug)
     setupLogging(True)
Ejemplo n.º 16
0
def parseArgs(argv, established=False):
    """
    Parse and validate the command line of the `Honey.py spots` sub-command.

    argv        -- the argument list handed straight to argparse
    established -- kept for interface compatibility; not used here

    Post-processing applied to the parsed namespace:
      * logging is configured from --debug
      * --output defaults to the --hon path without a trailing ".h5", or
        otherwise to the BAM path with its last four characters replaced
        by ".hon"
      * --chrom is split on commas into a list

    Exits with status 1 when --maxCoverage exceeds BIGINT, or when
    consensus calling is enabled without --reference.
    Returns the argparse namespace.
    """
    parser = argparse.ArgumentParser(
        prog="Honey.py spots", description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    ioGroup = parser.add_argument_group("I/O Arguments")
    ioGroup.add_argument("bam", metavar="BAM", type=str,
                         help="BAM containing mapped reads")
    ioGroup.add_argument("--hon", metavar="HON.H5", type=str, default=None,
                         help="HON.h5 containing Error data. Skips ErrorCouting.")
    ioGroup.add_argument("-r", "--region", type=str, default=None,
                         help="Only call spots in region.bed")
    ioGroup.add_argument("--chrom", type=str, default=None,
                         help="Only call spots on specified chromosomes (comma-separated) (%(default)s)")
    ioGroup.add_argument("-n", "--nproc", type=int, default=1,
                         help="Number of processors to use (only for consensus) (%(default)s)")
    ioGroup.add_argument("-o", "--output", type=str, default=None,
                         help="Basename for output (BAM.hon)")
    ioGroup.add_argument("--readFile", action="store_true",
                         help="Create a file with what reads support what events (%(default)s)")

    pGroup = parser.add_argument_group(
        "Spot-Calling Threshold/Filtering Arguments")
    pGroup.add_argument("-b", "--binsize", type=int, default=100,
                        help="binsize for window averaging (%(default)s)")
    pGroup.add_argument("-e", "--threshold", type=float, default=3,
                        help="Minimum Spot Threshold (%(default)s)")
    pGroup.add_argument("-c", "--minCoverage", type=int, default=2,
                        help="Minimum coverage of a region (%(default)s)")
    pGroup.add_argument("-C", "--maxCoverage", type=int, default=BIGINT,
                        help="Maximum coverage of a region (%(default)s)")
    pGroup.add_argument("-q", "--minMapQ", type=int, default=1,
                        help="Minimum map quality of reads considered (%(default)s)")
    pGroup.add_argument("-m", "--minIndelErr", type=int, default=5,
                        help="Minimum size of an indel error to be counted (%(default)s)")
    pGroup.add_argument("-i", "--minIndelSize", type=int, default=50,
                        help="Minimum indel SV size (%(default)s)")
    pGroup.add_argument("-E", "--minErrReads", type=int, default=3,
                        help="Minimum number of reads with indel (%(default)s)")
    pGroup.add_argument("--spanMax", type=int, default=2000,
                        help="Maximum Size of spot to be called (%(default)s)")
    # Disabled option, kept for reference:
    #pGroup.add_argument("-I", "--minIndelPct", type=float, default=0.20, \
    #help="Minimum pct of reads with indel (max(%(default)s*cov,minErrReads)")

    aGroup = parser.add_argument_group("Consensus Arguments")
    aGroup.add_argument("--noConsensus", action="store_true",
                        help="Turn off consensus calling, just report spots (False)")
    aGroup.add_argument("--buffer", default=1000, type=int,
                        help="Buffer around SV to assemble (%(default)s)")
    aGroup.add_argument("--reference", default=None, type=str,
                        help="Sample reference. Required with consensus calling (None)")
    aGroup.add_argument("--polish", type=str, default="pbdagcon",
                        choices=["pbdagcon", "pbbanana", "None"],
                        help="Method for polishing consensus. (%(default)s)")
    aGroup.add_argument("--blasr", default="blasr",
                        help="Path to blasr if it's not in the env")
    # Disabled option, kept for reference:
    #aGroup.add_argument("--contig", default="store_false", \
    #help="Report the full contig sequences and QVs in INFO (False)")
    parser.add_argument("--debug", action="store_true",
                        help="Verbose logging")

    args = parser.parse_args(argv)
    setupLogging(args.debug)
    if args.maxCoverage > BIGINT:
        logging.error("Max Coverge must be less than %d" % (BIGINT))
        # Failure must surface as a non-zero status (this used to exit 0);
        # SystemExit also avoids relying on the site-provided exit().
        raise SystemExit(1)

    #check bam is bamfile

    if args.output is None:
        if args.hon is not None:
            # str.rstrip(".h5") strips any run of '.', 'h' and '5'
            # characters (e.g. "batch.h5" -> "batc"), so remove the suffix
            # explicitly instead.
            suffix = ".h5"
            if args.hon.endswith(suffix):
                args.output = args.hon[:-len(suffix)]
            else:
                args.output = args.hon
        else:
            # Drops the last four characters of the BAM path; assumes it
            # ends in ".bam" -- TODO confirm
            args.output = args.bam[:-4] + ".hon"

    if not args.noConsensus:
        if args.reference is None:
            logging.error("Reference is required with consensus calling")
            raise SystemExit(1)
        #Check is fastafile
    if args.chrom is not None:
        args.chrom = args.chrom.split(',')
    return args
Ejemplo n.º 17
0
 def __init__(self):
     """
     Parse the arguments via self.parseArgs(), then set up logging from
     self.opts.debug.
     """
     self.parseArgs()
     # self.opts is presumably populated by parseArgs() -- not visible here
     setupLogging(self.opts.debug)
Ejemplo n.º 18
0
def parseArgs(argv, established=False):
    """
    Parse and validate the command line of "Honey.py spots".

    Arguments:
        argv        -- list of argument strings handed to argparse
        established -- accepted for interface compatibility; not read here

    Returns the argparse Namespace with two fields normalised:
        output -- when not given, falls back to --hon minus a trailing
                  ".h5", else to the BAM path minus its last four
                  characters plus ".hon"
        chrom  -- split on ',' into a list when given

    Logs an error and exits with status 1 when --maxCoverage exceeds BIGINT
    or when consensus calling is enabled without --reference.
    """
    parser = argparse.ArgumentParser(
        prog="Honey.py spots", description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    ioGroup = parser.add_argument_group("I/O Arguments")
    ioGroup.add_argument("bam", metavar="BAM", type=str,
                         help="BAM containing mapped reads")
    ioGroup.add_argument("--hon", metavar="HON.H5", type=str, default=None,
                         help="HON.h5 containing Error data. Skips ErrorCouting.")
    ioGroup.add_argument("-r", "--region", type=str, default=None,
                         help="Only call spots in region.bed")
    ioGroup.add_argument("--chrom", type=str, default=None,
                         help="Only call spots on specified chromosomes (comma-separated) (%(default)s)")
    ioGroup.add_argument("-n", "--nproc", type=int, default=1,
                         help="Number of processors to use (only for consensus) (%(default)s)")
    ioGroup.add_argument("-o", "--output", type=str, default=None,
                         help="Basename for output (BAM.hon)")
    ioGroup.add_argument("--readFile", action="store_true",
                         help="Create a file with what reads support what events (%(default)s)")

    pGroup = parser.add_argument_group("Spot-Calling Threshold/Filtering Arguments")
    pGroup.add_argument("-b", "--binsize", type=int, default=50,
                        help="Binsize for window averaging (%(default)s)")
    pGroup.add_argument("-e", "--threshold", type=float, default=3,
                        help="Minimum Spot Threshold (%(default)s)")
    pGroup.add_argument("-c", "--minCoverage", type=int, default=2,
                        help="Minimum coverage of a region (%(default)s)")
    pGroup.add_argument("-C", "--maxCoverage", type=int, default=BIGINT,
                        help="Maximum coverage of a region (%(default)s)")
    pGroup.add_argument("-q", "--minMapQ", type=int, default=1,
                        help="Minimum map quality of reads considered (%(default)s)")
    pGroup.add_argument("-m", "--minIndelErr", type=int, default=5,
                        help="Minimum size of an indel error to be counted (%(default)s)")
    pGroup.add_argument("-i", "--minIndelSize", type=int, default=50,
                        help="Minimum indel SV size (%(default)s)")
    pGroup.add_argument("-E", "--minErrReads", type=int, default=3,
                        help="Minimum number of reads with indel (%(default)s)")
    pGroup.add_argument("--spanMax", type=int, default=3000,
                        help="Maximum Size of spot to be called (%(default)s)")
    #pGroup.add_argument("-I", "--minIndelPct", type=float, default=0.20, \
                        #help="Minimum pct of reads with indel (max(%(default)s*cov,minErrReads)")

    aGroup = parser.add_argument_group("Consensus Arguments")
    aGroup.add_argument("--consensus", type=str, default="pbdagcon",
                        choices=["pbdagcon", "pbbanana", "None"],
                        help="Method for polishing consensus. (%(default)s)")
    aGroup.add_argument("--buffer", default=1000, type=int,
                        help="Buffer around SV to consense (%(default)s)")
    aGroup.add_argument("--reference", default=None, type=str,
                        help="Sample reference. Required with consensus calling (None)")
    aGroup.add_argument("--reportContig", action="store_true",
                        help="Report the contig created that called the spot")
    #aGroup.add_argument("--blasr", default="blasr", \
                        #help="Path to blasr if it's not in the env")
    #aGroup.add_argument("--contig", default="store_false", \
                        #help="Report the full contig sequences and QVs in INFO (False)")
    parser.add_argument("--debug", action="store_true",
                        help="Verbose logging")

    args = parser.parse_args(argv)
    setupLogging(args.debug)

    # Validation failures exit non-zero so calling scripts can detect them.
    if args.maxCoverage > BIGINT:
        logging.error("Max Coverge must be less than %d", BIGINT)
        exit(1)

    # TODO: check bam is bamfile

    if args.output is None:
        if args.hon is not None:
            # Remove the exact ".h5" suffix. str.rstrip(".h5") would strip any
            # run of trailing '.', 'h' and '5' characters ("batch.h5" -> "batc").
            suffix = ".h5"
            if args.hon.endswith(suffix):
                args.output = args.hon[:-len(suffix)]
            else:
                args.output = args.hon
        else:
            # Drops the last four characters, i.e. assumes a ".bam" extension.
            args.output = args.bam[:-4] + ".hon"

    # "None" is the literal string choice of --consensus, not the None object.
    if args.consensus != "None":
        if args.reference is None:
            logging.error("Reference is required with consensus calling")
            exit(1)
        # TODO: check reference is a fasta file

    if args.chrom is not None:
        args.chrom = args.chrom.split(',')
    return args
Ejemplo n.º 19
0
def parseArgs(argv):
    """
    Parse and validate the assembler command line.

    Arguments:
        argv -- list of argument strings handed to argparse

    Returns the argparse Namespace; bam and pacBam are always lists
    (possibly empty) on return.

    Logs an error and exits with status 1 when:
        - neither --bam nor --pacBam supplies at least one file
        - --output does not end with ".fastq"
        - the putative BED does not exist
        - remapping is enabled but --reference is missing
        - --reference is given but does not exist
    """
    parser = argparse.ArgumentParser(
        description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument("putative", metavar="BED", type=str,
                        help="Bed of regions to assemble")
    parser.add_argument("-b", "--bam", type=str, nargs="*",
                        help="Input Bam (NonTrim)")
    parser.add_argument("-p", "--pacBam", type=str, nargs="*",
                        help="PacBio Bam")
    parser.add_argument("-a", "--assembler", type=str, default='phrap',
                        choices=["phrap", "minia", "spades"],
                        help="Assembly program to use (%(default)s)")
    parser.add_argument("-B", "--buffer", type=int, default=1000,
                        help="Amount of buffer sequence around the variant to use (%(default)s)")
    parser.add_argument("-n", "--nproc", type=int, default=1,
                        help="Number of processors to use (%(default)s)")
    parser.add_argument("-o", "--output", default="asm.fastq",
                        help="Where to write the resultant assemblies (%(default)s)")
    parser.add_argument("-r", "--reference", default=None,
                        help="Reference to map to (optional if --noRemap)")
    # Both flags below are store_false: the attribute is True unless the
    # flag is passed, i.e. args.noRemap == True means "remapping enabled".
    parser.add_argument("--noRemap", action="store_false",
                        help="Do not remap assembly")
    parser.add_argument("--noSplitMap", action="store_false",
                        help="Do not map tails from remapped assembly (off if --noRemap)")
    parser.add_argument("--timeout", type=int, default=30,
                        help="Timeout assembly after N minutes (%(default)s)")
    parser.add_argument("--maxspan", type=int, default=100000,
                        help="Maximum Span of SV to attempt assembling (%(default)s)")
    parser.add_argument("--maxreads", type=int, default=500,
                        help="Maximum number of Illumina reads used to attempt assembling (%(default)s)")
    parser.add_argument("--temp", type=str, default=tempfile.gettempdir(),
                        help="Where to save temporary files")
    parser.add_argument("--start", type=int, default=0,
                        help="Index of the first variant to begin assembling. (%(default)s)")
    parser.add_argument("--stride", type=int, default=1,
                        help="Assemble one every N reads (%(default)s)")
    parser.add_argument("--debug", action="store_true",
                        help="Verbose Logging")

    #parser.add_argument("--insertsize", type=int, default=None, \
                        #help=("Celera - insert size for PE Illumina reads (auto_detect)"))
    #parser.add_argument("--insertstd", type=float, default=None, \
                        #help=("Celera - insert std for PE Illumina reads (auto_detect)"))

    args = parser.parse_args(argv)
    setupLogging(args.debug)

    # Parameter checks
    # nargs="*" yields [] when a flag is given without values, so test for
    # emptiness rather than only for None.
    if not args.bam and not args.pacBam:
        logging.error("Expected at least one BAM argument")
        exit(1)

    if not args.output.endswith(".fastq"):
        logging.error("Output needs to end with .fastq")
        exit(1)

    if not os.path.exists(args.putative):
        logging.error("Input {inp} does not exist".format(inp=args.putative))
        exit(1)

    # args.noRemap is True when --noRemap was NOT passed (see above).
    if args.noRemap and args.reference is None:
        logging.error("Cannot remap without --reference")
        exit(1)

    if args.reference and not os.path.exists(args.reference):
        logging.error("Reference {ref} does not exist".format(ref=args.reference))
        exit(1)

    # Insert-size auto-detection is intentionally absent: --insertsize and
    # --insertstd are disabled above, so reading args.insertsize here raised
    # AttributeError whenever only --pacBam was supplied.
    if args.bam is None:
        args.bam = []

    if args.pacBam is None:
        args.pacBam = []

    return args
Ejemplo n.º 20
0
 def __init__(self):
     """Call self.parseOpts(), then set up logging with the flag fixed to True."""
     self.parseOpts()
     # The option-driven call is disabled; setupLogging always receives True.
     #setupLogging(self.debug)
     setupLogging(True)
Ejemplo n.º 21
0
def parseArgs(argv):
    """
    Parse and validate the assembler command line.

    Arguments:
        argv -- list of argument strings handed to argparse

    Returns the argparse Namespace; bam and pacBam are always lists
    (possibly empty) on return.

    Logs an error and exits with status 1 when:
        - neither --bam nor --pacBam supplies at least one file
        - --output does not end with ".fastq"
        - the putative BED does not exist
        - remapping is enabled but --reference is missing
        - --reference is given but does not exist
    """
    parser = argparse.ArgumentParser(
        description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument("putative", metavar="BED", type=str,
                        help="Bed of regions to assemble")
    parser.add_argument("-b", "--bam", type=str, nargs="*",
                        help="Input Bam (NonTrim)")
    parser.add_argument("-p", "--pacBam", type=str, nargs="*",
                        help="PacBio Bam")
    parser.add_argument("-a", "--assembler", type=str, default='phrap',
                        choices=["phrap", "minia", "spades"],
                        help="Assembly program to use (%(default)s)")
    parser.add_argument("-B", "--buffer", type=int, default=1000,
                        help="Amount of buffer sequence around the variant to use (%(default)s)")
    parser.add_argument("-n", "--nproc", type=int, default=1,
                        help="Number of processors to use (%(default)s)")
    parser.add_argument("-o", "--output", default="asm.fastq",
                        help="Where to write the resultant assemblies (%(default)s)")
    parser.add_argument("-r", "--reference", default=None,
                        help="Reference to map to (optional if --noRemap)")
    # Both flags below are store_false: the attribute is True unless the
    # flag is passed, i.e. args.noRemap == True means "remapping enabled".
    parser.add_argument("--noRemap", action="store_false",
                        help="Do not remap assembly")
    parser.add_argument("--noSplitMap", action="store_false",
                        help="Do not map tails from remapped assembly (off if --noRemap)")
    parser.add_argument("--timeout", type=int, default=30,
                        help="Timeout assembly after N minutes (%(default)s)")
    parser.add_argument("--maxspan", type=int, default=100000,
                        help="Maximum Span of SV to attempt assembling (%(default)s)")
    parser.add_argument("--maxreads", type=int, default=2500,
                        help="Maximum number of Illumina reads used to attempt assembling (%(default)s)")
    parser.add_argument("--temp", type=str, default=tempfile.gettempdir(),
                        help="Where to save temporary files")
    parser.add_argument("--start", type=int, default=0,
                        help="Index of the first variant to begin assembling. (%(default)s)")
    parser.add_argument("--stride", type=int, default=1,
                        help="Assemble one every N reads (%(default)s)")
    parser.add_argument("--debug", action="store_true",
                        help="Verbose Logging")

    #parser.add_argument("--insertsize", type=int, default=None, \
    #help=("Celera - insert size for PE Illumina reads (auto_detect)"))
    #parser.add_argument("--insertstd", type=float, default=None, \
    #help=("Celera - insert std for PE Illumina reads (auto_detect)"))

    args = parser.parse_args(argv)
    setupLogging(args.debug)

    # Parameter checks
    # nargs="*" yields [] when a flag is given without values, so test for
    # emptiness rather than only for None.
    if not args.bam and not args.pacBam:
        logging.error("Expected at least one BAM argument")
        exit(1)

    if not args.output.endswith(".fastq"):
        logging.error("Output needs to end with .fastq")
        exit(1)

    if not os.path.exists(args.putative):
        logging.error("Input {inp} does not exist".format(inp=args.putative))
        exit(1)

    # args.noRemap is True when --noRemap was NOT passed (see above).
    if args.noRemap and args.reference is None:
        logging.error("Cannot remap without --reference")
        exit(1)

    if args.reference and not os.path.exists(args.reference):
        logging.error(
            "Reference {ref} does not exist".format(ref=args.reference))
        exit(1)

    # Insert-size auto-detection stays disabled together with the
    # --insertsize/--insertstd options above.
    if args.bam is None:
        args.bam = []

    if args.pacBam is None:
        args.pacBam = []

    return args
Ejemplo n.º 22
0
 def __init__(self):
     """Call self.parseArgs(), then hand self.opts.debug to setupLogging."""
     # NOTE(review): self.opts is presumably populated by parseArgs() --
     # confirm against the enclosing class, which is not visible here.
     self.parseArgs()
     setupLogging(self.opts.debug)