Exemple #1
0
def parseArgs():
    """
    Parse the command line arguments found in sys.argv.

    Builds the argparse parser, parses the arguments, configures logging
    through setupLogging(args.debug) and verifies that at most one of
    --super, --target and --Target was supplied.

    Returns:
        argparse.Namespace with the attributes reads, target, Target,
        super, maxtail, nproc, outname and debug.

    Exits with status 1 (message on stderr) when more than one of the
    target selection options is given.
    """
    parser = argparse.ArgumentParser(
        description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("reads", metavar="reads", type=str,
                        help="Input reads .fasta or .fastq")

    parser.add_argument("-t", "--target", type=str,
                        help="Target sequence name")
    parser.add_argument("-T", "--Target", type=str,
                        help="Fasta file containing target sequence")
    parser.add_argument("-s", "--super", dest="super", action="store_true",
                        help="Treat each read as the target once")

    # sys.maxint only exists on Python 2; fall back to sys.maxsize so the
    # "no limit" default also works on interpreters that lack it.
    parser.add_argument("-m", "--maxtail", type=int,
                        default=getattr(sys, "maxint", sys.maxsize),
                        help="Max number of bases allowed to be in tail (inf)")

    parser.add_argument("-n", "--nproc", dest="nproc", default=1, type=int,
                        help="Number of processors to use with blasr (1)")

    parser.add_argument("-o", "--outname", dest="outname",
                        default="polish.out", type=str,
                        help="Base name for output files (polish.out)")
    parser.add_argument("--debug", action="store_true")

    args = parser.parse_args()
    setupLogging(args.debug)

    # The three target selection options are mutually exclusive. Counting
    # the ones that were supplied covers every conflicting combination.
    selected = [args.super,
                args.target is not None,
                args.Target is not None]
    if sum(selected) > 1:
        # Report on stderr and leave through sys.exit: the bare exit()
        # helper is injected by the site module and is not always present.
        sys.stderr.write(
            "Error! only specify one of --super or --target or --Target\n")
        sys.exit(1)

    return args
Exemple #2
0
def parseArgs():
    """
    Parse the command line arguments found in sys.argv.

    Positional argument:
        asmdir -- local assembly directory for a gap

    Options: --maxTrim, --maxWiggle, --predictedGapSize, --nproc,
    --keepTemp, --tempDir and --debug.

    After parsing, trailing "/" characters are removed from asmdir,
    tempDir falls back to asmdir when it was not given, and logging is
    configured through setupLogging(args.debug).

    Returns:
        argparse.Namespace holding the parsed and normalised values.
    """
    parser = argparse.ArgumentParser(
        description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("asmdir", metavar="DIR", type=str,
                        help="Local assembly directory for a gap")
    parser.add_argument("-t", "--maxTrim", type=int, default=100,
                        help="Maximum trim allowed (100)")
    parser.add_argument("-w", "--maxWiggle", type=int, default=400,
                        help="Maximum wiggle for gap spanning allowed (400)")
    parser.add_argument("-p", "--predictedGapSize", type=int, default=None)
    parser.add_argument("-n", "--nproc", type=int, default=1)
    parser.add_argument("-k", "--keepTemp", action="store_true",
                        help="Keep temporary files")
    parser.add_argument("--tempDir", type=str, default=None,
                        help="Where to write temporary files (DIR)")
    parser.add_argument("--debug", action="store_true")

    args = parser.parse_args()

    if args.asmdir.endswith("/"):
        # Remove every trailing slash, not just one, but never reduce the
        # filesystem root ("/") to an empty string.
        args.asmdir = args.asmdir.rstrip("/") or "/"

    if args.tempDir is None:
        args.tempDir = args.asmdir

    setupLogging(args.debug)

    return args
Exemple #3
0
def parseArgs(argv):
    """
    Parse the given argument list.

    Args:
        argv: list of command line argument strings (without the program
              name), handed directly to ArgumentParser.parse_args.

    Positional arguments are the m4 alignment file, the reads file and
    the reference. The output name is resolved after parsing:
    --inplace makes the output the input m4 itself (overruling --output);
    otherwise a missing --output defaults to <m4 without ".m4">.tails.m4.
    Logging is configured through setupLogging(args.debug).

    Returns:
        argparse.Namespace with the parsed values and the final output.
    """
    parser = argparse.ArgumentParser(
        description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument("m4", metavar="M4", type=str,
                        help="M4 containing mapped reads' alignments")
    parser.add_argument("reads", metavar="READS", type=str,
                        help="Fasta/Fastq containing reads' sequence")
    parser.add_argument("ref", metavar="REFERENCE", type=str,
                        help="REFERENCE to map tails to")
    parser.add_argument("-t", "--minTail", type=int, default=100,
                        help="Minimum tail length to attempt remapping (100)")
    parser.add_argument("-n", "--nproc", type=int, default=1,
                        help="Number of processors to use (1)")
    parser.add_argument("-o", "--output", type=str, default=None,
                        help="Output Name (M4.tails.m4)")
    parser.add_argument("-i", "--inplace", action="store_true",
                        help="Append the results to the input m4 file. Overrules --output")
    parser.add_argument("--noSa", action="store_true",
                        help="Don't use reference's sa")
    parser.add_argument("--temp",
                        type=str,
                        default=tempfile.gettempdir(),
                        help="Where to save temporary files")
    parser.add_argument("--debug", action="store_true")

    args = parser.parse_args(argv)
    if args.inplace:
        args.output = args.m4
    elif args.output is None:
        # Only drop the ".m4" suffix when it is really there; slicing off
        # three characters unconditionally would mangle other file names.
        stem = args.m4[:-3] if args.m4.endswith(".m4") else args.m4
        args.output = stem + ".tails.m4"

    setupLogging(args.debug)
    return args
Exemple #4
0
def parseArgs(argv):
    """
    Parse the given argument list.

    Args:
        argv: list of command line argument strings (without the program
              name), handed directly to ArgumentParser.parse_args.

    Positional arguments are the m4 alignment file, the reads file and
    the reference. The output name is resolved after parsing:
    --inplace makes the output the input m4 itself (overruling --output);
    otherwise a missing --output defaults to <m4 without ".m4">.tails.m4.
    Logging is configured through setupLogging(args.debug).

    Returns:
        argparse.Namespace with the parsed values and the final output.
    """
    parser = argparse.ArgumentParser(
        description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument("m4", metavar="M4", type=str,
                        help="M4 containing mapped reads' alignments")
    parser.add_argument("reads", metavar="READS", type=str,
                        help="Fasta/Fastq containing reads' sequence")
    parser.add_argument("ref", metavar="REFERENCE", type=str,
                        help="REFERENCE to map tails to")
    parser.add_argument("-t", "--minTail", type=int, default=100,
                        help="Minimum tail length to attempt remapping (100)")
    parser.add_argument("-n", "--nproc", type=int, default=1,
                        help="Number of processors to use (1)")
    parser.add_argument("-o", "--output", type=str, default=None,
                        help="Output Name (M4.tails.m4)")
    parser.add_argument("-i", "--inplace", action="store_true",
                        help="Append the results to the input m4 file. Overrules --output")
    parser.add_argument("--noSa", action="store_true",
                        help="Don't use reference's sa")
    parser.add_argument("--temp", type=str, default=tempfile.gettempdir(),
                        help="Where to save temporary files")
    parser.add_argument("--debug", action="store_true")

    args = parser.parse_args(argv)
    if args.inplace:
        args.output = args.m4
    elif args.output is None:
        # Only drop the ".m4" suffix when it is really there; slicing off
        # three characters unconditionally would mangle other file names.
        stem = args.m4[:-3] if args.m4.endswith(".m4") else args.m4
        args.output = stem + ".tails.m4"

    setupLogging(args.debug)
    return args
Exemple #5
0
def parseArgs():
    """
    Parse the command line arguments found in sys.argv.

    Positional argument:
        asmdir -- local assembly directory for a gap

    Options: --maxTrim, --maxWiggle, --predictedGapSize, --nproc,
    --keepTemp, --tempDir and --debug.

    After parsing, trailing "/" characters are removed from asmdir,
    tempDir falls back to asmdir when it was not given, and logging is
    configured through setupLogging(args.debug).

    Returns:
        argparse.Namespace holding the parsed and normalised values.
    """
    parser = argparse.ArgumentParser(
        description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("asmdir", metavar="DIR", type=str,
                        help="Local assembly directory for a gap")
    parser.add_argument("-t", "--maxTrim", type=int, default=100,
                        help="Maximum trim allowed (100)")
    parser.add_argument("-w", "--maxWiggle", type=int, default=400,
                        help="Maximum wiggle for gap spanning allowed (400)")
    parser.add_argument("-p", "--predictedGapSize", type=int, default=None)
    parser.add_argument("-n", "--nproc", type=int, default=1)
    parser.add_argument("-k", "--keepTemp", action="store_true",
                        help="Keep temporary files")
    parser.add_argument("--tempDir", type=str, default=None,
                        help="Where to write temporary files (DIR)")
    parser.add_argument("--debug", action="store_true")

    args = parser.parse_args()

    if args.asmdir.endswith("/"):
        # Remove every trailing slash, not just one, but never reduce the
        # filesystem root ("/") to an empty string.
        args.asmdir = args.asmdir.rstrip("/") or "/"

    if args.tempDir is None:
        args.tempDir = args.asmdir

    setupLogging(args.debug)

    return args
Exemple #6
0
def parseArgs():
    """
    Parse the command line arguments found in sys.argv.

    Builds the argparse parser, parses the arguments, configures logging
    through setupLogging(args.debug) and verifies that at most one of
    --super, --target and --Target was supplied.

    Returns:
        argparse.Namespace with the attributes reads, target, Target,
        super, maxtail, nproc, outname and debug.

    Exits with status 1 (message on stderr) when more than one of the
    target selection options is given.
    """
    parser = argparse.ArgumentParser(
        description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("reads", metavar="reads", type=str,
                        help="Input reads .fasta or .fastq")

    parser.add_argument("-t", "--target", type=str,
                        help="Target sequence name")
    parser.add_argument("-T", "--Target", type=str,
                        help="Fasta file containing target sequence")
    parser.add_argument("-s", "--super", dest="super", action="store_true",
                        help="Treat each read as the target once")

    # sys.maxint only exists on Python 2; fall back to sys.maxsize so the
    # "no limit" default also works on interpreters that lack it.
    parser.add_argument("-m", "--maxtail", type=int,
                        default=getattr(sys, "maxint", sys.maxsize),
                        help="Max number of bases allowed to be in tail (inf)")

    parser.add_argument("-n", "--nproc", dest="nproc", default=1, type=int,
                        help="Number of processors to use with blasr (1)")

    parser.add_argument("-o", "--outname", dest="outname",
                        default="polish.out", type=str,
                        help="Base name for output files (polish.out)")
    parser.add_argument("--debug", action="store_true")

    args = parser.parse_args()
    setupLogging(args.debug)

    # The three target selection options are mutually exclusive. Counting
    # the ones that were supplied covers every conflicting combination.
    selected = [args.super,
                args.target is not None,
                args.Target is not None]
    if sum(selected) > 1:
        # Report on stderr and leave through sys.exit: the bare exit()
        # helper is injected by the site module and is not always present.
        sys.stderr.write(
            "Error! only specify one of --super or --target or --Target\n")
        sys.exit(1)

    return args
Exemple #7
0
    def __init__(self):
        """
        Given a protocol fn, load it up so we are ready to run.
        """
        self.parseArgs()
        setupLogging(self.options.debug)
        sys.stderr.write("""
Please Cite: English, Adam C., Stephen Richards, Yi Han, Min Wang,
             Vanesa Vee, Jiaxin Qu, Xiang Qin, et al. "Mind the
             Gap: Upgrading Genomes with Pacific Biosciences RS
             Long-Read Sequencing Technology." PLoS ONE 7, no. 11
             (November 21, 2012): e47768.
             doi:10.1371/journal.pone.0047768.\n\n""")
        self.parseProtocol()
Exemple #8
0
def parseArgs(argv):
    """
    Parse the given argument list for "Honey.py pie".

    Args:
        argv: list of command line argument strings (without the program
              name), handed directly to ArgumentParser.parse_args.

    After parsing, logging is configured through setupLogging(args.debug)
    and the blasr parameter string is passed to checkBlasrParams. When
    --output is not given it is derived from the input name:
    <input without extension>.tails.<ext>, keeping a ".sam" or ".bam"
    extension and using ".sam" for anything else.

    Returns:
        argparse.Namespace with the parsed values and the final output.
    """
    # Local import keeps this function self-contained.
    import os

    parser = argparse.ArgumentParser(
        prog="Honey.py pie", description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument("input", metavar="[SAM,BAM,FASTA,FASTQ,FOFN]", type=str,
                        help="Input reads to be mapped")
    parser.add_argument("reference", metavar="REFERENCE", type=str,
                        help="Reference to map tails")

    parser.add_argument("-o", "--output", type=str, default=None,
                        help="Output Name (BAM.tails.[sam|bam])")
    parser.add_argument("-t", "--minTail", type=int, default=100,
                        help="Minimum tail length to attempt remapping (%(default)s)")
    parser.add_argument("-n", "--nproc", type=int, default=1,
                        help="Number of processors to use (%(default)s)")
    parser.add_argument("-p", "--params", type=str, default=BLASRPARAMS,
                        help="Specify custom blasr params. use -p=\"string\"")
    parser.add_argument("--temp",
                        type=str,
                        default=tempfile.gettempdir(),
                        help="Where to save temporary files")

    parser.add_argument("--chunks", type=int, default=0,
                        help=("Create N scripts containing commands to "
                              "each input of the fofn (%(default)s)"))
    parser.add_argument("--debug", action="store_true")

    args = parser.parse_args(argv)

    setupLogging(args.debug)
    checkBlasrParams(args.params)

    if args.output is None:
        # splitext copes with names that have no extension (rindex('.')
        # raised ValueError) and ignores dots in directory components.
        main, ext = os.path.splitext(args.input)
        if ext in (".sam", ".bam"):
            args.output = main + ".tails" + ext
        else:
            args.output = main + ".tails.sam"

    return args
Exemple #9
0
def parseArgs(argv):
    """
    Parse the given argument list for "Honey.py pie".

    Args:
        argv: list of command line argument strings (without the program
              name), handed directly to ArgumentParser.parse_args.

    After parsing, logging is configured through setupLogging(args.debug)
    and the blasr parameter string is passed to checkBlasrParams. When
    --output is not given it is derived from the input name:
    <input without extension>.tails.<ext>, keeping a ".sam" or ".bam"
    extension and using ".sam" for anything else.

    Returns:
        argparse.Namespace with the parsed values and the final output.
    """
    # Local import keeps this function self-contained.
    import os

    parser = argparse.ArgumentParser(
        prog="Honey.py pie", description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument("input", metavar="[SAM,BAM,FASTA,FASTQ,FOFN]", type=str,
                        help="Input reads to be mapped")
    parser.add_argument("reference", metavar="REFERENCE", type=str,
                        help="Reference to map tails")

    parser.add_argument("-o", "--output", type=str, default=None,
                        help="Output Name (BAM.tails.[sam|bam])")
    parser.add_argument("-t", "--minTail", type=int, default=100,
                        help="Minimum tail length to attempt remapping (%(default)s)")
    parser.add_argument("-n", "--nproc", type=int, default=1,
                        help="Number of processors to use (%(default)s)")
    parser.add_argument("-p", "--params", type=str, default=BLASRPARAMS,
                        help="Specify custom blasr params. use -p=\"string\"")
    parser.add_argument("--temp", type=str, default=tempfile.gettempdir(),
                        help="Where to save temporary files")

    parser.add_argument("--chunks", type=int, default=0,
                        help=("Create N scripts containing commands to "
                              "each input of the fofn (%(default)s)"))
    parser.add_argument("--debug", action="store_true")

    args = parser.parse_args(argv)

    setupLogging(args.debug)
    checkBlasrParams(args.params)

    if args.output is None:
        # splitext copes with names that have no extension (rindex('.')
        # raised ValueError) and ignores dots in directory components.
        main, ext = os.path.splitext(args.input)
        if ext in (".sam", ".bam"):
            args.output = main + ".tails" + ext
        else:
            args.output = main + ".tails.sam"

    return args
Exemple #10
0
def parseArgs(argv):
    """
    Parse the given argument list for "Honey.py tails".

    Args:
        argv: list of command line argument strings (without the program
              name), handed directly to ArgumentParser.parse_args.

    Side effects:
        * the module level BUFFER is set to the --buffer value
        * logging is configured through setupLogging(args.debug)

    When --output is not given it defaults to the BAM name (without its
    ".bam"/".sam" suffix) plus ".hon.tails". When --verboseFile is set,
    args.verboseFile is replaced by the path "<output>.verbose";
    otherwise it stays False.

    Returns:
        argparse.Namespace with the parsed and derived values.
    """
    global BUFFER

    parser = argparse.ArgumentParser(
        prog="Honey.py tails", description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument("bam", metavar="BAM", type=str,
                        help="BAM containing mapped reads")
    parser.add_argument("-B", "--buffer", type=int, default=1000,
                        help=("Buffer around breaks reads must fall "
                              "within to become clustered (%(default)s)"))
    parser.add_argument("-b", "--minBreads", type=int, default=3,
                        help="Minimum number of reads (%(default)s)")
    parser.add_argument("-z", "--minZMWs", type=int, default=3,
                        help="Minimum number of unique ZMWs (%(default)s)")
    parser.add_argument("-q", "--minMapq", type=int, default=150,
                        help="Minimum mapping quality of a read and its tail to consider (%(default)s)")
    parser.add_argument("-f", "--fastq", action="store_true",
                        help="Write fastq for each cluster into a .tgz archive (%(default)s)")
    parser.add_argument("-o", "--output", type=str, default=None,
                        help="Output file to write results (BAM.hon.tails)")
    # Disabled options, kept for reference:
    # parser.add_argument("--noAdaptFilter", action="store_false",
    #                     help="Keep reads that appear to have a missed adapter orientation")
    # parser.add_argument("-a", "--ambigous", action="store_true",
    #                     help="Report SVs with ambigous annotation e.g. INS* (False)")
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("-v", "--verboseFile", action="store_true",
                        help="Print each read inside of a cluster to <output>.verbose (%(default)s)")

    args = parser.parse_args(argv)
    BUFFER = args.buffer

    if args.output is None:
        # Strip the four character suffix only when it is an alignment
        # extension; other names are used whole instead of being truncated.
        if args.bam.endswith((".bam", ".sam")):
            stem = args.bam[:-4]
        else:
            stem = args.bam
        args.output = stem + ".hon.tails"
    if args.verboseFile:
        # The flag turns into the path of the verbose report.
        args.verboseFile = args.output + '.verbose'

    setupLogging(args.debug)
    return args
Exemple #11
0
def parseArgs(argv):
    """
    Parse the given argument list for "Honey.py tails".

    Args:
        argv: list of command line argument strings (without the program
              name), handed directly to ArgumentParser.parse_args.

    Side effects:
        * the module level BUFFER is set to the --buffer value
        * logging is configured through setupLogging(args.debug)

    When --output is not given it defaults to the BAM name (without its
    ".bam"/".sam" suffix) plus ".hon.tails". When --verboseFile is set,
    args.verboseFile is replaced by the path "<output>.verbose";
    otherwise it stays False.

    Returns:
        argparse.Namespace with the parsed and derived values.
    """
    global BUFFER

    parser = argparse.ArgumentParser(
        prog="Honey.py tails", description=USAGE,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument("bam", metavar="BAM", type=str,
                        help="BAM containing mapped reads")
    parser.add_argument("-B", "--buffer", type=int, default=1000,
                        help=("Buffer around breaks reads must fall "
                              "within to become clustered (%(default)s)"))
    parser.add_argument("-b", "--minBreads", type=int, default=3,
                        help="Minimum number of reads (%(default)s)")
    parser.add_argument("-z", "--minZMWs", type=int, default=3,
                        help="Minimum number of unique ZMWs (%(default)s)")
    parser.add_argument("-q", "--minMapq", type=int, default=150,
                        help="Minimum mapping quality of a read and its tail to consider (%(default)s)")
    parser.add_argument("-f", "--fastq", action="store_true",
                        help="Write fastq for each cluster into a .tgz archive (%(default)s)")
    parser.add_argument("-o", "--output", type=str, default=None,
                        help="Output file to write results (BAM.hon.tails)")
    # Disabled options, kept for reference:
    # parser.add_argument("--noAdaptFilter", action="store_false",
    #                     help="Keep reads that appear to have a missed adapter orientation")
    # parser.add_argument("-a", "--ambigous", action="store_true",
    #                     help="Report SVs with ambigous annotation e.g. INS* (False)")
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("-v", "--verboseFile", action="store_true",
                        help="Print each read inside of a cluster to <output>.verbose (%(default)s)")

    args = parser.parse_args(argv)
    BUFFER = args.buffer

    if args.output is None:
        # Strip the four character suffix only when it is an alignment
        # extension; other names are used whole instead of being truncated.
        if args.bam.endswith((".bam", ".sam")):
            stem = args.bam[:-4]
        else:
            stem = args.bam
        args.output = stem + ".hon.tails"
    if args.verboseFile:
        # The flag turns into the path of the verbose report.
        args.verboseFile = args.output + '.verbose'

    setupLogging(args.debug)
    return args
Exemple #12
0
#!/usr/bin/python

import sys, logging, argparse
import pysam
from pbsuite.honey.HSpots import *
from pbsuite.utils.setupLogging import setupLogging
# One-line description of what this script does.
USAGE = "Recall spots in a hon.h5 file"

# Parse everything after the program name.
# NOTE(review): parseArgs is not defined in this file; presumably it comes
# from the `pbsuite.honey.HSpots` star import -- confirm.
args = parseArgs(sys.argv[1:], established=True)

setupLogging(args.debug)

# NOTE(review): h5py is not imported explicitly here; presumably it is also
# re-exported by the star import -- verify.
f = h5py.File(args.hon, 'a')
bam = pysam.Samfile(args.bam)
# NOTE(review): looks like a running spot counter -- confirm against the
# part of the loop that is not visible here.
tsp = 0
makeKernals(args.binsize)
# Tab-separated column header for the result rows, written to stdout.
print "#CHROM\tOUTERSTART\tSTART\tINNERSTART\tINNEREND\tEND\tOUTEREND\tTYPE\tSIZE\tINFO"
for chrom in f.keys():
    logging.info("Calling %s" % (chrom))
    container = f[chrom]["data"]
    start = f[chrom].attrs["start"]
    spots = callHotSpots(container, start, args)
    logging.info("Filtering spots")
    fspot = 0
    for spot in spots:
        spot.chrom = chrom
        spot.offset(start)

        spot.estimateSize()
        #the sv spans too far
        if spot.size > args.spanMax:
Exemple #13
0
#!/usr/bin/env python

import sys, logging, argparse
import pysam
from pbsuite.honey.HSpots import *
from pbsuite.utils.setupLogging import setupLogging
# One-line description of what this script does.
USAGE = "Recall spots in a hon.h5 file"


# Parse everything after the program name.
# NOTE(review): parseArgs is not defined in this file; presumably it comes
# from the `pbsuite.honey.HSpots` star import -- confirm.
args = parseArgs(sys.argv[1:], established=True)

setupLogging(args.debug)

# NOTE(review): h5py is not imported explicitly here; presumably it is also
# re-exported by the star import -- verify.
f = h5py.File(args.hon,'a')
bam = pysam.Samfile(args.bam)
# NOTE(review): looks like a running spot counter -- confirm against the
# part of the loop that is not visible here.
tsp = 0
makeKernals(args.binsize)
# Tab-separated column header for the result rows, written to stdout.
print "#CHROM\tOUTERSTART\tSTART\tINNERSTART\tINNEREND\tEND\tOUTEREND\tTYPE\tSIZE\tINFO"
for chrom in f.keys():
    logging.info("Calling %s" % (chrom))
    container = f[chrom]["data"]
    start = f[chrom].attrs["start"]
    spots = callHotSpots(container, start, args)
    logging.info("Filtering spots")
    fspot = 0
    for spot in spots:
        spot.chrom = chrom
        spot.offset(start)
                
        spot.estimateSize()
        #the sv spans too far