コード例 #1
0
ファイル: Stages.py プロジェクト: wangshun1121/PBSuite
def support(inputDir, gapTable, outputDir, extras):
    """
    Build one Support.py command for every mapping file.

    Every ``mapping/*.m4`` file under inputDir yields a Command that runs
    Support.py on it with gapTable, writing ``<name>.gml`` into outputDir.
    The two trailing Command arguments are ``<name>.out`` and ``<name>.err``
    paths inside outputDir.

    Returns the list of Command objects. When no mapping files are found a
    warning is logged and an empty list is returned.
    """
    m4Paths = glob.glob(os.path.join(inputDir, "mapping/*.m4"))
    if not m4Paths:
        logging.warning("No mapping files found!")
        return []

    supportTemplate = Template(
        "Support.py ${inputm4} ${gapTable} ${outFile} ${debug} ${extras}")

    jobs = []
    for m4Path in m4Paths:
        # Job name = file name without its directory part and ".m4" suffix.
        nameStart = m4Path.rindex('/') + 1
        nameEnd = m4Path.rindex(".m4")
        stem = m4Path[nameStart:nameEnd]

        gmlPath = os.path.join(outputDir, stem + ".gml")
        if os.path.isfile(gmlPath):
            # Not fatal: the new run simply replaces the previous result.
            logging.warning("Overwriting %s" % gmlPath)

        shellLine = supportTemplate.substitute(
            inputm4=m4Path,
            gapTable=gapTable,
            outFile=gmlPath,
            debug=DEBUG,
            extras=extras,
        )
        outLog = os.path.join(outputDir, stem + ".out")
        errLog = os.path.join(outputDir, stem + ".err")
        jobs.append(Command(shellLine, stem + ".support", outLog, errLog))

    return jobs
コード例 #2
0
ファイル: Stages.py プロジェクト: wangshun1121/PBSuite
def extraction(outputDir, protocol, extras):
    """
    Build the single Command that runs Extraction.py.

    The command line is ``Extraction.py <protocol> <DEBUG> <extras>``; the
    job is named "extraction" and is given ``extraction.out`` and
    ``extraction.err`` paths inside outputDir.
    """
    template = Template("Extraction.py ${protocol} ${debug} ${extras}")
    shellLine = template.substitute(
        protocol=protocol,
        debug=DEBUG,
        extras=extras,
    )

    outLog = os.path.join(outputDir, "extraction.out")
    errLog = os.path.join(outputDir, "extraction.err")
    return Command(shellLine, "extraction", outLog, errLog)
コード例 #3
0
def assembly(inputDir, gapInfoFn, extras):
    """
    Build one Assembly.py command per gap directory found under inputDir.

    Every ``ref*`` entry in inputDir is treated as one gap to assemble. When
    the entry name contains a '.', the gap's predicted size is looked up in
    the gap info file and passed to Assembly.py as ``-p <size>``.

    Recognised entry names (as parsed below):
        <ref>.<n>e5 or <ref>.<n>e3            - single end extension
        <ref>.<a>e?_<ref>.<b>e?               - gap between two contig ends
    The last two characters of each node are dropped before the number is
    parsed.

    Returns a list of Command objects. Exits the process with status 1 when
    no gap directories exist or when a directory name cannot be mapped back
    to a gap name.
    """
    gapInfo = GapInfoFile(gapInfoFn)
    command = Template("Assembly.py ${inputDir} ${size} ${debug} ${extras}")
    ret = []
    allInputs = glob.glob(os.path.join(inputDir, "ref*"))
    if not allInputs:
        logging.warning("No gaps to be assembled were found in %s! Have you run 'extraction' yet?" % inputDir)
        sys.exit(1)

    # A distinct loop name keeps the inputDir parameter intact.
    for gapDir in allInputs:
        gapName = gapDir.split('/')[-1]
        #get The predicted size if exists
        mySize = ""
        if '.' in gapName:
            nodes = gapName.split('_')
            if len(nodes) == 1:
                ref, cnam = nodes[0].split('.')
                cn = int(cnam[:-2])
                # The gap sits before the contig for an e5 end and after it
                # for an e3 end.
                if cnam.endswith('e5'):
                    ca, cb = cn - 1, cn
                elif cnam.endswith('e3'):
                    ca, cb = cn, cn + 1
                else:
                    logging.error("Single End Extension is misFormatted for %s" % gapDir)
                    # sys.exit rather than the site-provided exit(), which is
                    # not guaranteed to exist (python -S, frozen builds).
                    sys.exit(1)
            elif len(nodes) == 2:
                ref, ca = nodes[0].split('.')
                ref, cb = nodes[1].split('.')
                #Hackey Shack. - effect of sorting node names
                # to prevent redundancies during graph building
                ca, cb = sorted((int(ca[:-2]), int(cb[:-2])))
            else:
                logging.error("Couldn't recreate gapName from refDir for %s" % gapDir)
                sys.exit(1)
            # Both accepted name shapes resolve to the same gap key format.
            size = gapInfo["%s_%d_%d" % (ref, ca, cb)].length
            mySize = "-p %d" % (size)

        myCommand = command.substitute({"inputDir": gapDir,
                                        "size": mySize,
                                        "debug": DEBUG,
                                        "extras": extras})

        ret.append(Command(myCommand,
                           os.path.join(gapName, "assembly"),
                           os.path.join(gapDir, "assembly.out"),
                           os.path.join(gapDir, "assembly.err")))

    return ret
コード例 #4
0
def collection(inputDir, protocol, extras):
    """
    Build the single Command that runs Collection.py.

    The command line is ``Collection.py <protocol.protocolName> <extras>``.
    The job name and the two log paths (``output.out`` / ``output.err``) are
    all located under inputDir.
    """
    template = Template("Collection.py ${protocol} ${extras}")
    shellLine = template.substitute(
        protocol=protocol.protocolName,
        extras=extras,
    )

    jobName = os.path.join(inputDir, "collectingOutput")
    outLog = os.path.join(inputDir, "output.out")
    errLog = os.path.join(inputDir, "output.err")
    return Command(shellLine, jobName, outLog, errLog)
コード例 #5
0
ファイル: Stages.py プロジェクト: wangshun1121/PBSuite
def setup(scaffoldName, scaffoldQualName, gapInfoName, extras):
    """
    Build the Command that runs Setup.py on the input scaffolding.

    The command line is ``Setup.py <scaffoldName> -g <gapInfoName> -i
    <DEBUG> <extras>``. The job is named "setup" and its ``setup.out`` /
    ``setup.err`` paths live in the directory that contains scaffoldName.
    """
    # A missing quality file is represented as an empty string.
    qualName = "" if scaffoldQualName is None else scaffoldQualName

    template = Template("Setup.py ${scaf} -g ${gap} -i ${debug} ${extras}")
    # NOTE(review): scafQual is supplied here, but the template has no
    # ${scafQual} placeholder, so the quality file name never reaches the
    # command line - confirm whether that is intended.
    shellLine = template.substitute(
        scaf=scaffoldName,
        scafQual=qualName,
        gap=gapInfoName,
        debug=DEBUG,
        extras=extras,
    )

    logDir = os.path.dirname(scaffoldName)
    outLog = os.path.join(logDir, "setup.out")
    errLog = os.path.join(logDir, "setup.err")
    return Command(shellLine, "setup", outLog, errLog)
コード例 #6
0
ファイル: Stages.py プロジェクト: wangshun1121/PBSuite
def mapping(jobDirs, outDir, reference, referenceSa, parameters, extras):
    """
    Build the blasr mapping commands, one per fasta input.

    Input:
        - jobDirs: a list of fasta inputs
        - outDir: an output directory
        - reference: a reference - (should be contigs.. see scaffoldIntakeSetup)
        - referenceSa: a pacbio indexed reference; when the path does not
          exist the --sa option is left off the blasr command
        - parameters: option string appended to the blasr command
        - extras: option string appended to the m4pie.py command
    Task:
        - map each fasta to reference
    Output:
        - a list of Command objects; each runs blasr (writing
          <outDir>/<fasta name>.m4) followed by m4pie.py on that file

    Exits the process with status 1 if any fasta input does not exist.
    """
    logFormat = "%(asctime)s [%(levelname)s] %(message)s"
    level = "DEBUG" if DEBUG != "" else "INFO"
    logging.basicConfig(stream=sys.stderr, level=level, format=logFormat)
    logging.info("Running blasr")

    mappingTemplate = Template(
        "blasr ${fasta} ${ref} ${sa} -m 4 --out ${outFile} ${parameters} ")
    tailTemplate = Template(
        "m4pie.py ${outFile} ${fasta} ${ref} --nproc ${nproc} -i ${extras}")

    #sa safety
    if os.path.exists(referenceSa):
        referenceSa = "--sa " + referenceSa
    else:
        logging.critical(
            "Specified reference.sa %s does not exists. Mapping will be slower"
            % (referenceSa))
        referenceSa = ""

    # The processor count handed to m4pie.py mirrors the last "-nproc N"
    # found in the option strings and defaults to 1. It does not depend on
    # the fasta being processed, so it is resolved once, ahead of the loop.
    nprocMatches = re.findall(r"-nproc (\d+)", parameters + extras)
    nproc = nprocMatches[-1] if nprocMatches else '1'

    ret = []
    for fasta in jobDirs:
        # basename also copes with a bare file name that has no '/' in it.
        name = os.path.basename(fasta)

        if not os.path.exists(fasta):
            logging.error("%s doesn't exist." % fasta)
            sys.exit(1)

        outFile = os.path.join(outDir, name + ".m4")
        if os.path.isfile(outFile):
            logging.warning(
                "Output File %s already exists and will be overwritten." %
                (outFile))

        #Build Blasr Command
        cmd = mappingTemplate.substitute({
            "fasta": fasta,
            "ref": reference,
            "sa": referenceSa,
            "outFile": outFile,
            "parameters": parameters,
        })
        cmd2 = tailTemplate.substitute({
            "fasta": fasta,
            "ref": reference,
            "outFile": outFile,
            "nproc": nproc,
            "extras": extras,
        })
        # Two lines: blasr first, then m4pie.py on the file it produced.
        fullCmd = cmd + "\n" + cmd2

        #Build Command to send to CommandRunner
        jobname = name + ".mapping"
        stdout = os.path.join(outDir, name + ".out")
        stderr = os.path.join(outDir, name + ".err")
        ret.append(Command(fullCmd, jobname, stdout, stderr))

    return ret
コード例 #7
0
ファイル: Stages.py プロジェクト: esrice/PBJelly
def assembly(inputDir, gapInfoFn, extras):
    """
    Build one Assembly.py command per gap directory found under inputDir.

    Every ``ref*`` entry in inputDir is treated as one gap to assemble.
    Entries are named after the sequences surrounding the gap:
        [contig1].[ca]e[3|5]_[contig2].[cb]e[3|5]
    where contig1/2 are ref names (e.g., "ref12345") and e3/5 is the end of
    the contig that is adjacent to the gap. The meaning of ca/cb is not
    documented here.

    The predicted-size lookup that used to fill in ``${size}`` is disabled
    in this version, so the size argument is always passed as an empty
    string.

    Returns a list of Command objects. Exits the process with status 1 when
    no gap directories exist.
    """
    # Still constructed from gapInfoFn, although the result is not consulted
    # now that size prediction is switched off.
    gapInfo = GapInfoFile(gapInfoFn)

    gapDirs = glob.glob(os.path.join(inputDir, "ref*"))
    if not gapDirs:
        logging.warning(
            "No gaps to be assembled were found in %s! Have you run 'extraction' yet?"
            % inputDir)
        sys.exit(1)

    template = Template("Assembly.py ${inputDir} ${size} ${debug} ${extras}")
    jobs = []
    for gapDir in gapDirs:
        # No predicted size is supplied.
        predictedSize = ""
        shellLine = template.substitute(
            inputDir=gapDir,
            size=predictedSize,
            debug=DEBUG,
            extras=extras,
        )

        jobName = os.path.join(gapDir.split('/')[-1], "assembly")
        outLog = os.path.join(gapDir, "assembly.out")
        errLog = os.path.join(gapDir, "assembly.err")
        jobs.append(Command(shellLine, jobName, outLog, errLog))

    return jobs