Esempio n. 1
0
def makeMaf(inHalPath, outDir, step, overwrite, doMaf):
    """Run hal2maf to produce the output MAF for one step.

    The source is inHalPath itself when step is not positive, otherwise the
    "lod" HAL path that makePath builds for the step.  Nothing is run when
    doMaf is false, or when the target MAF already exists and overwrite is
    false.
    """
    if step > 0:
        halSource = makePath(inHalPath, outDir, step, "lod", "hal")
    else:
        halSource = inHalPath
    mafTarget = makePath(inHalPath, outDir, step, "out", "maf")
    if not doMaf:
        return
    if not overwrite and os.path.isfile(mafTarget):
        # Keep the existing MAF unless the caller asked to regenerate it
        return
    runShellCommand("hal2maf %s %s" % (halSource, mafTarget))
Esempio n. 2
0
def main(argv=None):
    """Parse the command line and scan a HAL tree for mutations.

    Creates the output directory when it is missing, probes for the
    ``sortBed`` executable (sorting is switched off with a warning if the
    probe command fails) and then calls getHalTreeMutations.

    Args:
        argv: Full argument vector with the program name first.  Defaults
            to sys.argv.
    """
    if argv is None:
        argv = sys.argv

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("hal", help="input hal")
    parser.add_argument("outDir", help="output dir")
    parser.add_argument("--bedName", type=str,
                        default="%%s.bed", help="Name function for output "
                        "bed files where sequence name is specifed as %%s")

    parser.add_argument("--root", default=None, type=str, help="root")
    parser.add_argument("--doSnps", action="store_true", default=False)
    parser.add_argument("--doParentDeletions", action="store_true",
                        default=False)
    parser.add_argument("--maxGap", default=10, type=int, help="gap threshold")
    parser.add_argument("--noSort", action="store_true", default=False)
    # Parse the vector we were handed (minus the program name); a bare
    # parse_args() would silently ignore the argv parameter.
    args = parser.parse_args(argv[1:])

    if not os.path.exists(args.outDir):
        os.makedirs(args.outDir)

    if not args.noSort:
        try:
            # Feed sortBed a one-line BED record to check it is runnable
            runShellCommand("echo \"x\t0\t1\" | sortBed 2> /dev/null")
        except Exception:
            # Best effort: fall back to unsorted output rather than abort
            print("Warning: output BED files not sorted because sortBed" +
                  " (BedTools) not found")
            args.noSort = True

    getHalTreeMutations(args.hal, args, args.root)
Esempio n. 3
0
def main(argv=None):
    """Parse the command line and scan a HAL tree for mutations.

    Creates the output directory when it is missing, probes for the
    ``sortBed`` executable (sorting is switched off with a warning if the
    probe command fails) and then calls getHalTreeMutations.

    Args:
        argv: Full argument vector with the program name first.  Defaults
            to sys.argv.
    """
    if argv is None:
        argv = sys.argv

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("hal", help="input hal")
    parser.add_argument("outDir", help="output dir")
    parser.add_argument("--bedName",
                        type=str,
                        default="%%s.bed",
                        help="Name function for output "
                        "bed files where sequence name is specifed as %%s")

    parser.add_argument("--root", default=None, type=str, help="root")
    parser.add_argument("--doSnps", action="store_true", default=False)
    parser.add_argument("--doParentDeletions",
                        action="store_true",
                        default=False)
    parser.add_argument("--maxGap", default=10, type=int, help="gap threshold")
    parser.add_argument("--noSort", action="store_true", default=False)
    # Parse the vector we were handed (minus the program name); a bare
    # parse_args() would silently ignore the argv parameter.
    args = parser.parse_args(argv[1:])

    if not os.path.exists(args.outDir):
        os.makedirs(args.outDir)

    if not args.noSort:
        try:
            # Feed sortBed a one-line BED record to check it is runnable
            runShellCommand("echo \"x\t0\t1\" | sortBed 2> /dev/null")
        except Exception:
            # Best effort: fall back to unsorted output rather than abort
            print("Warning: output BED files not sorted because sortBed" +
                  " (BedTools) not found")
            args.noSort = True

    getHalTreeMutations(args.hal, args, args.root)
Esempio n. 4
0
def computeModel(options):
    """Run the model-building steps in order.

    Removes whatever matches options.outMafAllPaths, extracts the gene MAFs,
    aggregates their statistics, fits the model, adjusts its frequencies
    unless options.noModFreqs is set, and finally removes the MAFs again.
    """
    def clearGeneMafs():
        # Re-read the option on every call, exactly as two inline calls would
        runShellCommand("rm -f %s" % options.outMafAllPaths)

    clearGeneMafs()
    for stage in (extractGeneMAFs, computeAgMAFStats, computeFit):
        stage(options)
    if not options.noModFreqs:
        modFreqs(options)
    clearGeneMafs()
Esempio n. 5
0
def computeModel(options):
    """Run the model-building steps in order.

    Removes whatever matches options.outMafAllPaths, extracts the gene MAFs,
    aggregates their statistics, fits the model, adjusts its frequencies
    unless options.noModFreqs is set, and finally removes the MAFs again.
    """
    def clearGeneMafs():
        # Re-read the option on every call, exactly as two inline calls would
        runShellCommand("rm -f %s" % options.outMafAllPaths)

    clearGeneMafs()
    for stage in (extractGeneMAFs, computeAgMAFStats, computeFit):
        stage(options)
    if not options.noModFreqs:
        modFreqs(options)
    clearGeneMafs()
Esempio n. 6
0
def timeCmd(cmd):
    """Run *cmd* via runShellCommand and return the elapsed time.

    Args:
        cmd: Shell command string to execute.

    Returns:
        Elapsed wall-clock time in seconds, as a float.
    """
    tstart = datetime.now()
    runShellCommand(cmd)
    # total_seconds() also counts the timedelta's days component, which the
    # old seconds + microseconds sum dropped for runs of 24 hours or more.
    return (datetime.now() - tstart).total_seconds()
Esempio n. 7
0
def computeFit(options):
    """Run phyloFit with the configured options, then delete the SS file.

    The command is filled in from options.tree, options.substMod,
    options.outMafSS and options.precision; the output root is options.outMod
    without its extension.  ``--error`` is appended only when options.error
    is not None.
    """
    fitArgs = (options.tree, options.substMod, options.outMafSS,
               options.precision, os.path.splitext(options.outMod)[0])
    phyloFitCmd = "phyloFit --tree \"%s\" --subst-mod %s --sym-freqs %s --precision %s --out-root %s" % fitArgs
    if options.error is not None:
        phyloFitCmd = phyloFitCmd + " --error %s " % options.error
    runShellCommand(phyloFitCmd)
    # The SS file is only an intermediate input for the fit
    runShellCommand("rm -f %s" % options.outMafSS)
Esempio n. 8
0
def timeCmd(cmd):
    """Run *cmd* via runShellCommand and return the elapsed time.

    Args:
        cmd: Shell command string to execute.

    Returns:
        Elapsed wall-clock time in seconds, as a float.
    """
    tstart = datetime.now()
    runShellCommand(cmd)
    # total_seconds() also counts the timedelta's days component, which the
    # old seconds + microseconds sum dropped for runs of 24 hours or more.
    return (datetime.now() - tstart).total_seconds()
Esempio n. 9
0
def computeFit(options):
    """Run phyloFit with the configured options, then delete the SS file.

    The command is filled in from options.tree, options.substMod and
    options.outMafSS; the output root is options.outMod without its
    extension.  ``--error`` is appended only when options.error is not None.
    """
    fitArgs = (options.tree, options.substMod, options.outMafSS,
               os.path.splitext(options.outMod)[0])
    phyloFitCmd = "phyloFit --tree \"%s\" --subst-mod %s --sym-freqs %s --out-root %s" % fitArgs
    if options.error is not None:
        phyloFitCmd = phyloFitCmd + " --error %s " % options.error
    runShellCommand(phyloFitCmd)
    # The SS file is only an intermediate input for the fit
    runShellCommand("rm -f %s" % options.outMafSS)
Esempio n. 10
0
def getScanTime(inHalPath, outDir, step):
    """Time one halBranchMutations run and return it in a one-element list.

    The scan reads inHalPath for a non-positive step, otherwise the step's
    "lod" HAL built by makePath.  The genome scanned is the second entry
    returned by getHalGenomes(inHalPath); its BED output path also comes
    from makePath.
    """
    halToScan = (makePath(inHalPath, outDir, step, "lod", "hal")
                 if step > 0 else inHalPath)
    allGenomes = getHalGenomes(inHalPath)
    assert len(allGenomes) > 1
    target = allGenomes[1]
    bedOut = makePath(inHalPath, outDir, step, target, "bed")
    started = time.time()
    runShellCommand("halBranchMutations %s %s --refFile %s" % (halToScan, target, bedOut))
    return [time.time() - started]
Esempio n. 11
0
def getScanTime(inHalPath, outDir, step):
    """Time one halBranchMutations run and return it in a one-element list.

    The scan reads inHalPath for a non-positive step, otherwise the step's
    "lod" HAL built by makePath.  The genome scanned is the second entry
    returned by getHalGenomes(inHalPath); its BED output path also comes
    from makePath.
    """
    halToScan = (makePath(inHalPath, outDir, step, "lod", "hal")
                 if step > 0 else inHalPath)
    allGenomes = getHalGenomes(inHalPath)
    assert len(allGenomes) > 1
    target = allGenomes[1]
    bedOut = makePath(inHalPath, outDir, step, target, "bed")
    started = time.time()
    runShellCommand("halBranchMutations %s %s --refFile %s" % (
        halToScan, target, bedOut))
    return [time.time() - started]
Esempio n. 12
0
def computeFit(options):
    """Run phyloFit with the configured options, then delete the SS file.

    The tree is options.tree when given, otherwise it is read from the HAL
    file with getHalTree.  The output root is options.outMod without its
    extension, and ``--error`` is appended only when options.error is not
    None.
    """
    tree = getHalTree(options.hal) if options.tree is None else options.tree

    fitArgs = (tree, options.substMod, options.outMafSS,
               os.path.splitext(options.outMod)[0])
    phyloFitCmd = "phyloFit --tree \"%s\" --subst-mod %s --sym-freqs %s --out-root %s" % fitArgs
    if options.error is not None:
        phyloFitCmd = phyloFitCmd + " --error %s " % options.error
    runShellCommand(phyloFitCmd)
    # The SS file is only an intermediate input for the fit
    runShellCommand("rm -f %s" % options.outMafSS)
Esempio n. 13
0
def extractGeneMAFs(options):
    """Export one MAF per input BED file.

    For each file in options.bedFiles a "4d" BED is derived (with
    hal4dExtract, or by plain copy when options.no4d is set), hal2mafMP.py
    exports a MAF restricted to it, and the derived BED is deleted.
    Afterwards every MAF matching options.outMafAllPaths that is smaller
    than 5 bytes is removed, and the second line is stripped from the rest.

    Raises:
        RuntimeError: if no MAF matching options.outMafAllPaths remains.
    """
    runShellCommand("rm -f %s" % options.outMafAllPaths)

    for bedFile in options.bedFiles:
        mafStem = os.path.splitext(options.outMafPath)[0]
        bedStem = os.path.splitext(os.path.basename(bedFile))[0]
        bedFile4d = mafStem + "_" + bedStem + "4d.bed"
        if options.no4d:
            runShellCommand("cp %s %s" % (bedFile, bedFile4d))
        else:
            runShellCommand("hal4dExtract %s %s %s %s" % (
                options.hal, options.refGenome, bedFile, bedFile4d))

        stem4d = os.path.splitext(os.path.basename(bedFile4d))[0]
        outMaf = os.path.splitext(options.outMafPath)[0] + "_" + stem4d + ".maf"
        h2mFlags = "--noDupes" + " --targetGenomes %s" % options.halGenomes
        if options.noAncestors is True:
            h2mFlags = h2mFlags + " --noAncestors"
        exportArgs = (options.hal, outMaf, h2mFlags, options.numProc,
                      bedFile4d, options.refGenome)
        runShellCommand("hal2mafMP.py %s %s %s "
                        "--numProc %d --refTargets %s --refGenome %s "
                        % exportArgs)
        if os.path.exists(bedFile4d):
            os.remove(bedFile4d)

    for mafFile in glob.glob(options.outMafAllPaths):
        if os.path.getsize(mafFile) >= 5:
            remove2ndLine(mafFile)
            continue
        # Files under 5 bytes are discarded
        os.remove(mafFile)
    if not glob.glob(options.outMafAllPaths):
        raise RuntimeError("Given BED files do not overlap alignment")
Esempio n. 14
0
def extractGeneMAFs(options):
    """Export one MAF per input BED file.

    For each file in options.bedFiles a "4d" BED is derived (with
    hal4dExtract, or by plain copy when options.no4d is set), hal2mafMP.py
    exports a MAF restricted to it, and the derived BED is deleted.
    Afterwards every MAF matching options.outMafAllPaths that is smaller
    than 5 bytes is removed, and the second line is stripped from the rest.

    Raises:
        RuntimeError: if no MAF matching options.outMafAllPaths remains.
    """
    runShellCommand("rm -f %s" % options.outMafAllPaths)

    for bedFile in options.bedFiles:
        mafStem = os.path.splitext(options.outMafPath)[0]
        bedStem = os.path.splitext(os.path.basename(bedFile))[0]
        bedFile4d = mafStem + "_" + bedStem + "4d.bed"
        if options.no4d:
            runShellCommand("cp %s %s" % (bedFile, bedFile4d))
        else:
            runShellCommand(
                "hal4dExtract %s %s %s %s" %
                (options.hal, options.refGenome, bedFile, bedFile4d))

        stem4d = os.path.splitext(os.path.basename(bedFile4d))[0]
        outMaf = os.path.splitext(options.outMafPath)[0] + "_" + stem4d + ".maf"
        h2mFlags = "--noDupes" + " --targetGenomes %s" % options.halGenomes
        if options.noAncestors is True:
            h2mFlags = h2mFlags + " --noAncestors"
        exportArgs = (options.hal, outMaf, h2mFlags, options.numProc,
                      bedFile4d, options.refGenome)
        runShellCommand("hal2mafMP.py %s %s %s "
                        "--numProc %d --refTargets %s --refGenome %s " %
                        exportArgs)
        if os.path.exists(bedFile4d):
            os.remove(bedFile4d)

    for mafFile in glob.glob(options.outMafAllPaths):
        if os.path.getsize(mafFile) >= 5:
            remove2ndLine(mafFile)
            continue
        # Files under 5 bytes are discarded
        os.remove(mafFile)
    if not glob.glob(options.outMafAllPaths):
        raise RuntimeError("Given BED files do not overlap alignment")
Esempio n. 15
0
def splitBed(path, options):
    """Split a BED file into chunks of at most options.maxBedLines lines.

    The line count comes from ``wc -l``.  When options.maxBedLines is None
    or the file is not longer than that limit, no files are written and the
    input path is returned unchanged.  Otherwise numbered chunk files
    (``<name>_0<ext>``, ``<name>_1<ext>``, ...) are written into the
    directory of options.outMafPath.

    Fixes over the previous version: the input handle is now closed, every
    chunk honours the limit (chunks used to hold one or two lines too many),
    and the first chunk is written to the output directory like all the
    others instead of next to the input file.

    Args:
        path: BED file to split.
        options: Object providing ``maxBedLines`` and ``outMafPath``.

    Returns:
        List of chunk paths in input order, or ``[path]`` when no split is
        needed.
    """
    numLines = int(runShellCommand("wc -l %s" % path).split()[0])
    if options.maxBedLines is None or numLines <= options.maxBedLines:
        return [path]

    outDir = os.path.dirname(options.outMafPath)
    stem, ext = os.path.splitext(os.path.basename(path))
    # A limit below 1 would never let a chunk accept a line
    chunkSize = max(1, options.maxBedLines)

    outPaths = []
    outBed = None
    try:
        with open(path, "r") as inBed:
            for lineIdx, inLine in enumerate(inBed):
                if lineIdx % chunkSize == 0:
                    # Current chunk is full (or this is the first line)
                    if outBed is not None:
                        outBed.close()
                    outPath = os.path.join(
                        outDir, "%s_%d%s" % (stem, len(outPaths), ext))
                    outPaths.append(outPath)
                    outBed = open(outPath, "w")
                outBed.write(inLine)
    finally:
        if outBed is not None:
            outBed.close()
    return outPaths
Esempio n. 16
0
def splitBed(path, options):
    """Split a BED file into chunks of at most options.maxBedLines lines.

    The line count comes from ``wc -l``.  When options.maxBedLines is None
    or the file is not longer than that limit, no files are written and the
    input path is returned unchanged.  Otherwise numbered chunk files
    (``<name>_0<ext>``, ``<name>_1<ext>``, ...) are written into the
    directory of options.outMafPath.

    Fixes over the previous version: the input handle is now closed, every
    chunk honours the limit (chunks used to hold one or two lines too many),
    and the first chunk is written to the output directory like all the
    others instead of next to the input file.

    Args:
        path: BED file to split.
        options: Object providing ``maxBedLines`` and ``outMafPath``.

    Returns:
        List of chunk paths in input order, or ``[path]`` when no split is
        needed.
    """
    numLines = int(runShellCommand("wc -l %s" % path).split()[0])
    if options.maxBedLines is None or numLines <= options.maxBedLines:
        return [path]

    outDir = os.path.dirname(options.outMafPath)
    stem, ext = os.path.splitext(os.path.basename(path))
    # A limit below 1 would never let a chunk accept a line
    chunkSize = max(1, options.maxBedLines)

    outPaths = []
    outBed = None
    try:
        with open(path, "r") as inBed:
            for lineIdx, inLine in enumerate(inBed):
                if lineIdx % chunkSize == 0:
                    # Current chunk is full (or this is the first line)
                    if outBed is not None:
                        outBed.close()
                    outPath = os.path.join(
                        outDir, "%s_%d%s" % (stem, len(outPaths), ext))
                    outPaths.append(outPath)
                    outBed = open(outPath, "w")
                outBed.write(inLine)
    finally:
        if outBed is not None:
            outBed.close()
    return outPaths
Esempio n. 17
0
def computeTreePhyloP(args):
    """Compute phyloP wiggle tracks for every genome below args.root.

    Genomes are visited from a stack seeded with args.root, so a parent is
    always handled before its children.  For each genome: halPhyloPMP.py is
    run (restricted, for non-root genomes, to the regions that do not align
    to the parent), the parent's wiggle is lifted down and appended when it
    exists, and, if args.bigWig is True, a wig-to-bigWig conversion command
    is queued.  The queued conversions run in parallel at the end.
    """
    pending = [args.root]
    deferredBigWig = []
    while pending:
        genome = pending.pop()
        belowRoot = genome != args.root

        # BED of all regions of this genome that do not align to the parent
        insertsBed = outFileName(args, genome, "bed", "inserts", True)
        refBedFlag = ""
        if belowRoot:
            runShellCommand(
                "halAlignedExtract %s %s --alignedFile %s --complement" % (
                    args.hal, genome, insertsBed))
            refBedFlag = "--refBed %s" % insertsBed

        # phyloP over the inserts (or the whole genome at the root)
        wigFile = outFileName(args, genome, "wig", "phyloP", False)
        phyloPCmd = "halPhyloPMP.py %s %s %s %s --numProc %d %s" % (
            args.hal, genome, args.mod, refBedFlag, args.numProc, wigFile)
        if args.subtree is not None:
            phyloPCmd = phyloPCmd + " --subtree %s" % args.subtree
        if args.prec is not None:
            phyloPCmd = phyloPCmd + " --prec %d" % args.prec
        runShellCommand(phyloPCmd)

        runShellCommand("rm -f %s" % insertsBed)

        # Append the parent's scores, lifted down onto this genome
        if belowRoot:
            parent = getHalParentName(args.hal, genome)
            parentWig = outFileName(args, parent, "wig", "phyloP", False)
            if os.path.isfile(parentWig):
                runShellCommand("halWiggleLiftover %s %s %s %s %s --append" % (
                    args.hal, parent, parentWig, genome, wigFile))

        # Queue the bigWig conversion; it also deletes the wig, so it must
        # wait until the children have lifted down from it
        if args.bigWig is True and os.path.isfile(wigFile):
            sizesFile = outFileName(args, genome, "sizes", "chr", True)
            bwFile = outFileName(args, genome, "bw", "phyloP", False)
            deferredBigWig.append(
                "halStats %s --chromSizes %s > %s && " % (args.hal, genome,
                                                          sizesFile)
                + "wigToBigWig %s %s %s && " % (wigFile, sizesFile, bwFile)
                + "rm -f %s &&" % wigFile
                + "rm -f %s" % sizesFile)

        pending.extend(getHalChildrenNames(args.hal, genome))

    # Parallel bigWig conversion
    runParallelShellCommands(deferredBigWig, args.numProc)
Esempio n. 18
0
def simulateLoad(options):
    """Time the block-visualisation command for every target genome.

    Each command is attempted up to options.retry times (at least once),
    waiting two seconds between failed attempts.  Before every attempt the
    contents of options.udc are deleted when options.udc is set and
    options.zapUdc is True.

    Args:
        options: Object providing ``tgtGenomes``, ``retry``, ``udc`` and
            ``zapUdc``, plus whatever getBlockVizCmd reads.

    Returns:
        Sum of the elapsed times of the successful runs, as returned by
        timeCmd.

    Raises:
        Exception: the last error raised for a command that failed on every
            attempt.
    """
    cmds = [getBlockVizCmd(options, tgtGenome) for tgtGenome in options.tgtGenomes]
    elapsedTime = 0.
    # Always try at least once: a retry count below 1 used to skip the loop
    # and then crash on the never-assigned timing variable.
    attempts = max(1, options.retry)
    for cmd in cmds:
        lastExcep = None
        # range() works on both Python 2 and 3 (xrange is Python 2 only)
        for trial in range(attempts):
            if options.udc is not None and options.zapUdc is True:
                runShellCommand("rm -rf %s" % os.path.join(options.udc, "*"))
            try:
                elapsedTime += timeCmd(cmd)
                lastExcep = None
                break
            except Exception as e:
                lastExcep = e
                # No point in waiting when there is no attempt left
                if trial + 1 < attempts:
                    time.sleep(2)
        if lastExcep is not None:
            raise lastExcep
    return elapsedTime
Esempio n. 19
0
def getHalBranchMutations(halPath, genomeName, args):
    """Build, echo and run the halBranchMutations command for one genome.

    With sorting enabled (args.noSort false) the reference, deletion-break
    and optional SNP outputs are all sent to stdout and piped through
    sortBed into ``<outDir>/<genomeName>.bed``; otherwise they are written
    to that file directly.  Parent deletions, when requested, always go to
    ``<outDir>/<genomeName>_pd.bed``.

    Args:
        halPath: HAL file to scan.
        genomeName: Genome whose branch is analysed.
        args: Options providing ``maxGap``, ``outDir``, ``noSort``,
            ``doSnps`` and ``doParentDeletions``.
    """
    command = "halBranchMutations %s %s --maxGap %s" % (halPath, genomeName,
                                                        args.maxGap)

    refBedFile = os.path.join(args.outDir, "%s.bed" % genomeName)
    dest = refBedFile
    if not args.noSort:
        dest = "stdout"

    command += " --refFile %s" % dest
    command += " --delBreakFile %s" % dest
    if args.doSnps:
        command += " --snpFile %s" % dest
    if args.doParentDeletions:
        command += " --parentFile %s" % os.path.join(args.outDir,
                                                     "%s_pd.bed" % genomeName)

    if not args.noSort:
        command += " | sortBed > %s" % refBedFile
    # Single-argument print() behaves identically on Python 2 and 3; the
    # former print statement was a syntax error on Python 3.
    print(command)
    runShellCommand(command)
Esempio n. 20
0
def getHalBranchMutations(halPath, genomeName, args):
    """Build, echo and run the halBranchMutations command for one genome.

    With sorting enabled (args.noSort false) the reference, deletion-break
    and optional SNP outputs are all sent to stdout and piped through
    sortBed into ``<outDir>/<genomeName>.bed``; otherwise they are written
    to that file directly.  Parent deletions, when requested, always go to
    ``<outDir>/<genomeName>_pd.bed``.

    Args:
        halPath: HAL file to scan.
        genomeName: Genome whose branch is analysed.
        args: Options providing ``maxGap``, ``outDir``, ``noSort``,
            ``doSnps`` and ``doParentDeletions``.
    """
    command = "halBranchMutations %s %s --maxGap %s" % (halPath, genomeName,
                                                        args.maxGap)

    refBedFile = os.path.join(args.outDir, "%s.bed" % genomeName)
    dest = refBedFile
    if not args.noSort:
        dest = "stdout"

    command += " --refFile %s" % dest
    command += " --delBreakFile %s" % dest
    if args.doSnps:
        command += " --snpFile %s" % dest
    if args.doParentDeletions:
        command += " --parentFile %s" % os.path.join(args.outDir,
                                                     "%s_pd.bed" % genomeName)

    if not args.noSort:
        command += " | sortBed > %s" % refBedFile
    # Single-argument print() behaves identically on Python 2 and 3; the
    # former print statement was a syntax error on Python 3.
    print(command)
    runShellCommand(command)
Esempio n. 21
0
def computeAgMAFStats(options):
    """Aggregate the MAFs into options.outMafSS with msa_view, then clean up.

    options.halGenomes is passed straight to ``--aggregate``.  Afterwards the
    MAFs matching options.outMafAllPaths are removed, together with the same
    pattern with ".maf" replaced by ".maf-e".
    """
    aggregateCmd = "msa_view -o SS -z --in-format MAF --aggregate %s %s > %s" % (
        options.halGenomes, options.outMafAllPaths, options.outMafSS)
    runShellCommand(aggregateCmd)
    # NOTE(review): the ".maf-e" files are presumably sed -i backups - confirm
    leftovers = (options.outMafAllPaths,
                 options.outMafAllPaths.replace(".maf", ".maf-e"))
    for pattern in leftovers:
        runShellCommand("rm -f %s" % pattern)
Esempio n. 22
0
def modFreqs(options):
    """Rewrite options.outMod through the ``modFreqs`` tool.

    The model is moved to ``<outMod>_temp``, ``modFreqs`` is run on it with
    the first four values returned by getHalBaseComposition for the
    reference genome, its output is redirected back to options.outMod, and
    the temporary copy is deleted.
    """
    composition = getHalBaseComposition(options.hal, options.refGenome, 1)
    runShellCommand("mv %s %s_temp" % (options.outMod, options.outMod))
    # Exactly four frequencies are formatted, each with %f
    freqs = tuple(composition[idx] for idx in range(4))
    runShellCommand("modFreqs %s_temp %f %f %f %f > %s" %
                    ((options.outMod,) + freqs + (options.outMod,)))
    runShellCommand("rm -f %s_temp" % options.outMod)
Esempio n. 23
0
def compMaf(inHalPath, outDir, step, overwrite, doMaf):
    """Compare the step-0 MAF with this step's MAF using mafComparator.

    Two comparisons are made: a plain one, and a "near" one that adds
    ``--near int(step)``.  Each writes an XML report that
    comparatorSummarizer.py then condenses into a text file.  A comparison is
    skipped when doMaf is false, or when its XML report already exists and
    overwrite is false.
    """
    def wanted(xmlReport):
        # Same gate for both comparisons
        return doMaf and (overwrite or not os.path.isfile(xmlReport))

    baselineMaf = makePath(inHalPath, outDir, 0, "out", "maf")
    stepMaf = makePath(inHalPath, outDir, step, "out", "maf")

    plainXml = makePath(inHalPath, outDir, step, "comp", "xml")
    plainSummary = makePath(inHalPath, outDir, step, "comp", "txt")
    if wanted(plainXml):
        runShellCommand(
            "mafComparator --maf1 %s --maf2 %s --out %s --samples 100000"
            % (baselineMaf, stepMaf, plainXml))
        runShellCommand(
            "comparatorSummarizer.py --xml %s > %s " % (plainXml, plainSummary))

    nearXml = makePath(inHalPath, outDir, step, "comp_near", "xml")
    nearSummary = makePath(inHalPath, outDir, step, "comp_near", "txt")
    if wanted(nearXml):
        runShellCommand(
            "mafComparator --maf1 %s --maf2 %s --out %s --near %d --samples 100000"
            % (baselineMaf, stepMaf, nearXml, int(step)))
        runShellCommand(
            "comparatorSummarizer.py --xml %s > %s " % (nearXml, nearSummary))
Esempio n. 24
0
def modFreqs(options):
    """Rewrite options.outMod through the ``modFreqs`` tool.

    The model is moved to ``<outMod>_temp``, ``modFreqs`` is run on it with
    the first four values returned by getHalBaseComposition for the
    reference genome, its output is redirected back to options.outMod, and
    the temporary copy is deleted.
    """
    composition = getHalBaseComposition(options.hal, options.refGenome, 1)
    runShellCommand("mv %s %s_temp" % (options.outMod, options.outMod))
    # Exactly four frequencies are formatted, each with %f
    freqs = tuple(composition[idx] for idx in range(4))
    runShellCommand("modFreqs %s_temp %f %f %f %f > %s" %
                    ((options.outMod,) + freqs + (options.outMod,)))
    runShellCommand("rm -f %s_temp" % options.outMod)
Esempio n. 25
0
def computeAgMAFStats(options):
    """Aggregate the MAFs into options.outMafSS with msa_view, then clean up.

    The entries of options.halGenomes are comma-joined for ``--aggregate``.
    Afterwards the MAFs matching options.outMafAllPaths are removed, together
    with the same pattern with ".maf" replaced by ".maf-e".
    """
    aggregateCmd = "msa_view -o SS -z --in-format MAF --aggregate %s %s > %s" % (
        ",".join(options.halGenomes), options.outMafAllPaths,
        options.outMafSS)
    runShellCommand(aggregateCmd)
    # NOTE(review): the ".maf-e" files are presumably sed -i backups - confirm
    leftovers = (options.outMafAllPaths,
                 options.outMafAllPaths.replace(".maf", ".maf-e"))
    for pattern in leftovers:
        runShellCommand("rm -f %s" % pattern)
Esempio n. 26
0
def compMaf(inHalPath, outDir, step, overwrite, doMaf):
    """Compare the step-0 MAF with this step's MAF using mafComparator.

    Two comparisons are made: a plain one, and a "near" one that adds
    ``--near int(step)``.  Each writes an XML report that
    comparatorSummarizer.py then condenses into a text file.  A comparison is
    skipped when doMaf is false, or when its XML report already exists and
    overwrite is false.
    """
    def wanted(xmlReport):
        # Same gate for both comparisons
        return doMaf and (overwrite or not os.path.isfile(xmlReport))

    baselineMaf = makePath(inHalPath, outDir, 0, "out", "maf")
    stepMaf = makePath(inHalPath, outDir, step, "out", "maf")

    plainXml = makePath(inHalPath, outDir, step, "comp", "xml")
    plainSummary = makePath(inHalPath, outDir, step, "comp", "txt")
    if wanted(plainXml):
        runShellCommand(
            "mafComparator --maf1 %s --maf2 %s --out %s --samples 100000"
            % (baselineMaf, stepMaf, plainXml))
        runShellCommand(
            "comparatorSummarizer.py --xml %s > %s " % (plainXml, plainSummary))

    nearXml = makePath(inHalPath, outDir, step, "comp_near", "xml")
    nearSummary = makePath(inHalPath, outDir, step, "comp_near", "txt")
    if wanted(nearXml):
        runShellCommand(
            "mafComparator --maf1 %s --maf2 %s --out %s --near %d --samples 100000"
            % (baselineMaf, stepMaf, nearXml, int(step)))
        runShellCommand(
            "comparatorSummarizer.py --xml %s > %s " % (nearXml, nearSummary))
Esempio n. 27
0
def computeAgMAFStats(options):
    """Aggregate the MAFs into options.outMafSS with msa_view, then clean up.

    ``--aggregate`` receives the comma-joined options.targetGenomes when that
    is set, otherwise options.halGenomes as is.  Afterwards the MAFs matching
    options.outMafAllPaths are removed, together with the same pattern with
    ".maf" replaced by ".maf-e".
    """
    targets = options.targetGenomes
    species = options.halGenomes if targets is None else ",".join(targets)
    aggregateCmd = "msa_view -o SS -z --in-format MAF --aggregate %s %s > %s" % (
        species, options.outMafAllPaths, options.outMafSS)
    runShellCommand(aggregateCmd)
    # NOTE(review): the ".maf-e" files are presumably sed -i backups - confirm
    leftovers = (options.outMafAllPaths,
                 options.outMafAllPaths.replace(".maf", ".maf-e"))
    for pattern in leftovers:
        runShellCommand("rm -f %s" % pattern)
Esempio n. 28
0
def computeAgMAFStats(options):
    """Aggregate the MAFs into options.outMafSS with msa_view, then clean up.

    ``--aggregate`` receives the comma-joined options.targetGenomes when that
    is set, otherwise options.halGenomes as is.  Afterwards the MAFs matching
    options.outMafAllPaths are removed, together with the same pattern with
    ".maf" replaced by ".maf-e".
    """
    targets = options.targetGenomes
    species = options.halGenomes if targets is None else ",".join(targets)
    aggregateCmd = (
        "msa_view -o SS -z --in-format MAF --aggregate %s %s > %s" %
        (species, options.outMafAllPaths, options.outMafSS))
    runShellCommand(aggregateCmd)
    # NOTE(review): the ".maf-e" files are presumably sed -i backups - confirm
    leftovers = (options.outMafAllPaths,
                 options.outMafAllPaths.replace(".maf", ".maf-e"))
    for pattern in leftovers:
        runShellCommand("rm -f %s" % pattern)
Esempio n. 29
0
def computeMAFStats(options):
    """Merge every MAF into options.outMafPath and summarise it with msa_view.

    The first file matching options.outMafAllPaths is moved to
    options.outMafPath; every further file is appended to it with its
    ``#`` comment lines skipped.  msa_view then writes options.outMafSS, and
    both the per-file MAFs and the merged MAF are deleted.

    Appended lines are now copied verbatim.  Previously an extra newline was
    written after every line while the real blank lines were dropped, so
    each line ended up as its own blank-line-terminated paragraph instead of
    staying inside its alignment block.
    """
    # make one big maf
    isFirst = True
    for mafFile in glob.glob(options.outMafAllPaths):
        if isFirst:
            isFirst = False
            runShellCommand("mv %s %s" % (mafFile, options.outMafPath))
            continue
        with open(options.outMafPath, "a") as outMaf:
            with open(mafFile, "r") as inMaf:
                for line in inMaf:
                    if line.lstrip().startswith("#"):
                        # The merged file keeps only the first file's header
                        continue
                    if not line.endswith("\n"):
                        # Last line of a file lacking a trailing newline
                        line += "\n"
                    outMaf.write(line)

    runShellCommand("msa_view -o SS --in-format MAF %s > %s" % (
        options.outMafPath, options.outMafSS))
    runShellCommand("rm -f %s %s" % (options.outMafAllPaths,
                                     options.outMafPath))
Esempio n. 30
0
def computeMAFStats(options):
    """Merge every MAF into options.outMafPath and summarise it with msa_view.

    The first file matching options.outMafAllPaths is moved to
    options.outMafPath; every further file is appended to it with its
    ``#`` comment lines skipped.  msa_view then writes options.outMafSS, and
    both the per-file MAFs and the merged MAF are deleted.

    Appended lines are now copied verbatim.  Previously an extra newline was
    written after every line while the real blank lines were dropped, so
    each line ended up as its own blank-line-terminated paragraph instead of
    staying inside its alignment block.
    """
    # make one big maf
    isFirst = True
    for mafFile in glob.glob(options.outMafAllPaths):
        if isFirst:
            isFirst = False
            runShellCommand("mv %s %s" % (mafFile, options.outMafPath))
            continue
        with open(options.outMafPath, "a") as outMaf:
            with open(mafFile, "r") as inMaf:
                for line in inMaf:
                    if line.lstrip().startswith("#"):
                        # The merged file keeps only the first file's header
                        continue
                    if not line.endswith("\n"):
                        # Last line of a file lacking a trailing newline
                        line += "\n"
                    outMaf.write(line)

    runShellCommand("msa_view -o SS --in-format MAF %s > %s" %
                    (options.outMafPath, options.outMafSS))
    runShellCommand("rm -f %s %s" %
                    (options.outMafAllPaths, options.outMafPath))
Esempio n. 31
0
def remove2ndLine(path):
    """Delete the second line of the file at *path*, in place.

    Done in pure Python rather than by shelling out to ``sed -i -e 2d``:
    that form is not portable (BSD sed takes ``-e`` as a backup suffix and
    leaves ``<path>-e`` files behind) and breaks on paths containing spaces
    or shell metacharacters.  A file with fewer than two lines keeps its
    content.

    Args:
        path: File to edit.

    Raises:
        IOError or OSError: if the file cannot be read or replaced.
    """
    import os
    import shutil
    import tempfile

    # Stage the result next to the target so the final rename stays on one
    # filesystem
    tmpFd, tmpPath = tempfile.mkstemp(dir=os.path.dirname(path) or ".")
    try:
        # Binary mode: bytes are copied untouched, whatever the encoding
        with os.fdopen(tmpFd, "wb") as tmpFile:
            with open(path, "rb") as inFile:
                for lineNo, line in enumerate(inFile, 1):
                    if lineNo != 2:
                        tmpFile.write(line)
        # mkstemp creates the file owner-only; keep the original mode bits
        shutil.copymode(path, tmpPath)
        os.rename(tmpPath, path)
    except Exception:
        if os.path.exists(tmpPath):
            os.remove(tmpPath)
        raise
Esempio n. 32
0
def remove2ndLine(path):
    """Delete the second line of the file at *path*, in place.

    Done in pure Python rather than by shelling out to ``sed -i -e 2d``:
    that form is not portable (BSD sed takes ``-e`` as a backup suffix and
    leaves ``<path>-e`` files behind) and breaks on paths containing spaces
    or shell metacharacters.  A file with fewer than two lines keeps its
    content.

    Args:
        path: File to edit.

    Raises:
        IOError or OSError: if the file cannot be read or replaced.
    """
    import os
    import shutil
    import tempfile

    # Stage the result next to the target so the final rename stays on one
    # filesystem
    tmpFd, tmpPath = tempfile.mkstemp(dir=os.path.dirname(path) or ".")
    try:
        # Binary mode: bytes are copied untouched, whatever the encoding
        with os.fdopen(tmpFd, "wb") as tmpFile:
            with open(path, "rb") as inFile:
                for lineNo, line in enumerate(inFile, 1):
                    if lineNo != 2:
                        tmpFile.write(line)
        # mkstemp creates the file owner-only; keep the original mode bits
        shutil.copymode(path, tmpPath)
        os.rename(tmpPath, path)
    except Exception:
        if os.path.exists(tmpPath):
            os.remove(tmpPath)
        raise