Esempio n. 1
0
def makeMaf(inHalPath, outDir, step, overwrite, doMaf):
    """Export a HAL file to MAF with ``hal2maf``, if requested and needed.

    inHalPath -- path of the original HAL file.
    outDir    -- passed to makePath to build the output locations.
    step      -- 0 (or less) converts inHalPath itself; a positive step
                 converts the "lod"/"hal" path makePath builds for that step.
    overwrite -- when true, convert even if the MAF file already exists.
    doMaf     -- when false, nothing is executed.
    """
    # Resolve the HAL to read from first, then the MAF destination.
    halSource = (makePath(inHalPath, outDir, step, "lod", "hal")
                 if step > 0 else inHalPath)
    mafTarget = makePath(inHalPath, outDir, step, "out", "maf")

    if not doMaf:
        return
    # An existing MAF is kept unless the caller forces regeneration.
    if not overwrite and os.path.isfile(mafTarget):
        return
    runShellCommand("hal2maf %s %s" % (halSource, mafTarget))
Esempio n. 2
0
def runSteps(inHalPath, outDir, maxBlock, scale, steps, overwrite, doMaf,
             keepSeq, trans, inMemory):
    """Produce one level-of-detail HAL per step and collect a stats table.

    For the input file (key 0) and for every entry of ``steps`` after the
    first, a row is stored under the step value containing, in order:
    the file size divided by 1024, the values from getHalTotalSegments,
    the four values from getPrecisionRecall, and the one-element result of
    getScanTime.

    steps   -- sequence of step values; when None it is obtained from
               getSteps(inHalPath, maxBlock, scale).  Its first entry is
               skipped by the loop.
    trans   -- when exactly True, each step after the first processed one
               is extracted from the previous step's output instead of
               from inHalPath.
    keepSeq, inMemory -- forwarded unchanged to runHalLodExtract.

    Returns the defaultdict(list) holding the rows.
    """
    table = defaultdict(list)
    makeMaf(inHalPath, outDir, 0, overwrite, doMaf)

    # Row for the unmodified input; doMaf is forced to False here, so
    # getPrecisionRecall does not read any comparison summary for step 0.
    baseRow = [os.path.getsize(inHalPath) / 1024]
    baseRow.extend(getHalTotalSegments(inHalPath))
    baseRow.extend(getPrecisionRecall(inHalPath, outDir, 0, False))
    baseRow.extend(getScanTime(inHalPath, outDir, 0))
    table[0] = baseRow

    if steps is None:
        steps = getSteps(inHalPath, maxBlock, scale)

    for stepIdx, step in enumerate(steps):
        if stepIdx == 0:
            # The first entry is never extracted.
            continue

        lodPath = makePath(inHalPath, outDir, step, "lod", "hal")

        # Choose the file to extract from.
        if trans is True and stepIdx > 1:
            extractFrom = makePath(inHalPath, outDir, steps[stepIdx - 1],
                                   "lod", "hal")
        else:
            extractFrom = inHalPath

        if overwrite is True or not os.path.isfile(lodPath):
            # The scale passed to the extractor grows with the step index.
            runHalLodExtract(extractFrom, lodPath, scale ** stepIdx,
                             keepSeq, inMemory)

        makeMaf(inHalPath, outDir, step, overwrite, doMaf)
        compMaf(inHalPath, outDir, step, overwrite, doMaf)

        stepRow = [os.path.getsize(lodPath) / 1024]
        stepRow.extend(getHalTotalSegments(lodPath))
        stepRow.extend(getPrecisionRecall(inHalPath, outDir, step, doMaf))
        stepRow.extend(getScanTime(inHalPath, outDir, step))
        table[step] = stepRow

    return table
Esempio n. 3
0
def _readSelfSummaryTokens(summaryPath):
    """Return the whitespace-split tokens of the third line of summaryPath.

    The file is closed before returning.  Raises StopIteration if the file
    has fewer than three lines and AssertionError if the third token of
    that line is not "self)".
    """
    with open(summaryPath, "r") as summaryFile:
        # Skip the first two lines; only the third one is used.
        next(summaryFile)
        next(summaryFile)
        tokens = next(summaryFile).split()
    assert tokens[2] == "self)"
    return tokens


def getPrecisionRecall(inHalPath, outDir, step, doMaf):
    """Return four floats read from the comparison summaries of a step.

    When doMaf is true, the "comp"/"txt" and "comp_near"/"txt" files located
    via makePath are parsed: tokens 3 and 4 of each file's third line are
    returned as [comp[3], comp[4], near[3], near[4]].  (Presumably these are
    precision and recall, going by the function name -- confirm against the
    summarizer's output format.)

    When doMaf is false no file is read: step 0 yields [1., 1., 1., 1.] and
    any other step yields [0., 0., 0., 0.].

    Unlike the earlier version, the summary files are closed after reading
    and the builtin next() is used instead of the Python-2-only file.next().
    """
    if doMaf:
        sumPath = makePath(inHalPath, outDir, step, "comp", "txt")
        tokens = _readSelfSummaryTokens(sumPath)

        sumNearPath = makePath(inHalPath, outDir, step, "comp_near", "txt")
        tokensNear = _readSelfSummaryTokens(sumNearPath)

        return [
            float(tokens[3]),
            float(tokens[4]),
            float(tokensNear[3]),
            float(tokensNear[4])
        ]
    elif step == 0:
        return [1., 1., 1., 1.]
    else:
        return [0., 0., 0., 0.]
Esempio n. 4
0
def getScanTime(inHalPath, outDir, step):
    """Time one ``halBranchMutations`` run and return it as ``[seconds]``.

    The command is run on inHalPath when step is 0 or less, otherwise on
    the "lod"/"hal" path makePath builds for that step.  The genome used is
    the second entry returned by getHalGenomes(inHalPath); its name also
    names the "bed" file passed as --refFile.

    Returns a one-element list with the wall-clock duration measured with
    time.time() around the shell command.
    """
    halToScan = (makePath(inHalPath, outDir, step, "lod", "hal")
                 if step > 0 else inHalPath)

    genomeNames = getHalGenomes(inHalPath)
    assert len(genomeNames) > 1
    targetGenome = genomeNames[1]

    refBed = makePath(inHalPath, outDir, step, targetGenome, "bed")
    command = "halBranchMutations %s %s --refFile %s" % (
        halToScan, targetGenome, refBed)

    started = time.time()
    runShellCommand(command)
    return [time.time() - started]
Esempio n. 5
0
def getScanTime(inHalPath, outDir, step):
    """Measure how long ``halBranchMutations`` takes for the given step.

    step <= 0 scans inHalPath directly; a positive step scans the
    "lod"/"hal" file located through makePath.  The genome scanned is
    index 1 of getHalGenomes(inHalPath), and the --refFile argument is the
    "bed" path makePath builds from that genome name.

    Returns [elapsed], the time.time() difference around the command.
    """
    if step > 0:
        scanPath = makePath(inHalPath, outDir, step, "lod", "hal")
    else:
        scanPath = inHalPath

    allGenomes = getHalGenomes(inHalPath)
    assert len(allGenomes) > 1
    genome = allGenomes[1]
    bedFile = makePath(inHalPath, outDir, step, genome, "bed")

    cmdArgs = (scanPath, genome, bedFile)
    begin = time.time()
    runShellCommand("halBranchMutations %s %s --refFile %s" % cmdArgs)
    end = time.time()
    return [end - begin]
Esempio n. 6
0
def getPrecisionRecall(inHalPath, outDir, step, doMaf):
    """Return four floats taken from a step's two comparison summaries.

    With doMaf true, the third line of the "comp"/"txt" file and of the
    "comp_near"/"txt" file (both located via makePath) is split on
    whitespace; its third token must be "self)" and tokens 3 and 4 are
    converted to float.  The result is [comp[3], comp[4], near[3], near[4]].
    (Presumably precision/recall pairs, judging by the function name --
    confirm against the summarizer output.)

    With doMaf false nothing is read: step 0 gives [1.0, 1.0, 1.0, 1.0],
    every other step gives [0.0, 0.0, 0.0, 0.0].

    Raises StopIteration if a summary has fewer than three lines and
    AssertionError if the marker token is not "self)".
    """
    def readThirdLineTokens(path):
        # The context manager closes the handle (the previous code leaked
        # it); builtin next() replaces the Python-2-only file.next().
        with open(path, "r") as handle:
            for _ in (0, 1):
                next(handle)
            fields = next(handle).split()
        assert fields[2] == "self)"
        return fields

    if doMaf:
        tokens = readThirdLineTokens(
            makePath(inHalPath, outDir, step, "comp", "txt"))
        tokensNear = readThirdLineTokens(
            makePath(inHalPath, outDir, step, "comp_near", "txt"))
        return [float(tokens[3]), float(tokens[4]),
                float(tokensNear[3]), float(tokensNear[4])]
    if step == 0:
        return [1.0, 1.0, 1.0, 1.0]
    return [0.0, 0.0, 0.0, 0.0]
Esempio n. 7
0
def compMaf(inHalPath, outDir, step, overwrite, doMaf):
    """Compare a step's MAF against the step-0 MAF and summarize the result.

    Two comparisons are run with ``mafComparator`` (100000 samples each):
    a plain one written to the "comp" xml, and one with ``--near int(step)``
    written to the "comp_near" xml.  After each, ``comparatorSummarizer.py``
    output is redirected into the matching "txt" file.

    Each comparison is skipped when doMaf is false, or when its xml file
    already exists and overwrite is false.
    """
    baselineMaf = makePath(inHalPath, outDir, 0, "out", "maf")
    stepMaf = makePath(inHalPath, outDir, step, "out", "maf")
    summarizeCmd = "comparatorSummarizer.py --xml %s > %s "

    # Plain comparison.
    plainXml = makePath(inHalPath, outDir, step, "comp", "xml")
    plainTxt = makePath(inHalPath, outDir, step, "comp", "txt")
    if doMaf and not (not overwrite and os.path.isfile(plainXml)):
        plainCmd = "mafComparator --maf1 %s --maf2 %s --out %s --samples 100000"
        runShellCommand(plainCmd % (baselineMaf, stepMaf, plainXml))
        runShellCommand(summarizeCmd % (plainXml, plainTxt))

    # Comparison with the --near option set to the step value.
    nearXml = makePath(inHalPath, outDir, step, "comp_near", "xml")
    nearTxt = makePath(inHalPath, outDir, step, "comp_near", "txt")
    if doMaf and not (not overwrite and os.path.isfile(nearXml)):
        nearCmd = "mafComparator --maf1 %s --maf2 %s --out %s --near %d --samples 100000"
        runShellCommand(nearCmd % (baselineMaf, stepMaf, nearXml, int(step)))
        runShellCommand(summarizeCmd % (nearXml, nearTxt))
Esempio n. 8
0
def compMaf(inHalPath, outDir, step, overwrite, doMaf):
    """Run mafComparator for a step (plain and --near) and write summaries.

    The step-0 "out"/"maf" file is compared with the step's "out"/"maf"
    file.  Results go to the "comp" and "comp_near" xml files, and
    comparatorSummarizer.py output for each is redirected to the
    corresponding "txt" file.  A comparison only runs when doMaf is true
    and either overwrite is true or its xml file does not exist yet.
    """
    def shouldRun(xmlFile):
        # Short-circuits exactly like the inline condition: the file
        # system is only consulted when doMaf is set and overwrite is not.
        return doMaf and (overwrite or not os.path.isfile(xmlFile))

    referenceMaf = makePath(inHalPath, outDir, 0, "out", "maf")
    candidateMaf = makePath(inHalPath, outDir, step, "out", "maf")

    compXml = makePath(inHalPath, outDir, step, "comp", "xml")
    compTxt = makePath(inHalPath, outDir, step, "comp", "txt")
    if shouldRun(compXml):
        compareArgs = (referenceMaf, candidateMaf, compXml)
        runShellCommand(
            "mafComparator --maf1 %s --maf2 %s --out %s --samples 100000"
            % compareArgs)
        runShellCommand(
            "comparatorSummarizer.py --xml %s > %s " % (compXml, compTxt))

    compNearXml = makePath(inHalPath, outDir, step, "comp_near", "xml")
    compNearTxt = makePath(inHalPath, outDir, step, "comp_near", "txt")
    if shouldRun(compNearXml):
        nearArgs = (referenceMaf, candidateMaf, compNearXml, int(step))
        runShellCommand(
            "mafComparator --maf1 %s --maf2 %s --out %s --near %d --samples 100000"
            % nearArgs)
        runShellCommand(
            "comparatorSummarizer.py --xml %s > %s "
            % (compNearXml, compNearTxt))