def makeMaf(inHalPath, outDir, step, overwrite, doMaf):
    """Export the HAL file for a step to MAF with hal2maf.

    For step 0 the input HAL file is the source; for any positive step the
    source is the step's "lod"/"hal" path built by makePath.  The command
    runs only when doMaf is set and either overwrite is set or the target
    MAF does not exist yet.
    """
    halSource = (makePath(inHalPath, outDir, step, "lod", "hal")
                 if step > 0 else inHalPath)
    mafTarget = makePath(inHalPath, outDir, step, "out", "maf")

    if not doMaf:
        return
    # Keep an existing MAF unless the caller asked to regenerate it.
    if not overwrite and os.path.isfile(mafTarget):
        return
    runShellCommand("hal2maf %s %s" % (halSource, mafTarget))
def runSteps(inHalPath, outDir, maxBlock, scale, steps, overwrite, doMaf,
             keepSeq, trans, inMemory):
    """Generate every level-of-detail file and collect one stats row per step.

    Step 0 describes the input HAL file itself.  Each later entry of `steps`
    (index 1 onward) gets a "lod"/"hal" file extracted with
    runHalLodExtract at scale ** index, unless the file already exists and
    overwrite is not True.  When trans is True, steps after the first are
    extracted from the previous step's LOD file instead of the input.

    If `steps` is None it is computed with getSteps(inHalPath, maxBlock,
    scale).

    Returns a defaultdict(list) mapping step -> row, where a row is the
    HAL file size divided by 1024, followed by the values from
    getHalTotalSegments, getPrecisionRecall and getScanTime, in that order.
    """
    def statsRow(halPath, step, withMaf):
        # One table row; the call order matches the column order.
        row = [os.path.getsize(halPath) / 1024]
        row += list(getHalTotalSegments(halPath))
        row += getPrecisionRecall(inHalPath, outDir, step, withMaf)
        row += getScanTime(inHalPath, outDir, step)
        return row

    results = defaultdict(list)

    # Baseline: the original alignment.  No comparison files exist for it,
    # so getPrecisionRecall is always asked for its non-MAF answer here.
    makeMaf(inHalPath, outDir, 0, overwrite, doMaf)
    results[0] = statsRow(inHalPath, 0, False)

    if steps is None:
        steps = getSteps(inHalPath, maxBlock, scale)

    for idx in xrange(1, len(steps)):
        step = steps[idx]
        lodPath = makePath(inHalPath, outDir, step, "lod", "hal")

        if trans is True and idx > 1:
            extractFrom = makePath(inHalPath, outDir, steps[idx - 1],
                                   "lod", "hal")
        else:
            extractFrom = inHalPath

        if overwrite is True or not os.path.isfile(lodPath):
            runHalLodExtract(extractFrom, lodPath, scale ** idx, keepSeq,
                             inMemory)

        makeMaf(inHalPath, outDir, step, overwrite, doMaf)
        compMaf(inHalPath, outDir, step, overwrite, doMaf)

        results[step] = statsRow(lodPath, step, doMaf)

    return results
def getPrecisionRecall(inHalPath, outDir, step, doMaf):
    """Return four comparison scores for a step as a list of floats.

    When doMaf is set, the third line of the step's "comp" and "comp_near"
    summary text files is parsed.  Its third whitespace-separated token
    must be "self)", and its fourth and fifth tokens are converted to
    floats (presumably precision and recall, going by the function name --
    confirm against the comparatorSummarizer.py output format).  The result
    is [comp[3], comp[4], comp_near[3], comp_near[4]].

    When doMaf is not set, step 0 yields four 1.0 values and every other
    step yields four 0.0 values.

    Raises StopIteration if a summary file has fewer than three lines, and
    AssertionError if the third line is not the expected "self)" row.
    """
    # NOTE(review): a second definition of getPrecisionRecall appears later
    # in this file and would shadow this one -- confirm which is intended.

    def readSelfRow(path):
        # The context manager closes the handle even when parsing fails;
        # previously both summary files were left open.
        with open(path, "r") as summary:
            next(summary)
            next(summary)
            tokens = next(summary).split()
        assert tokens[2] == "self)", "unexpected summary row in %s" % path
        return [float(tokens[3]), float(tokens[4])]

    if doMaf:
        exact = readSelfRow(makePath(inHalPath, outDir, step, "comp", "txt"))
        near = readSelfRow(
            makePath(inHalPath, outDir, step, "comp_near", "txt"))
        return exact + near
    if step == 0:
        return [1.0, 1.0, 1.0, 1.0]
    return [0.0, 0.0, 0.0, 0.0]
def getScanTime(inHalPath, outDir, step):
    """Time one halBranchMutations run over the HAL file for a step.

    The scanned file is the input HAL for step 0 and the step's "lod"/"hal"
    path otherwise.  The genome scanned is the second entry returned by
    getHalGenomes(inHalPath), and the step's "bed" path for that genome is
    passed as --refFile.

    Returns a one-element list holding the elapsed wall-clock seconds.
    """
    # NOTE(review): a second definition of getScanTime appears later in
    # this file and would shadow this one -- confirm which is intended.
    if step > 0:
        halToScan = makePath(inHalPath, outDir, step, "lod", "hal")
    else:
        halToScan = inHalPath

    genomeNames = getHalGenomes(inHalPath)
    assert len(genomeNames) > 1
    target = genomeNames[1]
    bedOut = makePath(inHalPath, outDir, step, target, "bed")

    started = time.time()
    runShellCommand("halBranchMutations %s %s --refFile %s" %
                    (halToScan, target, bedOut))
    return [time.time() - started]
def getScanTime(inHalPath, outDir, step):
    """Measure how long halBranchMutations takes on a step's HAL file.

    Step 0 scans the input HAL file; a positive step scans the "lod"/"hal"
    file built by makePath for that step.  The second genome listed by
    getHalGenomes(inHalPath) is the one scanned, with the matching "bed"
    path given as --refFile.

    Returns [seconds], the wall-clock duration of the shell command.
    """
    scanPath = (makePath(inHalPath, outDir, step, "lod", "hal")
                if step > 0 else inHalPath)

    names = getHalGenomes(inHalPath)
    # Index 1 is used below, so at least two genomes are required.
    assert len(names) > 1
    genome = names[1]
    refBed = makePath(inHalPath, outDir, step, genome, "bed")

    begin = time.time()
    runShellCommand(
        "halBranchMutations %s %s --refFile %s" % (scanPath, genome, refBed))
    seconds = time.time() - begin
    return [seconds]
def getPrecisionRecall(inHalPath, outDir, step, doMaf):
    """Return four comparison scores for a step as a list of floats.

    With doMaf set, the step's "comp" and "comp_near" summary text files
    are read in that order.  From each, the third line is split on
    whitespace; its third token must be "self)" and its fourth and fifth
    tokens are converted to floats (presumably precision and recall, going
    by the function name -- confirm against the summarizer's output).

    Without doMaf, step 0 returns [1.0, 1.0, 1.0, 1.0] and any other step
    returns [0.0, 0.0, 0.0, 0.0].

    Raises StopIteration if a summary file has fewer than three lines, and
    AssertionError if the third line is not the expected "self)" row.
    """
    if not doMaf:
        if step == 0:
            return [1.0, 1.0, 1.0, 1.0]
        return [0.0, 0.0, 0.0, 0.0]

    scores = []
    for tag in ("comp", "comp_near"):
        sumPath = makePath(inHalPath, outDir, step, tag, "txt")
        # `with` closes the file on every path; the handles used to leak.
        with open(sumPath, "r") as sumFile:
            # Skip the first two lines; the third is the one parsed.
            next(sumFile)
            next(sumFile)
            tokens = next(sumFile).split()
        assert tokens[2] == "self)", "unexpected summary row in %s" % sumPath
        scores.append(float(tokens[3]))
        scores.append(float(tokens[4]))
    return scores
def compMaf(inHalPath, outDir, step, overwrite, doMaf):
    """Compare a step's MAF against the step-0 MAF and summarize the result.

    Two comparisons are run with mafComparator: a plain one written to the
    step's "comp" XML, and one with --near set to int(step) written to the
    "comp_near" XML.  Each XML is then passed through
    comparatorSummarizer.py into the matching ".txt" path.

    A comparison runs only when doMaf is set and either overwrite is set or
    its XML output does not exist yet.
    """
    # NOTE(review): a second definition of compMaf appears later in this
    # file and would shadow this one -- confirm which is intended.
    baseMaf = makePath(inHalPath, outDir, 0, "out", "maf")
    stepMaf = makePath(inHalPath, outDir, step, "out", "maf")

    exactXml = makePath(inHalPath, outDir, step, "comp", "xml")
    exactTxt = makePath(inHalPath, outDir, step, "comp", "txt")
    if doMaf:
        if overwrite or not os.path.isfile(exactXml):
            runShellCommand(
                "mafComparator --maf1 %s --maf2 %s --out %s --samples 100000"
                % (baseMaf, stepMaf, exactXml))
            runShellCommand(
                "comparatorSummarizer.py --xml %s > %s " % (exactXml, exactTxt))

    nearXml = makePath(inHalPath, outDir, step, "comp_near", "xml")
    nearTxt = makePath(inHalPath, outDir, step, "comp_near", "txt")
    if doMaf:
        if overwrite or not os.path.isfile(nearXml):
            runShellCommand(
                "mafComparator --maf1 %s --maf2 %s --out %s --near %d --samples 100000"
                % (baseMaf, stepMaf, nearXml, int(step)))
            runShellCommand(
                "comparatorSummarizer.py --xml %s > %s " % (nearXml, nearTxt))
def compMaf(inHalPath, outDir, step, overwrite, doMaf):
    """Run the plain and "near" MAF comparisons for a step.

    The step-0 MAF is --maf1 and the step's own MAF is --maf2.  The plain
    comparison writes the step's "comp" XML; the second adds --near with
    int(step) and writes the "comp_near" XML.  After each mafComparator
    run, comparatorSummarizer.py redirects a summary of that XML into the
    matching ".txt" path.

    Nothing is run unless doMaf is set.  An existing XML file is left
    alone unless overwrite is set.
    """
    summarizeCmd = "comparatorSummarizer.py --xml %s > %s "
    plainCmd = "mafComparator --maf1 %s --maf2 %s --out %s --samples 100000"
    nearCmd = "mafComparator --maf1 %s --maf2 %s --out %s --near %d --samples 100000"

    referenceMaf = makePath(inHalPath, outDir, 0, "out", "maf")
    candidateMaf = makePath(inHalPath, outDir, step, "out", "maf")

    plainXml = makePath(inHalPath, outDir, step, "comp", "xml")
    plainSummary = makePath(inHalPath, outDir, step, "comp", "txt")
    # Skip only when overwriting is off and the XML is already present.
    if doMaf and not (not overwrite and os.path.isfile(plainXml)):
        runShellCommand(plainCmd % (referenceMaf, candidateMaf, plainXml))
        runShellCommand(summarizeCmd % (plainXml, plainSummary))

    closeXml = makePath(inHalPath, outDir, step, "comp_near", "xml")
    closeSummary = makePath(inHalPath, outDir, step, "comp_near", "txt")
    if doMaf and not (not overwrite and os.path.isfile(closeXml)):
        runShellCommand(
            nearCmd % (referenceMaf, candidateMaf, closeXml, int(step)))
        runShellCommand(summarizeCmd % (closeXml, closeSummary))