Esempio n. 1
0
def moveRoot(root, branch):
    """
    Removes the old root and places the new root at the mid point along the
    given branch.

    ``branch`` is matched against each node's ``traversalID.mid``. The tree is
    rebuilt as a newick string and re-parsed, so the returned tree is a fresh
    object. If ``branch`` already identifies ``root`` the tree is simply
    printed and parsed again.
    """
    from sonLib import bioio

    def render(subtree):
        # Printed form of the subtree minus its final character
        # (presumably the terminating ';' -- verify against printBinaryTree).
        return bioio.printBinaryTree(subtree, True)[:-1]

    if root.traversalID.mid == branch:
        return bioio.newickTreeParser(bioio.printBinaryTree(root, True))

    def attach(sibling, above):
        # Combine the subtree hanging off the path with everything collected
        # so far on the way down from the old root.
        rendered = render(sibling)
        if above is None:
            return rendered
        return '(' + rendered + ',' + above + ')'

    def descend(node, above):
        if node.traversalID.mid == branch:
            # Temporarily halve the branch length so that both sides of the
            # new root receive half of it, then restore the original value.
            original = node.distance
            node.distance /= 2
            rerooted = '(' + render(node) + ',(' + above + ('):%s' % node.distance) + ');'
            node.distance = original
            return rerooted
        if not node.internal:
            # Reached a leaf that is not the requested branch.
            return render(node)
        if branch < node.traversalID.mid:
            return descend(node.left, attach(node.right, above))
        assert branch > node.traversalID.mid
        return descend(node.right, attach(node.left, above))

    return bioio.newickTreeParser(descend(root, None))
Esempio n. 2
0
 def testGetLongestPath(self):
     """
     Checks getLongestPath against three small hand-written newick trees.
     """
     # (newick string, expected longest path)
     cases = (
         ("(b(a:0.5):0.5,b(a:1.5):0.5)", 2.0),
         ("(b(a:0.5):0.5,b(a:1.5,c:10):0.5)", 10.5),
         ("(b(a:0.5):0.5,b(a:1.5,c:10,e,f:20):0.5)", 20.5),
     )
     for newick, expected in cases:
         self.assertAlmostEqual(
             getLongestPath(newickTreeParser(newick)), expected)
Esempio n. 3
0
def remodelTreeRemovingRoot(root, node):
    """
    Node is mid order number.

    Finds the leaf whose ``traversalID.mid`` equals ``node``, records the
    chain of ancestors from that leaf up to ``root``, then builds a newick
    string by walking that chain upwards and parses it into a new tree.
    """
    from sonLib import bioio
    assert root.traversalID.mid != node
    parentOf = {}

    def pathToRoot(subtree):
        # Returns [leaf, parent, grandparent, ...] up to `subtree`, or None
        # when the requested leaf is not below `subtree`.
        if subtree.traversalID.mid == node:
            assert subtree.internal == False
            return [subtree]
        if not subtree.internal:
            return None
        path = pathToRoot(subtree.left)
        if path is None:
            path = pathToRoot(subtree.right)
        if path is not None:
            parentOf[path[-1]] = subtree
            path.append(subtree)
        return path

    path = pathToRoot(root)

    def sibling(parent, child):
        # The other child of `parent`.
        if parent.left == child:
            return parent.right
        assert parent.right == child
        return parent.left

    def render(current):
        parent = parentOf[current]
        reachedRoot = parent == root
        # NOTE(review): printBinaryTree receives three positional arguments
        # here (sibling, current, True) while moveRoot passes only two --
        # confirm this matches the intended printBinaryTree signature.
        body = bioio.printBinaryTree(sibling(parent, current), current, True)[:-1]
        if reachedRoot:
            wrapped = '(' + body + ')'
        else:
            wrapped = '(' + body + ',' + render(parent) + ')'
        return wrapped + ":" + str(current.distance)

    newick = render(path[0]) + ';'
    return bioio.newickTreeParser(newick)
Esempio n. 4
0
 def testNewickTreeParser(self):
     """
     Round-trips a fixed newick string through newickTreeParser and
     printBinaryTree; skipped when self.testNo is not positive.
     """
     if not self.testNo > 0:
         return
     expected = '((human,baboon),chimp);'
     parsed = newickTreeParser(expected)
     printed = printBinaryTree(parsed, False)
     print(expected, printed)
     assert expected == printed
Esempio n. 5
0
 def testNewickTreeParser_UnaryNodes(self):
     """
     Round-trips self.testNo random tree strings through newickTreeParser
     (with reportUnaryNodes=True) and printBinaryTree, asserting that the
     printed form equals the input.
     """
     for _ in range(self.testNo):
         tree = getRandomTreeString()
         # The tree is passed as a lazy %-argument, so the message needs a
         # matching %s placeholder; without one the logging module reports a
         # formatting error instead of emitting the record.
         logger.debug("tree to try\t%s", tree)
         parsed = newickTreeParser(tree, reportUnaryNodes=True)
         printed = printBinaryTree(parsed, True)
         logger.debug("tree found\t%s", printed)
         assert tree == printed
def main():
    """
    Command-line entry point: for every selected cycle directory under
    options.simDir, run a four step command sequence that turns seq.rev
    into seq.fa and a name-prefixed copy seq.name.fa.

    A directory is processed when options.allCycles is set or when its
    basename is one of the leaf names extracted from options.inputNewick.
    """
    usage = ('usage: %prog --simDir path/to/dir [options]\n\n'
             '%prog takes in a simulation directory and then extracts\n'
             'the sequence of each leaf node in fasta format and stores them\n'
             'in the respective step\'s directory.')
    parser = OptionParser(usage=usage)
    initOptions(parser)
    options, args = parser.parse_args()
    checkOptions(options, parser)

    cycleDirs = directoriesOnly(glob.glob(os.path.join(options.simDir, '*')))
    leafs = {}
    extractLeafs(newickTreeParser(options.inputNewick, 0.0), leafs)

    for cycleDir in cycleDirs:
        dirName = os.path.basename(cycleDir)
        if not (options.allCycles or dirName in leafs):
            continue

        # Strip brackets and single quotes from the directory name before
        # using it as the sequence name prefix.
        cleanName = dirName.replace('[', '').replace(']', '').replace('\'', '')

        seqRev = os.path.join(cycleDir, 'seq.rev')
        fastaTmp = os.path.join(cycleDir, 'seq.fa.tmp')
        fasta = os.path.join(cycleDir, 'seq.fa')
        namedTmp = os.path.join(cycleDir, 'seq.name.fa.tmp')
        named = os.path.join(cycleDir, 'seq.name.fa')

        # Each step is (argv, input pipe, output pipe). Outputs are written
        # to a .tmp file first and then moved into place.
        steps = [
            ([lsc.which('evolver_cvt'), '-fromrev', seqRev, '-tofasta', fastaTmp],
             None, None),
            ([lsc.which('mv'), fastaTmp, fasta], None, None),
            # Rewrites a leading '>' on a line to '>' + cleanName + '.'.
            ([lsc.which('sed'), r"s/^>/>%s./;" % cleanName], fasta, namedTmp),
            ([lsc.which('mv'), namedTmp, named], None, None),
        ]
        cmds = [argv for argv, _, _ in steps]
        inPipes = [source for _, source, _ in steps]
        outPipes = [sink for _, _, sink in steps]

        lsc.runCommands(cmds, os.curdir, outPipes=outPipes, inPipes=inPipes, mode='s')
 def run(self):
     """
     Parses the input newick tree, labels its root with the basename of
     options.rootDir, schedules a MergeTree child target and, unless
     options.noBurninMerge is set, a MergeTreeFollow follow-on target.
     """
     opts = self.options
     logger.info('Extract object running, rootDir: %s' % (opts.rootDir))
     tree = newickTreeParser(opts.inputNewick, 0.0)
     tree.iD = os.path.basename(opts.rootDir)
     self.addChildTarget(
         MergeTree(tree, self.nodeDict, self.nodeParentDict,
                   self.leafsDict, self.options))
     if self.options.noBurninMerge:
         return
     self.setFollowOnTarget(
         MergeTreeFollow(tree, self.nodeDict, self.nodeParentDict,
                         self.leafsDict, self.options))
 def run(self):
     """
     Parses the input newick tree and schedules Tree child targets: a single
     'stem' target when the root has a non-zero distance, otherwise one
     target each for the 'left' and 'right' subtrees.
     """
     opts = self.options
     root = newickTreeParser(opts.inputNewick, 0.0)
     if root.distance != 0:
         self.addChildTarget(
             Tree(lsc.tree2str(root), opts.parentDir, 'stem', opts))
         return
     for side in ('left', 'right'):
         subtree = getattr(root, side)
         self.addChildTarget(
             Tree(lsc.tree2str(subtree), opts.parentDir, side, opts))
Esempio n. 9
0
    def testCalculateProbableRootOfGeneTree_Examples(self):
        """
        Runs calculateProbableRootOfGeneTree on a handful of fixed gene
        trees against a fixed species tree and prints the outcome.

        Nothing is asserted; the results are only printed.
        """
        speciesTree = newickTreeParser(
            '(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);')
        binaryTree_depthFirstNumbers(speciesTree)

        # Pairs of (gene tree, rooted gene tree). The second element is
        # presumably the expected rooting; it is parsed and printed but
        # never compared against the computed result.
        examples = [
            ('((human,baboon),chimp);',
             '((human,chimp),baboon);'),
            ('((human,chimp),baboon);',
             '((human,chimp),baboon);'),
            ('((((human,chimp),baboon),((dog,cat),cow)),(mouse,rat));',
             '((((human,chimp),baboon),(mouse,rat)),((dog,cat),cow));'),
            ('((((human,chimp),baboon),(mouse,rat)),((dog,cat),cow));',
             '((((human,chimp),baboon),(mouse,rat)),((dog,cat),cow));'),
            ('((((human,(chimp, chimp)),baboon),((dog,cat),cow)),(mouse,rat));',
             '((((human,(chimp,chimp)),baboon),(mouse,rat)),((dog,cat),cow));'),
        ]
        for geneString, rootedGeneString in examples:
            geneTree = newickTreeParser(geneString)
            # Parsed only to check that the string is well formed.
            rootedGeneTree = newickTreeParser(rootedGeneString)
            binaryTree_depthFirstNumbers(geneTree)
            result = calculateProbableRootOfGeneTree(speciesTree, geneTree)
            rootedGeneTree2, dupCount, lossCount = result
            print("rootedGeneTree", rootedGeneString, dupCount, lossCount,
                  printBinaryTree(rootedGeneTree2, False))
Esempio n. 10
0
    def testCalculateDupsAndLossesByReconcilingTrees_Examples(self):
        """
        Reconciles a set of fixed gene trees with a fixed species tree and
        asserts the expected duplication and loss counts for each.
        """
        speciesTree = newickTreeParser(
            '(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);')
        binaryTree_depthFirstNumbers(speciesTree)

        # (gene tree, expected duplications, expected losses)
        cases = [
            ('((human,baboon),chimp);', 1, 3),
            ('((human,chimp),baboon);', 0, 0),
            ('((human,(human, chimp)),baboon);', 1, 1),
            ('((human,(human, chimp)),(chimp, baboon));', 2, 3),
            ('(dog,cat);', 0, 0),
            ('((dog,cat), cow);', 0, 0),
            ('(cow,(dog,cat));', 0, 0),
            ('(cow,(cat,dog));', 0, 0),
            ('((cow,dog),(dog,cow));', 1, 2),
            ('((cow,(cow,cow)),(dog,cat));', 2, 0),
            ('((cow,(cow,cow)),(dog,((cat,cat),cat)));', 4, 0),
        ]

        def identity(x):
            # Leaf names are used unchanged as their IDs.
            return x

        print("")
        for geneString, expectedDups, expectedLosses in cases:
            geneTree = newickTreeParser(geneString)
            binaryTree_depthFirstNumbers(geneTree)
            print(printBinaryTree(geneTree, True),
                  printBinaryTree(speciesTree, True))
            foundDups, foundLosses = calculateDupsAndLossesByReconcilingTrees(
                speciesTree, geneTree, processID=identity)
            print(geneString, "dups", expectedDups, foundDups, "losses",
                  expectedLosses, foundLosses)
            assert expectedDups == foundDups
            assert expectedLosses == foundLosses
Esempio n. 11
0
def main():
    """
    Command-line entry point: parses and checks the options, collects the
    names from the input newick tree (plus options.rootName), then parses,
    standardizes and prints the statistics and the script.
    """
    parser = OptionParser(usage='usage: %prog --simDir path/to/dir [options]')
    initOptions(parser)
    options, args = parser.parse_args()
    checkOptions(options, parser)

    tree = newickTreeParser(options.inputNewick, 0.0)
    # An unnamed root takes the name given on the command line.
    if tree.iD is None:
        tree.iD = options.rootName

    names = {}
    extractLeafsAndIntBranches(tree, options, names)
    names[options.rootName] = True

    stats = parseStats(options, names)
    standardizeResults(options, stats)
    printStats(options, stats)
    printScript(options, stats)
def main():
    """
    Command-line entry point.

    After option parsing and validation, the input newick tree is parsed,
    its names (together with options.rootName) are gathered into a dict,
    and the parsed statistics are standardized and printed.
    """
    usageText = 'usage: %prog --simDir path/to/dir [options]'
    parser = OptionParser(usage=usageText)
    initOptions(parser)
    options, args = parser.parse_args()
    checkOptions(options, parser)

    newickTree = newickTreeParser(options.inputNewick, 0.0)
    if newickTree.iD is None:
        # Fall back to the root name supplied through the options.
        newickTree.iD = options.rootName

    leafs = {}
    extractLeafsAndIntBranches(newickTree, options, leafs)
    leafs[options.rootName] = True

    results = parseStats(options, leafs)
    standardizeResults(options, results)
    # Report the standardized results, then emit the script.
    printStats(options, results)
    printScript(options, results)
 def run(self):
     """
     Schedules the Extract child targets for the simulation tree and a
     MergeManager follow-on.

     A 'burnin' extraction of options.rootDir is added when aln.rev exists
     there and burnin.tmp.maf does not. One extraction is then added per
     (node, child) pair in the node list built from the input newick tree.
     """
     opts = self.options
     logger.info('ExtractionManager object running, rootDir: %s' % (opts.rootDir))
     tree = newickTreeParser(opts.inputNewick, 0.0)
     nodesList = []
     leafsDict = {}
     lsc.extractLeafsFromNewick(tree, leafsDict)
     tree.iD = os.path.basename(opts.rootDir)
     lsc.buildNodesListFromNewick(tree, nodesList, leafsDict)

     if os.path.exists(os.path.join(opts.rootDir, 'aln.rev')):
         if not os.path.exists(os.path.join(opts.rootDir, 'burnin.tmp.maf')):
             self.addChildTarget(Extract(opts.rootDir, 'burnin', False, opts))

     for parent in nodesList:
         for child in parent.children:
             # The child alignment is named for the parent node.
             childDir = os.path.join(opts.simDir, child)
             self.addChildTarget(
                 Extract(childDir, parent.name, child in leafsDict, opts))

     self.setFollowOnTarget(MergeManager(nodesList, leafsDict, opts))
Esempio n. 14
0
def checkOptions(options, parser):
    """
    Validates and normalizes the parsed command-line options in place.

    Problems are reported through parser.error. On success, rootName may be
    filled in from the newick root, outDir is created, the directory options
    are made absolute, and a non-'stochastic' seed is converted to int.
    """
    # Newick tree and root name
    if options.inputNewick is None:
        parser.error('Specify --inputNewick.')
    tree = newickTreeParser(options.inputNewick, 0.0)
    if options.rootName is None:
        if tree.iD is None:
            parser.error('Specify --rootName')
        else:
            options.rootName = tree.iD

    # Reserved words are not allowed in the newick tree.
    if newickContainsReservedWord(tree, options):
        parser.error('Newick tree contains reserved word: %s. '
                     'Maybe try --rootName=NAME to resolve or rename nodes in the newick.\n' %
                     newickContainsReservedWord(tree, options))

    # Output directory: must be given and must not exist yet.
    if options.outDir is None:
        parser.error('specify --outDir.\n')
    if os.path.exists(options.outDir):
        parser.error('%s already exists! If your simulation crashed, '
                     'relaunch it with "jobTreeRun --jobTree %s/" \n' %
                     (os.path.join(options.outDir), options.jobTree))
    options.outDir = os.path.abspath(options.outDir)
    if not os.path.exists(options.outDir):
        os.mkdir(options.outDir)

    # Root input directory
    if options.rootInputDir is None:
        parser.error('Specify --rootDir.\n')
    if not os.path.isdir(options.rootInputDir):
        parser.error('--rootDir "%s" not a directory!\n' % options.rootInputDir)
    options.rootInputDir = os.path.abspath(options.rootInputDir)

    # Parameters directory
    if options.paramsDir is None:
        parser.error('Specify --params.\n')
    if not os.path.isdir(options.paramsDir):
        parser.error('Params dir "%s" not a directory!\n' % options.paramsDir)
    options.paramsDir = os.path.abspath(options.paramsDir)

    # Step length and seed
    if options.stepLength <= 0:
        parser.error('specify positive stepLength.\n')
    if options.seed != 'stochastic':
        options.seed = int(options.seed)
 def run(self):
     """
     Schedules the next targets for the subtree given by self.thisNewickStr.

     A root with non-zero distance gets a single 'stem' Tree child. A
     zero-distance internal root gets one Tree child per non-leaf branch.
     A zero-distance leaf gets a LeafCleanUp follow-on.
     """
     logger.info('TreeFollow object running, %s' % self.thisGrandParentDir)
     tree = newickTreeParser(self.thisNewickStr, 0.0)
     name = lsc.nameTree(tree)
     commonParentDir = os.path.abspath(os.path.join(self.options.simDir, name))

     if tree.distance != 0:
         # stem with distance
         self.addChildTarget(Tree(lsc.tree2str(tree), commonParentDir, 'stem', self.options))
         # The computed directory is not used further in this method.
         childDir = lsc.treeStr2Dir(lsc.takeNewickStep(lsc.tree2str(tree), self.options)[0],
                                     self.options.simDir)
         return

     if not tree.internal:
         # follow up to leaf cycles... Transalign and Stats only
         self.setFollowOnTarget(LeafCleanUp(commonParentDir,
                                            self.thisGrandParentDir, self.options))
         return

     # branch point
     branches = { 'left' : lsc.tree2str(tree.left),
                  'right': lsc.tree2str(tree.right) }
     for side, subtree in branches.items():
         if lsc.nodeIsLeaf(subtree):
             continue
         self.addChildTarget(Tree(subtree, commonParentDir, side, self.options))
         # The computed directory is not used further in this method.
         childDir = lsc.treeStr2Dir(lsc.takeNewickStep(subtree, self.options)[0],
                                     self.options.simDir)
Esempio n. 16
0
 def testGetLongestPath(self):
     """
     Checks getLongestPath against three small hand-written newick trees.
     """
     # assertAlmostEqual replaces the deprecated assertAlmostEquals alias,
     # which no longer exists in Python 3.12 and later.
     # (newick string, expected longest path)
     cases = (
         ("(b(a:0.5):0.5,b(a:1.5):0.5)", 2.0),
         ("(b(a:0.5):0.5,b(a:1.5,c:10):0.5)", 10.5),
         ("(b(a:0.5):0.5,b(a:1.5,c:10,e,f:20):0.5)", 20.5),
     )
     for newick, expected in cases:
         self.assertAlmostEqual(getLongestPath(newickTreeParser(newick)), expected)
Esempio n. 17
0
def main():
    """
    Command-line entry point for extracting fasta sequences.

    Every directory under options.simDir is considered; unless
    options.allCycles is set, only those whose basename is a leaf name of
    options.inputNewick are processed. For each one, seq.rev is converted
    to seq.fa and a copy with name-prefixed headers is written to
    seq.name.fa.
    """
    usage = ('usage: %prog --simDir path/to/dir [options]\n\n'
             '%prog takes in a simulation directory and then extracts\n'
             'the sequence of each leaf node in fasta format and stores them\n'
             'in the respective step\'s directory.')
    parser = OptionParser(usage=usage)
    initOptions(parser)
    options, args = parser.parse_args()
    checkOptions(options, parser)

    candidates = glob.glob(os.path.join(options.simDir, '*'))
    candidates = directoriesOnly(candidates)
    leafs = {}
    tree = newickTreeParser(options.inputNewick, 0.0)
    extractLeafs(tree, leafs)

    for cycleDir in candidates:
        baseName = os.path.basename(cycleDir)
        if not options.allCycles and baseName not in leafs:
            continue

        # Drop brackets and single quotes from the name used as prefix.
        cleanName = baseName
        for unwanted in ('[', ']', '\''):
            cleanName = cleanName.replace(unwanted, '')

        revPath = os.path.join(cycleDir, 'seq.rev')
        fastaTmpPath = os.path.join(cycleDir, 'seq.fa.tmp')
        fastaPath = os.path.join(cycleDir, 'seq.fa')
        namedTmpPath = os.path.join(cycleDir, 'seq.name.fa.tmp')
        namedPath = os.path.join(cycleDir, 'seq.name.fa')

        # (argv, input pipe, output pipe) for each command, in run order.
        pipeline = (
            ([lsc.which('evolver_cvt'), '-fromrev', revPath,
              '-tofasta', fastaTmpPath], None, None),
            ([lsc.which('mv'), fastaTmpPath, fastaPath], None, None),
            # Turns a leading '>' into '>' + cleanName + '.'.
            ([lsc.which('sed'), r"s/^>/>%s./;" % cleanName],
             fastaPath, namedTmpPath),
            ([lsc.which('mv'), namedTmpPath, namedPath], None, None),
        )

        cmds = []
        outPipes = []
        inPipes = []
        for argv, source, sink in pipeline:
            inPipes.append(source)
            outPipes.append(sink)
            cmds.append(argv)

        lsc.runCommands(cmds,
                        os.curdir,
                        outPipes=outPipes,
                        inPipes=inPipes,
                        mode='s')