Esempio n. 1
0
    def run(self, fileStore):
        """Prepare this event's subproblem and schedule its trimming-blast phase.

        Loads the project config and the experiment registered for
        ``self.event``, gathers the sequence IDs of the genomes that take part
        in the subproblem, writes the experiment XML back to the file store,
        merges the HAL/FASTA build flags from ``self.options`` with the ``hal``
        node of the experiment's config, and adds a
        ``CactusTrimmingBlastPhase`` child job.

        Args:
            fileStore: file store used to read global files, allocate a local
                temp file and write the experiment back as a global file.

        Returns:
            Whatever ``rv()`` of the ``CactusTrimmingBlastPhase`` child yields.
        """
        projectConfigPath = fileStore.readGlobalFile(self.project.getConfigID())
        self.configNode = ET.parse(projectConfigPath).getroot()
        self.configWrapper = ConfigWrapper(self.configNode)
        self.configWrapper.substituteAllPredefinedConstantsWithLiterals()

        logger.info("Progressive Up: " + self.event)

        # Open up the experiment registered for this event.
        inputExperimentPath = fileStore.readGlobalFile(self.project.expIDMap[self.event])
        experiment = ExperimentWrapper(ET.parse(inputExperimentPath).getroot())

        # Fetch and parse the experiment's config exactly once; the same tree
        # serves both the hal/fasta option lookup and the workflow arguments.
        configPath = fileStore.readGlobalFile(experiment.getConfigID())
        configNode = ET.parse(configPath).getroot()

        # Collect the sequence IDs for every leaf, plus the root genome when
        # the root is not being reconstructed.
        seqIDMap = dict()
        tree = experiment.getTree()
        seqNames = []
        for node in tree.postOrderTraversal():
            name = tree.getName(node)
            if tree.isLeaf(node) or (name == experiment.getRootGenome() and experiment.isRootReconstructed() == False):
                seqIDMap[name] = self.project.outputSequenceIDMap[name]
                seqNames.append(name)
        logger.info("Sequences in progressive, %s: %s" % (self.event, seqNames))

        # Write the experiment to a fresh local file and record its global
        # file ID in the options.
        localExperimentFile = fileStore.getLocalTempFile()
        experiment.writeXML(localExperimentFile)
        self.options.experimentFileID = fileStore.writeGlobalFile(localExperimentFile)

        # Take the union of command line options and config options for hal
        # and reference. `== False` is kept on purpose: a value of None must
        # not be treated as "unset" here.
        halNode = findRequiredNode(configNode, "hal")
        if self.options.buildHal == False:
            self.options.buildHal = getOptionalAttrib(halNode, "buildHal", bool, False)
        if self.options.buildFasta == False:
            self.options.buildFasta = getOptionalAttrib(halNode, "buildFasta", bool, False)

        # Get the parameters that the cactus_workflow stuff wants.
        workFlowArgs = CactusWorkflowArguments(self.options, experimentFile=localExperimentFile, configNode=configNode, seqIDMap=seqIDMap)

        # Copy over the options so we don't trail them around.
        workFlowArgs.buildHal = self.options.buildHal
        workFlowArgs.buildFasta = self.options.buildFasta
        workFlowArgs.globalLeafEventSet = self.options.globalLeafEventSet
        if self.options.intermediateResultsUrl is not None:
            # Give the URL prefix a special name for this particular
            # subproblem (by suffixing it with the name of the
            # internal node in the guide tree).
            workFlowArgs.intermediateResultsUrl = self.options.intermediateResultsUrl + '-' + self.event

        # Use the trimming strategy to blast ingroups vs outgroups.
        finalExpWrapper = self.addChild(CactusTrimmingBlastPhase(cactusWorkflowArguments=workFlowArgs, phaseName="trimBlast")).rv()
        logger.info("Going to create alignments and define the cactus tree")

        return finalExpWrapper
Esempio n. 2
0
class TestCase(unittest.TestCase):
    """Checks ExperimentWrapper accessors against a minimal dummy XML tree."""

    def setUp(self):
        """Build the dummy XML, wrap it, and populate tree, genomes and IDs."""
        super(TestCase, self).setUp()
        newick = '((((HUMAN:0.006969,CHIMP:0.009727)anc2:0.025291,BABOON:0.044568)anc1:0.11,(MOUSE:0.072818,RAT:0.081244):0.260342):0.02326,((DOG:0.07,CAT:0.07):0.087381,(PIG:0.06,COW:0.06):0.104728):0.04);'
        self.tree = NXNewick().parseString(newick)
        self.xmlRoot = self.__makeXmlDummy()
        self.exp = ExperimentWrapper(self.xmlRoot)
        self.exp.setTree(self.tree)
        self.seqMap = dict([
            ('HUMAN', 'human.txt'),
            ('CHIMP', 'chimp.txt'),
            ('BABOON', 'baboon.txt'),
            ('MOUSE', 'mouse.txt'),
            ('RAT', 'rat.txt'),
            ('DOG', 'dog.txt'),
            ('CAT', 'cat.txt'),
            ('PIG', 'pig.txt'),
            ('COW', 'cow.txt'),
        ])
        outgroups = ['MOUSE', 'RAT', 'DOG', 'CAT', 'PIG', 'COW']
        self.exp.setRootGenome('anc1')
        self.exp.setRootReconstructed(True)
        self.exp.setOutgroupGenomes(outgroups)
        # These aren't real IDs, but should still work for our purposes.
        for genomeName, fakeID in self.seqMap.items():
            self.exp.setSequenceID(genomeName, fakeID)

    def testGetSequencePath(self):
        # Every ID stored in setUp must be returned unchanged.
        for genomeName, expectedID in self.seqMap.items():
            storedID = self.exp.getSequenceID(genomeName)
            self.assertEqual(storedID, expectedID)

        # Should not be any entries for genomes not in the tree
        missingID = self.exp.getSequenceID('DUCK')
        self.assertEqual(missingID, None)

    def testChangingSequencePaths(self):
        """Checks that an updated sequence ID survives re-wrapping the XML."""
        self.exp.setSequenceID('HUMAN', 'human2.txt')
        self.assertEqual(self.exp.getSequenceID('HUMAN'), 'human2.txt')
        # Build a new wrapper over the same XML root and look the ID up again.
        rewrapped = ExperimentWrapper(self.xmlRoot)
        self.exp = rewrapped
        self.assertEqual(self.exp.getSequenceID('HUMAN'), 'human2.txt')

    def testOutgroups(self):
        # The outgroups configured in setUp come back (order-insensitive).
        expected = {'MOUSE', 'RAT', 'DOG', 'CAT', 'PIG', 'COW'}
        self.assertEqual(set(self.exp.getOutgroupGenomes()), expected)
        # Setting an empty list must yield an empty list.
        self.exp.setOutgroupGenomes([])
        self.assertEqual(self.exp.getOutgroupGenomes(), [])

    def testRootGenome(self):
        # The root genome set in setUp is reported, and can be replaced.
        self.assertEqual(self.exp.getRootGenome(), 'anc1')
        self.exp.setRootGenome('anc2')
        self.assertEqual(self.exp.getRootGenome(), 'anc2')

    def testSetTree(self):
        # A modified version of the tree, with fewer genomes and a new one
        # (ARMADILLO) that was never given a sequence ID.
        smallerTree = NXNewick().parseString(
            '((HUMAN:0.006969,CHIMP:0.009727):0.025291,BABOON:0.044568,ARMADILLO:1.0);'
        )
        self.exp.setTree(smallerTree)
        withSequence = set(self.exp.getGenomesWithSequence())
        self.assertEqual(withSequence, {'HUMAN', 'CHIMP', 'BABOON'})

    def __makeXmlDummy(self):
        """Return a "dummy" root element containing the cactus_disk element."""
        dummyRoot = ET.Element("dummy")
        dummyRoot.append(self.__makeDiskElem())
        return dummyRoot

    def __makeDiskElem(self):
        """Return cactus_disk > st_kv_database_conf[type=kyoto_tycoon] > kyoto_tycoon."""
        disk = ET.Element("cactus_disk")
        conf = ET.SubElement(disk, "st_kv_database_conf", {'type': 'kyoto_tycoon'})
        ET.SubElement(conf, 'kyoto_tycoon')
        return disk