Example #1
0
class MultiCactusProject:
    def __init__(self):
        self.mcTree = None
        self.expMap = dict()
        self.expIDMap = None
        self.inputSequences = []
        self.inputSequenceIDs = None
        self.outputSequenceIDMap = None
        self.configID = None

    def readXML(self, path):
        xmlRoot = ET.parse(path).getroot()
        treeElem = xmlRoot.find("tree")
        self.mcTree = MultiCactusTree(NXNewick().parseString(
            treeElem.text, addImpliedRoots=False))
        self.expMap = dict()
        self.expIDMap = dict()
        cactusPathElemList = xmlRoot.findall("cactus")
        for cactusPathElem in cactusPathElemList:
            nameElem = cactusPathElem.attrib["name"]
            pathElem = cactusPathElem.attrib["experiment_path"]
            self.expMap[nameElem] = pathElem
            if "experiment_id" in cactusPathElem.attrib:
                self.expIDMap[nameElem] = cactusPathElem.attrib[
                    "experiment_id"]
        self.inputSequences = xmlRoot.attrib["inputSequences"].split()
        if "inputSequenceIDs" in xmlRoot.attrib:
            self.inputSequenceIDs = xmlRoot.attrib["inputSequenceIDs"].split()
        if "outputSequenceIDs" in xmlRoot.attrib:
            self.outputSequenceIDMap = dict(
                zip(xmlRoot.attrib["outputSequenceIDs"].split(),
                    xmlRoot.attrib["outputSequenceNames"].split()))

        logger.info("xmlRoot = %s" % ET.tostring(xmlRoot))
        if "configID" in xmlRoot.attrib:
            self.configID = xmlRoot.attrib["configID"]

        self.mcTree.assignSubtreeRootNames(self.expMap)

    def writeXML(self, path):
        xmlRoot = ET.Element("multi_cactus")
        treeElem = ET.Element("tree")
        treeElem.text = NXNewick().writeString(self.mcTree)
        xmlRoot.append(treeElem)
        for name, expPath in self.expMap.items():
            cactusPathElem = ET.Element("cactus")
            cactusPathElem.attrib["name"] = name
            cactusPathElem.attrib["experiment_path"] = expPath
            if self.expIDMap:
                cactusPathElem.attrib["experiment_id"] = self.expIDMap[name]
            xmlRoot.append(cactusPathElem)
        #We keep track of all the input sequences at the top level
        xmlRoot.attrib["inputSequences"] = " ".join(self.inputSequences)
        if self.inputSequenceIDs:
            xmlRoot.attrib["inputSequenceIDs"] = " ".join(
                self.inputSequenceIDs)
        if self.outputSequenceIDMap:
            xmlRoot.attrib["outputSequenceIDs"] = " ".join(
                self.outputSequenceIDMap.values())
            xmlRoot.attrib["outputSequenceNames"] = " ".join(
                self.outputSequenceIDMap.keys())
        if self.configID:
            xmlRoot.attrib["configID"] = self.configID

        xmlFile = open(path, "w")
        xmlString = ET.tostring(xmlRoot)
        xmlString = minidom.parseString(xmlString).toprettyxml()
        xmlFile.write(xmlString)
        xmlFile.close()

    def syncToFileStore(self, toil):
        self.expIDMap = dict()
        for name, expPath in self.expMap.items():
            expWrapper = ExperimentWrapper(ET.parse(expPath).getroot())
            expWrapper.setConfigID(
                toil.importFile("file://" + expWrapper.getConfig()))
            if expWrapper.getConstraintsFilePath():
                expWrapper.setConstraintsID(
                    toil.importFile("file://" +
                                    expWrapper.getConstraintsFilePath()))
            expWrapper.writeXML(expPath)
            self.expIDMap[name] = toil.importFile("file://" + expPath)

    def getInputSequenceIDMap(self):
        """Return a map between event names and sequence IDs.
        """
        inputSequenceMap = dict()
        i = 0
        for node in self.mcTree.postOrderTraversal():
            if self.mcTree.isLeaf(node) is True:
                inputSequenceMap[self.mcTree.getName(node)] = \
                  self.inputSequenceIDs[i]
                i += 1
        assert i == len(self.inputSequenceIDs)
        return inputSequenceMap

    def getInputSequenceIDs(self):
        """Get the set of input sequences for the multicactus tree
        """
        return self.inputSequenceIDs

    def getInputSequencePaths(self):
        return self.inputSequences

    def setOutputSequenceIDs(self, outputSequenceIDs):
        self.outputSequenceIDMap = dict()
        i = 0
        for node in self.mcTree.postOrderTraversal():
            if self.mcTree.isLeaf(node) is True:
                self.outputSequenceIDMap[self.mcTree.getName(node)] = \
                  outputSequenceIDs[i]
                i += 1
        assert i == len(outputSequenceIDs)

    def getOutputSequenceIDMap(self):
        return self.outputSequenceIDMap

    def getConfigPath(self):
        return ExperimentWrapper(ET.parse(
            self.expMap.values()[0]).getroot()).getConfigPath()

    def setConfigID(self, configID):
        self.configID = configID

    def getConfigID(self):
        return self.configID

    def setInputSequenceIDs(self, inputSequenceIDs):
        self.inputSequenceIDs = inputSequenceIDs
Example #2
0
class MultiCactusProject:
    def __init__(self):
        self.mcTree = None
        self.expMap = dict()
        self.expIDMap = None
        self.inputSequenceMap = {}
        self.inputSequenceIDMap = {}
        self.outputSequenceIDMap = {}
        self.configID = None

    def readXML(self, path):
        xmlRoot = ET.parse(path).getroot()
        treeElem = xmlRoot.find("tree")
        self.mcTree = MultiCactusTree(NXNewick().parseString(
            treeElem.text, addImpliedRoots=False))
        self.expMap = dict()
        self.expIDMap = dict()
        cactusPathElemList = xmlRoot.findall("cactus")
        for cactusPathElem in cactusPathElemList:
            nameElem = cactusPathElem.attrib["name"]
            pathElem = cactusPathElem.attrib["experiment_path"]
            self.expMap[nameElem] = pathElem
            if "experiment_id" in cactusPathElem.attrib:
                self.expIDMap[nameElem] = cactusPathElem.attrib[
                    "experiment_id"]
        self.inputSequenceMap = dict(
            zip(xmlRoot.attrib["inputSequenceNames"].split(),
                xmlRoot.attrib["inputSequences"].split()))
        if "inputSequenceIDs" in xmlRoot.attrib:
            self.inputSequenceIDMap = dict(
                zip(xmlRoot.attrib["inputSequenceIDNames"].split(),
                    xmlRoot.attrib["inputSequenceIDs"].split()))
        if "outputSequenceIDs" in xmlRoot.attrib:
            self.outputSequenceIDMap = dict(
                zip(xmlRoot.attrib["outputSequenceNames"].split(),
                    xmlRoot.attrib["outputSequenceIDs"].split()))

        logger.info("xmlRoot = %s" % ET.tostring(xmlRoot))
        if "configID" in xmlRoot.attrib:
            self.configID = xmlRoot.attrib["configID"]

        self.mcTree.assignSubtreeRootNames(self.expMap)

    def writeXML(self, path):
        xmlRoot = ET.Element("multi_cactus")
        treeElem = ET.Element("tree")
        treeElem.text = NXNewick().writeString(self.mcTree)
        xmlRoot.append(treeElem)
        for name, expPath in self.expMap.items():
            cactusPathElem = ET.Element("cactus")
            cactusPathElem.attrib["name"] = name
            cactusPathElem.attrib["experiment_path"] = expPath
            if self.expIDMap:
                cactusPathElem.attrib["experiment_id"] = self.expIDMap[name]
            xmlRoot.append(cactusPathElem)
        #We keep track of all the input sequences at the top level
        xmlRoot.attrib["inputSequences"] = " ".join(
            self.inputSequenceMap.values())
        xmlRoot.attrib["inputSequenceNames"] = " ".join(
            self.inputSequenceMap.keys())
        if self.inputSequenceIDMap:
            xmlRoot.attrib["inputSequenceIDs"] = " ".join(
                self.inputSequenceIDMap.values())
            xmlRoot.attrib["inputSequenceIDNames"] = " ".join(
                self.inputSequenceIDMap.keys())
        if self.outputSequenceIDMap:
            xmlRoot.attrib["outputSequenceIDs"] = " ".join(
                self.outputSequenceIDMap.values())
            xmlRoot.attrib["outputSequenceNames"] = " ".join(
                self.outputSequenceIDMap.keys())
        if self.configID:
            xmlRoot.attrib["configID"] = self.configID

        xmlFile = open(path, "w")
        xmlString = ET.tostring(xmlRoot)
        xmlString = minidom.parseString(xmlString).toprettyxml()
        xmlFile.write(xmlString)
        xmlFile.close()

    def syncToFileStore(self, toil):
        self.expIDMap = dict()
        for name, expPath in self.expMap.items():
            expWrapper = ExperimentWrapper(ET.parse(expPath).getroot())
            expWrapper.setConfigID(
                toil.importFile("file://" + expWrapper.getConfigPath()))
            expWrapper.writeXML(expPath)
            self.expIDMap[name] = toil.importFile("file://" + expPath)

    def getConfigPath(self):
        return ExperimentWrapper(ET.parse(
            self.expMap.values()[0]).getroot()).getConfigPath()

    def setConfigID(self, configID):
        self.configID = configID

    def getConfigID(self):
        return self.configID
Example #3
0
class MultiCactusProject:
    def __init__(self):
        self.mcTree = None
        self.expMap = dict()
        self.inputSequences = []
        self.outputSequenceDir = None

    def readXML(self, path):
        xmlRoot = ET.parse(path).getroot()
        treeElem = xmlRoot.find("tree")
        self.mcTree = MultiCactusTree(NXNewick().parseString(treeElem.text, addImpliedRoots=False))
        self.expMap = dict()
        cactusPathElemList = xmlRoot.findall("cactus")
        for cactusPathElem in cactusPathElemList:
            nameElem = cactusPathElem.attrib["name"]
            pathElem = cactusPathElem.attrib["experiment_path"]
            self.expMap[nameElem] = pathElem
        self.inputSequences = xmlRoot.attrib["inputSequences"].split()
        self.outputSequenceDir = xmlRoot.attrib["outputSequenceDir"]
        self.mcTree.assignSubtreeRootNames(self.expMap)

    def writeXML(self, path):
        xmlRoot = ET.Element("multi_cactus")
        treeElem = ET.Element("tree")
        treeElem.text = NXNewick().writeString(self.mcTree)
        xmlRoot.append(treeElem)
        for name, expPath in self.expMap.items():
            cactusPathElem = ET.Element("cactus")
            cactusPathElem.attrib["name"] = name
            cactusPathElem.attrib["experiment_path"] = expPath
            xmlRoot.append(cactusPathElem)
        # We keep track of all the input sequences at the top level
        xmlRoot.attrib["inputSequences"] = " ".join(self.inputSequences)
        xmlRoot.attrib["outputSequenceDir"] = self.outputSequenceDir
        xmlFile = open(path, "w")
        xmlString = ET.tostring(xmlRoot)
        xmlString = minidom.parseString(xmlString).toprettyxml()
        xmlFile.write(xmlString)
        xmlFile.close()

    # find the sequence associated with an event name
    # by digging out the appropriate experiment file
    # doesn't work for the root!!!!
    def sequencePath(self, eventName):
        parentEvent = self.mcTree.getSubtreeRoot(eventName)
        expPath = self.expMap[parentEvent]
        expElem = ET.parse(expPath).getroot()
        exp = ExperimentWrapper(expElem)
        seq = exp.getSequence(eventName)
        assert os.path.isfile(seq)
        return seq

    def getInputSequenceMap(self):
        """Return a map between event names and sequence paths.  Paths
        are different from above in that they are not taken from experiment
        xmls, but rather from directly from the project xml.
        """
        inputSequenceMap = dict()
        i = 0
        for node in self.mcTree.postOrderTraversal():
            if self.mcTree.isLeaf(node) is True:
                inputSequenceMap[self.mcTree.getName(node)] = self.inputSequences[i]
                i += 1
        assert i == len(self.inputSequences)
        return inputSequenceMap

    def getInputSequencePaths(self):
        """Get the set of input sequences for the multicactus tree
        """
        return self.inputSequences

    def getOutputSequenceDir(self):
        """The directory where the output sequences go
        """
        return self.outputSequenceDir

    def getConfigPath(self):
        return ExperimentWrapper(ET.parse(self.expMap.values()[0]).getroot()).getConfigPath()