예제 #1
0
    def extractStep(self):
        """Build a set of PDB entries from the PDB ids the input items reference.

        Each input item that has an ``_iteractsWithPDBId`` attribute provides
        a ';'-separated list of PDB ids. For every distinct PDB id, the ids of
        the items that reference it are collected (``_PDBChemId`` when the
        item has it, otherwise ``getDbId()``) and stored, joined with " ; ",
        on the output entry. The result is registered as ``outputPDBs``.

        Note: ``smallMolID`` is a single flag for the whole run; as soon as
        one contributing item has ``_PDBChemId``, every output entry stores
        its list in ``_interactsWithPDBChemId`` instead of
        ``_interactsWithChemId``.
        """
        listIds = {}
        smallMolID = "none"
        for item in self.inputListID.get():
            if not hasattr(item, "_iteractsWithPDBId"):
                continue

            # Identifier of the referencing item: the PDB chemical id takes
            # precedence over the generic database id.
            chemId = item.getDbId()
            if hasattr(item, "_PDBChemId"):
                chemId = item._PDBChemId.get()
                smallMolID = "pdbchem"

            for token in item._iteractsWithPDBId.get().split(";"):
                pdbId = token.strip()
                if not pdbId:
                    # A trailing or doubled ';' yields an empty token; it must
                    # not become an output entry with an empty PDB id.
                    continue
                listIds.setdefault(pdbId, []).append(chemId)

        outputDatabaseID = SetOfDatabaseID().create(path=self._getPath(),
                                                    suffix='PDBs')
        for pdbId, chemIds in listIds.items():
            pdb = DatabaseID()
            pdb.setDatabase("pdb")
            pdb.setDbId(pdbId)
            pdb._pdbId = pwobj.String(pdbId)
            pdb._PDBLink = pwobj.String("https://www.rcsb.org/structure/%s" %
                                        pdbId)
            aux = " ; ".join(chemIds)
            if smallMolID == "none":
                pdb._interactsWithChemId = pwobj.String(aux)
            elif smallMolID == "pdbchem":
                pdb._interactsWithPDBChemId = pwobj.String(aux)
            outputDatabaseID.append(pdb)
        self._defineOutputs(outputPDBs=outputDatabaseID)
        self._defineSourceRelation(self.inputListID, outputDatabaseID)
    def searchStep(self):
        """Annotate each input PDB entry with its UniProt accession.

        For every input item the PDB->UniProt alignment XML is downloaded
        (up to three attempts) into the extra path unless a file with that
        name is already there. The first ``alignObject`` element whose
        ``dbSource`` is "UniProt" provides the accession stored in
        ``_uniprotId`` / ``_uniprotLink``; both default to "Not available".
        The resulting set is registered as ``outputUniprot``.
        """
        outputDatabaseID = SetOfDatabaseID().create(path=self._getPath())
        for item in self.inputListID.get():
            newItem = DatabaseID()
            newItem.copy(item)
            newItem._uniprotId = pwobj.String("Not available")
            newItem._uniprotLink = pwobj.String("Not available")

            pdbId = item._pdbId.get()
            print("Processing %s" % pdbId)

            urlId = "https://www.rcsb.org/pdb/rest/das/pdb_uniprot_mapping/alignment?query=%s" % pdbId
            if hasattr(item, "_chain"):
                urlId += "." + item._chain.get().upper()

            # NOTE(review): the cache file name ignores the chain, so two
            # chains of the same PDB entry share one XML file - confirm this
            # is intended.
            fnXml = self._getExtraPath("%s.xml" % pdbId)
            if not os.path.exists(fnXml):
                print("Fetching uniprot: %s" % urlId)
                lastError = None
                for _ in range(3):
                    try:
                        urllib.request.urlretrieve(urlId, fnXml)
                        break
                    except Exception as exc:
                        # The library raises when the page is not found.
                        # Only Exception is caught so that Ctrl-C and
                        # SystemExit still stop the protocol.
                        lastError = exc
                else:
                    print("    Cannot fetch %s: %s" % (urlId, lastError))

            if os.path.exists(fnXml):
                try:
                    tree = ET.parse(fnXml)

                    uniprotId = None
                    for child in tree.getroot().iter():
                        if child.tag.endswith("alignObject"):
                            if child.attrib['dbSource'] == "UniProt":
                                uniprotId = child.attrib['dbAccessionId']
                                break
                    if uniprotId:
                        newItem._uniprotId = pwobj.String(uniprotId)
                        newItem._uniprotLink = pwobj.String(
                            "https://www.uniprot.org/uniprot/%s" % uniprotId)
                except Exception:
                    print("    Cannot parse the Uniprot XML: %s" % fnXml)

                # NOTE(review): items whose XML could not be downloaded are
                # left out of the output set (behaviour kept from the
                # original) - confirm that dropping them is intended.
                outputDatabaseID.append(newItem)

        self._defineOutputs(outputUniprot=outputDatabaseID)
        self._defineSourceRelation(self.inputListID, outputDatabaseID)
    def searchStep(self):
        """Fetch the UniProt FASTA file of every input entry.

        For each input item the FASTA file of its ``_uniprotId`` is
        downloaded (up to three attempts) into the extra path unless it is
        already there. Items are copied to the output set with
        ``_uniprotFile`` and ``_unitprotSeqLength`` filled in ("Not available"
        and -1 when no file could be obtained). All available FASTA files are
        also concatenated into ``sequences.fasta``.

        Outputs: ``outputUniprot`` (the annotated set) and ``outputSequence``
        (a ProteinSequenceFile pointing to the concatenated file).
        """
        outputDatabaseID = SetOfDatabaseID().create(path=self._getPath())
        fnList = []
        for item in self.inputListID.get():
            newItem = DatabaseID()
            newItem.copy(item)
            newItem._uniprotFile = pwobj.String("Not available")
            newItem._unitprotSeqLength = pwobj.Integer(-1)

            uniprotId = item._uniprotId.get()
            print("Processing %s" % uniprotId)

            urlId = "https://www.uniprot.org/uniprot/%s.fasta" % uniprotId

            fnFasta = self._getExtraPath("%s.fasta" % uniprotId)
            if not os.path.exists(fnFasta):
                print("Fetching uniprot: %s" % urlId)
                lastError = None
                for _ in range(3):
                    try:
                        urllib.request.urlretrieve(urlId, fnFasta)
                        break
                    except Exception as exc:
                        # The library raises when the page is not found.
                        # Only Exception is caught so that Ctrl-C and
                        # SystemExit still stop the protocol.
                        lastError = exc
                else:
                    print("    Cannot fetch %s: %s" % (urlId, lastError))

            if os.path.exists(fnFasta):
                # Register the file whether it was just downloaded or was
                # already cached; otherwise cached sequences would be missing
                # from sequences.fasta. Each file is listed only once.
                if fnFasta not in fnList:
                    fnList.append(fnFasta)
                newItem._uniprotFile = pwobj.String(fnFasta)
                newItem._unitprotSeqLength = pwobj.Integer(
                    sequenceLength(fnFasta))

            outputDatabaseID.append(newItem)

        fnAll = self._getPath("sequences.fasta")
        with open(fnAll, 'w') as outfile:
            for fname in fnList:
                with open(fname) as infile:
                    for line in infile:
                        outfile.write(line)
                    outfile.write('\n\n')
        seqFile = ProteinSequenceFile()
        seqFile.setFileName(fnAll)

        self._defineOutputs(outputUniprot=outputDatabaseID)
        self._defineSourceRelation(self.inputListID, outputDatabaseID)
        self._defineOutputs(outputSequence=seqFile)
        self._defineSourceRelation(self.inputListID, seqFile)
    def constructOutput(self, fnTxt):
        """Parse a results text file into a set of PDB database ids.

        The part of the file name between the first '-' and the following
        '.' (if any) is used as the suffix of both the output set and the
        output name (``outputDatabaseIds<subset>``).

        Blank lines and lines starting with '#' are skipped; parsing stops at
        the "# Structural equivalences" header. On every other line the
        whitespace-separated columns are read as: [1] ``<pdbId>[-<chain>]``,
        [2] Z-score, [3] RMSD, [4] superposition length, [5] sequence length,
        [6] sequence identity, [7:] description.

        :param fnTxt: path of the results text file to parse.
        """
        fnResults = os.path.split(fnTxt)[1]
        nameParts = fnResults.split('-')
        subset = nameParts[1].split('.')[0] if len(nameParts) > 1 else ""

        outputSet = SetOfDatabaseID.create(path=self._getPath(), suffix=subset)
        # The context manager closes the file even when the loop is left
        # early through ``break`` or a parsing error.
        with open(fnTxt, "r") as fhTxt:
            for line in fhTxt:
                line = line.strip()
                if line.startswith("# Structural equivalences"):
                    break
                if line == "" or line.startswith("#"):
                    continue

                tokens = line.split()
                tokens2 = tokens[1].split('-')
                pdbId = DatabaseID()
                pdbId.setDatabase("pdb")
                pdbId.setDbId(tokens[1])
                pdbId._pdbId = pwobj.String(tokens2[0])
                if len(tokens2) > 1:
                    pdbId._chain = pwobj.String(tokens2[1])
                pdbId._PDBLink = pwobj.String(
                    "https://www.rcsb.org/structure/%s" % tokens2[0])
                pdbId._DaliZscore = pwobj.Float(float(tokens[2]))
                pdbId._DaliRMSD = pwobj.Float(float(tokens[3]))
                pdbId._DaliSuperpositionLength = pwobj.Integer(int(tokens[4]))
                pdbId._DaliSeqLength = pwobj.Integer(int(tokens[5]))
                pdbId._DaliSeqIdentity = pwobj.Float(float(tokens[6]))
                pdbId._DaliDescription = pwobj.String(" ".join(tokens[7:]))
                outputSet.append(pdbId)

        outputDict = {'outputDatabaseIds%s' % subset: outputSet}
        self.protocol._defineOutputs(**outputDict)
        self.protocol._defineSourceRelation(self.protocol.inputStructure,
                                            outputSet)
예제 #5
0
    def operateStep(self):
        """Apply the selected operation to the input list(s) of database ids.

        ``self.operation`` selects what is done:
        0 unique, 1 union (over ``multipleInputListID``), 2 intersection and
        3 difference (``inputListID`` against ``inputListID2``), 4 change ID,
        5 keep columns, 6 filter by the value of a column.

        Selected entries are gathered in a dictionary keyed by their database
        id, so an entry stored later under an already used id replaces the
        earlier one; when ``removeDuplicates`` is set the first entry is kept
        instead. The result is registered as ``output``.
        """
        operation = self.operation.get()
        outputDict = {}
        if operation == 1:
            # Union
            for database in self.multipleInputListID:
                for databaseEntry in database.get():
                    add = True
                    if self.removeDuplicates.get():
                        add = databaseEntry.getDbId() not in outputDict
                    if add:
                        dbEntry = DatabaseID()
                        dbEntry.copy(databaseEntry, copyId=False)
                        outputDict[databaseEntry.getDbId()] = dbEntry
        elif operation in (0, 2, 3):
            # Unique, Intersection, Difference
            # Ids of the second list, kept as a set for fast membership tests.
            idsInList2 = set()
            if operation in (2, 3):
                for databaseEntry in self.inputListID2.get():
                    idsInList2.add(databaseEntry.getDbId())

            for databaseEntry in self.inputListID.get():
                add = False
                if operation == 0:  # Unique
                    add = databaseEntry.getDbId() not in outputDict
                elif operation == 2:  # Intersection
                    add = databaseEntry.getDbId() in idsInList2
                    if self.removeDuplicates.get():
                        add = add and databaseEntry.getDbId() not in outputDict
                elif operation == 3:  # Difference
                    add = databaseEntry.getDbId() not in idsInList2
                    if self.removeDuplicates.get():
                        add = add and databaseEntry.getDbId() not in outputDict
                if add:
                    dbEntry = DatabaseID()
                    dbEntry.copy(databaseEntry)
                    outputDict[databaseEntry.getDbId()] = dbEntry
        elif operation == 4:
            # Change ID
            # newDb is taken as a literal database label unless it matches
            # the name of an attribute of the first item, in which case the
            # database is read from that attribute.
            newLabel = True
            for name, _ in self.inputListID.get().getFirstItem().getAttributes(
            ):
                if self.newDb.get() == name:
                    newLabel = False
                    break

            for databaseEntry in self.inputListID.get():
                dbEntry = DatabaseID()
                dbEntry.copy(databaseEntry)
                if hasattr(dbEntry, self.newDbId.get()):
                    if newLabel:
                        dbEntry.setDatabase(self.newDb.get())
                    else:
                        dbEntry.setDatabase(
                            dbEntry.getAttributeValue(self.newDb.get()))
                    dbEntry.setDbId(
                        dbEntry.getAttributeValue(self.newDbId.get()))
                add = True
                if self.removeDuplicates.get():
                    add = add and dbEntry.getDbId() not in outputDict
                if add:
                    outputDict[dbEntry.getDbId()] = dbEntry
        elif operation == 5:
            # Keep columns
            keepList = [x.strip() for x in self.keepColumns.get().split()]
            keepList.append("database")
            keepList.append("dbId")

            # Every attribute of the first item that is not kept is ignored
            # when copying the entries.
            ignoreList = []
            for name, _ in self.inputListID.get().getFirstItem().getAttributes(
            ):
                if name not in keepList:
                    ignoreList.append(name)
            for databaseEntry in self.inputListID.get():
                dbEntry = DatabaseID()
                dbEntry.copy(databaseEntry, ignoreAttrs=ignoreList)
                add = True
                if self.removeDuplicates.get():
                    add = add and dbEntry.getDbId() not in outputDict
                if add:
                    outputDict[dbEntry.getDbId()] = dbEntry
        elif operation == 6:
            # Filter columns
            # The reference value is converted to the type found in the
            # filter column of the first item.
            referenceValue = self.filterValue.get()
            value = self.inputListID.get().getFirstItem().getAttributeValue(
                self.filterColumn.get())
            if isinstance(value, float):
                referenceValue = float(referenceValue)
            elif isinstance(value, int):
                referenceValue = int(referenceValue)

            for databaseEntry in self.inputListID.get():
                dbEntry = DatabaseID()
                dbEntry.copy(databaseEntry)
                add = False

                value = dbEntry.getAttributeValue(self.filterColumn.get())
                if isinstance(value, Float):
                    value = float(value)
                elif isinstance(value, Integer):
                    value = int(value)

                filterOp = self.filterOp.get()
                if filterOp == 0:  # ==
                    add = value == referenceValue
                elif filterOp == 1:  # >
                    add = value > referenceValue
                elif filterOp == 2:  # >=
                    # Fixed: this branch used '>' and so dropped equal values.
                    add = value >= referenceValue
                elif filterOp == 3:  # <
                    add = value < referenceValue
                elif filterOp == 4:  # <=
                    add = value <= referenceValue
                elif filterOp == 5:  # !=
                    add = value != referenceValue
                elif filterOp == 6:  # startswith
                    add = value.startswith(referenceValue)
                elif filterOp == 7:  # endswith
                    add = value.endswith(referenceValue)
                elif filterOp == 8:  # contains
                    add = referenceValue in value
                elif filterOp == 9:  # does not startswith
                    add = not value.startswith(referenceValue)
                elif filterOp == 10:  # does not endswith
                    add = not value.endswith(referenceValue)
                elif filterOp == 11:  # does not contains
                    add = referenceValue not in value

                if self.removeDuplicates.get():
                    add = add and dbEntry.getDbId() not in outputDict
                if add:
                    outputDict[dbEntry.getDbId()] = dbEntry

        outputDatabaseID = SetOfDatabaseID().create(path=self._getPath())
        for dbEntry in outputDict.values():
            outputDatabaseID.append(dbEntry)
        self._defineOutputs(output=outputDatabaseID)
        self._defineSourceRelation(self.inputListID, outputDatabaseID)