def getSpliceEvent(defString, location):
    """Classify an isoform 'note string' as a splice event.

    Parameters
    ----------
    defString : str
        Free-text description of the event, for example
        "Missing (in isoform 2)" or "MKL -> A (in isoform 3)". It is
        lower-cased before parsing, so the residues returned are lower case.
    location : mapping
        Provides either "GBInterval_from" and "GBInterval_to", or a single
        "GBInterval_point". The values are converted with int().

    Returns
    -------
    list
        [event, start, end, old, new]. event is "missing" or "replacement".
        start is the 'from' (or point) value minus one and end is the 'to'
        (or point) value unchanged -- presumably turning a 1-based inclusive
        interval into a 0-based half-open one; confirm against callers.
        old/new are the residues either side of the arrow for a replacement
        and empty strings for a missing event.

    Raises
    ------
    KeyError
        If location has neither the from/to pair nor a point entry.
    ProteinObjectException
        If the string contains none of "missing", "->" or ">".
    """
    defString = defString.lower()

    # grab location information
    try:
        start = int(location["GBInterval_from"]) - 1
        end = int(location["GBInterval_to"])
    except KeyError:
        # some regions are defined as a single point
        point = int(location["GBInterval_point"])
        start, end = point - 1, point

    def replacement(separator):
        # Drop every space, then split once on the separator: everything
        # before it is the old sequence, and the leading run of letters after
        # it is the new one (anything from the first non-letter on, such as
        # "(inisoform2)", is discarded).
        old, _, rest = defString.replace(" ", "").partition(separator)
        new = re.search(r"[a-zA-Z]*", rest).group()
        # str() is retained from the original implementation; it is a no-op
        # when old is already a str.
        return ["replacement", start, end, str(old), new]

    # the string describes a missing event
    if "missing" in defString:
        return ["missing", start, end, "", ""]

    # the string describes a replacement event
    if "->" in defString:
        return replacement("->")

    ########################################
    # If we get here the input is, technically, malformed. We still try to
    # salvage it by handling known deviations explicitly.
    show_warning("The isoform line \n\n[" + defString + "] \nis malformed\n\nTrying to parse regardless...\n")

    # Known deviation 1: ">" used instead of "->"
    if ">" in defString:
        return replacement(">")

    raise ProteinObjectException("Isoform 'note string' had neither 'missing' nor '->' in it, and\neven specific known exception handling couldn't help!\nParse error!")
def _xml_is_OK(self, proteinxml): if len(proteinxml) > 1: show_warning(" [ProteinObject._xml_is_ok()] - ProteinXML detected more than one record associated with this GI.\nThis should never happen.") return False # Nothing in XML - so return an empty-initiailized object with exists = 0 if len(proteinxml) == 0: return False # Check that we're really dealing with protein (despite specifying db="protein" # on the efetch call, when a GI is used other databases seem to be searched too) try: if not (proteinxml[0]["GBSeq_moltype"] == "AA"): return False except TypeError: return False return True
isoformSequenceList_keys = isoformSequenceList.keys() nametoIsoID_keys = nametoIsoID.keys() # remove 1 try: nametoIsoID_keys.remove("1") except Exception: pass if len(set(isoformSequenceList_keys + nametoIsoID_keys))-len(nametoIsoID_keys) == 1: for i in nametoIsoID_keys: if i not in isoformSequenceList_keys: try: isoformReturnVal[nametoIsoID[i]] = missing_data show_warning("Malformed isoform data lead to an inconsistency, but the gremlins think they fixed it... [ID=" + self.accession + "]") except KeyError: show_warning("Malformed isoform data has lead to an inconsistency - skipping that isoform [ID=" + self.accession + "]") # OH COME ON!! if salvage > 1: show_warning("Malformed isoform data has lead to an inconsistency - skipping several isoforms [ID=" + self.accession + "]") return isoformReturnVal