def defineComplexCorrespondenceWithNamingConventions(reactant, namingConventions, database, createOnDemand=False):
    """Reconcile a naming-convention decomposition with the decomposition
    already stored in ``database.labelDictionary``.

    Only acts when ``namingConventions[0][1]`` is a tuple (a candidate
    decomposition) and the convention's name is already registered.  When the
    stored decomposition and the candidate disagree, the conflict is logged;
    with ``createOnDemand=True`` the candidate may replace the stored entry
    and its members are registered as base elements.

    :param reactant: unused in this body -- kept for interface compatibility
    :returns: None (mutates ``database.labelDictionary`` in place)
    """
    if isinstance(namingConventions[0][1], tuple):
        #if len([x for x in namingConventions[0][1] if x not in database.labelDictionary]) == 0:
        if namingConventions[0][0] in database.labelDictionary:
            # a1: decomposition currently on record; a2: candidate decomposition
            a1 = (database.labelDictionary[namingConventions[0][0]][0])
            a2 = (namingConventions[0][1])
            # size of the symmetric difference: 0 means both agree
            evaluation = len([x for x in a1 if x not in a2]) + len([x for x in a2 if x not in a1])
            if evaluation != 0:
                #if there's already something in there
                if a1[0] != namingConventions[0][0]:
                    if not createOnDemand:
                        pass  # NOTE(review): empty branch -- no effect; confirm original intent
                    logMess("ERROR", "{0}: {1} =/= {2}".format(namingConventions[0][0], [x for x in a1 if x not in a2], [x for x in a2 if x not in a1]))
                    #print 'CONTRADICTORY ',namingConventions[0][0],'(',[x for x in a1 if x not in a2],[x for x in a2 if x not in a1],[x in database.labelDictionary for x in a2 if x not in a1],')'
                    # adopt the candidate only when the record misses more of the
                    # candidate than the candidate misses of the record
                    if createOnDemand and len([x for x in a1 if x not in a2]) < len([x for x in a2 if x not in a1]):
                        database.labelDictionary[namingConventions[0][0]] = [a2]
                        print 'CONTRADICTORY ', namingConventions[0][0], '(', [x for x in a1 if x not in a2], [x for x in a2 if x not in a1], [x in database.labelDictionary for x in a2 if x not in a1], ')'
                        # register every yet-unknown member of the candidate as a base element
                        for element in [x for x in a2 if x not in database.labelDictionary]:
                            database.labelDictionary[element] = [(element,)]
                #if we already know about all the elements in there
                elif len([x for x in a2 if x not in namingConventions]) == 0:
                    # NOTE(review): membership is tested against namingConventions
                    # itself (a list of pairs), not against its element names -- verify
                    print '++++', a1, a2, namingConventions[0][0]
    #database.labelDictionary[namingConventions[0][0]].append(namingConventions[0][1])
    #for element in [x for x in namingConventions[0][1] if x not in database.labelDictionary]:
    #    database.labelDictionary[element] = [(element,)]
    #if namingConventions[0][0] in database.labelDictionary:
    #    print namingConventions[0][0],[namingConventions[0][1]],database.labelDictionary[namingConventions[0][0]],namingConventions[0][1]==database.labelDictionary[namingConventions[0][0]]
    return
def resolveDependencyGraphHelper(dependencyGraph, reactant, memory, withModifications=False):
    """Recursively expand *reactant* into its base constituents.

    In plain mode every leaf (an entry missing from the graph or mapping to
    []) contributes itself; with ``withModifications`` the result instead
    collects (base, derived) pairs for single-element decompositions, and a
    revisited element raises CycleError.

    ``memory`` holds the chain of elements already visited on this path.
    """
    expansion = []
    if reactant not in dependencyGraph or dependencyGraph[reactant] == []:
        # leaf node: it stands for itself (plain mode) or contributes nothing
        if not withModifications:
            expansion.append([reactant])
        return expansion
    for alternative in dependencyGraph[reactant]:
        collected = []
        for member in alternative:
            if member in memory:
                if not withModifications:
                    # already on this path: treat it as terminal
                    expansion.append([member])
                    continue
                logMess('ERROR:Atomization', 'dependency cycle detected on {0}'.format(member))
                raise CycleError(memory)
            subExpansion = resolveDependencyGraphHelper(dependencyGraph, member,
                                                       memory + [member], withModifications)
            if subExpansion is not None:
                collected.extend(subExpansion)
        expansion.extend(collected)
        # record a direct base->derived link for one-element decompositions
        if withModifications and len(alternative) == 1 and alternative[0] != reactant:
            expansion.append((alternative[0], reactant))
    return expansion
def queryActiveSite(nameStr, organism):
    """Query the UniProt web service for entries matching *nameStr* that carry
    an ACTIVE SITE annotation.

    :param organism: falsy, or an iterable of organism URIs; the trailing path
        component of the first entry is used as the taxon filter
        (assumes a '.../taxonId'-style URI -- TODO confirm against callers)
    :returns: list of matching entry names, or the raw falsy response
        ('' or None) when the service could not be reached
    """
    url = 'http://www.uniprot.org/uniprot/?'
    response = None
    retry = 0
    # up to three attempts.  NOTE(review): there is no break on success, so the
    # organism-filtered request is re-issued every iteration -- confirm intent.
    while retry < 3:
        retry += 1
        if organism:
            organismExtract = list(organism)[0].split('/')[-1]
            xparams = 'query={0}+AND+organism:{1}&columns=entry name,id,feature(ACTIVE SITE)&format=tab&limit=5&sort=score'.format(
                nameStr, organismExtract)
            try:
                response = urllib2.urlopen(url, xparams).read()
            except urllib2.HTTPError:
                logMess('ERROR:MSC03', 'A connection could not be established to uniprot')
        # fall back to an organism-free query when nothing was retrieved
        if response in ['', None]:
            url = 'http://www.uniprot.org/uniprot/?'
            xparams = 'query={0}&columns=entry name,id,feature(ACTIVE SITE)&format=tab&limit=5&sort=score'.format(
                nameStr)
            try:
                response = urllib2.urlopen(url, xparams).read()
            except urllib2.HTTPError:
                logMess('ERROR:MSC03', 'A connection could not be established to uniprot')
    if not response:
        return response
    # tab-separated table; drop the header row
    parsedData = [x.split('\t') for x in response.split('\n')][1:]
    #return parsedData
    # keep entries whose underscore-separated name contains nameStr
    # (case-insensitive) and whose ACTIVE SITE column is non-empty
    return [
        x[0] for x in parsedData if len(x) == 3 and any(
            nameStr.lower() in z
            for z in [y.lower() for y in x[0].split('_')]) and len(x[2]) > 0
    ]
def resolveDependencyGraphHelper(dependencyGraph, reactant, memory, withModifications=False):
    """Expand *reactant* to its base species using *dependencyGraph*.

    Plain mode returns a list of single-element lists naming the base
    species; modification mode returns (base, derived) tuples for
    one-element decompositions and raises CycleError when *memory* (the
    path of elements visited so far) is re-entered.
    """
    # base species: expands to itself (plain mode) or to nothing (modification mode)
    if reactant not in dependencyGraph or dependencyGraph[reactant] == []:
        return [] if withModifications else [[reactant]]
    output = []
    for candidate in dependencyGraph[reactant]:
        partial = []
        for piece in candidate:
            if piece in memory:
                if withModifications:
                    logMess('ERROR:Atomization', 'dependency cycle detected on {0}'.format(piece))
                    raise CycleError(memory)
                output.append([piece])
                continue
            resolved = resolveDependencyGraphHelper(dependencyGraph, piece,
                                                    memory + [piece], withModifications)
            if resolved is not None:
                partial += resolved
        output += partial
        if len(candidate) == 1 and withModifications and candidate[0] != reactant:
            output.append((candidate[0], reactant))
    return output
def name2uniprot(nameStr, organism):
    """Resolve a species name to candidate UniProt ids via the UniProt
    web service.

    :param organism: falsy, or an iterable of organism URIs; the trailing
        path component of the first entry is used as the taxon filter
        (presumably a taxon id -- verify against callers)
    :returns: list of UniProt ids whose entry name contains nameStr
        (case-insensitive, split on '_'), or None on connection failure
    """
    url = 'http://www.uniprot.org/uniprot/?'
    response = None
    if organism:
        organismExtract = list(organism)[0].split('/')[-1]
        xparams = 'query={0}+AND+organism:{1}&columns=entry name,id&format=tab&limit=5&sort=score'.format(
            nameStr, organismExtract)
        try:
            response = urllib2.urlopen(url, xparams).read()
        except urllib2.HTTPError:
            logMess('ERROR:MSC03', 'A connection could not be established to uniprot')
            return None
    # no organism filter, or the filtered query returned nothing: retry unfiltered
    if response in ['', None]:
        url = 'http://www.uniprot.org/uniprot/?'
        xparams = 'query={0}&columns=entry name,id&format=tab&limit=5&sort=score'.format(
            nameStr)
        try:
            response = urllib2.urlopen(url, xparams).read()
        except urllib2.HTTPError:
            # NOTE(review): this failure path returns silently (no logMess),
            # unlike the first one -- confirm that is intended
            return None
    # tab-separated table; drop the header row
    parsedData = [x.split('\t') for x in response.split('\n')][1:]
    return [
        x[1] for x in parsedData
        if len(x) == 2 and any(nameStr.lower() in z
                               for z in [y.lower() for y in x[0].split('_')])
    ]
def queryActiveSite(nameStr, organism):
    """Query UniProt for entries matching *nameStr* that carry an ACTIVE SITE
    annotation (duplicate of the wrapped version elsewhere in this file).

    :param organism: falsy, or an iterable of organism URIs; the first entry's
        trailing path component is used as the taxon filter -- TODO confirm
    :returns: list of matching entry names, or the raw falsy response
        ('' or None) when the service could not be reached
    """
    url = 'http://www.uniprot.org/uniprot/?'
    response = None
    retry = 0
    # up to three attempts; NOTE(review): no break on success -- the
    # organism-filtered request is re-issued every iteration, confirm intent
    while retry < 3:
        retry += 1
        if organism:
            organismExtract = list(organism)[0].split('/')[-1]
            xparams = 'query={0}+AND+organism:{1}&columns=entry name,id,feature(ACTIVE SITE)&format=tab&limit=5&sort=score'.format(nameStr, organismExtract)
            try:
                response = urllib2.urlopen(url, xparams).read()
            except urllib2.HTTPError:
                logMess('ERROR:MSC03', 'A connection could not be established to uniprot')
        # fall back to an organism-free query when nothing was retrieved
        if response in ['', None]:
            url = 'http://www.uniprot.org/uniprot/?'
            xparams = 'query={0}&columns=entry name,id,feature(ACTIVE SITE)&format=tab&limit=5&sort=score'.format(nameStr)
            try:
                response = urllib2.urlopen(url, xparams).read()
            except urllib2.HTTPError:
                logMess('ERROR:MSC03', 'A connection could not be established to uniprot')
    if not response:
        return response
    # tab-separated table; drop the header row
    parsedData = [x.split('\t') for x in response.split('\n')][1:]
    #return parsedData
    # keep entries whose underscore-separated name contains nameStr and whose
    # ACTIVE SITE column is non-empty
    return [x[0] for x in parsedData if len(x) == 3 and any(nameStr.lower() in z for z in [y.lower() for y in x[0].split('_')]) and len(x[2]) > 0]
def queryBioGridByName(name1, name2, organism=None):
    """Ask the BioGRID web service whether genes *name1* and *name2* are
    reported to interact.

    :param organism: optional iterable of organism/taxon identifiers; their
        pipe-joined form is sent as the taxId filter
    :returns: True when some returned interaction matches both names against
        the official symbols or synonyms (case-insensitive), False when none
        does, -1 when the service could not be reached
    """
    url = 'http://webservice.thebiogrid.org/interactions/?'
    response = None
    if organism:
        # fix: dropped the unused `organismExtract` local the original computed here
        xparams = 'geneList={0}&includeInteractors=false&accesskey=59764eb62ca572de5949062a1ba75e5d&format=json&taxId={1}'.format('|'.join([name1, name2]), '|'.join(organism))
        try:
            response = urllib2.urlopen(url, xparams).read()
        except urllib2.HTTPError:
            logMess('ERROR:biogrid', 'A connection could not be established to biogrid while testing with taxon {1} and genes {0}'.format('|'.join([name1, name2]), '|'.join(organism)))
            return -1
    if not response:
        # no organism filter given, or the filtered query returned nothing
        xparams = 'geneList={0}&includeInteractors=false&accesskey=59764eb62ca572de5949062a1ba75e5d&format=json'.format('|'.join([name1, name2]))
        try:
            response = urllib2.urlopen(url, xparams).read()
        except urllib2.HTTPError:
            logMess('ERROR:biogrid', 'A connection could not be established to biogrid')
            return -1
    results = json.loads(response)
    # compare case-insensitively; hoisted out of the loop (the original
    # re-lowered both names on every iteration)
    name1 = name1.lower()
    name2 = name2.lower()
    for result in results:
        resultName1 = results[result]['OFFICIAL_SYMBOL_A'].lower()
        resultName2 = results[result]['OFFICIAL_SYMBOL_B'].lower()
        synonymName1 = [x.lower() for x in results[result]['SYNONYMS_A'].split('|')]
        synonymName2 = [x.lower() for x in results[result]['SYNONYMS_B'].split('|')]
        # accept the pair in either orientation
        if (name1 == resultName1 or name1 in synonymName1) and (name2 == resultName2 or name2 in synonymName2):
            return True
        if (name2 == resultName1 or name2 in synonymName1) and (name1 == resultName2 or name1 in synonymName2):
            return True
    return False
def changeToBNGL(functionList, rule, function):
    """Rewrite every two-argument call f(a,b), for each f in *functionList*,
    found in *rule* by applying *function* (an ``re.sub`` replacement:
    template string or callable).

    Repeats until no listed function name remains in the rule.  If a pass
    makes no progress even though a listed name is still present (malformed
    call, e.g. unbalanced arguments) the problem is logged and the loop
    exits, returning the partially rewritten rule.
    """
    oldrule = ''
    # keep rewriting while some listed function is still referenced and the
    # previous pass changed something
    while any(re.search(r'(\W|^)({0})(\W|$)'.format(x), rule) is not None
              for x in functionList) and (oldrule != rule):
        oldrule = rule
        for x in functionList:
            # fix: raw string for the regex pattern (avoids invalid-escape warnings)
            rule = re.sub(r'({0})\(([^,]+),([^)]+)\)'.format(x), function, rule)
        if rule == oldrule:
            # name present but nothing substituted: malformed expression.
            # fix: removed the leftover debug statement `print 'meep'`.
            logMess('ERROR', 'Malformed pow or root function %s' % rule)
    return rule
def queryBioGridByName(name1, name2, organism, truename1, truename2):
    """Ask BioGRID whether the gene pair interacts, optionally matching
    against resolved ("true") names instead of the raw query names.

    :param organism: falsy, or an iterable of organism/taxon identifiers;
        their pipe-joined form is sent as the taxId filter
    :param truename1, truename2: resolved names (or None); when given they
        replace name1/name2 in the match tests below
    :returns: True when a returned interaction matches, False otherwise or
        when the service could not be reached
    """
    url = 'http://webservice.thebiogrid.org/interactions/?'
    response = None
    if organism:
        # NOTE(review): organismExtract is computed but never used -- the
        # taxId filter sends '|'.join(organism) instead; confirm which is intended
        organismExtract = list(organism)[0].split('/')[-1]
        xparams = 'geneList={0}&includeInteractors=false&accesskey=59764eb62ca572de5949062a1ba75e5d&format=json&taxId={1}'.format(
            '|'.join([name1, name2]), '|'.join(organism))
        try:
            response = urllib2.urlopen(url, xparams).read()
        except urllib2.HTTPError:
            logMess(
                'ERROR:MSC02',
                'A connection could not be established to biogrid while testing with taxon {1} and genes {0}'
                .format('|'.join([name1, name2]), '|'.join(organism)))
            return False
    if not response:
        # no organism filter given, or the filtered query returned nothing
        xparams = 'geneList={0}&includeInteractors=false&accesskey=59764eb62ca572de5949062a1ba75e5d&format=json'.format(
            '|'.join([name1, name2]))
        try:
            response = urllib2.urlopen(url, xparams).read()
        except urllib2.HTTPError:
            logMess('ERROR:MSC02',
                    'A connection could not be established to biogrid')
            return False
    results = json.loads(response)
    # prefer the resolved names when available
    referenceName1 = truename1.lower() if truename1 else name1.lower()
    referenceName2 = truename2.lower() if truename2 else name2.lower()
    for result in results:
        resultName1 = results[result]['OFFICIAL_SYMBOL_A'].lower()
        resultName2 = results[result]['OFFICIAL_SYMBOL_B'].lower()
        synonymName1 = results[result]['SYNONYMS_A'].split('|')
        synonymName1 = [x.lower() for x in synonymName1]
        synonymName2 = results[result]['SYNONYMS_B'].split('|')
        synonymName2 = [x.lower() for x in synonymName2]
        # NOTE(review): when both true names are supplied, ANY heterodimer
        # result (or a homodimer result for identical true names) is accepted
        # without comparing names -- verify this shortcut is intentional
        if truename1 != None and truename2 != None and resultName1 != resultName2:
            return True
        elif truename1 != None and truename2 != None and truename1 == truename2 and resultName1 == resultName2:
            return True
        # otherwise require a symbol/synonym match in either orientation
        if (referenceName1 == resultName1 or referenceName1 in synonymName1) and (referenceName2 == resultName2 or referenceName2 in synonymName2):
            return True
        if (referenceName2 == resultName1 or referenceName2 in synonymName1) and (referenceName1 == resultName2 or referenceName1 in synonymName2):
            return True
    return False
def defineComplexCorrespondenceWithNamingConventions(reactant,
                                                     namingConventions,
                                                     database,
                                                     createOnDemand=False):
    """Reconcile a naming-convention decomposition with the decomposition
    already stored in ``database.labelDictionary`` (reformatted duplicate of
    the same function earlier in this file).

    Only acts when ``namingConventions[0][1]`` is a tuple and the convention's
    name is already registered; conflicts are logged and, with
    ``createOnDemand=True``, the candidate decomposition may replace the
    stored one.

    :param reactant: unused in this body -- kept for interface compatibility
    :returns: None (mutates ``database.labelDictionary`` in place)
    """
    if isinstance(namingConventions[0][1], tuple):
        #if len([x for x in namingConventions[0][1] if x not in database.labelDictionary]) == 0:
        if namingConventions[0][0] in database.labelDictionary:
            # a1: decomposition currently on record; a2: candidate decomposition
            a1 = (database.labelDictionary[namingConventions[0][0]][0])
            a2 = (namingConventions[0][1])
            # size of the symmetric difference: 0 means both agree
            evaluation = len([x for x in a1 if x not in a2]) + len(
                [x for x in a2 if x not in a1])
            if evaluation != 0:
                #if there's already something in there
                if a1[0] != namingConventions[0][0]:
                    if not createOnDemand:
                        pass  # NOTE(review): empty branch -- no effect; confirm intent
                    logMess(
                        "ERROR", "{0}: {1} =/= {2}".format(
                            namingConventions[0][0],
                            [x for x in a1 if x not in a2],
                            [x for x in a2 if x not in a1]))
                    #print 'CONTRADICTORY ',namingConventions[0][0],'(',[x for x in a1 if x not in a2],[x for x in a2 if x not in a1],[x in database.labelDictionary for x in a2 if x not in a1],')'
                    # adopt the candidate only when it differs less from the
                    # record than the record differs from it
                    if createOnDemand and len([
                            x for x in a1 if x not in a2
                    ]) < len([x for x in a2 if x not in a1]):
                        database.labelDictionary[namingConventions[0][0]] = [
                            a2
                        ]
                        print 'CONTRADICTORY ', namingConventions[0][0], '(', [
                            x for x in a1 if x not in a2
                        ], [x for x in a2 if x not in a1], [
                            x in database.labelDictionary
                            for x in a2 if x not in a1
                        ], ')'
                        # register every yet-unknown member as a base element
                        for element in [
                                x for x in a2
                                if x not in database.labelDictionary
                        ]:
                            database.labelDictionary[element] = [(element, )]
                #if we already know about all the elements in there
                elif len([x for x in a2 if x not in namingConventions]) == 0:
                    # NOTE(review): tests membership against namingConventions
                    # itself (a list of pairs), not its element names -- verify
                    print '++++', a1, a2, namingConventions[0][0]
    #database.labelDictionary[namingConventions[0][0]].append(namingConventions[0][1])
    #for element in [x for x in namingConventions[0][1] if x not in database.labelDictionary]:
    #    database.labelDictionary[element] = [(element,)]
    #if namingConventions[0][0] in database.labelDictionary:
    #    print namingConventions[0][0],[namingConventions[0][1]],database.labelDictionary[namingConventions[0][0]],namingConventions[0][1]==database.labelDictionary[namingConventions[0][0]]
    return
def changeToBNGL(functionList, rule, function):
    """Apply *function* (an ``re.sub`` replacement: template or callable) to
    every two-argument call f(a,b) in *rule*, for each f in *functionList*.

    Loops until no listed name remains; when a pass changes nothing even
    though a listed name is still present (malformed expression), it logs
    the error and returns the partially rewritten rule.
    """
    oldrule = ''
    #if the rule contains any mathematical function we need to reformat
    while any(
            re.search(r'(\W|^)({0})(\W|$)'.format(x), rule) is not None
            for x in functionList) and (oldrule != rule):
        oldrule = rule
        for x in functionList:
            # fix: raw-string regex pattern (avoids invalid-escape warnings)
            rule = re.sub(r'({0})\(([^,]+),([^)]+)\)'.format(x), function, rule)
        if rule == oldrule:
            # fix: removed the leftover debug statement `print 'meep'`
            logMess('ERROR', 'Malformed pow or root function %s' % rule)
    return rule
def propagateChanges(translator, dependencyGraph):
    """Propagate species updates through the dependency graph until a fixed
    point is reached.

    For every non-empty dependency, each molecule of its first decomposition
    is pushed into the dependency's translated species via ``updateSpecies``;
    any reported change triggers another full sweep.

    Mutates the entries of *translator* in place; returns None.
    """
    flag = True
    while flag:
        flag = False
        # fix: removed the dead `if dependency == 'RAF_P': pass` debug hook
        for dependency in dependencyGraph:
            if dependencyGraph[dependency] == []:
                continue
            for molecule in dependencyGraph[dependency][0]:
                try:
                    if updateSpecies(translator[dependency],
                                     translator[getTrueTag(dependencyGraph, molecule)].molecules[0]):
                        flag = True
                except Exception:
                    # fix: was a bare `except:`; narrowed so KeyboardInterrupt /
                    # SystemExit are no longer swallowed
                    logMess('CRITICAL:Program', 'Species is not being properly propagated')
                    flag = False
def propagateChanges(translator, dependencyGraph):
    """Sweep the dependency graph, pushing molecule-level updates into each
    dependent species, and keep sweeping until a pass reports no change.

    Mutates *translator* entries in place; returns None.
    """
    dirty = True
    while dirty:
        dirty = False
        for species in dependencyGraph:
            if species == 'RAF_P':
                pass  # debugging hook left by the original author
            deps = dependencyGraph[species]
            if deps == []:
                continue
            # only the first decomposition of each species is propagated
            for member in deps[0]:
                try:
                    source = translator[getTrueTag(dependencyGraph, member)].molecules[0]
                    if updateSpecies(translator[species], source):
                        dirty = True
                except:
                    logMess('CRITICAL:Program', 'Species is not being properly propagated')
                    dirty = False
def name2uniprot(nameStr, organism):
    """Resolve a species name to candidate UniProt ids via the UniProt web
    service (duplicate of the wrapped version elsewhere in this file).

    :param organism: falsy, or an iterable of organism URIs; the trailing
        path component of the first entry is used as the taxon filter
        (presumably a taxon id -- verify against callers)
    :returns: list of UniProt ids whose entry name contains nameStr
        (case-insensitive, split on '_'), or None on connection failure
    """
    url = 'http://www.uniprot.org/uniprot/?'
    response = None
    if organism:
        organismExtract = list(organism)[0].split('/')[-1]
        xparams = 'query={0}+AND+organism:{1}&columns=entry name,id&format=tab&limit=5&sort=score'.format(nameStr, organismExtract)
        try:
            response = urllib2.urlopen(url, xparams).read()
        except urllib2.HTTPError:
            logMess('ERROR:pathwaycommons', 'A connection could not be established to uniprot')
            return None
    # no organism filter, or the filtered query returned nothing: retry unfiltered
    if response in ['', None]:
        url = 'http://www.uniprot.org/uniprot/?'
        xparams = 'query={0}&columns=entry name,id&format=tab&limit=5&sort=score'.format(nameStr)
        try:
            response = urllib2.urlopen(url, xparams).read()
        except urllib2.HTTPError:
            # NOTE(review): this failure path returns silently (no logMess),
            # unlike the first one -- confirm that is intended
            return None
    # tab-separated table; drop the header row
    parsedData = [x.split('\t') for x in response.split('\n')][1:]
    return [x[1] for x in parsedData if len(x) == 2 and any(nameStr.lower() in z for z in [y.lower() for y in x[0].split('_')])]
def getReactions(self, translator=[], isCompartments=False, extraParameters={}):
    '''
    returns a triple containing the parameters,rules,functions

    Walks every SBML reaction, emits its local parameters as BNGL parameter
    lines, builds a rate function (two for reversible reactions) and the
    corresponding BNGL reaction string.

    rawRules layout (inferred from the dict-based sibling version of this
    method -- confirm against __getRawRules): [0]=reactants, [1]=products,
    [2]=local parameters, [3]=(forward, reverse) rate laws, [4]=reversible
    flag, [5]=reaction id.
    '''
    rules = []
    parameters = []
    functions = []
    tester = NumericStringParser()  # NOTE(review): unused here -- confirm before removing
    functionTitle = 'functionRate'
    for index, reaction in enumerate(self.model.getListOfReactions()):
        parameterDict = {}
        rawRules = self.__getRawRules(reaction)
        #newRate = self.updateFunctionReference(rawRules,extraParameters)
        # emit the reaction's local parameters
        if len(rawRules[2]) > 0:
            for parameter in rawRules[2]:
                parameters.append('%s %f' % (parameter[0], parameter[1]))
                parameterDict[parameter[0]] = parameter[1]
        # 'cell' default plus every declared compartment as [name, size]
        # (rebuilt every iteration although it is loop-invariant)
        compartmentList = [['cell', 1]]
        compartmentList.extend([[self.__getRawCompartments(x)[0], self.__getRawCompartments(x)[2]] for x in self.model.getListOfCompartments()])
        functionName = '%s%d()' % (functionTitle, index)
        if 'delay' in rawRules[3][0]:
            logMess('ERROR', 'BNG cannot handle delay functions in function %s' % functionName)
        if rawRules[4]:
            # reversible reaction: forward and reverse rate functions
            functions.append(writer.bnglFunction(rawRules[3][0], functionName, rawRules[0], compartmentList, parameterDict, self.reactionDictionary))
            functionName2 = '%s%dm()' % (functionTitle, index)
            functions.append(writer.bnglFunction(rawRules[3][1], functionName2, rawRules[0], compartmentList, parameterDict, self.reactionDictionary))
            # net flux of this reaction, used when other expressions reference its id
            self.reactionDictionary[rawRules[5]] = '({0} - {1})'.format(functionName, functionName2)
            functionName = '{0},{1}'.format(functionName, functionName2)
        else:
            functions.append(writer.bnglFunction(rawRules[3][0], functionName, rawRules[0], compartmentList, parameterDict, self.reactionDictionary))
            self.reactionDictionary[rawRules[5]] = '{0}'.format(functionName)
        rules.append(writer.bnglReaction(rawRules[0], rawRules[1], functionName, self.tags, translator, isCompartments, rawRules[4]))
    return parameters, rules, functions
def printTranslate(chemical, tags, translator={}):
    """Render one (species, stoichiometry) pair as a BNGL reactant string.

    Known species (present in *translator*) are tagged with the compartment
    and rendered from their structured form; unknown species become
    'name()tags'.  Integer stoichiometries repeat the species that many
    times, joined by ' + '; non-integer ones are logged and emitted once.
    """
    name, stoichiometry = chemical[0], chemical[1]
    if name in translator:
        # known species: attach the compartment tag to its structured form
        translator[name].addCompartment(tags)
        rendered = str(translator[name])
    else:
        rendered = name + '()' + tags
    if float(int(stoichiometry)) == stoichiometry:
        copies = [rendered] * int(stoichiometry)
    else:
        logMess("ERROR", "Cannot deal with non integer stoicheometries: {0}* {1}".format(stoichiometry, name))
        copies = [rendered]
    return ' + '.join(copies)
def printTranslate(chemical, tags, translator={}):
    """Build the BNGL text for one chemical and its stoichiometry.

    A species found in *translator* gets the compartment attached and is
    printed from its structured form; otherwise 'name()tags' is used.  The
    text is repeated stoichiometry times (joined with ' + '); non-integer
    stoichiometries are logged and the species is emitted once.
    """
    species = chemical[0]
    count = chemical[1]
    if species not in translator:
        text = species + '()' + tags
    else:
        translator[species].addCompartment(tags)
        text = str(translator[species])
    pieces = []
    if float(int(count)) != count:
        # fractional stoichiometry: not representable, emit a single copy
        logMess(
            "ERROR:Simulation",
            "Cannot deal with non integer stoicheometries: {0}* {1}".format(
                count, species))
        pieces.append(text)
    else:
        for _ in range(int(count)):
            pieces.append(text)
    return ' + '.join(pieces)
def getAssignmentRules(self, zparams, parameters, molecules):
    '''
    this method obtains an SBML rate rules and assignment rules. They
    require special handling since rules are often both defined as rules
    and parameters initialized as 0, so they need to be removed from the
    parameters list

    rawArule layout (inferred from usage -- confirm against
    __getRawAssignmentRules): [0]=variable name, [1]=(forward, reverse)
    rate laws, [2]=is-assignment-rule flag, [3]=is-rate-rule flag.

    Returns (aParameters, arules, zRules, artificialReactions,
    removeParameters, artificialObservables).  NOTE(review): zRules is the
    SAME list object as zparams, so the caller's list is mutated in place.
    '''
    # 'cell' default plus every declared compartment as [name, size]
    compartmentList = [['cell', 1]]
    compartmentList.extend([[self.__getRawCompartments(x)[0], self.__getRawCompartments(x)[2]] for x in self.model.getListOfCompartments()])
    arules = []
    aParameters = {}
    zRules = zparams
    removeParameters = []
    artificialReactions = []
    artificialObservables = {}
    for arule in self.model.getListOfRules():
        rawArule = self.__getRawAssignmentRules(arule)
        #tmp.remove(rawArule[0])
        #newRule = rawArule[1].replace('+',',').strip()
        if rawArule[3] == True:
            #it is an rate rule
            if rawArule[0] in self.boundaryConditionVariables:
                # boundary-condition species are shadowed under an 'arj' alias
                aParameters[rawArule[0]] = 'arj' + rawArule[0]
                tmp = list(rawArule)
                tmp[0] = 'arj' + rawArule[0]
                rawArule = tmp
            rateLaw1 = rawArule[1][0]
            rateLaw2 = rawArule[1][1]
            # a rate rule becomes a pair of rate functions plus a synthesis
            # reaction producing the variable
            arules.append(writer.bnglFunction(rateLaw1, 'arRate{0}'.format(rawArule[0]), [], compartments=compartmentList, reactionDict=self.reactionDictionary))
            arules.append(writer.bnglFunction(rateLaw2, 'armRate{0}'.format(rawArule[0]), [], compartments=compartmentList, reactionDict=self.reactionDictionary))
            artificialReactions.append(writer.bnglReaction([], [[rawArule[0], 1]], '{0},{1}'.format('arRate{0}'.format(rawArule[0]), 'armRate{0}'.format(rawArule[0])), self.tags, {}, isCompartments=True, comment='#rateLaw'))
            #arules.append(writer.bnglFunction('({0}) - ({1})'.format(rawArule[1][0],rawArule[1][1]), '{0}'.format(rawArule[0]),[],compartments=compartmentList, reactionDict=self.reactionDictionary))
            if rawArule[0] in zparams:
                # variable was also declared as a zero parameter: drop the parameter
                removeParameters.append('{0} 0'.format(rawArule[0]))
                zRules.remove(rawArule[0])
            else:
                for element in parameters:
                    #TODO: if for whatever reason a rate rule
                    #was defined as a parameter that is not 0
                    #remove it. This might not be exact behavior
                    # NOTE(review): this warning fires once per parameter,
                    # regardless of whether the name actually matches -- verify
                    logMess("WARNING", "A name corresponds both as a non zero parameter \
and a rate rule, verify behavior")
                    if re.search('^{0}\s'.format(rawArule[0]), element):
                        removeParameters.append(element)
        elif rawArule[2] == True:
            #it is an assigment rule
            if rawArule[0] in zRules:
                zRules.remove(rawArule[0])
            if rawArule[0] in self.boundaryConditionVariables:
                aParameters[rawArule[0]] = 'arj' + rawArule[0]
                tmp = list(rawArule)
                tmp[0] = 'arj' + rawArule[0]
                rawArule = tmp
            # assignment rules become observable-like functions
            artificialObservables[rawArule[0]] = writer.bnglFunction(rawArule[1][0], rawArule[0] + '()', [], compartments=compartmentList, reactionDict=self.reactionDictionary)
        else:
            '''
            if for whatever reason you have a rule that is not assigment
            or rate and it is initialized as a non zero parameter, give it
            a new name
            '''
            if rawArule[0] not in zparams:
                ruleName = 'ar' + rawArule[0]
            else:
                ruleName = rawArule[0]
                zRules.remove(rawArule[0])
            arules.append(writer.bnglFunction(rawArule[1][0], ruleName, [], compartments=compartmentList, reactionDict=self.reactionDictionary))
            aParameters[rawArule[0]] = 'ar' + rawArule[0]
        '''
        elif rawArule[2] == True:
            for parameter in parameters:
                if re.search('^{0}\s'.format(rawArule[0]),parameter):
                    print '////',rawArule[0]
        '''
        #arules.append('%s = %s' %(rawArule[0],newRule))
    return aParameters, arules, zRules, artificialReactions, removeParameters, artificialObservables
def getReactions(self, translator={}, isCompartments=False, extraParameters={}, atomize=False):
    '''
    @returns: a triple containing the parameters,rules,functions

    Dict-based successor of the tuple-based getReactions: emits per-reaction
    parameters (prefixed 'r<N>_'), rate functions and BNGL reaction strings.
    '''
    ##@FIXME:this part of the code is there so that we only generate the functions list once through different
    #iterations of this call. This is because we cannot create a clone of the 'math' object for this
    #reaction and it is being permanently changed every call. It's ugly but it works. Change for something
    #better when we figure out how to clone the math object
    if not hasattr(self.getReactions, 'functionFlag'):
        self.getReactions.__func__.functionFlag = False or (not atomize)
    rules = []
    parameters = []
    functions = []
    functionTitle = 'functionRate'
    for index, reaction in enumerate(self.model.getListOfReactions()):
        parameterDict = {}
        #symmetry factors for components with the same name
        sl, sr = self.reduceComponentSymmetryFactors(reaction, translator, functions)
        rawRules = self.__getRawRules(reaction, [sl, sr], self.getReactions.functionFlag)
        # local parameters are namespaced per reaction: 'r<N>_<name>'
        if len(rawRules['parameters']) > 0:
            for parameter in rawRules['parameters']:
                parameters.append('r%d_%s %f' % (index + 1, parameter[0], parameter[1]))
                parameterDict[parameter[0]] = parameter[1]
        # 'cell' default plus every declared compartment as [name, size]
        # (rebuilt every iteration although it is loop-invariant)
        compartmentList = [['cell', 1]]
        compartmentList.extend([[self.__getRawCompartments(x)[0], self.__getRawCompartments(x)[2]] for x in self.model.getListOfCompartments()])
        # rawRules['numbers'] above threshold selects a named rate function;
        # otherwise the rate expression is inlined with renamed parameters
        threshold = 0
        if rawRules['numbers'][0] > threshold:
            functionName = '%s%d()' % (functionTitle, index)
        else:
            #append reactionNumbers to parameterNames
            finalString = str(rawRules['rates'][0])
            for parameter in parameterDict:
                finalString = re.sub(r'(\W|^)({0})(\W|$)'.format(parameter), r'\1{0}\3'.format('r{0}_{1}'.format(index + 1, parameter)), finalString)
            functionName = finalString
        if self.getReactions.functionFlag and 'delay' in rawRules['rates'][0]:
            logMess('ERROR', 'BNG cannot handle delay functions in function %s' % functionName)
        if rawRules['reversible']:
            if rawRules['numbers'][0] > threshold:
                if self.getReactions.functionFlag:
                    functions.append(writer.bnglFunction(rawRules['rates'][0], functionName, rawRules['reactants'], compartmentList, parameterDict, self.reactionDictionary))
            if rawRules['numbers'][1] > threshold:
                functionName2 = '%s%dm()' % (functionTitle, index)
                if self.getReactions.functionFlag:
                    functions.append(writer.bnglFunction(rawRules['rates'][1], functionName2, rawRules['reactants'], compartmentList, parameterDict, self.reactionDictionary))
                    # net flux of the reversible reaction for later references
                    self.reactionDictionary[rawRules['reactionID']] = '({0} - {1})'.format(functionName, functionName2)
                functionName = '{0},{1}'.format(functionName, functionName2)
            else:
                # inline reverse rate with namespaced parameters
                finalString = str(rawRules['rates'][1])
                for parameter in parameterDict:
                    finalString = re.sub(r'(\W|^)({0})(\W|$)'.format(parameter), r'\1{0}\3'.format('r{0}_{1}'.format(index + 1, parameter)), finalString)
                functionName = '{0},{1}'.format(functionName, finalString)
        else:
            if rawRules['numbers'][0] > threshold:
                if self.getReactions.functionFlag:
                    functions.append(writer.bnglFunction(rawRules['rates'][0], functionName, rawRules['reactants'], compartmentList, parameterDict, self.reactionDictionary))
                    self.reactionDictionary[rawRules['reactionID']] = '{0}'.format(functionName)
        #reactants = [x for x in rawRules[0] if x[0] not in self.boundaryConditionVariables]
        #products = [x for x in rawRules[1] if x[0] not in self.boundaryConditionVariables]
        reactants = [x for x in rawRules['reactants']]
        products = [x for x in rawRules['products']]
        rules.append(writer.bnglReaction(reactants, products, functionName, self.tags, translator, (isCompartments or ((len(reactants) == 0 or len(products) == 0) and self.getReactions.__func__.functionFlag)), rawRules['reversible'], reactionName=rawRules['reactionID']))
    # toggle the shared flag so the second pass of an atomized run
    # does not regenerate the functions list
    if atomize:
        self.getReactions.__func__.functionFlag = not self.getReactions.functionFlag
    return parameters, rules, functions
def bnglFunction(rule, functionTitle, reactants, compartments=[], parameterDict={}, reactionDict={}):
    """Translate an SBML/MathML-flavoured rate expression *rule* into a BNGL
    function definition named *functionTitle*.

    Rewrites comparison operators (gt/lt/...), pow/root/sqrt/ceil/floor,
    piecewise and lambda constructs into BNGL-compatible text, strips
    compartment factors (cBNGL applies them itself), renames local
    parameters/reaction ids via parameterDict/reactionDict, and patches
    BNGL-reserved tokens (pi, log, e).

    Returns the finished function string ('name = body', or just the body
    when the rule was a lambda definition).
    """
    def powParse(match):
        # rewrite pow(a,b)/root(n,x) matches into '(a)^(b)' form
        if match.group(1) == 'root':
            exponent = '(1/%s)' % match.group(3)
        else:
            exponent = match.group(3)
        if match.group(1) in ['root', 'pow']:
            operator = '^'
        # NOTE(review): 'operator' is unbound when group(1) is neither
        # root nor pow -- callers only use this for those two names
        return '({0}){1}({2})'.format(match.group(2), operator, exponent)

    def compParse(match):
        # rewrite comparison/boolean calls, e.g. gt(a,b) -> 'a > b'
        translator = {'gt': '>', 'lt': '<', 'and': '&&', 'or': '||', 'geq': '>=', 'leq': '<=', 'eq': '=='}
        exponent = match.group(3)
        operator = translator[match.group(1)]
        return '{0} {1} {2}'.format(match.group(2), operator, exponent)

    def ceilfloorParse(math):
        # express ceil/floor through BNGL's rint
        flag = False
        if math.group(1) == 'ceil':
            flag = True
        if flag:
            return 'min(rint({0}+0.5),rint({0} + 1))'.format(math.group(2))
        else:
            return 'min(rint({0}-0.5),rint({0}+0.5))'.format(math.group(2))

    def parameterRewrite(match):
        return match.group(1) + 'param_' + match.group(2) + match.group(3)

    def constructFromList(argList, optionList):
        # rebuild an expression string from pyparsing's nested-list form,
        # expanding any operator named in optionList
        parsedString = ''
        idx = 0
        translator = {'gt': '>', 'lt': '<', 'and': '&&', 'or': '||', 'geq': '>=', 'leq': '<=', 'eq': '=='}
        while idx < len(argList):
            if type(argList[idx]) is list:
                parsedString += '(' + constructFromList(argList[idx], optionList) + ')'
            elif argList[idx] in optionList:
                if argList[idx] == 'ceil':
                    parsedString += 'min(rint(({0}) + 0.5),rint(({0}) + 1))'.format(constructFromList(argList[idx+1], optionList))
                    idx += 1
                elif argList[idx] == 'floor':
                    parsedString += 'min(rint(({0}) -0.5),rint(({0}) + 0.5))'.format(constructFromList(argList[idx+1], optionList))
                    idx += 1
                elif argList[idx] in ['pow']:
                    # split the argument list on its last comma: base ^ exponent
                    index = rindex(argList[idx+1], ',')
                    parsedString += '((' + constructFromList(argList[idx+1][0:index], optionList) + ')'
                    parsedString += ' ^ ' + '(' + constructFromList(argList[idx+1][index+1:], optionList) + '))'
                    idx += 1
                elif argList[idx] in ['sqr', 'sqrt']:
                    tag = '1/' if argList[idx] == 'sqrt' else ''
                    parsedString += '((' + constructFromList(argList[idx+1], optionList) + ') ^ ({0}2))'.format(tag)
                    idx += 1
                elif argList[idx] == 'root':
                    index = rindex(argList[idx+1], ',')
                    tmp = '1/(' + constructFromList(argList[idx+1][0:index], optionList) + '))'
                    parsedString += '((' + constructFromList(argList[idx+1][index+1:], optionList) + ') ^ ' + tmp
                    idx += 1
                elif argList[idx] == 'piecewise':
                    # piecewise(value1, condition1, value2, ...) -> nested if()
                    index1 = argList[idx+1].index(',')
                    index2 = argList[idx+1][index1+1:].index(',') + index1 + 1
                    try:
                        index3 = argList[idx+1][index2+1:].index(',') + index2 + 1
                    except ValueError:
                        index3 = -1
                    condition = constructFromList([argList[idx+1][index1+1:index2]], optionList)
                    result = constructFromList([argList[idx+1][:index1]], optionList)
                    if index3 == -1:
                        result2 = constructFromList([argList[idx+1][index2+1:]], optionList)
                    else:
                        # more clauses remain: recurse on the tail
                        result2 = constructFromList(['piecewise', argList[idx+1][index2+1:]], optionList)
                    parsedString += 'if({0},{1},{2})'.format(condition, result, result2)
                    idx += 1
                elif argList[idx] == 'lambda':
                    # lambda(p1,...,pn,body) -> '(param_p1, ...) = body'
                    tmp = '('
                    upperLimit = rindex(argList[idx+1], ',')
                    parsedParams = []
                    for x in argList[idx+1][0:upperLimit]:
                        if x == ',':
                            tmp += ', '
                        else:
                            tmp += 'param_' + x
                            parsedParams.append(x)
                    #tmp = ''.join([x for x in constructFromList(argList[idx+1][0:upperLimit])])
                    tmp2 = ') = ' + constructFromList(argList[idx+1][rindex(argList[idx+1], ',')+1:], optionList)
                    # prefix every use of a lambda parameter inside the body
                    for x in parsedParams:
                        while re.search(r'(\W|^)({0})(\W|$)'.format(x), tmp2) != None:
                            tmp2 = re.sub(r'(\W|^)({0})(\W|$)'.format(x), r'\1param_\2 \3', tmp2)
                    idx += 1
                    parsedString += tmp + tmp2
            else:
                parsedString += argList[idx]
            idx += 1
        return parsedString

    def changeToBNGL(functionList, rule, function):
        # apply *function* (an re.sub replacement) to every two-argument
        # call of a name in functionList, until none remain
        oldrule = ''
        #if the rule contains any mathematical function we need to reformat
        while any([re.search(r'(\W|^)({0})(\W|$)'.format(x), rule) != None for x in functionList]) and (oldrule != rule):
            oldrule = rule
            for x in functionList:
                rule = re.sub('({0})\(([^,]+),([^)]+)\)'.format(x), function, rule)
            if rule == oldrule:
                logMess('ERROR', 'Malformed pow or root function %s' % rule)
                print 'meep'
        return rule

    #rule = changeToBNGL(['pow','root'],rule,powParse)
    # comparison and boolean operators first
    rule = changeToBNGL(['gt', 'lt', 'leq', 'geq', 'eq'], rule, compParse)
    rule = changeToBNGL(['and', 'or'], rule, compParse)
    flag = True
    contentRule = pyparsing.Word(pyparsing.alphanums + '_') | ',' | '.' | '+' | '-' | '*' | '/' | '^' | '&' | '>' | '<' | '=' | '|'
    parens = pyparsing.nestedExpr('(', ')', content=contentRule)
    finalString = ''
    #remove ceil,floor
    if any([re.search(r'(\W|^)({0})(\W|$)'.format(x), rule) != None for x in ['ceil', 'floor', 'pow', 'sqrt', 'sqr', 'root']]):
        argList = parens.parseString('(' + rule + ')').asList()
        rule = constructFromList(argList[0], ['floor', 'ceil', 'pow', 'sqrt', 'sqr', 'root'])
    #TODO:rewrite this to use pyparsing
    while 'piecewise' in rule:
        argList = parens.parseString('(' + rule + ')').asList()
        rule = constructFromList(argList[0], ['piecewise'])
    #remove references to lambda functions
    if 'lambda(' in rule:
        lambdaList = parens.parseString('(' + rule + ')')
        functionBody = constructFromList(lambdaList[0].asList(), ['lambda'])
        flag = False  # lambda supplies its own 'name(args) = body' shape
        rule = '{0}{1}'.format(functionTitle, functionBody)
    tmp = rule
    #delete the compartment from the rate function since cBNGL already does it
    for compartment in compartments:
        tmp = re.sub('^{0}\s*[*]'.format(compartment[0]), '', tmp)
        tmp = re.sub('([*]\s*{0})$'.format(compartment[0]), '', tmp)
        if compartment[0] in tmp:
            tmp = re.sub(r'(\W|^)({0})(\W|$)'.format(compartment[0]), r'\1 {0} \3'.format(str(compartment[1])), tmp)
            #tmp = re.sub(r'(\W)({0})(\W)'.format(compartment[0]),r'\1%s\3' % str(compartment[1]),tmp)
            logMess('WARNING', 'Exchanging reference to compartment %s for its dimensions' % compartment[0])
    #change references to time for time()
    #tmp =re.sub(r'(\W|^)(time)(\W|$)',r'\1time()\3',tmp)
    #tmp =re.sub(r'(\W|^)(Time)(\W|$)',r'\1time()\3',tmp)
    #BNGL has ^ for power.
    if flag:
        finalString = '%s = %s' % (functionTitle, tmp)
    else:
        finalString = tmp
    #change references to local parameters
    for parameter in parameterDict:
        finalString = re.sub(r'(\W|^)({0})(\W|$)'.format(parameter), r'\1 {0} \3'.format(parameterDict[parameter]), finalString)
    #change references to reaction Id's to their netflux equivalent
    for reaction in reactionDict:
        if reaction in finalString:
            finalString = re.sub(r'(\W|^)({0})(\W|$)'.format(reaction), r'\1 {0} \3'.format(reactionDict[reaction]), finalString)
    #combinations '+ -' break ibonetgen
    finalString = re.sub(r'(\W|^)([-])(\s)+', r'\1-', finalString)
    #changing reference of 't' to time()
    #finalString = re.sub(r'(\W|^)(t)(\W|$)',r'\1time()\3',finalString)
    #pi
    finalString = re.sub(r'(\W|^)(pi)(\W|$)', r'\1 3.1415926535 \3', finalString)
    #print reactants,finalString
    #log for log 10
    finalString = re.sub(r'(\W|^)log\(', r'\1 ln(', finalString)
    #reserved keyword: e
    finalString = re.sub(r'(\W|^)(e)(\W|$)', r'\1 are \3', finalString)
    #changing ceil
    #avoiding variables whose name starts with a number
    #removing mass-action elements
    tmp = finalString
    #print finalString,reactants
    #for reactant in reactants:
    #    finalString = re.sub(r'(\W|^)({0}\s+\*)'.format(reactant[0]),r'\1',finalString)
    #    finalString = re.sub(r'(\W|^)(\*\s+{0}(\s|$))'.format(reactant[0]),r'\1',finalString)
    #print finalString
    #if finalString != tmp:
    #    logMess('WARNING','Removed mass action elements from )
    return finalString
def createSpeciesCompositionGraph(parser, database, configurationFile, namingConventions, speciesEquivalences=None, bioGridFlag=False):
    '''
    Build the species-composition ("dependency") graph mapping each SBML
    species to the simpler species it is composed of or derived from.

    Evidence is layered in increasing priority: stoichiometry of binding
    reactions, lexical analysis of species names, user-defined complex
    definitions, string-similarity analysis, and a final pure-lexical pass
    over species that are still unresolved.

    Keyword arguments:
    parser -- SBML reader; presumably an SBML2BNGL instance -- TODO confirm
    database -- shared results container; most intermediates are stored as
                attributes on it (deliberate side effects)
    configurationFile -- user configuration forwarded to the SBML analyzer
    namingConventions -- naming-convention definitions for lexical analysis
    speciesEquivalences -- optional user-defined species equivalences
    bioGridFlag -- not used in the visible body of this function

    Returns the tuple (prunnedDependencyGraph, database).
    '''
    # only the reaction list and the raw species list are needed here
    _, rules, _ = parser.getReactions(atomize=True)
    molecules, _, _, _ = parser.getSpecies()
    database.sbmlAnalyzer = \
        analyzeSBML.SBMLAnalyzer(parser, configurationFile, namingConventions, speciesEquivalences)
    # classify reactions
    database.classifications, equivalenceTranslator, database.eequivalenceTranslator, \
        indirectEquivalenceTranslator, \
        adhocLabelDictionary, lexicalDependencyGraph = database.sbmlAnalyzer.classifyReactions(rules, molecules)
    referenceVariables = [database.classifications, equivalenceTranslator,
                          database.eequivalenceTranslator, indirectEquivalenceTranslator,
                          adhocLabelDictionary]
    # deep copies kept around, presumably for later comparison/debugging -- TODO confirm still used
    comparisonVariables = [deepcopy(x) for x in referenceVariables]
    ##### input processing
    # states, components, other user options
    #with open('temp1.dict','w') as f:
    #    pickle.dump(referenceVariables,f)
    #with open('temp2.dict','w') as f:
    #    pickle.dump(comparisonVariables,f)
    database.reactionProperties = database.sbmlAnalyzer.getReactionProperties()
    # user defined and lexical analysis naming conventions are stored here
    database.reactionProperties.update(adhocLabelDictionary)
    database.translator, database.labelDictionary, \
        database.lexicalLabelDictionary = database.sbmlAnalyzer.getUserDefinedComplexes()
    database.dependencyGraph = {}
    #analyzeSBML.analyzeNamingConventions(molecules)
    rdfAnnotations = analyzeRDF.getAnnotations(parser, 'uniprot')
    #### dependency graph
    # binding reactions: stoichiometry gives complex = [part1, part2, ...]
    for reaction, classification in zip(rules, database.classifications):
        bindingReactionsAnalysis(database.dependencyGraph,
                                 list(parseReactions(reaction)), classification)
    # lexical-analysis results override the binding information
    for element in lexicalDependencyGraph:
        database.dependencyGraph[element] = lexicalDependencyGraph[element]
    # catalysis reactions
    for key in database.eequivalenceTranslator:
        for namingEquivalence in database.eequivalenceTranslator[key]:
            # the shorter name is assumed to be the unmodified (base) species
            baseElement = min(namingEquivalence, key=len)
            modElement = max(namingEquivalence, key=len)
            if key != 'Binding':
                if baseElement not in database.dependencyGraph or database.dependencyGraph[baseElement] == []:
                    if modElement not in database.dependencyGraph or database.dependencyGraph[modElement] == []:
                        database.dependencyGraph[baseElement] = []
                    # do we have a meaningful reverse dependence?
                    #elif all([baseElement not in x for x in database.dependencyGraph[modElement]]):
                    #    addToDependencyGraph(database.dependencyGraph,baseElement,[modElement])
                    #    continue
                    addToDependencyGraph(database.dependencyGraph, modElement, [baseElement])
    # non lexical-analysis catalysis reactions
    if database.forceModificationFlag:
        for reaction, classification in zip(rules, database.classifications):
            if classification == 'Transformation':
                preaction = list(parseReactions(reaction))
                # the species whose name contains the other one is treated as the modified form
                if preaction[1][0] in preaction[0][0]:
                    base = preaction[1][0]
                    mod = preaction[0][0]
                else:
                    mod = preaction[1][0]
                    base = preaction[0][0]
                if database.dependencyGraph[mod] == []:
                    database.dependencyGraph[mod] = [[base]]
    '''
    #complex catalysis reactions
    for key in indirectEquivalenceTranslator:
        #first remove these entries from the dependencyGraph since
        #they are not true bindingReactions
        for namingEquivalence in indirectEquivalenceTranslator[key]:
            removedElement = ''
            tmp3 = deepcopy(namingEquivalence[1])
            if tmp3 in database.dependencyGraph[namingEquivalence[0][0]]:
                removedElement = namingEquivalence[0][0]
            elif tmp3 in database.dependencyGraph[namingEquivalence[0][1]]:
                removedElement = namingEquivalence[0][1]
            else:
                tmp3.reverse()
                if tmp3 in database.dependencyGraph[namingEquivalence[0][0]]:
                    removedElement = namingEquivalence[0][0]
                elif tmp3 in database.dependencyGraph[namingEquivalence[0][1]]:
                    removedElement = namingEquivalence[0][1]
            #then add the new, true dependencies
            #if its not supposed to be a basic element
            tmp = [x for x in namingEquivalence[1] if x not in namingEquivalence[2]]
            tmp.extend([x for x in namingEquivalence[2] if x not in namingEquivalence[1]])
            tmp2 = deepcopy(tmp)
            tmp2.reverse()
            ##TODO: map back for the elements in namingEquivalence[2]
            if tmp not in database.dependencyGraph[namingEquivalence[3][0]] \
                and tmp2 not in database.dependencyGraph[namingEquivalence[3][0]]:
                if sorted(tmp) == sorted(tmp3):
                    continue
                if all(x in database.dependencyGraph for x in tmp):
                    if removedElement in database.dependencyGraph:
                        database.dependencyGraph[removedElement].remove(tmp3)
                    logMess('INFO:Atomization','Removing {0}={1} and adding {2}={3} instead\
 from the dependency list since we determined it is not a true binding reaction based on lexical analysis'\
                    .format(removedElement,tmp3,namingEquivalence[3][0],tmp))
                    database.dependencyGraph[namingEquivalence[3][0]] = [tmp]
                else:
                    logMess('WARNING:Atomization','We determined that {0}={1} based on lexical analysis instead of \
{2}={3} (stoichiometry) but one of the constituent components in {1} is not a molecule so no action was taken'.format(namingEquivalence[3][0],
tmp,removedElement,tmp3))
    #user defined stuff
    '''
    # user defined complexes take precedence over everything discovered above
    for element in database.labelDictionary:
        if len(database.labelDictionary[element][0]) == 0 or element == \
                database.labelDictionary[element][0][0]:
            addToDependencyGraph(database.dependencyGraph, element, [])
        else:
            database.dependencyGraph[element] = [list(database.labelDictionary[element][0])]
    # stuff obtained from string similarity analysis
    for element in database.lexicalLabelDictionary:
        # similarity analysis has less priority than anything we discovered
        # before
        if element in database.dependencyGraph and \
                len(database.dependencyGraph[element]) > 0:
            continue
        if len(database.lexicalLabelDictionary[element][0]) == 0 or element == \
                database.lexicalLabelDictionary[element][0][0]:
            addToDependencyGraph(database.dependencyGraph, element, [])
        else:
            logMess('INFO:Atomization', 'added induced speciesStructure {0}={1}'
                    .format(element, database.lexicalLabelDictionary[element][0]))
            database.dependencyGraph[element] = [list(database.lexicalLabelDictionary[element][0])]
    # pure lexical analysis over the species we could not resolve any other way
    orphanedSpecies = [x for x in database.dependencyGraph if database.dependencyGraph[x] == []]
    strippedMolecules = [x.strip('()') for x in molecules]
    tmpDependency, database.tmpEquivalence = database.sbmlAnalyzer.findClosestModification(orphanedSpecies, strippedMolecules)
    for species in tmpDependency:
        if tmpDependency[species] == []:
            addToDependencyGraph(database.dependencyGraph, species, [])
        for instance in tmpDependency[species]:
            addToDependencyGraph(database.dependencyGraph, species, instance)
    ##### sct
    #FIXME: wtf was unevenelementdict supposed to be for
    #print database.dependencyGraph
    # consolidation resolves conflicting/cyclic entries and assigns weights
    prunnedDependencyGraph, database.weights, unevenElementDict, database.artificialEquivalenceTranslator = \
        consolidateDependencyGraph(database.dependencyGraph, equivalenceTranslator,
                                   database.eequivalenceTranslator, database.sbmlAnalyzer)
    return prunnedDependencyGraph, database
def analyzeHelper(document, reactionDefinitions, useID, outputFile, speciesEquivalence, atomize, translator, bioGrid=False):
    '''
    Taking the atomized dictionary and a series of data structures, this
    method does the actual BNGL string output.

    It extracts parameters, species, reactions, compartments and assignment
    rules from the SBML document, reconciles assignment-rule-defined entities
    (commenting out parameters, converting pseudo-observables to functions),
    post-processes rate functions, and hands everything to the writer.

    Keyword arguments:
    document -- libsbml document; document.getModel() is the model read
    reactionDefinitions -- unused in the visible body of this function
    useID -- whether species are referenced by SBML id (passed to SBML2BNGL)
    outputFile -- path of the .bngl file to write; a .log file may be written next to it
    speciesEquivalence -- unused in the visible body of this function
    atomize -- True for an atomized translation (affects the notes comment only here)
    translator -- atomized species dictionary produced upstream
    bioGrid -- unused in the visible body of this function

    Returns (numRules, numObservables, evaluate, evaluate2, numCompartments,
    speciesAnnotation, finalString, speciesDict).
    '''
    useArtificialRules = False
    parser = SBML2BNGL(document.getModel(), useID)
    database = structures.Databases()
    #translator,log,rdf = m2c.transformMolecules(parser,database,reactionDefinitions,speciesEquivalence)
    #try:
    #bioGridDict = {}
    #if biogrid:
    #    bioGridDict = biogrid()
    #if atomize:
    #    translator = mc.transformMolecules(parser,database,reactionDefinitions,speciesEquivalence,bioGridDict)
    #else:
    #    translator={}
    parser = SBML2BNGL(document.getModel(), useID)
    #except:
    #    print 'failure'
    #    return None,None,None,None
    #translator = {}
    param, zparam = parser.getParameters()
    molecules, initialConditions, observables, speciesDict = parser.getSpecies(translator, [x.split(' ')[0] for x in param])
    # finally, adjust parameters and initial concentrations according to whatever initialassignments say
    param, zparam, initialConditions = parser.getInitialAssignments(translator, param, zparam, molecules, initialConditions)
    compartments = parser.getCompartments()
    functions = []
    assigmentRuleDefinedParameters = []
    reactionParameters, rules, rateFunctions = parser.getReactions(translator, len(compartments) > 1, atomize=atomize)
    functions.extend(rateFunctions)
    aParameters, aRules, nonzparam, artificialRules, removeParams, artificialObservables = parser.getAssignmentRules(zparam, param, molecules)
    # parameters that turned out to be non-zero get re-added with a 0 default
    for element in nonzparam:
        param.append('{0} 0'.format(element))
    param = [x for x in param if x not in removeParams]
    # single-compartment models tag species with that compartment; otherwise '@cell'
    tags = '@{0}'.format(compartments[0].split(' ')[0]) if len(compartments) == 1 else '@cell'
    molecules.extend([x.split(' ')[0] for x in removeParams])
    if len(molecules) == 0:
        compartments = []
    observables.extend('Species {0} {0}'.format(x.split(' ')[0]) for x in removeParams)
    for x in removeParams:
        initialConditions.append(x.split(' ')[0] + tags + ' ' + x.split(' ')[1])
    ##Comment out those parameters that are defined with assignment rules
    ##TODO: I think this is correct, but it may need to be checked
    tmpParams = []
    for idx, parameter in enumerate(param):
        for key in artificialObservables:
            if re.search('^{0}\s'.format(key), parameter) != None:
                assigmentRuleDefinedParameters.append(idx)
    tmpParams.extend(artificialObservables)
    tmpParams.extend(removeParams)
    tmpParams = set(tmpParams)
    correctRulesWithParenthesis(rules, tmpParams)
    # '#' turns the line into a BNGL comment
    for element in assigmentRuleDefinedParameters:
        param[element] = '#' + param[element]
    deleteMolecules = []
    deleteMoleculesFlag = True
    for key in artificialObservables:
        # drop the observable corresponding to this assignment rule, keep the function
        flag = -1
        for idx, observable in enumerate(observables):
            if 'Species {0} {0}()'.format(key) in observable:
                flag = idx
        if flag != -1:
            observables.pop(flag)
        functions.append(artificialObservables[key])
        flag = -1
        if '{0}()'.format(key) in molecules:
            flag = molecules.index('{0}()'.format(key))
        if flag != -1:
            # record either the molecule index or the key, depending on deletion mode
            if deleteMoleculesFlag:
                deleteMolecules.append(flag)
            else:
                deleteMolecules.append(key)
            #result =validateReactionUsage(molecules[flag],rules)
            #if result != None:
            #    logMess('ERROR','Pseudo observable {0} in reaction {1}'.format(molecules[flag],result))
            #molecules.pop(flag)
        flag = -1
        for idx, specie in enumerate(initialConditions):
            if ':{0}('.format(key) in specie:
                flag = idx
        if flag != -1:
            initialConditions[flag] = '#' + initialConditions[flag]
    # reverse order so earlier indices stay valid while popping
    for flag in sorted(deleteMolecules, reverse=True):
        if deleteMoleculesFlag:
            logMess('WARNING:Simulation', '{0} reported as function, but usage is ambiguous'.format(molecules[flag]))
            result = validateReactionUsage(molecules[flag], rules)
            if result != None:
                logMess('ERROR:Simulation', 'Pseudo observable {0} in reaction {1}'.format(molecules[flag], result))
            molecules.pop(flag)
        else:
            logMess('WARNING:Simulation', '{0} reported as species, but usage is ambiguous.'.format(flag))
            artificialObservables.pop(flag)
    functions.extend(aRules)
    sbmlfunctions = parser.getSBMLFunctions()
    processFunctions(functions, sbmlfunctions, artificialObservables, rateFunctions)
    # 3 passes to inline nested SBML function references into each other
    for interation in range(0, 3):
        for sbml2 in sbmlfunctions:
            for sbml in sbmlfunctions:
                if sbml == sbml2:
                    continue
                if sbml in sbmlfunctions[sbml2]:
                    sbmlfunctions[sbml2] = writer.extendFunction(sbmlfunctions[sbml2], sbml, sbmlfunctions[sbml])
    functions = reorderFunctions(functions)
    functions = changeNames(functions, aParameters)
    # print [x for x in functions if 'functionRate60' in x]
    functions = unrollFunctions(functions)
    rules = changeRates(rules, aParameters)
    if len(compartments) > 1 and 'cell 3 1.0' not in compartments:
        compartments.append('cell 3 1.0')
    # sbml always has the 'cell' default compartment, even when it
    # doesn't declare it
    elif len(compartments) == 0 and len(molecules) != 0:
        compartments.append('cell 3 1.0')
    if len(artificialRules) + len(rules) == 0:
        logMess('ERROR:Simulation', 'The file contains no reactions')
    if useArtificialRules or len(rules) == 0:
        rules = ['#{0}'.format(x) for x in rules]
        evaluate = evaluation(len(observables), translator)
        artificialRules.extend(rules)
        rules = artificialRules
    else:
        artificialRules = ['#{0}'.format(x) for x in artificialRules]
        evaluate = evaluation(len(observables), translator)
        rules.extend(artificialRules)
    commentDictionary = {}
    if atomize:
        commentDictionary['notes'] = "'This is an atomized translation of an SBML model created on {0}.".format(time.strftime("%d/%m/%Y"))
    else:
        commentDictionary['notes'] = "'This is a plain translation of an SBML model created on {0}.".format(time.strftime("%d/%m/%Y"))
    commentDictionary['notes'] += " The original model has {0} molecules and {1} reactions. The translated model has {2} molecules and {3} rules'".format(parser.model.getNumSpecies(), parser.model.getNumReactions(), len(molecules), len(set(rules)))
    meta = parser.getMetaInformation(commentDictionary)
    from collections import OrderedDict
    # OrderedDict.fromkeys deduplicates while preserving first-seen order
    finalString = writer.finalText(meta, param + reactionParameters, molecules, initialConditions, list(OrderedDict.fromkeys(observables)), list(OrderedDict.fromkeys(rules)), functions, compartments, outputFile)
    #print outputFile
    logMess('INFO:Summary', 'File contains {0} molecules out of {1} original SBML species'.format(len(molecules), len(observables)))
    # store a logfile
    try:
        if len(logMess.log) > 0:
            with open(outputFile + '.log', 'w') as f:
                for element in logMess.log:
                    f.write(element + '\n')
    except AttributeError:
        print "error"
    except IOError:
        pass
    #print ""
    # rate of each classified rule
    evaluate2 = 0 if len(observables) == 0 else len(molecules)*1.0/len(observables)
    return len(rules), len(observables), evaluate, evaluate2, len(compartments), parser.getSpeciesAnnotation(), finalString, speciesDict
# NOTE(review): the triple quote below appears to open a disabled region
# containing an older analyzeFile/analyzeHelper -- confirm before removing
'''
def analyzeFile(bioNumber,reactionDefinitions,useID,outputFile,speciesEquivalence=None): useArtificialRules = False reader = libsbml.SBMLReader() document = reader.readSBMLFromFile('XMLExamples/curated/BIOMD%010i.xml' % bioNumber) parser =SBML2BNGL(document.getModel(),useID) database = structures.Databases() try: translator,log = m2c.transformMolecules(parser,database,reactionDefinitions,speciesEquivalence) #translator={} except: print 'failure' return None,None #translator = {} param,zparam = parser.getParameters() molecules,species,observables = parser.getSpecies(translator) compartments = parser.getCompartments() functions = [] idxArray = [] _,rules,tfunc = parser.getReactions(translator,True) functions.extend(tfunc) aParameters,aRules,nonzparam,artificialRules,removeParams,artificialObservables = parser.getAssignmentRules(zparam,param,molecules) for element in nonzparam: param.append('{0} 0'.format(element)) param = [x for x in param if x not in removeParams] tags = '@{0}'.format(compartments[0].split(' ')[0]) if len(compartments) == 1 else '@cell' molecules.extend([x.split(' ')[0] for x in removeParams]) if len(molecules) == 0: compartments = [] observables.extend('Species {0} {0}'.format(x.split(' ')[0]) for x in removeParams) for x in removeParams: species.append(x.split(' ')[0] + tags + ' ' + x.split(' ')[1]) ##Comment out those parameters that are defined with assignment rules ##TODO: I think this is correct, but it may need to be checked for idx,parameter in enumerate(param): for key in artificialObservables: if re.search('^{0}\s'.format(key),parameter)!= None: idxArray.append(idx) for element in idxArray: param[element] = '#' + param[element] for key in artificialObservables: flag = -1 for idx,observable in enumerate(observables): if 'Species {0} {0}()'.format(key) in observable: flag = idx if flag != -1: observables.pop(flag) functions.append(artificialObservables[key]) flag = -1 if '{0}()'.format(key) in molecules: flag = 
molecules.index('{0}()'.format(key)) if flag != -1: molecules.pop(flag) flag = -1 for idx,specie in enumerate(species): if ':{0}('.format(key) in specie: flag = idx if flag != -1: species[flag] = '#' + species[flag] functions.extend(aRules) sbmlfunctions = parser.getSBMLFunctions() for interation in range(0,3): for sbml2 in sbmlfunctions: for sbml in sbmlfunctions: if sbml == sbml2: continue if sbml in sbmlfunctions[sbml2]: sbmlfunctions[sbml2] = writer.extendFunction(sbmlfunctions[sbml2],sbml,sbmlfunctions[sbml]) for idx in range(0,len(functions)): for sbml in sbmlfunctions: if sbml in functions[idx]: functions[idx] = writer.extendFunction(functions[idx],sbml,sbmlfunctions[sbml]) #functions.extend(sbmlfunctions) dependencies2 = {} for idx in range(0,len(functions)): dependencies2[functions[idx].split(' = ')[0].split('(')[0].strip()] = [] for key in artificialObservables: oldfunc = functions[idx] functions[idx] = (re.sub(r'(\W|^)({0})([^\w(]|$)'.format(key),r'\1\2()\3',functions[idx])) if oldfunc != functions[idx]: dependencies2[functions[idx].split(' = ')[0].split('(')[0]].append(key) for element in sbmlfunctions: oldfunc = functions[idx] key = element.split(' = ')[0].split('(')[0] if re.search('(\W|^){0}(\W|$)'.format(key),functions[idx].split(' = ')[1]) != None: dependencies2[functions[idx].split(' = ')[0].split('(')[0]].append(key) for element in tfunc: key = element.split(' = ')[0].split('(')[0] if key in functions[idx].split(' = ')[1]: dependencies2[functions[idx].split( ' = ')[0].split('(')[0]].append(key) ''' for counter in range(0,3): for element in dependencies2: if len(dependencies2[element]) > counter: dependencies2[element].extend(dependencies2[dependencies2[element][counter]]) ''' fd = [] for function in functions: fd.append([function,resolveDependencies(dependencies2,function.split(' = ' )[0].split('(')[0],0)]) fd = sorted(fd,key= lambda rule:rule[1]) functions = [x[0] for x in fd] if len(param) == 0: param.append('dummy 0') #functions.extend(aRules) 
if len(compartments) > 1 and 'cell 3 1.0' not in compartments: compartments.append('cell 3 1.0') if len(artificialRules) + len(rules) == 0: logMess('ERROR','The file contains no reactions') if useArtificialRules or len(artificialRules) > 0: rules =['#{0}'.format(x) for x in rules] evaluate = evaluation(len(artificialRules),translator) artificialRules.extend(rules) writer.finalText(param,molecules,species,observables,artificialRules,functions,compartments,outputFile) else: artificialRules =['#{0}'.format(x) for x in artificialRules] evaluate = evaluation(len(rules),translator) rules.extend(artificialRules) writer.finalText(param,molecules,species,observables,rules,functions,compartments,outputFile) print outputFile if len(logMess.log) > 0: with open(outputFile + '.log', 'w') as f: for element in logMess.log: f.write(element + '\n') return len(rules), evaluate
def analyzeHelper(document, reactionDefinitions, useID, outputFile, speciesEquivalence, atomize, translator, bioGrid=False): ''' taking the atomized dictionary and a series of data structure, this method does the actual string output. ''' useArtificialRules = False parser = SBML2BNGL(document.getModel(), useID) database = structures.Databases() #translator,log,rdf = m2c.transformMolecules(parser,database,reactionDefinitions,speciesEquivalence) #try: #bioGridDict = {} #if biogrid: # bioGridDict = biogrid() #if atomize: # translator = mc.transformMolecules(parser,database,reactionDefinitions,speciesEquivalence,bioGridDict) #else: # translator={} parser = SBML2BNGL(document.getModel(), useID) #except: # print 'failure' # return None,None,None,None #translator = {} param, zparam = parser.getParameters() molecules, initialConditions, observables, speciesDict = parser.getSpecies( translator, [x.split(' ')[0] for x in param]) #finally, adjust parameters and initial concentrations according to whatever initialassignments say param, zparam, initialConditions = parser.getInitialAssignments( translator, param, zparam, molecules, initialConditions) compartments = parser.getCompartments() functions = [] assigmentRuleDefinedParameters = [] reactionParameters, rules, rateFunctions = parser.getReactions( translator, len(compartments) > 1, atomize=atomize) functions.extend(rateFunctions) aParameters, aRules, nonzparam, artificialRules, removeParams, artificialObservables = parser.getAssignmentRules( zparam, param, molecules) for element in nonzparam: param.append('{0} 0'.format(element)) param = [x for x in param if x not in removeParams] tags = '@{0}'.format( compartments[0].split(' ')[0]) if len(compartments) == 1 else '@cell' molecules.extend([x.split(' ')[0] for x in removeParams]) if len(molecules) == 0: compartments = [] observables.extend('Species {0} {0}'.format(x.split(' ')[0]) for x in removeParams) for x in removeParams: initialConditions.append( x.split(' ')[0] + tags + 
' ' + x.split(' ')[1]) ##Comment out those parameters that are defined with assignment rules ##TODO: I think this is correct, but it may need to be checked tmpParams = [] for idx, parameter in enumerate(param): for key in artificialObservables: if re.search('^{0}\s'.format(key), parameter) != None: assigmentRuleDefinedParameters.append(idx) tmpParams.extend(artificialObservables) tmpParams.extend(removeParams) tmpParams = set(tmpParams) correctRulesWithParenthesis(rules, tmpParams) for element in assigmentRuleDefinedParameters: param[element] = '#' + param[element] deleteMolecules = [] deleteMoleculesFlag = True for key in artificialObservables: flag = -1 for idx, observable in enumerate(observables): if 'Species {0} {0}()'.format(key) in observable: flag = idx if flag != -1: observables.pop(flag) functions.append(artificialObservables[key]) flag = -1 if '{0}()'.format(key) in molecules: flag = molecules.index('{0}()'.format(key)) if flag != -1: if deleteMoleculesFlag: deleteMolecules.append(flag) else: deleteMolecules.append(key) #result =validateReactionUsage(molecules[flag],rules) #if result != None: # logMess('ERROR','Pseudo observable {0} in reaction {1}'.format(molecules[flag],result)) #molecules.pop(flag) flag = -1 for idx, specie in enumerate(initialConditions): if ':{0}('.format(key) in specie: flag = idx if flag != -1: initialConditions[flag] = '#' + initialConditions[flag] for flag in sorted(deleteMolecules, reverse=True): if deleteMoleculesFlag: logMess( 'WARNING:Simulation', '{0} reported as function, but usage is ambiguous'.format( molecules[flag])) result = validateReactionUsage(molecules[flag], rules) if result != None: logMess( 'ERROR:Simulation', 'Pseudo observable {0} in reaction {1}'.format( molecules[flag], result)) molecules.pop(flag) else: logMess( 'WARNING:Simulation', '{0} reported as species, but usage is ambiguous.'.format( flag)) artificialObservables.pop(flag) functions.extend(aRules) sbmlfunctions = parser.getSBMLFunctions() 
processFunctions(functions, sbmlfunctions, artificialObservables, rateFunctions) for interation in range(0, 3): for sbml2 in sbmlfunctions: for sbml in sbmlfunctions: if sbml == sbml2: continue if sbml in sbmlfunctions[sbml2]: sbmlfunctions[sbml2] = writer.extendFunction( sbmlfunctions[sbml2], sbml, sbmlfunctions[sbml]) functions = reorderFunctions(functions) functions = changeNames(functions, aParameters) # print [x for x in functions if 'functionRate60' in x] functions = unrollFunctions(functions) rules = changeRates(rules, aParameters) if len(compartments) > 1 and 'cell 3 1.0' not in compartments: compartments.append('cell 3 1.0') #sbml always has the 'cell' default compartment, even when it #doesn't declare it elif len(compartments) == 0 and len(molecules) != 0: compartments.append('cell 3 1.0') if len(artificialRules) + len(rules) == 0: logMess('ERROR:Simulation', 'The file contains no reactions') if useArtificialRules or len(rules) == 0: rules = ['#{0}'.format(x) for x in rules] evaluate = evaluation(len(observables), translator) artificialRules.extend(rules) rules = artificialRules else: artificialRules = ['#{0}'.format(x) for x in artificialRules] evaluate = evaluation(len(observables), translator) rules.extend(artificialRules) commentDictionary = {} if atomize: commentDictionary[ 'notes'] = "'This is an atomized translation of an SBML model created on {0}.".format( time.strftime("%d/%m/%Y")) else: commentDictionary[ 'notes'] = "'This is a plain translation of an SBML model created on {0}.".format( time.strftime("%d/%m/%Y")) commentDictionary[ 'notes'] += " The original model has {0} molecules and {1} reactions. 
The translated model has {2} molecules and {3} rules'".format( parser.model.getNumSpecies(), parser.model.getNumReactions(), len(molecules), len(set(rules))) meta = parser.getMetaInformation(commentDictionary) from collections import OrderedDict finalString = writer.finalText(meta, param + reactionParameters, molecules, initialConditions, list(OrderedDict.fromkeys(observables)), list(OrderedDict.fromkeys(rules)), functions, compartments, outputFile) #print outputFile logMess( 'INFO:Summary', 'File contains {0} molecules out of {1} original SBML species'.format( len(molecules), len(observables))) #store a logfile try: if len(logMess.log) > 0: with open(outputFile + '.log', 'w') as f: for element in logMess.log: f.write(element + '\n') except AttributeError: print "error" except IOError: pass #print "" #rate of each classified rule evaluate2 = 0 if len( observables) == 0 else len(molecules) * 1.0 / len(observables) return len(rules), len(observables), evaluate, evaluate2, len( compartments), parser.getSpeciesAnnotation(), finalString, speciesDict '''
def createCatalysisRBM(dependencyGraph, element, translator, reactionProperties,
                       equivalenceDictionary, sbmlAnalyzer, database):
    '''
    If it's a catalysis reaction create a new component/state.

    Walks the dependency chain of element[0] down to its base species,
    collecting one (component, state) pair per recognized modification, then
    registers both the (extended) base species and the modified species in
    ``translator``.

    Keyword arguments:
    dependencyGraph -- maps species name -> list of candidate compositions
    element -- tuple/list whose first entry is the species name to process
    translator -- species-name -> structure dictionary; mutated in place
    reactionProperties -- classification -> [componentName, stateName]
    equivalenceDictionary -- naming equivalences used by identifyReaction
    sbmlAnalyzer -- provides findMatchingModification for lexical matching
    database -- read for the forceModificationFlag option

    Raises CycleError if the dependency chain revisits a species.
    Returns None; all results are side effects on ``translator``.
    '''
    if dependencyGraph[element[0]][0][0] == element[0]:
        # the species is its own base: just make sure it exists in the translator
        if element[0] not in translator:
            translator[element[0]] = createEmptySpecies(element[0])
    else:
        componentStateArray = []
        tmp = element[0]
        existingComponents = []
        memory = []
        forceActivationSwitch = False
        while dependencyGraph[tmp] != []:
            # what kind of catalysis are we dealing with; try lexical matching
            # first, then equivalence-based identification, for both the
            # current link and the original species
            classification = sbmlAnalyzer.findMatchingModification(tmp, dependencyGraph[tmp][0][0])
            if not classification:
                classification = identifyReaction(equivalenceDictionary,
                                                  dependencyGraph[tmp][0][0], tmp)
            if not classification:
                classification = sbmlAnalyzer.findMatchingModification(element[0], dependencyGraph[tmp][0][0])
            if not classification:
                classification = identifyReaction(equivalenceDictionary,
                                                  dependencyGraph[tmp][0][0], element[0])
            if classification is not None and \
                    reactionProperties[classification][0] not in existingComponents:
                componentStateArray.append(reactionProperties[classification])
                existingComponents.append(reactionProperties[classification][0])
            elif database.forceModificationFlag and classification is None and not forceActivationSwitch:
                # unknown modification, but the user asked us to force one generic
                # modification component (at most once per species)
                forceActivationSwitch = True
                baseName = getTrueTag(dependencyGraph,
                                      dependencyGraph[element[0]][0][0])
                componentStateArray.append(['genericMod', tmp])
                # fixed category ordering to match the 'WARNING:Atomization'
                # convention used by every other logMess call in this module
                logMess('WARNING:Atomization', 'adding forced transformation: {0}:{1}:{2}'.format(baseName, dependencyGraph[element[0]], element[0]))
            elif classification is None:
                logMess('CRITICAL:Atomization', 'unregistered modification: {0}:{1}'.format(element[0], dependencyGraph[element[0]]))
            memory.append(tmp)
            tmp = dependencyGraph[tmp][0][0]
            if tmp in memory:
                # pass the offending chain along, consistent with
                # resolveDependencyGraphHelper's CycleError(memory)
                raise CycleError(memory)
        baseName = getTrueTag(dependencyGraph, dependencyGraph[element[0]][0][0])
        species = createEmptySpecies(baseName)
        # use the already existing structure if its in the
        # translator, otherwise empty
        if baseName in translator:
            species = translator[baseName]
        modifiedSpecies = deepcopy(species)
        for componentState in componentStateArray:
            # base species gets the component in the unset ('0') state,
            # modified species gets it in the active state
            addComponentToMolecule(species, baseName, componentState[0])
            addComponentToMolecule(modifiedSpecies, baseName, componentState[0])
            addStateToComponent(species, baseName, componentState[0], componentState[1])
            addStateToComponent(modifiedSpecies, baseName, componentState[0], componentState[1])
            addStateToComponent(species, baseName, componentState[0], '0')
        # update the base species
        if len(componentStateArray) > 0:
            translator[baseName] = deepcopy(species)
        translator[element[0]] = modifiedSpecies
def selectBestCandidate(reactant, candidates, dependencyGraph, sbmlAnalyzer, equivalenceTranslator=equivalenceTranslator, equivalenceDictionary=equivalenceDictionary):
    '''
    Pick the best composition candidate for ``reactant`` among several
    stoichiometry-derived alternatives.

    Each candidate is resolved down to base species; if the candidates
    disagree, majority/lexical heuristics and fuzzy string matching are used
    to settle on one. May mutate ``dependencyGraph`` and
    ``equivalenceTranslator``/``equivalenceDictionary`` (the last two default
    to enclosing-scope objects -- this is a nested closure).

    Returns a tuple ([bestCandidate], unevenElements), or (None, None) when
    no usable candidate could be determined.
    '''
    tmpCandidates = []
    modifiedElements = []
    unevenElements = []
    for individualAnswer in candidates:
        tmpAnswer = []
        flag = True
        # a candidate that is just the reactant itself carries no information
        if len(individualAnswer) == 1 and individualAnswer[0] == reactant:
            continue
        for chemical in individualAnswer:
            # we cannot handle tuple naming conventions for now
            if type(chemical) == tuple:
                flag = False
                continue
            # resolve down to base species; second call collects (base, modified) pairs
            rootChemical = resolveDependencyGraph(dependencyGraph, chemical)
            mod = resolveDependencyGraph(dependencyGraph, chemical, True)
            if mod != []:
                modifiedElements.extend(mod)
            for element in rootChemical:
                if len(element) == 1 and type(element[0]) == tuple:
                    continue
                if element == chemical:
                    tmpAnswer.append(chemical)
                elif type(element) == tuple:
                    tmpAnswer.append(element)
                else:
                    tmpAnswer.append(element[0])
        if flag:
            tmpAnswer = sorted(tmpAnswer)
            tmpCandidates.append(tmpAnswer)
    # we cannot handle tuple naming conventions for now
    if len(tmpCandidates) == 0:
        logMess('CRITICAL:Atomization', 'I dont know how to process these candidates and I have no way to make an educated guess. Politely refusing to translate {0}={1}.'.format(reactant, candidates))
        return None, None
    originalTmpCandidates = deepcopy(tmpCandidates)
    # if we have more than one modified element for a single reactant
    # we can try to choose the one that is most similar to the original
    # reactant
    #FIXME:Fails if there is a double modification
    newModifiedElements = {}
    for element in modifiedElements:
        # prefer the modification matching the reactant itself
        if element[0] not in newModifiedElements or element[1] == reactant:
            newModifiedElements[element[0]] = element[1]
    # check if all candidates are the same
    #print '...',tmpCandidates[0]
    if tmpCandidates[1:] == tmpCandidates[:-1] or len(tmpCandidates) == 1:
        # unanimous candidates: substitute base species by their modified forms
        flag = True
        while flag:
            flag = False
            for idx, chemical in enumerate(tmpCandidates[0]):
                if chemical in newModifiedElements:
                    tmpCandidates[0][idx] = newModifiedElements[chemical]
                    flag = True
                    break
        #tmpCandidates[0] = candidates[0]
    else:
        # temporal solution for defaulting to the first alternative
        #print '---','error',reactant,newModifiedElements,tmpCandidates
        # keep only the elements that appear in every candidate (majority vote)
        totalElements = [y for x in tmpCandidates for y in x]
        elementDict = {}
        for word in totalElements:
            if word not in elementDict:
                elementDict[word] = 0
            elementDict[word] += 1
        newTmpCandidates = [[]]
        for element in elementDict:
            if elementDict[element] % len(tmpCandidates) == 0:
                newTmpCandidates[0].append(element)
            #elif elementDict[element] % len(tmpCandidates) != 0 and re.search('(_|^){0}(_|$)'.format(element),reactant):
            #    newTmpCandidates[0].append(element)
            #    unevenElements.append([element])
            else:
                logMess('WARNING:Atomization', 'Are these actually the same? {0}={1}.'.format(reactant, candidates))
                unevenElements.append(element)
        flag = True
        # this should be done on newtmpCandidates instead of tmpcandidates
        while flag:
            flag = False
            for idx, chemical in enumerate(tmpCandidates[0]):
                if chemical in newModifiedElements and newModifiedElements[chemical] in reactant:
                    tmpCandidates[0][idx] = newModifiedElements[chemical]
                    flag = True
                    break
    # if all the candidates are about modification changes to a complex
    # then try to do it through lexical analysis
    if all([len(candidate) == 1 for candidate in candidates]) and \
            candidates[0][0] != reactant and len(tmpCandidates[0]) > 1:
        if reactant != None:
            pass
        # analyze based on standard modifications
        lexCandidate, translationKeys, tmpequivalenceTranslator = sbmlAnalyzer.analyzeSpeciesModification(candidates[0][0], reactant, originalTmpCandidates[0])
        #FIXME: this is iffy. is it always an append modification? could be prepend
        if lexCandidate != None:
            lexCandidate = tmpCandidates[0][originalTmpCandidates[0].index(lexCandidate)]
            lexCandidateModification = lexCandidate + translationKeys[0]
            for element in tmpequivalenceTranslator:
                if element not in equivalenceTranslator:
                    equivalenceTranslator[element] = []
                equivalenceTranslator[element].append((lexCandidate, lexCandidateModification))
            dependencyGraph[lexCandidateModification] = [[lexCandidate]]
            # replace every occurrence of the base by its modified form
            while lexCandidate in tmpCandidates[0]:
                tmpCandidates[0].remove(lexCandidate)
                tmpCandidates[0].append(lexCandidateModification)
            return [tmpCandidates[0]], unevenElements
        else:
            fuzzyCandidateMatch = None
            '''
            if nothing else works and we know the result is a bimolecular
            complex and we know which are the basic reactants then try to
            do fuzzy string matching between the two.
            TODO: extend this to more than 2 molecule complexes.
            '''
            if len(tmpCandidates[0]) == 2:
                tmpmolecules = []
                tmpmolecules.extend(originalTmpCandidates[0])
                tmpmolecules.extend(tmpCandidates[0])
                fuzzyCandidateMatch = sbmlAnalyzer.fuzzyArtificialReaction(originalTmpCandidates[0], [reactant], tmpmolecules)
            if fuzzyCandidateMatch != None:
                logMess('INFO:Atomization', 'Used fuzzy string matching from {0} to {1}'.format(reactant, fuzzyCandidateMatch))
                return [fuzzyCandidateMatch], unevenElements
            else:
                # last ditch attempt using straighforward lexical analysis
                tmpDependency, tmpEquivalence = sbmlAnalyzer.findClosestModification([reactant], dependencyGraph.keys())
                if reactant in tmpDependency and tmpDependency[reactant] in tmpCandidates[0]:
                    # merge the lexically discovered dependencies/equivalences
                    for element in tmpDependency:
                        if element not in dependencyGraph:
                            dependencyGraph[element] = tmpDependency[element]
                    for element in tmpEquivalence:
                        if element not in equivalenceDictionary:
                            equivalenceDictionary[element] = []
                        for equivalence in tmpEquivalence[element]:
                            if equivalence[0] not in equivalenceDictionary[element]:
                                equivalenceDictionary[element].append(equivalence[0])
                    if len(tmpDependency.keys()) > 0:
                        return tmpDependency[reactant], unevenElements
                # the ive no idea whats going on branch
                # NOTE(review): modificationCandidates is always empty here, so
                # this branch always refuses -- the loop below is dead code
                modificationCandidates = {}
                if modificationCandidates == {}:
                    logMess('CRITICAL:Atomization', 'I dont know how this is modified and I have no way to make an educated guess. Politely refusing to translate {0}={1}.'.format(reactant, candidates))
                    return None, None
                for idx, molecule in enumerate(tmpCandidates[0]):
                    if molecule in modificationCandidates:
                        tmpCandidates[0][idx] = modificationCandidates[molecule]
                return [tmpCandidates[0]], unevenElements
    elif len(tmpCandidates) > 1:
        pass
    return [tmpCandidates[0]], unevenElements
def bnglFunction(rule, functionTitle, reactants, compartments=[], parameterDict={}, reactionDict={}):
    '''
    Translate a flattened SBML kinetic-law expression string into a
    BNGL-compatible function definition.

    The translation rewrites, in order: comparison operators (gt/lt/...),
    ceil/floor/pow/sqrt/sqr/root/and/or via a pyparsing-based recursive
    rewriter, piecewise() into if(), lambda() bodies, compartment references,
    local parameter references, reaction-id references, and a handful of
    reserved tokens (pi, log -> ln, e -> are).

    Keyword arguments:
        rule           -- the rate expression to translate
        functionTitle  -- name of the resulting BNGL function
        reactants      -- reactant list; only referenced by commented-out code
        compartments   -- list of [name, dimensions, ...] entries whose names
                          are stripped from (or replaced by their dimension in)
                          the rate expression
        parameterDict  -- local parameter name -> replacement text
        reactionDict   -- reaction id -> net-flux replacement text

    Returns the string 'functionTitle = expression', or just the expression
    when the rule was a lambda (the title is then baked into the body).

    NOTE(review): compartments/parameterDict/reactionDict are mutable default
    arguments; they appear to be read-only in this function so this is benign,
    but confirm before refactoring.
    '''
    def powParse(match):
        #rewrite pow()/root() into BNGL's '^' operator
        if match.group(1) == 'root':
            exponent = '(1/%s)' % match.group(3)
        else:
            exponent = match.group(3)
        if match.group(1) in ['root', 'pow']:
            operator = '^'
        #NOTE(review): 'operator' is unbound when group(1) is neither 'root'
        #nor 'pow'; presumably the calling regex guarantees it is -- confirm
        return '({0}){1}({2})'.format(match.group(2), operator, exponent)

    def compParse(match):
        #rewrite comparison/boolean keywords into their symbolic operators
        translator = {
            'gt': '>',
            'lt': '<',
            'and': '&&',
            'or': '||',
            'geq': '>=',
            'leq': '<=',
            'eq': '=='
        }
        exponent = match.group(3)
        operator = translator[match.group(1)]
        return '{0} {1} {2}'.format(match.group(2), operator, exponent)

    def ceilfloorParse(math):
        #ceil/floor emulated with rint(); NOTE(review): this helper is not
        #referenced anywhere in this function (constructFromList handles
        #ceil/floor instead) -- possibly dead code
        flag = False
        if math.group(1) == 'ceil':
            flag = True
        if flag:
            return 'min(rint({0}+0.5),rint({0} + 1))'.format(math.group(2))
        else:
            return 'min(rint({0}-0.5),rint({0}+0.5))'.format(math.group(2))

    def parameterRewrite(match):
        #NOTE(review): not referenced within this function -- possibly dead code
        return match.group(1) + 'param_' + match.group(2) + match.group(3)

    def constructFromList(argList, optionList):
        #recursively rebuild an expression string from a pyparsing nested
        #list, expanding every operator named in optionList
        parsedString = ''
        idx = 0
        #NOTE(review): this translator dict appears unused inside this helper
        #(the and/or branch uses its own symbolDict) -- confirm
        translator = {
            'gt': '>',
            'lt': '<',
            'and': '&&',
            'or': '||',
            'geq': '>=',
            'leq': '<=',
            'eq': '=='
        }
        while idx < len(argList):
            if type(argList[idx]) is list:
                #parenthesized subexpression: recurse
                parsedString += '(' + constructFromList(
                    argList[idx], optionList) + ')'
            elif argList[idx] in optionList:
                if argList[idx] == 'ceil':
                    parsedString += 'min(rint(({0}) + 0.5),rint(({0}) + 1))'.format(
                        constructFromList(argList[idx + 1], optionList))
                    idx += 1
                elif argList[idx] == 'floor':
                    parsedString += 'min(rint(({0}) -0.5),rint(({0}) + 0.5))'.format(
                        constructFromList(argList[idx + 1], optionList))
                    idx += 1
                elif argList[idx] in ['pow']:
                    #pow(a,b) -> ((a) ^ (b)); the last comma splits base/exponent
                    index = rindex(argList[idx + 1], ',')
                    parsedString += '((' + constructFromList(
                        argList[idx + 1][0:index], optionList) + ')'
                    parsedString += ' ^ ' + '(' + constructFromList(
                        argList[idx + 1][index + 1:], optionList) + '))'
                    idx += 1
                elif argList[idx] in ['sqr', 'sqrt']:
                    #sqr(x) -> x^2, sqrt(x) -> x^(1/2)
                    tag = '1/' if argList[idx] == 'sqrt' else ''
                    parsedString += '((' + constructFromList(
                        argList[idx + 1], optionList) + ') ^ ({0}2))'.format(tag)
                    idx += 1
                elif argList[idx] == 'root':
                    #root(n, x) -> ((x) ^ (1/(n)))
                    index = rindex(argList[idx + 1], ',')
                    tmp = '1/(' + constructFromList(argList[idx + 1][0:index],
                                                    optionList) + '))'
                    parsedString += '((' + constructFromList(
                        argList[idx + 1][index + 1:], optionList) + ') ^ ' + tmp
                    idx += 1
                elif argList[idx] == 'piecewise':
                    #piecewise(v1,c1,v2[,...]) -> if(c1,v1,<rest>), recursing on
                    #the remaining value/condition pairs when present
                    index1 = argList[idx + 1].index(',')
                    index2 = argList[idx + 1][index1 + 1:].index(',') + index1 + 1
                    try:
                        index3 = argList[idx + 1][index2 + 1:].index(',') + index2 + 1
                    except ValueError:
                        index3 = -1
                    condition = constructFromList(
                        [argList[idx + 1][index1 + 1:index2]], optionList)
                    result = constructFromList([argList[idx + 1][:index1]],
                                               optionList)
                    if index3 == -1:
                        result2 = constructFromList(
                            [argList[idx + 1][index2 + 1:]], optionList)
                    else:
                        result2 = constructFromList(
                            ['piecewise', argList[idx + 1][index2 + 1:]],
                            optionList)
                    parsedString += 'if({0},{1},{2})'.format(
                        condition, result, result2)
                    idx += 1
                elif argList[idx] in ['and', 'or']:
                    #and(a,b,...) / or(a,b,...) -> a && b && ... / a || b || ...
                    symbolDict = {'and': ' && ', 'or': ' || '}
                    indexArray = [-1]
                    elementArray = []
                    for idx2, element in enumerate(argList[idx + 1]):
                        if element == ',':
                            indexArray.append(idx2)
                    indexArray.append(len(argList[idx + 1]))
                    tmpStr = argList[idx + 1]
                    for idx2, _ in enumerate(indexArray[0:-1]):
                        elementArray.append(
                            constructFromList(
                                tmpStr[indexArray[idx2] + 1:indexArray[idx2 + 1]],
                                optionList))
                    parsedString += symbolDict[argList[idx]].join(elementArray)
                    idx += 1
                elif argList[idx] == 'lambda':
                    #lambda(p1,...,pn,body) -> (param_p1,...) = body, with every
                    #parameter occurrence in the body prefixed with 'param_'
                    tmp = '('
                    upperLimit = rindex(argList[idx + 1], ',')
                    parsedParams = []
                    for x in argList[idx + 1][0:upperLimit]:
                        if x == ',':
                            tmp += ', '
                        else:
                            tmp += 'param_' + x
                            parsedParams.append(x)
                    #tmp = ''.join([x for x in constructFromList(argList[idx+1][0:upperLimit])])
                    tmp2 = ') = ' + constructFromList(
                        argList[idx + 1][rindex(argList[idx + 1], ',') + 1:],
                        optionList)
                    for x in parsedParams:
                        while re.search(r'(\W|^)({0})(\W|$)'.format(x), tmp2) != None:
                            tmp2 = re.sub(r'(\W|^)({0})(\W|$)'.format(x),
                                          r'\1param_\2 \3', tmp2)
                    idx += 1
                    parsedString += tmp + tmp2
            else:
                #plain token: copy through unchanged
                parsedString += argList[idx]
            idx += 1
        return parsedString

    def changeToBNGL(functionList, rule, function):
        #repeatedly apply `function` as a re.sub callback to every two-argument
        #call of the names in functionList until the rule stops changing
        oldrule = ''
        #if the rule contains any mathematical function we need to reformat
        while any([
                re.search(r'(\W|^)({0})(\W|$)'.format(x), rule) != None
                for x in functionList
        ]) and (oldrule != rule):
            oldrule = rule
            for x in functionList:
                rule = re.sub('({0})\(([^,]+),([^)]+)\)'.format(x), function, rule)
            if rule == oldrule:
                #a function name is present but the pattern never matched:
                #malformed input; bail out of the loop via the while condition
                logMess('ERROR', 'Malformed pow or root function %s' % rule)
                print 'meep'
        return rule

    #rule = changeToBNGL(['pow','root'],rule,powParse)
    rule = changeToBNGL(['gt', 'lt', 'leq', 'geq', 'eq'], rule, compParse)
    #rule = changeToBNGL(['and','or'],rule,compParse)
    flag = True
    contentRule = pyparsing.Word(
        pyparsing.alphanums + '_'
    ) | ',' | '.' | '+' | '-' | '*' | '/' | '^' | '&' | '>' | '<' | '=' | '|'
    parens = pyparsing.nestedExpr('(', ')', content=contentRule)
    finalString = ''
    #remove ceil,floor
    if any([
            re.search(r'(\W|^)({0})(\W|$)'.format(x), rule) != None
            for x in ['ceil', 'floor', 'pow', 'sqrt', 'sqr', 'root', 'and', 'or']
    ]):
        argList = parens.parseString('(' + rule + ')').asList()
        rule = constructFromList(
            argList[0],
            ['floor', 'ceil', 'pow', 'sqrt', 'sqr', 'root', 'and', 'or'])
    while 'piecewise' in rule:
        argList = parens.parseString('(' + rule + ')').asList()
        rule = constructFromList(argList[0], ['piecewise'])
    #remove references to lambda functions
    if 'lambda(' in rule:
        lambdaList = parens.parseString('(' + rule + ')')
        functionBody = constructFromList(lambdaList[0].asList(), ['lambda'])
        flag = False
        rule = '{0}{1}'.format(functionTitle, functionBody)
    tmp = rule
    #delete the compartment from the rate function since cBNGL already does it
    for compartment in compartments:
        tmp = re.sub('^{0}\s*[*]'.format(compartment[0]), '', tmp)
        tmp = re.sub('([*]\s*{0})$'.format(compartment[0]), '', tmp)
        if compartment[0] in tmp:
            #compartment appears mid-expression: substitute its dimensions
            tmp = re.sub(r'(\W|^)({0})(\W|$)'.format(compartment[0]),
                         r'\1 {0} \3'.format(str(compartment[1])), tmp)
            #tmp = re.sub(r'(\W)({0})(\W)'.format(compartment[0]),r'\1%s\3' % str(compartment[1]),tmp)
            logMess(
                'INFO',
                'Exchanging reference to compartment %s for its dimensions' %
                compartment[0])
    #change references to time for time()
    #tmp =re.sub(r'(\W|^)(time)(\W|$)',r'\1time()\3',tmp)
    #tmp =re.sub(r'(\W|^)(Time)(\W|$)',r'\1time()\3',tmp)
    #BNGL has ^ for power.
    if flag:
        finalString = '%s = %s' % (functionTitle, tmp)
    else:
        finalString = tmp
    #change references to local parameters
    for parameter in parameterDict:
        finalString = re.sub(r'(\W|^)({0})(\W|$)'.format(parameter),
                             r'\1 {0} \3'.format(parameterDict[parameter]),
                             finalString)
    #change references to reaction Id's to their netflux equivalent
    for reaction in reactionDict:
        if reaction in finalString:
            finalString = re.sub(r'(\W|^)({0})(\W|$)'.format(reaction),
                                 r'\1 {0} \3'.format(reactionDict[reaction]),
                                 finalString)
    #combinations '+ -' break ibonetgen
    finalString = re.sub(r'(\W|^)([-])(\s)+', r'\1-', finalString)
    #changing reference of 't' to time()
    #finalString = re.sub(r'(\W|^)(t)(\W|$)',r'\1time()\3',finalString)
    #pi
    finalString = re.sub(r'(\W|^)(pi)(\W|$)', r'\1 3.1415926535 \3', finalString)
    #print reactants,finalString
    #log for log 10
    finalString = re.sub(r'(\W|^)log\(', r'\1 ln(', finalString)
    #reserved keyword: e
    finalString = re.sub(r'(\W|^)(e)(\W|$)', r'\1 are \3', finalString)
    #changing ceil
    #avoiding variables whose name starts with a number
    #removing mass-action elements
    tmp = finalString
    #print finalString,reactants
    #for reactant in reactants:
    #    finalString = re.sub(r'(\W|^)({0}\s+\*)'.format(reactant[0]),r'\1',finalString)
    #    finalString = re.sub(r'(\W|^)(\*\s+{0}(\s|$))'.format(reactant[0]),r'\1',finalString)
    #print finalString
    #if finalString != tmp:
    #    logMess('WARNING','Removed mass action elements from )
    return finalString
def getComplexationComponents2(species,bioGridFlag):
    '''
    method used during the atomization process. It determines how molecules
    in a species bind together

    Builds a name->molecules index from the species' component names, pairs up
    molecules whose names appear as each other's components, merges the pairs
    into connected complexes, and resolves any remaining ambiguous groupings
    through BioGrid lookups (when bioGridFlag is set) or by defaulting to the
    largest molecule.

    Returns a list of [molecule, molecule] bond pairs.

    NOTE(review): an identical second definition of this function appears
    later in this file and shadows this one at import time -- confirm which
    copy is intended and remove the other.
    '''
    def getBiggestMolecule(array):
        #"biggest" = most components, ties broken by string length then text
        sortedMolecule = sorted(array, key=lambda rule: (len(rule.components),len(str(rule)),str(rule)))
        #sortedMolecule = sorted(sortedMolecule, key=lambda rule: len(rule.components))
        return sortedMolecule[-1]
    def getNamedMolecule(array,name):
        #first molecule in array whose name matches exactly (None if absent)
        for molecule in array:
            if molecule.name == name:
                return molecule
    speciesDict = {}
    #this array will contain all molecules that bind together
    pairedMolecules = []
    #index: component name -> list of molecules carrying that component
    for x in species.molecules:
        for y in x.components:
            if y.name not in speciesDict:
                speciesDict[y.name] = []
            speciesDict[y.name].append(x)
    #this array wil contain all molecules that dont bind to anything
    orphanedMolecules = [x for x in species.molecules]
    #determine how molecules bind together
    redundantBonds = []
    for x in species.molecules:
        for component in [y for y in x.components if y.name.lower() in speciesDict.keys()]:
            #NOTE(review): case handling is inconsistent here (component.name
            #is compared both raw and lowercased) -- confirm intended matching
            if x.name.lower() in speciesDict:
                if(x in speciesDict[component.name.lower()]) and component.name in [y.name.lower() for y in speciesDict[x.name.lower()]]:
                    #mutual reference: x names the component and vice versa
                    for mol in speciesDict[x.name.lower()]:
                        if mol.name.lower() == component.name and x != mol and x in \
                        speciesDict[component.name]:
                            #consume both directions of the match so the same
                            #bond is not paired twice
                            speciesDict[x.name.lower()].remove(mol)
                            speciesDict[component.name].remove(x)
                            if x not in orphanedMolecules and mol not in orphanedMolecules:
                                #FIXME: is it necessary to remove double bonds in complexes?
                                redundantBonds.append([x,mol])
                                #continue
                            pairedMolecules.append([x, mol])
                            if x in orphanedMolecules:
                                orphanedMolecules.remove(x)
                            if mol in orphanedMolecules:
                                orphanedMolecules.remove(mol)
    #if len(redundantBonds) > 0:
    #    print [[x[0].name,x[1].name] for x in redundantBonds],str(species)
    totalComplex = [set(x) for x in pairedMolecules]
    isContinuousFlag = True
    #iterate over orphaned and find unidirectional interactions
    #e.g. if a molecule has a previous known interaction with the
    #same kind of molecule, even if it has no available components
    #e.g. k-mers`
    for element in speciesDict:
        for individualMolecule in speciesDict[element]:
            if individualMolecule in orphanedMolecules:
                candidatePartner = [x for x in species.molecules if x.name.lower() == element and x != individualMolecule]
                if len(candidatePartner) == 1:
                    #NOTE(review): these pairs are appended after totalComplex
                    #was snapshotted above, so they never join a complex --
                    #confirm whether that is intentional
                    pairedMolecules.append([candidatePartner[0],individualMolecule])
                    orphanedMolecules.remove(individualMolecule)
    #determine which pairs form a continuous chain
    while isContinuousFlag:
        isContinuousFlag = False
        for idx in range(0, len(totalComplex) - 1):
            for idx2 in range(idx + 1, len(totalComplex)):
                if len([x for x in totalComplex[idx] if x in totalComplex[idx2]]) > 0:
                    #overlapping groups: merge and restart the scan
                    totalComplex[idx] = totalComplex[idx].union(totalComplex[idx2])
                    totalComplex.pop(idx2)
                    isContinuousFlag = True
                    break
            if isContinuousFlag:
                break
    #now we process those molecules where we need to create a new component
    for element in orphanedMolecules:
        for mol1 in species.molecules:
            #when adding orphaned molecules make sure it's not already in
            #the list
            if mol1 == element and mol1 not in set().union(*totalComplex):
                totalComplex.append(set([mol1]))
    #now we process for those molecules we are not sure how do they bind
    while len(totalComplex) > 1:
        if len(totalComplex[0]) ==1 and len(totalComplex[1]) == 1:
            mol1 = list(totalComplex[0])[0]
            mol2 = list(totalComplex[1])[0]
        else:
            names1 = [str(x.name) for x in totalComplex[0]]
            names2 = [str(x.name) for x in totalComplex[1]]
            dbPair = set([])
            if bioGridFlag:
                bioGridDict = biogrid.loadBioGridDict()
            else:
                bioGridDict = {}
            comb = set([])
            equivalence = {}
            comb = [(x,y) for x in names1 for y in names2]
            dbPair = set([])
            #collect every cross-complex name pair known to BioGrid
            for element in comb:
                if element[0].upper() in bioGridDict and element[1] in bioGridDict[element[0].upper()] or \
                element[1].upper() in bioGridDict and element[0] in bioGridDict[element[1].upper()]:
                    logMess('INFO:Atomization','Biogrid info: {0}:{1}'.format(element[0],element[1]))
                    dbPair.add((element[0],element[1]))
            dbPair = list(dbPair)
            if dbPair != []:
                #arbitrarily take the first BioGrid-supported pairing
                logMess('WARNING:Atomization',"More than one interaction was found in {0}".format(dbPair))
                mol1 = getNamedMolecule(totalComplex[0],dbPair[0][0])
                mol2 = getNamedMolecule(totalComplex[1],dbPair[0][1])
            else:
                logMess('WARNING:Atomization',"We don't know how {0} and {1} bind together and there's \
                no relevant BioGrid information. Defaulting to largest molecule".format(
                [x.name for x in totalComplex[0]],[x.name for x in totalComplex[1]]))
                mol1 = getBiggestMolecule(totalComplex[0])
                mol2 = getBiggestMolecule(totalComplex[1])
        pairedMolecules.append([mol1, mol2])
        totalComplex[0] = totalComplex[0].union(totalComplex[1])
        totalComplex.pop(1)
    #totalComplex.extend(orphanedMolecules)
    return pairedMolecules
def getComplexationComponents2(species, bioGridFlag):
    '''
    method used during the atomization process. It determines how molecules
    in a species bind together

    NOTE(review): this is a token-for-token duplicate of the
    getComplexationComponents2 defined earlier in this file (this later
    definition is the one that wins at import time). One of the two copies
    should be deleted -- confirm which formatting/version is canonical.

    Returns a list of [molecule, molecule] bond pairs; see the earlier copy
    for the detailed algorithm.
    '''
    def getBiggestMolecule(array):
        #"biggest" = most components, ties broken by string length then text
        sortedMolecule = sorted(
            array,
            key=lambda rule: (len(rule.components), len(str(rule)), str(rule)))
        #sortedMolecule = sorted(sortedMolecule, key=lambda rule: len(rule.components))
        return sortedMolecule[-1]

    def getNamedMolecule(array, name):
        #first molecule in array whose name matches exactly (None if absent)
        for molecule in array:
            if molecule.name == name:
                return molecule

    speciesDict = {}
    #this array will contain all molecules that bind together
    pairedMolecules = []
    #index: component name -> list of molecules carrying that component
    for x in species.molecules:
        for y in x.components:
            if y.name not in speciesDict:
                speciesDict[y.name] = []
            speciesDict[y.name].append(x)
    #this array wil contain all molecules that dont bind to anything
    orphanedMolecules = [x for x in species.molecules]
    #determine how molecules bind together
    redundantBonds = []
    for x in species.molecules:
        for component in [
                y for y in x.components if y.name.lower() in speciesDict.keys()
        ]:
            if x.name.lower() in speciesDict:
                #mutual reference: x names the component and vice versa
                if (x in speciesDict[component.name.lower()]
                    ) and component.name in [
                        y.name.lower() for y in speciesDict[x.name.lower()]
                    ]:
                    for mol in speciesDict[x.name.lower()]:
                        if mol.name.lower() == component.name and x != mol and x in \
                        speciesDict[component.name]:
                            #consume both directions of the match so the same
                            #bond is not paired twice
                            speciesDict[x.name.lower()].remove(mol)
                            speciesDict[component.name].remove(x)
                            if x not in orphanedMolecules and mol not in orphanedMolecules:
                                #FIXME: is it necessary to remove double bonds in complexes?
                                redundantBonds.append([x, mol])
                                #continue
                            pairedMolecules.append([x, mol])
                            if x in orphanedMolecules:
                                orphanedMolecules.remove(x)
                            if mol in orphanedMolecules:
                                orphanedMolecules.remove(mol)
    #if len(redundantBonds) > 0:
    #    print [[x[0].name,x[1].name] for x in redundantBonds],str(species)
    totalComplex = [set(x) for x in pairedMolecules]
    isContinuousFlag = True
    #iterate over orphaned and find unidirectional interactions
    #e.g. if a molecule has a previous known interaction with the
    #same kind of molecule, even if it has no available components
    #e.g. k-mers`
    for element in speciesDict:
        for individualMolecule in speciesDict[element]:
            if individualMolecule in orphanedMolecules:
                candidatePartner = [
                    x for x in species.molecules
                    if x.name.lower() == element and x != individualMolecule
                ]
                if len(candidatePartner) == 1:
                    pairedMolecules.append(
                        [candidatePartner[0], individualMolecule])
                    orphanedMolecules.remove(individualMolecule)
    #determine which pairs form a continuous chain
    while isContinuousFlag:
        isContinuousFlag = False
        for idx in range(0, len(totalComplex) - 1):
            for idx2 in range(idx + 1, len(totalComplex)):
                if len([
                        x for x in totalComplex[idx] if x in totalComplex[idx2]
                ]) > 0:
                    #overlapping groups: merge and restart the scan
                    totalComplex[idx] = totalComplex[idx].union(
                        totalComplex[idx2])
                    totalComplex.pop(idx2)
                    isContinuousFlag = True
                    break
            if isContinuousFlag:
                break
    #now we process those molecules where we need to create a new component
    for element in orphanedMolecules:
        for mol1 in species.molecules:
            #when adding orphaned molecules make sure it's not already in
            #the list
            if mol1 == element and mol1 not in set().union(*totalComplex):
                totalComplex.append(set([mol1]))
    #now we process for those molecules we are not sure how do they bind
    while len(totalComplex) > 1:
        if len(totalComplex[0]) == 1 and len(totalComplex[1]) == 1:
            mol1 = list(totalComplex[0])[0]
            mol2 = list(totalComplex[1])[0]
        else:
            names1 = [str(x.name) for x in totalComplex[0]]
            names2 = [str(x.name) for x in totalComplex[1]]
            dbPair = set([])
            if bioGridFlag:
                bioGridDict = biogrid.loadBioGridDict()
            else:
                bioGridDict = {}
            comb = set([])
            equivalence = {}
            comb = [(x, y) for x in names1 for y in names2]
            dbPair = set([])
            #collect every cross-complex name pair known to BioGrid
            for element in comb:
                if element[0].upper() in bioGridDict and element[1] in bioGridDict[element[0].upper()] or \
                element[1].upper() in bioGridDict and element[0] in bioGridDict[element[1].upper()]:
                    logMess(
                        'INFO:Atomization',
                        'Biogrid info: {0}:{1}'.format(element[0], element[1]))
                    dbPair.add((element[0], element[1]))
            dbPair = list(dbPair)
            if dbPair != []:
                #arbitrarily take the first BioGrid-supported pairing
                logMess(
                    'WARNING:Atomization',
                    "More than one interaction was found in {0}".format(
                        dbPair))
                mol1 = getNamedMolecule(totalComplex[0], dbPair[0][0])
                mol2 = getNamedMolecule(totalComplex[1], dbPair[0][1])
            else:
                logMess(
                    'WARNING:Atomization',
                    "We don't know how {0} and {1} bind together and there's \
no relevant BioGrid information. Defaulting to largest molecule".format(
                        [x.name for x in totalComplex[0]],
                        [x.name for x in totalComplex[1]]))
                mol1 = getBiggestMolecule(totalComplex[0])
                mol2 = getBiggestMolecule(totalComplex[1])
        pairedMolecules.append([mol1, mol2])
        totalComplex[0] = totalComplex[0].union(totalComplex[1])
        totalComplex.pop(1)
    #totalComplex.extend(orphanedMolecules)
    return pairedMolecules
def selectBestCandidate(reactant, candidates, dependencyGraph):
    '''
    Choose, among several candidate decompositions of `reactant`, the one
    whose elements best resolve through the dependency graph.

    Each candidate is expanded to its root chemicals via
    resolveDependencyGraph; modified forms closest to `reactant` are
    preferred. When candidates disagree, elements are kept only if their
    occurrence count is a multiple of the number of candidates (the rest are
    reported as "uneven").

    Returns ([chosen decomposition], unevenElements), or (None, None) when no
    usable candidate remains.

    NOTE(review): this function mutates tmpCandidates[0] in place and reads a
    free name `equivalenceTranslator` (see below) -- neither a parameter nor a
    local; confirm a module-level binding exists, otherwise that branch raises
    NameError at runtime.
    '''
    tmpCandidates = []
    modifiedElements = []
    unevenElements = []
    for individualAnswer in candidates:
        tmpAnswer = []
        flag = True
        if len(individualAnswer) == 1 and individualAnswer[0] == reactant:
            #trivial self-decomposition: skip
            continue
        for chemical in individualAnswer:
            # we cannot handle tuple naming conventions for now
            if type(chemical) == tuple:
                flag = False
                continue
            rootChemical = resolveDependencyGraph(dependencyGraph, chemical)
            mod = resolveDependencyGraph(dependencyGraph, chemical, True)
            if mod != []:
                modifiedElements.extend(mod)
            for element in rootChemical:
                if len(element) == 1 and type(element[0]) == tuple:
                    continue
                if element == chemical:
                    tmpAnswer.append(chemical)
                elif type(element) == tuple:
                    tmpAnswer.append(element)
                else:
                    tmpAnswer.append(element[0])
        if flag:
            tmpAnswer = sorted(tmpAnswer)
            tmpCandidates.append(tmpAnswer)
    # we cannot handle tuple naming conventions for now
    if len(tmpCandidates) == 0:
        return None, None
    # if we have more than one modified element for a single reactant
    # we can try to choose the one that is most similar to the original
    # reactant
    newModifiedElements = {}
    for element in modifiedElements:
        if element[0] not in newModifiedElements or element[1] == reactant:
            newModifiedElements[element[0]] = element[1]
    # check if all candidates are the same
    # print '...',tmpCandidates[0]
    if tmpCandidates[1:] == tmpCandidates[:-1] or len(tmpCandidates) == 1:
        #all candidates agree: promote base elements to their modified forms
        flag = True
        while flag:
            flag = False
            for idx, chemical in enumerate(tmpCandidates[0]):
                if chemical in newModifiedElements:
                    tmpCandidates[0][idx] = newModifiedElements[chemical]
                    flag = True
                    break
        # tmpCandidates[0] = candidates[0]
    else:
        # temporal solution for defaulting to the first alternative
        # print '---','error',reactant,newModifiedElements,tmpCandidates
        totalElements = [y for x in tmpCandidates for y in x]
        elementDict = {}
        for word in totalElements:
            if word not in elementDict:
                elementDict[word] = 0
            elementDict[word] += 1
        newTmpCandidates = [[]]
        for element in elementDict:
            #keep elements shared evenly across all candidates
            if elementDict[element] % len(tmpCandidates) == 0:
                newTmpCandidates[0].append(element)
            # elif elementDict[element] % len(tmpCandidates) != 0 and re.search('(_|^){0}(_|$)'.format(element),reactant):
            #     newTmpCandidates[0].append(element)
            #     unevenElements.append([element])
            else:
                unevenElements.append(element)
        flag = True
        # this should be done on newtmpCandidates instead of tmpcandidates
        while flag:
            flag = False
            for idx, chemical in enumerate(tmpCandidates[0]):
                #only promote to a modified form whose name occurs in reactant
                if chemical in newModifiedElements and newModifiedElements[chemical] in reactant:
                    tmpCandidates[0][idx] = newModifiedElements[chemical]
                    flag = True
                    break
        # print newTmpCandidates,unevenElements
    # print ';;;',tmpCandidates[0]
    #single-molecule candidate that actually resolves to a multi-molecule
    #complex: try to express it as modifications of the complex's members
    if (
        len(candidates) == 1
        and len(candidates[0]) == 1
        and candidates[0][0] != reactant
        and len(tmpCandidates[0]) > 1
    ):
        candidates = []
        #NOTE(review): `equivalenceTranslator` is not defined in this scope --
        #confirm it resolves at module level before relying on this branch
        modificationCandidates = {
            x[0]: x[1]
            for x in equivalenceTranslator
            if x[0] in tmpCandidates[0] and type(x[1]) is not tuple
        }
        if modificationCandidates == {}:
            logMess(
                "WARNING",
                "I dont know how this is modified and I have no way to make an educated guess. Politely refusing to translate {0}.".format(
                    reactant
                ),
            )
            tmpCandidates[0] = [reactant]
        for idx, molecule in enumerate(tmpCandidates[0]):
            if molecule in modificationCandidates:
                tmpCandidates[0][idx] = modificationCandidates[molecule]
        return [tmpCandidates[0]], unevenElements
    return [tmpCandidates[0]], unevenElements
def createCatalysisRBM(dependencyGraph, element, translator, reactionProperties,
                       equivalenceDictionary, sbmlAnalyzer, database):
    '''
    if it's a catalysis reaction create a new component/state

    Walks the dependency chain of element[0] down to its base species,
    classifying each step (via sbmlAnalyzer.findMatchingModification and
    identifyReaction) and collecting the (component, state) pairs declared in
    reactionProperties for each classification. The base species in
    `translator` gains each component with an unmodified '0' state; a deep
    copy registered under element[0] gains the same components in their
    modified states.

    Side effects: mutates `translator` (entries for the base name and for
    element[0]). Raises CycleError when the dependency chain revisits a node.
    '''
    if dependencyGraph[element[0]][0][0] == element[0]:
        #element is its own base: just make sure an (empty) species exists
        if element[0] not in translator:
            translator[element[0]] = createEmptySpecies(element[0])
    else:
        componentStateArray = []
        tmp = element[0]
        existingComponents = []
        memory = []
        forceActivationSwitch = False
        while dependencyGraph[tmp] != []:
            #what kind of catalysis are we dealing with
            #classification = identifyReaction(
            #    equivalenceDictionary,
            #    dependencyGraph[tmp][0][0],tmp)
            #classification cascade: lexical match on tmp, then equivalence
            #lookup on tmp, then both again relative to the original element
            classification = sbmlAnalyzer.findMatchingModification(
                tmp, dependencyGraph[tmp][0][0])
            if not classification:
                classification = identifyReaction(equivalenceDictionary,
                                                  dependencyGraph[tmp][0][0],
                                                  tmp)
            if not classification:
                classification = sbmlAnalyzer.findMatchingModification(
                    element[0], dependencyGraph[tmp][0][0])
            if not classification:
                classification = identifyReaction(equivalenceDictionary,
                                                  dependencyGraph[tmp][0][0],
                                                  element[0])
            if classification is not None and \
            reactionProperties[classification][0] not in existingComponents:
                componentStateArray.append(reactionProperties[classification])
                #classificationArray.append([classification,
                #    tmp,dependencyGraph[tmp]
                #    [0][0]])
                existingComponents.append(
                    reactionProperties[classification][0])
            elif database.forceModificationFlag and classification is None and not forceActivationSwitch:
                #unclassifiable step but forced-modification mode is on:
                #synthesize a generic modification component (only once)
                forceActivationSwitch = True
                baseName = getTrueTag(dependencyGraph,
                                      dependencyGraph[element[0]][0][0])
                species = createEmptySpecies(baseName)
                componentStateArray.append(['genericMod', tmp])
                logMess(
                    'ATOMIZATION:WARNING',
                    'adding forced transformation: {0}:{1}:{2}'.format(
                        baseName, dependencyGraph[element[0]], element[0]))
                #return
            elif classification is None:
                logMess(
                    'ATOMIZATION:CRITICAL',
                    'unregistered modification: {0}:{1}'.format(
                        element[0], dependencyGraph[element[0]]))
            memory.append(tmp)
            tmp = dependencyGraph[tmp][0][0]
            #dependency chains must be acyclic; bail out hard if not
            if tmp in memory:
                raise CycleError
        baseName = getTrueTag(dependencyGraph,
                              dependencyGraph[element[0]][0][0])
        species = createEmptySpecies(baseName)
        #use the already existing structure if its in the
        #translator,otherwise empty
        if baseName in translator:
            species = translator[baseName]
        modifiedSpecies = deepcopy(species)
        for componentState in componentStateArray:
            #if classification[0] != None:
            addComponentToMolecule(species, baseName, componentState[0])
            addComponentToMolecule(modifiedSpecies, baseName,
                                   componentState[0])
            addStateToComponent(species, baseName, componentState[0],
                                componentState[1])
            addStateToComponent(modifiedSpecies, baseName, componentState[0],
                                componentState[1])
            #base species keeps the unmodified '0' state as its last-added
            #(default) state
            addStateToComponent(species, baseName, componentState[0], '0')
        #update the base species
        if len(componentStateArray) > 0:
            translator[baseName] = deepcopy(species)
            translator[element[0]] = modifiedSpecies
def createSpeciesCompositionGraph(parser, database, configurationFile,namingConventions,
                                  speciesEquivalences=None,bioGridFlag=False):
    '''
    Build the species-composition (dependency) graph for the whole model.

    Pipeline: classify reactions through database.sbmlAnalyzer, seed the
    dependency graph from binding reactions and lexical analysis, add
    catalysis-derived base/modified relations, overlay user-defined species
    (labelDictionary), then string-similarity results (lexicalLabelDictionary,
    lower priority), and finally resolve still-orphaned species via
    findClosestModification. The result is consolidated with
    consolidateDependencyGraph.

    Side effects: populates many attributes on `database` (sbmlAnalyzer,
    classifications, eequivalenceTranslator, reactionProperties, translator,
    labelDictionary, lexicalLabelDictionary, dependencyGraph, tmpEquivalence,
    weights, artificialEquivalenceTranslator).

    Returns (prunnedDependencyGraph, database).

    NOTE(review): bioGridFlag is accepted but not used in this function body;
    comparisonVariables, rdfAnnotations and unevenElementDict are computed but
    never read -- confirm whether they are vestigial.
    '''
    _, rules, _ = parser.getReactions(atomize=True)
    molecules, _, _,_ = parser.getSpecies()
    database.sbmlAnalyzer = \
    analyzeSBML.SBMLAnalyzer(parser,configurationFile, namingConventions,speciesEquivalences)
    #classify reactions
    database.classifications, equivalenceTranslator, database.eequivalenceTranslator,\
    indirectEquivalenceTranslator, \
    adhocLabelDictionary,lexicalDependencyGraph= database.sbmlAnalyzer.classifyReactions(rules, molecules)
    referenceVariables = [database.classifications,equivalenceTranslator,
                          database.eequivalenceTranslator,indirectEquivalenceTranslator,adhocLabelDictionary]
    comparisonVariables = [deepcopy(x) for x in referenceVariables]
    #####input processing
    #states,components,other user options
    #with open('temp1.dict','w') as f:
    #    pickle.dump(referenceVariables,f)
    #with open('temp2.dict','w') as f:
    #    pickle.dump(comparisonVariables,f)
    database.reactionProperties = database.sbmlAnalyzer.getReactionProperties()
    #user defined and lexical analysis naming conventions are stored here
    database.reactionProperties.update(adhocLabelDictionary)
    database.translator, database.labelDictionary, \
    database.lexicalLabelDictionary = database.sbmlAnalyzer.getUserDefinedComplexes()
    database.dependencyGraph = {}
    #analyzeSBML.analyzeNamingConventions(molecules)
    rdfAnnotations = analyzeRDF.getAnnotations(parser,'uniprot')
    ####dependency graph
    #binding reactions
    for reaction, classification in zip(rules, database.classifications):
        bindingReactionsAnalysis(database.dependencyGraph,
                                 list(parseReactions(reaction)),classification)
    for element in lexicalDependencyGraph:
        database.dependencyGraph[element] = lexicalDependencyGraph[element]
    #catalysis reactions
    for key in database.eequivalenceTranslator:
        for namingEquivalence in database.eequivalenceTranslator[key]:
            #shorter name is taken as the base form, longer as the modified one
            baseElement = min(namingEquivalence, key=len)
            modElement = max(namingEquivalence, key=len)
            if key != 'Binding':
                if baseElement not in database.dependencyGraph or database.dependencyGraph[baseElement] == []:
                    if modElement not in database.dependencyGraph or database.dependencyGraph[modElement] == []:
                        database.dependencyGraph[baseElement] = []
                    #do we have a meaningful reverse dependence?
                    #elif all([baseElement not in x for x in database.dependencyGraph[modElement]]):
                    #    addToDependencyGraph(database.dependencyGraph,baseElement,[modElement])
                    #    continue
                addToDependencyGraph(database.dependencyGraph, modElement,
                                     [baseElement])
    #non lexical-analysis catalysis reactions
    if database.forceModificationFlag:
        for reaction, classification in zip(rules, database.classifications):
            if classification == 'Transformation':
                preaction = list(parseReactions(reaction))
                #whichever side's name is contained in the other is the base
                if preaction[1][0] in preaction[0][0]:
                    base = preaction[1][0]
                    mod = preaction[0][0]
                else:
                    mod = preaction[1][0]
                    base = preaction[0][0]
                if database.dependencyGraph[mod] == []:
                    database.dependencyGraph[mod] = [[base]]
    '''
    #complex catalysis reactions
    for key in indirectEquivalenceTranslator:
        #first remove these entries from the dependencyGraph since
        #they are not true bindingReactions
        for namingEquivalence in indirectEquivalenceTranslator[key]:
            removedElement = ''
            tmp3 = deepcopy(namingEquivalence[1])
            if tmp3 in database.dependencyGraph[namingEquivalence[0][0]]:
                removedElement = namingEquivalence[0][0]
            elif tmp3 in database.dependencyGraph[namingEquivalence[0][1]]:
                removedElement = namingEquivalence[0][1]
            else:
                tmp3.reverse()
                if tmp3 in database.dependencyGraph[namingEquivalence[0][0]]:
                    removedElement = namingEquivalence[0][0]
                elif tmp3 in database.dependencyGraph[namingEquivalence[0][1]]:
                    removedElement = namingEquivalence[0][1]
            #then add the new, true dependencies
            #if its not supposed to be a basic element
            tmp = [x for x in namingEquivalence[1] if x not in namingEquivalence[2]]
            tmp.extend([x for x in namingEquivalence[2] if x not in namingEquivalence[1]])
            tmp2 = deepcopy(tmp)
            tmp2.reverse()
            ##TODO: map back for the elements in namingEquivalence[2]
            if tmp not in database.dependencyGraph[namingEquivalence[3][0]] \
                and tmp2 not in database.dependencyGraph[namingEquivalence[3][0]]:
                if sorted(tmp) == sorted(tmp3):
                    continue
                if all(x in database.dependencyGraph for x in tmp):
                    if removedElement in database.dependencyGraph:
                        database.dependencyGraph[removedElement].remove(tmp3)
                    logMess('INFO:Atomization','Removing {0}={1} and adding {2}={3} instead\
 from the dependency list since we determined it is not a true binding reaction based on lexical analysis'\
                    .format(removedElement,tmp3,namingEquivalence[3][0],tmp))
                    database.dependencyGraph[namingEquivalence[3][0]] = [tmp]
                else:
                    logMess('WARNING:Atomization','We determined that {0}={1} based on lexical analysis instead of \
{2}={3} (stoichiometry) but one of the constituent components in {1} is not a molecule so no action was taken'.format(namingEquivalence[3][0],
                    tmp,removedElement,tmp3))
    #user defined stuff
    '''
    #user-defined complexes from the label dictionary (highest priority)
    for element in database.labelDictionary:
        if len(database.labelDictionary[element][0]) == 0 or element == \
        database.labelDictionary[element][0][0]:
            addToDependencyGraph(database.dependencyGraph, element, [])
        else:
            database.dependencyGraph[element] = [list(
                database.labelDictionary[element][0])]
    #stuff obtained from string similarity analysis
    for element in database.lexicalLabelDictionary:
        #similarity analysis has less priority than anything we discovered
        #before
        if element in database.dependencyGraph and \
        len(database.dependencyGraph[element]) > 0:
            continue
        if len(database.lexicalLabelDictionary[element][0]) == 0 or element == \
        database.lexicalLabelDictionary[element][0][0]:
            addToDependencyGraph(database.dependencyGraph, element, [])
        else:
            logMess('INFO:Atomization','added induced speciesStructure {0}={1}'\
            .format(element,database.lexicalLabelDictionary[element][0]))
            database.dependencyGraph[element] = [list(
                database.lexicalLabelDictionary[element][0])]
    #pure lexical analysis
    #species still lacking any decomposition get a last-chance lexical match
    orphanedSpecies = [x for x in database.dependencyGraph if database.dependencyGraph[x] == []]
    strippedMolecules = [x.strip('()') for x in molecules]
    tmpDependency,database.tmpEquivalence = database.sbmlAnalyzer.findClosestModification(orphanedSpecies,strippedMolecules)
    for species in tmpDependency:
        if tmpDependency[species] == []:
            addToDependencyGraph(database.dependencyGraph,species,[])
        for instance in tmpDependency[species]:
            addToDependencyGraph(database.dependencyGraph,species,instance)
    #####sct
    #FIXME: wtf was unevenelementdict supposed to be for
    #print database.dependencyGraph
    prunnedDependencyGraph, database.weights, unevenElementDict,database.artificialEquivalenceTranslator = \
        consolidateDependencyGraph(database.dependencyGraph, equivalenceTranslator,database.eequivalenceTranslator,database.sbmlAnalyzer)
    return prunnedDependencyGraph,database
def selectBestCandidate(reactant, candidates, dependencyGraph, sbmlAnalyzer, equivalenceTranslator=equivalenceTranslator, equivalenceDictionary=equivalenceDictionary):
    """Pick the best structured decomposition of <reactant> among <candidates>.

    Each candidate is a list of chemical names. Every name is first resolved
    down to its base (unmodified) species through the dependency graph; the
    resolved candidate lists are then reconciled with each other and with the
    modification pairs discovered during resolution. When stoichiometry alone
    is ambiguous, sbmlAnalyzer is consulted for lexical clues
    (naming-convention modifications, fuzzy string matching).

    NOTE(review): the defaults `equivalenceTranslator=equivalenceTranslator`
    and `equivalenceDictionary=equivalenceDictionary` bind names from an
    enclosing scope, i.e. this is a nested function. Both dicts, and
    `dependencyGraph`, are mutated in place as a side effect.

    Returns a (candidateList, unevenElements) pair, or (None, None) when no
    usable candidate could be determined.
    """
    tmpCandidates = []
    modifiedElements = []
    unevenElements = []
    # Resolve every candidate down to base species names.
    for individualAnswer in candidates:
        tmpAnswer = []
        flag = True
        # A candidate that is just the reactant itself carries no information.
        if len(individualAnswer) == 1 and individualAnswer[0] == reactant:
            continue
        for chemical in individualAnswer:
            #we cannot handle tuple naming conventions for now
            if type(chemical) == tuple:
                flag = False
                continue
            rootChemical = resolveDependencyGraph(dependencyGraph, chemical)
            # withModifications=True yields (base, modified) name pairs.
            mod = resolveDependencyGraph(dependencyGraph, chemical, True)
            if mod != []:
                modifiedElements.extend(mod)
            for element in rootChemical:
                if len(element) == 1 and type(element[0]) == tuple:
                    continue
                if element == chemical:
                    tmpAnswer.append(chemical)
                elif type(element) == tuple:
                    tmpAnswer.append(element)
                else:
                    tmpAnswer.append(element[0])
        if flag:
            tmpAnswer = sorted(tmpAnswer)
            tmpCandidates.append(tmpAnswer)
    #we cannot handle tuple naming conventions for now
    if len(tmpCandidates) == 0:
        logMess(
            'CRITICAL:Atomization',
            'I dont know how to process these candidates and I have no way to make an educated guess. Politely refusing to translate {0}={1}.'
            .format(reactant, candidates))
        return None, None
    originalTmpCandidates = deepcopy(tmpCandidates)
    #if we have more than one modified element for a single reactant
    #we can try to choose the one that is most similar to the original
    #reactant
    #FIXME:Fails if there is a double modification
    newModifiedElements = {}
    # Map base name -> modified name; a pair whose modified form equals the
    # reactant itself always wins the slot.
    for element in modifiedElements:
        if element[0] not in newModifiedElements or element[1] == reactant:
            newModifiedElements[element[0]] = element[1]
    #check if all candidates are the same
    #print '...',tmpCandidates[0]
    if tmpCandidates[1:] == tmpCandidates[:-1] or len(tmpCandidates) == 1:
        # All resolved candidates agree: substitute base names with their
        # modified forms one at a time until a fixed point is reached.
        flag = True
        while flag:
            flag = False
            for idx, chemical in enumerate(tmpCandidates[0]):
                if chemical in newModifiedElements:
                    tmpCandidates[0][idx] = newModifiedElements[chemical]
                    flag = True
                    break
        #tmpCandidates[0] = candidates[0]
    else:
        #temporal solution for defaulting to the first alternative
        #print '---','error',reactant,newModifiedElements,tmpCandidates
        # Candidates disagree: keep only elements whose total count across all
        # candidates is a multiple of the number of candidates (i.e. common to
        # every alternative); everything else is reported as uneven.
        totalElements = [y for x in tmpCandidates for y in x]
        elementDict = {}
        for word in totalElements:
            if word not in elementDict:
                elementDict[word] = 0
            elementDict[word] += 1
        newTmpCandidates = [[]]
        for element in elementDict:
            if elementDict[element] % len(tmpCandidates) == 0:
                newTmpCandidates[0].append(element)
            #elif elementDict[element] % len(tmpCandidates) != 0 and re.search('(_|^){0}(_|$)'.format(element),reactant):
            #    newTmpCandidates[0].append(element)
            #    unevenElements.append([element])
            else:
                logMess(
                    'WARNING:Atomization',
                    'Are these actually the same? {0}={1}.'.format(
                        reactant, candidates))
                unevenElements.append(element)
        flag = True
        #this should be done on newtmpCandidates instead of tmpcandidates
        while flag:
            flag = False
            for idx, chemical in enumerate(tmpCandidates[0]):
                # Only substitute when the modified name is a substring of the
                # reactant name.
                if chemical in newModifiedElements and newModifiedElements[
                        chemical] in reactant:
                    tmpCandidates[0][idx] = newModifiedElements[chemical]
                    flag = True
                    break
    #if all the candidates are about modification changes to a complex
    #then try to do it through lexical analysis
    if all([len(candidate) == 1 for candidate in candidates]) and \
            candidates[0][0] != reactant and len(tmpCandidates[0]) > 1:
        if reactant != None:
            pass
        #analyze based on standard modifications
        lexCandidate, translationKeys, tmpequivalenceTranslator = sbmlAnalyzer.analyzeSpeciesModification(
            candidates[0][0], reactant, originalTmpCandidates[0])
        #FIXME: this is iffy. is it always an append modification? could be prepend
        if lexCandidate != None:
            # Map the lexical hit back to its (possibly already substituted)
            # position, then register the new modified species in the
            # equivalence translator and the dependency graph.
            lexCandidate = tmpCandidates[0][originalTmpCandidates[0].index(
                lexCandidate)]
            lexCandidateModification = lexCandidate + translationKeys[0]
            for element in tmpequivalenceTranslator:
                if element not in equivalenceTranslator:
                    equivalenceTranslator[element] = []
                equivalenceTranslator[element].append(
                    (lexCandidate, lexCandidateModification))
            dependencyGraph[lexCandidateModification] = [[lexCandidate]]
            while lexCandidate in tmpCandidates[0]:
                tmpCandidates[0].remove(lexCandidate)
                tmpCandidates[0].append(lexCandidateModification)
            return [tmpCandidates[0]], unevenElements
        else:
            fuzzyCandidateMatch = None
            '''
            if nothing else works and we know the result is a
            bimolecular complex and we know which are the basic
            reactants then try to do fuzzy string matching between the two.
            TODO: extend this to more than 2 molecule complexes.
            '''
            if len(tmpCandidates[0]) == 2:
                tmpmolecules = []
                tmpmolecules.extend(originalTmpCandidates[0])
                tmpmolecules.extend(tmpCandidates[0])
                fuzzyCandidateMatch = sbmlAnalyzer.fuzzyArtificialReaction(
                    originalTmpCandidates[0], [reactant], tmpmolecules)
            if fuzzyCandidateMatch != None:
                logMess(
                    'INFO:Atomization',
                    'Used fuzzy string matching from {0} to {1}'.format(
                        reactant, fuzzyCandidateMatch))
                return [fuzzyCandidateMatch], unevenElements
            else:
                #last ditch attempt using straighforward lexical analysis
                tmpDependency, tmpEquivalence = sbmlAnalyzer.findClosestModification(
                    [reactant], dependencyGraph.keys())
                if reactant in tmpDependency and tmpDependency[
                        reactant] in tmpCandidates[0]:
                    # Merge the freshly discovered dependencies and
                    # equivalences into the shared structures.
                    for element in tmpDependency:
                        if element not in dependencyGraph:
                            dependencyGraph[element] = tmpDependency[
                                element]
                    for element in tmpEquivalence:
                        if element not in equivalenceDictionary:
                            equivalenceDictionary[element] = []
                        for equivalence in tmpEquivalence[element]:
                            if equivalence[0] not in equivalenceDictionary[
                                    element]:
                                equivalenceDictionary[element].append(
                                    equivalence[0])
                    if len(tmpDependency.keys()) > 0:
                        return tmpDependency[reactant], unevenElements
                #the ive no idea whats going on branch
                # NOTE(review): modificationCandidates is always {} at this
                # point, so the branch below always returns (None, None) and
                # the substitution loop after it is unreachable dead code.
                modificationCandidates = {}
                if modificationCandidates == {}:
                    logMess(
                        'CRITICAL:Atomization',
                        'I dont know how this is modified and I have no way to make an educated guess. Politely refusing to translate {0}={1}.'
                        .format(reactant, candidates))
                    return None, None
                for idx, molecule in enumerate(tmpCandidates[0]):
                    if molecule in modificationCandidates:
                        tmpCandidates[0][idx] = modificationCandidates[
                            molecule]
                return [tmpCandidates[0]], unevenElements
    elif len(tmpCandidates) > 1:
        pass
    return [tmpCandidates[0]], unevenElements