예제 #1
0
파일: stats.py 프로젝트: jmuhlich/bionetgen
def main2():
    #336,365
    pair = [336,365]
    history = np.load('stats3.npy')
    extracts = []
    for element in history:
        if element[0] in pair:
            extracts.append(element)
        if len(extracts) == 2:
            break
    print extracts
    reader = libsbml.SBMLReader()
    document = reader.readSBMLFromFile('XMLExamples/curated/BIOMD0000000336.xml')
    parser = libsbml2bngl.SBML2BNGL(document.getModel())    
    rdfAnnotations = analyzeRDF.getAnnotations(parser,'miriam')
    print rdfAnnotations
    
    document = reader.readSBMLFromFile('XMLExamples/curated/BIOMD0000000365.xml')
    parser = libsbml2bngl.SBML2BNGL(document.getModel())    
    rdfAnnotations2 = analyzeRDF.getAnnotations(parser,'miriam')
    print rdfAnnotations2
    
    for element in rdfAnnotations:
        if element in rdfAnnotations2:
            print element,rdfAnnotations[element],rdfAnnotations2[element]
예제 #2
0
def createSpeciesCompositionGraph(parser, database, configurationFile,namingConventions,
                       speciesEquivalences=None,bioGridFlag=False):
    """
    Build ``database.dependencyGraph`` from several information sources and
    return its consolidated form.

    The graph is filled in this order (later sources may override or extend
    earlier ones, so the order matters):
        1. binding information, through ``bindingReactionsAnalysis``
        2. the lexical dependency graph returned by ``classifyReactions``
        3. naming equivalences of every classification except 'Binding'
        4. 'Transformation' reactions, only if ``database.forceModificationFlag``
        5. user defined labels (``database.labelDictionary``), which override
        6. lexical labels (``database.lexicalLabelDictionary``), which only
           fill species that have no dependency yet
        7. ``findClosestModification`` applied to the species left without
           dependencies
    and is finally handed to ``consolidateDependencyGraph``.

    Keywords:
        ---parser: must provide getReactions(atomize=True) and getSpecies()
        ---database: receives sbmlAnalyzer, classifications,
           eequivalenceTranslator, reactionProperties, translator,
           labelDictionary, lexicalLabelDictionary, dependencyGraph,
           tmpEquivalence, weights and artificialEquivalenceTranslator;
           its forceModificationFlag attribute is read
        ---configurationFile, namingConventions, speciesEquivalences:
           forwarded unchanged to analyzeSBML.SBMLAnalyzer
        ---bioGridFlag: not used by this function
    Returns:
        the tuple (prunnedDependencyGraph, database)
    """
    _, rules, _ = parser.getReactions(atomize=True)
    molecules, _, _, _ = parser.getSpecies()
    database.sbmlAnalyzer = analyzeSBML.SBMLAnalyzer(
        parser, configurationFile, namingConventions, speciesEquivalences)

    #classify reactions
    (database.classifications, equivalenceTranslator,
     database.eequivalenceTranslator, indirectEquivalenceTranslator,
     adhocLabelDictionary, lexicalDependencyGraph) = \
        database.sbmlAnalyzer.classifyReactions(rules, molecules)

    #####input processing
    #states,components,other user options
    database.reactionProperties = database.sbmlAnalyzer.getReactionProperties()
    #user defined and lexical analysis naming conventions are stored here
    database.reactionProperties.update(adhocLabelDictionary)

    (database.translator, database.labelDictionary,
     database.lexicalLabelDictionary) = database.sbmlAnalyzer.getUserDefinedComplexes()
    database.dependencyGraph = {}
    graph = database.dependencyGraph
    # The result is not used here; the call itself is kept because this
    # function cannot tell whether getAnnotations has side effects.
    analyzeRDF.getAnnotations(parser, 'uniprot')

    ####dependency graph
    #binding reactions
    for reaction, classification in zip(rules, database.classifications):
        bindingReactionsAnalysis(graph, list(parseReactions(reaction)),
                                 classification)
    for element in lexicalDependencyGraph:
        graph[element] = lexicalDependencyGraph[element]

    #catalysis reactions
    for key in database.eequivalenceTranslator:
        if key == 'Binding':
            continue
        for namingEquivalence in database.eequivalenceTranslator[key]:
            baseElement = min(namingEquivalence, key=len)
            modElement = max(namingEquivalence, key=len)
            baseUnresolved = baseElement not in graph or graph[baseElement] == []
            modUnresolved = modElement not in graph or graph[modElement] == []
            if baseUnresolved and modUnresolved:
                # neither name has a known composition yet: the shorter name
                # is registered as having no dependencies
                graph[baseElement] = []
            addToDependencyGraph(graph, modElement, [baseElement])

    #non lexical-analysis catalysis reactions
    if database.forceModificationFlag:
        for reaction, classification in zip(rules, database.classifications):
            if classification != 'Transformation':
                continue
            preaction = list(parseReactions(reaction))
            # whichever name is contained in the other one is taken as base
            if preaction[1][0] in preaction[0][0]:
                base = preaction[1][0]
                mod = preaction[0][0]
            else:
                mod = preaction[1][0]
                base = preaction[0][0]
            # NOTE(review): raises KeyError if ``mod`` has no entry in the
            # graph yet; kept exactly as in the original code.
            if graph[mod] == []:
                graph[mod] = [[base]]

    #user defined stuff
    for element in database.labelDictionary:
        userLabel = database.labelDictionary[element][0]
        if len(userLabel) == 0 or element == userLabel[0]:
            addToDependencyGraph(graph, element, [])
        else:
            graph[element] = [list(userLabel)]

    #stuff obtained from string similarity analysis
    for element in database.lexicalLabelDictionary:
        #similarity analysis has less priority than anything we discovered
        #before
        if element in graph and len(graph[element]) > 0:
            continue

        lexicalLabel = database.lexicalLabelDictionary[element][0]
        if len(lexicalLabel) == 0 or element == lexicalLabel[0]:
            addToDependencyGraph(graph, element, [])
        else:
            logMess('INFO:Atomization','added induced speciesStructure {0}={1}'\
            .format(element, lexicalLabel))
            graph[element] = [list(lexicalLabel)]

    #pure lexical analysis
    orphanedSpecies = [x for x in graph if graph[x] == []]
    strippedMolecules = [x.strip('()') for x in molecules]
    tmpDependency, database.tmpEquivalence = \
        database.sbmlAnalyzer.findClosestModification(orphanedSpecies, strippedMolecules)
    for species in tmpDependency:
        if tmpDependency[species] == []:
            addToDependencyGraph(graph, species, [])
        for instance in tmpDependency[species]:
            addToDependencyGraph(graph, species, instance)

    #####sct
    # the third value returned by consolidateDependencyGraph is not used
    prunnedDependencyGraph, database.weights, _, database.artificialEquivalenceTranslator = \
        consolidateDependencyGraph(graph, equivalenceTranslator,
                                   database.eequivalenceTranslator, database.sbmlAnalyzer)

    return prunnedDependencyGraph, database
예제 #3
0
def loadAnnotations(fileName):
    """
    Read the SBML file ``fileName`` and return whatever
    ``analyzeRDF.getAnnotations`` yields for its model when queried with
    the 'miriam' qualifier.
    """
    sbmlReader = libsbml.SBMLReader()
    sbmlDocument = sbmlReader.readSBMLFromFile(fileName)
    modelParser = libsbml2bngl.SBML2BNGL(sbmlDocument.getModel())
    return analyzeRDF.getAnnotations(modelParser, 'miriam')
예제 #4
0
def identifyNamingConvention():
    '''
    extracts statistics from the code
    '''
    
    reader = libsbml.SBMLReader()
    jsonFiles = [ f for f in listdir('./reactionDefinitions') if f[-4:-1] == 'jso']
    translationLevel = []
    arrayMolecules = []
    rules = 0
    #go through all curated models in the biomodels database
    for index in range(1,410):
        bestTranslator = {}
        
        try:
            nameStr = 'BIOMD0000000%03d' % (index)
            document = reader.readSBMLFromFile('XMLExamples/curated/' + nameStr + '.xml')
            parser = SBML2BNGL(document.getModel())
            database = structures.Databases()

            print nameStr + '.xml',
            naming = 'reactionDefinition0.json'
            bestUseID = True
            numberofMolecules = numberOfSpecies = 0
            #iterate through our naming conventions and selects that which
            #creates the most rulified elements in the translator
            for jsonFile in jsonFiles:
                for useID in [True,False]:
                    try:
                        oldmaxi = numberOfSpecies
                        parser = SBML2BNGL(document.getModel(),useID)
                        database = structures.Databases()
                        translator = m2c.transformMolecules(parser,database,'reactionDefinitions/' + jsonFile)
                        numberOfSpecies = max(numberOfSpecies,evaluation(len(parser.getSpecies()),translator))
                        if oldmaxi != numberOfSpecies:
                            naming = jsonFile
                            bestTranslator = translator
                            bestUseID = useID
                            _,rules,_ = parser.getReactions(translator)
                            numberofMolecules = len(translator)
                    except:
                        print 'ERROR',sys.exc_info()[0]
                        continue
        except:
            print 'ERROR',sys.exc_info()[0]
            continue

        _,_,obs = parser.getSpecies()
        rdfAnnotations = analyzeRDF.getAnnotations(parser,'miriam')
        #go through the annotation list and assign which species
        #correspond to which uniprot number (if it exists)
        #similarly list the number of times each individual element appears
        analyzeRDF.getAnnotations(parser,'miriam')
        molecules = {}
        if naming[-6] != 0:
            for element in bestTranslator:
                if len(bestTranslator[element].molecules) == 1:
                    name = bestTranslator[element].molecules[0].name
                    for annotation in rdfAnnotations:
                        if name in rdfAnnotations[annotation]:
                            if name not in molecules:
                                molecules[name] = [0,[]]
                            if annotation not in molecules[name][1]:
                                molecules[name][1].extend(annotation)
                    
                    if  name not in molecules:
                        molecules[name] = [1,[]]
                    for rule in rules:
                        if name in rule:
                            molecules[name][0] += 1
        
       # _,rules,_ = parser.getReactions(bestTranslator)
       #for rule in rules:
        
        if len(obs) != 0:
            print index*1.0,int(naming[-6])*1.0,numberOfSpecies*1.0/len(obs),numberofMolecules*1.0/len(obs),len(obs)*1.0,bestUseID
            
            arrayMolecule = [[x,molecules[x]] for x in molecules]
            arrayMolecules.append(arrayMolecule)
            translationLevel.append([index*1.0,int(naming[-6])*1.0,numberOfSpecies*1.0/len(obs),numberofMolecules*1.0/len(obs),len(obs)*1.0,bestUseID])
            np.save('stats3.npy',np.array(translationLevel))
        else:
            arrayMolecules.append([])
        #print arrayMolecules
        np.save('stats3b.npy',np.array(arrayMolecules))
예제 #5
0
def transformMolecules(
    parser, database, configurationFile, namingConventions, speciesEquivalences=None, bioGridFlag=False
):
    """
    Atomize a model: classify its reactions, derive the dependency graph
    between species, consolidate it and fill ``database.translator``.

    Keywords:
        ---parser: provides getReactions(atomize=True) and getSpecies()
        ---database: receives reactionProperties, translator,
           labelDictionary and dependencyGraph
        ---configurationFile, namingConventions, speciesEquivalences:
           forwarded to analyzeSBML.SBMLAnalyzer
        ---bioGridFlag: forwarded to atomize
    Returns:
        database.translator
    """

    _, rules, _ = parser.getReactions(atomize=True)
    molecules, _, _, _ = parser.getSpecies()

    sbmlAnalyzer = analyzeSBML.SBMLAnalyzer(configurationFile, namingConventions, speciesEquivalences)
    # classify reactions
    classified = sbmlAnalyzer.classifyReactions(rules, molecules)
    classifications, equivalenceTranslator, eequivalenceTranslator, indirectEquivalenceTranslator = classified

    # input processing: states, components, other user options
    database.reactionProperties = sbmlAnalyzer.getReactionProperties()
    database.translator, database.labelDictionary = sbmlAnalyzer.getUserDefinedComplexes()
    database.dependencyGraph = {}
    graph = database.dependencyGraph
    # the annotations are requested but not consulted in this function
    analyzeRDF.getAnnotations(parser, "uniprot")

    # ---- dependency graph ----
    # binding reactions
    for reaction, classification in zip(rules, classifications):
        dependencyGraph(graph, list(parseReactions(reaction)), classification)

    # catalysis reactions
    for key in eequivalenceTranslator:
        for namingEquivalence in eequivalenceTranslator[key]:
            shortName = min(namingEquivalence, key=len)
            longName = max(namingEquivalence, key=len)
            if key == "Binding":
                continue
            if shortName not in graph or graph[shortName] == []:
                if longName not in graph or graph[longName] == []:
                    graph[shortName] = []
                elif [shortName] not in graph[longName]:
                    # the longer name already has other dependencies: record
                    # the reverse dependence and leave the longer name alone
                    addToDependencyGraph(graph, shortName, [longName])
                    continue
            addToDependencyGraph(graph, longName, [shortName])

    # complex catalysis reactions
    for key in indirectEquivalenceTranslator:
        for namingEquivalence in indirectEquivalenceTranslator[key]:
            # first remove these entries from the dependency graph since
            # they are not true binding reactions; the pattern is looked up
            # as given and, failing that, reversed
            pattern = deepcopy(namingEquivalence[1])
            for attempt in (0, 1):
                if attempt == 1:
                    pattern.reverse()
                if pattern in graph[namingEquivalence[0][0]]:
                    graph[namingEquivalence[0][0]].remove(pattern)
                    break
                if pattern in graph[namingEquivalence[0][1]]:
                    graph[namingEquivalence[0][1]].remove(pattern)
                    break

            # then add the new, true dependencies: the elements found in
            # only one of namingEquivalence[1] / namingEquivalence[2]
            trueDependency = [x for x in namingEquivalence[1] if x not in namingEquivalence[2]]
            trueDependency.extend([x for x in namingEquivalence[2] if x not in namingEquivalence[1]])
            mirrored = deepcopy(trueDependency)
            mirrored.reverse()

            ##TODO: map back for the elements in namingEquivalence[2]
            target = namingEquivalence[3][0]
            if trueDependency in graph[target] or mirrored in graph[target]:
                continue
            if all(x in graph for x in trueDependency):
                graph[target] = [trueDependency]

    # user defined stuff
    for element in database.labelDictionary:
        userLabel = database.labelDictionary[element][0]
        if len(userLabel) == 0 or element == userLabel[0]:
            addToDependencyGraph(graph, element, [])
        else:
            graph[element] = [list(userLabel)]

    # ---- sct ----
    prunnedDependencyGraph, weights, unevenElementDict = consolidateDependencyGraph(
        graph, equivalenceTranslator
    )
    # FIXME (from the original author): these data structures get
    # contaminated somewhere, so the classifier is simply run again to
    # recover a clean eequivalenceTranslator.
    _, _, eequivalenceTranslator, _ = sbmlAnalyzer.classifyReactions(rules, molecules)

    orderedWeights = sorted(weights, key=lambda rule: rule[1])
    atomize(
        prunnedDependencyGraph,
        orderedWeights,
        database.translator,
        database.reactionProperties,
        eequivalenceTranslator,
        bioGridFlag,
    )
    propagateChanges(database.translator, prunnedDependencyGraph)
    return database.translator
예제 #6
0
    reader = libsbml.SBMLReader()
    #BIOMD0000000272
    document = reader.readSBMLFromFile('XMLExamples/curated/BIOMD0000000272.xml')
    #document = reader.readSBMLFromFile('XMLExamples/simple4.xml')
    
    model = document.getModel()        
    parser = SBML2BNGL(model)
#    print parser.getReactions()
    _,rules,_ = parser.getReactions()
    
    #print rules
    #print rules
    classifications = analyzeSBML.classifyReactions(rules)
    print classifications
    print 'preparing database...'
    rdfAnnotations = analyzeRDF.getAnnotations(parser,'uniprot')
    for rule,classification in zip(rules,classifications):   
        #print rule 
        reaction2 = list(parseReactions(rule))
        #print reaction2
        totalElements =  [item for sublist in reaction2 for item in sublist]
        totalElements = list(set(totalElements))
        labelDictionary = defineCorrespondence(reaction2,totalElements,
                                               labelDictionary,rawDatabase,
                                               classification,rdfAnnotations)
        #print labelDictionary        
        labelDictionary = resolveCorrespondence(labelDictionary)
    labelDictionary = resolveCorrespondence(labelDictionary)
    print 'label',labelDictionary
    #print labelDictionary
    #print labelDictionary
def transformMolecules(parser,database,configurationFile,speciesEquivalences=None):
    """
    Fill ``database.translator`` from the reactions of a model.

    Outline (the order of the steps is significant, every step works on
    ``database.labelDictionary`` as left by the previous one):
        STEP1   correspondences inferred from each classified reaction
                (defineCorrespondence), then simplify / resolveCycles /
                five rounds of resolveCorrespondence and a reclassification
        STEP2   naming conventions (simple, then complex correspondences)
        STEP3   annotations (classifyReactionsWithAnnotations), followed by
                another simplify / resolveCycles / resolveCorrespondence
        finally the rules are sorted by weight and passed one by one to
        processRule, and the translator entries are cross-updated.

    Keywords:
        ---parser: provides getReactions(), getSpecies() and
           getSpeciesAnnotation()
        ---database: its reactionProperties, translator, labelDictionary,
           rawLabelDictionary and classifications attributes are written;
           rawDatabase is read
        ---configurationFile, speciesEquivalences: forwarded to
           analyzeSBML.SBMLAnalyzer
    Returns:
        the tuple (database.translator, logMess.log,
        parser.getSpeciesAnnotation())
    """
    _, rules, _ = parser.getReactions()
    molecules, _, _ = parser.getSpecies()
    sbmlAnalyzer = analyzeSBML.SBMLAnalyzer(configurationFile, speciesEquivalences)
    classifications, equivalenceTranslator, eequivalenceTranslator = \
        sbmlAnalyzer.classifyReactions(rules, molecules)
    database.reactionProperties = sbmlAnalyzer.getReactionProperties()
    database.translator, database.labelDictionary = sbmlAnalyzer.getUserDefinedComplexes()

    rdfAnnotations = analyzeRDF.getAnnotations(parser, 'uniprot')

    #STEP1: Use reaction information
    for rule, classification in zip(rules, classifications):
        reaction2 = list(parseReactions(rule))
        totalElements = [item for sublist in reaction2 for item in sublist]
        totalElements = list(set(totalElements))
        database.labelDictionary = defineCorrespondence(reaction2, totalElements,
                                                        database, classification,
                                                        rdfAnnotations)

    simplify(database.labelDictionary)

    cycles = resolveCycles(database, equivalenceTranslator)
    database.rawLabelDictionary = deepcopy(database.labelDictionary)

    for _ in range(0, 5):
        database.labelDictionary = resolveCorrespondence(database, cycles)

    # a reclassification only replaces reactions that were left as 'None'
    # or 'Binding' and for which the second pass found something
    classifications2, _, eequivalenceTranslator = sbmlAnalyzer.reclassifyReactions(
        rules, molecules, database.labelDictionary)
    for index, current in enumerate(classifications):
        if current in ['None', 'Binding'] and classifications2[index] != 'None':
            classifications[index] = classifications2[index]

    # from here on every label is wrapped in a one-element list
    database.labelDictionary = {x: [database.labelDictionary[x]]
                                for x in database.labelDictionary}

    #STEP2: Use naming conventions
    # every distinct name appearing in any rule; built once and used by
    # both passes below
    reactionElements = set(x for rule in rules
                           for side in parseReactions(rule) for x in side)
    for element in reactionElements:
        equivalence = [x for x in equivalenceTranslator if element == max(x, key=len)]
        if equivalence != []:
            defineCorrespondenceWithNamingConventions(element, equivalence, database,
                                                      createOnDemand=True)

    for element in reactionElements:
        equivalence = [x for x in equivalenceTranslator if element == max(x, key=len)]
        if equivalence != []:
            defineComplexCorrespondenceWithNamingConventions(element, equivalence, database,
                                                             createOnDemand=False)

    #STEP3: Use annotations
    sbmlAnalyzer.classifyReactionsWithAnnotations(rules, molecules, rdfAnnotations,
                                                  database.labelDictionary)
    simplify(database.labelDictionary)

    cycles = resolveCycles(database, equivalenceTranslator)

    #TODO (original author): this is causing errors, check
    for _ in range(0, 5):
        database.labelDictionary = resolveCorrespondence(database, cycles)

    correctClassificationsWithCycleInformation(rules, classifications, cycles)

    ##sort. Primary key: number of predependencies a rule has
    #secondary key: length of the names of the elements involved
    # The second entry of every equivalence is matched literally as an
    # underscore-delimited token, hence re.escape; the patterns do not
    # depend on the rule, so they are compiled only once.
    tokenPatterns = [re.compile(r'(_|^)({0})(_|$)'.format(re.escape(x[1])))
                     for x in equivalenceTranslator]
    ruleWeightTable = []
    ruleWeight2Table = []
    for rule in rules:
        #sort rules according to the complexity of the reactants (not the products)
        reactants = list(parseReactions(rule))[0]
        weight = 0
        weight2 = 0
        # this pass must finish before the additive terms are applied:
        # a reactant without label resets the running weight to 50
        for element in reactants:
            if element not in database.labelDictionary:
                weight = 50
            else:
                weight = max(weight, len(database.labelDictionary[element]))
            weight2 += len(element)
        weight += sum(1 for element in reactants for pattern in tokenPatterns
                      if pattern.search(element) is not None)
        weight += sum(element.count('_') for element in reactants)
        ruleWeight2Table.append(weight2)
        ruleWeightTable.append(weight)

    # a single sort on (weight, weight2) orders the rules exactly like the
    # former pair of stable sorts (secondary key first, primary key second)
    nonProcessedRules = sorted(
        zip(ruleWeightTable, ruleWeight2Table, rules, classifications),
        key=lambda entry: (entry[0], entry[1]))
    database.classifications = classifications

    for _, _, rule, classification in nonProcessedRules:
        # the last argument is processRule's output flag, always off here
        processRule(list(parseReactions(rule)), database, classification,
                    eequivalenceTranslator, False)

    # labels that are not tuples name another translator entry to reuse
    for element in database.labelDictionary:
        if not isinstance(database.labelDictionary[element], tuple):
            database.translator[element] = database.translator[database.labelDictionary[element]]
    raw = [x[0] for x in database.rawDatabase]

    # refresh the molecules of every translator entry that is not listed
    # in rawDatabase from the translator entry carrying the molecule's name
    for element in database.translator:
        if element in raw:
            continue
        for mol in database.translator[element].molecules:
            if mol.name in database.translator:
                mol.update(database.translator[mol.name].molecules[0])
    return database.translator, logMess.log, parser.getSpeciesAnnotation()
예제 #8
0
def transformMolecules(parser,
                       database,
                       configurationFile,
                       speciesEquivalences=None):
    #labelDictionary = {}
    _, rules, _ = parser.getReactions()
    molecules, _, _ = parser.getSpecies()
    #synthesisdatabase = {}
    #translator = {}
    sbmlAnalyzer = analyzeSBML.SBMLAnalyzer(configurationFile,
                                            speciesEquivalences)
    classifications, equivalenceTranslator, eequivalenceTranslator = sbmlAnalyzer.classifyReactions(
        rules, molecules)
    database.reactionProperties = sbmlAnalyzer.getReactionProperties()
    database.translator, database.labelDictionary = sbmlAnalyzer.getUserDefinedComplexes(
    )

    #analyzeSBML.analyzeNamingConventions(molecules)
    rdfAnnotations = analyzeRDF.getAnnotations(parser, 'uniprot')
    #print rdfAnnotations
    #print classifications

    #for element in equivalenceTranslator:
    #    addToLabelDictionary(database.labelDictionary,element[0],min(element,key=len))
    #    addToLabelDictionary(database.labelDictionary,element[1],min(element,key=len))

    #for element in database.labelDictionary:
    #    database.labelDictionary[element] = [(min(database.labelDictionary[element],key=len),)]

    #STEP1: Use reaction information to infer w print zip(rules,classifications)

    for rule, classification in zip(rules, classifications):
        reaction2 = list(parseReactions(rule))
        totalElements = [item for sublist in reaction2 for item in sublist]
        totalElements = list(set(totalElements))
        #obtain elements from the equivalenceTranstor that contain at least two
        #reactants/products

        #equivalences = [x for x in equivalenceTranslator if x[0] in reaction2[0] or x[1] in reaction2[1]]
        database.labelDictionary = defineCorrespondence(
            reaction2, totalElements, database, classification, rdfAnnotations)
        #if 'ERKi_MEKi_PP' in database.labelDictionary:
        #    print database.labelDictionary['ERKi_MEKi_PP'],rule
        #database.labelDictionary = resolveCorrespondence(database)
    #correctClassifications(rules,classifications,database.labelDictionary)
    #print 'step1',database.labelDictionary

    simplify(database.labelDictionary)

    #TODO: uncomment this section when we solve the bug on reclassifying
    #print database.labelDictionary
    cycles = resolveCycles(database, equivalenceTranslator)
    database.rawLabelDictionary = deepcopy(database.labelDictionary)

    for _ in range(0, 5):
        database.labelDictionary = resolveCorrespondence(database, cycles)
    #print 'after resolving correspondences'
    classifications2, _, eequivalenceTranslator = sbmlAnalyzer.reclassifyReactions(
        rules, molecules, database.labelDictionary)
    for index in range(0, len(classifications)):
        if classifications[index] in ['None', 'Binding'
                                      ] and classifications2[index] != 'None':
            classifications[index] = classifications2[index]
    tmp = {}

    tmp = {x: [database.labelDictionary[x]] for x in database.labelDictionary}

    database.labelDictionary = tmp
    #print 'step1.5',database.labelDictionary
    #STEP2: Use naming conventions
    for element in set(x for rule in rules for tmp in parseReactions(rule)
                       for x in tmp):
        equivalence = [
            x for x in equivalenceTranslator if element == max(x, key=len)
        ]
        if equivalence != []:
            defineCorrespondenceWithNamingConventions(element,
                                                      equivalence,
                                                      database,
                                                      createOnDemand=True)

    #for _ in range(0,5):
    #    database.labelDictionary = resolveCorrespondence(database,cycles)
    #print {x:type(database.labelDictionary[x]) == tuple for x in database.labelDictionary
    for element in set(x for rule in rules for tmp in parseReactions(rule)
                       for x in tmp):
        equivalence = [
            x for x in equivalenceTranslator if element == max(x, key=len)
        ]
        if equivalence != []:
            defineComplexCorrespondenceWithNamingConventions(
                element, equivalence, database, createOnDemand=False)

    #print 'step2',database.labelDictionary
    #STEP2.5: For those elements that where updated from the naming conventions
    #we also use the chance to update the reaction classification if necessary

#    correctClassifications(rules,classifications,database.labelDictionary)
#STEP3: Use annotations
    sbmlAnalyzer.classifyReactionsWithAnnotations(rules, molecules,
                                                  rdfAnnotations,
                                                  database.labelDictionary)
    #for element in set(x for rule in rules for tmp in parseReactions(rule) for x in tmp):
    #    annotation = [rdfAnnotations[x] for x in rdfAnnotations if element in rdfAnnotations[x]]
    #    if annotation != []:
    #        defineCorrespondenceWithAnnotations(element,annotation,database)
    simplify(database.labelDictionary)

    #print 'step3',database.labelDictionary
    #analyzeSBML.reclassifyReactions(reactions,molecules,labelDictionary,classifications,equivalenceTranslator)
    cycles = resolveCycles(database, equivalenceTranslator)

    #TODO: this is causing errors, check
    for _ in range(0, 5):
        database.labelDictionary = resolveCorrespondence(database, cycles)

    correctClassificationsWithCycleInformation(rules, classifications, cycles)
    #print database.labelDictionary
    counter = 0
    nonProcessedRules = zip(rules, classifications)
    #TODO:multipass stuff.
    #while len(nonProcessedRules) > 0:
    #tmp = []

    ##sort. Primary key: number of predependencies a rule has
    #secondary key: length of the names of the elements involved
    ruleWeightTable = []
    ruleWeight2Table = []

    #print equivalenceTranslator
    for rule in rules:
        flag = False
        weight = 0
        weight2 = 0
        reaction2 = list(parseReactions(rule))
        #sort rules according to the complexity of the reactants (not the products)
        for element in reaction2[0]:
            if element not in database.labelDictionary:
                weight = 50
            else:
                weight = max(weight, len(database.labelDictionary[element]))
            weight2 += len(element)
        for element in reaction2[0]:
            weight += sum([
                1 for x in equivalenceTranslator
                if re.search(r'(_|^)({0})(_|$)'.format(x[1]), element) != None
            ])
        for element in reaction2[0]:
            weight += element.count('_')
        ruleWeight2Table.append(weight2)
        ruleWeightTable.append(weight)
    nonProcessedRules = zip(ruleWeightTable, ruleWeight2Table, rules,
                            classifications)
    nonProcessedRules = sorted(nonProcessedRules, key=lambda rule: rule[1])
    nonProcessedRules = sorted(nonProcessedRules, key=lambda rule: rule[0])
    database.classifications = classifications

    for idx, (w0, w1, rule, classification) in enumerate(nonProcessedRules):
        outputFlag = False
        #if classification == 'Modification':
        #    outputFlag = True
        counter += 1
        reaction2 = list(parseReactions(rule))
        if outputFlag:
            tmp = deepcopy(database.translator)
            print reaction2
        processRule(reaction2, database, classification,
                    eequivalenceTranslator, outputFlag)
        #if 'EGF_EGFR2_PLCg' in database.translator:
        #    print rule,database.translator['EGF_EGFR2_PLCg'],classification
        #if 'EGF_EGFRm2_GAP_Grb2_Prot' in database.translator:
        #    print '++++',rule,difflib.SequenceMatcher(None, 'Grb2(egfr,shc!10,sos).EGF(egfr!5,modI~U,modM~M).EGFR(egf!5,egfr!8,gap!9,grb2!11,modI~U,prot,ras_gdp,shc!10).EGF(egfr!7,modI~U,modM~U).EGFR(egf!7,egfr!8,gap!8,grb2,modI~U,prot,ras_gdp,shc!9).GAP(egfr!9).Prot(egfr!11,modI~U,ras_gdp,ras_gtp)' , str(database.translator['EGF_EGFRm2_GAP_Grb2_Prot'])).ratio()
        if outputFlag:
            print {
                x: str(database.translator[x])
                for x in database.translator if x not in tmp
            }

    for element in database.labelDictionary:
        if not isinstance(database.labelDictionary[element], tuple):
            database.translator[element] = database.translator[
                database.labelDictionary[element]]
    raw = [x[0] for x in database.rawDatabase]

    for element in database.translator:
        if element in raw:
            continue
        for mol in database.translator[element].molecules:
            if mol.name in database.translator:
                mol.update(database.translator[mol.name].molecules[0])
    return database.translator, logMess.log, parser.getSpeciesAnnotation()
예제 #9
0
def createSpeciesCompositionGraph(parser,
                                  database,
                                  configurationFile,
                                  namingConventions,
                                  speciesEquivalences=None,
                                  bioGridFlag=False):
    """Build the species dependency graph of a model and consolidate it.

    The graph is stored in ``database.dependencyGraph`` and is filled in
    from several sources, applied in this order:

    1. every rule, through ``bindingReactionsAnalysis``;
    2. the lexical dependency graph returned by ``classifyReactions``
       (overwrites entries from step 1);
    3. naming equivalences whose key is not 'Binding' (the longest name
       is made to depend on the shortest one);
    4. 'Transformation' rules, only if ``database.forceModificationFlag``;
    5. ``database.labelDictionary`` (overwrites earlier entries);
    6. ``database.lexicalLabelDictionary``, only for species that still
       have no dependency;
    7. ``findClosestModification`` applied to the species that are still
       left without any dependency.

    Besides ``dependencyGraph`` the following attributes of ``database``
    are (re)assigned: ``sbmlAnalyzer``, ``classifications``,
    ``eequivalenceTranslator``, ``reactionProperties``, ``translator``,
    ``labelDictionary``, ``lexicalLabelDictionary``, ``tmpEquivalence``,
    ``weights`` and ``artificialEquivalenceTranslator``.

    Args:
        parser: object providing ``getReactions(atomize=True)`` and
            ``getSpecies()``.
        database: mutable state holder; read for ``forceModificationFlag``
            and updated as described above.
        configurationFile: forwarded to ``analyzeSBML.SBMLAnalyzer``.
        namingConventions: forwarded to ``analyzeSBML.SBMLAnalyzer``.
        speciesEquivalences: forwarded to ``analyzeSBML.SBMLAnalyzer``.
        bioGridFlag: accepted for interface compatibility; not used here.

    Returns:
        A ``(prunnedDependencyGraph, database)`` tuple, where the first
        item is the first value returned by ``consolidateDependencyGraph``.
    """
    _, rules, _ = parser.getReactions(atomize=True)
    molecules, _, _, _ = parser.getSpecies()
    database.sbmlAnalyzer = analyzeSBML.SBMLAnalyzer(
        parser, configurationFile, namingConventions, speciesEquivalences)

    # classify reactions
    # NOTE: the fourth value (indirect equivalences) is not consumed; the
    # complex-catalysis pass that used it was disabled.
    (database.classifications, equivalenceTranslator,
     database.eequivalenceTranslator, _,
     adhocLabelDictionary, lexicalDependencyGraph) = \
        database.sbmlAnalyzer.classifyReactions(rules, molecules)

    #####input processing
    # states, components, other user options
    database.reactionProperties = database.sbmlAnalyzer.getReactionProperties()
    # user defined and lexical analysis naming conventions are stored here
    database.reactionProperties.update(adhocLabelDictionary)

    database.translator, database.labelDictionary, \
        database.lexicalLabelDictionary = \
        database.sbmlAnalyzer.getUserDefinedComplexes()
    database.dependencyGraph = {}
    # Local alias: every update below mutates this same dict in place.
    dependencyGraph = database.dependencyGraph

    # NOTE(review): the returned annotations are not used in this function.
    # The call is kept in case getAnnotations has side effects that later
    # stages rely on -- TODO confirm and remove if it has none.
    analyzeRDF.getAnnotations(parser, 'uniprot')

    ####dependency graph
    # binding reactions
    for reaction, classification in zip(rules, database.classifications):
        bindingReactionsAnalysis(dependencyGraph,
                                 list(parseReactions(reaction)),
                                 classification)
    for element in lexicalDependencyGraph:
        dependencyGraph[element] = lexicalDependencyGraph[element]

    # catalysis reactions
    for key in database.eequivalenceTranslator:
        for namingEquivalence in database.eequivalenceTranslator[key]:
            baseElement = min(namingEquivalence, key=len)
            modElement = max(namingEquivalence, key=len)
            if key == 'Binding':
                continue
            # Register the base element as a root only when neither side
            # of the equivalence has a known dependency yet.
            if dependencyGraph.get(baseElement, []) == [] and \
                    dependencyGraph.get(modElement, []) == []:
                dependencyGraph[baseElement] = []
            addToDependencyGraph(dependencyGraph, modElement, [baseElement])

    # non lexical-analysis catalysis reactions
    if database.forceModificationFlag:
        for reaction, classification in zip(rules, database.classifications):
            if classification != 'Transformation':
                continue
            preaction = list(parseReactions(reaction))
            reactant, product = preaction[0][0], preaction[1][0]
            # The name contained in the other one is taken as the base form.
            if product in reactant:
                base, mod = product, reactant
            else:
                base, mod = reactant, product
            if dependencyGraph[mod] == []:
                dependencyGraph[mod] = [[base]]

    # user defined stuff
    for element in database.labelDictionary:
        components = database.labelDictionary[element][0]
        if len(components) == 0 or element == components[0]:
            addToDependencyGraph(dependencyGraph, element, [])
        else:
            dependencyGraph[element] = [list(components)]

    # stuff obtained from string similarity analysis
    for element in database.lexicalLabelDictionary:
        # similarity analysis has less priority than anything we
        # discovered before
        if element in dependencyGraph and len(dependencyGraph[element]) > 0:
            continue

        components = database.lexicalLabelDictionary[element][0]
        if len(components) == 0 or element == components[0]:
            addToDependencyGraph(dependencyGraph, element, [])
        else:
            logMess('INFO:Atomization',
                    'added induced speciesStructure {0}={1}'.format(
                        element, components))
            dependencyGraph[element] = [list(components)]

    # pure lexical analysis
    orphanedSpecies = [x for x in dependencyGraph if dependencyGraph[x] == []]
    strippedMolecules = [x.strip('()') for x in molecules]
    tmpDependency, database.tmpEquivalence = \
        database.sbmlAnalyzer.findClosestModification(orphanedSpecies,
                                                      strippedMolecules)
    for species in tmpDependency:
        if tmpDependency[species] == []:
            addToDependencyGraph(dependencyGraph, species, [])
        for instance in tmpDependency[species]:
            addToDependencyGraph(dependencyGraph, species, instance)

    #####sct
    # FIXME: the third value returned by consolidateDependencyGraph (the
    # "uneven element" dictionary) has no known consumer; it is discarded.
    prunnedDependencyGraph, database.weights, _, \
        database.artificialEquivalenceTranslator = consolidateDependencyGraph(
            dependencyGraph, equivalenceTranslator,
            database.eequivalenceTranslator, database.sbmlAnalyzer)

    return prunnedDependencyGraph, database
예제 #10
0
def loadAnnotations(fileName):
    """Return the 'miriam' RDF annotations found in the SBML file ``fileName``."""
    sbmlReader = libsbml.SBMLReader()
    # Keep the document bound to a local for the whole call: the model
    # handed to the translator is obtained from it (presumably the document
    # owns the model -- verify against the libsbml bindings).
    sbmlDocument = sbmlReader.readSBMLFromFile(fileName)
    translator = libsbml2bngl.SBML2BNGL(sbmlDocument.getModel())
    return analyzeRDF.getAnnotations(translator, "miriam")