def toBel(dbId):
    """Convert the entity identified by *dbId* to its BEL representation.

    The entity record is fetched with ``getEntityData`` and passed to the
    converter selected by its ``schemaClass`` value.  Exact class names are
    tested first; the substring tests ('Set', 'GenomeEncodedEntity',
    'CatalystActivity') come afterwards, in that order.

    Args:
        dbId: identifier passed through to ``getEntityData``.

    Returns:
        Whatever the selected ``toBel*`` converter returns.

    Raises:
        ValueError: if ``schemaClass`` matches none of the handled classes.
            (Previously this case ended in an UnboundLocalError on the
            final ``return``.)
    """
    entity = getEntityData(dbId)
    print('toBel dbId: ', dbId)
    schemaClass = entity['schemaClass']

    # Exact schema-class matches.
    if schemaClass == 'Compartment':
        return toBelCompartment(entity)
    if schemaClass == 'EntityCompartment':
        return toBelEntityCompartment(entity)
    if schemaClass == 'OtherEntity':
        return toBelOtherEntity(entity)
    if schemaClass == 'Polymer':
        return toBelPolymer(entity)
    if schemaClass == 'Complex':
        # alternative version - toBelComplexNamed()
        return toBelComplexComponents(entity)
    if schemaClass == 'SimpleEntity':
        return toBelSimpleEntity(entity)
    if schemaClass == 'EntityWithAccessionedSequence':
        return toBelEntityWithAccessionedSequence(entity)

    # Substring matches; order is significant and kept from the original.
    if 'Set' in schemaClass:
        return toBelSets(entity)
    if 'GenomeEncodedEntity' in schemaClass:
        return toBelGenomeEncodedEntity(entity)
    if 'CatalystActivity' in schemaClass:
        return toBelCatalystActivity(entity)

    raise ValueError(
        'toBel: unsupported schemaClass {!r} for dbId {!r}'.format(
            schemaClass, dbId))
def toBel(dbId):
    """Convert the entity identified by *dbId* to its BEL representation.

    The entity record is fetched with ``getEntityData`` and passed to the
    converter selected by its ``schemaClass`` value.  Exact class names are
    tested first; the substring tests ('Set', 'GenomeEncodedEntity',
    'CatalystActivity') come afterwards, in that order.

    Args:
        dbId: identifier passed through to ``getEntityData``.

    Returns:
        Whatever the selected ``toBel*`` converter returns.

    Raises:
        ValueError: if ``schemaClass`` matches none of the handled classes.
            (Previously this case ended in an UnboundLocalError on the
            final ``return``.)
    """
    entity = getEntityData(dbId)
    print('toBel dbId: ', dbId)
    schemaClass = entity['schemaClass']

    # Exact schema-class matches.
    if schemaClass == 'Compartment':
        return toBelCompartment(entity)
    if schemaClass == 'EntityCompartment':
        return toBelEntityCompartment(entity)
    if schemaClass == 'OtherEntity':
        return toBelOtherEntity(entity)
    if schemaClass == 'Polymer':
        return toBelPolymer(entity)
    if schemaClass == 'Complex':
        # alternative version - toBelComplexNamed()
        return toBelComplexComponents(entity)
    if schemaClass == 'SimpleEntity':
        return toBelSimpleEntity(entity)
    if schemaClass == 'EntityWithAccessionedSequence':
        return toBelEntityWithAccessionedSequence(entity)

    # Substring matches; order is significant and kept from the original.
    if 'Set' in schemaClass:
        return toBelSets(entity)
    if 'GenomeEncodedEntity' in schemaClass:
        return toBelGenomeEncodedEntity(entity)
    if 'CatalystActivity' in schemaClass:
        return toBelCatalystActivity(entity)

    raise ValueError(
        'toBel: unsupported schemaClass {!r} for dbId {!r}'.format(
            schemaClass, dbId))
def buildBelEvidences(reactionList, belversion, pathways=None):
    """Load reactions and build BEL Evidences.

    For every ``(rxnId, rxnName)`` pair in *reactionList* the reaction record
    is fetched with ``getEntityData``.  Records without a
    ``stableIdentifier`` key are skipped.  For the rest an evidence dict
    (annotations, citation and BEL statements) is assembled.  Evidences with
    a statement containing 'ENSEMBL' or 'EMBL' are collected separately and
    dumped to ``bad_evidences.json``; the others are rendered through
    ``render_template`` and written to ``reactome.bels`` (or
    ``reactome.bels2`` when *belversion* is ``'2'``).

    Args:
        reactionList: iterable of ``(rxnId, rxnName)`` pairs.  Only the id
            is used; the name is re-read from the fetched record.
        belversion: when equal to the string ``'2'`` the output file name
            gets a trailing ``2``.
        pathways: forwarded unchanged to ``render_template``.

    Returns:
        None.  Results are written to files in the current directory.
    """
    # Kept as a function-local import, as in the original, so the block does
    # not depend on a module-level ``import json``.
    import json

    rxnUrlTpl = 'http://www.reactome.org/PathwayBrowser/#'
    # "<author>, <YYYY-MM-DD>" as found in the 'created' display name.
    createdPattern = re.compile(r'(.*?),\s+(\d{4,4}-\d{2,2}-\d{2,2})')

    evidences = []
    bad_namespaces_evidences = []  # collect bad evidences

    for rxnId, _listedName in reactionList:
        print('rxnId: ', rxnId)

        # Process Annotation information
        rxnData = getEntityData(rxnId)
        if 'stableIdentifier' not in rxnData:
            continue

        stableId = rxnData['stableIdentifier']['displayName']
        rxnUrl = '{}{}'.format(rxnUrlTpl, stableId)
        rxnName = escapeBelString(rxnData['displayName'])
        rxnType = rxnData['schemaClass']

        # Todo collect all compartments and annotate
        compartment = 'Unknown'
        # Truthiness guard: an empty compartment list keeps 'Unknown'
        # instead of raising IndexError on [0].
        if 'compartment' in rxnData and rxnData['compartment']:
            compartment = rxnData['compartment'][0]['displayName']

        rxnAuthor = rxnDate = None
        if 'created' in rxnData:
            try:
                matches = createdPattern.search(
                    rxnData['created']['displayName'])
            except (KeyError, TypeError):
                # Best effort: a missing or malformed 'created' entry only
                # means the citation carries no author/date.
                log.info(
                    'Rxn - cannot find created date and author in object: {}'.
                    format(rxnId))
            else:
                if matches:
                    rxnAuthor = matches.group(1)
                    rxnDate = matches.group(2)

        if rxnDate and rxnAuthor:
            citation = '{{"Online Resource", "{}", "{}", "{}", "{}"}}'.format(
                rxnName, rxnUrl, rxnDate, rxnAuthor)
        else:
            citation = '{{"Online Resource", "{}", "{}"}}'.format(
                rxnName, rxnUrl)

        evidence = {
            'name': rxnName,
            'rxnId': rxnId,  # TODO remove after debugging
            'rxnType': rxnType,
            'compartment': compartment,
            'species': rxnData['speciesName'],
            'species_tax_id': convertSpeciesNameToTaxId(rxnData['speciesName']),
            'summary_text': rxnName,
            'citation': citation,
        }

        # Process BEL Statement
        catalysts, inputs, outputs = [], [], []
        if 'catalystActivity' in rxnData:
            for catalyst in rxnData['catalystActivity']:
                catalysts.append(toBel(catalyst['dbId']))
        if 'input' in rxnData:
            for inputEntity in dedup(rxnData['input']):
                inputs.append(toBel(inputEntity['dbId']))
        if 'output' in rxnData:
            for outputEntity in dedup(rxnData['output']):
                outputs.append(toBel(outputEntity['dbId']))

        print('Catalysts ', catalysts)
        print('Inputs ', inputs)
        print('Outputs ', outputs)
        print('\n')

        statements = buildStatements(catalysts, inputs, outputs)
        evidence['statements'] = dedupList(statements)

        # The namespace check runs over the un-deduplicated statements, as in
        # the original.
        bad_namespace_flag = False
        for statement in statements:
            if 'ENSEMBL' in statement or 'EMBL' in statement:
                bad_namespace_flag = True

        if bad_namespace_flag:
            bad_namespaces_evidences.append(copy.deepcopy(evidence))
        else:
            evidences.append(copy.deepcopy(evidence))

    # NOTE(review): template_filename is not defined in this function; it is
    # presumably a module-level name - confirm.
    belscript = render_template(template_filename, evidences, pathways=pathways)

    fn = 'reactome.bels'
    if belversion == '2':
        fn += '2'

    with open(fn, 'w') as f:
        f.write(belscript)

    with open('bad_evidences.json', 'w') as f:
        json.dump(bad_namespaces_evidences, f, indent=4)
def buildBelEvidences(reactionList, belversion, pathways=None):
    """Load reactions and build BEL Evidences.

    For every ``(rxnId, rxnName)`` pair in *reactionList* the reaction record
    is fetched with ``getEntityData``.  Records without a
    ``stableIdentifier`` key are skipped.  For the rest an evidence dict
    (annotations, citation and BEL statements) is assembled.  Evidences with
    a statement containing 'ENSEMBL' or 'EMBL' are collected separately and
    dumped to ``bad_evidences.json``; the others are rendered through
    ``render_template`` and written to ``reactome.bels`` (or
    ``reactome.bels2`` when *belversion* is ``'2'``).

    Args:
        reactionList: iterable of ``(rxnId, rxnName)`` pairs.  Only the id
            is used; the name is re-read from the fetched record.
        belversion: when equal to the string ``'2'`` the output file name
            gets a trailing ``2``.
        pathways: forwarded unchanged to ``render_template``.

    Returns:
        None.  Results are written to files in the current directory.
    """
    # Kept as a function-local import, as in the original, so the block does
    # not depend on a module-level ``import json``.
    import json

    rxnUrlTpl = 'http://www.reactome.org/PathwayBrowser/#'
    # "<author>, <YYYY-MM-DD>" as found in the 'created' display name.
    createdPattern = re.compile(r'(.*?),\s+(\d{4,4}-\d{2,2}-\d{2,2})')

    evidences = []
    bad_namespaces_evidences = []  # collect bad evidences

    for rxnId, _listedName in reactionList:
        print('rxnId: ', rxnId)

        # Process Annotation information
        rxnData = getEntityData(rxnId)
        if 'stableIdentifier' not in rxnData:
            continue

        stableId = rxnData['stableIdentifier']['displayName']
        rxnUrl = '{}{}'.format(rxnUrlTpl, stableId)
        rxnName = escapeBelString(rxnData['displayName'])
        rxnType = rxnData['schemaClass']

        # Todo collect all compartments and annotate
        compartment = 'Unknown'
        # Truthiness guard: an empty compartment list keeps 'Unknown'
        # instead of raising IndexError on [0].
        if 'compartment' in rxnData and rxnData['compartment']:
            compartment = rxnData['compartment'][0]['displayName']

        rxnAuthor = rxnDate = None
        if 'created' in rxnData:
            try:
                matches = createdPattern.search(
                    rxnData['created']['displayName'])
            except (KeyError, TypeError):
                # Best effort: a missing or malformed 'created' entry only
                # means the citation carries no author/date.
                log.info('Rxn - cannot find created date and author in object: {}'.format(rxnId))
            else:
                if matches:
                    rxnAuthor = matches.group(1)
                    rxnDate = matches.group(2)

        if rxnDate and rxnAuthor:
            citation = '{{"Online Resource", "{}", "{}", "{}", "{}"}}'.format(rxnName, rxnUrl, rxnDate, rxnAuthor)
        else:
            citation = '{{"Online Resource", "{}", "{}"}}'.format(rxnName, rxnUrl)

        evidence = {
            'name': rxnName,
            'rxnId': rxnId,  # TODO remove after debugging
            'rxnType': rxnType,
            'compartment': compartment,
            'species': rxnData['speciesName'],
            'species_tax_id': convertSpeciesNameToTaxId(rxnData['speciesName']),
            'summary_text': rxnName,
            'citation': citation,
        }

        # Process BEL Statement
        catalysts, inputs, outputs = [], [], []
        if 'catalystActivity' in rxnData:
            for catalyst in rxnData['catalystActivity']:
                catalysts.append(toBel(catalyst['dbId']))
        if 'input' in rxnData:
            for inputEntity in dedup(rxnData['input']):
                inputs.append(toBel(inputEntity['dbId']))
        if 'output' in rxnData:
            for outputEntity in dedup(rxnData['output']):
                outputs.append(toBel(outputEntity['dbId']))

        print('Catalysts ', catalysts)
        print('Inputs ', inputs)
        print('Outputs ', outputs)
        print('\n')

        statements = buildStatements(catalysts, inputs, outputs)
        evidence['statements'] = dedupList(statements)

        # The namespace check runs over the un-deduplicated statements, as in
        # the original.
        bad_namespace_flag = False
        for statement in statements:
            if 'ENSEMBL' in statement or 'EMBL' in statement:
                bad_namespace_flag = True

        if bad_namespace_flag:
            bad_namespaces_evidences.append(copy.deepcopy(evidence))
        else:
            evidences.append(copy.deepcopy(evidence))

    # NOTE(review): template_filename is not defined in this function; it is
    # presumably a module-level name - confirm.
    belscript = render_template(template_filename, evidences, pathways=pathways)

    fn = 'reactome.bels'
    if belversion == '2':
        fn += '2'

    with open(fn, 'w') as f:
        f.write(belscript)

    with open('bad_evidences.json', 'w') as f:
        json.dump(bad_namespaces_evidences, f, indent=4)