def get_geneIDsAndRxnIDsAndMetIDs_modelsBioCycAndModelsCOBRA(
    self,
    pathways,
    model_id_I='150526_iDM2015',
    database_I='ECOLI'):
    '''Collect the gene, reaction and metabolite ids linked to BioCyc pathways
    INPUT:
    pathways = pathway names, passed as names_I to the BioCyc pathways query
    model_id_I = COBRA model id used for the reaction look-up
        (default keeps the previously hard-coded '150526_iDM2015')
    database_I = BioCyc database queried
        (default keeps the previously hard-coded 'ECOLI')
    OUTPUT:
    gene_ids = unique pathway gene names plus their non-empty synonyms
    rxn_ids = unique COBRA rxn_ids with the '_reverse' suffix text removed
    met_ids = unique ids found in products_ids/reactants_ids of those reactions
    met_ids_deformated = unique met_ids after deformat_metid and the
        hard-coded renames listed in the body
    '''
    from SBaaS_models.models_COBRA_dependencies import models_COBRA_dependencies

    #initialize supporting objects
    cobra01 = models_COBRA_query(self.session, self.engine, self.settings)
    cobra01.initialize_supportedTables()
    cobra_dependencies = models_COBRA_dependencies()

    #query the pathways
    biocyc_pathways = self.getParsed_genesAndPathwaysAndReactions_namesAndDatabase_modelsBioCycPathways(
        names_I=pathways,
        database_I=database_I,
        query_I={},
        output_O='listDict',
        dictColumn_I=None)
    genes = list(set(row['gene'] for row in biocyc_pathways if row['gene'] != ''))

    #join list of genes with alternative identifiers
    biocyc_genes = self.getParsed_genesAndAccessionsAndSynonyms_namesAndDatabase_modelsBioCycPolymerSegments(
        names_I=genes,
        database_I=database_I,
        query_I={},
        output_O='listDict',
        dictColumn_I=None)
    synonyms = [row['synonym'] for row in biocyc_genes if row['synonym']]
    gene_ids = list(set(genes + synonyms))
    accession_1 = list(set(
        row['accession_1'] for row in biocyc_genes if row['accession_1'] != ''))

    #join accession_1 with COBRA reactions
    cobra_rxnIDs = cobra01.get_rows_modelIDAndOrderedLocusNames_dataStage02PhysiologyModelReactions(
        model_id_I=model_id_I,
        ordered_locus_names_I=accession_1,
        query_I={},
        output_O='listDict',
        dictColumn_I=None)
    rxn_ids = list(set(
        row['rxn_id'].replace('_reverse', '')
        for row in cobra_rxnIDs if row['rxn_id'] != ''))

    #COBRA metabolites (products and reactants of every matched reaction)
    met_ids = list(set(
        [met for row in cobra_rxnIDs if row['products_ids'] for met in row['products_ids']]
        + [met for row in cobra_rxnIDs if row['reactants_ids'] for met in row['reactants_ids']]))

    #deformat met_ids
    #NOTE: the renames are substring replacements applied in this order
    #NOTE(review): presumably these map model ids onto measured pools/surrogates;
    #   confirm that substring (rather than whole-id) matching is intended
    renames = (
        ('13dpg', '23dpg'),
        ('3pg', 'Pool_2pg_3pg'),
        ('glycogen', 'adpglc'),
        ('uacgam', 'udpglcur'),
    )
    deformated = set()
    for met_id in met_ids:
        met = cobra_dependencies.deformat_metid(met_id)
        for old, new in renames:
            met = met.replace(old, new)
        deformated.add(met)
    met_ids_deformated = list(deformated)

    #return values
    return gene_ids, rxn_ids, met_ids, met_ids_deformated
def convertAndMap_BioCycTranscriptionFactor2COBRA(
    self,
    BioCyc2COBRA_regulation_I,
    BioCyc_polymerSegments_I = None,
    BioCyc_compounds_I = None,
    COBRA_metabolites_I = None,
    chebi2inchi_I = None,
    ):
    '''Convert and map BioCyc Transcription factor (ligand-binding) reactions to COBRA model ids
    INPUT:
    BioCyc2COBRA_regulation_I = output from convertAndMap_BioCycRegulation2COBRA
        (only rows whose 'regulator' contains
        'DNA-binding transcriptional dual regulator' are used)
    BioCyc_polymerSegments_I = (TODO) listDict of models_BioCyc_polymerSegments
        (indexed by product below, but the index is not consumed yet)
    BioCyc_compounds_I = listDict of models_BioCyc_compounds
    COBRA_metabolites_I = listDict of models_COBRA_metabolites
    chebi2inchi_I = listDict of CHEBI_ID to InCHI
    OUTPUT:
    BioCyc2COBRA_regulators_O = dict keyed by regulator name; each value is a
        list with one entry per BioCyc reaction that has the regulator on its
        left or right side:
        {'ligands':{'COBRA_met_id':[],'BioCyc_name':[]},
         'genes':[], 'tu':regulator, 'regulator':regulator, 'mode':'("-")' or '("+")'}
        An empty dict is returned when there is no regulator to map
        (None/empty input or no matching regulator name).
    '''
    #select the transcription factors; None/empty input yields no regulators
    BioCyc2COBRA_regulators = []
    if BioCyc2COBRA_regulation_I:
        BioCyc2COBRA_regulators = list(set(
            r['regulator'] for r in BioCyc2COBRA_regulation_I
            if 'DNA-binding transcriptional dual regulator' in r['regulator']))
    if not BioCyc2COBRA_regulators:
        #nothing to map: skip the look-up tables and database queries
        return {}

    from SBaaS_models.models_BioCyc_dependencies import models_BioCyc_dependencies
    BioCyc_dependencies = models_BioCyc_dependencies()

    #reformat input into dicts for fast traversal
    #(None/empty inputs are passed through unchanged)
    if chebi2inchi_I:
        chebi2inchi_dict_I = {r['CHEBI_ID']: r['InChI'] for r in chebi2inchi_I}
    else:
        chebi2inchi_dict_I = chebi2inchi_I

    if BioCyc_compounds_I:
        BioCyc_compounds_dict_I = {}
        for row in BioCyc_compounds_I:
            name = row['name']
            if name != '':
                rows = BioCyc_compounds_dict_I.setdefault(name, [])
                if row not in rows:
                    rows.append(row)
    else:
        BioCyc_compounds_dict_I = BioCyc_compounds_I

    #index of polymerSegments by product
    #NOTE: not consumed yet (see the TODO in the protein branch below)
    if BioCyc_polymerSegments_I:
        BioCyc_polymerSegments_dict_I = {}
        for segment in BioCyc_polymerSegments_I:
            products = BioCyc_dependencies.convert_bioCycList2List(segment['product'])
            for product in products:
                #every row is stored, including the first one seen for a product
                BioCyc_polymerSegments_dict_I.setdefault(product, []).append(segment)
    else:
        BioCyc_polymerSegments_dict_I = BioCyc_polymerSegments_I

    #NOTE: known problem cases recorded in the original spot checks:
    #   'Cra DNA-binding transcriptional dual regulator': error mapping fdp_c
    #   'GalR DNA-binding transcriptional dual regulator': gene is being identified as a TU
    BioCyc2COBRA_regulators_O = {}
    for e in BioCyc2COBRA_regulators:
        BioCyc2COBRA_regulators_O[e] = []
        reactions = self.get_rows_substratesAndParentClassesAndDatabase_modelsBioCycReactions(
            e,
            database_I='ECOLI',
            query_I={},
            output_O='listDict',
            dictColumn_I=None
            )
        for t in reactions:
            #parse left and right
            left = BioCyc_dependencies.convert_bioCycList2List(t['left'])
            right = BioCyc_dependencies.convert_bioCycList2List(t['right'])
            #the side holding the regulator determines the mode
            if e in left:
                mode = '("-")'
            elif e in right:
                mode = '("+")'
            else:
                continue
            ligands = {'COBRA_met_id': [], 'BioCyc_name': []}
            genes = []
            #left and right participants are handled identically (left first):
            #query proteins to look up the gene
            #query compounds to look up the ligands
            for side in (left, right):
                for participant in side:
                    proteins = self.get_rows_nameAndDatabase_modelsBioCycProteins(
                        participant, database_I='ECOLI')
                    if proteins:
                        for p in proteins:
                            #1. parse genes directly
                            genes.extend(BioCyc_dependencies.convert_bioCycList2List(p['gene']))
                            #2. query and parse polymerSegments by protein name
                            #   (covers protein complexes without a gene entry)
                            ##TODO: test using BioCyc_polymerSegments_dict_I
                            ##      instead of one query per name
                            for n in BioCyc_dependencies.convert_bioCycList2List(p['names']):
                                rows = self.get_rows_productAndDatabase_modelsBioCycPolymerSegments(
                                    n, database_I='ECOLI')
                                genes.extend(row['name'] for row in rows)
                        continue
                    #compounds are only relevant when no protein matched
                    compounds = self.get_rows_nameAndDatabase_modelsBioCycCompounds(
                        participant, database_I='ECOLI')
                    if compounds:
                        #map the ligand names
                        original, converted = BioCyc_dependencies.map_BioCyc2COBRA(
                            compounds,
                            BioCyc_components_dict_I=BioCyc_compounds_dict_I,
                            BioCyc2COBRA_func_I=BioCyc_dependencies.map_BioCycCompound2COBRA,
                            BioCyc2COBRA_params_I={
                                'COBRA_metabolites_I': COBRA_metabolites_I,
                                'chebi2inchi_dict_I': chebi2inchi_dict_I,
                            }
                            )
                        ligands['BioCyc_name'].extend([c['name'] for c in original])
                        ligands['COBRA_met_id'].extend(converted)
            genes = list(set(g for g in genes if g != ''))
            #NOTE: there is exactly 1 tu (the regulator itself) per entry, but
            #there can be multiple ligands/genes associated with the tu
            BioCyc2COBRA_regulators_O[e].append({
                'ligands': ligands,
                'genes': genes,
                'tu': e,
                'regulator': e,
                'mode': mode,
                })
    return BioCyc2COBRA_regulators_O
def convertAndMap_BioCycRegulation2COBRA(
    self,
    BioCyc_regulation_I,
    BioCyc_reactions_I = None,
    BioCyc_enzymaticReactions2PolymerSegments_I = None,
    BioCyc_compounds_I = None,
    COBRA_reactions_I = None,
    COBRA_metabolites_I = None,
    chebi2inchi_I = None,
    #chebi2database_I = None,
    MetaNetX_reactions_I = None,
    MetaNetX_metabolites_I = None,):
    '''Convert and map BioCyc Regulation to COBRA model ids
    INPUT:
    BioCyc_regulation_I = listDict of BioCyc regulation rows
    BioCyc_reactions_I = listDict of models_BioCyc_reactions
    BioCyc_enzymaticReactions2PolymerSegments_I = listDict of join between
        models_BioCyc_enzymaticReactions and models_BioCyc_polymerSegments
        (getJoin_genes_namesAndDatabase_modelsBioCycEnzymaticReactionsAndPolymerSegments)
    BioCyc_compounds_I = listDict of models_BioCyc_compounds
    COBRA_reactions_I = listDict of models_COBRA_reactions
    COBRA_metabolites_I = listDict of models_COBRA_metabolites
    chebi2inchi_I = listDict of CHEBI_ID to InCHI
    MetaNetX_reactions_I = listDict of MetaNetX reaction xrefs
    MetaNetX_metabolites_I = listDict of MetaNetX chemical xrefs
    OUTPUT:
    data_O = listDict without duplicate rows; each row merges one entry of the
        crossed EcoCyc regulators/regulated entities, the entry at the same
        position of the crossed COBRA regulators/regulated entities, and the
        regulation fields ('regulator','regulated_entity','mode','mechanism',
        'name','parent_classes'). Regulation rows for which any of the four
        mapped lists is empty are skipped.
    '''
    BioCyc_dependencies = models_BioCyc_dependencies()

    def group_rows(rows_I, keys_of):
        '''Index rows under each non-empty key returned by keys_of(row);
        None/empty input is returned unchanged'''
        if not rows_I:
            return rows_I
        grouped = {}
        for row in rows_I:
            for key in set(k for k in keys_of(row) if k != ''):
                bucket = grouped.setdefault(key, [])
                if row not in bucket:
                    bucket.append(row)
        return grouped

    def reaction_keys(row):
        '''Keys of a reaction row: common_name plus its enzymatic reactions'''
        keys = [row['common_name']]
        keys.extend(BioCyc_dependencies.convert_bioCycList2List(row['enzymatic_reaction']))
        return keys

    def index_xrefs(rows_I):
        '''Build {MNX_ID: {xref prefix: xref id}} from '#XREF' values split on ':';
        rows that raise are printed and skipped;
        None/empty input is returned unchanged'''
        if not rows_I:
            return rows_I
        xrefs = {}
        for row in rows_I:
            try:
                entry = xrefs.setdefault(row['MNX_ID'], {})
                key_value = row['#XREF'].split(':')
                entry[key_value[0]] = key_value[1]
            except Exception as e:
                print(e)
        return xrefs

    def map_plain(entity):
        '''Map an entity that needs no conversion function or parameters'''
        return BioCyc_dependencies.map_BioCyc2COBRA(
            entity,
            BioCyc2COBRA_func_I=None,
            BioCyc2COBRA_params_I={}
            )

    #reformat input into dicts for fast traversal
    if chebi2inchi_I:
        chebi2inchi_dict_I = {r['CHEBI_ID']: r['InChI'] for r in chebi2inchi_I}
    else:
        chebi2inchi_dict_I = chebi2inchi_I
    BioCyc_compounds_dict_I = group_rows(BioCyc_compounds_I, lambda row: [row['name']])
    BioCyc_reactions_dict_I = group_rows(BioCyc_reactions_I, reaction_keys)

    if BioCyc_enzymaticReactions2PolymerSegments_I:
        BioCyc_enzymaticReactions_dict_I = {}
        for row in BioCyc_enzymaticReactions2PolymerSegments_I:
            try:
                entry = BioCyc_enzymaticReactions_dict_I.setdefault(row['name'], {
                    'name': '',
                    'enzyme': [],
                    'gene_ids': [],
                    'accession_1': [],
                    })
                entry['name'] = row['name']
                entry['enzyme'].append(row['enzyme'])
                entry['gene_ids'].extend(row['gene_ids'])
                entry['accession_1'].extend(row['accession_1'])
            except Exception as e:
                print(e)
    else:
        BioCyc_enzymaticReactions_dict_I = BioCyc_enzymaticReactions2PolymerSegments_I

    MetaNetX_reactions_dict_I = index_xrefs(MetaNetX_reactions_I)
    MetaNetX_metabolites_dict_I = index_xrefs(MetaNetX_metabolites_I)

    data_O = []
    for reg in BioCyc_regulation_I:
        #fields copied unchanged into every output row of this regulation
        shared = {
            'regulator': reg['regulator'],
            'regulated_entity': reg['regulated_entity'],
            'mode': reg['mode'],
            'mechanism': reg['mechanism'],
            'name': reg['name'],
            'parent_classes': reg['parent_classes']
            }

        #convert the regulators: the first populated field wins
        #(gene, protein, RNA, then compound)
        regulators_EcoCyc, regulators_COBRA = [], []
        for field in ('regulator_gene', 'regulator_protein', 'regulator_RNA'):
            if reg[field]:
                regulators_EcoCyc, regulators_COBRA = map_plain(reg[field])
                break
        else:
            if reg['regulator_compound']:
                regulators_EcoCyc, regulators_COBRA = BioCyc_dependencies.map_BioCyc2COBRA(
                    reg['regulator_compound'],
                    BioCyc_components_dict_I=BioCyc_compounds_dict_I,
                    BioCyc2COBRA_func_I=BioCyc_dependencies.map_BioCycCompound2COBRA,
                    BioCyc2COBRA_params_I={
                        'COBRA_metabolites_I': COBRA_metabolites_I,
                        'chebi2inchi_dict_I': chebi2inchi_dict_I,
                        'MetaNetX_metabolites_dict_I': MetaNetX_metabolites_dict_I,
                    }
                    )

        #convert the regulated_entities: the first populated field wins
        #(gene, enzymaticReaction, promoter, product, then protein)
        entities_EcoCyc, entities_COBRA = [], []
        if reg['regulated_entity_gene']:
            entities_EcoCyc, entities_COBRA = map_plain(reg['regulated_entity_gene'])
        elif reg['regulated_entity_enzymaticReaction']:
            entities_EcoCyc, entities_COBRA = BioCyc_dependencies.map_BioCyc2COBRA(
                reg['regulated_entity_enzymaticReaction'],
                BioCyc_components_dict_I=BioCyc_reactions_dict_I,
                BioCyc2COBRA_func_I=BioCyc_dependencies.map_BioCycReaction2COBRA,
                BioCyc2COBRA_params_I={
                    'COBRA_reactions_I': COBRA_reactions_I,
                    'MetaNetX_reactions_dict_I': MetaNetX_reactions_dict_I,
                    'BioCyc_reaction2Genes_dict_I': BioCyc_enzymaticReactions_dict_I,
                }
                )
        else:
            for field in ('regulated_entity_promoter',
                          'regulated_entity_product',
                          'regulated_entity_protein'):
                if reg[field]:
                    entities_EcoCyc, entities_COBRA = map_plain(reg[field])
                    break

        #check that mappings/conversions took place
        if not (regulators_EcoCyc and entities_EcoCyc
                and regulators_COBRA and entities_COBRA):
            continue

        #flatten
        EcoCyc_flattened = BioCyc_dependencies.crossMultiple_2lists(
            regulators_EcoCyc,
            entities_EcoCyc,
            'regulators_EcoCyc',
            'regulated_entities_EcoCyc',
            )
        COBRA_flattened = BioCyc_dependencies.crossMultiple_2lists(
            regulators_COBRA,
            entities_COBRA,
            'regulators_COBRA',
            'regulated_entities_COBRA',
            )
        for position, EcoCyc_row in enumerate(EcoCyc_flattened):
            merged = {}
            merged.update(EcoCyc_row)
            merged.update(COBRA_flattened[position])
            merged.update(shared)
            #keep only the first occurrence of each row
            #(NOTE: only works because each dictionary is constructed identically)
            if merged not in data_O:
                data_O.append(merged)
    return data_O
#create the COBRA tables
from SBaaS_models.models_COBRA_execute import models_COBRA_execute
cobra01 = models_COBRA_execute(session, engine, pg_settings.datadir_settings)
cobra01.initialize_supportedTables()
cobra01.initialize_tables()

#create the BioCyc tables
from SBaaS_models.models_BioCyc_execute import models_BioCyc_execute
biocyc01 = models_BioCyc_execute(session, engine, pg_settings.datadir_settings)
biocyc01.initialize_supportedTables()
biocyc01.initialize_tables()

#BioCyc dependencies
from SBaaS_models.models_BioCyc_dependencies import models_BioCyc_dependencies
biocyc01_dep = models_BioCyc_dependencies()

#COBRA dependencies
from SBaaS_models.models_COBRA_dependencies import models_COBRA_dependencies
cobra01_dep = models_COBRA_dependencies()

#the shared routines live under the workspace; extend sys.path before importing them
sys.path.append(pg_settings.datadir_settings['workspace'] + '/sbaas_shared')
from ALEsKOs01_shared.ALEsKOs01_commonRoutines import *

#read the BioCyc regulation listing from the workspace output directory
iobase = base_importData()
iobase.read_json(
    pg_settings.datadir_settings['workspace_data'] + '/_output/BioCyc_regulation.json')
regulation_O = iobase.data

#protein-mediated-translation-regulation not annotated