def get_geneIDsAndRxnIDsAndMetIDs_modelsBioCycAndModelsCOBRA(
        self,
        pathways,
        database_I='ECOLI',
        model_id_I='150526_iDM2015'):
        '''Collect the gene, reaction, and metabolite ids linked to BioCyc pathways
        INPUT:
        pathways = pathway names, passed as names_I to the BioCyc pathway query
        database_I = BioCyc database used for the pathway and gene queries
        model_id_I = COBRA model id used for the reaction query

        OUTPUT:
        gene_ids = pathway gene names plus the non-empty synonyms returned for them
        rxn_ids = COBRA rxn_ids with any '_reverse' text removed
        met_ids = product and reactant ids of the matched COBRA reactions
        met_ids_deformated = met_ids after deformat_metid and the fixed
            substitutions listed in the body

        NOTE: every returned list is built from a set, so it holds unique
        values in no particular order
        '''
        from SBaaS_models.models_COBRA_dependencies import models_COBRA_dependencies

        #initialize supporting objects
        cobra01 = models_COBRA_query(self.session,self.engine,self.settings)
        cobra01.initialize_supportedTables()

        #query the pathways
        biocyc_pathways = self.getParsed_genesAndPathwaysAndReactions_namesAndDatabase_modelsBioCycPathways(
            names_I=pathways,
            database_I=database_I,
            query_I={},
            output_O='listDict',
            dictColumn_I=None)
        genes = list({g['gene'] for g in biocyc_pathways if g['gene']!=''})

        #join list of genes with alternative identifiers
        biocyc_genes = self.getParsed_genesAndAccessionsAndSynonyms_namesAndDatabase_modelsBioCycPolymerSegments(
            names_I=genes,
            database_I=database_I,
            query_I={},
            output_O='listDict',
            dictColumn_I=None)
        gene_ids = list(set(genes + [g['synonym'] for g in biocyc_genes if g['synonym']]))
        accession_1 = list({g['accession_1'] for g in biocyc_genes if g['accession_1']!=''})

        #join accession_1 with COBRA reactions
        cobra_rxnIDs = cobra01.get_rows_modelIDAndOrderedLocusNames_dataStage02PhysiologyModelReactions(
            model_id_I=model_id_I,
            ordered_locus_names_I=accession_1,
            query_I={},
            output_O='listDict',
            dictColumn_I=None)
        rxn_ids = list({g['rxn_id'].replace('_reverse','') for g in cobra_rxnIDs if g['rxn_id']!=''})

        #COBRA metabolites (products first, then reactants)
        met_ids = list(set(
            [p for g in cobra_rxnIDs if g['products_ids'] for p in g['products_ids']] +
            [p for g in cobra_rxnIDs if g['reactants_ids'] for p in g['reactants_ids']]))

        #deformat met_ids
        #the substitutions are applied in this order to every deformatted id
        #NOTE(review): presumably they map model ids onto pooled/measured ids -- confirm
        met_id_substitutions = (
            ('13dpg','23dpg'),
            ('3pg','Pool_2pg_3pg'),
            ('glycogen','adpglc'),
            ('uacgam','udpglcur'),
            )
        cobra_dependencies = models_COBRA_dependencies()
        deformated = set()
        for m in met_ids:
            met_id = cobra_dependencies.deformat_metid(m)
            for old,new in met_id_substitutions:
                met_id = met_id.replace(old,new)
            deformated.add(met_id)
        met_ids_deformated = list(deformated)

        #return values
        return gene_ids,rxn_ids,met_ids,met_ids_deformated
    def convertAndMap_BioCycTranscriptionFactor2COBRA(
        self,
        BioCyc2COBRA_regulation_I,
        BioCyc_polymerSegments_I = None,
        BioCyc_compounds_I = None,
        COBRA_metabolites_I = None,
        chebi2inchi_I = None,
        ):
        '''Convert and map BioCyc Transcription factor (ligand-binding) reactions
        to COBRA model ids

        Only regulators whose name contains
        'DNA-binding transcriptional dual regulator' are processed. For each one
        the BioCyc reactions returned for it are scanned; a reaction is kept when
        the regulator appears in its parsed 'left' or 'right' list. Every
        participant of a kept reaction is looked up as a protein (to collect
        genes) or, failing that, as a compound (to collect ligands).

        INPUT:
        BioCyc2COBRA_regulation_I = output from convertAndMap_BioCycRegulation2COBRA
            (None or empty gives an empty result)
        BioCyc_polymerSegments_I = (TODO) listDict of models_BioCyc_polymerSegments
            (indexed by product below, but the index is not consulted yet)
        BioCyc_compounds_I = listDict of models_BioCyc_compounds
        COBRA_metabolites_I = listDict of models_COBRA_metabolites
        chebi2inchi_I = listDict of CHEBI_ID to InCHI

        OUTPUT:
        BioCyc2COBRA_regulators_O = dict of regulator name to a list of dicts
            'ligands': {'COBRA_met_id': [...], 'BioCyc_name': [...]}
            'genes': unique non-empty gene names
            'tu': the regulator name
            'regulator': the regulator name
            'mode': '("-")' if the regulator is on the left of the reaction,
                    '("+")' if it is on the right
        '''

        from SBaaS_models.models_BioCyc_dependencies import models_BioCyc_dependencies
        BioCyc_dependencies = models_BioCyc_dependencies()

        #None/empty input means there is nothing to map
        if BioCyc2COBRA_regulation_I:
            BioCyc2COBRA_regulators = list(set([r['regulator'] for r in BioCyc2COBRA_regulation_I
                if 'DNA-binding transcriptional dual regulator' in r['regulator']]))
        else:
            BioCyc2COBRA_regulators = []

        #reformat the inputs into dicts for fast traversal
        #(None/empty inputs are passed through unchanged)
        if chebi2inchi_I:
            chebi2inchi_dict_I = {r['CHEBI_ID']:r['InChI'] for r in chebi2inchi_I}
        else:
            chebi2inchi_dict_I = chebi2inchi_I

        if BioCyc_compounds_I:
            BioCyc_compounds_dict_I = {}
            for row in BioCyc_compounds_I:
                name = row['name']
                if name == '':
                    continue
                rows = BioCyc_compounds_dict_I.setdefault(name,[])
                if row not in rows:
                    rows.append(row)
        else:
            BioCyc_compounds_dict_I = BioCyc_compounds_I

        if BioCyc_polymerSegments_I:
            BioCyc_polymerSegments_dict_I = {}
            for segment in BioCyc_polymerSegments_I:
                #parse with the instance, like every other call in this method
                products = BioCyc_dependencies.convert_bioCycList2List(segment['product'])
                for product in products:
                    #every row is recorded, including the first one seen for a product
                    BioCyc_polymerSegments_dict_I.setdefault(product,[]).append(segment)
        else:
            BioCyc_polymerSegments_dict_I = BioCyc_polymerSegments_I
        #TODO: test replacing the per-name polymerSegments query below with a
        #      lookup in BioCyc_polymerSegments_dict_I (genes from row['name'])

        #debugging hooks: regulators flagged for manual spot checks
        #   Cra: error mapping fdp_c
        #   GalR: gene is being identified as a TU
        #   β-D-galactose: not a transcription factor
        spot_checks = {
            'Cra DNA-binding transcriptional dual regulator',
            'GalR DNA-binding transcriptional dual regulator',
            'β-D-galactose',
            }

        BioCyc2COBRA_regulators_O = {}
        for e in BioCyc2COBRA_regulators:
            BioCyc2COBRA_regulators_O[e] = []
            if e in spot_checks:
                print('check')
            reactions = self.get_rows_substratesAndParentClassesAndDatabase_modelsBioCycReactions(
                e,
                database_I='ECOLI',
                query_I={},
                output_O='listDict',
                dictColumn_I=None
                )
            for t in reactions:
                #parse left and right
                left = BioCyc_dependencies.convert_bioCycList2List(t['left'])
                right = BioCyc_dependencies.convert_bioCycList2List(t['right'])
                #the side holding the regulator sets the mode;
                #reactions that do not contain the regulator are skipped
                if e in left:
                    mode = '("-")'
                elif e in right:
                    mode = '("+")'
                else:
                    continue
                ligands = {'COBRA_met_id': [],
                           'BioCyc_name': []}
                genes = []
                #left participants are processed first, then right participants
                for side in (left,right):
                    for participant in side:
                        #query proteins to look up the gene
                        #query compounds to look up the ligands
                        proteins = self.get_rows_nameAndDatabase_modelsBioCycProteins(
                            participant,database_I = 'ECOLI'
                            )
                        compounds = self.get_rows_nameAndDatabase_modelsBioCycCompounds(
                            participant,database_I = 'ECOLI'
                            )
                        if proteins:
                            for p in proteins:
                                #1. parse genes directly
                                genes.extend(BioCyc_dependencies.convert_bioCycList2List(p['gene']))
                                #2. if genes are not specified (i.e., protein complex) query and parse polymerSegments
                                names = BioCyc_dependencies.convert_bioCycList2List(p['names'])
                                for n in names:
                                    rows = self.get_rows_productAndDatabase_modelsBioCycPolymerSegments(
                                        n,database_I = 'ECOLI')
                                    genes.extend(row['name'] for row in rows)
                        elif compounds:
                            #map the ligand names...
                            original,converted = BioCyc_dependencies.map_BioCyc2COBRA(
                                compounds,
                                BioCyc_components_dict_I=BioCyc_compounds_dict_I,
                                BioCyc2COBRA_func_I=BioCyc_dependencies.map_BioCycCompound2COBRA,
                                BioCyc2COBRA_params_I={
                                    'COBRA_metabolites_I':COBRA_metabolites_I,
                                    'chebi2inchi_dict_I':chebi2inchi_dict_I,
                                }
                            )
                            ligands['BioCyc_name'].extend([c['name'] for c in original])
                            ligands['COBRA_met_id'].extend(converted)
                genes = list(set([g for g in genes if g!='']))
                #NOTE: there can be multiple ligands/genes associated with the tu
                #the tu is always the regulator itself (exactly one per kept reaction)
                BioCyc2COBRA_regulators_O[e].append({
                    'ligands':ligands,
                    'genes':genes,
                    'tu':e,
                    'regulator':e,
                    'mode':mode,
                    })
        return BioCyc2COBRA_regulators_O
    def convertAndMap_BioCycRegulation2COBRA(
        self,
        BioCyc_regulation_I,
        BioCyc_reactions_I = None,
        BioCyc_enzymaticReactions2PolymerSegments_I = None,
        BioCyc_compounds_I = None,
        COBRA_reactions_I = None,
        COBRA_metabolites_I = None,
        chebi2inchi_I = None,
        #chebi2database_I = None,
        MetaNetX_reactions_I = None,
        MetaNetX_metabolites_I = None,):
        '''Map BioCyc regulation rows onto COBRA model ids

        Each regulation row is converted in two steps: the first non-empty
        regulator field and the first non-empty regulated-entity field are
        mapped with map_BioCyc2COBRA. Rows for which either mapping comes back
        empty are skipped. The remaining mappings are crossed pairwise and
        merged with the descriptive fields of the regulation row.

        INPUT:
        BioCyc_regulation_I = listDict of regulation rows
        BioCyc_reactions_I = listDict of models_BioCyc_reactions
        BioCyc_enzymaticReactions2PolymerSegments_I = listDict of the join between
            models_BioCyc_enzymaticReactions and models_BioCyc_polymerSegments
            (getJoin_genes_namesAndDatabase_modelsBioCycEnzymaticReactionsAndPolymerSegments)
        BioCyc_compounds_I = listDict of models_BioCyc_compounds
        COBRA_reactions_I = listDict of models_COBRA_reactions
        COBRA_metabolites_I = listDict of models_COBRA_metabolites
        chebi2inchi_I = listDict of CHEBI_ID to InCHI
        MetaNetX_reactions_I = listDict of MetaNetX reaction xrefs
        MetaNetX_metabolites_I = listDict of MetaNetX chemical xrefs

        OUTPUT:
        data_O = listDict without duplicate rows; each row holds the keys
            'regulators_EcoCyc', 'regulated_entities_EcoCyc',
            'regulators_COBRA', 'regulated_entities_COBRA' (as produced by
            crossMultiple_2lists) plus 'regulator', 'regulated_entity', 'mode',
            'mechanism', 'name' and 'parent_classes' copied from the input row
        '''
        deps = models_BioCyc_dependencies()

        def index_xrefs(xref_rows):
            #group '#XREF' entries ("source:id") by MNX_ID;
            #a falsy input (None/empty) is handed back untouched
            if not xref_rows:
                return xref_rows
            index = {}
            for xref in xref_rows:
                try:
                    if xref['MNX_ID'] not in index:
                        index[xref['MNX_ID']] = {}
                    source_and_id = xref['#XREF'].split(':')
                    index[xref['MNX_ID']][source_and_id[0]] = source_and_id[1]
                except Exception as err:
                    print(err)
            return index

        #--- lookup tables built from the optional inputs ---
        #(a None/empty input is passed through unchanged in every case)
        chebi2inchi_dict_I = chebi2inchi_I
        if chebi2inchi_I:
            chebi2inchi_dict_I = {entry['CHEBI_ID']:entry['InChI'] for entry in chebi2inchi_I}

        BioCyc_compounds_dict_I = BioCyc_compounds_I
        if BioCyc_compounds_I:
            BioCyc_compounds_dict_I = {}
            for compound in BioCyc_compounds_I:
                for key in {k for k in [compound['name']] if k!=''}:
                    if key not in BioCyc_compounds_dict_I:
                        BioCyc_compounds_dict_I[key] = []
                    if compound not in BioCyc_compounds_dict_I[key]:
                        BioCyc_compounds_dict_I[key].append(compound)

        #reactions are reachable by common name and by each enzymatic reaction name
        BioCyc_reactions_dict_I = BioCyc_reactions_I
        if BioCyc_reactions_I:
            BioCyc_reactions_dict_I = {}
            for reaction in BioCyc_reactions_I:
                candidates = [reaction['common_name']]
                candidates.extend(deps.convert_bioCycList2List(reaction['enzymatic_reaction']))
                for key in {k for k in candidates if k!=''}:
                    if key not in BioCyc_reactions_dict_I:
                        BioCyc_reactions_dict_I[key] = []
                    if reaction not in BioCyc_reactions_dict_I[key]:
                        BioCyc_reactions_dict_I[key].append(reaction)

        #enzymes, gene ids and accessions are accumulated per enzymatic reaction name
        BioCyc_enzymaticReactions_dict_I = BioCyc_enzymaticReactions2PolymerSegments_I
        if BioCyc_enzymaticReactions2PolymerSegments_I:
            BioCyc_enzymaticReactions_dict_I = {}
            for joined in BioCyc_enzymaticReactions2PolymerSegments_I:
                try:
                    if joined['name'] not in BioCyc_enzymaticReactions_dict_I:
                        BioCyc_enzymaticReactions_dict_I[joined['name']] = {
                            'name':'',
                            'enzyme':[],
                            'gene_ids':[],
                            'accession_1':[],
                            }
                    entry = BioCyc_enzymaticReactions_dict_I[joined['name']]
                    entry['name'] = joined['name']
                    entry['enzyme'].append(joined['enzyme'])
                    entry['gene_ids'].extend(joined['gene_ids'])
                    entry['accession_1'].extend(joined['accession_1'])
                except Exception as err:
                    print(err)

        MetaNetX_reactions_dict_I = index_xrefs(MetaNetX_reactions_I)
        MetaNetX_metabolites_dict_I = index_xrefs(MetaNetX_metabolites_I)

        #--- mapping strategies; each call builds a fresh params dict ---
        def map_plain(component):
            return deps.map_BioCyc2COBRA(
                component,
                BioCyc2COBRA_func_I=None,
                BioCyc2COBRA_params_I={}
            )

        def map_compound(component):
            return deps.map_BioCyc2COBRA(
                component,
                BioCyc_components_dict_I=BioCyc_compounds_dict_I,
                BioCyc2COBRA_func_I=deps.map_BioCycCompound2COBRA,
                BioCyc2COBRA_params_I={
                    'COBRA_metabolites_I':COBRA_metabolites_I,
                    'chebi2inchi_dict_I':chebi2inchi_dict_I,
                    'MetaNetX_metabolites_dict_I':MetaNetX_metabolites_dict_I,
                }
            )

        def map_reaction(component):
            return deps.map_BioCyc2COBRA(
                component,
                BioCyc_components_dict_I=BioCyc_reactions_dict_I,
                BioCyc2COBRA_func_I=deps.map_BioCycReaction2COBRA,
                BioCyc2COBRA_params_I={
                    'COBRA_reactions_I':COBRA_reactions_I,
                    'MetaNetX_reactions_dict_I':MetaNetX_reactions_dict_I,
                    'BioCyc_reaction2Genes_dict_I':BioCyc_enzymaticReactions_dict_I,
                }
            )

        #fields are tried in this order; only the first non-empty one is mapped
        regulator_fields = (
            ('regulator_gene',map_plain),
            ('regulator_protein',map_plain),
            ('regulator_RNA',map_plain),
            ('regulator_compound',map_compound),
        )
        regulated_entity_fields = (
            ('regulated_entity_gene',map_plain),
            ('regulated_entity_enzymaticReaction',map_reaction),
            ('regulated_entity_promoter',map_plain),
            ('regulated_entity_product',map_plain),
            ('regulated_entity_protein',map_plain),
        )

        regulation_O = []
        for reg in BioCyc_regulation_I:
            #descriptive fields copied onto every output row of this regulation
            unique = {
                'regulator':reg['regulator'],
                'regulated_entity':reg['regulated_entity'],
                'mode':reg['mode'],
                'mechanism':reg['mechanism'],
                'name':reg['name'],
                'parent_classes':reg['parent_classes']
            }

            #convert the regulators
            regulators_EcoCyc,regulators_COBRA = [],[]
            for field,mapper in regulator_fields:
                if reg[field]:
                    original,converted = mapper(reg[field])
                    regulators_EcoCyc = original
                    regulators_COBRA = converted
                    break

            #convert the regulated_entities
            entities_EcoCyc,entities_COBRA = [],[]
            for field,mapper in regulated_entity_fields:
                if reg[field]:
                    original,converted = mapper(reg[field])
                    entities_EcoCyc = original
                    entities_COBRA = converted
                    break

            #check that mappings/conversions took place
            if not (regulators_EcoCyc and entities_EcoCyc
                    and regulators_COBRA and entities_COBRA):
                continue

            #flatten
            EcoCyc_flattened = deps.crossMultiple_2lists(
                regulators_EcoCyc,
                entities_EcoCyc,
                'regulators_EcoCyc',
                'regulated_entities_EcoCyc',
            )
            COBRA_flattened = deps.crossMultiple_2lists(
                regulators_COBRA,
                entities_COBRA,
                'regulators_COBRA',
                'regulated_entities_COBRA',
            )
            #pair the flattened rows by position and attach the descriptive fields
            for position,ecocyc_pair in enumerate(EcoCyc_flattened):
                merged = {}
                merged.update(ecocyc_pair)
                merged.update(COBRA_flattened[position])
                merged.update(unique)
                regulation_O.append(merged)

        #remove duplicate entries
        #(NOTE: only works because each dictionary is constructed identically)
        data_O = []
        for candidate in regulation_O:
            if candidate not in data_O:
                data_O.append(candidate)
        return data_O
# Example no. 4
# 0
# Setup script: initializes the COBRA and BioCyc table helpers, instantiates
# their dependency helpers, and loads BioCyc_regulation.json into regulation_O.
# NOTE(review): `sys`, `session`, `engine` and `pg_settings` are not defined in
# the visible lines -- presumably set up earlier in the script; confirm.

#make the COBRA table
from SBaaS_models.models_COBRA_execute import models_COBRA_execute
cobra01 = models_COBRA_execute(session,engine,pg_settings.datadir_settings);
cobra01.initialize_supportedTables()
cobra01.initialize_tables()

#make the BioCyc table
from SBaaS_models.models_BioCyc_execute import models_BioCyc_execute
biocyc01 = models_BioCyc_execute(session,engine,pg_settings.datadir_settings);
biocyc01.initialize_supportedTables()
biocyc01.initialize_tables()

#BioCyc dependencies
from SBaaS_models.models_BioCyc_dependencies import models_BioCyc_dependencies
biocyc01_dep = models_BioCyc_dependencies();

#COBRA dependencies
from SBaaS_models.models_COBRA_dependencies import models_COBRA_dependencies
cobra01_dep = models_COBRA_dependencies();

#make the shared workspace package importable before importing from it
sys.path.append(pg_settings.datadir_settings['workspace']+'/sbaas_shared')
from ALEsKOs01_shared.ALEsKOs01_commonRoutines import *

#load the regulation listDict from the workspace data directory
#NOTE(review): base_importData is not imported in the visible lines --
#presumably supplied by the wildcard import above; confirm
iobase = base_importData();
iobase.read_json(
    pg_settings.datadir_settings['workspace_data']+\
    '/_output/BioCyc_regulation.json');
regulation_O = iobase.data;

#protein-mediated-translation-regulation not annotated