def execute_makeMetabolomicsData_intracellular(self,experiment_id_I,data_I=[],compartment_id_I='c'):
    '''Get the curated metabolomics data from data_stage01_quantification_averagesMIGeo

    Pooled components are expanded into one row per constituent metabolite,
    every row is annotated with the met_id/concentration fields, and the
    result is handed to self.add_dataStage03QuantificationMetabolomicsData.

    INPUT:
    experiment_id_I = experiment id used to query the rows when data_I is empty
    data_I = [{}], optional rows to use instead of querying; each row is read
             for 'component_group_name' and the 'calculated_concentration_*' keys
    compartment_id_I = compartment id passed to format_metid (default 'c')

    NOTE: every row is shallow-copied before it is annotated, so the rows
    supplied through data_I are never modified in place.
    '''
    # pooled component name -> metabolite ids that each receive a copy of the row
    met_id_conv_dict = {'Hexose_Pool_fru_glc-D':['glc-D','fru-D'],
                        'Pool_2pg_3pg':['2pg','3pg'],
                        '23dpg':['13dpg']}
    cobradependencies = models_COBRA_dependencies()
    # get rows:
    if data_I:
        data = data_I
    else:
        data = self.get_rows_experimentID_dataStage01AveragesMIgeo(experiment_id_I)
    data_O = []
    for d in data:
        name = d['component_group_name']
        # a name outside the conversion table maps onto itself
        for met_conv in met_id_conv_dict.get(name, [name]):
            row_tmp = copy.copy(d)
            row_tmp['component_group_name'] = met_conv
            data_O.append(row_tmp)
    for d in data_O:
        d['met_id'] = cobradependencies.format_metid(d['component_group_name'],compartment_id_I)
        d['measured'] = True
        d['concentration_var'] = d['calculated_concentration_var']
        d['concentration_lb'] = d['calculated_concentration_lb']
        d['concentration_ub'] = d['calculated_concentration_ub']
        d['concentration'] = d['calculated_concentration_average']
        d['concentration_units'] = d['calculated_concentration_units']
        d['comment_'] = None
    #add data to the DB
    self.add_dataStage03QuantificationMetabolomicsData(data_O)
def get_geneIDsAndRxnIDsAndMetIDs_modelsBioCycAndModelsCOBRA(
    self,
    pathways):
    '''Collect gene, reaction and metabolite ids for a list of BioCyc pathways

    INPUT:
    pathways = pathway names passed as names_I to the BioCyc pathway query

    OUTPUT:
    gene_ids = [] of pathway genes plus their non-empty synonyms
    rxn_ids = [] of COBRA reaction ids with '_reverse' stripped
    met_ids = [] of product and reactant ids of the matched reactions
    met_ids_deformated = [] of met_ids after deformat_metid and the
        pool/alias renames listed below
    '''
    #initialize supporting objects
    cobra_query = models_COBRA_query(self.session,self.engine,self.settings)
    cobra_query.initialize_supportedTables()
    # instantiated as in the original; the instance is not used further here
    models_BioCyc_dependencies()

    #query the pathways
    pathway_rows = self.getParsed_genesAndPathwaysAndReactions_namesAndDatabase_modelsBioCycPathways(
        names_I=pathways,
        database_I='ECOLI',
        query_I={},
        output_O='listDict',
        dictColumn_I=None)
    genes = list({row['gene'] for row in pathway_rows if row['gene']!=''})

    #join list of genes with alternative identifiers
    segment_rows = self.getParsed_genesAndAccessionsAndSynonyms_namesAndDatabase_modelsBioCycPolymerSegments(
        names_I=genes,
        database_I='ECOLI',
        query_I={},
        output_O='listDict',
        dictColumn_I=None)
    synonyms = [row['synonym'] for row in segment_rows if row['synonym']]
    gene_ids = list(set(genes + synonyms))
    accession_1 = list({row['accession_1'] for row in segment_rows if row['accession_1']!=''})

    #Join accession_1 with COBRA reactions
    reaction_rows = cobra_query.get_rows_modelIDAndOrderedLocusNames_dataStage02PhysiologyModelReactions(
        model_id_I='150526_iDM2015',
        ordered_locus_names_I=accession_1,
        query_I={},
        output_O='listDict',
        dictColumn_I=None)
    rxn_ids = list({row['rxn_id'].replace('_reverse','')
                    for row in reaction_rows if row['rxn_id']!=''})

    #COBRA metabolites (products first, then reactants)
    products = [met for row in reaction_rows if row['products_ids']
                for met in row['products_ids']]
    reactants = [met for row in reaction_rows if row['reactants_ids']
                 for met in row['reactants_ids']]
    met_ids = list(set(products + reactants))

    #deformat met_ids
    from SBaaS_models.models_COBRA_dependencies import models_COBRA_dependencies
    cobra_dependencies = models_COBRA_dependencies()
    # applied in this order to every deformatted id
    # NOTE(review): these are substring replacements, so any id that merely
    # contains e.g. '3pg' is rewritten as well -- confirm this is intended
    renames = (('13dpg','23dpg'),
               ('3pg','Pool_2pg_3pg'),
               ('glycogen','adpglc'),
               ('uacgam','udpglcur'))
    deformated = set()
    for met_id in met_ids:
        name = cobra_dependencies.deformat_metid(met_id)
        for old, new in renames:
            name = name.replace(old, new)
        deformated.add(name)
    met_ids_deformated = list(deformated)

    #return values
    return gene_ids,rxn_ids,met_ids,met_ids_deformated
def execute_thermodynamicSampling(self,simulation_id_I,models_I,
                data_dir_I,rxn_ids_I=[],
                inconsistent_dG_f_I=[],inconsistent_concentrations_I=[],
                inconsistent_tcc_I=[],
                measured_concentration_coverage_criteria_I=0.5,
                measured_dG_f_coverage_criteria_I=0.99,
                solver_I='glpk'):
    '''Constrain a model with measured fluxes and thermodynamic data, then
    export it for sampling

    Input:
       simulation_id_I = simulation id used to look up the simulation row and its parameters
       models_I = {} of model_id:cobra_model
       data_dir_I = directory of sampled points
       rxn_ids_I = [{}] with 'rxn_id','flux_lb','flux_ub'; queried from the measured fluxes when empty
       inconsistent_dG_f_I = dG_f measured values to be substituted for estimated values
       inconsistent_concentrations_I = concentration measured values to be substituted for estimated values
       inconsistent_tcc_I = reactions considered feasible to be changed to infeasible so that dG0_r constraints do not break the model
       measured_concentration_coverage_criteria_I = float, minimum concentration coverage to consider for feasibility
       measured_dG_f_coverage_criteria_I = float, minimum dG_f coverage to consider for feasibility
       solver_I = string, solver name (not read by this method; the export uses the 'solver_id' simulation parameter)

    Returns None in every case; progress and failures are reported with print.
    '''
    modelsCOBRA = models_COBRA_dependencies()
    print('execute_thermodynamicSampling...')

    # simulation information (first row is used; the original notes that a
    # unique constraint guarantees only 1 row will be returned)
    info_rows = self.get_rows_simulationIDAndSimulationType_dataStage03QuantificationSimulation(simulation_id_I,'sampling')
    if not info_rows:
        print('simulation not found!')
        return
    simulation_info = info_rows[0]

    # simulation parameters (same single-row assumption)
    parameter_rows = self.get_rows_simulationID_dataStage03QuantificationSimulationParameters(simulation_id_I)
    if not parameter_rows:
        print('simulation not found!')
        return
    simulation_parameters = parameter_rows[0]

    # work on a copy so the caller's model keeps its bounds
    model_id = simulation_info['model_id']
    cobra_model = models_I[model_id]
    cobra_model_copy = cobra_model.copy()
    experiment_id = simulation_info['experiment_id']
    sna = simulation_info['sample_name_abbreviation']
    time_point = simulation_info['time_point']

    # measured flux bounds
    if rxn_ids_I:
        rxn_ids = rxn_ids_I
    else:
        rxn_ids = self.get_rows_experimentIDAndModelIDAndSampleNameAbbreviation_dataStage03QuantificationMeasuredFluxes(
            experiment_id,model_id,sna)
    for rxn in rxn_ids:
        reaction = cobra_model_copy.reactions.get_by_id(rxn['rxn_id'])
        reaction.lower_bound = rxn['flux_lb']
        reaction.upper_bound = rxn['flux_ub']

    # make the model irreversible
    convert_to_irreversible(cobra_model_copy)

    # pH, ionic_strength, and temperature parameters
    pH,temperature,ionic_strength = self.get_rowsFormatted_experimentIDAndTimePointAndSampleNameAbbreviation_dataStage03QuantificationOtherData(
        experiment_id,time_point,sna)
    other_data = thermodynamics_otherData(pH_I=pH,temperature_I=temperature,ionic_strength_I=ionic_strength)
    other_data.check_data()

    # dG_f data
    dG_f = self.get_rowsDict_experimentIDAndModelIDAndTimePointAndSampleNameAbbreviations_dataStage03QuantificationDGf(
        experiment_id,model_id,time_point,sna)
    dG_f_data = thermodynamics_dG_f_data(dG_f_I=dG_f)
    dG_f_data.format_dG_f()
    dG_f_data.generate_estimated_dG_f(cobra_model)
    dG_f_data.check_data()
    if inconsistent_dG_f_I:
        dG_f_data.remove_measured_dG_f(inconsistent_dG_f_I)

    # metabolomics data
    concentrations = self.get_rowsDict_experimentIDAndTimePointAndSampleNameAbbreviations_dataStage03QuantificationMetabolomicsData(
        experiment_id,time_point,sna)
    metabolomics_data = thermodynamics_metabolomicsData(measured_concentrations_I=concentrations)
    metabolomics_data.generate_estimated_metabolomics_data(cobra_model)
    if inconsistent_concentrations_I:
        metabolomics_data.remove_measured_concentrations(inconsistent_concentrations_I)

    # dG0r, coverage and feasibility data
    dG0_r = self.get_rowsDict_experimentIDAndModelIDAndTimePointAndSampleNameAbbreviations_dataStage03QuantificationDG0r(
        experiment_id,model_id,time_point,sna)
    measured_concentration_coverage,measured_dG_f_coverage,feasible = self.get_rowsDict_experimentIDAndModelIDAndTimePointAndSampleNameAbbreviations_dataStage03QuantificationTCC(
        experiment_id,model_id,time_point,sna,0,0)
    tcc = thermodynamics_dG_r_data(dG0_r_I = dG0_r,
                    dG_r_coverage_I = measured_dG_f_coverage,
                    metabolomics_coverage_I = measured_concentration_coverage,
                    thermodynamic_consistency_check_I = feasible)
    if inconsistent_tcc_I:
        tcc.change_feasibleReactions(inconsistent_tcc_I)

    # apply tfba constraints
    tfba = thermodynamics_tfba()
    tfba._add_conc_ln_constraints_transport(cobra_model_copy,
            metabolomics_data.measured_concentrations, metabolomics_data.estimated_concentrations,
            tcc.dG0_r, other_data.pH,other_data.temperature,tcc.metabolomics_coverage,
            tcc.dG_r_coverage, tcc.thermodynamic_consistency_check,
            measured_concentration_coverage_criteria_I, measured_dG_f_coverage_criteria_I,
            use_measured_concentrations=True,use_measured_dG0_r=True)

    # Test model
    if not modelsCOBRA.test_model(cobra_model_I=cobra_model_copy):
        print('no solution found!')
        return

    sampling = cobra_sampling(data_dir_I = data_dir_I)
    sampler_id = simulation_parameters['sampler_id']
    if sampler_id=='gpSampler':
        # file names are all derived from the simulation id
        sampling.export_sampling_matlab(
            cobra_model=cobra_model_copy,
            filename_model=simulation_id_I + '.mat',
            filename_script=simulation_id_I + '.m',
            filename_points=simulation_id_I + '_points' + '.mat',
            solver_id_I = simulation_parameters['solver_id'],
            n_points_I = simulation_parameters['n_points'],
            n_steps_I = simulation_parameters['n_steps'],
            max_time_I = simulation_parameters['max_time'])
    elif sampler_id=='optGpSampler':
        # nothing is exported for this sampler
        return
    else:
        print('sampler_id not recognized')
def execute_analyzeThermodynamicSamplingPoints(self,simulation_id_I,models_I,
                data_dir_I,data_dir_O,rxn_ids_I=[],
                inconsistent_dG_f_I=[],inconsistent_concentrations_I=[],
                inconsistent_tcc_I=[],
                measured_concentration_coverage_criteria_I=0.5,
                measured_dG_f_coverage_criteria_I=0.99,
                remove_pointsNotInSolutionSpace_I=True,
                min_pointsInSolutionSpace_I=1000):
    '''Load and analyze sampling points

    Input:
       simulation_id_I = simulation id used to look up the simulation row and its parameters
       models_I = {} of model_id:cobra_model
       rxn_ids_I = [{}] with 'rxn_id','flux_lb','flux_ub'; queried from the measured fluxes when empty
       inconsistent_dG_f_I = dG_f measured values to be substituted for estimated values
       inconsistent_concentrations_I = concentration measured values to be substituted for estimated values
       inconsistent_tcc_I = reactions considered feasible to be changed to infeasible so that dG0_r constraints do not break the model
       measured_concentration_coverage_criteria_I = float, minimum concentration coverage to consider for feasibility
       measured_dG_f_coverage_criteria_I = float, minimum dG_f coverage to consider for feasibility
       remove_pointsNotInSolutionSpace_I = boolean, remove points not in the solution space (i.e., within the lower/upper bounds)
       min_pointsInSolutionSpace_I = int, minimum number of points in the solution space.
            if the number of points is less that the minimum, the solution space will be increased by
            (upper_bounds-lower_bounds)/4 until the minimum number of points is met
       data_dir_I = directory of sampled points
       data_dir_O = directory to write QC'd sampled points (not read by this method)

    Returns None. The points overview and the per-variable statistics are
    written with add_dataStage03QuantificationSampledPoints and
    add_dataStage03QuantificationSampledData. Nothing is written when the
    simulation is not found, the model test fails, or the sampler is
    'optGpSampler' or unrecognized.
    '''
    print('analyzing sampling points');
    modelsCOBRA = models_COBRA_dependencies();

    # get simulation information
    simulation_info_all = self.get_rows_simulationIDAndSimulationType_dataStage03QuantificationSimulation(simulation_id_I,'sampling')
    if not simulation_info_all:
        print('simulation not found!')
        return;
    simulation_info = simulation_info_all[0]; # unique constraint guarantees only 1 row will be returned
    # get simulation parameters
    simulation_parameters_all = self.get_rows_simulationID_dataStage03QuantificationSimulationParameters(simulation_id_I);
    if not simulation_parameters_all:
        print('simulation not found!')
        return;
    simulation_parameters = simulation_parameters_all[0]; # unique constraint guarantees only 1 row will be returned

    experiment_id = simulation_info['experiment_id'];
    model_id = simulation_info['model_id'];
    sna = simulation_info['sample_name_abbreviation'];
    time_point = simulation_info['time_point'];

    # get the cobra model and work on a copy
    cobra_model = models_I[model_id];
    cobra_model_copy = cobra_model.copy();
    # get rxn_ids
    if rxn_ids_I:
        rxn_ids = rxn_ids_I;
    else:
        rxn_ids = self.get_rows_experimentIDAndModelIDAndSampleNameAbbreviation_dataStage03QuantificationMeasuredFluxes(experiment_id,model_id,sna);
    for rxn in rxn_ids:
        # constrain the model
        reaction = cobra_model_copy.reactions.get_by_id(rxn['rxn_id']);
        reaction.lower_bound = rxn['flux_lb'];
        reaction.upper_bound = rxn['flux_ub'];
    # make the model irreversible
    convert_to_irreversible(cobra_model_copy);

    # load pH, ionic_strength, and temperature parameters
    pH,temperature,ionic_strength = self.get_rowsFormatted_experimentIDAndTimePointAndSampleNameAbbreviation_dataStage03QuantificationOtherData(experiment_id,time_point,sna);
    other_data = thermodynamics_otherData(pH_I=pH,temperature_I=temperature,ionic_strength_I=ionic_strength);
    other_data.check_data();

    # get dG_f data:
    dG_f = self.get_rowsDict_experimentIDAndModelIDAndTimePointAndSampleNameAbbreviations_dataStage03QuantificationDGf(experiment_id,model_id,time_point,sna);
    dG_f_data = thermodynamics_dG_f_data(dG_f_I=dG_f);
    dG_f_data.format_dG_f();
    dG_f_data.generate_estimated_dG_f(cobra_model)
    dG_f_data.check_data();
    # remove any inconsistent dGf values
    if inconsistent_dG_f_I: dG_f_data.remove_measured_dG_f(inconsistent_dG_f_I)

    # load metabolomicsData
    concentrations = self.get_rowsDict_experimentIDAndTimePointAndSampleNameAbbreviations_dataStage03QuantificationMetabolomicsData(experiment_id,time_point,sna);
    metabolomics_data = thermodynamics_metabolomicsData(measured_concentrations_I=concentrations);
    metabolomics_data.generate_estimated_metabolomics_data(cobra_model);
    # remove any inconsistent concentration values
    if inconsistent_concentrations_I: metabolomics_data.remove_measured_concentrations(inconsistent_concentrations_I);

    # get dG0r, dGr, and tcc data
    dG0_r = self.get_rowsDict_experimentIDAndModelIDAndTimePointAndSampleNameAbbreviations_dataStage03QuantificationDG0r(experiment_id,model_id,time_point,sna)
    measured_concentration_coverage,measured_dG_f_coverage,feasible = self.get_rowsDict_experimentIDAndModelIDAndTimePointAndSampleNameAbbreviations_dataStage03QuantificationTCC(experiment_id,model_id,time_point,sna,0,0)
    tcc = thermodynamics_dG_r_data(dG0_r_I = dG0_r,
                    dG_r_coverage_I = measured_dG_f_coverage,
                    metabolomics_coverage_I = measured_concentration_coverage,
                    thermodynamic_consistency_check_I = feasible);
    if inconsistent_tcc_I: tcc.change_feasibleReactions(inconsistent_tcc_I);

    # apply tfba constraints
    tfba = thermodynamics_tfba()
    tfba._add_conc_ln_constraints_transport(cobra_model_copy,
            metabolomics_data.measured_concentrations, metabolomics_data.estimated_concentrations,
            tcc.dG0_r, other_data.pH,other_data.temperature,tcc.metabolomics_coverage,
            tcc.dG_r_coverage, tcc.thermodynamic_consistency_check,
            measured_concentration_coverage_criteria_I, measured_dG_f_coverage_criteria_I,
            use_measured_concentrations=True,use_measured_dG0_r=True);

    # Test each model
    if not modelsCOBRA.test_model(cobra_model_I=cobra_model_copy):
        print('no solution found!');
        return;

    sampling = cobra_sampling(data_dir_I = data_dir_I,model_I = cobra_model_copy);
    if simulation_parameters['sampler_id']=='gpSampler':
        # load the results of sampling
        filename_points = simulation_id_I + '_points' + '.mat';
        sampling.get_points_matlab(filename_points,'sampler_out');
        # check if points were sampled outside the solution space
        if remove_pointsNotInSolutionSpace_I:
            sampling.remove_points_notInSolutionSpace(min_points_I=min_pointsInSolutionSpace_I);
        sampling.descriptive_statistics();
    elif simulation_parameters['sampler_id']=='optGpSampler':
        return;
    else:
        # no points were loaded, so filename_points is undefined and there
        # is nothing to store; stop here instead of failing further down
        print('sampler_id not recognized');
        return;

    # add the points overview to the database
    row = {'simulation_id':simulation_id_I,
        'simulation_dateAndTime':sampling.simulation_dateAndTime,
        'mixed_fraction':sampling.mixed_fraction,
        'data_dir':data_dir_I+'/'+filename_points,
        'infeasible_loops':sampling.loops,
        'used_':True,
        'comment_':None
        };
    self.add_dataStage03QuantificationSampledPoints([row])

    # add the per-variable statistics to the database
    sampledData_O = [];
    for k,v in sampling.points_statistics.items():
        variable_type,variable_units = tfba.get_variableTypeAndUnits(k);
        sampledData_O.append({'simulation_id':simulation_id_I,
            'simulation_dateAndTime':sampling.simulation_dateAndTime,
            'variable_id':k,
            'variable_type':variable_type,
            'variable_units':variable_units,
            'sampling_points':None, # the raw points (v['points']) are not stored
            'sampling_ave':v['ave'],
            'sampling_var':v['var'],
            'sampling_lb':v['lb'],
            'sampling_ub':v['ub'],
            'sampling_ci':0.95,
            'sampling_min':v['min'],
            'sampling_max':v['max'],
            'sampling_median':v['median'],
            'sampling_iq_1':v['iq_1'],
            'sampling_iq_3':v['iq_3'],
            'used_':True,
            'comment_':None});
    self.add_dataStage03QuantificationSampledData(sampledData_O);
def join_BioCyc2COBRAregulationWithCOBRAinteractions(self,
        BioCyc2COBRA_regulation,
        COBRA_interaction,
        BioCyc_alt_id = {},
        COBRA_alt_id = {},
        COBRA_alt_id2 = {},
        deformat_met_id_I = True
        ):
    '''
    return a list mapped and unmapped components

    Every regulation/interaction row is expanded into the cross product of
    its left and right identifiers (the original id plus any alternative
    ids), and duplicate rows are removed while keeping first-seen order.

    INPUT:
    BioCyc2COBRA_regulation = [{left:[string],right:[string],mode:[string],
        parent_classes:[string],mechanism:[string]},
        {left_EcoCyc:[string],right_EcoCyc:[string]]
        rows with a falsy 'used_' are skipped
    COBRA_interaction = [{left:[string],right:[string],mode:[string],
        parent_classes:[string],mechanism:[string]}]
    BioCyc_alt_id = {name:{'synonym':[],'common_name':[],'accession_1':[],'accession_2':[]}}
        output from get_alternativeGeneIdentifiers_modelsBioCycPolymerSegments
    COBRA_alt_id = {rxn_id:'pathways':[],'stoichiometry':[]}}
        output from get_rowsDict_modelID_dataStage02PhysiologyModelPathways
                    convert_netRxnDict2rxnNetRxnDict
    COBRA_alt_id2 = {bnumber:'bnumber':'','gene_name':[]}}
    deformat_met_id_I = boolean, if True (default) an identifier containing
        '_c' is additionally added in its deformatted/renamed form;
        if False that step (and the models_COBRA_dependencies import) is skipped

    OUTPUT:
    data_O = [{left:[string],right:[string],mode:[string],
        mechanism:[string],parent_classes:[string]}]
    '''
    COBRA_dependencies = None;
    if deformat_met_id_I:
        from .models_COBRA_dependencies import models_COBRA_dependencies
        COBRA_dependencies = models_COBRA_dependencies();

    def deformatAndConvert_metID(met_id_I):
        '''Return the deformatted, pool-renamed id, or None when not applicable'''
        if COBRA_dependencies is None or '_c' not in met_id_I:
            return None;
        return COBRA_dependencies.deformat_metid(met_id_I)\
            .replace('13dpg','23dpg')\
            .replace('3pg','Pool_2pg_3pg')\
            .replace('glycogen','adpglc')\
            .replace('uacgam','udpglcur');

    def biocyc_ids(row, side):
        '''Identifiers for one side ('left' or 'right') of a BioCyc row'''
        name = row[side];
        ecocyc_key = side + '_EcoCyc';
        ids = [];
        if not isinstance(name, list) and name in BioCyc_alt_id:
            alt = BioCyc_alt_id[name];
            ids.extend(set(alt['common_name'] + alt['synonym'] + [name]));
        elif not isinstance(row[ecocyc_key], list) and row[ecocyc_key] in BioCyc_alt_id:
            ecocyc = row[ecocyc_key];
            alt = BioCyc_alt_id[ecocyc];
            ids.extend(set(alt['common_name'] + alt['synonym'] + [ecocyc]));
        elif not isinstance(name, list) and name in COBRA_alt_id:
            ids.append(name);
            ids.extend(COBRA_alt_id[name]['pathways']);
        elif name:
            ids.append(name);
        # guarded because the id may be empty/None
        if name:
            met_id = deformatAndConvert_metID(name);
            if met_id: ids.append(met_id);
        return ids;

    def cobra_ids(name):
        '''Identifiers for one side of a COBRA interaction row'''
        ids = [name];
        if name in COBRA_alt_id2:
            ids.extend(set(COBRA_alt_id2[name]['gene_name']));
        met_id = deformatAndConvert_metID(name);
        if met_id: ids.append(met_id);
        return ids;

    def flatten(left_ids, right_ids, unique):
        '''One row per (left, right) pair, carrying the shared attributes'''
        rows = [];
        for l in left_ids:
            for r in right_ids:
                tmp = {'left':l, 'right':r};
                tmp.update(unique);
                rows.append(tmp);
        return rows;

    data_tmp = []
    #BioCyc
    for row in BioCyc2COBRA_regulation:
        if not row['used_']: continue;
        unique = {
            'mode':row['mode'],
            'mechanism':row['mechanism'],
            'parent_classes':row['parent_classes']
            };
        data_tmp.extend(flatten(biocyc_ids(row,'left'), biocyc_ids(row,'right'), unique));
    #COBRA
    for row in COBRA_interaction:
        unique = {
            'mode':row['mode'],
            'mechanism':row['mechanism'],
            'parent_classes':row['parent_classes']
            };
        # both sides go through the same helper, so the right-hand
        # alternative gene names are expanded exactly like the left-hand ones
        data_tmp.extend(flatten(cobra_ids(row['left']), cobra_ids(row['right']), unique));
    #remove duplicate entries
    #(NOTE: only works because each dictionary is constructed identically;
    # a list scan is used because the values are not guaranteed hashable)
    data_O = [];
    for row in data_tmp:
        if row not in data_O:
            data_O.append(row);
    return data_O;
def execute_testMeasuredFluxes(self,experiment_id_I, models_I, ko_list_I={}, flux_dict_I={}, model_ids_I=[],
                               sample_name_abbreviations_I=[],time_points_I=[],
                               adjustment_1_I=True,adjustment_2_I=True,diagnose_I=False,
                               update_measuredFluxes_I=False):
    '''Test each model constrained to the measured fluxes

    Iterates over every model id / sample name abbreviation / time point
    (taken from the *_I arguments when given, otherwise queried) and either
    diagnoses the flux bounds or tests the constrained model.

    INPUT:
    experiment_id_I = experiment id used for the queries and the output rows
    models_I = {} of model_id:cobra_model
    ko_list_I = knock-out list handed to the model checks ([] when empty)
    flux_dict_I = {} of rxn_id:{'flux','stdev','lb','ub','units','used_','comment_'};
                  queried per model/sample/time point when empty
    adjustment_1_I, adjustment_2_I = forwarded to diagnose_modelLBAndUB
    diagnose_I = boolean, diagnose the bounds instead of testing the model
    update_measuredFluxes_I = boolean, with diagnose_I also pass the collected
                  flux rows to update_unique_dataStage03QuantificationMeasuredFluxes

    OUTPUT:
    diagnose_I True: {model_id:{sna:{time_point:summary}}}
    diagnose_I False: [] with one test_model result per model/sample/time point
    '''
    cobradependencies = models_COBRA_dependencies()
    diagnose_variables_O = {}
    flux_dict_O = []
    test_O = []

    # get the model ids:
    model_ids = model_ids_I if model_ids_I else \
        self.get_modelID_experimentID_dataStage02PhysiologySimulation(experiment_id_I)
    for model_id in model_ids:
        per_model = diagnose_variables_O[model_id] = {}
        cobra_model_base = models_I[model_id]
        print('testing model ' + model_id)
        # get sample names and sample name abbreviations
        sample_name_abbreviations = sample_name_abbreviations_I if sample_name_abbreviations_I else \
            self.get_sampleNameAbbreviations_experimentIDAndModelID_dataStage02PhysiologySimulation(experiment_id_I,model_id)
        for sna in sample_name_abbreviations:
            per_sample = per_model[sna] = {}
            print('testing sample_name_abbreviation ' + sna)
            # get the time_points
            time_points = time_points_I if time_points_I else \
                self.get_timePoints_experimentIDAndModelIDAndSampleNameAbbreviation_dataStage02PhysiologySimulation(experiment_id_I,model_id,sna)
            for tp in time_points:
                per_sample[tp] = {'bad_lbub_1':None,'bad_lbub_2':None}
                print('testing time_point ' + tp)
                # get the flux data
                flux_dict = flux_dict_I if flux_dict_I else \
                    self.get_fluxDict_experimentIDAndModelIDAndSampleNameAbbreviationsAndTimePoint_dataStage03QuantificationMeasuredFluxes(experiment_id_I,model_id,sna,tp)
                # get the ko list
                ko_list = ko_list_I if ko_list_I else []
                # every check runs on a fresh copy of the base model
                cobra_model = cobra_model_base.copy()
                if not diagnose_I:
                    # test and constrain each model
                    test_O.append(cobradependencies.test_model(
                        cobra_model_I=cobra_model,ko_list=ko_list,flux_dict=flux_dict,description=None))
                    continue
                # check each flux bounds and record the variables
                summary_O = cobradependencies.diagnose_modelLBAndUB(
                    cobra_model,ko_list,flux_dict,
                    adjustment_1_I=adjustment_1_I,adjustment_2_I=adjustment_2_I)
                per_sample[tp] = summary_O
                per_sample[tp]['flux_dict'] = flux_dict
                flux_dict_O.extend(
                    {'experiment_id':experiment_id_I,
                     'model_id':model_id,
                     'sample_name_abbreviation':sna,
                     'time_point':tp,
                     'rxn_id':rxn_id,
                     'flux_average':d['flux'],
                     'flux_stdev':d['stdev'],
                     'flux_lb':d['lb'],
                     'flux_ub':d['ub'],
                     'flux_units':d['units'],
                     'used_':d['used_'],
                     'comment_':d['comment_']}
                    for rxn_id,d in flux_dict.items())

    if not diagnose_I:
        return test_O
    if update_measuredFluxes_I:
        #update measuredFluxes
        self.update_unique_dataStage03QuantificationMeasuredFluxes(flux_dict_O)
    return diagnose_variables_O
def execute_fba(self,simulation_id_I,
                rxn_ids_I=[],
                models_I = {},
                method_I='fba',
                options_I = {},
                allow_loops_I=True,
                solver_id_I = 'cglpk'):
    '''Run FBA on the constrained model of a simulation and store the results

    INPUT:
    simulation_id = string
    rxn_ids_I = [{}], specifying specific rxn ub and lb
        e.g., [{'rxn_id':string, 'flux_lb':float, 'flux_ub':float},...]
        queried from the measured fluxes of the simulation when empty
    models_I = {} of model_id:cobra_model
    method_I = string, method to use for optimization
    options_I = {}, optional optimization parameters (stored with each row)
    allow_loops_I = boolean, False: loop-law will be applied prior to calculation
                             default: True
    solver_id_I = string, solver to use for optimization
    OUTPUT:
    None; primal fluxes and dual shadow prices are written with
    add_dataStage02PhysiologySimulatedData

    NOTE: all rows written by one call share a single
    simulation_dateAndTime, taken once after the optimization.
    '''
    print('executing fba...');
    # input:
    modelsCOBRA = models_COBRA_dependencies();
    models = models_I;
    # get simulation information
    simulation_info_all = self.get_rows_simulationID_dataStage02PhysiologySimulation(simulation_id_I);
    if not simulation_info_all:
        print('simulation not found!')
        return;
    simulation_info = simulation_info_all[0]; # unique constraint guarantees only 1 row will be returned
    # get the cobra model and work on a copy
    cobra_model = models[simulation_info['model_id']];
    cobra_model_copy = cobra_model.copy();
    # get rxn_ids
    if rxn_ids_I:
        rxn_ids = rxn_ids_I;
    else:
        rxn_ids = self.get_rows_experimentIDAndModelIDAndSampleNameAbbreviation_dataStage02PhysiologyMeasuredFluxes(simulation_info['experiment_id'],simulation_info['model_id'],simulation_info['sample_name_abbreviation']);
    for rxn in rxn_ids:
        # constrain the model
        reaction = cobra_model_copy.reactions.get_by_id(rxn['rxn_id']);
        reaction.lower_bound = rxn['flux_lb'];
        reaction.upper_bound = rxn['flux_ub'];
    # Test model
    if not modelsCOBRA.test_model(cobra_model_I=cobra_model_copy):
        print('no solution found!');
        return;
    simulated_data = cobra_simulatedData();
    simulated_data.generate_fba_data(
        cobra_model_copy,
        solver=solver_id_I,
        allow_loops=allow_loops_I,
        method_I=method_I
        );
    # one timestamp per run so the primal and dual rows of the same
    # simulation can be matched on simulation_dateAndTime
    simulation_dateAndTime = datetime.datetime.now();
    # fields shared by every primal and dual row
    common = {
        'simulation_id':simulation_id_I,
        'simulation_dateAndTime':simulation_dateAndTime,
        'fba_method':method_I,
        'allow_loops':allow_loops_I,
        'fba_options':options_I,
        'solver_id':solver_id_I,
        'flux_units':'mmol*gDCW-1*hr-1',
        'used_':True,
        'comment_':None};
    #add data to the DB
    data_O = [];
    for k,v in simulated_data.fba_primal_data.items():
        data_tmp = dict(common);
        data_tmp['rxn_id'] = k;
        data_tmp['fba_flux'] = v;
        data_O.append(data_tmp);
    self.add_dataStage02PhysiologySimulatedData('data_stage02_physiology_simulatedData_fbaPrimal',data_O);
    data_O = [];
    for k,v in simulated_data.fba_dual_data.items():
        data_tmp = dict(common);
        data_tmp['met_id'] = k;
        data_tmp['fba_shadowPrice'] = v;
        data_O.append(data_tmp);
    self.add_dataStage02PhysiologySimulatedData('data_stage02_physiology_simulatedData_fbaDual',data_O);
def execute_testConstraintsCumulative(self,simulation_id_I,
                rxn_ids_I=[],
                models_I = {},
                solver_id_I = 'cglpk',
                gr_check_I = None,
                diagnose_threshold_I=0.98,
                diagnose_break_I=0.1):
    '''Apply the flux constraints one after another and report the ones
    that reduce the objective value

    The bounds accumulate on a single model copy, and the copy is
    re-optimized after each reaction is constrained.

    INPUT:
    simulation_id = string
    rxn_ids_I = [{}] of rows with 'rxn_id', 'flux_lb' and 'flux_ub';
                queried from the measured fluxes of the simulation when empty
    models_I = {} of model_id:cobra_model
    solver_id_I = string, solver to use for optimization
    gr_check_I = float, growth rate to use for comparison (default=None:
                 the unconstrained model is optimized to obtain it)
    diagnose_threshold_I = fraction of the reference growth rate at or below which a constraint is flagged
    diagnose_break_I = fraction of the reference growth rate at or below which the diagnosis stops
    OUTPUT:
    data_O = [{}], the flagged constraint rows, each with an added 'gr' entry
             (None is returned when the simulation is not found)
    '''
    print('executing cumulative contraint test...')
    data_O = []
    # input:
    modelsCOBRA = models_COBRA_dependencies()
    models = models_I
    # get simulation information
    info_rows = self.get_rows_simulationID_dataStage02PhysiologySimulation(simulation_id_I)
    if not info_rows:
        print('simulation not found!')
        return
    # first row is used (original note: a unique constraint guarantees only 1 row)
    simulation_info = info_rows[0]
    # get the cobra model
    cobra_model = models[simulation_info['model_id']]
    # reference growth rate
    if gr_check_I:
        gr_check = gr_check_I
    else:
        cobra_model.optimize(solver=solver_id_I)
        gr_check = cobra_model.solution.f
        print('original model growth rate = ' + str(gr_check))
    # get rxn_ids
    if rxn_ids_I:
        rxn_ids = rxn_ids_I
    else:
        rxn_ids = self.get_rows_experimentIDAndModelIDAndSampleNameAbbreviation_dataStage02PhysiologyMeasuredFluxes(
            simulation_info['experiment_id'],simulation_info['model_id'],simulation_info['sample_name_abbreviation'])

    def flag(rxn, gr):
        # record the constraint together with the growth rate it produced
        flagged = {'gr':gr}
        flagged.update(rxn)
        data_O.append(flagged)

    #check 1: check cumulative constraints
    cobra_model_copy = cobra_model.copy()
    for rxn in rxn_ids:
        # constrain the model
        reaction = cobra_model_copy.reactions.get_by_id(rxn['rxn_id'])
        reaction.lower_bound = rxn['flux_lb']
        reaction.upper_bound = rxn['flux_ub']
        # Test model
        cobra_model_copy.optimize(solver=solver_id_I)
        gr = cobra_model_copy.solution.f
        if not gr:
            print('model broken by ' + rxn['rxn_id'])
            flag(rxn, 0.0)
            break
        if gr <= diagnose_break_I*gr_check:
            print('diagnose_break limit exceeded by ' + rxn['rxn_id'])
            flag(rxn, gr)
            break
        if gr <= diagnose_threshold_I*gr_check:
            print('diagnose_threshold limit exceeded by ' + rxn['rxn_id'])
            flag(rxn, gr)
        else:
            print('contrained model growth rate = ' + str(gr) + ' for rxn_id: ' + rxn['rxn_id'])
    return data_O
def execute_analyzeSamplingPoints(self, simulation_id_I,
            rxn_ids_I=None,
            data_dir_I='C:/Users/dmccloskey-sbrg/Documents/MATLAB/sampling_physiology',
            models_I=None,
            points_overview_I=True,
            flux_stats_I=True,
            metabolite_stats_I=False,
            subsystem_stats_I=False,
            ):
    '''Load and analyze sampling points

    INPUT:
    simulation_id_I = string, simulation to analyze (looked up with
                simulation type 'sampling')
    rxn_ids_I = [{}], reaction bounds used to constrain the model
                e.g., [{'rxn_id':string, 'flux_lb':float, 'flux_ub':float},...]
                when empty or None, the rows are queried from the measured
                fluxes of the simulation's experiment_id, model_id, and
                sample_name_abbreviation
    data_dir_I = string, directory of the sampled points file
    models_I = {} string:cobra_model object, model_id:cobra_model
    points_overview_I = boolean, if True, the sampled points overview will be added to the database
    flux_stats_I = boolean, if True, sampled points descriptive statistics will be calculated
    metabolite_stats_I = boolean, if True, sampled points will be converted to metabolite flux sum values
                and descriptive statistics will be calculated
    subsystem_stats_I = boolean, if True, sampled points will be converted to subsystem flux sum values
                and descriptive statistics will be calculated
    OUTPUT:
    None; rows are added to the database through self
    '''

    def _statistics_rows(sampling_I, id_column_I, flux_units_I):
        '''Convert sampling_I.points_statistics into database rows keyed by id_column_I'''
        rows_O = []
        for k, v in sampling_I.points_statistics.items():
            rows_O.append({
                'simulation_id': simulation_id_I,
                'simulation_dateAndTime': sampling_I.simulation_dateAndTime,
                id_column_I: k,
                'flux_units': flux_units_I,
                'sampling_points': None,  # the raw points (v['points']) are not stored
                'sampling_n': v['n'],
                'sampling_ave': v['ave'],
                'sampling_var': v['var'],
                'sampling_lb': v['lb'],
                'sampling_ub': v['ub'],
                'sampling_ci': 0.95,
                'sampling_min': v['min'],
                'sampling_max': v['max'],
                'sampling_median': v['median'],
                'sampling_iq_1': v['iq_1'],
                'sampling_iq_3': v['iq_3'],
                'used_': True,
                'comment_': None})
        return rows_O

    print('analyzing sampling points')
    # a None default avoids sharing one mutable default object between calls
    models = models_I if models_I is not None else {}

    # get simulation information
    simulation_info_all = self.get_rows_simulationIDAndSimulationType_dataStage02PhysiologySimulation(simulation_id_I, 'sampling')
    if not simulation_info_all:
        print('simulation not found!')
        return
    simulation_info = simulation_info_all[0]  # unique constraint guarantees only 1 row will be returned

    # get simulation parameters
    simulation_parameters_all = self.get_rows_simulationID_dataStage02PhysiologySamplingParameters(simulation_id_I)
    if not simulation_parameters_all:
        # same message as execute_sampling (was a copy of 'simulation not found!')
        print('simulation parameters not found!')
        return
    simulation_parameters = simulation_parameters_all[0]  # unique constraint guarantees only 1 row will be returned

    # constrain a copy of the cobra model
    cobra_model = models[simulation_info['model_id']]
    cobra_model_copy = cobra_model.copy()
    if rxn_ids_I:
        rxn_ids = rxn_ids_I
    else:
        rxn_ids = self.get_rows_experimentIDAndModelIDAndSampleNameAbbreviation_dataStage02PhysiologyMeasuredFluxes(
            simulation_info['experiment_id'],
            simulation_info['model_id'],
            simulation_info['sample_name_abbreviation'])
    for rxn in rxn_ids:
        reaction = cobra_model_copy.reactions.get_by_id(rxn['rxn_id'])
        reaction.lower_bound = rxn['flux_lb']
        reaction.upper_bound = rxn['flux_ub']

    # test the constrained model (helper is only built once the inputs are known to be valid)
    modelsCOBRA = models_COBRA_dependencies()
    if not modelsCOBRA.test_model(cobra_model_I=cobra_model_copy):
        print('no solution found!')
        return

    # load the results of sampling
    if simulation_parameters['sampler_id'] == 'gpSampler':
        sampling = matlab_sampling(data_dir_I=data_dir_I)
        filename_points = simulation_id_I + '_points' + '.mat'
        sampling.get_points_matlab(filename_points, 'sampler_out')
    elif simulation_parameters['sampler_id'] == 'optGpSampler':
        sampling = optGpSampler_sampling(
            data_dir_I=data_dir_I,
            model_I=cobra_model_copy)
        filename_points = simulation_id_I + '_points' + '.json'
        filename_warmup = simulation_id_I + '_warmup' + '.json'
        sampling.get_points_json(filename_points)
        sampling.get_warmup_json(filename_warmup)
        sampling.calculate_mixFraction()
    else:
        print('sampler_id not recognized')
        return

    # find loops and remove them from the points
    loops_fva = self.settings['workspace_data'] + '/loops_fva_tmp.json'
    sampling.simulate_loops(
        data_fva=loops_fva,
        solver_I=simulation_parameters['solver_id'])
    sampling.find_loops(data_fva=loops_fva)
    sampling.remove_loopsFromPoints()

    # add points overview to the database
    if points_overview_I:
        row = {'simulation_id': simulation_id_I,
            'simulation_dateAndTime': sampling.simulation_dateAndTime,
            'mixed_fraction': sampling.mixed_fraction,
            'data_dir': data_dir_I + '/' + filename_points,
            'infeasible_loops': sampling.loops,
            'used_': True,
            'comment_': None
            }
        self.add_dataStage02PhysiologySampledPoints([row])

    # calculate the flux descriptive statistics
    if flux_stats_I:
        sampling.descriptive_statistics(points_I='flux')
        self.add_rows_table(
            'data_stage02_physiology_sampledData',
            _statistics_rows(sampling, 'rxn_id', 'mmol*gDW-1*hr-1'))

    # calculate descriptive stats for metabolites
    if metabolite_stats_I:
        sampling.convert_points2MetabolitePoints()
        sampling.descriptive_statistics(points_I='metabolite')
        self.add_rows_table(
            'data_stage02_physiology_sampledMetaboliteData',
            _statistics_rows(sampling, 'met_id', 'mmol*gDW-1*hr-1_metSum'))

    # calculate descriptive stats for subsystems
    if subsystem_stats_I:
        sampling.convert_points2SubsystemPoints()
        sampling.descriptive_statistics(points_I='subsystem')
        self.add_rows_table(
            'data_stage02_physiology_sampledSubsystemData',
            _statistics_rows(sampling, 'subsystem_id', 'mmol*gDW-1*hr-1_subsystemSum'))
def execute_sampling(self, simulation_id_I,
            rxn_ids_I=None,
            data_dir_I='C:/Users/dmccloskey-sbrg/Documents/MATLAB/sampling_physiology',
            models_I=None,
            ):
    '''Sample a specified model that is constrained to measured physiological data

    The model is copied, constrained, and tested; if the test passes, the
    export routine of the sampler named by the simulation parameters
    ('gpSampler' or 'optGpSampler') is called with file names derived from
    simulation_id_I.

    INPUT:
    simulation_id_I = string
    rxn_ids_I = [{}], reaction bounds used to constrain the model
                e.g., [{'rxn_id':string, 'flux_lb':float, 'flux_ub':float},...]
                when empty or None, the rows are queried from the measured
                fluxes of the simulation's experiment_id, model_id, and
                sample_name_abbreviation
    data_dir_I = string, directory handed to the sampler object
    models_I = {} of model_id:cobra_model
    OUTPUT:
    None
    '''
    print('executing sampling...')
    # a None default avoids sharing one mutable default object between calls
    models = models_I if models_I is not None else {}

    # get simulation information
    simulation_info_all = self.get_rows_simulationID_dataStage02PhysiologySimulation(simulation_id_I)
    if not simulation_info_all:
        print('simulation not found!')
        return
    simulation_info = simulation_info_all[0]  # unique constraint guarantees only 1 row will be returned

    # get simulation parameters
    simulation_parameters_all = self.get_rows_simulationID_dataStage02PhysiologySamplingParameters(simulation_id_I)
    if not simulation_parameters_all:
        print('simulation parameters not found!')
        return
    simulation_parameters = simulation_parameters_all[0]  # unique constraint guarantees only 1 row will be returned

    # constrain a copy of the cobra model
    cobra_model = models[simulation_info['model_id']]
    cobra_model_copy = cobra_model.copy()
    if rxn_ids_I:
        rxn_ids = rxn_ids_I
    else:
        rxn_ids = self.get_rows_experimentIDAndModelIDAndSampleNameAbbreviation_dataStage02PhysiologyMeasuredFluxes(
            simulation_info['experiment_id'],
            simulation_info['model_id'],
            simulation_info['sample_name_abbreviation'])
    for rxn in rxn_ids:
        reaction = cobra_model_copy.reactions.get_by_id(rxn['rxn_id'])
        reaction.lower_bound = rxn['flux_lb']
        reaction.upper_bound = rxn['flux_ub']

    # test the constrained model (helper is only built once the inputs are known to be valid)
    modelsCOBRA = models_COBRA_dependencies()
    if not modelsCOBRA.test_model(cobra_model_I=cobra_model_copy):
        print('no solution found!')
        return

    sampler_id = simulation_parameters['sampler_id']
    if sampler_id == 'gpSampler':
        sampling = matlab_sampling(data_dir_I=data_dir_I)
        sampling.export_sampling_matlab(cobra_model=cobra_model_copy,
            filename_model=simulation_id_I + '.mat',
            filename_script=simulation_id_I + '.m',
            filename_points=simulation_id_I + '_points' + '.mat',
            solver_id_I=simulation_parameters['solver_id'],
            n_points_I=simulation_parameters['n_points'],
            n_steps_I=simulation_parameters['n_steps'],
            max_time_I=simulation_parameters['max_time'])
    elif sampler_id == 'optGpSampler':
        sampling = optGpSampler_sampling(data_dir_I=data_dir_I)
        sampling.export_sampling_optGpSampler(cobra_model=cobra_model_copy,
            filename_model=simulation_id_I + '.json',
            filename_script=simulation_id_I + '.py',
            filename_points=simulation_id_I + '_points' + '.json',
            filename_warmup=simulation_id_I + '_warmup' + '.json',
            solver_id_I=simulation_parameters['solver_id'],
            n_points_I=simulation_parameters['n_points'],
            n_steps_I=simulation_parameters['n_steps'],
            n_threads_I=simulation_parameters['n_threads'])
    else:
        print('sampler_id not recognized')
cobra01.initialize_supportedTables() cobra01.initialize_tables() #make the BioCyc table from SBaaS_models.models_BioCyc_execute import models_BioCyc_execute biocyc01 = models_BioCyc_execute(session,engine,pg_settings.datadir_settings); biocyc01.initialize_supportedTables() biocyc01.initialize_tables() #BioCyc dependencies from SBaaS_models.models_BioCyc_dependencies import models_BioCyc_dependencies biocyc01_dep = models_BioCyc_dependencies(); #BioCyc dependencies from SBaaS_models.models_COBRA_dependencies import models_COBRA_dependencies cobra01_dep = models_COBRA_dependencies(); sys.path.append(pg_settings.datadir_settings['workspace']+'/sbaas_shared') from ALEsKOs01_shared.ALEsKOs01_commonRoutines import * iobase = base_importData(); iobase.read_json( pg_settings.datadir_settings['workspace_data']+\ '/_output/BioCyc_regulation.json'); regulation_O = iobase.data; #protein-mediated-translation-regulation not annotated gmr_str = 'fusA,rplA,rplE,rplF,rplJ,rplK,rplL,rplN,rplO,rplR,rplX,rpmD,rpmJ,rpoB,rpoC,rpsB,rpsE,rpsG,rpsH,rpsL,rpsN,secY,tsf,tufA' gmr = gmr_str.split(','); BioCyc_regulation_1reg = biocyc01_dep.filter_singleRegulatorGenes_BioCycRegulation( regulation_O,