def make_13CEnsemble(self,formula_str_I):
    '''Build the 13C isotopologue ensemble (m+0 ... m+nC) of a formula.

    Input:
        formula_str_I = formula string; every '+' and '-' character is
                        stripped before the formula is parsed
    Output:
        mass_ensemble_O = dict keyed by the number of 13C carbons, whose
                          values are Formula objects rebuilt with that many
                          carbons stored under mass number 13. A formula
                          without carbon yields {0: Formula(formula)}.
    '''
    stripped = re.sub('[+-]', '', formula_str_I)  # remove '-' or '+'
    parsed = Formula(stripped)

    # Nothing to label when the formula contains no carbon
    if 'C' not in parsed._elements:
        return {0: parsed}

    # The carbon count is read from key 0 of the per-element isotope dict
    # (presumably the "natural abundance" slot -- confirm against Formula)
    n_carbon = parsed._elements['C'][0]

    ensemble = {}
    for n_heavy in range(n_carbon + 1):
        n_light = n_carbon - n_heavy
        labelled = Formula(stripped)
        if n_heavy == 0:
            labelled._elements['C'] = {0: n_light}
        elif n_light == 0:
            labelled._elements['C'] = {13: n_heavy}
        else:
            labelled._elements['C'] = {0: n_light, 13: n_heavy}
        # Re-parse from the formula string so the stored object is built
        # from the edited composition
        ensemble[n_heavy] = Formula(labelled.formula)
    return ensemble
def execute_scheduledMRMPro_quant(self,met_ids_I):
    '''Generate light and uniformly 13C-labelled (heavy) quantification MRMs.

    For each requested metabolite the 'tuning' transition is queried and
    copied. The original becomes the '.Light' component; the copy becomes the
    '.Heavy' ('-UC13') component whose formulas, exact masses and q1/q3
    masses are shifted to the species with every carbon stored as 13C.
    All generated rows are passed to self.add_MSComponents.

    Input:
        met_ids_I = [{'met_id': , 'precursor_formula':, 'product_formula':},]
    Output:
        None (the rows are handed to self.add_MSComponents)
    '''
    # (formula field prefix, mass column) pairs that are processed identically
    ion_fields = (('precursor', 'q1_mass'), ('product', 'q3_mass'))

    rows = []
    for met in met_ids_I:
        met_id = met['met_id']
        # query transition from the tuning method
        light = self.get_row_MSComponents_metIDAndFormula(
            met_id, met['precursor_formula'], met['product_formula'], 'tuning')
        heavy = light.copy()

        # component_name / group_name of the light transition
        name_stem = met_id + '.' + met_id + '_' + str(light['quantifier'])
        light['component_name'] = name_stem + '.Light'
        light['ms_group'] = met_id
        light['ms_methodtype'] = 'quantification'

        # component_name / group_name of the UC13 transition
        heavy['component_name'] = name_stem + '.Heavy'
        heavy['met_ID'] = met_id + '-UC13'
        heavy['met_name'] = heavy['met_name'] + '-UC13'
        heavy['ms_group'] = met_id + '-UC13'
        heavy['ms_methodtype'] = 'quantification'

        # UC13 equivalents: formula, exact mass and q1/q3 mass
        for ion, mass_key in ion_fields:
            formula_key = ion + '_formula'
            exactmass_key = ion + '_exactmass'
            if not light[formula_key]:
                continue

            # remove '-' or '+' before parsing, then re-append the ms_mode
            light_formula = Formula(re.sub('[+-]', '', light[formula_key]))
            light[formula_key] = light_formula.formula + light['ms_mode']
            light[exactmass_key] = light_formula.isotope.mass

            if 'C' in list(light_formula._elements.keys()):
                n_carbon = light_formula._elements['C'][0]
                labelled = Formula(light_formula.formula)
                labelled._elements['C'] = {13: n_carbon}
                heavy_formula = Formula(labelled.formula)
            else:
                heavy_formula = light_formula

            heavy[formula_key] = heavy_formula.formula + light['ms_mode']
            heavy[exactmass_key] = heavy_formula.isotope.mass
            # Stand-in for an algorithm that checks for unique q1/q3 masses:
            # shift the tuned mass by the heavy-minus-light exact-mass
            # difference so each mass stays unique for a given mode in
            # ms_components
            heavy[mass_key] = (light[mass_key] + heavy_formula.isotope.mass
                               - light_formula.isotope.mass)

        # defaults (window, dwell, priority, ms_include) are left untouched
        rows.append(light)
        rows.append(heavy)

    self.add_MSComponents(rows)
def _parse_model_sbml(self,model_id_I,date_I,filename_I):
    '''Parse an SBML model file into model, reaction and metabolite rows.

    Input:
        model_id_I = identifier written to the 'model_id' field of every row
        date_I = date stored with the model metadata
        filename_I = path of the SBML file
    Output:
        model_data = [dict] single row of model metadata, including the raw
                     file content under 'model_file'
        reaction_data = [dict] one row per reaction of the loaded model
        metabolite_data = [dict] one row per metabolite of the loaded model
    '''
    # Read in the sbml file and define the model conditions
    cobra_model = create_cobra_model_from_sbml_file(filename_I, print_time=True)

    # Keep the raw file alongside the metadata. The encoding is given
    # explicitly so the stored text does not depend on the platform locale.
    with open(filename_I, 'r', encoding='utf-8') as f:
        model_file = f.read()

    # parse out model metadata
    model_data = [{
        'model_id': model_id_I,
        'model_name': None,
        'date': date_I,
        'model_description': cobra_model.description,
        'model_file': model_file,
        'file_type': 'sbml',
    }]

    # parse out reaction data
    # (the *_tracked, *_mapping and rxn_equation fields are not populated here)
    reaction_data = []
    for r in cobra_model.reactions:
        reactants = r.reactants
        products = r.products
        reaction_data.append({
            'model_id': model_id_I,
            'rxn_id': r.id,
            'rxn_name': r.name,
            'equation': r.build_reaction_string(),
            'subsystem': r.subsystem,
            'gpr': r.gene_reaction_rule,
            'genes': [g.id for g in r.genes],
            'reactants_stoichiometry': [r.get_coefficient(react.id) for react in reactants],
            'reactants_ids': [react.id for react in reactants],
            'products_stoichiometry': [r.get_coefficient(prod.id) for prod in products],
            'products_ids': [prod.id for prod in products],
            'lower_bound': r.lower_bound,
            'upper_bound': r.upper_bound,
            'objective_coefficient': r.objective_coefficient,
            'flux_units': 'mmol*gDW-1*hr-1',
            'reversibility': r.reversibility,
            'fixed': None,
            'free': None,
            'weight': None,
            'used_': True,
            'comment_': None,
        })

    # parse out metabolite data
    # (met_elements, met_atompositions, balanced and the symmetry fields are
    # not populated here)
    metabolite_data = []
    for met in cobra_model.metabolites:
        # Rebuild the formula string through Formula: every element count is
        # stored under isotope key 0 before the string is regenerated
        tmp = Formula()
        tmp._elements = {k: {0: v} for k, v in met.formula.elements.items()}
        metabolite_data.append({
            'model_id': model_id_I,
            'met_name': met.name,
            'met_id': met.id,
            'formula': tmp.formula,
            'charge': met.charge,
            'compartment': met.compartment,
            'bound': met._bound,
            'constraint_sense': met._constraint_sense,
            'used_': True,
            'comment_': None,
        })

    return model_data,reaction_data,metabolite_data
def _parse_json_sbml_cobra_model(cobra_model, model_id, date, model_file_name, filetype): """ Helper function for parse_cobra_model(), parses reaction- and metabolite information out of an already loaded cobra model Parameters ---------- cobra_model : cobra.Model Cobra metabolic model as loaded by the file type specific import function model_id : str Name of the model (for downstream reference) date : str Date of model processing (for downstream reference) model_file_name : str or path + str Filename or path to file + filename of the cobra metabolic model filetype : str Extension of the provided file Returns ------- model_data : pandas.DataFrame General information about the processed metabolic model reaction_data : pandas.DataFrame Information about the reactions in the metabolic model metabolite_data : pandas.DataFrame Information about the metabolites in the metabolic model """ # Pre-process the model file information with open(model_file_name, "r", encoding="utf-8") as f: model_file = f.read() # parse out model data model_data = pd.DataFrame( { "model_id": model_id, "date": date, "model_description": cobra_model.description, "model_file": model_file, "file_type": filetype, }, index=[0], ) # parse out reaction data reaction_data_temp = {} for cnt, r in enumerate(cobra_model.reactions): reaction_data_dict = { "model_id": model_id, "rxn_id": r.id, "rxn_name": r.name, "equation": r.build_reaction_string(), "subsystem": r.subsystem, "gpr": r.gene_reaction_rule, "genes": [g.id for g in r.genes], "reactants_stoichiometry": [r.get_coefficient(react.id) for react in r.reactants], "reactants_ids": [react.id for react in r.reactants], "products_stoichiometry": [r.get_coefficient(prod.id) for prod in r.products], "products_ids": [prod.id for prod in r.products], "lower_bound": r.lower_bound, "upper_bound": r.upper_bound, "objective_coefficient": r.objective_coefficient, "flux_units": "mmol*gDW-1*hr-1", "reversibility": r.reversibility, "used_": True, } 
reaction_data_temp[cnt] = reaction_data_dict reaction_data = pd.DataFrame.from_dict(reaction_data_temp, "index") # parse out metabolite data metabolite_data_tmp = {} for cnt, met in enumerate(cobra_model.metabolites): # Pre-process formulas using FIA-MS database methods if is_valid(met): formula = Formula(met.formula) formula = str(formula) else: formula = None # set up part of temp dict to transform into df later metabolite_data_dict = { "model_id": model_id, "met_name": met.name, "met_id": met.id, "formula": formula, "charge": met.charge, "compartment": met.compartment, "bound": met._bound, "annotations": met.annotation, "used_": True, } metabolite_data_tmp[cnt] = metabolite_data_dict metabolite_data = pd.DataFrame.from_dict(metabolite_data_tmp, "index") return model_data, reaction_data, metabolite_data
def sort_parameter_info(self, f_par_info, simulation_info, fittedData):
    """
    Split the fitted parameters into one table of net fluxes and one table
    of MS fragment ("Norm") parameters

    Parameters
    ----------
    f_par_info : dict
        output of the get_fitted_parameters(f, simulation_info) function;
        the keys "rxn_id", "flux", "flux_stdev", "par_type", "flux_lb",
        "flux_ub", "flux_units", "fit_alf" and "free" are read and indexed
        by parameter position
    simulation_info : pandas.DataFrame
        the MS fragment file corresponding to the simulation; the columns
        "experiment_id" and "sample_name_abbreviation" are read
    fittedData : pandas.DataFrame
        the output of the "extract_base_stats(f, simulation_id, info)"
        function; the first unique "simulation_id" and
        "simulation_dateAndTime" are used for every row

    Returns
    -------
    fittedFluxes : pandas.DataFrame
        one row per parameter of type "Net flux"
    fittedFragments : pandas.DataFrame
        one row per parameter of type "Norm" whose leading id token matches
        an experiment id or a sample name abbreviation of simulation_info
    """
    flux_rows = {}
    fragment_rows = {}

    ids = f_par_info["rxn_id"]
    values = f_par_info["flux"]
    stdevs = f_par_info["flux_stdev"]
    kinds = f_par_info["par_type"]
    lower = f_par_info["flux_lb"]
    upper = f_par_info["flux_ub"]
    unit_labels = f_par_info["flux_units"]
    alfs = f_par_info["fit_alf"]
    free_flags = f_par_info["free"]

    sim_id = fittedData["simulation_id"].unique()[0]
    sim_timestamp = fittedData["simulation_dateAndTime"].unique()[0]

    # Escaped tokens are expanded before the string is split on "_".
    # NOTE(review): the bracketed forms "[(]" / "[)]" are inserted literally
    # (they are replacement strings, not patterns) -- confirm this is intended.
    substitutions = (
        ("_DASH_", "-"),
        ("_LPARANTHES_", "[(]"),
        ("_RPARANTHES_", "[)]"),
    )

    for pos, kind in enumerate(kinds):
        if kind == "Net flux":
            flux_rows[pos] = {
                "simulation_id": sim_id,
                "simulation_dateAndTime": sim_timestamp,
                "rxn_id": ids[pos],
                "flux": values[pos],
                "flux_stdev": stdevs[pos],
                "flux_lb": lower[pos],
                "flux_ub": upper[pos],
                "flux_units": unit_labels[pos],
                "fit_alf": alfs[pos],
                "fit_chi2s": None,
                "fit_cor": None,
                "fit_cov": None,
                "free": free_flags[pos],
                "used_": True,
                "comment_": None,
            }
            continue

        if kind != "Norm":
            print("type not recognized")
            continue

        # "Norm" ids are space separated: <expt> <fragment id> <fragment> <units>
        tokens = ids[pos].split(" ")
        expt = tokens[0]
        fragment_id = tokens[1]
        fragment_string = tokens[2]
        units = tokens[3]

        for pattern, replacement in substitutions:
            fragment_string = re.sub(pattern, replacement, fragment_string)
        parts = fragment_string.split("_")

        # Ids carrying an "MRM"/"EPI" token and more than five parts hold the
        # mass offset and time point one position further to the right
        has_scan_token = len(parts) > 5 and ("MRM" in parts or "EPI" in parts)
        shift = 1 if has_scan_token else 0
        fragment_mass = Formula(parts[2]).mass + float(parts[3 + shift])
        time_point = parts[4 + shift]

        # The leading token is either an experiment id or a sample name
        # abbreviation; the missing one is taken from the first row of
        # simulation_info
        if expt in list(simulation_info["experiment_id"]):
            experiment = expt
            sample = simulation_info["sample_name_abbreviation"][0]
        elif expt in list(simulation_info["sample_name_abbreviation"]):
            experiment = simulation_info["experiment_id"][0]
            sample = expt
        else:
            # unmatched ids are skipped without a row
            continue

        fragment_rows[pos] = {
            "simulation_id": sim_id,
            "simulation_dateAndTime": sim_timestamp,
            "experiment_id": experiment,
            "sample_name_abbreviation": sample,
            "time_point": time_point,
            "fragment_id": fragment_id,
            "fragment_mass": fragment_mass,
            "fit_val": values[pos],
            "fit_stdev": stdevs[pos],
            "fit_units": units,
            "fit_alf": alfs[pos],
            "fit_cor": None,
            "fit_cov": None,
            "free": free_flags[pos],
            "used_": True,
            "comment_": None,
        }

    fittedFluxes = pd.DataFrame.from_dict(flux_rows, "index")
    fittedFragments = pd.DataFrame.from_dict(fragment_rows, "index")
    return fittedFluxes, fittedFragments
def sort_residual_info(self, f_mnt_res_info, simulation_info, fittedData):
    """
    Split the measurement residuals into one table of flux residuals and
    one table of MS fragment residuals

    Parameters
    ----------
    f_mnt_res_info : dict
        the output of the "get_residuals_info(f)" function; the keys
        "expt_type", "experiment_id", "time_point", "rxn_id", "res_data",
        "res_fit", "res_peak", "res_stdev" and "res_val" are read and
        indexed by residual position
    simulation_info : pandas.DataFrame
        the MS fragment file corresponding to the simulation; the columns
        "experiment_id" and "sample_name_abbreviation" are read
    fittedData : pandas.DataFrame
        the output of the "extract_base_stats(f, simulation_id, info)"
        function; the first unique "simulation_id" and
        "simulation_dateAndTime" are used for every row

    Returns
    -------
    fittedMeasuredFluxResiduals : pandas.DataFrame
        one row per residual of type "Flux"
    fittedMeasuredFragmentResiduals : pandas.DataFrame
        one row per residual of type "MS"

    Notes
    -----
    Residuals whose experiment id matches neither an experiment id nor a
    sample name abbreviation of simulation_info are skipped. For "MS" rows
    the time point is always the one parsed from the fragment id; the
    per-row "time_point" list of f_mnt_res_info is only used for "Flux"
    rows.
    """
    flux_rows = {}
    fragment_rows = {}

    expt_type = f_mnt_res_info["expt_type"]
    experiment_id = f_mnt_res_info["experiment_id"]
    time_point = f_mnt_res_info["time_point"]
    rxn_id = f_mnt_res_info["rxn_id"]
    res_data = f_mnt_res_info["res_data"]
    res_fit = f_mnt_res_info["res_fit"]
    res_peak = f_mnt_res_info["res_peak"]
    res_stdev = f_mnt_res_info["res_stdev"]
    res_val = f_mnt_res_info["res_val"]

    simulation_id = fittedData["simulation_id"].unique()[0]
    simulation_dateAndTime = fittedData["simulation_dateAndTime"].unique()[0]

    for cnt, x_type in enumerate(expt_type):
        if x_type == "Flux":
            id_fields = {"rxn_id": rxn_id[cnt]}
            row_time_point = time_point[cnt]
            target = flux_rows
        elif x_type == "MS":
            # parse the id into fragment_id, mass and time point; escaped
            # tokens are expanded first so they survive the split on "_".
            # NOTE(review): "[(]" / "[)]" are inserted literally here while
            # only the bare "(" / ")" are re-escaped below, so the brackets
            # stay in fragment_id -- confirm this is intended.
            fragment_string = rxn_id[cnt]
            fragment_string = re.sub("_DASH_", "-", fragment_string)
            fragment_string = re.sub("_LPARANTHES_", "[(]", fragment_string)
            fragment_string = re.sub("_RPARANTHES_", "[)]", fragment_string)
            fragment_list = fragment_string.split("_")

            # Ids carrying an "MRM"/"EPI" token and more than five parts
            # hold one extra id part before the mass offset and time point
            has_scan_token = len(fragment_list) > 5 and (
                "MRM" in fragment_list or "EPI" in fragment_list)
            n_id_parts = 4 if has_scan_token else 3
            fragment_id = "_".join(fragment_list[i] for i in range(n_id_parts))
            fragment_mass = Formula(fragment_list[2]).mass + float(
                fragment_list[n_id_parts])
            # Kept in its own local: rebinding `time_point` here would
            # clobber the per-row list that "Flux" rows index with `cnt`
            row_time_point = fragment_list[n_id_parts + 1]

            fragment_id = re.sub("-", "_DASH_", fragment_id)
            fragment_id = re.sub("[(]", "_LPARANTHES_", fragment_id)
            fragment_id = re.sub("[)]", "_RPARANTHES_", fragment_id)

            id_fields = {
                "fragment_id": fragment_id,
                "fragment_mass": fragment_mass,
            }
            target = fragment_rows
        else:
            print("type not recognized")
            continue

        # The reported id is either an experiment id or a sample name
        # abbreviation; the missing one is taken from the first row of
        # simulation_info
        if experiment_id[cnt] in list(simulation_info["experiment_id"]):
            experiment = experiment_id[cnt]
            sample = simulation_info["sample_name_abbreviation"][0]
        elif experiment_id[cnt] in list(
                simulation_info["sample_name_abbreviation"]):
            experiment = simulation_info["experiment_id"][0]
            sample = experiment_id[cnt]
        else:
            # unmatched ids are skipped without a row
            continue

        row = {
            "simulation_id": simulation_id,
            "simulation_dateAndTime": simulation_dateAndTime,
            "experiment_id": experiment,
            "sample_name_abbreviation": sample,
            "time_point": row_time_point,
        }
        row.update(id_fields)
        row.update({
            "res_data": float(res_data[cnt]),
            "res_fit": float(res_fit[cnt]),
            "res_peak": res_peak[cnt],
            "res_stdev": float(res_stdev[cnt]),
            "res_val": float(res_val[cnt]),
            "res_msens": None,
            "res_esens": None,
            "used_": True,
            "comment_": None,
        })
        target[cnt] = row

    fittedMeasuredFluxResiduals = pd.DataFrame.from_dict(flux_rows, "index")
    fittedMeasuredFragmentResiduals = pd.DataFrame.from_dict(
        fragment_rows, "index")
    return fittedMeasuredFluxResiduals, fittedMeasuredFragmentResiduals