def read_cbmodel_from_file(filename, gpr_filename=None):
    """ Load a constraint-based model from file and optionally attach GPR rules.

    The model itself is parsed with ``BiooptParser``. When a GPR file is given,
    it is read as a tab-separated text file with exactly two columns per line:
    the reaction identifier and the GPR rule string.

    Args:
        filename (str): path of the model file handed to ``BiooptParser.parse_file``
        gpr_filename (str): path of the tab-separated GPR file (optional)

    Returns:
        the model produced by the parser, with the GPR associations applied

    Raises:
        ValueError: if a non-blank line of the GPR file does not contain exactly
            two tab-separated columns

    Warns:
        UserWarning: for every reaction id in the GPR file that is not in the model
    """
    parser = BiooptParser()
    model = parser.parse_file(filename)

    if not gpr_filename:
        return model

    with open(gpr_filename, "r") as gpr_file:
        for line_number, line in enumerate(gpr_file, start=1):
            if not line.strip():
                # Blank lines (typically a trailing newline) carry no
                # association and must not abort the whole import.
                continue

            columns = [col.strip() for col in line.split("\t")]
            if len(columns) != 2:
                raise ValueError(
                    "Line {} of {}: expected 2 tab-separated columns, found {}".format(
                        line_number, gpr_filename, len(columns)))
            r_id, gpr_string = columns

            if r_id not in model.reactions:
                warnings.warn(
                    UserWarning("Reaction {} from GPR was not found in model".format(r_id)))
                continue

            if not gpr_string:
                # The reaction exists but has no rule: nothing to associate,
                # and the "not found" warning would be misleading here.
                continue

            gpr = parse_gpr_rule(gpr_string)

            # Register genes referenced by the rule that the model does not know yet.
            for gene_id in gpr.get_genes():
                if gene_id not in model.genes:
                    model.add_gene(Gene(elem_id=gene_id))

            model.set_gpr_association(r_id, gpr)

    return model
def build_ensemble(model, reaction_scores, size, outputfile=None, flavor=None, init_env=None):
    """ Reconstruct a model ensemble using the CarveMe approach.

    Each ensemble member is obtained from one call to ``minmax_reduction``.
    Reactions present in ``reaction_scores`` keep their normalized score on
    every run; reactions without a score (except those whose id starts with
    ``'R_EX'``) receive a fresh random negative score on each run, drawn as
    ``-exp(logstd * N(0, 1))`` where ``logstd`` is the standard deviation of
    the log of the positive scores. Runs whose solution is not optimal are
    dropped, so the resulting ensemble may hold fewer than ``size`` members.

    Args:
        model (CBModel): universal model
        reaction_scores (pandas.DataFrame): reaction scores; the columns
            'reaction', 'normalized_score' and 'GPR' are read
        size (int): number of reconstruction attempts (maximum ensemble size)
        outputfile (str): write ensemble to SBML file (optional)
        flavor (str): SBML flavor ('cobra' or 'fbc2', optional)
        init_env (Environment): initialize final model with given Environment (optional)

    Returns:
        EnsembleModel: reconstructed ensemble
    """
    scores = dict(reaction_scores[['reaction', 'normalized_score']].values)
    unscored = [r_id for r_id in model.reactions
                if r_id not in scores and not r_id.startswith('R_EX')]
    logstd = np.std(np.log([x for x in scores.values() if x > 0]))

    reaction_status = {r_id: [] for r_id in model.reactions}
    solver = solver_instance(model)
    failed = 0

    for _ in range(size):
        # Randomize only the unscored reactions; real scores take precedence.
        random_scores = -np.exp(logstd * np.random.randn(len(unscored)))
        all_scores = dict(zip(unscored, random_scores))
        all_scores.update(scores)

        sol = minmax_reduction(model, all_scores, solver=solver)

        if sol.status == Status.OPTIMAL:
            for r_id in model.reactions:
                # A reaction is active if it carries flux or if one of its
                # forward/reverse indicator variables is switched on.
                active = (abs(sol.values[r_id]) >= 1e-6
                          or (sol.values.get('yf_' + r_id, 0) > 0.5)
                          or (sol.values.get('yr_' + r_id, 0) > 0.5))
                reaction_status[r_id].append(active)
        else:
            failed += 1

    # Failed runs contributed no status entries, so they do not count as members.
    ensemble_size = size - failed
    ensemble = EnsembleModel(model, ensemble_size, reaction_status)
    ensemble.simplify()

    for _, row in reaction_scores.iterrows():
        r_id = row['reaction']
        if r_id in ensemble.model.reactions:
            gpr = parse_gpr_rule(row['GPR'])
            ensemble.model.reactions[r_id].set_gpr_association(gpr)

    if init_env:
        init_env.apply(ensemble.model, inplace=True, warning=False)

    if outputfile:
        cleanup_metadata(ensemble.model)
        save_ensemble(ensemble, outputfile, flavor=flavor)

    # The docstring has always promised the ensemble; previously nothing was returned.
    return ensemble
def create_gpr_table(model_specific_data, reactions=None, outputfile=None):
    """ Flatten GPR associations into a relational (gene, protein, reaction, model) table.

    Note:
        Every protein of a parsed rule yields one row per subunit gene. GPRs
        carry no protein identifiers, so the protein id is built by joining
        the sorted, de-duplicated gene ids (with their first two characters
        stripped) with ':' and prefixing the result with 'P_'.
        Pseudo-genes for spontaneous reactions are currently NOT filtered out.

    Examples:
        The rule ((G1 and G2) or G3) -> R1, becomes:

        (G1, G1:G2, R1)
        (G2, G1:G2, R1)
        (G3, G3,    R1)

    Args:
        model_specific_data (pandas.DataFrame): model specific data; each row
            must unpack into six values, of which the first (reaction id
            without the 'R_' prefix), second (model id) and last (GPR string)
            are used
        reactions (list): only extract data for given reactions (optional)
        outputfile (str): output CSV file (optional)

    Returns:
        pandas.DataFrame: GPR association table
    """
    records = []

    for _, entry in model_specific_data.iterrows():
        rxn, model_id, _, _, _, gpr_str = entry
        r_id = 'R_' + rxn

        # Guard clauses: skip filtered-out reactions and rows without a rule.
        if reactions is not None and r_id not in reactions:
            continue
        if pd.isnull(gpr_str):
            continue

        for protein in parse_gpr_rule(gpr_str).proteins:
            subunits = sorted(set(protein.genes))
            p_id = 'P_' + ':'.join(gene[2:] for gene in subunits)
            records.extend((gene, p_id, r_id, model_id) for gene in subunits)

    table = pd.DataFrame(records, columns=['gene', 'protein', 'reaction', 'model'])

    if outputfile:
        table.to_csv(outputfile, index=False)

    return table
def read_cbmodel_from_file(filename, gpr_filename=None):
    """ Load a constraint-based model from file and optionally attach GPR rules.

    The model itself is parsed with ``BiooptParser``. When a GPR file is given,
    it is read as a tab-separated text file with exactly two columns per line:
    the reaction identifier and the GPR rule string.

    Args:
        filename (str): path of the model file handed to ``BiooptParser.parse_file``
        gpr_filename (str): path of the tab-separated GPR file (optional)

    Returns:
        the model produced by the parser, with the GPR associations applied

    Raises:
        ValueError: if a non-blank line of the GPR file does not contain exactly
            two tab-separated columns

    Warns:
        UserWarning: for every reaction id in the GPR file that is not in the model
    """
    parser = BiooptParser()
    model = parser.parse_file(filename)

    if not gpr_filename:
        return model

    with open(gpr_filename, "r") as gpr_file:
        for line_number, line in enumerate(gpr_file, start=1):
            if not line.strip():
                # Blank lines (typically a trailing newline) carry no
                # association and must not abort the whole import.
                continue

            columns = [col.strip() for col in line.split("\t")]
            if len(columns) != 2:
                raise ValueError(
                    "Line {} of {}: expected 2 tab-separated columns, found {}".format(
                        line_number, gpr_filename, len(columns)))
            r_id, gpr_string = columns

            if r_id not in model.reactions:
                warnings.warn(
                    UserWarning("Reaction {} from GPR was not found in model".format(r_id)))
                continue

            if not gpr_string:
                # The reaction exists but has no rule: nothing to associate,
                # and the "not found" warning would be misleading here.
                continue

            gpr = parse_gpr_rule(gpr_string)

            # Register genes referenced by the rule that the model does not know yet.
            for gene_id in gpr.get_genes():
                if gene_id not in model.genes:
                    model.add_gene(Gene(elem_id=gene_id))

            model.set_gpr_association(r_id, gpr)

    return model
def carve_model(model, reaction_scores, outputfile=None, flavor=None, inplace=True,
                default_score=-1.0, uptake_score=0.0, soft_score=1.0,
                soft_constraints=None, hard_constraints=None,
                ref_model=None, ref_score=0.0, init_env=None, debug_output=None):
    """ Reconstruct a metabolic model using the CarveMe approach.

    A MILP (``minmax_reduction``) selects the reactions to keep; inactive
    reactions and the metabolites left disconnected are then removed, and the
    GPR rules found in ``reaction_scores`` are attached to the surviving
    reactions.

    Args:
        model (CBModel): universal model
        reaction_scores (pandas.DataFrame): reaction scores; the columns
            'reaction', 'normalized_score' and 'GPR' are read
        outputfile (str): write model to SBML file (optional)
        flavor (str): SBML flavor ('cobra' or 'fbc2', optional)
        inplace (bool): Change model in place (default: True)
        default_score (float): penalty for non-annotated intracellular reactions (default: -1.0)
        uptake_score (float): penalty for utilization of extracellular compounds (default: 0.0)
        soft_score (float): score for soft constraints (default: 1.0)
        soft_constraints (dict): dictionary from reaction id to expected flux direction (-1, 1, 0)
        hard_constraints (dict): dictionary of flux bounds, reaction id -> (lb, ub)
        ref_model (CBModel): reference model; the reactions it shares with
            ``model`` are passed to the MILP as reference reactions (optional)
        ref_score (float): score forwarded to the MILP for reference reactions (default: 0.0)
        init_env (Environment): initialize final model with given Environment (optional)
        debug_output (str): forwarded to the MILP and used as file name prefix
            for the '<debug_output>_milp_solution.tsv' solution dump (optional)

    Returns:
        CBModel: reconstructed model, or None if the MILP solver did not reach
        an optimal solution

    Warns:
        UserWarning: if soft or hard constraints mention reactions that are
            not in the model (those entries are ignored)
    """
    if not inplace:
        model = model.copy()

    scores = dict(reaction_scores[['reaction', 'normalized_score']].values)

    if soft_constraints:
        not_in_model = set(soft_constraints) - set(model.reactions)
        if not_in_model:
            # Drop unknown reactions instead of letting the MILP fail on them.
            soft_constraints = {
                r_id: val
                for r_id, val in soft_constraints.items()
                if r_id in model.reactions
            }
            warnings.warn(
                "Soft constraints contain reactions not in the model:\n"
                + "\n".join(not_in_model))

    if hard_constraints:
        not_in_model = set(hard_constraints) - set(model.reactions)
        if not_in_model:
            hard_constraints = {
                r_id: (lb, ub)
                for r_id, (lb, ub) in hard_constraints.items()
                if r_id in model.reactions
            }
            warnings.warn(
                "Hard constraints contain reactions not in the model:\n"
                + "\n".join(not_in_model))

    if ref_model:
        ref_reactions = set(model.reactions) & set(ref_model.reactions)
    else:
        ref_reactions = None

    sol = minmax_reduction(model, scores,
                           default_score=default_score,
                           uptake_score=uptake_score,
                           soft_score=soft_score,
                           soft_constraints=soft_constraints,
                           hard_constraints=hard_constraints,
                           ref_reactions=ref_reactions,
                           ref_score=ref_score,
                           debug_output=debug_output)

    if sol.status != Status.OPTIMAL:
        print("MILP solver failed: {}".format(sol.message))
        return None

    inactive = inactive_reactions(model, sol)

    if debug_output:
        pd.DataFrame.from_dict(sol.values, orient='index').to_csv(
            debug_output + '_milp_solution.tsv', sep='\t', header=False)

    model.remove_reactions(inactive)

    del_metabolites = disconnected_metabolites(model)
    model.remove_metabolites(del_metabolites)

    for _, row in reaction_scores.iterrows():
        r_id = row['reaction']
        if r_id in model.reactions:
            try:
                gpr = parse_gpr_rule(row['GPR'], prefix='G_')
                model.set_gpr_association(r_id, gpr, add_genes=True)
            except Exception:
                # Best effort: a rule that cannot be parsed or attached is
                # reported and skipped. Catching Exception (not a bare except)
                # lets KeyboardInterrupt / SystemExit propagate.
                print('Failed to parse:', row['GPR'])

    cleanup_metadata(model)

    if init_env:
        init_env.apply(model, inplace=True, warning=False)

    if outputfile:
        save_cbmodel(model, outputfile, flavor=flavor)

    return model