예제 #1
0
def read_cbmodel_from_file(filename, gpr_filename=None):
    """ Read a model with BiooptParser and optionally attach GPR associations.

    Args:
        filename (str): path of the model file handed to BiooptParser.parse_file
        gpr_filename (str): path of a tab-separated file with one
            "<reaction id><TAB><GPR rule>" entry per line (optional)

    Returns:
        the model produced by BiooptParser.parse_file, with a GPR association
        set for every listed reaction that exists in the model and has a
        non-empty rule

    Raises:
        ValueError: if a non-blank line of the GPR file does not contain
            exactly two tab-separated columns
    """
    parser = BiooptParser()
    model = parser.parse_file(filename)

    if gpr_filename:
        with open(gpr_filename, "r") as gpr_file:
            for line_number, line in enumerate(gpr_file, start=1):
                columns = [col.strip() for col in line.split("\t")]

                # Blank lines (e.g. padding at the end of the file) carry no data.
                if not any(columns):
                    continue

                if len(columns) != 2:
                    raise ValueError(
                        "Line {} of {} must have exactly two tab-separated "
                        "columns, found {}".format(
                            line_number, gpr_filename, len(columns)))

                r_id, gpr_string = columns

                if r_id not in model.reactions:
                    warnings.warn(
                        UserWarning(
                            "Reaction {} from GPR was not found in model".
                            format(r_id)))
                    continue

                # A known reaction with an empty rule has nothing to associate;
                # it must not be reported as "not found in model".
                if not gpr_string:
                    continue

                gpr = parse_gpr_rule(gpr_string)
                # Register genes used by the rule that the model does not know yet.
                for gene_id in gpr.get_genes():
                    if gene_id not in model.genes:
                        model.add_gene(Gene(elem_id=gene_id))

                model.set_gpr_association(r_id, gpr)

    return model
예제 #2
0
파일: carving.py 프로젝트: rpelicae/carveme
def build_ensemble(model, reaction_scores, size, outputfile=None, flavor=None, init_env=None):
    """ Reconstruct a model ensemble using the CarveMe approach.

    Each ensemble member is obtained by solving the reduction problem with
    randomly sampled scores for the non-scored, non-exchange reactions.
    Iterations where the solver does not reach an optimal solution are
    dropped, so the resulting ensemble may be smaller than `size`.

    Args:
        model (CBModel): universal model
        reaction_scores (pandas.DataFrame): reaction scores; the 'reaction',
            'normalized_score' and 'GPR' columns are read
        size (int): number of sampling iterations (maximum ensemble size)
        outputfile (str): write model to SBML file (optional)
        flavor (str): SBML flavor ('cobra' or 'fbc2', optional)
        init_env (Environment): initialize final model with given Environment (optional)

    Returns:
        EnsembleModel: reconstructed ensemble
    """

    scores = dict(reaction_scores[['reaction', 'normalized_score']].values)
    unscored = [r_id for r_id in model.reactions if r_id not in scores and not r_id.startswith('R_EX')]
    # Spread (in log space) of the positive scores; it scales the random
    # penalties drawn for unscored reactions below.
    logstd = np.std(np.log([x for x in scores.values() if x > 0]))

    reaction_status = {r_id: [] for r_id in model.reactions}
    solver = solver_instance(model)
    failed = 0

    for _ in range(size):
        # Negative random scores for unscored reactions; known scores take
        # precedence because they are applied last.
        random_scores = -np.exp(logstd * np.random.randn(len(unscored)))
        all_scores = dict(zip(unscored, random_scores))
        all_scores.update(scores)

        sol = minmax_reduction(model, all_scores, solver=solver)

        if sol.status == Status.OPTIMAL:
            for r_id in model.reactions:
                # A reaction is active if it carries flux or if one of its
                # forward/reverse indicator variables is switched on.
                active = (abs(sol.values[r_id]) >= 1e-6
                          or (sol.values.get('yf_' + r_id, 0) > 0.5)
                          or (sol.values.get('yr_' + r_id, 0) > 0.5))
                reaction_status[r_id].append(active)
        else:
            failed += 1

    # Only optimal solutions appended a status, so every status list has
    # exactly this many entries.
    ensemble_size = size - failed
    ensemble = EnsembleModel(model, ensemble_size, reaction_status)
    ensemble.simplify()

    for _, row in reaction_scores.iterrows():
        r_id = row['reaction']
        if r_id in ensemble.model.reactions:
            gpr = parse_gpr_rule(row['GPR'])
            ensemble.model.reactions[r_id].set_gpr_association(gpr)

    if init_env:
        init_env.apply(ensemble.model, inplace=True, warning=False)

    if outputfile:
        cleanup_metadata(ensemble.model)
        save_ensemble(ensemble, outputfile, flavor=flavor)

    # The documented contract is to hand the ensemble back to the caller.
    return ensemble
예제 #3
0
def create_gpr_table(model_specific_data, reactions=None, outputfile=None):
    """ Flatten GPR associations into a (gene, protein, reaction, model) table.

    Note:
        Every boolean rule is expanded into one row per gene of each protein
        of the rule. GPRs carry no protein identifiers, so the protein id is
        built by joining the (sorted, de-duplicated) subunit gene ids, with
        their first two characters removed, using ':' and prefixing 'P_'.
        Pseudo-genes of spontaneous reactions are currently NOT filtered out.

    Examples:
        The rule ((G1 and G2) or G3) -> R1, becomes:

        (G1, G1:G2, R1)
        (G2, G1:G2, R1)
        (G3, G3,    R1)

    Args:
        model_specific_data (pandas.DataFrame): model specific data; each row
            must have exactly six fields, of which the first (reaction),
            second (model id) and last (GPR string) are used
        reactions (list): only extract data for given reactions (optional)
        outputfile (str): output CSV file (optional)

    Returns:
        pandas.DataFrame: GPR association table
    """
    records = []

    for _, entry in model_specific_data.iterrows():
        rxn, model_id, _, _, _, gpr_str = entry
        r_id = 'R_' + rxn

        # Skip reactions outside the requested subset.
        if reactions is not None and r_id not in reactions:
            continue
        # Skip rows without a GPR rule.
        if not pd.notnull(gpr_str):
            continue

        rule = parse_gpr_rule(gpr_str)
        for protein in rule.proteins:
            subunits = sorted(set(protein.genes))
            stripped_ids = [subunit[2:] for subunit in subunits]
            p_id = 'P_' + ':'.join(stripped_ids)
            records.extend((subunit, p_id, r_id, model_id) for subunit in subunits)

    df = pd.DataFrame(records, columns=['gene', 'protein', 'reaction', 'model'])

    if outputfile:
        df.to_csv(outputfile, index=False)

    return df
예제 #4
0
def read_cbmodel_from_file(filename, gpr_filename=None):
    """ Read a model with BiooptParser and optionally attach GPR associations.

    Args:
        filename (str): path of the model file handed to BiooptParser.parse_file
        gpr_filename (str): path of a tab-separated file with one
            "<reaction id><TAB><GPR rule>" entry per line (optional)

    Returns:
        the model produced by BiooptParser.parse_file, with a GPR association
        set for every listed reaction that exists in the model and has a
        non-empty rule

    Raises:
        ValueError: if a non-blank line of the GPR file does not contain
            exactly two tab-separated columns
    """
    parser = BiooptParser()
    model = parser.parse_file(filename)

    if gpr_filename:
        with open(gpr_filename, "r") as gpr_file:
            for line_number, line in enumerate(gpr_file, start=1):
                columns = [col.strip() for col in line.split("\t")]

                # Blank lines (e.g. padding at the end of the file) carry no data.
                if not any(columns):
                    continue

                if len(columns) != 2:
                    raise ValueError(
                        "Line {} of {} must have exactly two tab-separated "
                        "columns, found {}".format(line_number, gpr_filename, len(columns)))

                r_id, gpr_string = columns

                if r_id not in model.reactions:
                    warnings.warn(UserWarning("Reaction {} from GPR was not found in model".format(r_id)))
                    continue

                # A known reaction with an empty rule has nothing to associate;
                # it must not be reported as "not found in model".
                if not gpr_string:
                    continue

                gpr = parse_gpr_rule(gpr_string)
                # Register genes used by the rule that the model does not know yet.
                for gene_id in gpr.get_genes():
                    if gene_id not in model.genes:
                        model.add_gene(Gene(elem_id=gene_id))

                model.set_gpr_association(r_id, gpr)

    return model
예제 #5
0
def carve_model(model,
                reaction_scores,
                outputfile=None,
                flavor=None,
                inplace=True,
                default_score=-1.0,
                uptake_score=0.0,
                soft_score=1.0,
                soft_constraints=None,
                hard_constraints=None,
                ref_model=None,
                ref_score=0.0,
                init_env=None,
                debug_output=None):
    """ Reconstruct a metabolic model using the CarveMe approach.

    Args:
        model (CBModel): universal model
        reaction_scores (pandas.DataFrame): reaction scores; the 'reaction',
            'normalized_score' and 'GPR' columns are read
        outputfile (str): write model to SBML file (optional)
        flavor (str): SBML flavor ('cobra' or 'fbc2', optional)
        inplace (bool): Change model in place (default: True)
        default_score (float): penalty for non-annotated intracellular reactions (default: -1.0)
        uptake_score (float): penalty for utilization of extracellular compounds (default: 0.0)
        soft_score (float): score for soft constraints (default: 1.0)
        soft_constraints (dict): dictionary from reaction id to expected flux direction (-1, 1, 0);
            entries for reactions missing from the model are dropped with a warning
        hard_constraints (dict): dictionary of flux bounds (reaction id -> (lb, ub));
            entries for reactions missing from the model are dropped with a warning
        ref_model: reference model (optional); the reactions it shares with `model`
            are passed to the reduction step as `ref_reactions`
        ref_score (float): score forwarded to the reduction step together with
            `ref_reactions` (default: 0.0)
        init_env (Environment): initialize final model with given Environment (optional)
        debug_output (str): forwarded to the reduction step; also used as the
            prefix of the '<debug_output>_milp_solution.tsv' dump (optional)

    Returns:
        CBModel: reconstructed model, or None if the MILP solver did not reach
        an optimal solution
    """

    if not inplace:
        model = model.copy()

    scores = dict(reaction_scores[['reaction', 'normalized_score']].values)

    if soft_constraints:
        not_in_model = set(soft_constraints) - set(model.reactions)
        if not_in_model:
            soft_constraints = {
                r_id: val
                for r_id, val in soft_constraints.items()
                if r_id in model.reactions
            }
            warnings.warn(
                "Soft constraints contain reactions not in the model:\n" +
                "\n".join(not_in_model))

    if hard_constraints:
        not_in_model = set(hard_constraints) - set(model.reactions)
        if not_in_model:
            hard_constraints = {
                r_id: (lb, ub)
                for r_id, (lb, ub) in hard_constraints.items()
                if r_id in model.reactions
            }
            warnings.warn(
                "Hard constraints contain reactions not in the model:\n" +
                "\n".join(not_in_model))

    if ref_model:
        ref_reactions = set(model.reactions) & set(ref_model.reactions)
    else:
        ref_reactions = None

    sol = minmax_reduction(model,
                           scores,
                           default_score=default_score,
                           uptake_score=uptake_score,
                           soft_score=soft_score,
                           soft_constraints=soft_constraints,
                           hard_constraints=hard_constraints,
                           ref_reactions=ref_reactions,
                           ref_score=ref_score,
                           debug_output=debug_output)

    if sol.status != Status.OPTIMAL:
        print("MILP solver failed: {}".format(sol.message))
        return

    inactive = inactive_reactions(model, sol)

    if debug_output:
        pd.DataFrame.from_dict(sol.values, orient='index').to_csv(
            debug_output + '_milp_solution.tsv', sep='\t', header=False)

    model.remove_reactions(inactive)

    del_metabolites = disconnected_metabolites(model)
    model.remove_metabolites(del_metabolites)

    for _, row in reaction_scores.iterrows():
        r_id = row['reaction']
        if r_id in model.reactions:
            try:
                gpr = parse_gpr_rule(row['GPR'], prefix='G_')
                model.set_gpr_association(r_id, gpr, add_genes=True)
            except Exception:
                # Best effort: a bad GPR must not abort the reconstruction, but
                # KeyboardInterrupt/SystemExit are no longer swallowed here.
                print('Failed to parse:', row['GPR'])

    cleanup_metadata(model)

    if init_env:
        init_env.apply(model, inplace=True, warning=False)

    if outputfile:
        save_cbmodel(model, outputfile, flavor=flavor)

    return model