Esempio n. 1
0
def simulatingDels():
    """Print knock-out and single-deletion results for the cobra textbook model.

    The function loads the "textbook" test model, prints the optimisation
    result for the intact model, then for a PFK reaction knock-out and for
    successive knock-outs of genes b1723 and b3916. Finally it prints the
    single gene / single reaction deletion tables for the first 20 genes and
    the first 20 reactions.

    Everything is written to stdout; nothing is returned.
    """
    import cobra.test
    from cobra.flux_analysis import (single_gene_deletion,
                                     single_reaction_deletion)

    cobra_model = cobra.test.create_test_model("textbook")

    print('complete model: ', cobra_model.optimize())
    # The knock-out is made inside the model's context manager; the second
    # 'complete model' print below relies on it being undone on exit.
    with cobra_model:
        cobra_model.reactions.PFK.knock_out()
        print('pfk knocked out: ', cobra_model.optimize())

    print('complete model: ', cobra_model.optimize())
    with cobra_model:
        cobra_model.genes.b1723.knock_out()
        print('pfkA knocked out: ', cobra_model.optimize())
        # b1723 is still knocked out here, so this line reports the state
        # with both genes removed.
        cobra_model.genes.b3916.knock_out()
        print('pfkB knocked out: ', cobra_model.optimize())

    # Only subsets are evaluated to keep the demonstration short.
    print(single_gene_deletion(cobra_model, cobra_model.genes[:20]))
    print(single_reaction_deletion(cobra_model, cobra_model.reactions[:20]))
    print('Hello world!')
Esempio n. 2
0
def capitulo_5():
    """Run the deletion examples and log results to resultados_capitulo_5.txt.

    Writes the optimisation result of the textbook model (intact, with PFK
    knocked out, and with genes b1723 / b3916 knocked out), runs several
    single and double deletion analyses, and records how long a double gene
    deletion on a subset of the "ecoli" model takes with 2 processes versus
    1 process, together with the resulting speed-up.

    Relies on ``cobra``, ``time`` and the deletion functions being available
    at module level. Returns nothing.
    """
    # The context manager guarantees the report file is closed even when one
    # of the (long-running) analyses raises.
    with open("resultados_capitulo_5.txt", "w") as out:
        cobra_model = cobra.test.create_test_model("textbook")
        ecoli_model = cobra.test.create_test_model("ecoli")
        out.write(str(cobra_model.optimize()))
        out.write("\n")
        with cobra_model:
            cobra_model.reactions.PFK.knock_out()
            out.write(str(cobra_model.optimize()))
            out.write("\n")
        out.write(str(cobra_model.optimize()))
        out.write("\n")
        with cobra_model:
            cobra_model.genes.b1723.knock_out()
            out.write(str(cobra_model.optimize()))
            out.write("\n")
            cobra_model.genes.b3916.knock_out()
            out.write(str(cobra_model.optimize()))
            out.write("\n")

        # These analyses are executed for demonstration only; their results
        # are not written to the report.
        single_gene_deletion(cobra_model)
        single_gene_deletion(cobra_model, cobra_model.genes[:20])
        single_reaction_deletion(cobra_model, cobra_model.reactions[:20])
        double_gene_deletion(cobra_model,
                             cobra_model.genes[-5:],
                             return_frame=True).round(4)

        # The same gene subset is used for both timed runs, and its real size
        # is reported (the message used to claim 200 genes for a 300-gene
        # slice).
        timed_genes = ecoli_model.genes[:300]

        start = time()
        double_gene_deletion(ecoli_model,
                             timed_genes,
                             number_of_processes=2)
        t1 = time() - start
        out.write("Double gene deletions for %d genes completed in "
                  "%.2f sec with 2 cores" % (len(timed_genes), t1))
        out.write("\n")

        start = time()
        double_gene_deletion(ecoli_model,
                             timed_genes,
                             number_of_processes=1)
        t2 = time() - start
        out.write("Double gene deletions for %d genes completed in "
                  "%.2f sec with 1 core" % (len(timed_genes), t2))
        out.write("\n")
        out.write("Speedup of %.2fx" % (t2 / t1))
        out.write("\n")

        double_reaction_deletion(cobra_model,
                                 cobra_model.reactions[2:7],
                                 return_frame=True).round(4)
Esempio n. 3
0
def eval_ind(individual, initial_pop, model, base_biomass, exp_ess, distance):
    """Score one candidate biomass composition against measured essentiality.

    Parameters
    ----------
    individual : sequence
        Indexed by position; an entry equal to 1 selects the entry of
        ``initial_pop.index`` at the same position.
    initial_pop : object with an ``index`` attribute
        ``index`` entries are used as metabolite keys of the new reaction.
    model : model object
        Modified in place: the solver is set to gurobi, the first reaction
        reported by ``linear_reaction_coefficients`` is removed and a
        'BIOMASS' reaction is added and made the objective.
    base_biomass : mapping
        Passed as-is to ``biomass.add_metabolites``.
    exp_ess : table
        Merged with the predictions on 'Genes'; must provide a
        'Measured_growth' column.
    distance : str
        'hd' (hamming) or 'mcc' (matthews_corrcoef).

    Returns
    -------
    tuple
        ``(dist, sum(individual))``.

    Raises
    ------
    ValueError
        If ``distance`` is neither 'hd' nor 'mcc'.
    """
    # Validate first: previously an unknown metric was only reported after the
    # expensive deletion run and then crashed on the unbound `dist`.
    if distance not in ('hd', 'mcc'):
        raise ValueError(
            "Invalid distance metric %r; expected 'hd' or 'mcc'" % (distance,))

    # NOTE: the solver is hard-coded.
    model.solver = 'gurobi'
    old_biomass = list(linear_reaction_coefficients(model).keys())[0]
    old_biomass.remove_from_model()

    # Build the candidate biomass reaction and make it the objective.
    biomass = Reaction('BIOMASS')
    model.add_reaction(biomass)
    for position, metabolite in enumerate(initial_pop.index):
        if individual[position] == 1:
            biomass.add_metabolites({metabolite: -0.1})
    biomass.add_metabolites(base_biomass)
    biomass.objective_coefficient = 1.

    # Single-process gene deletion; this dominates the run time.
    deletion_results = single_gene_deletion(model, model.genes, processes=1)

    # Binarise growth: 1 above the 1e-3 cut-off, 0 at or below it. Rows with
    # NaN growth match neither filter and are therefore left out.
    growing = deletion_results[deletion_results['growth'] > 1e-3].index
    not_growing = deletion_results[deletion_results['growth'] <= 1e-3].index
    predictions = [(str(next(iter(ids))), 1) for ids in growing]
    predictions += [(str(next(iter(ids))), 0) for ids in not_growing]
    pred_ess = pd.DataFrame(predictions, columns=['Genes', 'Predicted_growth'])
    compare_df = pd.merge(right=exp_ess, left=pred_ess, on='Genes', how='inner')

    measured = np.array(list(compare_df.Measured_growth))
    predicted = np.array(list(compare_df.Predicted_growth))
    if distance == 'hd':
        dist = hamming(measured, predicted)
    else:
        dist = matthews_corrcoef(measured, predicted)

    return dist, sum(individual)
Esempio n. 4
0
def ensemble_single_gene_deletion(ensemble,
                                  num_models=None,
                                  specific_models=None,
                                  specific_genes=None):
    '''
    Performs single gene deletions on models within an ensemble and
    returns the deletion results obtained for each selected model.

    Parameters
    ----------
    ensemble: medusa.core.Ensemble
        The ensemble with which to perform gene deletions
    num_models: int, optional
        Number of models for which gene deletions will be performed. The
        number of models indicated will be randomly sampled from
        ensemble.members. If None (default), all members are used. Ignored
        when specific_models is given.
    specific_models: list, optional
        Members for which gene deletions will be performed. Each entry is
        passed to ensemble.set_state and must expose an ``id`` attribute.
        If None or empty (default), members are sampled as described for
        num_models.
    specific_genes: list of str, optional
        List of gene.id corresponding to the genes for which deletions
        should be performed. If None or empty, all genes will be selected
        (default).
        We recommend identifying genes that are essential in all ensemble
        members first, then excluding those genes from specific_genes.
        This will generally speed up computation.

    Returns
    -------
    dict
        Maps the id of each processed member to the result returned by
        single_gene_deletion for the ensemble's base model in that member's
        state.
    '''
    if specific_models:
        model_list = specific_models
    else:
        if not num_models:
            num_models = len(ensemble.members)
        model_list = sample(ensemble.members, num_models)

    # An omitted or empty selection means "all genes". single_gene_deletion
    # expresses that with None; an empty list would request no deletions.
    gene_list = specific_genes if specific_genes else None

    deletion_results = {}
    with ensemble.base_model:
        for model in model_list:
            print('performing deletions for ' + model.id)
            ensemble.set_state(model)
            deletion_results[model.id] = single_gene_deletion(
                ensemble.base_model, gene_list)

    return deletion_results
Esempio n. 5
0
def results_genes_flux(model, rxn_name, flux_gene_off_on):
    '''
        results_genes_flux utility:

            optionally runs a single gene deletion on a copy of the model and
            a loopless flux variability analysis for the listed reactions

        Input:

            (1) model: model structure (deep-copied before each analysis)
            (2) rxn_name: list of reaction ids, e.g. ['rxn00123','rxn00456',....]
            (3) flux_gene_off_on: two-element switch list;
                element 0 == 0 skips the gene deletion,
                element 1 == 0 skips the flux variability analysis;
                any other value enables the corresponding analysis

        Output:

            [del_results, fluxes]; a skipped analysis is reported as []

        Any exception raised by the analyses is re-raised after a hint is
        printed.
    '''
    try:
        if flux_gene_off_on[0] == 0:
            del_results = []
            print('no gene del results returned')
        else:
            print('Single Gene Deletion of Model Entered')
            # Work on a copy so the caller's model is not touched.
            del_results = single_gene_deletion(copy.deepcopy(model))

        if flux_gene_off_on[1] == 0:
            fluxes = []
            print('No fluxes returned')
        else:
            model_flux = copy.deepcopy(model)
            print('Rxn flux loop entered')

            # Resolve the ids against the copy that is analysed below.
            loop_reactions = []
            for position, rxn_id in enumerate(rxn_name):
                loop_reactions.append(model_flux.reactions.get_by_id(rxn_id))
                if position % 10 == 0:
                    print('Flux reaction number', position)

            # Range of fluxes through each requested reaction.
            fluxes = flux_variability_analysis(model_flux,
                                               reaction_list=loop_reactions,
                                               loopless=True)
    except Exception:
        # The hint is printed for every failure, whatever its real cause,
        # and the original exception is propagated unchanged.
        print('You must enter a cobra model structure')
        raise

    return [del_results, fluxes]
Esempio n. 6
0
 def test_single_gene_deletion_fba(self, model):
     """Compare FBA single gene deletion growth rates with reference values."""
     # Reference growth rate per deleted gene.
     expected_growth = {
         "b0008": 0.87,
         "b0114": 0.80,
         "b0116": 0.78,
         "b2276": 0.21,
         "b1779": 0.00
     }
     rates, statuses = single_gene_deletion(
         model, gene_list=expected_growth.keys(), method="fba")
     for gene in expected_growth:
         # Every deletion must solve, and match within an absolute 0.01.
         assert statuses[gene] == 'optimal'
         assert abs(rates[gene] - expected_growth[gene]) < 0.01
Esempio n. 7
0
def list_excluded_reactions(model):
    """
    Return the ids of the reactions that are candidates for knock-out.

    A reaction is kept when all of the following hold:
      * deleting it alone does not drop growth below the cut-off,
      * it has at least one associated gene (reactions without genes, which
        presumably include the exchange reactions, are dropped),
      * none of its genes is essential on its own.

    The model is modified in place: the objective is set to 'EX_biomass',
    the lower bound of EX_glyc to -1 and the lower bound of EX_glc to 0.

    Input: model,  cobrapy model structure

    Output: list of reaction ids that can be knocked-out
    """
    model.objective = 'EX_biomass'
    model.reactions.EX_glyc.lower_bound = -1
    model.reactions.EX_glc.lower_bound = 0

    # Growth below this value after a single deletion marks it as essential.
    growth_cutoff = 10E-06

    # Rows are matched to the model by position, so the deletion tables are
    # assumed to list results in the order of the lists passed in.
    reaction_results = single_reaction_deletion(model, model.reactions[0:])
    essential_reaction_ids = {
        model.reactions[position].id
        for position, growth in enumerate(reaction_results.growth)
        if growth < growth_cutoff
    }

    gene_results = single_gene_deletion(model, model.genes[0:])
    essential_gene_ids = {
        model.genes[position].id
        for position, growth in enumerate(gene_results.growth)
        if growth < growth_cutoff
    }

    knockout_candidates = []
    for rxn in model.reactions:
        if rxn.id in essential_reaction_ids:
            continue
        if not rxn.genes:
            # No associated gene.
            continue
        # Compare gene by gene; comparing the whole gene set against the
        # list of essential genes never matched, so nothing was excluded.
        if any(gene.id in essential_gene_ids for gene in rxn.genes):
            continue
        knockout_candidates.append(rxn.id)

    return knockout_candidates
Esempio n. 8
0
 def evaluate(self, model):
     """Predict single gene essentiality with the configured parameters.

     Applies the optional medium and objective plus the stored constraints
     inside the model context, runs a single-process gene deletion for the
     genes in ``self.data["gene"]`` and returns the result table extended
     with a ``gene`` and an ``essential`` column.
     """
     with model:
         if self.medium is not None:
             self.medium.apply(model)
         if self.objective is not None:
             model.objective = self.objective
         model.add_cons_vars(self.constraints)
         knockouts = single_gene_deletion(
             model, gene_list=self.data["gene"], processes=1)

     # Each index label is a collection of ids; keep its first element.
     knockouts["gene"] = [list(ids)[0] for ids in knockouts.index]

     # Essential: growth under the configured minimum, or no growth value.
     growth = knockouts["growth"]
     below_minimum = growth < self.minimal_growth_rate
     knockouts["essential"] = below_minimum | growth.isna()
     return knockouts
    def gene_deletion(self) -> pd.DataFrame:
        """Run a single gene deletion over all model genes and tabulate it.

        The raw deletion table is printed, then repackaged with the model
        file name, the objective id, one gene id per row, the solver status
        and the growth value.

        https://cobrapy.readthedocs.io/en/latest/deletions.html
        :return: pandas.DataFrame
        """
        model = self.read_model()
        deletions = single_gene_deletion(model, model.genes)
        print(deletions)
        columns = {
            "model": self.model_path.name,
            "objective": self.objective_id,
            # Each entry of `ids` is a collection; one member is taken.
            "gene": [set(ids).pop() for ids in deletions.ids],
            "status": deletions.status,
            "value": deletions.growth,
        }
        return pd.DataFrame(columns)
Esempio n. 10
0
 def evaluate(self, model):
     """Predict single gene essentiality with the configured parameters.

     A gene counts as essential when its deletion leaves less than 10% of
     the growth computed by ``slim_optimize`` before the deletions, or when
     no growth value is available. The returned table is indexed by gene.
     """
     with model:
         if self.medium is not None:
             self.medium.apply(model)
         if self.objective is not None:
             model.objective = self.objective
         model.add_cons_vars(self.constraints)
         # Reference growth before any deletion.
         max_val = model.slim_optimize()
         knockouts = single_gene_deletion(
             model, gene_list=self.data["gene"], processes=1)

     # Each index label is a collection of ids; keep its first element.
     knockouts["gene"] = [list(ids)[0] for ids in knockouts.index]
     knockouts.index = knockouts["gene"]

     growth = knockouts["growth"]
     below_cutoff = growth < (max_val * 0.1)
     knockouts["essential"] = below_cutoff | growth.isna()
     return knockouts
Esempio n. 11
0
def _eval_metab(metab, model, exp_ess):
    """
    Evaluate the fitness of a single metabolite used as the only biomass
    component.

    :param metab: metabolite id, resolved with model.metabolites.get_by_id
    :param model: model modified in place (solver set to gurobi, the first
        objective reaction removed, a 'BIOMASS' reaction added as objective)
    :param exp_ess: table merged on 'Genes'; must provide 'Measured_growth'
    :return: matthews_corrcoef of measured versus predicted growth
    """
    # NOTE: the solver is hard-coded.
    model.solver = 'gurobi'
    objective_reactions = linear_reaction_coefficients(model)
    list(objective_reactions.keys())[0].remove_from_model()

    # One-metabolite biomass reaction, set as the objective.
    biomass = Reaction('BIOMASS')
    model.add_reaction(biomass)
    target = model.metabolites.get_by_id(metab)
    biomass.add_metabolites({target: -0.1})
    model.reactions.BIOMASS.objective_coefficient = 1.

    # Single-process gene deletion; this dominates the run time.
    deletion_results = single_gene_deletion(model, model.genes, processes=1)

    # Binarise growth around the 1e-3 cut-off (growing rows first).
    growing = deletion_results[deletion_results['growth'] > 1e-3].index
    not_growing = deletion_results[deletion_results['growth'] <= 1e-3].index
    predictions = [(str(next(iter(ids))), 1) for ids in growing]
    predictions += [(str(next(iter(ids))), 0) for ids in not_growing]
    pred_ess = pd.DataFrame(predictions, columns=['Genes', 'Predicted_growth'])

    compare_df = pd.merge(left=pred_ess, right=exp_ess, on='Genes',
                          how='inner')

    measured = np.array([value for value in compare_df.Measured_growth])
    predicted = np.array([value for value in compare_df.Predicted_growth])
    return matthews_corrcoef(measured, predicted)
Esempio n. 12
0
def run_example_6_2():
    """Print the single gene deletion results for two genes of the textbook model.

    The genes are the ones named by ``fba.ENO_gene`` and ``fba.PFK_B_gene``.
    """
    textbook = cobra.test.create_test_model("textbook")
    target_genes = [fba.ENO_gene, fba.PFK_B_gene]
    print(single_gene_deletion(cobra_model=textbook, gene_list=target_genes))
def ko_tfa(model):
    """Run ``single_gene_deletion`` on ``model`` with default settings."""
    knockout_results = single_gene_deletion(model)
    return knockout_results
import os
import matplotlib.pyplot as plt

# Load the reference model (SBML) and the CERES table, indexed by cell line.
print('--------CARGANDO DATOS----------')
sbml_fname = './Datos/models/Recon2.2.1_RPMI_trimed_gene_symbols.xml'
reference_model = read_sbml_model(sbml_fname)

df_ceres = pd.read_csv("./Datos/depmap/M19Q2_ceres_metabol_python.csv", sep = '\t')
df_ceres = df_ceres.set_index('cell_line')


# Identify essential genes: genes whose individual deletion leaves growth
# below 1% of the highest growth found among all single deletions.
print('------SINGLE GENE DELETION-----')
lista_genes = reference_model.genes
resultado_knocked_out = single_gene_deletion(reference_model, lista_genes)

rename_dict = {i:list(i)[0] for i in resultado_knocked_out.index} # Map each index label (presumably a frozenset of gene ids) to its first element
df_deletion_renamed = resultado_knocked_out.rename(rename_dict, axis=0)

threshold = 0.01 * df_deletion_renamed.growth.max() # Cut-off used below to select the deletions that fall under it

mask = df_deletion_renamed.growth < threshold 
essential = df_deletion_renamed.index[mask]

mask2 = df_deletion_renamed.growth >= threshold
non_essential = df_deletion_renamed.index[mask2]

# Transpose so that the former columns become the index, then keep only the
# genes that appear in both the model predictions and the CERES table.
df_ceres2 = df_ceres.T
essential_in_ceres = set(essential) & set(df_ceres2.index) 
non_essential_in_ceres = set(non_essential) & set(df_ceres2.index)
Esempio n. 15
0
 def run_single_gene_deletion(self, selected_solver):
     """Run a single gene deletion on ``self.model`` and store the result.

     ``selected_solver`` is accepted but not used by this method.
     """
     deletion_result = single_gene_deletion(self.model)
     # Tag the result with the analysis that produced it before storing it.
     deletion_result.method = "single_gene_deletion"
     self.add_solution(deletion_result)
Esempio n. 16
0
def _classify_essentiality(observed_grows, observed_essential, growth,
                           growth_thres):
    """Return 'TP', 'FN', 'TN' or 'FP' for one gene, or None if undecidable."""
    predicted_grows = growth > growth_thres
    predicted_essential = growth < growth_thres
    if observed_grows and predicted_grows:
        return "TP"
    if observed_grows and predicted_essential:
        return "FN"
    if observed_essential and predicted_essential:
        return "TN"
    if observed_essential and predicted_grows:
        return "FP"
    # NaN growth, growth exactly on the threshold, or an observation that is
    # neither "grows" nor "essential".
    return None


def essen_test(model_tb, dic_return, dataset_name, dataset_excel,
               growth_thresh_mult):
    """Compare predicted single-gene essentiality with an experimental dataset.

    "Positive" means the gene deletion still grows (gene not essential).

    Parameters
    ----------
    model_tb : model object
        Passed to ``single_gene_deletion`` and optimised once to obtain the
        reference objective value.
    dic_return : str
        "Yes" returns ``(false_negatives, false_positives)``; "Yes both"
        returns ``(false_negatives, false_positives, true_negatives,
        true_positives)``; any other value returns None.
    dataset_name : str
        "griffin" (columns "Locus" and "p value"; p value above 0.1 means the
        gene is observed as non-essential, below 0.1 as essential) or
        "loerger" (columns "ORF ID" and "Final Call"; NE/GA count as
        non-essential, ES/ESD/GD as essential).
    dataset_excel : pandas.DataFrame
        The experimental table, iterated row by row.
    growth_thresh_mult : float
        Fraction of the reference objective value used as growth threshold.

    Returns
    -------
    tuple of dict or None
        Dictionaries map gene id to ``[growth, p value]`` (griffin) or
        ``[growth]`` (loerger).

    Raises
    ------
    ValueError
        If ``dataset_name`` is not one of the supported datasets.
    """
    if dataset_name not in ("griffin", "loerger"):
        raise ValueError(
            "Unknown dataset_name %r; expected 'griffin' or 'loerger'"
            % (dataset_name,))

    model = model_tb

    # One lookup table instead of filtering the deletion table per row. The
    # first element of each index label is the gene id; the first occurrence
    # of a gene wins.
    growth_rates = single_gene_deletion(model)
    growth_by_gene = {}
    for ids, gene_growth in zip(growth_rates.index, growth_rates["growth"]):
        growth_by_gene.setdefault(list(ids)[0], gene_growth)

    orig_growth_thres = growth_thresh_mult * model.optimize().objective_value

    # set grif essen threshold -- iSM810 paper uses 0.1 as "confident essential"
    grif_thres = 0.1

    counts = {"TP": 0, "TN": 0, "FP": 0, "FN": 0}
    genes_by_outcome = {"TP": {}, "TN": {}, "FP": {}, "FN": {}}

    for _, row in dataset_excel.iterrows():
        if dataset_name == "griffin":
            gene = str(row["Locus"])
        else:
            gene = str(row["ORF ID"])

        if gene not in growth_by_gene:
            # Gene is not part of the model.
            continue
        growth = growth_by_gene[gene]

        if dataset_name == "griffin":
            try:
                p_value = float(row["p value"])
            except (KeyError, TypeError, ValueError):
                # Missing or non-numeric p value: the row cannot be scored.
                continue
            observed_grows = p_value > grif_thres
            observed_essential = p_value < grif_thres
            record = [growth, p_value]
        else:
            # Final Call legend: ES near 0, NE near the mean, GD about 1/10
            # of the mean, GA about 5 times the mean.
            try:
                final_call = row["Final Call"]
            except KeyError:
                continue
            observed_grows = final_call in ("NE", "GA")
            observed_essential = final_call in ("ES", "ESD", "GD")
            record = [growth]

        outcome = _classify_essentiality(observed_grows, observed_essential,
                                         growth, orig_growth_thres)
        if outcome is None:
            continue
        counts[outcome] += 1
        genes_by_outcome[outcome][gene] = record

    true_pos, true_neg = counts["TP"], counts["TN"]
    fal_pos, fal_neg = counts["FP"], counts["FN"]

    # ---Analyze and Print results ---
    print("TP - TN - FP - FN")
    print(true_pos, true_neg, fal_pos, fal_neg)

    # percent of correct predictions (undefined when nothing was classified)
    total = true_pos + true_neg + fal_pos + fal_neg
    if total:
        perc_correct = (true_pos + true_neg) / float(total)
    else:
        perc_correct = float("nan")
    print("percent correct: ", perc_correct)

    # mcc calculation; a zero denominator is reported as 0 by convention
    MCC_root = math.sqrt((true_pos + fal_pos) * (true_pos + fal_neg) *
                         (true_neg + fal_pos) * (true_neg + fal_neg))
    if MCC_root:
        MCC = (true_pos * true_neg - fal_pos * fal_neg) / MCC_root
    else:
        MCC = 0.0
    print("Matthew Correlation Coefficient", MCC)

    if dic_return == "Yes":
        return genes_by_outcome["FN"], genes_by_outcome["FP"]
    elif dic_return == "Yes both":
        return (genes_by_outcome["FN"], genes_by_outcome["FP"],
                genes_by_outcome["TN"], genes_by_outcome["TP"])
Esempio n. 17
0
                    print('------RECONSTRUYENDO EL MODELO-----')
                    # Rebuild the cell-line-specific model.
                    CORDA_builder = CORDA(reference_model, rxns_conf)
                    CORDA_builder.build()

                    csm2 = CORDA_builder.cobra_model()
                    write_sbml_model(
                        csm2, './thresholding/Th_upper_y_lower/' + cell_line +
                        '/csm2_%s_%s.sbml' % (th_l, th_u))
                    print('Numero de reacciones de modelo reconstruido:',
                          str(len(csm2.reactions)))

                    # Identify essential genes: genes whose individual
                    # deletion leaves growth below 1% of the highest growth
                    # found among all single deletions.
                    print('------SINGLE GENE DELETION-----')
                    # The keyword is `processes`; the misspelled
                    # `proprocesses` did not select single-process execution.
                    resultado_knocked_out = single_gene_deletion(
                        csm2, processes=1)
                    # Map each index label to its first element.
                    rename_dict = {
                        i: list(i)[0]
                        for i in resultado_knocked_out.index
                    }

                    print('Seleccionando los genes esenciales y no esenciales')
                    df_deletion_renamed = resultado_knocked_out.rename(
                        rename_dict, axis=0)

                    threshold = 0.01 * df_deletion_renamed.growth.max()
                    mask = df_deletion_renamed.growth < threshold
                    essential = df_deletion_renamed.index[mask]

                    mask = df_deletion_renamed.growth >= threshold
                    non_essential = df_deletion_renamed.index[mask]