예제 #1
0
class PathwayHandler(object):

    def __init__(self,compounds_database = None, compounds_converter = None):
        """
        Create a handler with empty pathway/target maps and set the biocyc organism to 'meta'.

        :param compounds_database: compounds database queried for compound containers;
            a new ModelSeedCompoundsDB is created when the argument is falsy
        :param compounds_converter: converter from database identifiers to ModelSEED identifiers;
            a new CompoundsIDConverter is created when the argument is falsy
        """
        self.target_pathways_map = {}
        self.__pathways_map = {}
        self.taxID_map = {}
        self.__home_path__ = ROOT_DIR

        # fall back to freshly built collaborators when none were supplied
        self.__compoundsIdConverter = compounds_converter if compounds_converter else CompoundsIDConverter()
        self.__compounds_database = compounds_database if compounds_database else ModelSeedCompoundsDB()

        # NOTE(review): neither the compounds ontology nor the configs read by
        # __set_compounds_ontology are initialised here - confirm where they are set.

        biocyc.set_organism('meta')

    @property
    def target_pathways_map(self):
        """
        Mapping filled by ``add_metacyc_pathway``/``load_from_file`` as pathway identifier -> list of targets.
        """
        return self.__target_pathways_map

    @target_pathways_map.setter
    def target_pathways_map(self,target_pathways_map):
        """
        Replace the whole mapping with the given object (no copy is made).
        """
        self.__target_pathways_map = target_pathways_map

    def __set_compounds_ontology(self):
        """
        Create the compounds ontology graph and load it from the configured ontology and entities files.
        """
        # NOTE(review): self.__configs is not assigned anywhere in the visible part of this class -
        # confirm it is initialised before this method runs.
        ontology = CompoundOntologyGraph()
        self.__compounds_ontology = ontology

        # both file locations are configured relative to the home path
        ontology_file = self.__home_path__ + self.__configs["compounds_ontology_file"]
        entities_file = self.__home_path__ + self.__configs["compounds_entities_file"]
        ontology.load_ontology(ontology_file, entities_file)

    def get_targets(self, pathwayid):
        """
        Return the targets stored for a pathway.

        :param pathwayid: key of the pathway in the target pathways map
        :return: the value stored under that key
        :raises KeyError: if the pathway is not in the map
        """
        targets_by_pathway = self.target_pathways_map
        return targets_by_pathway[pathwayid]

    def save_in_file(self,filename):
        with open(filename,"w") as file:
            for pathway in self.target_pathways_map:
                lst = self.target_pathways_map[pathway]
                new_lst = []
                temp_lst = []
                for elem in lst:
                    ms_number = elem.split("cpd")[1]
                    if int(ms_number)>100:
                        new_lst.append(elem)

                if not new_lst:
                    new_lst = temp_lst.copy()

                if new_lst:
                    file.write(">"+ ",".join(new_lst) + "\n")
                    path = self.get_path_from_pathway(pathway)

                    file.write(pathway+": " + ">".join(path) +"\n")



    def load_from_file(self,filename):
        """
        Load pathways and targets from a file in the format written by ``save_in_file``.

        A line starting with ``>`` holds comma-separated targets. Each following
        ``<pathway id>: a>b>c`` line (up to the next ``>`` line) is registered with those targets and
        its path is rebuilt as a MetabolicNetwork: a single vertex for a one-element path,
        consecutive edges otherwise.

        :param str filename: path of the file to read
        :raises IndexError: if a non-blank pathway line lacks the ``": "`` separator
        """
        with open(filename,"r") as file:
            targets = []
            for line in file:
                # blank lines (e.g. a trailing newline) carry no record; they used to raise IndexError
                if not line.strip():
                    continue

                if line.startswith(">"):
                    targets = line.replace(">","").strip().split(",")
                    continue

                # split only once so a ": " inside the path cannot truncate it
                pathwayid_path = line.split(": ", 1)
                pathway_id = pathwayid_path[0]
                path = pathwayid_path[1].strip().split(">")

                network = MetabolicNetwork()
                if len(path) == 1:
                    network.add_vertex(path[0])
                else:
                    for source, destination in zip(path, path[1:]):
                        network.add_edge(source, destination)

                self.target_pathways_map[pathway_id] = targets
                self.__pathways_map[pathway_id] = network


    def get_networks_from_target(self,target_modelseedid):
        """
        Return the entry of the target pathways map stored under the given key.

        :param target_modelseedid: key looked up in the target pathways map
        :return: the value stored under that key
        :raises KeyError: if the key is not in the map
        """
        # NOTE(review): add_metacyc_pathway fills this map as pathway id -> list of targets,
        # which does not match this method's name - confirm the intended key.
        mapping = self.target_pathways_map
        return mapping[target_modelseedid]

    def get_target_pathways(self):
        """
        Return the target pathways map itself (not a copy).
        """
        return self.target_pathways_map

    def get_network_from_target_and_pathway_id(self,target_modelseedid,metacyc_pathway_id):
        """
        Look up a nested entry of the target pathways map.

        :param target_modelseedid: first-level key
        :param metacyc_pathway_id: key looked up in the first-level value
        :return: the nested value, or None when the second key is missing
        :raises AttributeError: if the first-level key is missing (the lookup yields None)
        """
        # NOTE(review): this expects dict values, while add_metacyc_pathway stores lists - verify the callers.
        first_level = self.target_pathways_map.get(target_modelseedid)
        return first_level.get(metacyc_pathway_id)


    def add_metacyc_pathway(self,metacyc_pathway_id,generic = False):
        """
        Fetch a pathway through biocyc, build its network and register it with its targets.

        The targets are the ModelSEED identifiers of the right-hand compounds of the last reaction
        returned by ``network.get_pathway()``. Nothing is registered when that list is empty.

        :param metacyc_pathway_id: identifier passed to ``biocyc.get``
        :param bool generic: if True the generic scraper is used, otherwise the complete one
        """
        biocyc.set_organism('meta')
        ptw = biocyc.get(metacyc_pathway_id)

        network = self.scrap_metacyc_path(ptw,generic)
        # the conversion result is not used here
        self.convert_compounds_into_modelseedid(network)

        reaction_ids = network.get_pathway()
        if not reaction_ids:
            return

        final_reaction = biocyc.get(reaction_ids[-1])
        targets = []
        for product in final_reaction.compounds_right:
            converted = self.__compoundsIdConverter.convert_dbID_into_modelSeedId("MetaCyc",product.id)
            if converted is not None:
                targets.extend(converted)

        self.__pathways_map[ptw.id] = network
        self.target_pathways_map[ptw.id]=targets

    def get_pathways(self):
        """
        Return the internal pathway identifier -> network map itself (not a copy).
        """
        return self.__pathways_map

    def get_network_by_metacyc_pathway_id(self,id):
        """
        Return the network registered for a pathway.

        :param id: pathway identifier used as key of the pathways map
        :return: the stored network
        :raises KeyError: if no network is registered under that identifier
        """
        networks = self.__pathways_map
        return networks[id]

    def scrap_metacyc_path(self,ptw,generic):
        """
        Build the network of a pathway with the generic or the complete scraper.

        :param ptw: pathway object handed to the selected scraper
        :param generic: truthy to use the generic scraper, falsy for the complete one
        :return: the network built by the selected scraper
        """
        scraper = self.__scrap_generic_metacyc_path if generic else self.__scrap_metacyc_path
        return scraper(ptw)

    def scrap_kegg_pathway(self,kegg_pathway):
        """
        Wrap a KEGG pathway identifier in a KeggPathway and build its generic network.

        :param kegg_pathway: value passed as first argument to ``KeggPathway``
        :return: the network built by the generic KEGG scraper
        """
        return self.__scrap_generic_kegg_path(KeggPathway(kegg_pathway, False))

    def __scrap_generic_kegg_path(self,pathway):
        """
        Build a metabolite network from the R-group compounds of a KEGG pathway.

        A reactant or product is kept when the formula of one of its ModelSEED counterparts
        contains "R". Kept compounds are linked to their reaction in both directions and the
        resulting reaction graph is converted with ``MetabolicNetwork.convert_reaction_graph``.

        :param pathway: object whose ``get_reactions()`` yields values accepted by ``KeggReaction``
        :return MetabolicNetwork: the network filled by ``convert_reaction_graph``
        """
        network = MetabolicNetwork()

        for reaction in pathway.get_reactions():
            reaction = KeggReaction(reaction)
            reaction_id = reaction.id

            reactants = reaction.get_reactants()
            products = reaction.get_products()

            for reactant in reactants:
                reactant = KeggCompound(reactant)
                ids = self.__compoundsIdConverter.convert_dbID_into_modelSeedId("KEGG", reactant.id)
                # the converter may return None for unknown compounds (other callers in this
                # class check for it), so treat that as "no counterpart"
                for modelseed_id in ids or []:
                    compound_container = self.__compounds_database.get_compound_by_id(modelseed_id)
                    if "R" in compound_container.getFormula():
                        # the reaction node is only created once a kept reactant is found
                        network.add_vertex(reaction_id)
                        network.node_types["reaction"].append(reaction_id)

                        network.node_types["metabolite"].append(reactant.id)
                        network.add_vertex(reactant.id)
                        network.add_edge(reactant.id, reaction_id)
                        network.add_edge(reaction_id, reactant.id)
                        # one matching counterpart is enough for this compound
                        break

            for product in products:
                product = KeggCompound(product)
                ids = self.__compoundsIdConverter.convert_dbID_into_modelSeedId("KEGG", product.id)
                for modelseed_id in ids or []:
                    compound_container = self.__compounds_database.get_compound_by_id(modelseed_id)
                    if "R" in compound_container.getFormula():
                        network.node_types["metabolite"].append(product.id)
                        network.add_vertex(product.id)
                        network.add_edge(reaction_id, product.id)
                        network.add_edge(product.id, reaction_id)
                        break

        metabolite_network = MetabolicNetwork()
        metabolite_network.convert_reaction_graph(network)
        return metabolite_network

    def __get_reactants_and_products_by_r_groups(self,reaction):

        products = reaction.get_products()
        reactants = reaction.get_reactants()

        products_r_groups = 0
        reactants_r_groups = 0

        for product in products:
            product = KeggCompound(product)
            formula = product.get_formula()
            if "R" in formula:
                match = re.match(formula, "R")
                start = match.start()
                number = formula[start+1]

                if re.match(number,"[2-9]"):
                    number= int(number)
                    products_r_groups+=number
                else:
                    products_r_groups+=1

        for reactant in reactants:
            reactant = KeggCompound(reactant)
            formula = reactant.get_formula()
            if "R" in formula:
                match = re.match(formula, "R")
                start = match.start()
                number = formula[start + 1]

                if re.match(number, "[2-9]"):
                    number = int(number)
                    reactants_r_groups += number
                else:
                    reactants_r_groups += 1

        if reactants_r_groups >= products_r_groups:

            return reactants,products

        else:
            return products,reactants

    def __scrap_metacyc_path(self,ptw):
        """
        Build a metabolite network from every reaction of a pathway, ignoring protons.

        Each reaction becomes a node; its left compounds get an edge into the reaction and its
        right compounds an edge out of it. Compounds whose id is "PROTON" are skipped. The
        reaction graph is then converted with ``MetabolicNetwork.convert_reaction_graph``.

        :param ptw: pathway object exposing ``reactions``
        :return MetabolicNetwork: the network filled by ``convert_reaction_graph``
        """
        reaction_graph = MetabolicNetwork()

        for rxn in ptw.reactions:
            rxn_id = rxn.id
            reaction_graph.add_vertex(rxn_id)
            reaction_graph.node_types["reaction"].append(rxn_id)

            left_side = rxn.compounds_left
            right_side = rxn.compounds_right

            for compound in left_side:
                if compound.id == "PROTON":
                    continue
                reaction_graph.node_types["metabolite"].append(compound.id)
                reaction_graph.add_vertex(compound.id)
                reaction_graph.add_edge(compound.id,rxn_id)

            for compound in right_side:
                if compound.id == "PROTON":
                    continue
                reaction_graph.node_types["metabolite"].append(compound.id)
                reaction_graph.add_vertex(compound.id)
                reaction_graph.add_edge(rxn_id, compound.id)

        result = MetabolicNetwork()
        result.convert_reaction_graph(reaction_graph)
        return result

    def __scrap_generic_metacyc_path(self,ptw):
        """
        Build a metabolite network from the R-group compounds of a pathway.

        Every reaction becomes a node. A left/right compound is linked to it when the formula of
        one of its ModelSEED counterparts contains "R". The reaction graph is then converted with
        ``MetabolicNetwork.convert_reaction_graph``.

        :param ptw: pathway object exposing ``reactions``
        :return MetabolicNetwork: the network filled by ``convert_reaction_graph``
        """
        network = MetabolicNetwork()

        for reaction in ptw.reactions:
            reaction_id = reaction.id

            network.add_vertex(reaction_id)
            network.node_types["reaction"].append(reaction_id)

            reactants = reaction.compounds_left
            products = reaction.compounds_right

            # the sides of this one reaction are swapped; the reason is not visible in this file
            if reaction_id == "RXN-17728":
                reactants = reaction.compounds_right
                products = reaction.compounds_left

            for reactant in reactants:
                ids = self.__compoundsIdConverter.convert_dbID_into_modelSeedId("MetaCyc",reactant.id)
                # the converter may return None for unknown compounds (other callers in this
                # class check for it), so treat that as "no counterpart"
                for modelseed_id in ids or []:
                    compound_container = self.__compounds_database.get_compound_by_id(modelseed_id)
                    if "R" in compound_container.getFormula():
                        network.node_types["metabolite"].append(reactant.id)
                        network.add_vertex(reactant.id)
                        network.add_edge(reactant.id, reaction_id)
                        break

            for product in products:
                ids = self.__compoundsIdConverter.convert_dbID_into_modelSeedId("MetaCyc", product.id)
                for modelseed_id in ids or []:
                    compound_container = self.__compounds_database.get_compound_by_id(modelseed_id)
                    if "R" in compound_container.getFormula():
                        network.node_types["metabolite"].append(product.id)
                        network.add_vertex(product.id)
                        network.add_edge(reaction_id, product.id)
                        # stop at the first match, as the reactants loop does, so a product
                        # with several matching counterparts is registered only once
                        break

        metabolite_network = MetabolicNetwork()
        metabolite_network.convert_reaction_graph(network)
        return metabolite_network

    def convert_compounds_into_modelseedid(self,network,db="MetaCyc"):
        res = {}
        # self.__babel = {}
        for compound in network.get_nodes():
            modelseedids = self.__compoundsIdConverter.convert_dbID_into_modelSeedId(db,compound)
            if modelseedids is not None:
                # self.__babel[compound] = modelseedid
                res[compound] = modelseedids
        return res


    def get_path_from_pathway(self,pathway_id):
        network = self.__pathways_map[pathway_id]
        return network.get_complete_pathway()


    def get_generic_targets_and_found_pathways(self,taxid):
        """
        Group the pathways found for a taxon by target, merging sibling targets under their parent.

        Targets unknown to the compounds ontology are ignored. Two non-generic targets that share
        the same first "is_a" successor are grouped under the ModelSEED identifier of that parent.
        Every remaining target whose pathway was not grouped is listed under its own identifier.

        :param taxid: taxonomy identifier forwarded to ``get_pathway_by_taxID``
        :return dict: ModelSEED identifier (shared parent or single target) -> list of unique
            pathway identifiers
        """
        # NOTE(review): self.__compounds_ontology is not assigned in __init__ - confirm it is
        # loaded before this method is called.

        # keep only targets known to the ontology; a target reached by several pathways keeps the last one
        targets_paths = {}
        for path in self.get_pathway_by_taxID(taxid):
            for target in self.target_pathways_map[path]:
                if self.__compounds_ontology.get_entity_id_by_model_seed_id(target):
                    targets_paths[target] = path

        targets = list(targets_paths)
        res = {}

        # compare every unordered pair of targets exactly once
        for position, first_target in enumerate(targets):
            for second_target in targets[position + 1:]:
                entityid1 = self.__compounds_ontology.get_entity_id_by_model_seed_id(first_target)
                entityid2 = self.__compounds_ontology.get_entity_id_by_model_seed_id(second_target)

                container1 = self.__compounds_ontology.get_entity_container_by_id(entityid1)
                container2 = self.__compounds_ontology.get_entity_container_by_id(entityid2)

                if container1.isGeneric() or container2.isGeneric():
                    continue

                parent1 = self.__compounds_ontology.get_successors_by_relationship_type(entityid1,"is_a")
                parent2 = self.__compounds_ontology.get_successors_by_relationship_type(entityid2,"is_a")

                # entities without an "is_a" parent cannot be siblings (this used to raise IndexError)
                if not parent1 or not parent2 or parent1[0] != parent2[0]:
                    continue

                parent_modelseedid = self.__compounds_ontology.get_entity_modelseedid(parent1[0])
                res.setdefault(parent_modelseedid, []).extend(
                    [targets_paths[first_target], targets_paths[second_target]])

        # pathways already attached to a parent are not listed again under their own target
        grouped_paths = self.get_unique([path for paths in res.values() for path in paths])
        for target in targets:
            path = targets_paths[target]
            if path in grouped_paths:
                continue
            if target not in res:
                res[target] = [path]
            elif path not in res[target]:
                res[target].append(path)

        for key in res:
            res[key] = self.get_unique(res[key])

        return res

    def get_unique(self,lst):
        """
        Return the elements of a list without duplicates, keeping first-occurrence order.

        Elements are compared by equality, so unhashable elements are supported.

        :param lst: iterable of elements
        :return list: new list with each distinct element once
        """
        seen = []
        for candidate in lst:
            if candidate in seen:
                continue
            seen.append(candidate)
        return seen

    def get_pathway_by_target(self,target):
        """
        Return the pathways that list a given target.

        :param target: target looked for in the values of the target pathways map
        :return list: keys of the map whose value contains the target, in map order
        """
        return [pathway_id for pathway_id in self.target_pathways_map
                if target in self.target_pathways_map[pathway_id]]

    def get_pathway_by_taxID(self,taxID):
        """
        Return the stored pathways whose taxonomic range intersects the lineage of a taxon.

        :param taxID: taxonomy identifier passed to ``NCBITaxa.get_lineage``
        :return list: pathway identifiers (keys of the target pathways map), each listed once
        """
        taxa = NCBITaxa()
        # a set gives constant-time membership tests for every taxonomic range entry
        lineage = set(taxa.get_lineage(taxID))

        paths_tax_range = []
        for path in self.target_pathways_map:
            pathway = biocyc.get(path)

            # any() stops at the first hit, so a pathway whose range matches several ranks of
            # the lineage is reported once instead of once per match
            if any(int(s.id.replace("TAX-","")) in lineage for s in pathway.taxonomic_range):
                paths_tax_range.append(path)

        return paths_tax_range

    def get_all_paths_using_pathway_id(self,pathway_id,accept_cycles=False):
        network = self.get_network_by_metacyc_pathway_id(pathway_id)
        if not accept_cycles:
            network.prune_redundant_cycles()
        pathways = network.get_all_target_pathways()
        return pathways

    def get_path_model_seed_ids(self,ptw):
        network = self.__pathways_map[ptw]
        dict = self.convert_compounds_into_modelseedid(network)
        return dict

    def get_pathway_by_taxid_and_target(self,taxid,target):
        """
        Return the first pathway that matches a taxon and produces a target.

        :param taxid: taxonomy identifier forwarded to ``get_pathway_by_taxID``
        :param target: target that must be listed for the pathway
        :return: the first pathway identifier found for the taxon that lists the target,
            or None when there is none
        """
        pathways = self.get_pathway_by_target(target)
        if not pathways:
            # nothing produces the target, so the taxonomy lookups cannot change the answer
            return None

        # reuse the shared taxonomic filter instead of repeating its loop here
        for path in self.get_pathway_by_taxID(taxid):
            if path in pathways:
                return path

        return None
예제 #2
0
class ReactionsChanger:

    def __init__(self, model : Model, type:int , model_database : str,
                 universal_model: Model,compounds_converter=None):
        """
        Class constructor

        :param cobrapy.Model model: COBRApy model
        :param int type: type of change (0,1,2 or 3)
        :param string model_database: database format of metabolites and reactions
        :param cobrapy.Model universal_model: universal model; an empty Model is used when falsy
        :param (optional) CompoundsIDConverter compounds_converter: a compound identifier converter;
            a new CompoundsIDConverter is created when falsy
        """

        # fall back to freshly built collaborators when none were supplied
        self.__compoundsIDConverter = compounds_converter if compounds_converter else CompoundsIDConverter()
        self.__universal_model = universal_model if universal_model else Model()

        self.report_material = {}
        self.__type = type
        self.__model_database = model_database
        self.__model = model
        self.__modelseed_hydrogen = "cpd00067"
        self.__reactionBalancer = CompoundsRevisor(self.__model)
        self.__not_found_reactions_num = 0

        self.__home_path__ = ROOT_DIR

        # configuration files: tool settings plus compound and reaction annotation keys
        read_conf = file_utilities.read_conf_file
        self.__configs = read_conf(TOOL_CONFIG_PATH)
        self.__compoundsAnnotationConfigs = read_conf(COMPOUNDS_ANNOTATION_CONFIGS_PATH)
        self.__reactionsAnnotationConfigs = read_conf(REACTIONS_ANNOTATION_CONFIGS_PATH)


    @property
    def model(self):
        """
        The model whose reactions are changed by this object.
        """
        return self.__model

    @model.setter
    def model(self,value):
        """
        Replace the model. The reaction balancer created in the constructor is not rebuilt here.
        """
        self.__model = value


    def swap_reactions_by_compounds(self, compounds_with_reactions_to_change : list) -> list:
        """
        This method swap all the reactions associated to a list of compounds.
        Firstly, it tries to balance the reactions and then tries to change the reaction.

        :param list compounds_with_reactions_to_change: list of Model.metabolites with reactions to change.
        :return list: reactions changed
        """
        past = []
        reaction_changed = []
        for compound in compounds_with_reactions_to_change:
            reactions = self.get_reactions_by_compound(compound)
            for reaction in reactions:
                if reaction not in past and reaction.annotation.get("sbo") != "SBO:0000629" \
                        and "Biomass" not in reaction.id:

                    past.append(reaction)

                    self.change_reaction(reaction)

                    reaction_changed.append(reaction)

        return reaction_changed

    def get_reactions_by_compound(self,compound):
        """
        Return all the reactions associated with a given compound

        :param Metabolite compound:
        :return list<Reaction>: all the reactions associated with a given compound
        """
        res=[]
        for reaction in self.__model.reactions:
            metabolites_ids = [m.id for m in reaction.metabolites]

            if compound.id in metabolites_ids:
                res.append(reaction)

        return res

    def swap_reactions(self, reactions_to_change) -> list:
        """
        This method swap all the reactions associated to a list of compounds.
        Firstly, it tries to balance the reactions and then tries to change the reaction.

        :param list reactions_to_change: list of Model.reactions with reactions to change.
        :return list: list of changed reactions
        """
        past = []
        reaction_changed = []
        for reaction in reactions_to_change:
            if reaction not in past and reaction.annotation.get("sbo") != "SBO:0000629" and "Biomass" not in reaction.id:
                past.append(reaction.copy())

                self.change_reaction(reaction)

                reaction_changed.append(reaction)

        return reaction_changed


    def change_reaction(self, reaction):
        """
        Change the id, name and annotation of a reaction whose metabolites were swapped.

        The current implementation always treats the reaction as "not found": it delegates to
        ``__change_not_found_reaction``, which assigns a generated id, prefixes the name with
        "Changed - " and clears the annotation. No ontology or reaction database is searched here.

        :param Model.reaction reaction: reaction to be changed.

        """

        # Disabled lookup of the reaction annotation key of the model database:
        # if self.__model_database in self.__reactionsAnnotationConfigs:
        #     database = self.__reactionsAnnotationConfigs[self.__model_database]
        # else:
        #     raise Exception


        self.__change_not_found_reaction(reaction)

    def __change_not_found_reaction(self, reaction):
        """
        This method will change a not found reaction in the database

        :param Model.reaction reaction: reaction to be changed.

        """

        self.change_boimmg_reaction_id(reaction)

        reaction.name = "Changed - "+ reaction.name
        reaction.annotation = {}

    def change_boimmg_reaction_id(self,reaction : Reaction):

        new_id = self.__reactionsAnnotationConfigs["BOIMMG_ID_CONSTRUCTION"]
        metabolites = reaction.metabolites

        metabolites = sorted([metabolite.id for metabolite in metabolites])
        new_id += "_".join(metabolites)

        self.report_material[reaction.id] = new_id
        reaction.id = new_id

        return reaction.id


    def get_database_ids_compounds_in_model(self,compounds_in_model):
        """
        This method searchs in a given database the ids of the compounds in the model

        :param dict compounds_in_model: model compounds assigned to a model reaction

        :returns list: list with database ids of the metabolites assigned to a model reaction

        """

        res = []
        for compound in compounds_in_model.keys():
            modelseed_id_annotation = compound.annotation.get("seed.compound")

            if modelseed_id_annotation and type(modelseed_id_annotation) == str:
                modelseed_id=[modelseed_id_annotation]

            else:
                modelseed_id=modelseed_id_annotation

            if self.__model_database!="ModelSEED" and not modelseed_id:
                db_id = compound.annotation.get(self.__compoundsAnnotationConfigs[self.__model_database])
                if type(db_id) == list:

                    i=0
                    found=False

                    while not found and i<len(db_id):
                        modelseed_id = self.__compoundsIDConverter.convert_dbID_into_modelSeedId(self.__model_database,
                                                                                                 db_id[i])

                        if modelseed_id:
                            found=True
                else:
                    modelseed_id = self.__compoundsIDConverter.convert_dbID_into_modelSeedId(self.__model_database,
                                                                                             db_id)

            if not modelseed_id:
                return []

            else:
                if not res:
                    for id in modelseed_id:
                        res.append([id])
                else:
                    if len(modelseed_id) == 1:
                        for reaction in res:
                            reaction.append(modelseed_id[0])
                    else:
                        previous = res.copy()
                        res=[]
                        for id in modelseed_id:
                            for reaction in previous:
                                new_reaction = reaction.copy()
                                new_reaction.append(id)
                                res.append(new_reaction)

        final_res = []
        for reaction in res:
            final_res.append(sorted(reaction))
        return final_res

    def set_type2_is_generic_in_model(self, isGenericInModel):
        """
        Store the state of the metabolite in the model.

        The value is only stored; nothing else is updated by this method.

        :param boolean isGenericInModel: if True -> Generic; if False -> Complete
        :return: None
        """
        self.__set_type2_is_generic_in_model = isGenericInModel

    def set_type(self, new_type):

        """
        Store the type of swap, replacing the value given to the constructor.

        :param int new_type: Type of swap (0,1,2,3)
        :return: None
        """

        self.__type = new_type