Ejemplo n.º 1
0
def _map_sbml_elements(elements, element_type, mnx_dict, db_out, mapping, verbose):
    """
    Map every SBML element of one kind (reactions or species) to db_out.

    For each element, every decoded version of its id is tried in order:
    first against the internal mapping (intern_mapping), then against the
    MetaNetX dictionary (get_from_mnx). The search stops at the first decoded
    id that yields any match, so an element is counted at most once.

    Parameters
    ----------
    elements: iterable
        SBML elements exposing an ``id`` attribute
    element_type: str
        'reaction' or 'species'; forwarded to the decoding/mapping helpers
        and used as prefix of the verbose messages
    mnx_dict: dict
        MetaNetX cross-reference dictionary as returned by mnx_reader
    db_out: str
        name of the target database
    mapping: dict
        updated in place: k = original sbml id, v = mapped id
    verbose: bool
        if true: print one line per element

    Returns
    -------
    tuple:
        (number of elements mapped to exactly one id,
         number of elements with more than one MetaNetX mapping)
    """
    nb_mapped = 0
    nb_ambiguous = 0
    for element in elements:
        element_id = element.id
        decoded_ids = get_all_decoded_version(element_id, element_type)
        for decoded_id in decoded_ids:
            # First check the internal mapping.
            # NOTE(review): the result is written as-is to the output file,
            # so it is presumably a single id string - confirm.
            intern_match = intern_mapping(decoded_id, db_out, element_type)
            if intern_match:
                mapping[element_id] = intern_match
                nb_mapped += 1
                if verbose:
                    print("%s: original id:%s; decoded id:%s; match_with:%s from intern mapping" %(element_type, element_id, decoded_id, intern_match))
                break
            # Then check the MetaNetX cross references.
            mnx_matches = get_from_mnx(mnx_dict, decoded_id, db_out)
            if not mnx_matches:
                continue
            if len(mnx_matches) > 1:
                # Ambiguous: counted, but deliberately not written to the mapping.
                nb_ambiguous += 1
                if verbose:
                    print("%s: original id:%s; decoded id:%s; More than one mapping:%s" %(element_type, element_id, decoded_id, mnx_matches))
            else:
                mapping[element_id] = mnx_matches[0]
                nb_mapped += 1
                if verbose:
                    print("%s: original id:%s; decoded id:%s; match_with:%s from MNX mapping" %(element_type, element_id, decoded_id, mnx_matches[0]))
            break
        else:
            # Loop ended without a break: no decoded id gave any match.
            if verbose:
                print("%s: original id:%s; all decoded id:%s; not mapped" %(element_type, element_id, decoded_ids))
    return nb_mapped, nb_ambiguous


def map_sbml(sbml_file, to_map, db_out, output, verbose = False, mnx_reac_file = None, mnx_chem_file = None, mnx_folder = None):
    """
    Map the ids of a sbml to a given database and write the mapping to a file.

    Parameters
    ----------
    sbml_file: str
        path to the sbml file to map
    to_map: str
        select the part of the sbml to map, must be in ['all', 'reaction', 'species']
    db_out: str
        the name of the database target: ['metacyc', 'bigg', 'kegg'] only
        (case-insensitive, converted to upper case internally)
    output: str
        path to the file containing the mapping, sep = "\t"
        (one line per mapped element: original sbml id, mapped id)
    verbose: bool
        if true: more info during process
    mnx_reac_file: str
        path to the flat file for reactions (can be None if given mnx_folder)
    mnx_chem_file: str
        path to the flat file for chemical compounds (species) (can be None if given mnx_folder)
    mnx_folder: str
        the path to a folder containing MetaNetx flat files
        ('reac_xref.tsv' and 'chem_xref.tsv'); overrides both file arguments

    Returns
    -------
    None
        the mapping is only written to 'output'

    Raises
    ------
    ValueError
        if to_map or db_out is not an accepted value, or if the sbml file
        contains no model
    FileNotFoundError
        if the sbml file or a given MetaNetX file does not exist
    """
    # Disabled feature toggle: when True, also reports the unmapped reactions
    # whose species are all mapped.
    map_from_cpd = False

    # Validate the arguments before any file is read.
    if to_map not in ["all", "reaction", "species"]:
        raise ValueError("%s must be in [all, reaction, species]" %to_map)
    if isinstance(db_out, str):
        db_out = db_out.upper()
    if db_out not in ["BIGG","METACYC","KEGG"]:
        raise ValueError('Please choose a database id in ["BIGG","METACYC","KEGG"]')

    if mnx_folder:
        mnx_reac_file = os.path.join(mnx_folder, "reac_xref.tsv")
        mnx_chem_file = os.path.join(mnx_folder, "chem_xref.tsv")

    if mnx_reac_file and not os.path.exists(mnx_reac_file):
        raise FileNotFoundError("No MetaNetX file for reactions accessible at " + mnx_reac_file)

    if mnx_chem_file and not os.path.exists(mnx_chem_file):
        raise FileNotFoundError("No MetaNetX file for compounds accessible at " + mnx_chem_file)

    if not os.path.exists(sbml_file):
        raise FileNotFoundError("No SBML file accessible at " + sbml_file)

    reader = libsbml.SBMLReader()
    document = reader.readSBML(sbml_file)
    for i in range(document.getNumErrors()):
        print(document.getError(i).getMessage())
    model = document.getModel()
    if model is None:
        raise ValueError("No model found in SBML file " + sbml_file)
    listOfReactions = model.getListOfReactions()
    listOfSpecies = model.getListOfSpecies()

    #k: original id, v = ref id
    dict_sbml_id_mapped_id = {}
    count_nb_stric_reac_mapped = 0
    reaction_with_more_one_mapping = 0
    count_nb_stric_species_mapped = 0
    species_with_more_one_mapping = 0

    if to_map in ["all", "reaction"]:
        mnx_reac_dict = mnx_reader(mnx_reac_file, db_out)
        count_nb_stric_reac_mapped, reaction_with_more_one_mapping = _map_sbml_elements(
            listOfReactions, "reaction", mnx_reac_dict, db_out, dict_sbml_id_mapped_id, verbose)

    if to_map in ["all", "species"]:
        mnx_chem_dict = mnx_reader(mnx_chem_file, db_out)
        count_nb_stric_species_mapped, species_with_more_one_mapping = _map_sbml_elements(
            listOfSpecies, "species", mnx_chem_dict, db_out, dict_sbml_id_mapped_id, verbose)

    reaction_mapped_with_cpds = []
    if map_from_cpd:
        #for all non mapped rxn, check if able to map all species
        for sbml_rxn in listOfReactions:
            if sbml_rxn.id in dict_sbml_id_mapped_id:
                continue
            all_cpds = set([r.getSpecies() for r in sbml_rxn.getListOfReactants()] + [r.getSpecies() for r in sbml_rxn.getListOfProducts()])
            if all(cpd_id in dict_sbml_id_mapped_id for cpd_id in all_cpds):
                reaction_mapped_with_cpds.append(sbml_rxn.id)

    if verbose:
        print("#######")
        if to_map in ["all", "reaction"]:
            print("Mapped reactions: %s/%s" %(count_nb_stric_reac_mapped,len(listOfReactions)))
            print("Reactions with more than one mapping: %s" %reaction_with_more_one_mapping)
        if to_map in ["all", "species"]:
            print("Mapped species: %s/%s" %(count_nb_stric_species_mapped,len(listOfSpecies)))
            print("Species with more than one mapping: %s" %species_with_more_one_mapping)

        if map_from_cpd:
            print("Mapped reactions from species: %s" %(len(reaction_mapped_with_cpds)))
            for i in reaction_mapped_with_cpds:
                print("\t%s" %i)
            print("Total reactions mapped:%s/%s" %(count_nb_stric_reac_mapped+len(reaction_mapped_with_cpds),len(listOfReactions)))
        print("#######")

    with open(output, 'w') as f:
        for k, v in dict_sbml_id_mapped_id.items():
            f.write(k+"\t"+v+"\n")
Ejemplo n.º 2
0
def _read_xref_databases(xref_file):
    """
    Read the first column of a MetaNetX xref flat file.

    Lines starting with '#' and lines whose first tab-separated field holds
    no ':' are skipped. For a field 'db:identifier' the identifier becomes
    the key and the database prefix the value; a later line overrides an
    earlier one with the same identifier.

    Parameters
    ----------
    xref_file: str
        path to the xref flat file

    Returns
    -------
    dict:
        k = identifier, v = database prefix
    """
    id_to_db = {}
    with open(xref_file, "r") as f:
        for line in f.read().splitlines():
            if line.startswith("#"):
                continue
            xref = line.split("\t")[0]
            if ":" not in xref:
                continue
            parts = xref.split(":")
            id_to_db[parts[1]] = parts[0]
    return id_to_db


def _count_db_matches(elements, element_type, id_to_db, db_found, total_keys, unknown_db, verbose):
    """
    Count, for each SBML element, the database its id belongs to.

    Every decoded version of the element id is looked up in id_to_db; the
    first one found decides the database, otherwise the element is counted
    under unknown_db. db_found is updated in place: each key of total_keys
    is incremented once per element, and so is the matched database.
    """
    for element in elements:
        element_id = element.id
        for total_key in total_keys:
            db_found[total_key] += 1
        decoded_ids = get_all_decoded_version(element_id, element_type)

        # Reset for every element so an element without decoded ids (or
        # without match) never reuses the result of the previous one.
        db_match = unknown_db
        matched_id = None
        for decoded_id in decoded_ids:
            candidate = id_to_db.get(decoded_id, unknown_db)
            if candidate != unknown_db:
                db_match = candidate
                matched_id = decoded_id
                break
        if verbose:
            if db_match != unknown_db:
                print("%s: original id: %s; decoded id:%s; db_match:%s" %(element_type, element_id, matched_id, db_match))
            else:
                print("%s: original id:%s; all decoded id:%s; no db found" %(element_type, element_id, decoded_ids))
        db_found[db_match] = db_found.get(db_match, 0) + 1


def check_sbml_db(sbml_file, to_map, verbose = False, mnx_reac_file = None, mnx_chem_file = None, mnx_folder = None):
    """
    Check from which database the ids of a given sbml come.

    Parameters
    ----------
    sbml_file: str
        path to the sbml file to check
    to_map: str
        select the part of the sbml to check must be in ['all', 'reaction', 'species']
    verbose: bool
        if true: more info during process
    mnx_reac_file: str
        path to the flat file for reactions (can be None if given mnx_folder)
    mnx_chem_file: str
        path to the flat file for chemical compounds (species) (can be None if given mnx_folder)
    mnx_folder: str
        the path to a folder containing MetaNetx flat files
        ('reac_xref.tsv' and 'chem_xref.tsv'); overrides both file arguments

    Returns
    -------
    tuple:
        (name of the best matching database, dict of matching)
        The dict maps each database name (and 'Unknown') to the number of
        elements found in it, plus the counters 'total_reaction',
        'total_species' and 'total_reaction_species' depending on to_map.
        The best database is the one with the highest count; the counters
        are never selected. 'Unknown' is returned when nothing matches.

    Raises
    ------
    ValueError
        if to_map is not an accepted value, if the MetaNetX file needed for
        to_map is not given, or if the sbml file contains no model
    FileNotFoundError
        if the sbml file or a given MetaNetX file does not exist
    """
    if to_map not in ["all", "reaction", "species"]:
        raise ValueError("%s must be in [all, reaction, species]" %to_map)
    if mnx_folder:
        mnx_reac_file = os.path.join(mnx_folder, "reac_xref.tsv")
        mnx_chem_file = os.path.join(mnx_folder, "chem_xref.tsv")

    check_reactions = to_map in ["all", "reaction"]
    check_species = to_map in ["all", "species"]

    # Fail with a clear message instead of letting open(None) raise later.
    if check_reactions and not mnx_reac_file:
        raise ValueError("A MetaNetX file for reactions is required (mnx_reac_file or mnx_folder)")
    if check_species and not mnx_chem_file:
        raise ValueError("A MetaNetX file for compounds is required (mnx_chem_file or mnx_folder)")

    if mnx_reac_file and not os.path.exists(mnx_reac_file):
        raise FileNotFoundError("No MetaNetX file for reactions accessible at " + mnx_reac_file)

    if mnx_chem_file and not os.path.exists(mnx_chem_file):
        raise FileNotFoundError("No MetaNetX file for compounds accessible at " + mnx_chem_file)

    if not os.path.exists(sbml_file):
        raise FileNotFoundError("No SBML file accessible at " + sbml_file)

    reader = libsbml.SBMLReader()
    document = reader.readSBML(sbml_file)
    for i in range(document.getNumErrors()):
        print(document.getError(i).getMessage())
    model = document.getModel()
    if model is None:
        raise ValueError("No model found in SBML file " + sbml_file)
    listOfReactions = model.getListOfReactions()
    listOfSpecies = model.getListOfSpecies()

    unknown_db = "Unknown"
    db_found = {unknown_db: 0}
    # Keys of db_found that are element counters, not database names.
    counter_keys = ("total_reaction", "total_species", "total_reaction_species")
    shared_total = []
    if to_map == "all":
        db_found["total_reaction_species"] = 0
        shared_total = ["total_reaction_species"]
    if verbose:
        print("Check from which database is this sbml:")

    if check_reactions:
        db_found['total_reaction'] = 0
        dict_reaction_id_db = _read_xref_databases(mnx_reac_file)
        _count_db_matches(listOfReactions, "reaction", dict_reaction_id_db, db_found,
                          ["total_reaction"] + shared_total, unknown_db, verbose)

    if check_species:
        db_found["total_species"] = 0
        dict_species_id_db = _read_xref_databases(mnx_chem_file)
        _count_db_matches(listOfSpecies, "species", dict_species_id_db, db_found,
                          ["total_species"] + shared_total, unknown_db, verbose)

    # Only real database entries compete; max() keeps the first key on ties,
    # i.e. the insertion order of db_found ('Unknown' first).
    db_counts = dict((k, v) for k, v in db_found.items() if k not in counter_keys)
    db_select = max(db_counts, key=db_counts.get)

    return (db_select, db_found)
Ejemplo n.º 3
0
def enhanced_meneco_output(meneco_output_file,
                           padmetRef,
                           output,
                           verbose=False):
    """
    The standard output of meneco return ids of reactions corresponding to the solution for gapfilling.
    The ids are those from the sbml and so they are encoded.
    This script extract the solution corresponding to the union of reactions
    "Computing union of reactions from all completion"
    Based on padmetRef return a file with more information for each reaction.

    ex: RXN__45__5
    RXN-5, common_name, ec-number, Formula (with id),Formula (with cname),Action,Comment,Genes
    Also, the output can be used as input for manual_curation
    In the column Action: 'add' => To add the reaction, '' => to do nothing
    Comment: the reason of adding the reaction (ex: added for gap-filling by meneco)

    Every non-blank line following the first line that starts with
    "Computing union of reactions from all completion" is read as one encoded
    reaction id (surrounding spaces and double quotes removed). If no such
    line exists, a message is printed and the output only holds the header.
    Reactions whose decoded id is not a node of padmetRef are reported on
    stdout and skipped.

    Parameters
    ----------
    meneco_output_file: str
        pathname of a meneco run' result
    padmetRef: padmet.padmetRef
        padmet object of the database of reference (the repair network);
        its dicOfNode and dicOfRelationIn attributes are read
    output: str
        path to tsv output file
    verbose: bool
        if True print information
    """
    marker = "Computing union of reactions from all completion"
    with open(meneco_output_file, 'r') as f:
        file_in_array = f.read().splitlines()

    #recovering union reactions: they start right after the marker line
    start_index = None
    for index, line in enumerate(file_in_array):
        if line.startswith(marker):
            start_index = index + 1
            break

    if start_index is None:
        print(
            "No line starting with: Computing union of reactions from all completion. Unable to extract reactions"
        )
        # Without the marker nothing can be trusted as a reaction id.
        encoded_reactions = []
    else:
        #recover reactions, delete ' " ' and space, ignore blank lines.
        cleaned_lines = [
            line.strip().replace("\"", "")
            for line in file_in_array[start_index:]
        ]
        encoded_reactions = [line for line in cleaned_lines if line]
    nb_reactions = len(encoded_reactions)
    if verbose: print("%s reactions to check" % nb_reactions)

    def format_side(relations, label_of):
        # One 'stoichiometry label[compartment]' term per relation, joined by ' + '.
        return " + ".join(
            rlt.misc["STOICHIOMETRY"][0] + " " + label_of(rlt) + "[" +
            rlt.misc["COMPARTMENT"][0] + "]" for rlt in relations)

    def compound_id(rlt):
        return rlt.id_out

    def compound_cname(rlt):
        return padmetRef.dicOfNode[rlt.id_out].misc["COMMON_NAME"][0]

    with open(output, 'w') as f:
        header = [
            "idRef", "Common name", "EC-number", "Formula (with id)",
            "Formula (with cname)", "Action", "Comment", "Genes"
        ]
        f.write("\t".join(header) + "\n")
        for encoded_id in encoded_reactions:
            decoded_reactions = get_all_decoded_version(encoded_id, "reaction")
            # Keep the first decoded version known by padmetRef.
            reaction_id = None
            for decoded_id in decoded_reactions:
                if decoded_id in padmetRef.dicOfNode:
                    reaction_id = decoded_id
                    break
            if reaction_id is None:
                print("%s not found in padmetRef" % encoded_id)
                continue

            reac_node = padmetRef.dicOfNode[reaction_id]
            try:
                ec = reac_node.misc["EC_NUMBER"][0]
            except KeyError:
                ec = "Unknown"
            try:
                common_name = reac_node.misc["COMMON_NAME"][0]
            except KeyError:
                common_name = "Unknown"
            try:
                direction = reac_node.misc["DIRECTION"][0]
            except KeyError:
                # Handled like any other unrecognised direction below.
                direction = None

            if direction == "REVERSIBLE":
                direction = " <=> "
            elif direction == "LEFT-TO-RIGHT":
                direction = " => "
            else:
                direction = " =>/<=> "

            # A reaction without any relation yields empty formula sides.
            relations = padmetRef.dicOfRelationIn.get(reaction_id, [])
            consumed = [rlt for rlt in relations if rlt.type == "consumes"]
            produced = [rlt for rlt in relations if rlt.type == "produces"]

            idRef_formula = format_side(
                consumed, compound_id) + direction + format_side(
                    produced, compound_id)

            try:
                cname_formula = format_side(
                    consumed, compound_cname) + direction + format_side(
                        produced, compound_cname)
            except KeyError:
                # Any missing key (e.g. a compound without COMMON_NAME)
                # leaves the whole column empty.
                cname_formula = ""

            line = [
                reaction_id, common_name, ec, idRef_formula, cname_formula,
                "add", "Added for gapfilling", ""
            ]
            f.write("\t".join(line) + "\n")