Esempio n. 1
0
def _setType(types, current_id, padmet):
    """
    For id current_id, create relation 'is_a_class' to each id in types
    add new nodes in padmet and relation in list_of_relation
    Parameters
    ----------
    types: list
        list of classe id associated to id 'current_id'
    current_id: str
        element id to link to all classe id in types
    padmet: padmet.PadmetRef
        padmet instance
    """        
    for subClass_id in types:
        #Type allow the hierachization of the current
        #XX is_a_class type
        #if type is not already in padmet, create a new class node (subClass_node)
        #subClass_node id == type
        #create a relation xx is_a_class type
        try:
            subClass_node = padmet.dicOfNode[subClass_id]
        except KeyError:
            subClass_node = Node("class", subClass_id)
            padmet.dicOfNode[subClass_id] = subClass_node
        is_a_class_rlt = Relation(current_id, "is_a_class", subClass_id)
        list_of_relation.append(is_a_class_rlt)
Esempio n. 2
0
def _setSyns(syns, current_id, padmet):
    """
    For id current_id, create relation 'has_name' to a node name 'current_id'_names
    store a list of synonymous from syns list in the node name 'current_id'_names
    add new node in padmet and relation in list_of_relation

    Parameters
    ----------
    syns: list
        list of synonymous of current_id
    current_id: str
        element id to link to node name 'current_id'_names
    padmet: padmet.PadmetRef
        padmet instance
    """        
    name_id = current_id+"_names"
    try:
        name_node = padmet.dicOfNode[name_id]
    except KeyError:
        #create node name
        name_node = Node("name", name_id, {"LABEL":[]})
        padmet.dicOfNode[name_id] = name_node
        has_name_rlt = Relation(current_id, "has_name", name_id)
        list_of_relation.append(has_name_rlt)
    [name_node.misc["LABEL"].append(syn) for syn in syns 
    if syn not in name_node.misc["LABEL"]]
Esempio n. 3
0
def _setXrefs(xrefs, current_id, padmet):
    """
    For id current_id, create relation 'has_xref' to a node xref 'current_id'_xrefs
    store a list of external reference from xrefs list in the node xref 'current_id'_xrefs
    parse each external reference with regex_xref
    add new node in padmet and relation in list_of_relation

    Parameters
    ----------
    xrefs: list
        list of external reference of current_id
    current_id: str
        element id to link to node xref 'current_id'_xref
    padmet: padmet.PadmetRef
        padmet instance
    """        
    xref_id = current_id+"_xrefs"
    try:
        xref_node = padmet.dicOfNode[xref_id]
    except KeyError:
        #create node xref
        xref_node = Node("xref", xref_id)
        padmet.dicOfNode[xref_id] = xref_node
        has_xref_rlt = Relation(current_id, "has_xref", xref_id)
        list_of_relation.append(has_xref_rlt)

    for xref in xrefs:
        #an xref is like: (REFSEQ "NP_417401" NIL NIL NIL NIL NIL)
        #in this example DB = REFSEQ and ID = NP_417401
        #update node xref, with in misc k = DB and v = [ID]
        #node id is created by incrementing meta_max_id
        xref_search = regex_xref.search(xref)
        if xref_search is not None:
            xref_dict = xref_search.groupdict()
            db = xref_dict["DB"]
            _id = xref_dict["ID"]
        else:
            db = "GO-TERMS"
            _id = xref
        if db in list(xref_node.misc.keys()) and _id not in xref_node.misc[db]:
            xref_node.misc[db].append(_id)
        else:
            xref_node.misc[db] = [_id]
Esempio n. 4
0
def compounds_parser(filePath, padmet, verbose = False):
    """
    Parameters
    ----------
    filePath: str
        path to compounds.dat
    padmet: padmet.PadmetRef
        padmet instance
    verbose: bool
        if True print information
    """        
    dict_data = {}
    with open(filePath, 'r', encoding='windows-1252') as f:
        data = (line for line in f.read().splitlines() if not line.startswith("#") and not line == "//")
        for line in data:
            try:
                #if len of value is 0 then ValueError raised
                attrib, value = line.split(" - ")
                #delete all tags
                value = re.sub(regex_purge,"",value)
                if attrib == "UNIQUE-ID":
                    current_id = value
                    dict_data[current_id] = {}
                if attrib in ["COMMON-NAME","INCHI-KEY","MOLECULAR-WEIGHT","SMILES",\
                "TYPES", "SYNONYMS", "DBLINKS"]:
                    try:
                        dict_data[current_id][attrib].append(value)
                    except KeyError:
                        dict_data[current_id][attrib] = [value]
            except ValueError:
                pass

    count = 0
    nb_cpds = str(len(list(dict_data.keys())))
    for compound_id, dict_values in dict_data.items():
        count += 1
        if verbose:
            print("\r%s/%s: %s" %(count, nb_cpds, compound_id),end="", flush=True)
            #print(compound_id)
        compound_node = Node("compound", compound_id)
        padmet.dicOfNode[compound_id] = compound_node
        try:
            compound_node.misc["COMMON-NAME"] = dict_values["COMMON-NAME"]
        except KeyError:
            pass
        try:
            compound_node.misc["INCHI-KEY"] = dict_values["INCHI-KEY"]
        except KeyError:
            pass
        try:
            compound_node.misc["MOLECULAR-WEIGHT"] = dict_values["MOLECULAR-WEIGHT"]
        except KeyError:
            pass
        try:
            compound_node.misc["SMILES"] = dict_values["SMILES"]
        except KeyError:
            pass
        try:
            types = dict_values["TYPES"]
            _setType(types,compound_id, padmet)
        except KeyError:
            pass
        try:
            syns = dict_values["SYNONYMS"]
            _setSyns(syns, compound_id, padmet)
        except KeyError:
            pass
        try:
            xrefs = dict_values["DBLINKS"]
            _setXrefs(xrefs, compound_id, padmet)
        except KeyError:
            pass
    if verbose: print("")
Esempio n. 5
0
def genes_parser(filePath, padmet, verbose = False):
    """
    Parameters
    ----------
    filePath: str
        path to genes.dat
    padmet: padmet.PadmetRef
        padmet instance
    verbose: bool
        if True print information
    """        
    dict_data = {}
    #k='ACCESSION-1', v ='PRODUCT'
    with open(filePath, 'r', encoding='windows-1252') as f:
        data = (line for line in f.read().splitlines() if not line.startswith("#") and not line == "//")
        for line in data:
            try:
                #if len of value is 0 then ValueError raised
                attrib, value = line.split(" - ")
                #delete all tags
                value = re.sub(regex_purge,"",value)
                if attrib == "UNIQUE-ID":
                    current_id = value
                    dict_data[current_id] = {}
                if attrib in ["COMMON-NAME","ACCESSION-1","CENTISOME-POSITION","LEFT-END-POSITION","RIGHT-END-POSITION","SYNONYMS","TRANSCRIPTION-DIRECTION","PRODUCT"]:
                    try:
                        dict_data[current_id][attrib].append(value)
                    except KeyError:
                        dict_data[current_id][attrib] = [value]
            except ValueError:
                pass

    count = 0
    #nb_genes = str(len(list(dict_data.keys())))
    map_gene_ids = {}
    for current_id, dict_values in dict_data.items():
        try:
            gene_id = dict_values.get("ACCESSION-1",[current_id])[0]
            map_gene_ids[current_id] = gene_id
            count += 1
            """
            if verbose: 
                print("\r%s/%s: %s" %(count, nb_genes, gene_id), end="", flush=True)
                #print(current_id, gene_id)
            """
            gene_node = Node("gene", gene_id)
            padmet.dicOfNode[gene_id] = gene_node
            try:
                if dict_values["COMMON-NAME"][0] != gene_id:
                    gene_node.misc["COMMON-NAME"] = dict_values["COMMON-NAME"]
            except KeyError:
                pass
            try:
                if dict_values["TRANSCRIPTION-DIRECTION"][0] == "-":
                    gene_node.misc["TRANSCRIPTION-DIRECTION"] = ["NEGATIVE"]
                elif dict_values["TRANSCRIPTION-DIRECTION"][0] == "+":
                    gene_node.misc["TRANSCRIPTION-DIRECTION"] = ["POSITIVE"]
            except KeyError:
                pass
            try:
                gene_node.misc["CENTISOME-POSITION"] = dict_values["CENTISOME-POSITION"]
            except KeyError:
                pass
            try:
                gene_node.misc["LEFT-END-POSITION"] = dict_values["LEFT-END-POSITION"]
            except KeyError:
                pass
            try:
                gene_node.misc["RIGHT-END-POSITION"] = dict_values["RIGHT-END-POSITION"]
            except KeyError:
                pass
            try:
                gene_node.misc["RIGHT-END-POSITION"] = dict_values["RIGHT-END-POSITION"]
            except KeyError:
                pass
            try:
                syns = dict_values["SYNONYMS"]
                _setSyns(syns, gene_id, padmet)
            except KeyError:
                pass
        except KeyError:
            pass
    if verbose: print("")

    return map_gene_ids
Esempio n. 6
0
def pathways_parser(filePath, padmet, verbose = False):
    """
    Parameters
    ----------
    filePath: str
        path to pathways.dat
    padmet: padmet.PadmetRef
        padmet instance
    verbose: bool
        if True print information
    """        
    dict_data = {}
    with open(filePath, 'r', encoding='windows-1252') as f:
        data = (line for line in f.read().splitlines() if not line.startswith("#") and not line == "//")
        for line in data:
            try:
                #if len of value is 0 then ValueError raised
                attrib, value = line.split(" - ")
                #delete all tags
                value = re.sub(regex_purge,"",value)
                if attrib == "UNIQUE-ID":
                    current_id = value
                    dict_data[current_id] = {}
                if attrib in ["COMMON-NAME", "TAXONOMIC-RANGE",\
                "TYPES", "SYNONYMS", "DBLINKS", "IN-PATHWAY", "REACTION-LIST"]:
                    try:
                        dict_data[current_id][attrib].append(value)
                    except KeyError:
                        dict_data[current_id][attrib] = [value]
            except ValueError:
                pass
    
    count = 0
    nb_pathways = str(len(list(dict_data.keys())))
    for pathway_id, dict_values in dict_data.items():
        count += 1
        if verbose:
            print("\r%s/%s" %(count, nb_pathways), end="", flush=True)
            #print(pathway_id)
        pathway_node = Node("pathway", pathway_id)
        padmet.dicOfNode[pathway_id] = pathway_node
        try:
            pathway_node.misc["COMMON-NAME"] = dict_values["COMMON-NAME"]
        except KeyError:
            pass
        try:
            pathway_node.misc["TAXONOMIC-RANGE"] = dict_values["TAXONOMIC-RANGE"]
        except KeyError:
            pass
        try:
            types = dict_values["TYPES"]
            _setType(types, pathway_id, padmet)
        except KeyError:
            pass
        try:
            syns = dict_values["SYNONYMS"]
            _setSyns(syns, pathway_id, padmet)
        except KeyError:
            pass
        try:
            xrefs = dict_values["DBLINKS"]
            _setXrefs(xrefs, pathway_id, padmet)
        except KeyError:
            pass
        try:
            subPathways = dict_values["IN-PATHWAY"]
            for subPathway in subPathways:
                #add the hierachization info, current pathway is_in_pathway subpathway
                is_in_pathway_rlt = Relation(pathway_id, "is_in_pathway", subPathway)
                list_of_relation.append(is_in_pathway_rlt)
        except KeyError:
            pass
        try:
            subNodes = dict_values["REACTION-LIST"]
            for subNode in subNodes:
                #add the hierachization info, Reaction/pathway is_in_pathway current pathway
                is_in_pathway_rlt = Relation(subNode, "is_in_pathway", pathway_id)
                list_of_relation.append(is_in_pathway_rlt)
        except KeyError:
            pass
    if verbose: print("")
Esempio n. 7
0
def reactions_parser(filePath, padmet, extract_gene, source, verbose = False):
    """
    from reaction.dat: get for each reaction, the UNIQUE-ID, COMMON-NAME, TYPES, SYNONYMS, DBLINKS
    Create a reaction node with node.id = UNIQUE-ID,  node.misc = {COMMON-NAME:[COMMON-NAMES]}
    - For each types:
    A type is in fact a class. this information is stocked in padmet as: is_a_class relation btw a node and a class_node
    check if the type is already in the padmet
    if not create a new class_node (var: subClass) with subClass_node.id = type
    Create a relation current node is_a_class type
    - For each Synonyms:
    this information is stocked in padmet as: has_name relation btw a node and a name_node
    create a new name_node with name_node.id = reaction_id+"_names" name_node.misc = {LABEL:[synonyms]}
    Create a relation current node has_name name_node.id
    - For each DBLINKS:
    DBLINKS is parsed with regex_xref to get the db and the id
    this information is stocked in padmet as: has_xref relation btw a node and a xref_node
    create a new xref_node with xref_node.id = reaction_id+"_xrefs" and xref_node.misc = {db:[id]}
    Create a relation current node has_xref xref_node.id

    Parameters
    ----------
    filePath: str
        path to reactions.dat
    padmet: padmet.PadmetRef
        padmet instance
    verbose: bool
        if True print information
    """
    dict_data = {}
    with open(filePath, 'r', encoding='windows-1252') as f:
        data = [line for line in f.read().splitlines() if not line.startswith("#") and not line == "//"]
        index = -1        
        for line in data:
            index += 1
            try:
                #if len of value is 0 then ValueError raised
                attrib, value = line.split(" - ")
                #delete all tags
                value = re.sub(regex_purge,"",value)
                if attrib == "UNIQUE-ID":
                    current_id = value
                    dict_data[current_id] = {}
                if attrib in ["COMMON-NAME", "EC-NUMBER", "REACTION-DIRECTION",\
                "TYPES", "SYNONYMS", "DBLINKS"]:
                    try:
                        dict_data[current_id][attrib].append(value)
                    except KeyError:
                        dict_data[current_id][attrib] = [value]

                elif attrib in ["LEFT", "RIGHT"]:
                    #set default values
                    compartment = def_compart_in
                    stoichiometry = "1"
                    #check if information about stoechiometry and compartment in line + 1 and line + 2
                    try:
                        first_next_line = data[index+1]
                    #last line of the file, just add the LEFT/RIGHT
                    except IndexError:
                        first_next_line = ""
                    try:
                        second_next_line = data[index+2]
                    except IndexError:
                        second_next_line = ""

                    if first_next_line.startswith("^COEFFICIENT"):
                        stoichiometry = first_next_line.split(" - ")[1]
                        if second_next_line.startswith("^COMPARTMENT"):
                            compartment = second_next_line.split(" - ")[1]
                    if first_next_line.startswith("^COMPARTMENT"):
                        compartment = first_next_line.split(" - ")[1]
                        if second_next_line.startswith("^COEFFICIENT"):
                            stoichiometry = second_next_line.split(" - ")[1]                            
                    
                    #delete all tags
                    compartment = re.sub(regex_purge, "", compartment)
                    stoichiometry = re.sub(regex_purge, "", stoichiometry)
                    try:
                        dict_data[current_id][attrib].append((value, stoichiometry, compartment))
                    except KeyError:
                        dict_data[current_id][attrib] = [(value, stoichiometry, compartment)]

            except ValueError:
                pass

    count = 0
    nb_rxn = str(len(list(dict_data.keys())))
    for rxn_id, dict_values in dict_data.items():
        if "LEFT" in list(dict_values.keys()) or "RIGHT" in list(dict_values.keys()):
            count += 1
            if verbose:
                print("\r%s/%s: %s" %(count, nb_rxn, rxn_id), end="", flush=True)
                #print(rxn_id)
            rxn_node = Node("reaction", rxn_id)
            padmet.dicOfNode[rxn_id] = rxn_node
            try:
                rxn_node.misc["COMMON-NAME"] = dict_values["COMMON-NAME"]
            except KeyError:
                pass
            try:
                rxn_node.misc["EC-NUMBER"] = dict_values["EC-NUMBER"]
            except KeyError:
                pass
            try:
                rxn_dir = dict_values["REACTION-DIRECTION"][0]
                if rxn_dir == "REVERSIBLE":
                    rxn_node.misc["DIRECTION"] = ["REVERSIBLE"]
                elif "LEFT-TO-RIGHT" in rxn_dir:
                    #if:LEFT-TO-RIGHT, IRREVERSIBLE-LEFT-TO-RIGHT, PHYSIOL-RIGHT-TO-LEFT
                    rxn_node.misc["DIRECTION"] = ["LEFT-TO-RIGHT"]
                elif "RIGHT-TO-LEFT" in rxn_dir:
                    #Temporarily set direaction as RIGHT-TO-LEFT
                    #then, RIGHT' metabolites will be LEFT and LEFT -> RIGHT
                    #To finish set back DIRECTION to LEFT-TO-RIGHT
                    rxn_node.misc["DIRECTION"] = ["RIGHT-TO-LEFT"] 
            except KeyError:
                rxn_node.misc["DIRECTION"] = ["REVERSIBLE"]
            """
            try:
                rxn_node.misc["COMPARTMENT"] = dict_values["RXN-LOCATIONS"]
            except KeyError:
                pass
            """
            try:
                types = dict_values["TYPES"]
                _setType(types, rxn_id, padmet)
            except KeyError:
                pass
            try:
                syns = dict_values["SYNONYMS"]
                _setSyns(syns, rxn_id, padmet)
            except KeyError:
                pass
            try:
                xrefs = dict_values["DBLINKS"]
                _setXrefs(xrefs, rxn_id, padmet)
            except KeyError:
                pass
            if extract_gene:
                reconstructionData_id = rxn_id+"_reconstructionData_"+source
                if reconstructionData_id in list(padmet.dicOfNode.keys()) and verbose:
                    print("Warning: The reaction %s seems to be already added from the same source %s" %(rxn_id, source))
                reconstructionData = {"SOURCE":[source],"TOOL":["PATHWAYTOOLS"],"CATEGORY":["ANNOTATION"]}
                reconstructionData_rlt = Relation(rxn_id,"has_reconstructionData",reconstructionData_id)
                padmet.dicOfNode[reconstructionData_id] = Node("reconstructionData", reconstructionData_id, reconstructionData)
                list_of_relation.append(reconstructionData_rlt)
            try:
                reactants_data = dict_values["LEFT"]
                for reactant_id, stoichiometry, compartment in reactants_data:
                    if compartment == "CCO-OUT":
                        compartment = def_compart_out
                    else:
                        compartment = def_compart_in
                    try:
                        reactant_node = padmet.dicOfNode[reactant_id]
                    except KeyError:
                        reactant_node = Node("compound", reactant_id)
                        padmet.dicOfNode[reactant_id] = reactant_node
    
                    #if the reaction direction was set to RIGHT-TO-LEFT, then this compound is in fact a product
                    if rxn_node.misc["DIRECTION"][0] == "RIGHT-TO-LEFT":
                        produces_rlt = Relation(rxn_id, "produces", reactant_id, {"STOICHIOMETRY": [stoichiometry], "COMPARTMENT": [compartment]})
                        list_of_relation.append(produces_rlt)
                    else:
                        consumes_rlt = Relation(rxn_id, "consumes", reactant_id, {"STOICHIOMETRY": [stoichiometry], "COMPARTMENT": [compartment]})
                        list_of_relation.append(consumes_rlt)
            except KeyError:
                pass
            try:
                products_data = dict_values["RIGHT"]
                for product_id, stoichiometry, compartment in products_data:
                    if compartment == "CCO-OUT":
                        compartment = def_compart_out
                    else:
                        compartment = def_compart_in
                    try:
                        product_node = padmet.dicOfNode[product_id]
                    except KeyError:
                        product_node = Node("compound", product_id)
                        padmet.dicOfNode[product_id] = product_node

                    #if the reaction direction was set to RIGHT-TO-LEFT, then this compound is in fact a reactant
                    if rxn_node.misc["DIRECTION"][0] == "RIGHT-TO-LEFT":
                        consumes_rlt = Relation(rxn_id, "consumes", product_id, {"STOICHIOMETRY": [stoichiometry], "COMPARTMENT": [compartment]})
                        list_of_relation.append(consumes_rlt)
                    else:
                        produces_rlt = Relation(rxn_id, "produces", product_id, {"STOICHIOMETRY": [stoichiometry], "COMPARTMENT": [compartment]})
                        list_of_relation.append(produces_rlt)
            except KeyError:
                pass
            if rxn_node.misc["DIRECTION"][0] == "RIGHT-TO-LEFT":
                rxn_node.misc["DIRECTION"] = ["LEFT-TO-RIGHT"]
    if verbose: print("")
Esempio n. 8
0
def classes_parser(filePath, padmet, verbose = False):
    """
    from class.dat: get for each class, the UNIQUE-ID, COMMON-NAME, TYPES, SYNONYMS, DBLINKS
    Create a class node with node.id = UNIQUE-ID,  node.misc = {COMMON-NAME:[COMMON-NAMES]}
    - For each types:
    A type is in fact a class. this information is stocked in padmet as: is_a_class relation btw a node and a class_node
    check if the type is already in the padmet
    if not create a new class_node (var: subClass) with subClass_node.id = type
    Create a relation current node is_a_class type
    - For each Synonyms:
    this information is stocked in padmet as: has_name relation btw a node and a name_node
    create a new name_node with name_node.id = class_id+"_names" and name_node.misc = {LABEL:[synonyms]}
    Create a relation current node has_name name_node.id
    - For each DBLINKS:
    DBLINKS is parsed with regex_xref to get the db and the id
    this information is stocked in padmet as: has_xref relation btw a node and a xref_node
    create a new xref_node with xref_node.id = class_id+"_xrefs" and xref_node.misc = {db:[id]}
    Create a relation current node has_xref xref_node.id

    Parameters
    ----------
    filePath: str
        path to classes.dat
    padmet: padmet.PadmetRef
        padmet instance
    verbose: bool
        if True print information
    """
    dict_data = {}
    with open(filePath, 'r', encoding='windows-1252') as f:
        data = (line for line in f.read().splitlines() if not line.startswith("#") and not line == "//")
        for line in data:
            try:
                #if len of value is 0 then ValueError raised
                attrib, value = line.split(" - ")
                #delete all tags
                value = re.sub(regex_purge,"",value)
                if attrib == "UNIQUE-ID":
                    current_id = value
                    dict_data[current_id] = {}
                if attrib in ["COMMON-NAME",\
                "TYPES", "SYNONYMS", "DBLINKS"]:
                    try:
                        dict_data[current_id][attrib].append(value)
                    except KeyError:
                        dict_data[current_id][attrib] = [value]
            except ValueError:
                pass
    count = 0
    nb_classes = str(len(list(dict_data.keys())))
    for class_id, dict_values in dict_data.items():
        count += 1
        if verbose:
            print("\r%s/%s" %(count, nb_classes), end="", flush=True)
            #print(class_id)
        class_node = Node("class", class_id)
        padmet.dicOfNode[class_id] = class_node
        try:
            class_node.misc["COMMON-NAME"] = dict_values["COMMON-NAME"]
        except KeyError:
            pass
        try:
            types = dict_values["TYPES"]
            _setType(types, class_id, padmet)
        except KeyError:
            pass
        try:
            syns = dict_values["SYNONYMS"]
            _setSyns(syns, class_id, padmet)
        except KeyError:
            pass
        try:
            xrefs = dict_values["DBLINKS"]
            _setXrefs(xrefs, class_id, padmet)
        except KeyError:
            pass
    if verbose: print("")
Esempio n. 9
0
def from_pgdb_to_padmet(pgdb_folder, db='NA', version='NA', source='GENOME', extract_gene=False, no_orphan=False, enhanced_db=False, padmetRef_file=None, verbose=False):
    """
    Parameters
    ----------
    pgdb_folder: str
        path to pgdb
    db: str
        pgdb name, default is 'NA'
    version: str
        pgdb version, default is 'NA'
    source: str
        tag reactions for traceability, default is 'GENOME'
    extract_gene: bool
        if true extract genes information
    no_orphan: bool
        if true, remove reactions without genes associated
    enhanced_db: bool
        if true, read metabolix-reactions.xml sbml file and add information in final padmet
    padmetRef_file: str
        path to padmetRef corresponding to metacyc in padmet format
    verbose: bool
        if True print information
    
    Returns
    -------
    padmet.padmetRef:
        padmet instance with pgdb within pgdb data
    """
    global regex_purge, regex_xref, list_of_relation, def_compart_in, def_compart_out
    regex_purge = re.compile("<.*?>|\|")
    regex_xref = re.compile('^\((?P<DB>\S*)\s*"(?P<ID>\S*)"')
    list_of_relation = []
    def_compart_in = "c"
    def_compart_out = "e"
    #parsing args
    source = source.upper()
    
    classes_file, compounds_file, proteins_file, reactions_file, enzrxns_file, pathways_file = \
    [os.path.join(pgdb_folder,_file) for _file in ["classes.dat", "compounds.dat", "proteins.dat", "reactions.dat", "enzrxns.dat", "pathways.dat"]]
    if enhanced_db:
        metabolic_reactions = os.path.join(pgdb_folder,"metabolic-reactions.xml")
    else:
        metabolic_reactions = None
    if extract_gene:
        genes_file = os.path.join(pgdb_folder,"genes.dat")
    else:
        genes_file = None

    now = datetime.now()
    today_date = now.strftime("%Y-%m-%d")
    if padmetRef_file:
        padmet = PadmetSpec()
        padmetRef = PadmetRef(padmetRef_file)
        version = padmetRef.info["DB_info"]["version"]
        db = padmetRef.info["DB_info"]["DB"]
        dbNotes = {"PADMET":{"creation":today_date,"version":"2.6"},"DB_info":{"DB":db,"version":version}}
        padmet.setInfo(dbNotes)

        padmet.setPolicy(padmetRef)
        with open(reactions_file, 'r') as f:
            rxns_id = [line.split(" - ")[1] for line in f.read().splitlines() if line.startswith("UNIQUE-ID")]
        count = 0
        for rxn_id in rxns_id:
            count += 1
            if verbose: print("%s/%s Copy %s" %(count, len(rxns_id), rxn_id))
            try:
                padmet.copyNode(padmetRef, rxn_id)
                reconstructionData_id = rxn_id+"_reconstructionData_"+source
                if reconstructionData_id in list(padmet.dicOfNode.keys()) and verbose:
                    print("Warning: The reaction %s seems to be already added from the same source %s" %(rxn_id, source))
                reconstructionData = {"SOURCE":[source],"TOOL":["PATHWAYTOOLS"],"CATEGORY":["ANNOTATION"]}
                reconstructionData_rlt = Relation(rxn_id,"has_reconstructionData",reconstructionData_id)
                padmet.dicOfNode[reconstructionData_id] = Node("reconstructionData", reconstructionData_id, reconstructionData)
                padmet._addRelation(reconstructionData_rlt)

            except TypeError:
                print("%s not in padmetRef" %(rxn_id))

        if extract_gene:
            if verbose: print("parsing genes")
            map_gene_ids = genes_parser(genes_file, padmet, verbose)
            if verbose: print("parsing proteins")
            dict_protein_gene_id = proteins_parser(proteins_file, padmet, verbose)
            mapped_dict_protein_gene_id = map_gene_id(dict_protein_gene_id, map_gene_ids)
            if verbose: print("parsing association enzrxns")
            enzrxns_parser(enzrxns_file, padmet, mapped_dict_protein_gene_id, source, verbose)

    else:
        POLICY_IN_ARRAY = [['class','is_a_class','class'], ['class','has_name','name'], ['class','has_xref','xref'], ['class','has_suppData','suppData'],
                        ['compound','is_a_class','class'], ['compound','has_name','name'], ['compound','has_xref','xref'], ['compound','has_suppData','suppData'],
                        ['gene','is_a_class','class'], ['gene','has_name','name'], ['gene','has_xref','xref'], ['gene','has_suppData','suppData'], ['gene','codes_for','protein'],
                        ['pathway','is_a_class','class'], ['pathway','has_name','name'], ['pathway','has_xref','xref'], ['pathway','is_in_pathway','pathway'],
                        ['protein','is_a_class','class'], ['protein','has_name','name'], ['protein','has_xref','xref'], ['protein','has_suppData','suppData'], ['protein','catalyses','reaction'],
                        ['protein','is_in_species','class'],
                        ['reaction','is_a_class','class'], ['reaction','has_name','name'], ['reaction','has_xref','xref'], ['reaction','has_suppData','suppData'], ['reaction','has_reconstructionData','reconstructionData'], ['reaction','is_in_pathway','pathway'],
                        ['reaction','consumes','class','STOICHIOMETRY','X','COMPARTMENT','Y'], ['reaction','produces','class','STOICHIOMETRY','X','COMPARTMENT','Y'],
                        ['reaction','consumes','compound','STOICHIOMETRY','X','COMPARTMENT','Y'], ['reaction','produces','compound','STOICHIOMETRY','X','COMPARTMENT','Y'],
                        ['reaction','consumes','protein','STOICHIOMETRY','X','COMPARTMENT','Y'], ['reaction','produces','protein','STOICHIOMETRY','X','COMPARTMENT','Y'],
                        ['reaction','is_linked_to','gene','SOURCE:ASSIGNMENT','X:Y']]
        dbNotes = {"PADMET":{"creation":today_date,"version":"2.6"},"DB_info":{"DB":db,"version":version}}
        padmet = PadmetRef()
        if verbose: print("setting policy")
        padmet.setPolicy(POLICY_IN_ARRAY)
        if verbose: print("setting dbInfo")
        padmet.setInfo(dbNotes)
    
    
        if verbose: print("parsing classes")
        classes_parser(classes_file, padmet, verbose)
    
        if verbose: print("parsing compounds")
        compounds_parser(compounds_file, padmet, verbose)
    
        if verbose: print("parsing reactions")
        reactions_parser(reactions_file, padmet, extract_gene, source, verbose)
    
        if verbose: print("parsing pathways")
        pathways_parser(pathways_file, padmet, verbose)
    
        if extract_gene:
            if verbose: print("parsing genes")
            map_gene_ids = genes_parser(genes_file, padmet, verbose)
            if verbose: print("parsing proteins")
            dict_protein_gene_id = proteins_parser(proteins_file, padmet, verbose)
            mapped_dict_protein_gene_id = map_gene_id(dict_protein_gene_id, map_gene_ids)
            if verbose: print("parsing association enzrxns")
            enzrxns_parser(enzrxns_file, padmet, mapped_dict_protein_gene_id, source, verbose)
    
        if metabolic_reactions is not None:
            if verbose: print("enhancing db from metabolic-reactions.xml")
            padmet = enhance_db(metabolic_reactions, padmet, extract_gene, verbose)
    
    for rlt in list_of_relation:
        try:
            padmet.dicOfRelationIn[rlt.id_in].append(rlt)
        except KeyError:
            padmet.dicOfRelationIn[rlt.id_in] = [rlt]
        try:
            padmet.dicOfRelationOut[rlt.id_out].append(rlt)
        except KeyError:
            padmet.dicOfRelationOut[rlt.id_out] = [rlt]

    if extract_gene and no_orphan:
        all_reactions = [node for node in list(padmet.dicOfNode.values()) if node.type == "reaction"]
        rxn_to_del = [r for r in all_reactions if not any([rlt for rlt in padmet.dicOfRelationIn[r.id] if rlt.type == "is_linked_to"])]
        for rxn in rxn_to_del:
            padmet.delNode(rxn.id)
        if verbose:
            print("%s/%s orphan reactions (without gene association) deleted" %(len(rxn_to_del), len(all_reactions)))
        all_genes_linked = set([rlt.id_out for rlt in padmet.getAllRelation() if rlt.type == "is_linked_to"])
        all_genes = set([node.id for node in list(padmet.dicOfNode.values()) if node.type == "gene"])
        count = 0
        for gene_id in [g for g in all_genes if g not in all_genes_linked]:
            count += 1
            #if verbose: print("Removing gene without gene assoc %s" %gene_id)
            padmet.dicOfNode.pop(gene_id)
        if verbose:
            print("%s/%s orphan genes (not linked to any reactions) deleted" %(count, len(all_genes)))

    rxns = [node.id for node in list(padmet.dicOfNode.values()) if node.type == "reaction"]
    for rxn_id in rxns:
        cp_rlts = set([rlt.type for rlt in padmet.dicOfRelationIn[rxn_id] if rlt.type in ["consumes","produces"]])
        if len(cp_rlts) == 1:
            print("rxn only consume or produce, transport ???: %s" %rxn_id)
            padmet.delNode(rxn_id)

    return padmet
Esempio n. 10
0
def enhance_db(metabolic_reactions, padmet, with_genes, verbose = False):
    """
    Parse sbml metabolic_reactions and add reactions in padmet
    if with_genes: add also genes information

    Parameters
    ----------
    metabolic_reactions: str
        path to sbml metabolic-reactions.xml
    padmet: padmet.PadmetRef
        padmet instance
    with_genes: bool
        if true alos add genes information.

    Returns
    -------
    padmet.padmetRef:
        padmet instance with pgdb within pgdb + metabolic-reactions.xml data
    """        
    
    print("loading sbml file: %s" %metabolic_reactions)
    reader = libsbml.SBMLReader()
    document = reader.readSBML(metabolic_reactions)
    for i in range(document.getNumErrors()):
        print(document.getError(i).getMessage())
    model = document.getModel()
    listOfReactions = model.getListOfReactions()
    #recovere the reactions that are not in the basic metacyc but in the sbml file
    #use the reactions_name instead of ids because the ids are encoded, the name is the non-encoded version of the id
    padmet_reactions_id = set([node.id for node in list(padmet.dicOfNode.values()) if node.type == "reaction"])
    reaction_to_add = [reaction for reaction in listOfReactions 
    if reaction.getName() not in padmet_reactions_id]
    count = 0
    if verbose: print(str(len(reaction_to_add))+" reactions to add")
    for reactionSBML in reaction_to_add:
        count += 1
        reaction_id = reactionSBML.getName()
        if verbose: print(str(count)+"/"+str(len(reaction_to_add))+"\t"+reaction_id)
        if reactionSBML.getReversible():
            reaction_dir = "REVERSIBLE"
        else:
            reaction_dir = "LEFT-TO-RIGHT"
        try:
            reaction_node = padmet.dicOfNode[reaction_id]
        except KeyError:
            reaction_node = Node("reaction", reaction_id, {"DIRECTION": [reaction_dir]})
            padmet.dicOfNode[reaction_id] = reaction_node
        reactants = reactionSBML.getListOfReactants()
        for reactant in reactants: #convert ids
            reactant_id, _type, reactant_compart = sbmlPlugin.convert_from_coded_id(reactant.getSpecies())
            if reactant_id not in list(padmet.dicOfNode.keys()):
                reactant_node = Node("compound",reactant_id)
                padmet.dicOfNode[reaction_id] = reactant_node
            reactant_stoich = reactant.getStoichiometry()
            consumes_rlt = Relation(reaction_id,"consumes",reactant_id, {"STOICHIOMETRY":[reactant_stoich], "COMPARTMENT": [reactant_compart]})
            list_of_relation.append(consumes_rlt)

        products = reactionSBML.getListOfProducts()
        for product in products:
            product_id, _type, product_compart = sbmlPlugin.convert_from_coded_id(product.getSpecies())
            if product_id not in list(padmet.dicOfNode.keys()):
                product_node = Node("compound",product_id)
                padmet.dicOfNode[product_id] = product_node
            product_stoich = product.getStoichiometry()
            produces_rlt = Relation(reaction_id,"produces",product_id,{"STOICHIOMETRY": [product_stoich], "COMPARTMENT": [product_compart]})
            list_of_relation.append(produces_rlt)
        
        if with_genes:
            notes = sbmlPlugin.parseNotes(reactionSBML)
            if "GENE_ASSOCIATION" in list(notes.keys()):
                #Using sbmlPlugin to recover all genes associated to the reaction
                listOfGenes = sbmlPlugin.parseGeneAssoc(notes["GENE_ASSOCIATION"][0])
                if len(listOfGenes) != 0:
                    for gene in listOfGenes:
                        try:
                            #check if gene already in the padmet
                            padmet.dicOfNode[gene]
                        except TypeError:
                            gene_node = Node("gene",gene)
                            padmet.dicOfNode[gene] = gene_node
                        is_linked_rlt = Relation(reaction_id, "is_linked_to", gene)
                        list_of_relation.append(is_linked_rlt)
    return padmet