Esempio n. 1
0
def reduce_network(padmet_file:str, empty_padmet:str, reaction_list:list, sbml_output:str, del_cof:bool=False):
    """Create a sbml starting with the desired reactions.

    Args:
        padmet_file (str): path to padmet containing all reactions
        empty_padmet (str): path to empty padmet that will be filled
        reaction_list (list): list of reactions to be retrieved
        sbml_output (str): path to sbml file to be written
    """
    p_ref = PadmetRef(padmet_file)
    p_spec = PadmetSpec(empty_padmet)

    # retrieve reactions from a given pathway
    # rxn_list = [rlt.id_in for rlt in p_ref.dicOfRelationOut[args.pathway] if rlt.type == "is_in_pathway" and p_ref.dicOfNode[rlt.id_in].type == "reaction"]

    reaction_list = [convert_from_coded_id(i)[0] for i in reaction_list]

    for rxn_id in reaction_list:
        p_spec.copyNode(p_ref, rxn_id)
    # p_spec.generateFile("plop.padmet")

    cofactor_list = [convert_from_coded_id(i)[0] for i in COFACTORS]

    if del_cof:
        for rxn_id in reaction_list:
            cof_linked_rlt = [rlt for rlt in p_spec.dicOfRelationIn[rxn_id] if rlt.id_out in cofactor_list]
            for rel in cof_linked_rlt:
                p_spec._delRelation(rel)

    padmet_to_sbml(p_spec, sbml_output, sbml_lvl=3, verbose=True)

    return 
Esempio n. 2
0
def add_spontaneous_reactions(padmet_path, padmet_ref_path, output_padmet_path, only_complete_pathways=True):
    number_spontaneous_reactions = 0

    padmetSpec = PadmetSpec(padmet_path)
    padmetRef = PadmetRef(padmet_ref_path)

    all_spontaneous_rxns = set([node.id for node in list(padmetRef.dicOfNode.values()) if node.type == "reaction" and "SPONTANEOUS" in node.misc])

    for spontaneous_rxn_id in all_spontaneous_rxns:
        in_pwys = set([rlt.id_out for rlt in padmetRef.dicOfRelationIn.get(spontaneous_rxn_id,None) if rlt.type == "is_in_pathway"])
        for pwy_id in in_pwys:
            if pwy_id in padmetSpec.dicOfNode.keys():
                padmet_ref_in_rxns = set([rlt.id_in for rlt in padmetRef.dicOfRelationOut.get(pwy_id,[]) if rlt.type == "is_in_pathway"])
                padmet_spec_in_rxns = set([rlt.id_in for rlt in padmetSpec.dicOfRelationOut.get(pwy_id,[]) if rlt.type == "is_in_pathway"])

                if only_complete_pathways:
                    difference_rxns = padmet_ref_in_rxns.difference(padmet_spec_in_rxns)

                    if difference_rxns != set():
                        if difference_rxns.issubset(all_spontaneous_rxns):
                            for difference_rxn in difference_rxns:
                                if difference_rxn not in set([node.id for node in list(padmetSpec.dicOfNode.values()) if node.type == "reaction"]):
                                    padmetSpec.copyNode(padmetRef, difference_rxn)
                                    number_spontaneous_reactions += 1
                else:
                    if spontaneous_rxn_id not in set([node.id for node in list(padmetSpec.dicOfNode.values()) if node.type == "reaction"]):
                        padmetSpec.copyNode(padmetRef, spontaneous_rxn_id)
                        number_spontaneous_reactions += 1

    padmetSpec.generateFile(output_padmet_path)

    print('Add {0} spontaneous reactions to {1}'.format(number_spontaneous_reactions, output_padmet_path))
Esempio n. 3
0
def reaction_to_sbml(reactions, output, padmetRef, verbose = False):
    """
    convert a list of reactions to sbml format based on a given padmet of reference.
    - ids are encoded for sbml using functions sbmlPlugin.convert_to_coded_id
    
    Parameters
    ----------
    reactions: list
        list of reactions ids
    padmetRef: padmet.classes.PadmetRef
        padmet of reference
    output: str
        the pathname to the sbml file to create
    """
    if os.path.isfile(reactions):
        with open(reactions, 'r') as f:
            reactions = set(f.read().splitlines())

    #check if all rxn id are in padmetRef.
    all_rxn = set([k for k,v in padmetRef.dicOfNode.items() if v.type == "reaction"])
    rxn_not_in_ref = reactions.difference(all_rxn)
    if len(rxn_not_in_ref) == len(reactions):
        raise KeyError("None of the reactions is in padmetRef")
    else:
        for rxn_id in rxn_not_in_ref:
            if verbose: print("%s not in padmetRef" % rxn_id)
            reactions.remove(rxn_id)
    padmet = PadmetSpec()
    [padmet.copyNode(padmetRef, rxn_id) for rxn_id in reactions]
    padmet_to_sbml(padmet, output, sbml_lvl=2, verbose = verbose)
Esempio n. 4
0
def from_pgdb_to_padmet(pgdb_folder, db='NA', version='NA', source='GENOME', extract_gene=False, no_orphan=False, enhanced_db=False, padmetRef_file=None, verbose=False):
    """
    Parameters
    ----------
    pgdb_folder: str
        path to pgdb
    db: str
        pgdb name, default is 'NA'
    version: str
        pgdb version, default is 'NA'
    source: str
        tag reactions for traceability, default is 'GENOME'
    extract_gene: bool
        if true extract genes information
    no_orphan: bool
        if true, remove reactions without genes associated
    enhanced_db: bool
        if true, read metabolix-reactions.xml sbml file and add information in final padmet
    padmetRef_file: str
        path to padmetRef corresponding to metacyc in padmet format
    verbose: bool
        if True print information
    
    Returns
    -------
    padmet.padmetRef:
        padmet instance with pgdb within pgdb data
    """
    global regex_purge, regex_xref, list_of_relation, def_compart_in, def_compart_out
    regex_purge = re.compile("<.*?>|\|")
    regex_xref = re.compile('^\((?P<DB>\S*)\s*"(?P<ID>\S*)"')
    list_of_relation = []
    def_compart_in = "c"
    def_compart_out = "e"
    #parsing args
    source = source.upper()
    
    classes_file, compounds_file, proteins_file, reactions_file, enzrxns_file, pathways_file = \
    [os.path.join(pgdb_folder,_file) for _file in ["classes.dat", "compounds.dat", "proteins.dat", "reactions.dat", "enzrxns.dat", "pathways.dat"]]
    if enhanced_db:
        metabolic_reactions = os.path.join(pgdb_folder,"metabolic-reactions.xml")
    else:
        metabolic_reactions = None
    if extract_gene:
        genes_file = os.path.join(pgdb_folder,"genes.dat")
    else:
        genes_file = None

    now = datetime.now()
    today_date = now.strftime("%Y-%m-%d")
    if padmetRef_file:
        padmet = PadmetSpec()
        padmetRef = PadmetRef(padmetRef_file)
        version = padmetRef.info["DB_info"]["version"]
        db = padmetRef.info["DB_info"]["DB"]
        dbNotes = {"PADMET":{"creation":today_date,"version":"2.6"},"DB_info":{"DB":db,"version":version}}
        padmet.setInfo(dbNotes)

        padmet.setPolicy(padmetRef)
        with open(reactions_file, 'r') as f:
            rxns_id = [line.split(" - ")[1] for line in f.read().splitlines() if line.startswith("UNIQUE-ID")]
        count = 0
        for rxn_id in rxns_id:
            count += 1
            if verbose: print("%s/%s Copy %s" %(count, len(rxns_id), rxn_id))
            try:
                padmet.copyNode(padmetRef, rxn_id)
                reconstructionData_id = rxn_id+"_reconstructionData_"+source
                if reconstructionData_id in list(padmet.dicOfNode.keys()) and verbose:
                    print("Warning: The reaction %s seems to be already added from the same source %s" %(rxn_id, source))
                reconstructionData = {"SOURCE":[source],"TOOL":["PATHWAYTOOLS"],"CATEGORY":["ANNOTATION"]}
                reconstructionData_rlt = Relation(rxn_id,"has_reconstructionData",reconstructionData_id)
                padmet.dicOfNode[reconstructionData_id] = Node("reconstructionData", reconstructionData_id, reconstructionData)
                padmet._addRelation(reconstructionData_rlt)

            except TypeError:
                print("%s not in padmetRef" %(rxn_id))

        if extract_gene:
            if verbose: print("parsing genes")
            map_gene_ids = genes_parser(genes_file, padmet, verbose)
            if verbose: print("parsing proteins")
            dict_protein_gene_id = proteins_parser(proteins_file, padmet, verbose)
            mapped_dict_protein_gene_id = map_gene_id(dict_protein_gene_id, map_gene_ids)
            if verbose: print("parsing association enzrxns")
            enzrxns_parser(enzrxns_file, padmet, mapped_dict_protein_gene_id, source, verbose)

    else:
        POLICY_IN_ARRAY = [['class','is_a_class','class'], ['class','has_name','name'], ['class','has_xref','xref'], ['class','has_suppData','suppData'],
                        ['compound','is_a_class','class'], ['compound','has_name','name'], ['compound','has_xref','xref'], ['compound','has_suppData','suppData'],
                        ['gene','is_a_class','class'], ['gene','has_name','name'], ['gene','has_xref','xref'], ['gene','has_suppData','suppData'], ['gene','codes_for','protein'],
                        ['pathway','is_a_class','class'], ['pathway','has_name','name'], ['pathway','has_xref','xref'], ['pathway','is_in_pathway','pathway'],
                        ['protein','is_a_class','class'], ['protein','has_name','name'], ['protein','has_xref','xref'], ['protein','has_suppData','suppData'], ['protein','catalyses','reaction'],
                        ['protein','is_in_species','class'],
                        ['reaction','is_a_class','class'], ['reaction','has_name','name'], ['reaction','has_xref','xref'], ['reaction','has_suppData','suppData'], ['reaction','has_reconstructionData','reconstructionData'], ['reaction','is_in_pathway','pathway'],
                        ['reaction','consumes','class','STOICHIOMETRY','X','COMPARTMENT','Y'], ['reaction','produces','class','STOICHIOMETRY','X','COMPARTMENT','Y'],
                        ['reaction','consumes','compound','STOICHIOMETRY','X','COMPARTMENT','Y'], ['reaction','produces','compound','STOICHIOMETRY','X','COMPARTMENT','Y'],
                        ['reaction','consumes','protein','STOICHIOMETRY','X','COMPARTMENT','Y'], ['reaction','produces','protein','STOICHIOMETRY','X','COMPARTMENT','Y'],
                        ['reaction','is_linked_to','gene','SOURCE:ASSIGNMENT','X:Y']]
        dbNotes = {"PADMET":{"creation":today_date,"version":"2.6"},"DB_info":{"DB":db,"version":version}}
        padmet = PadmetRef()
        if verbose: print("setting policy")
        padmet.setPolicy(POLICY_IN_ARRAY)
        if verbose: print("setting dbInfo")
        padmet.setInfo(dbNotes)
    
    
        if verbose: print("parsing classes")
        classes_parser(classes_file, padmet, verbose)
    
        if verbose: print("parsing compounds")
        compounds_parser(compounds_file, padmet, verbose)
    
        if verbose: print("parsing reactions")
        reactions_parser(reactions_file, padmet, extract_gene, source, verbose)
    
        if verbose: print("parsing pathways")
        pathways_parser(pathways_file, padmet, verbose)
    
        if extract_gene:
            if verbose: print("parsing genes")
            map_gene_ids = genes_parser(genes_file, padmet, verbose)
            if verbose: print("parsing proteins")
            dict_protein_gene_id = proteins_parser(proteins_file, padmet, verbose)
            mapped_dict_protein_gene_id = map_gene_id(dict_protein_gene_id, map_gene_ids)
            if verbose: print("parsing association enzrxns")
            enzrxns_parser(enzrxns_file, padmet, mapped_dict_protein_gene_id, source, verbose)
    
        if metabolic_reactions is not None:
            if verbose: print("enhancing db from metabolic-reactions.xml")
            padmet = enhance_db(metabolic_reactions, padmet, extract_gene, verbose)
    
    for rlt in list_of_relation:
        try:
            padmet.dicOfRelationIn[rlt.id_in].append(rlt)
        except KeyError:
            padmet.dicOfRelationIn[rlt.id_in] = [rlt]
        try:
            padmet.dicOfRelationOut[rlt.id_out].append(rlt)
        except KeyError:
            padmet.dicOfRelationOut[rlt.id_out] = [rlt]

    if extract_gene and no_orphan:
        all_reactions = [node for node in list(padmet.dicOfNode.values()) if node.type == "reaction"]
        rxn_to_del = [r for r in all_reactions if not any([rlt for rlt in padmet.dicOfRelationIn[r.id] if rlt.type == "is_linked_to"])]
        for rxn in rxn_to_del:
            padmet.delNode(rxn.id)
        if verbose:
            print("%s/%s orphan reactions (without gene association) deleted" %(len(rxn_to_del), len(all_reactions)))
        all_genes_linked = set([rlt.id_out for rlt in padmet.getAllRelation() if rlt.type == "is_linked_to"])
        all_genes = set([node.id for node in list(padmet.dicOfNode.values()) if node.type == "gene"])
        count = 0
        for gene_id in [g for g in all_genes if g not in all_genes_linked]:
            count += 1
            #if verbose: print("Removing gene without gene assoc %s" %gene_id)
            padmet.dicOfNode.pop(gene_id)
        if verbose:
            print("%s/%s orphan genes (not linked to any reactions) deleted" %(count, len(all_genes)))

    rxns = [node.id for node in list(padmet.dicOfNode.values()) if node.type == "reaction"]
    for rxn_id in rxns:
        cp_rlts = set([rlt.type for rlt in padmet.dicOfRelationIn[rxn_id] if rlt.type in ["consumes","produces"]])
        if len(cp_rlts) == 1:
            print("rxn only consume or produce, transport ???: %s" %rxn_id)
            padmet.delNode(rxn_id)

    return padmet
Esempio n. 5
0
    parser.add_argument("-s",
                        "--species",
                        help="padmet species",
                        required=True)
    parser.add_argument("-p", "--pathway", help="pathway name", required=True)
    parser.add_argument("-o",
                        "--output",
                        help="SBML output filename",
                        required=False)

    args = parser.parse_args()

    if args.output:
        outfile = args.output
    else:
        outfile = args.pathway.lower() + ".sbml"

    p_ref = PadmetRef(args.reference)  #reads reference padmet
    p_spec = PadmetSpec(args.species)  #reads organism padmet

    rxn_list = [
        rlt.id_in for rlt in p_ref.dicOfRelationOut[args.pathway]
        if rlt.type == "is_in_pathway"
        and p_ref.dicOfNode[rlt.id_in].type == "reaction"
    ]

    for rxn_id in rxn_list:
        print(rxn_id)
        p_spec.copyNode(p_ref, rxn_id)
    padmet_to_sbml(p_spec, outfile, sbml_lvl=2, verbose=True)