Ejemplo n.º 1
0
def padmet_medium_cli(command_args):
    """Show or update the growth medium of a padmet network.

    Without -r and without --seeds, the current growth medium is printed.
    Otherwise the medium is updated (set from --seeds, or removed with -r)
    and the padmet is written to --output (defaults to --padmetSpec).
    """
    args = docopt.docopt(__doc__, argv=command_args)

    # Seeds are the first tab-separated column of each line of the seeds file.
    seeds = None
    if args["--seeds"]:
        seeds_file = args["--seeds"]
        if not os.path.exists(seeds_file):
            raise FileNotFoundError("No seeds file (--seeds) accessible at " +
                                    seeds_file)
        with open(seeds_file, 'r') as seeds_fh:
            seeds = [row.split("\t")[0] for row in seeds_fh.read().splitlines()]

    padmet = PadmetSpec(args["--padmetSpec"])
    padmetRef = PadmetRef(args["--padmetRef"]) if args["--padmetRef"] else None
    verbose = args["-v"]
    remove = args["-r"]

    output = args["--output"]
    if output is None:
        # Default: update the network in place.
        output = args["--padmetSpec"]

    if remove or seeds:
        manage_medium(padmet, seeds, padmetRef, verbose)
        padmet.generateFile(output)
    else:
        # Read-only mode: just display the current medium.
        g_m = padmet.get_growth_medium()
        print("List of growth medium:")
        print(list(g_m) if g_m else "[]")
Ejemplo n.º 2
0
def create_padmet_instance(padmet_file, padmet_type, db, version, padmetRef=None):
    """Create and initialize an empty PadmetSpec or PadmetRef instance.

    Info and policy are copied from *padmetRef* when one is given; otherwise a
    default relation policy and a fresh info section (creation date, padmet
    version, database name/version) are installed.

    Parameters
    ----------
    padmet_file: str
        target padmet path.  NOTE(review): currently unused in this function —
        presumably the caller writes the file itself via generateFile; confirm.
    padmet_type: str
        either "PadmetRef" or "PadmetSpec", selects the class to instantiate
    db: str
        name of the reference database, stored in the DB_info section
    version: str
        version of the reference database, stored in the DB_info section
    padmetRef: PadmetRef, optional
        existing reference whose info and policy are reused

    Returns
    -------
    PadmetSpec or PadmetRef
        the freshly initialized, empty padmet

    Raises
    ------
    TypeError
        if padmet_type is not "PadmetRef" or "PadmetSpec"
    """
    if padmet_type not in ["PadmetRef","PadmetSpec"]:
        raise TypeError('padmet_type must be in ["PadmetRef","PadmetSpec"], given:%s' %padmet_type)
    # Creation date stamped into the PADMET info section.
    now = datetime.now()
    today_date = now.strftime("%Y-%m-%d")

    if padmet_type == "PadmetSpec":
        padmet = PadmetSpec()
    elif padmet_type == "PadmetRef":
        padmet = PadmetRef()
        
    if padmetRef:
        # Reuse the reference's metadata and policy, but stamp today's date.
        padmet.setInfo(padmetRef)
        padmet.info["PADMET"]["creation"] = today_date
        padmet.setPolicy(padmetRef)
    else:
        # Default policy: each entry is [id_in, relation_type, id_out, (extra keys/values...)].
        POLICY_IN_ARRAY = [['class','is_a_class','class'], ['class','has_name','name'], ['class','has_xref','xref'], ['class','has_suppData','suppData'],
                        ['compound','is_a_class','class'], ['compound','has_name','name'], ['compound','has_xref','xref'], ['compound','has_suppData','suppData'],
                        ['gene','is_a_class','class'], ['gene','has_name','name'], ['gene','has_xref','xref'], ['gene','has_suppData','suppData'], ['gene','codes_for','protein'],
                        ['pathway','is_a_class','class'], ['pathway','has_name','name'], ['pathway','has_xref','xref'], ['pathway','is_in_pathway','pathway'],
                        ['protein','is_a_class','class'], ['protein','has_name','name'], ['protein','has_xref','xref'], ['protein','has_suppData','suppData'], ['protein','catalyses','reaction'],
                        ['protein','is_in_species','class'],
                        ['reaction','is_a_class','class'], ['reaction','has_name','name'], ['reaction','has_xref','xref'], ['reaction','has_suppData','suppData'], ['reaction','has_reconstructionData','reconstructionData'], ['reaction','is_in_pathway','pathway'],
                        ['reaction','consumes','class','STOICHIOMETRY','X','COMPARTMENT','Y'], ['reaction','produces','class','STOICHIOMETRY','X','COMPARTMENT','Y'],
                        ['reaction','consumes','compound','STOICHIOMETRY','X','COMPARTMENT','Y'], ['reaction','produces','compound','STOICHIOMETRY','X','COMPARTMENT','Y'],
                        ['reaction','consumes','protein','STOICHIOMETRY','X','COMPARTMENT','Y'], ['reaction','produces','protein','STOICHIOMETRY','X','COMPARTMENT','Y'],
                        ['reaction','is_linked_to','gene','SOURCE:ASSIGNMENT','X:Y']]
        dbNotes = {"PADMET":{"creation":today_date,"version":"2.6"},"DB_info":{"DB":db,"version":version}}
        padmet.setInfo(dbNotes)
        padmet.setPolicy(POLICY_IN_ARRAY)
    return padmet
Ejemplo n.º 3
0
def manual_curation_cli(command_args):
    """Run a manual-curation step (or emit a curation template) from CLI args.

    With --template_new_rxn / --template_add_delete_rxn only a template file
    is written.  Otherwise --data is sniffed and the matching curation
    (rxn_creator or add_delete_rxn) is applied to --padmetSpec, writing the
    result to --output (defaults to --padmetSpec).
    """
    args = docopt.docopt(__doc__, argv=command_args)
    data_file = args["--data"]
    output = args["--output"]
    verbose = args["-v"]

    # Bind source unconditionally: the original code raised NameError when
    # --data was omitted but the curation branch below was reached.
    source = None
    if data_file:
        if not os.path.exists(data_file):
            raise FileNotFoundError("No form curation file (--data/data_file) accessible at " + data_file)
        # The curation source label is the data file's base name.
        source = os.path.splitext(os.path.basename(data_file))[0]

    category = args["--category"]
    tool = args["--tool"]
    if args["--template_new_rxn"]:
        output = args["--template_new_rxn"]
        template_new_rxn(output)
    elif args["--template_add_delete_rxn"]:
        output = args["--template_add_delete_rxn"]
        template_add_delete(output)
    else:
        # A curation run needs a data file; fail with a clear message instead
        # of a late NameError/crash inside sniff_datafile.
        if not data_file:
            raise ValueError("--data is required when no template option is given")
        padmetSpec = PadmetSpec(args["--padmetSpec"])
        if not output:
            output = args["--padmetSpec"]
        if args["--padmetRef"]:
            padmetRef = PadmetRef(args["--padmetRef"])
        else:
            padmetRef = None
        to_do = sniff_datafile(data_file)

        if to_do == "rxn_creator":
            rxn_creator(data_file, padmetSpec, output, padmetRef, source, tool, category, verbose)
        elif to_do == "add_delete_rxn":
            add_delete_rxn(data_file, padmetSpec, output, padmetRef, source, tool, category, verbose)
Ejemplo n.º 4
0
def add_spontaneous_reactions(padmet_path, padmet_ref_path, output_padmet_path, only_complete_pathways=True):
    """Copy spontaneous reactions from a reference padmet into a network padmet.

    For every reference reaction flagged SPONTANEOUS that belongs to a pathway
    present in the network: if only_complete_pathways is True, the reaction is
    copied only when all reactions missing from that pathway are themselves
    spontaneous (i.e. adding them completes the pathway); otherwise the
    spontaneous reaction is copied unconditionally.  The updated network is
    written to output_padmet_path.

    Parameters
    ----------
    padmet_path: str
        path to the network padmet (PadmetSpec)
    padmet_ref_path: str
        path to the reference padmet (PadmetRef)
    output_padmet_path: str
        path of the padmet file to write
    only_complete_pathways: bool
        see above; defaults to True
    """
    number_spontaneous_reactions = 0

    padmetSpec = PadmetSpec(padmet_path)
    padmetRef = PadmetRef(padmet_ref_path)

    # All reference reactions flagged as spontaneous.
    all_spontaneous_rxns = set(node.id for node in padmetRef.dicOfNode.values()
                               if node.type == "reaction" and "SPONTANEOUS" in node.misc)

    # Reaction ids already in the network, kept up to date as nodes are copied.
    # (The original recomputed this set inside the innermost loop — quadratic.)
    spec_rxn_ids = set(node.id for node in padmetSpec.dicOfNode.values()
                       if node.type == "reaction")

    for spontaneous_rxn_id in all_spontaneous_rxns:
        # Default to []: a spontaneous reaction that belongs to no pathway made
        # the original crash (it iterated over None).
        in_pwys = set(rlt.id_out for rlt in padmetRef.dicOfRelationIn.get(spontaneous_rxn_id, [])
                      if rlt.type == "is_in_pathway")
        for pwy_id in in_pwys:
            if pwy_id in padmetSpec.dicOfNode:
                padmet_ref_in_rxns = set(rlt.id_in for rlt in padmetRef.dicOfRelationOut.get(pwy_id, []) if rlt.type == "is_in_pathway")
                padmet_spec_in_rxns = set(rlt.id_in for rlt in padmetSpec.dicOfRelationOut.get(pwy_id, []) if rlt.type == "is_in_pathway")

                if only_complete_pathways:
                    difference_rxns = padmet_ref_in_rxns.difference(padmet_spec_in_rxns)
                    # Fill the gap only when every missing reaction is spontaneous.
                    if difference_rxns and difference_rxns.issubset(all_spontaneous_rxns):
                        for difference_rxn in difference_rxns:
                            if difference_rxn not in spec_rxn_ids:
                                padmetSpec.copyNode(padmetRef, difference_rxn)
                                spec_rxn_ids.add(difference_rxn)
                                number_spontaneous_reactions += 1
                else:
                    if spontaneous_rxn_id not in spec_rxn_ids:
                        padmetSpec.copyNode(padmetRef, spontaneous_rxn_id)
                        spec_rxn_ids.add(spontaneous_rxn_id)
                        number_spontaneous_reactions += 1

    padmetSpec.generateFile(output_padmet_path)

    print('Add {0} spontaneous reactions to {1}'.format(number_spontaneous_reactions, output_padmet_path))
Ejemplo n.º 5
0
def reduce_network(padmet_file:str, empty_padmet:str, reaction_list:list, sbml_output:str, del_cof:bool=False):
    """Build an sbml restricted to a chosen set of reactions.

    Args:
        padmet_file (str): path to padmet containing all reactions
        empty_padmet (str): path to empty padmet that will be filled
        reaction_list (list): list of (coded) reaction ids to be retrieved
        sbml_output (str): path to sbml file to be written
        del_cof (bool): if True, drop the relations linking the kept
            reactions to known cofactors
    """
    reference = PadmetRef(padmet_file)
    network = PadmetSpec(empty_padmet)

    # Decode the sbml-coded ids, then copy each reaction from the reference.
    decoded_rxns = [convert_from_coded_id(coded)[0] for coded in reaction_list]
    for rxn_id in decoded_rxns:
        network.copyNode(reference, rxn_id)

    decoded_cofactors = [convert_from_coded_id(coded)[0] for coded in COFACTORS]

    if del_cof:
        # Strip every relation that ties a kept reaction to a cofactor.
        for rxn_id in decoded_rxns:
            linked_to_cofactor = [rlt for rlt in network.dicOfRelationIn[rxn_id]
                                  if rlt.id_out in decoded_cofactors]
            for relation in linked_to_cofactor:
                network._delRelation(relation)

    padmet_to_sbml(network, sbml_output, sbml_lvl=3, verbose=True)
Ejemplo n.º 6
0
def sbml_to_padmetRef(sbml, padmetRef_file, output=None, db="NA", version="NA", verbose=False):
    """Update (or create) a PadmetRef from one or several sbml files.

    *sbml* may be a single file, a ';'-separated list of files, or a folder
    whose .sbml/.xml files are all used.  If padmetRef_file already exists it
    is loaded and updated; otherwise a fresh PadmetRef is initialized with the
    given db/version metadata.  The result is written to *output*, which
    defaults to padmetRef_file (update in place).
    """
    if output is None:
        output = padmetRef_file

    # Collect the sbml inputs.
    if os.path.isdir(sbml):
        folder_entries = next(os.walk(sbml))[2]
        sbml_files = [os.path.join(sbml, name) for name in folder_entries
                      if name.endswith((".sbml", ".xml"))]
    else:
        sbml_files = sbml.split(";")

    # Load the existing reference, or start a new one.
    if os.path.isfile(padmetRef_file):
        padmet_to_update = PadmetRef(padmetRef_file)
    else:
        padmet_to_update = create_padmet_instance(padmetRef_file, "PadmetRef", db, version)

    for sbml_file in sbml_files:
        if verbose:
            print("Updating padmet from %s" %os.path.basename(sbml_file))
        padmet_to_update.updateFromSbml(sbml_file, verbose)

    padmet_to_update.generateFile(output)
Ejemplo n.º 7
0
def main():
    """CLI entry point: run a manual-curation step or emit a curation template.

    Mirrors manual_curation_cli: template options only write a template file;
    otherwise --data is sniffed and the matching curation is applied to
    --padmetSpec, writing to --output (defaults to --padmetSpec).
    """
    args = docopt.docopt(__doc__)
    data_file = args["--data"]
    output = args["--output"]
    verbose = args["-v"]

    # Bind source unconditionally: the original code raised NameError when
    # --data was omitted but the curation branch below was reached.
    source = None
    if data_file:
        # The curation source label is the data file's base name.
        source = os.path.splitext(os.path.basename(data_file))[0]

    category = args["--category"]
    tool = args["--tool"]
    if args["--template_new_rxn"]:
        output = args["--template_new_rxn"]
        manual_curation.template_new_rxn(output)
    elif args["--template_add_delete_rxn"]:
        output = args["--template_add_delete_rxn"]
        manual_curation.template_add_delete(output)
    else:
        # A curation run needs a data file; fail with a clear message instead
        # of a late NameError/crash inside sniff_datafile.
        if not data_file:
            raise ValueError("--data is required when no template option is given")
        padmetSpec = PadmetSpec(args["--padmetSpec"])
        if not output:
            output = args["--padmetSpec"]
        if args["--padmetRef"]:
            padmetRef = PadmetRef(args["--padmetRef"])
        else:
            padmetRef = None
        to_do = manual_curation.sniff_datafile(data_file)

        if to_do == "rxn_creator":
            manual_curation.rxn_creator(data_file, padmetSpec, output, padmetRef, source, tool, category, verbose)
        elif to_do == "add_delete_rxn":
            manual_curation.add_delete_rxn(data_file, padmetSpec, output, padmetRef, source, tool, category, verbose)
Ejemplo n.º 8
0
def enhanced_meneco_output_cli(command_args):
    """Parse CLI arguments and enrich a meneco output file."""
    args = docopt.docopt(__doc__, argv=command_args)

    reference = PadmetRef(args["--padmetRef"])
    enhanced_meneco_output(args["--meneco_output"], reference,
                           args["--output"], args["-v"])
Ejemplo n.º 9
0
def main():
    """CLI entry point: enrich a meneco output file."""
    args = docopt.docopt(__doc__)

    reference = PadmetRef(args["--padmetRef"])
    enhanced_meneco_output.enhanced_meneco_output(args["--meneco_output"], reference,
                                                  args["--output"], args["-v"])
Ejemplo n.º 10
0
def main():
    """CLI entry point: compare padmet files (optionally against a reference)."""
    args = docopt.docopt(__doc__)
    reference = PadmetRef(args["--padmetRef"]) if args["--padmetRef"] else None
    compare_padmet.compare_padmet(args["--padmet"], args["--output"],
                                  reference, args["-v"])
Ejemplo n.º 11
0
def main():
    """CLI entry point: merge padmet files with padmet_to_padmet."""
    args = docopt.docopt(__doc__)
    reference = PadmetRef(args["--padmetRef"]) if args["--padmetRef"] else None
    padmet_to_padmet.padmet_to_padmet(args["--to_add"], args["--output"],
                                      reference, args["-v"])
Ejemplo n.º 12
0
def compare_padmet_cli(command_args):
    """Parse CLI arguments and compare padmet files (optionally in parallel)."""
    args = docopt.docopt(__doc__, argv=command_args)
    reference = PadmetRef(args["--padmetRef"]) if args["--padmetRef"] else None
    compare_padmet(args["--padmet"], args["--output"], reference,
                   args["-v"], args["--cpu"])
Ejemplo n.º 13
0
def main():
    """CLI entry point: generate wiki pages from a padmet."""
    # files to upload: folder genomic_data, all sbml in output ortho, annot,
    # external, seeds, targets
    args = docopt.docopt(__doc__)
    reference = PadmetRef(args["--padmetRef"]) if args["--padmetRef"] else None
    wikiGenerator.wikiGenerator(args["--padmet"], args["--output"],
                                args["--wiki_id"], reference,
                                args["--database"], args["--log_file"],
                                args["-v"])
Ejemplo n.º 14
0
def sbmlGenerator_cli(command_args):
    """Dispatch sbml generation according to the CLI arguments.

    Three modes: a full padmet (--padmet, optionally seeded from
    --init_source), a reaction list (--reaction, needs --padmetRef),
    or a compound list (--compound).
    """
    args = docopt.docopt(__doc__, argv=command_args)
    output = args["--output"]
    verbose = args["-v"]

    if args["--padmet"]:
        padmet_file = args["--padmet"]
        if args["--init_source"]:
            from_init_source(padmet_file, args["--init_source"], output, verbose)
        else:
            padmet_to_sbml(padmet_file, output, args["--model_id"],
                           args["--obj_fct"], args["--sbml_lvl"],
                           args["--mnx_chem_prop"], args["--mnx_chem_xref"],
                           verbose)
    elif args["--reaction"]:
        reference = PadmetRef(args["--padmetRef"])
        reaction_to_sbml(args["--reaction"], output, reference, verbose)
    elif args["--compound"]:
        compound_to_sbml(args["--compound"], output, verbose)
Ejemplo n.º 15
0
def padmet_to_tsv(padmetSpec_file, padmetRef_file, output_dir, verbose=False):
    """Export the nodes and relations of padmet files to tsv tables.

    For each given padmet (reference and/or network) a sub-folder named after
    the file is created in output_dir and filled with one tsv per node type
    (rxn.tsv, cpd.tsv, pwy.tsv, xref.tsv — plus gene.tsv and
    reconstructionData.tsv for the network) and one tsv per relation type
    (rxn_cpd.tsv, rxn_pwy.tsv, entity_xref.tsv, rxn_gene.tsv,
    rxn_reconstructionData.tsv and, when both padmets are given, pwy_rate.tsv).

    Parameters
    ----------
    padmetSpec_file: str
        path to the network padmet; falsy to skip the network export
    padmetRef_file: str
        path to the reference padmet; falsy to skip the reference export
    output_dir: str
        output folder, created if missing
    verbose: bool
        if True print progress information
    """
    global all_rxn_nodes, all_cpd_nodes, all_pwy_nodes

    #check if output_dir exist, else create it
    if not output_dir.endswith("/"):
        output_dir += "/"
    if not os.path.isdir(output_dir):
        if verbose: print("Creating folder %s" % output_dir)
        os.makedirs(output_dir)
    #loading padmetSpec
    if padmetSpec_file:
        if verbose: print("Loading %s" % padmetSpec_file)
        padmetSpec = PadmetSpec(padmetSpec_file)
        padmetSpec_name = os.path.splitext(
            os.path.basename(padmetSpec_file))[0]
        padmetSpec_folder = output_dir + padmetSpec_name + "/"
        if not os.path.isdir(padmetSpec_folder):
            if verbose: print(("Creating folder %s" % padmetSpec_folder))
            os.makedirs(padmetSpec_folder)

    #if padmetRef given, create folder for padmetRef
    if padmetRef_file:
        if verbose: print("Loading %s" % padmetRef_file)
        padmetRef = PadmetRef(padmetRef_file)
        padmetRef_name = os.path.splitext(os.path.basename(padmetRef_file))[0]
        padmetRef_folder = output_dir + padmetRef_name + "/"
        if not os.path.isdir(padmetRef_folder):
            if verbose: print("Creating folder %s" % padmetRef_folder)
            os.makedirs(padmetRef_folder)

    #NODES
    #Converting nodes data to tsv format
    #Except for name nodes: added as attributes of their owner
    if padmetRef_file:
        if verbose: print("Extracting nodes from %s" % padmetRef_name)
        with open(padmetRef_folder + "metabolic_network.tsv", 'w') as f:
            fieldnames = ["metabolic_network", "name"]
            writer = csv.writer(f, delimiter="\t")
            writer.writerow(fieldnames)
            writer.writerow([padmetRef_name, padmetRef_name])

        if verbose: print("\tExtracting reactions")
        all_rxn_nodes = [
            node for node in list(padmetRef.dicOfNode.values())
            if node.type == "reaction"
        ]
        if all_rxn_nodes:
            extract_nodes(padmetRef, all_rxn_nodes, "reaction",
                          padmetRef_folder + "rxn.tsv",
                          {"in@metabolic_network": [padmetRef_name]})
        if verbose: print("\t%s reactions" % len(all_rxn_nodes))

        if verbose: print("\tExtracting compounds")
        # Compounds are the targets of consumes/produces relations.
        all_cpd_nodes = set([
            padmetRef.dicOfNode[rlt.id_out]
            for rlt in padmetRef.getAllRelation()
            if rlt.type in ["consumes", "produces"]
        ])
        if all_cpd_nodes:
            extract_nodes(padmetRef, all_cpd_nodes, "compound",
                          padmetRef_folder + "cpd.tsv")
        if verbose: print("\t%s compounds" % len(all_cpd_nodes))

        if verbose: print("\tExtracting pathways")
        all_pwy_nodes = [
            node for node in list(padmetRef.dicOfNode.values())
            if node.type == "pathway"
        ]
        if all_pwy_nodes:
            extract_nodes(padmetRef, all_pwy_nodes, "pathway",
                          padmetRef_folder + "pwy.tsv")
        if verbose: print("\t%s pathways" % len(all_pwy_nodes))

        if verbose: print("\tExtracting xrefs")
        all_xrefs_nodes = [
            node for node in list(padmetRef.dicOfNode.values())
            if node.type == "xref"
        ]
        if all_xrefs_nodes:
            extract_nodes(padmetRef, all_xrefs_nodes, "xref",
                          padmetRef_folder + "xref.tsv")
        if verbose: print("\t%s xrefs" % len(all_xrefs_nodes))

        #RELATIONS
        #Converting relations data to tsv format
        if verbose: print("Extracting relations from %s" % padmetRef_name)
        rxn_cpd_data = []
        rxn_pwy_data = []
        entity_xref_data = []
        if verbose:
            print(
                "\tExtracting relations reaction-[consumes/produces]-compound")
        if verbose:
            # Fixed message: the relation extracted below is "is_in_pathway"
            # (the original printed the non-existent "is_inclued_in").
            print("\tExtracting relations reaction-is_in_pathway-pathway")
        if verbose: print("\tExtracting relations reactions-has_xref-xref")

        for rxn_node in all_rxn_nodes:
            rxn_id = rxn_node.id

            #all consumes/produces relations
            cp_rlt = [
                rlt for rlt in padmetRef.dicOfRelationIn[rxn_id]
                if rlt.type in ["consumes", "produces"]
            ]
            rxn_cpd_data += extract_rxn_cpd(cp_rlt)
            #all is_in_pathway relations
            pwy_rlt = [
                rlt for rlt in padmetRef.dicOfRelationIn[rxn_id]
                if rlt.type == "is_in_pathway"
            ]
            if pwy_rlt: rxn_pwy_data += extract_rxn_pwy(pwy_rlt)
            #all has_xref relations
            rxn_xref_rlt = [
                rlt for rlt in padmetRef.dicOfRelationIn[rxn_id]
                if rlt.type == "has_xref"
            ]
            if rxn_xref_rlt:
                entity_xref_data += extract_entity_xref(
                    rxn_xref_rlt, padmetRef)

        if verbose: print("\tExtracting relations compound-has_xref-xref")
        for cpd_node in all_cpd_nodes:
            cpd_id = cpd_node.id
            try:
                cpd_xref_rlt = [
                    rlt for rlt in padmetRef.dicOfRelationIn[cpd_id]
                    if rlt.type == "has_xref"
                ]
                if cpd_xref_rlt:
                    entity_xref_data += extract_entity_xref(
                        cpd_xref_rlt, padmetRef)
            except KeyError:
                # Compound with no outgoing relations at all: nothing to export.
                pass

        if verbose: print("\tExtracting relations pwy-has_xref-xref")
        for pwy_node in all_pwy_nodes:
            pwy_id = pwy_node.id
            try:
                pwy_xref_rlt = [
                    rlt for rlt in padmetRef.dicOfRelationIn[pwy_id]
                    if rlt.type == "has_xref"
                ]
                if pwy_xref_rlt:
                    entity_xref_data += extract_entity_xref(
                        pwy_xref_rlt, padmetRef)
            except KeyError:
                pass

        if rxn_cpd_data:
            if verbose: print("\t\tCreating rxn_cpd.tsv")
            rxn_cpd_file(rxn_cpd_data, padmetRef_folder + "rxn_cpd.tsv")
        if rxn_pwy_data:
            if verbose: print("\t\tCreating rxn_pwy.tsv")
            rxn_pwy_file(rxn_pwy_data, padmetRef_folder + "rxn_pwy.tsv")
        if entity_xref_data:
            if verbose: print("\t\tCreating entity_xref.tsv")
            entity_xref_file(entity_xref_data,
                             padmetRef_folder + "entity_xref.tsv")

    else:
        if verbose: print("No given padmetRef")
        # Independent empty lists.  The original used "[[]] * 4", which aliases
        # a single list four ways (a latent mutation hazard) and also bound a
        # dead, never-read "all_xref_nodes" local.
        all_rxn_nodes, all_cpd_nodes, all_pwy_nodes = [], [], []

    if padmetSpec_file:
        if verbose: print("Extracting nodes from %s" % padmetSpec_name)
        with open(padmetSpec_folder + "metabolic_network.tsv", 'w') as f:
            fieldnames = ["metabolic_network", "name"]
            writer = csv.writer(f, delimiter="\t")
            writer.writerow(fieldnames)
            writer.writerow([padmetSpec_name, padmetSpec_name])

        if verbose: print("\tExtracting reactions")
        spec_rxn_nodes = [
            node for node in list(padmetSpec.dicOfNode.values())
            if node.type == "reaction"
        ]
        # Guard on the network's own nodes (the original tested the *reference*
        # lists here and below, silently skipping the whole network export
        # whenever no padmetRef was given).
        if spec_rxn_nodes:
            extract_nodes(padmetSpec, spec_rxn_nodes, "reaction",
                          padmetSpec_folder + "rxn.tsv",
                          {"in@metabolic_network": [padmetSpec_name]})
        if verbose: print("\t%s reactions" % len(spec_rxn_nodes))

        if verbose: print("\tExtracting compounds")
        spec_cpd_nodes = set([
            padmetSpec.dicOfNode[rlt.id_out]
            for rlt in padmetSpec.getAllRelation()
            if rlt.type in ["consumes", "produces"]
        ])
        if spec_cpd_nodes:
            extract_nodes(padmetSpec, spec_cpd_nodes, "compound",
                          padmetSpec_folder + "cpd.tsv")
        if verbose: print("\t%s compounds" % len(spec_cpd_nodes))

        if verbose: print("\tExtracting pathways")
        spec_pwy_nodes = [
            node for node in list(padmetSpec.dicOfNode.values())
            if node.type == "pathway"
        ]
        if spec_pwy_nodes:
            extract_nodes(padmetSpec, spec_pwy_nodes, "pathway",
                          padmetSpec_folder + "pwy.tsv")
        if verbose: print("\t%s pathways" % len(spec_pwy_nodes))

        if verbose: print("\tExtracting xrefs")
        spec_xrefs_nodes = [
            node for node in list(padmetSpec.dicOfNode.values())
            if node.type == "xref"
        ]
        if spec_xrefs_nodes:
            extract_nodes(padmetSpec, spec_xrefs_nodes, "xref",
                          padmetSpec_folder + "xref.tsv")
        if verbose: print("\t%s xrefs" % len(spec_xrefs_nodes))

        if verbose: print("\tExtracting all genes")
        spec_genes_nodes = [
            node for node in list(padmetSpec.dicOfNode.values())
            if node.type == "gene"
        ]
        if spec_genes_nodes:
            extract_nodes(padmetSpec,
                          spec_genes_nodes,
                          "gene",
                          padmetSpec_folder + "gene.tsv",
                          opt_col={"in@metabolic_network": [padmetSpec_name]})
        if verbose: print("\t%s genes" % len(spec_genes_nodes))

        if verbose: print("\tExtracting all reconstructionData")
        spec_recData_nodes = [
            node for node in list(padmetSpec.dicOfNode.values())
            if node.type == "reconstructionData"
        ]
        # Guard on the reconstructionData nodes themselves (the original
        # tested spec_genes_nodes — copy-paste slip).
        if spec_recData_nodes:
            extract_nodes(padmetSpec, spec_recData_nodes, "reconstructionData",
                          padmetSpec_folder + "reconstructionData.tsv")
        if verbose: print("\t%s reconstructionData" % len(spec_recData_nodes))

        if verbose: print("Extracting relations from %s" % padmetSpec_name)
        rxn_cpd_data = []
        rxn_pwy_data = []
        rxn_gene_data = []
        entity_xref_data = []
        rxn_rec_data = []
        if verbose:
            print(
                "\tExtracting relations reaction-[consumes/produces]-compound")
        if verbose:
            print("\tExtracting relations reaction-is_in_pathway-pathway")
        if verbose: print("\tExtracting relations reactions-has_xref-xref")
        if verbose: print("\tExtracting relations reactions-is_linked_to-gene")
        if verbose:
            print(
                "\tExtracting relations reactions-has_metadata-reconstructionData"
            )

        for rxn_node in spec_rxn_nodes:
            rxn_id = rxn_node.id
            #all consumes/produces relations
            rxn_cpd_rlt = [
                rlt for rlt in padmetSpec.dicOfRelationIn[rxn_id]
                if rlt.type in ["consumes", "produces"]
            ]
            rxn_cpd_data += extract_rxn_cpd(rxn_cpd_rlt)
            #all is_in_pathway relations
            rxn_pwy_rlt = [
                rlt for rlt in padmetSpec.dicOfRelationIn[rxn_id]
                if rlt.type == "is_in_pathway"
            ]
            rxn_pwy_data += extract_rxn_pwy(rxn_pwy_rlt)
            #all has_xref relations
            rxn_xref_rlt = [
                rlt for rlt in padmetSpec.dicOfRelationIn[rxn_id]
                if rlt.type == "has_xref"
            ]
            entity_xref_data += extract_entity_xref(rxn_xref_rlt, padmetSpec)
            #all is_linked_to relations
            rxn_gene_rlt = [
                rlt for rlt in padmetSpec.dicOfRelationIn[rxn_id]
                if rlt.type == "is_linked_to"
            ]
            rxn_gene_data += extract_rxn_gene(rxn_gene_rlt)
            #all reconstructionData
            rxn_rec_rlt = [
                rlt for rlt in padmetSpec.dicOfRelationIn[rxn_id]
                if rlt.type == "has_reconstructionData"
            ]
            rxn_rec_data += extract_rxn_rec(rxn_rec_rlt)

        if verbose: print("\tExtracting relations compound-has_xref-xref")
        for cpd_node in spec_cpd_nodes:
            cpd_id = cpd_node.id
            try:
                cpd_xref_rlt = [
                    rlt for rlt in padmetSpec.dicOfRelationIn[cpd_id]
                    if rlt.type == "has_xref"
                ]
                entity_xref_data += extract_entity_xref(
                    cpd_xref_rlt, padmetSpec)
            except KeyError:
                pass

        if padmetRef_file:
            if verbose:
                print(
                    "\tExtracting pathways's completion rate and creating pwy_rate.tsv"
                )
            pwy_rate(padmetRef, padmetSpec, padmetSpec_name,
                     padmetSpec_folder + "pwy_rate.tsv")
        else:
            if verbose:
                print("No padmetRef given unable to calculate pathway ratio")

        if rxn_cpd_data:
            if verbose: print("\t\tCreating rxn_cpd.tsv")
            rxn_cpd_file(rxn_cpd_data, padmetSpec_folder + "rxn_cpd.tsv")
        if rxn_pwy_data:
            if verbose: print("\t\tCreating rxn_pwy.tsv")
            rxn_pwy_file(rxn_pwy_data, padmetSpec_folder + "rxn_pwy.tsv")
        if entity_xref_data:
            if verbose: print("\t\tCreating entity_xref.tsv")
            entity_xref_file(entity_xref_data,
                             padmetSpec_folder + "entity_xref.tsv")
        if rxn_gene_data:
            if verbose: print("\t\tCreating rxn_gene.tsv")
            rxn_gene_file(rxn_gene_data, padmetSpec_folder + "rxn_gene.tsv")
        if rxn_rec_data:
            if verbose: print("\t\tCreating rxn_reconstructionData.tsv")
            rxn_rec_file(rxn_rec_data,
                         padmetSpec_folder + "rxn_reconstructionData.tsv")
Ejemplo n.º 16
0
def main():
    """Draw a pathway from a padmetRef as a reaction/metabolite digraph.

    Reactions present in the padmetSpec network are colored light green,
    missing ones red, and metabolites sky blue.  The layout is computed by
    graphviz (neato) and the figure is shown interactively.
    """
    args = docopt.docopt(__doc__)

    padmet_ref = PadmetRef(args["--padmetRef"])
    padmet = PadmetSpec(args["--padmetSpec"])
    pathway_id = args["--pathway"]

    # All reactions of the pathway in the reference database.  .get returns
    # None for an unknown pathway, which the comprehension turns into a
    # TypeError caught below.
    try:
        all_reactions = [rlt.id_in for rlt in padmet_ref.dicOfRelationOut.get(pathway_id,None)
        if rlt.type == "is_in_pathway"]
    except TypeError:
        print("%s not in padmetRef" %pathway_id)
        exit()

    # Subset of those reactions actually present in the network.
    reactions_in_network = []
    for reaction_id in all_reactions:
        if reaction_id in padmet.dicOfNode:
            reactions_in_network.append(reaction_id)

    DG = nx.DiGraph()

    custom_node_color = {}
    for reaction_id in all_reactions:

        # Reaction colors: green if the network has it, red otherwise.
        if reaction_id in reactions_in_network:
            custom_node_color[reaction_id] = "lightgreen"
        else:
            custom_node_color[reaction_id] = "red"

        # Reactants & products for each reaction
        reactants = [rlt.id_out
            for rlt in padmet_ref.dicOfRelationIn.get(reaction_id, None)
                if rlt.type == "consumes"]
        products = [rlt.id_out
            for rlt in padmet_ref.dicOfRelationIn.get(reaction_id, None)
                if rlt.type == "produces"]

        for reac in reactants:
            custom_node_color[reac] = "skyblue"
            DG.add_edge(reac, reaction_id)
        for prod in products:
            custom_node_color[prod] = "skyblue"
            DG.add_edge(reaction_id, prod)

    # https://networkx.github.io/documentation/latest/reference/generated/networkx.drawing.nx_pylab.draw_networkx.html
    # apt-get install graphviz graphviz-dev (python-pygraphviz)
    # pip install pygraphviz

    # NOTE: the keyword is 'nodelist' (the original passed the unknown
    # 'node_list', so the color list was applied to the graph's default node
    # ordering, which need not match custom_node_color).
    nx.draw_networkx(DG,
                     pos=graphviz_layout(DG, prog='neato'), # Layout from graphviz
                     node_size=1600,
                     arrows=True,
                     font_size=11,      # font-size for labels
                     node_shape='s',    # shape of nodes
                     alpha=0.6,         # node & edge transparency
                     width=1.5,         # line width for edges
                     nodelist=list(custom_node_color.keys()),
                     node_color=list(custom_node_color.values()))
    plt.axis('off')

    plt.show()
Ejemplo n.º 17
0
def pathway_production(padmet_path,
                       output,
                       verbose=None,
                       number_cpu=None,
                       padmet_ref_path=None,
                       pathway_completion_ratio=None):
    """ Create two files degradation_matrix.tsv and biosynthesis_matrix.tsv.
    These files have metabolites as rows and organisms as columns.
    It shows the input (degradation_matrix.tsv) and output (biosynthesis_matrix.tsv) of pathways in the organism.

    Parameters
    ----------
    padmet_path: str
        pathname of the padmet files, sep all files by ',', ex: /path/padmet1.padmet,/path/padmet2.padmet OR a folder
    output: str
        pathname of the output folder
    verbose: bool
        if True print information
    number_cpu: int
        number of CPU (only validated here; processing is sequential)
    padmet_ref_path: str
        pathname of the padmetRef file; required when pathway_completion_ratio is given
    pathway_completion_ratio: float
        ratio between 0 and 1: minimal completeness for a pathway to be considered
    """
    if not os.path.exists(output):
        if verbose: print("Creating %s" % output)
        os.makedirs(output)
    else:
        if verbose:
            print(
                "%s already exist, old comparison output folders will be overwritten"
                % output)

    # Accept either a folder of padmet files or a comma-separated list of paths.
    if os.path.isdir(padmet_path):
        all_files = [
            os.path.join(padmet_path, f) for f in next(os.walk(padmet_path))[2]
        ]
    else:
        all_files = padmet_path.split(",")

    if number_cpu:
        try:
            number_cpu_to_use = int(number_cpu)
        except ValueError:
            raise ValueError('The number of CPU must be an integer.')
    else:
        number_cpu_to_use = 1

    if padmet_ref_path is None and pathway_completion_ratio is not None:
        sys.exit(
            'pathway_completion_ratio option needs a padmetRef to compute the pathway completness ratio.'
        )

    if pathway_completion_ratio is not None:
        try:
            pathway_completion_ratio = float(pathway_completion_ratio)
        except ValueError:
            sys.exit('pathway_completion_ratio must be a float')
        # Fix: the original message claimed "must be < 0 and > 1"; the valid
        # range accepted by this check is [0, 1].
        if pathway_completion_ratio < 0 or pathway_completion_ratio > 1:
            sys.exit('pathway_completion_ratio must be between 0 and 1')

    # Map each reference pathway to the set of its reactions, used downstream
    # to compute pathway completeness.
    if padmet_ref_path:
        padmet_ref_pathways = {}
        padmetRef = PadmetRef(padmet_ref_path)
        all_pwys = [
            node for node in list(padmetRef.dicOfNode.values())
            if node.type == 'pathway'
        ]
        for pwy in all_pwys:
            all_rxns = set([
                rlt.id_in
                for rlt in padmetRef.dicOfRelationOut.get(pwy.id, [])
                if rlt.type == "is_in_pathway"
            ])
            padmet_ref_pathways[pwy.id] = all_rxns
    else:
        padmet_ref_pathways = None

    # Collect, per organism, the input and output metabolites of its pathways.
    all_metabolites = []
    all_pathways = {}
    organisms = []
    for padmet_file_path in all_files:
        padmet_id = os.path.splitext(os.path.basename(padmet_file_path))[0]
        pathway_inputs, pathways_outputs = extract_pahways_inputs_outputs(
            padmet_file_path, padmet_ref_pathways, pathway_completion_ratio)
        all_pathways[padmet_id] = pathway_inputs, pathways_outputs
        all_metabolites.extend(pathway_inputs.keys())
        all_metabolites.extend(pathways_outputs.keys())
        organisms.append(padmet_id)

    all_metabolites = set(all_metabolites)

    # Build one row per metabolite: the pathways consuming (degradation) or
    # producing (biosynthesis) it, per organism; '' when absent.
    degradation_matrix = []
    biosynthesis_matrix = []
    for metabolite in all_metabolites:
        degradation_matrix.append([metabolite] + [
            ','.join(all_pathways[organism][0][metabolite]) if metabolite in
            all_pathways[organism][0] else '' for organism in organisms
        ])
        biosynthesis_matrix.append([metabolite] + [
            ','.join(all_pathways[organism][1][metabolite]) if metabolite in
            all_pathways[organism][1] else '' for organism in organisms
        ])

    # Rows that are empty for every organism are skipped.
    degradation_file = os.path.join(output, 'degradation_matrix.tsv')
    with open(degradation_file, 'w') as degradation_output_file:
        csvwriter = csv.writer(degradation_output_file, delimiter='\t')
        # Fix: header was misspelled 'Metaboltie'.
        csvwriter.writerow(['Metabolite', *organisms])
        for row in degradation_matrix:
            if ''.join(row[1:]) != '':
                csvwriter.writerow([*row])

    biosynthesis_file = os.path.join(output, 'biosynthesis_matrix.tsv')
    with open(biosynthesis_file, 'w') as biosynthesis_output_file:
        csvwriter = csv.writer(biosynthesis_output_file, delimiter='\t')
        csvwriter.writerow(['Metabolite', *organisms])
        for row in biosynthesis_matrix:
            if ''.join(row[1:]) != '':
                csvwriter.writerow([*row])
Ejemplo n.º 18
0
def from_pgdb_to_padmet(pgdb_folder, db='NA', version='NA', source='GENOME', extract_gene=False, no_orphan=False, enhanced_db=False, padmetRef_file=None, verbose=False):
    """
    Create a padmet instance from a Pathway Tools PGDB flat-file folder.

    Two construction modes:
    - with padmetRef_file: a PadmetSpec is built by copying, from the reference
      padmet, every reaction listed in reactions.dat (the db/version arguments
      are overridden by the reference's DB_info);
    - without padmetRef_file: a PadmetRef is built from scratch by parsing the
      .dat files with a hard-coded relation policy.

    Parameters
    ----------
    pgdb_folder: str
        path to pgdb
    db: str
        pgdb name, default is 'NA'
    version: str
        pgdb version, default is 'NA'
    source: str
        tag reactions for traceability, default is 'GENOME'
    extract_gene: bool
        if true extract genes information
    no_orphan: bool
        if true, remove reactions without genes associated
    enhanced_db: bool
        if true, read metabolic-reactions.xml sbml file and add information in final padmet
    padmetRef_file: str
        path to padmetRef corresponding to metacyc in padmet format
    verbose: bool
        if True print information
    
    Returns
    -------
    padmet.padmetRef:
        padmet instance with pgdb within pgdb data
    """
    # Module-level state shared with the *_parser helper functions, which read
    # these globals rather than taking them as parameters.
    global regex_purge, regex_xref, list_of_relation, def_compart_in, def_compart_out
    regex_purge = re.compile("<.*?>|\|")
    regex_xref = re.compile('^\((?P<DB>\S*)\s*"(?P<ID>\S*)"')
    list_of_relation = []
    # Default compartment codes for reaction inputs ("c") and outputs ("e").
    def_compart_in = "c"
    def_compart_out = "e"
    #parsing args
    source = source.upper()
    
    # Expected Pathway Tools flat files inside the PGDB folder.
    classes_file, compounds_file, proteins_file, reactions_file, enzrxns_file, pathways_file = \
    [os.path.join(pgdb_folder,_file) for _file in ["classes.dat", "compounds.dat", "proteins.dat", "reactions.dat", "enzrxns.dat", "pathways.dat"]]
    if enhanced_db:
        metabolic_reactions = os.path.join(pgdb_folder,"metabolic-reactions.xml")
    else:
        metabolic_reactions = None
    if extract_gene:
        genes_file = os.path.join(pgdb_folder,"genes.dat")
    else:
        genes_file = None

    now = datetime.now()
    today_date = now.strftime("%Y-%m-%d")
    if padmetRef_file:
        # Mode 1: copy reactions from the reference padmet.
        padmet = PadmetSpec()
        padmetRef = PadmetRef(padmetRef_file)
        # db/version arguments are overridden by the reference's DB_info.
        version = padmetRef.info["DB_info"]["version"]
        db = padmetRef.info["DB_info"]["DB"]
        dbNotes = {"PADMET":{"creation":today_date,"version":"2.6"},"DB_info":{"DB":db,"version":version}}
        padmet.setInfo(dbNotes)

        padmet.setPolicy(padmetRef)
        # reactions.dat lines look like "UNIQUE-ID - RXN-123"; keep the id part.
        with open(reactions_file, 'r') as f:
            rxns_id = [line.split(" - ")[1] for line in f.read().splitlines() if line.startswith("UNIQUE-ID")]
        count = 0
        for rxn_id in rxns_id:
            count += 1
            if verbose: print("%s/%s Copy %s" %(count, len(rxns_id), rxn_id))
            try:
                padmet.copyNode(padmetRef, rxn_id)
                # Attach a reconstructionData node recording where this
                # reaction came from (source/tool/category).
                reconstructionData_id = rxn_id+"_reconstructionData_"+source
                if reconstructionData_id in list(padmet.dicOfNode.keys()) and verbose:
                    print("Warning: The reaction %s seems to be already added from the same source %s" %(rxn_id, source))
                reconstructionData = {"SOURCE":[source],"TOOL":["PATHWAYTOOLS"],"CATEGORY":["ANNOTATION"]}
                reconstructionData_rlt = Relation(rxn_id,"has_reconstructionData",reconstructionData_id)
                padmet.dicOfNode[reconstructionData_id] = Node("reconstructionData", reconstructionData_id, reconstructionData)
                padmet._addRelation(reconstructionData_rlt)

            except TypeError:
                # copyNode raises TypeError when rxn_id is unknown to padmetRef.
                print("%s not in padmetRef" %(rxn_id))

        if extract_gene:
            if verbose: print("parsing genes")
            map_gene_ids = genes_parser(genes_file, padmet, verbose)
            if verbose: print("parsing proteins")
            dict_protein_gene_id = proteins_parser(proteins_file, padmet, verbose)
            mapped_dict_protein_gene_id = map_gene_id(dict_protein_gene_id, map_gene_ids)
            if verbose: print("parsing association enzrxns")
            enzrxns_parser(enzrxns_file, padmet, mapped_dict_protein_gene_id, source, verbose)

    else:
        # Mode 2: build a PadmetRef from scratch with a fixed relation policy
        # (triples: [type_in, relation, type_out], optionally with attributes).
        POLICY_IN_ARRAY = [['class','is_a_class','class'], ['class','has_name','name'], ['class','has_xref','xref'], ['class','has_suppData','suppData'],
                        ['compound','is_a_class','class'], ['compound','has_name','name'], ['compound','has_xref','xref'], ['compound','has_suppData','suppData'],
                        ['gene','is_a_class','class'], ['gene','has_name','name'], ['gene','has_xref','xref'], ['gene','has_suppData','suppData'], ['gene','codes_for','protein'],
                        ['pathway','is_a_class','class'], ['pathway','has_name','name'], ['pathway','has_xref','xref'], ['pathway','is_in_pathway','pathway'],
                        ['protein','is_a_class','class'], ['protein','has_name','name'], ['protein','has_xref','xref'], ['protein','has_suppData','suppData'], ['protein','catalyses','reaction'],
                        ['protein','is_in_species','class'],
                        ['reaction','is_a_class','class'], ['reaction','has_name','name'], ['reaction','has_xref','xref'], ['reaction','has_suppData','suppData'], ['reaction','has_reconstructionData','reconstructionData'], ['reaction','is_in_pathway','pathway'],
                        ['reaction','consumes','class','STOICHIOMETRY','X','COMPARTMENT','Y'], ['reaction','produces','class','STOICHIOMETRY','X','COMPARTMENT','Y'],
                        ['reaction','consumes','compound','STOICHIOMETRY','X','COMPARTMENT','Y'], ['reaction','produces','compound','STOICHIOMETRY','X','COMPARTMENT','Y'],
                        ['reaction','consumes','protein','STOICHIOMETRY','X','COMPARTMENT','Y'], ['reaction','produces','protein','STOICHIOMETRY','X','COMPARTMENT','Y'],
                        ['reaction','is_linked_to','gene','SOURCE:ASSIGNMENT','X:Y']]
        dbNotes = {"PADMET":{"creation":today_date,"version":"2.6"},"DB_info":{"DB":db,"version":version}}
        padmet = PadmetRef()
        if verbose: print("setting policy")
        padmet.setPolicy(POLICY_IN_ARRAY)
        if verbose: print("setting dbInfo")
        padmet.setInfo(dbNotes)
    
    
        if verbose: print("parsing classes")
        classes_parser(classes_file, padmet, verbose)
    
        if verbose: print("parsing compounds")
        compounds_parser(compounds_file, padmet, verbose)
    
        if verbose: print("parsing reactions")
        reactions_parser(reactions_file, padmet, extract_gene, source, verbose)
    
        if verbose: print("parsing pathways")
        pathways_parser(pathways_file, padmet, verbose)
    
        if extract_gene:
            if verbose: print("parsing genes")
            map_gene_ids = genes_parser(genes_file, padmet, verbose)
            if verbose: print("parsing proteins")
            dict_protein_gene_id = proteins_parser(proteins_file, padmet, verbose)
            mapped_dict_protein_gene_id = map_gene_id(dict_protein_gene_id, map_gene_ids)
            if verbose: print("parsing association enzrxns")
            enzrxns_parser(enzrxns_file, padmet, mapped_dict_protein_gene_id, source, verbose)
    
        if metabolic_reactions is not None:
            if verbose: print("enhancing db from metabolic-reactions.xml")
            padmet = enhance_db(metabolic_reactions, padmet, extract_gene, verbose)
    
    # The parsers accumulated relations in the shared global list; index them
    # into the padmet's in/out relation dictionaries.
    for rlt in list_of_relation:
        try:
            padmet.dicOfRelationIn[rlt.id_in].append(rlt)
        except KeyError:
            padmet.dicOfRelationIn[rlt.id_in] = [rlt]
        try:
            padmet.dicOfRelationOut[rlt.id_out].append(rlt)
        except KeyError:
            padmet.dicOfRelationOut[rlt.id_out] = [rlt]

    if extract_gene and no_orphan:
        # Drop reactions with no gene association, then genes no longer linked
        # to any reaction.
        all_reactions = [node for node in list(padmet.dicOfNode.values()) if node.type == "reaction"]
        rxn_to_del = [r for r in all_reactions if not any([rlt for rlt in padmet.dicOfRelationIn[r.id] if rlt.type == "is_linked_to"])]
        for rxn in rxn_to_del:
            padmet.delNode(rxn.id)
        if verbose:
            print("%s/%s orphan reactions (without gene association) deleted" %(len(rxn_to_del), len(all_reactions)))
        all_genes_linked = set([rlt.id_out for rlt in padmet.getAllRelation() if rlt.type == "is_linked_to"])
        all_genes = set([node.id for node in list(padmet.dicOfNode.values()) if node.type == "gene"])
        count = 0
        for gene_id in [g for g in all_genes if g not in all_genes_linked]:
            count += 1
            #if verbose: print("Removing gene without gene assoc %s" %gene_id)
            padmet.dicOfNode.pop(gene_id)
        if verbose:
            print("%s/%s orphan genes (not linked to any reactions) deleted" %(count, len(all_genes)))

    # Remove suspicious reactions that only consume or only produce (likely
    # transport artifacts) — flagged on stdout before deletion.
    rxns = [node.id for node in list(padmet.dicOfNode.values()) if node.type == "reaction"]
    for rxn_id in rxns:
        cp_rlts = set([rlt.type for rlt in padmet.dicOfRelationIn[rxn_id] if rlt.type in ["consumes","produces"]])
        if len(cp_rlts) == 1:
            print("rxn only consume or produce, transport ???: %s" %rxn_id)
            padmet.delNode(rxn_id)

    return padmet
Ejemplo n.º 19
0
def visu_path_compounds(padmet_pathname,
                        padmet_ref_pathname,
                        pathway_ids,
                        output_file,
                        hide_currency_metabolites=None):
    """ Extract reactions from pathway and create a compound/reaction graph.

    Reactions present in the network are drawn in light green, missing ones in
    red, compounds in sky blue; the figure is saved to output_file.

    Parameters
    ----------
    padmet_pathname: str
        pathname of the padmet file or a folder containing multiple padmet
    padmet_ref_pathname: str
        pathname of the padmetRef file
    pathway_ids: str
        name of the pathway (can be multiple pathways separated by a ',')
    output_file: str
        pathname of the output picture (extension can be .png or .svg)
    hide_currency_metabolites: bool
        hide currency metabolites (water, protons, common cofactors, ...)
    """
    if os.path.isfile(padmet_pathname):
        padmet = PadmetSpec(padmet_pathname)
    else:
        padmet = padmet_to_padmet.padmet_to_padmet(padmet_pathname)
    padmet_ref = PadmetRef(padmet_ref_pathname)

    pathway_ids = pathway_ids.split(',')
    pwy_all_reactions = []

    if hide_currency_metabolites:
        compounds_to_hide = [
            "PROTON", "WATER", "OXYGEN-MOLECULE", "NADP", "NADPH", "ATP",
            "PPI", "CARBON-DIOXIDE", "Pi", "ADP", "CO-A", "UDP", "NAD", "NADH",
            "AMP", "AMMONIA", "HYDROGEN-PEROXIDE", "Acceptor", "Donor-H2",
            "3-5-ADP", "GDP", "CARBON-MONOXIDE", "GTP", "FAD"
        ]
    else:
        compounds_to_hide = []

    def get_reactions(pathway_id, padmet_ref, pwy_all_reactions):
        # Recursively collect reactions of a pathway; sub-pathways are expanded.
        # Fix: default to [] — with None, a pathway without relations made the
        # comprehension raise TypeError.
        all_reactions = [
            rlt.id_in
            for rlt in padmet_ref.dicOfRelationOut.get(pathway_id, [])
            if rlt.type == "is_in_pathway"
        ]
        for reaction_id in all_reactions:
            if reaction_id in padmet_ref.dicOfNode:
                node_reaction = padmet_ref.dicOfNode[reaction_id]
                if node_reaction.type == "pathway":
                    pwy_all_reactions = get_reactions(node_reaction.id,
                                                      padmet_ref,
                                                      pwy_all_reactions)
                else:
                    if reaction_id not in pwy_all_reactions:
                        pwy_all_reactions.append(reaction_id)

        return pwy_all_reactions

    for pathway_id in pathway_ids:
        if pathway_id in padmet_ref.dicOfNode:
            tmp_pwy_all_reactions = []
            tmp_pwy_all_reactions = get_reactions(pathway_id, padmet_ref,
                                                  tmp_pwy_all_reactions)
            pwy_all_reactions.extend(tmp_pwy_all_reactions)
        else:
            print("Pathway " + pathway_id + " not in PadmetRef " +
                  padmet_ref_pathname)

    # Reactions of the pathway that are actually present in the network.
    reactions_in_network = []
    for reaction_id in pwy_all_reactions:
        if reaction_id in padmet.dicOfNode:
            reactions_in_network.append(reaction_id)

    DG = nx.DiGraph()

    custom_node_color = OrderedDict()
    for reaction_id in pwy_all_reactions:
        # Reaction colors
        if reaction_id in reactions_in_network:
            custom_node_color[reaction_id] = "lightgreen"
        else:
            custom_node_color[reaction_id] = "red"

        # Reactants & products for each reaction.
        # Fix: default to [] instead of None (iterating None raises TypeError).
        reactants = [
            rlt.id_out
            for rlt in padmet_ref.dicOfRelationIn.get(reaction_id, [])
            if rlt.type == "consumes"
        ]
        products = [
            rlt.id_out
            for rlt in padmet_ref.dicOfRelationIn.get(reaction_id, [])
            if rlt.type == "produces"
        ]

        # Reversible reactions get edges in both directions.
        # misc.get guards against reactions lacking a DIRECTION entry.
        reversible = 'REVERSIBLE' in padmet_ref.dicOfNode[reaction_id].misc.get(
            'DIRECTION', [])
        for reac in reactants:
            if reac not in compounds_to_hide:
                if reac not in custom_node_color:
                    custom_node_color[reac] = "skyblue"
                DG.add_edge(reac, reaction_id)
                if reversible:
                    DG.add_edge(reaction_id, reac)
        for prod in products:
            if prod not in compounds_to_hide:
                if prod not in custom_node_color:
                    custom_node_color[prod] = "skyblue"
                DG.add_edge(reaction_id, prod)
                if reversible:
                    DG.add_edge(prod, reaction_id)

    # https://networkx.github.io/documentation/latest/reference/generated/networkx.drawing.nx_pylab.draw_networkx.html
    # apt-get install graphviz graphviz-dev (python-pygraphviz)
    # pip install pygraphviz

    nx.draw_networkx(
        DG,
        pos=graphviz_layout(DG, prog='neato'),  # Layout from graphviz
        node_size=1600,
        arrows=True,
        font_size=11,  # font-size for labels
        node_shape='s',  # shape of nodes
        alpha=0.6,  # node & edge transparency
        width=1.5,  # line width for edges
        nodelist=list(custom_node_color.keys()),
        node_color=[
            custom_node_color[node] for node in list(custom_node_color.keys())
        ])
    plt.axis('off')

    plt.savefig(output_file, bbox_inches='tight')
    plt.clf()
Ejemplo n.º 20
0
def visu_path_pathways(padmet_pathname, padmet_ref_pathname, pathway_ids,
                       output_file):
    """ Extract sub-pathways from (super)pathways and create a compound/pathway graph.

    Pathways present in the network are drawn in light green, missing ones in
    red, compounds in sky blue; the figure is saved to output_file.
    Requires INPUT-COMPOUNDS and OUTPUT-COMPOUNDS in the pathway nodes of both
    the padmet and the padmetRef.

    Parameters
    ----------
    padmet_pathname: str
        pathname of the padmet file or a folder containing multiple padmet
    padmet_ref_pathname: str
        pathname of the padmetRef file
    pathway_ids: str
        name of the pathway (can be multiple pathways separated by a ',')
    output_file: str
        pathname of the output picture (extension can be .png or .svg)
    """
    if os.path.isfile(padmet_pathname):
        padmet = PadmetSpec(padmet_pathname)
    else:
        padmet = padmet_to_padmet.padmet_to_padmet(padmet_pathname)
    padmet_ref = PadmetRef(padmet_ref_pathname)

    # Check if the padmets and padmetref contain the INPUT-COMPOUNDS and OUTPUT-COMPOUNDS in pathway node.misc needed for this analysis.
    padmetref_input_compounds_in_pwys = [
        1 for node_pathway in padmet_ref.dicOfNode
        if padmet_ref.dicOfNode[node_pathway].type == 'pathway'
        and 'INPUT-COMPOUNDS' in padmet_ref.dicOfNode[node_pathway].misc
    ]
    padmetref_output_compounds_in_pwys = [
        1 for node_pathway in padmet_ref.dicOfNode
        if padmet_ref.dicOfNode[node_pathway].type == 'pathway'
        and 'OUTPUT-COMPOUNDS' in padmet_ref.dicOfNode[node_pathway].misc
    ]
    if sum(padmetref_input_compounds_in_pwys) == 0 or sum(
            padmetref_output_compounds_in_pwys) == 0:
        sys.exit(
            "The padmetref " + padmet_ref_pathname +
            " does not contain INPUT-COMPOUNDS and OUTPUT-COMPOUNDS in the pathway node, can't produce the pathway visualization."
        )

    padmet_input_compounds_in_pwys = [
        1 for node_pathway in padmet.dicOfNode
        if padmet.dicOfNode[node_pathway].type == 'pathway'
        and 'INPUT-COMPOUNDS' in padmet.dicOfNode[node_pathway].misc
    ]
    padmet_output_compounds_in_pwys = [
        1 for node_pathway in padmet.dicOfNode
        if padmet.dicOfNode[node_pathway].type == 'pathway'
        and 'OUTPUT-COMPOUNDS' in padmet.dicOfNode[node_pathway].misc
    ]
    if sum(padmet_input_compounds_in_pwys) == 0 or sum(
            padmet_output_compounds_in_pwys) == 0:
        sys.exit(
            "The padmet " + padmet_pathname +
            " does not contain INPUT-COMPOUNDS and OUTPUT-COMPOUNDS in the pathway node, can't produce the pathway visualization."
        )

    # Extract pathway from superpathways.
    pathway_ids = pathway_ids.split(',')
    all_pathways = []

    def get_pathways(pathway_id, padmet_ref, pwy_all_reactions):
        # Recursively collect the sub-pathways of a (super)pathway.
        # Fix: default to [] — with None, a pathway without relations made the
        # comprehension raise TypeError.
        all_reactions_pathways = [
            rlt.id_in
            for rlt in padmet_ref.dicOfRelationOut.get(pathway_id, [])
            if rlt.type == "is_in_pathway"
        ]
        for reaction_pathway_id in all_reactions_pathways:
            if reaction_pathway_id in padmet_ref.dicOfNode:
                node_reaction = padmet_ref.dicOfNode[reaction_pathway_id]
                if node_reaction.type == "pathway":
                    pwy_all_reactions.append(reaction_pathway_id)
                    pwy_all_reactions = get_pathways(node_reaction.id,
                                                     padmet_ref,
                                                     pwy_all_reactions)

        return pwy_all_reactions

    for pathway_id in pathway_ids:
        if pathway_id in padmet_ref.dicOfNode:
            tmp_pwy_all_pathways = []
            tmp_pwy_all_pathways = get_pathways(pathway_id, padmet_ref,
                                                tmp_pwy_all_pathways)
            all_pathways.extend(tmp_pwy_all_pathways)
        else:
            print("Pathway " + pathway_id + " not in PadmetRef " +
                  padmet_ref_pathname)

    # Find pathway in the padmet file.
    pathways_in_network = []
    for pathway_id in all_pathways:
        if pathway_id in padmet.dicOfNode:
            pathways_in_network.append(pathway_id)

    # Create the graph.
    DG = nx.DiGraph()
    custom_node_color = OrderedDict()

    for pwy in all_pathways:
        node_pathway = padmet_ref.dicOfNode[pwy]
        if pwy in pathways_in_network:
            custom_node_color[pwy] = "lightgreen"
        else:
            custom_node_color[pwy] = "red"
        # Link each pathway to its input and output compounds.
        if 'INPUT-COMPOUNDS' in node_pathway.misc and 'OUTPUT-COMPOUNDS' in node_pathway.misc:
            for reactant in node_pathway.misc['INPUT-COMPOUNDS'][0].split(','):
                if reactant not in custom_node_color:
                    custom_node_color[reactant] = "skyblue"
                DG.add_edge(reactant, pwy)
            for product in node_pathway.misc['OUTPUT-COMPOUNDS'][0].split(','):
                if product not in custom_node_color:
                    custom_node_color[product] = "skyblue"
                DG.add_edge(pwy, product)

    # https://networkx.github.io/documentation/latest/reference/generated/networkx.drawing.nx_pylab.draw_networkx.html
    # apt-get install graphviz graphviz-dev (python-pygraphviz)
    # pip install pygraphviz

    nx.draw_networkx(
        DG,
        pos=graphviz_layout(DG, prog='neato'),  # Layout from graphviz
        node_size=1600,
        arrows=True,
        font_size=11,  # font-size for labels
        node_shape='s',  # shape of nodes
        alpha=0.6,  # node & edge transparency
        width=1.5,  # line width for edges
        nodelist=list(custom_node_color.keys()),
        node_color=[
            custom_node_color[node] for node in list(custom_node_color.keys())
        ])
    plt.axis('off')

    plt.savefig(output_file, bbox_inches='tight')
    plt.clf()
Ejemplo n.º 21
0
def reaction_figure_creation(reaction_file,
                             output_folder,
                             upset_cluster=None,
                             padmetRef_file=None,
                             pvclust=None,
                             verbose=False):
    """
    Create dendrogram, upset figure (if upset_cluster argument) and compare reactions in species.

    Parameters
    ----------
    reaction_file: str
        path to reaction file (reactions.tsv)
    output_folder: str
        path to output folder
    upset_cluster: int
        the number of cluster you want in the intervene figure
    padmetRef_file: str
        path to padmet ref file (used to map reaction ids to EC numbers)
    pvclust: bool
        boolean to launch or not R pvclust dendrogram
    verbose: bool
        if True print information
    """
    # Validate the input before creating any output folder.
    if not os.path.exists(reaction_file):
        raise FileNotFoundError("No reactions.tsv file accessible at " +
                                reaction_file)

    # Check if output_folder exists, if not create it.
    output_folder_tree_cluster = output_folder + '/tree_cluster/'
    output_folder_comparison = output_folder + '/tree_cluster/comparison_cluster/'
    output_folder_specific = output_folder_tree_cluster + 'specific_reactions/'
    output_folder_absent = output_folder_tree_cluster + 'absent_reactions/'
    if upset_cluster:
        output_folder_upset = output_folder + '/upset_graph'
        temp_data_folder = output_folder + '/upset_graph/temp_data/'
        folders = [
            output_folder, output_folder_tree_cluster,
            output_folder_comparison, output_folder_specific,
            output_folder_absent, output_folder_upset, temp_data_folder
        ]
    else:
        folders = [
            output_folder, output_folder_tree_cluster,
            output_folder_comparison, output_folder_specific,
            output_folder_absent
        ]

    for folder in folders:
        if not os.path.isdir(folder):
            os.mkdir(folder)

    # Read the reactions file with pandas.
    all_reactions_dataframe = pa.read_csv(reaction_file, sep='\t')
    # Keep column containing absence-presence of reactions.
    # (columns with (sep=;) are column with gene name linked to reactions)
    # (columns with _formula contain the reaction formula)
    columns = [
        column for column in all_reactions_dataframe.columns
        if '(sep=;)' not in column
    ]
    columns = [column for column in columns if '_formula' not in column]
    reactions_dataframe = all_reactions_dataframe[columns].copy()

    reactions_dataframe.set_index('reaction', inplace=True)

    # Transpose the matrix to have species as index and reactions as columns.
    absence_presence_matrix = reactions_dataframe.transpose()

    # Compute a distance matrix using the Jaccard distance between species and condense it.
    condensed_distance_matrix_jaccard = pdist(absence_presence_matrix,
                                              metric='jaccard')

    # Hierarchical clustering on the condensed distance matrix.
    linkage_matrix = linkage(condensed_distance_matrix_jaccard,
                             method='average',
                             metric='jaccard')

    # Draw a dendrogram of the clustering.
    reaction_dendrogram = dendrogram(linkage_matrix,
                                     labels=absence_presence_matrix.index,
                                     leaf_font_size=100,
                                     leaf_rotation=90)

    # Extract organisms.
    organisms = absence_presence_matrix.index.tolist()

    # Create Newick tree
    tree = to_tree(linkage_matrix, False)
    newick_tree = getNewick(tree, "", tree.dist, organisms)
    newick_path = os.path.join(output_folder, 'newick.txt')
    with open(newick_path, 'w') as f:
        f.write(newick_tree)

    # Specific reactions for each species.
    absent_and_specific_reactions(reactions_dataframe,
                                  output_folder_tree_cluster,
                                  output_folder_specific, output_folder_absent,
                                  organisms)

    if pvclust:
        pvclust_reactions_dataframe = all_reactions_dataframe[columns].copy()

        pvclust_reactions_dataframe.set_index('reaction', inplace=True)
        # Create pvclust dendrogram.
        pvclust_dendrogram(pvclust_reactions_dataframe, organisms,
                           output_folder)

    # Extract all the nodes inside the clustering.
    _, node_list = to_tree(linkage_matrix, rd=True)

    if padmetRef_file:
        padmet_ref = PadmetRef(padmetRef_file)
        metacyc_to_ecs = {
            node.id: node.misc['EC-NUMBER']
            for node in padmet_ref.dicOfNode.values()
            if node.type == "reaction" and 'EC-NUMBER' in node.misc
        }
    else:
        metacyc_to_ecs = {}

    # For each cluster, give the list of organisms in it.
    # Then write it in a file.
    len_longest_cluster_id = len(str(max([node.id for node in node_list])))
    cluster_leaf_species = {}
    for node in node_list:
        node_leafs = node.pre_order(lambda child: organisms[child.id]
                                    if child.is_leaf() else None)
        cluster_leaf_species[
            'cluster_' +
            str(node.id).zfill(len_longest_cluster_id)] = node_leafs

    species_clustered_df = pa.DataFrame(columns=organisms)
    for cluster_leaf in cluster_leaf_species:
        tmp_organism_cluster = [
            True if organism in cluster_leaf_species[cluster_leaf] else False
            for organism in species_clustered_df.columns
        ]
        species_clustered_df.loc[cluster_leaf] = tmp_organism_cluster

    species_clustered_df = species_clustered_df.replace(np.nan, False)
    species_clustered_df.to_csv(output_folder_tree_cluster +
                                'clustered_species.tsv',
                                sep='\t')

    # Create xml structure from hierarchical clustering.
    root = hclust_to_xml(linkage_matrix)

    # Post order traversal of the tree.
    d = {}
    for element in root.iter():
        d[element.tag] = [child.tag for child in element]

    post_order_clusters = {}
    for node in node_list:
        node_label = 'cluster_' + str(node.id).zfill(len_longest_cluster_id)
        if d[node_label] == []:
            # Leaf cluster: reactions shared by all its species.
            species = cluster_leaf_species[node_label]
            tmp_reactions = reactions_dataframe[
                reactions_dataframe[species].all(axis=1)]
            post_order_clusters[node_label] = tmp_reactions.index.tolist()
        else:
            # Internal cluster: intersection of children when non-empty,
            # otherwise union.
            if set(post_order_clusters[d[node_label][0]]).intersection(
                    set(post_order_clusters[d[node_label][1]])) != set():
                post_order_clusters[node_label] = set(
                    post_order_clusters[d[node_label][0]]).intersection(
                        set(post_order_clusters[d[node_label][1]]))
            else:
                post_order_clusters[node_label] = set(
                    post_order_clusters[d[node_label][0]]).union(
                        set(post_order_clusters[d[node_label][1]]))

    # Use xml structure to create intersection files.
    reactions_clust = create_intersection_files(root, cluster_leaf_species,
                                                reactions_dataframe,
                                                output_folder_tree_cluster,
                                                metacyc_to_ecs)

    comparison_cluster(reactions_clust, output_folder_comparison)

    # Add label contaning cluster name and reaction number to each node.
    check_label = add_dendrogram_node_label(reaction_dendrogram, node_list,
                                            reactions_clust,
                                            len_longest_cluster_id)

    if not check_label:
        print('Warning: no label for cluster name have been added.')

    # Create dendrogram, bbox option adjsut the figure size.
    plt.savefig(output_folder + '/reaction_dendrogram.png',
                bbox_inches='tight')
    plt.clf()

    if upset_cluster:
        dendrogram_fclusters = create_cluster(reactions_dataframe,
                                              absence_presence_matrix,
                                              linkage_matrix)
        # Fix: the original passed an undefined name `k` here (NameError);
        # the cluster count is the upset_cluster argument.
        create_supervenn(absence_presence_matrix, reactions_dataframe,
                         output_folder_upset, dendrogram_fclusters,
                         upset_cluster, verbose)
Ejemplo n.º 22
0
    parser.add_argument("-s",
                        "--species",
                        help="padmet species",
                        required=True)
    parser.add_argument("-p", "--pathway", help="pathway name", required=True)
    parser.add_argument("-o",
                        "--output",
                        help="SBML output filename",
                        required=False)

    args = parser.parse_args()

    if args.output:
        outfile = args.output
    else:
        outfile = args.pathway.lower() + ".sbml"

    p_ref = PadmetRef(args.reference)  #reads reference padmet
    p_spec = PadmetSpec(args.species)  #reads organism padmet

    rxn_list = [
        rlt.id_in for rlt in p_ref.dicOfRelationOut[args.pathway]
        if rlt.type == "is_in_pathway"
        and p_ref.dicOfNode[rlt.id_in].type == "reaction"
    ]

    for rxn_id in rxn_list:
        print(rxn_id)
        p_spec.copyNode(p_ref, rxn_id)
    padmet_to_sbml(p_spec, outfile, sbml_lvl=2, verbose=True)
Ejemplo n.º 23
0
def biggAPI_to_padmet(output, pwy_file=None, verbose=False):
    """
    Extract the BIGG database using its REST API and create a padmet file.
    Skip biomass reactions.
    Require internet access !

    Allows to extract the bigg database from the API to create a padmet.

    1./ Get all reactions universal id from http://bigg.ucsd.edu/api/v2/universal/reactions, skip reactions of biomass.
    2./ Extract all the informations for each reaction (compounds, stoichiometry, name ...)
    3./ Because the direction of the reaction is not set by default in bigg,
    we get all the models where the reaction is and the final direction will be the one found
    in more than 75% of the models.
    4./ Also extract xrefs.

    Parameters
    ----------
    output: str
        path to output, the padmet file.
    pwy_file: str
        path to pathway file, add kegg pathways, line:'pwy_id, pwy_name, x, rxn_id'.
    verbose: bool
        if True print information
    """
    now = datetime.now()
    today_date = now.strftime("%Y-%m-%d")
    # Relations allowed between node types in the padmet graph.
    policyInArray = [
        ['compound', 'has_name', 'name'],
        ['compound', 'has_xref', 'xref'],
        ['compound', 'has_suppData', 'suppData'],
        ['gene', 'has_name', 'name'],
        ['gene', 'has_xref', 'xref'],
        ['gene', 'has_suppData', 'suppData'],
        ['gene', 'codes_for', 'protein'],
        ['pathway', 'has_name', 'name'],
        ['pathway', 'has_xref', 'xref'],
        ['pathway', 'is_in_pathway', 'pathway'],
        ['protein', 'has_name', 'name'],
        ['protein', 'has_xref', 'xref'],
        ['protein', 'has_suppData', 'suppData'],
        ['protein', 'catalyses', 'reaction'],
        ['reaction', 'has_name', 'name'],
        ['reaction', 'has_xref', 'xref'],
        ['reaction', 'has_suppData', 'suppData'],
        ['reaction', 'has_reconstructionData', 'reconstructionData'],
        ['reaction', 'is_in_pathway', 'pathway'],
        ['reaction', 'consumes', 'class', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'produces', 'class', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'consumes', 'compound', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'produces', 'compound', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'consumes', 'protein', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'produces', 'protein', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'is_linked_to', 'gene', 'SOURCE:ASSIGNMENT', 'X:Y']
    ]
    dbNotes = {
        "PADMET": {
            "Creation": today_date,
            "version": "2.6"
        },
        "DB_info": {
            "DB": "BIGG",
            "version": "1.5"
        }
    }
    padmetRef = PadmetRef()
    if verbose: print("setting policy")
    padmetRef.setPolicy(policyInArray)
    if verbose: print("setting dbInfo")
    padmetRef.setInfo(dbNotes)
    list_of_relation = []
    if verbose: print("Getting all reactions ids")
    url_bigg = 'http://bigg.ucsd.edu/api/v2/'
    raw_data = requests.get(url_bigg + "universal/reactions").json()['results']
    all_reactions_ids = [
        rxn_dict['bigg_id'] for rxn_dict in raw_data
        if not rxn_dict['bigg_id'].startswith("BIOMASS")
    ]
    if verbose: print("%s reactions to extract" % (len(all_reactions_ids)))
    if verbose: print("updating padmet")
    count = 0
    # Drop every id containing "biomass", case-insensitively.
    # BUGFIX: the previous test was "'biomass' not in i.upper()", which can
    # never be False (i.upper() contains no lowercase letters), so nothing was
    # ever filtered here. Compare against the uppercase literal instead; this
    # also subsumes the former startswith("BIOMASS") re-check in the loop.
    all_reactions_ids = [
        i for i in all_reactions_ids if 'BIOMASS' not in i.upper()
    ]
    for rxn_id in all_reactions_ids:
        count += 1
        if verbose:
            print("reaction: %s, %s/%s" %
                  (rxn_id, count, len(all_reactions_ids)))
        if rxn_id not in list(padmetRef.dicOfNode.keys()):
            rxn_response = requests.get(url_bigg + "universal/reactions/" +
                                        rxn_id)
            rxn_dict = rxn_response.json()

            rxn_metabolites = rxn_dict["metabolites"]
            # Skip degenerate reactions with fewer than two metabolites.
            if len(rxn_metabolites) > 1:
                rxn_id = rxn_dict['bigg_id']
                rxn_name = rxn_dict["name"]

                # Fetch the reaction in every model containing it to decide
                # its direction (BIGG does not expose a universal direction).
                all_models_id = [
                    i["bigg_id"]
                    for i in rxn_dict["models_containing_reaction"]
                ]
                async_list = []
                for model_id in all_models_id:
                    action_item = grequests.get(url_bigg + "models/" +
                                                model_id + "/reactions/" +
                                                rxn_id)
                    async_list.append(action_item)
                models_responses = [
                    r.json() for r in grequests.map(async_list)
                ]
                all_lower_bound = [
                    i["results"][0]["lower_bound"] for i in models_responses
                ]
                # A lower bound of 0 means the reaction is irreversible in
                # that model; irreversible wins at >= 75% of the models.
                ratio_not_rev = float(all_lower_bound.count(0)) / float(
                    len(all_lower_bound))
                if verbose:
                    print("Reaction not reversible in %s/%s model(s)" %
                          (all_lower_bound.count(0), len(all_lower_bound)))
                if ratio_not_rev >= 0.75:
                    rxn_direction = "LEFT-TO-RIGHT"
                    if verbose: print("Reaction not reversible")
                else:
                    rxn_direction = "REVERSIBLE"
                    if verbose: print("Reaction reversible")
                padmetRef.createNode("reaction", rxn_id, {
                    "COMMON_NAME": [rxn_name],
                    "DIRECTION": [rxn_direction]
                })

                # Cross-references to other databases.
                rxn_xrefs = rxn_dict["database_links"]

                xref_id = rxn_id + "_xrefs"
                xref_node = padmetRef.createNode("xref", xref_id)
                has_xref_rlt = Relation(rxn_id, "has_xref", xref_id)
                list_of_relation.append(has_xref_rlt)

                for db, k in list(rxn_xrefs.items()):
                    _id = k[0]["id"]
                    if db in list(xref_node.misc.keys()
                                  ) and _id not in xref_node.misc[db]:
                        xref_node.misc[db].append(_id)
                    else:
                        xref_node.misc[db] = [_id]

                # Create missing compound nodes and the consumes/produces
                # relations; the stoichiometry sign encodes the side.
                for metabo_dict in rxn_metabolites:
                    metabo_id = metabo_dict["bigg_id"]
                    metabo_name = metabo_dict["name"]
                    metabo_compart = metabo_dict["compartment_bigg_id"]
                    metabo_stoich = metabo_dict["stoichiometry"]
                    try:
                        padmetRef.dicOfNode[metabo_id]
                    except KeyError:
                        padmetRef.createNode("compound", metabo_id,
                                             {"COMMON_NAME": [metabo_name]})
                    if metabo_stoich < 0:
                        consumes_rlt = Relation(
                            rxn_id, "consumes", metabo_id, {
                                "STOICHIOMETRY": [abs(metabo_stoich)],
                                "COMPARTMENT": [metabo_compart]
                            })
                        list_of_relation.append(consumes_rlt)
                    else:
                        produces_rlt = Relation(
                            rxn_id, "produces", metabo_id, {
                                "STOICHIOMETRY": [abs(metabo_stoich)],
                                "COMPARTMENT": [metabo_compart]
                            })
                        list_of_relation.append(produces_rlt)
        else:
            if verbose: print("%s already in padmet" % rxn_id)
            continue
    # Index all collected relations by their in/out node ids.
    if verbose: print("Adding all relations")
    count = 0
    for rlt in list_of_relation:
        count += 1
        if verbose: print("relation %s/%s" % (count, len(list_of_relation)))
        try:
            padmetRef.dicOfRelationIn[rlt.id_in].append(rlt)
        except KeyError:
            padmetRef.dicOfRelationIn[rlt.id_in] = [rlt]
        try:
            padmetRef.dicOfRelationOut[rlt.id_out].append(rlt)
        except KeyError:
            padmetRef.dicOfRelationOut[rlt.id_out] = [rlt]

    if pwy_file:
        add_kegg_pwy(pwy_file, padmetRef, verbose)
    if verbose: print("Generating file: %s" % output)
    padmetRef.generateFile(output)
Ejemplo n.º 24
0
def analysis_on_group(group_name, groups, config_data, pvclust, nb_cpu_to_use,
                      verbose):
    """Create reaction dendrogram and extract specific reactions using metabolic networks.

    Args:
        group_name (str): Name of the group from group_template.tsv.
        groups (list): All the species inside the group.
        config_data (dict): Dictionary with all configuration paths.
        pvclust (boolean): use also pvclust to create reaction dendrogram
        nb_cpu_to_use (int): number of CPU for multiprocessing
        verbose (bool): Verbose.
    """

    database_path = config_data['database_path']
    padmetRef = PadmetRef(database_path)
    padmet_from_networks_path = config_data['padmet_from_networks_path']
    analysis_path = config_data['analysis_path']

    # One padmet file per group member, expected next to each other.
    all_padmet_path = []
    for name in groups:
        all_padmet_path.append(
            os.path.join(padmet_from_networks_path, name + ".padmet"))
    group_analysis_path = analysis_path + '/' + group_name

    if os.path.isdir(group_analysis_path):
        # Results already computed for this group: nothing to do.
        print(
            group_analysis_path +
            ' already exists. Delete it if you want to relaunch the analysis.')
        return

    if len(groups) == 1:
        sys.exit('A group must contain more than one member.')

    # Abort on the first group member whose network is missing.
    for padmet_path in all_padmet_path:
        if os.path.exists(padmet_path):
            continue
        org_name = os.path.splitext(os.path.basename(padmet_path))[0]
        sys.exit(
            "Padmet file of organism %s from group %s not found in %s"
            % (org_name, group_name, padmet_from_networks_path))

    panmetabolism_padmet = (group_analysis_path + '/' + group_name +
                            '_panmetabolism.padmet')
    panmetabolism_sbml = (group_analysis_path + '/' + group_name +
                          '_panmetabolism.sbml')

    # Compare the padmet files to create the reactions.tsv matrix needed by
    # the reaction-dendrogram step below.
    compare_padmet.compare_padmet(padmet_path=",".join(all_padmet_path),
                                  output=group_analysis_path,
                                  padmetRef=padmetRef,
                                  verbose=verbose,
                                  number_cpu=nb_cpu_to_use)
    # Merge the group's networks into one pan-metabolism padmet and export
    # it to SBML.
    padmet_to_padmet.padmet_to_padmet(",".join(all_padmet_path),
                                      panmetabolism_padmet)
    sbmlGenerator.padmet_to_sbml(padmet=panmetabolism_padmet,
                                 output=panmetabolism_sbml,
                                 verbose=verbose)

    dendrogram_reactions_distance.reaction_figure_creation(
        reaction_file=group_analysis_path + '/reactions.tsv',
        output_folder=group_analysis_path + '/dendrogram_output',
        padmetRef_file=database_path,
        pvclust=pvclust,
        verbose=verbose)
Ejemplo n.º 25
0
def sbml_to_padmetSpec(sbml,
                       padmetSpec_file,
                       padmetRef_file=None,
                       output=None,
                       mapping=None,
                       mapping_tag="_dict.csv",
                       source_tool=None,
                       source_category=None,
                       db="NA",
                       version="NA",
                       verbose=False):
    """
    Convert 1 - n sbml files to a padmet file.

    Parameters
    ----------
    sbml: str
        path to a single sbml file, or to a directory whose .sbml/.xml files
        are all converted.
    padmetSpec_file: str
        path to a new padmet file to create or an old padmet to update.
    padmetRef_file: str
        path to the database of reference used for data standardization
        (mapping files are only used when a padmetRef is given).
    output: str
        path to the new padmet file; if None, padmetSpec_file is overwritten.
    mapping: str
        explicit mapping dictionary file, used only when sbml is a single file.
    mapping_tag: str
        suffix used to look up a per-sbml mapping file next to each sbml
        when sbml is a directory (e.g. my_model.sbml -> my_model_dict.csv).
    source_tool: str
        tool used to create the sbml(s), e.g. Orthofinder.
    source_category: str
        category of the tool, e.g. orthology.
    db: str
        database used (e.g. metacyc, bigg) — only for a new padmet created
        without a padmetRef.
    version: str
        version of the database (23, 18, ...) — same restriction as db.
    verbose: bool
        if True print information.
    """
    if verbose:
        print(
            'sbml_to_padmet decodes reactions and metabolites using regular expression.'
        )
        print(
            'The reaction/metabolites IDs format used by sbml_to_padmet is: prefix + "_" + ID + "_" + optional_suffix. '
        )
        print(
            'prefix is a one character indicating the type, like R for reaction or M for metabolite.'
        )
        print(
            'optional_suffix is a one or two characters indicating the compartment.'
        )

    if output is None:
        output = padmetSpec_file
    #if sbml is a dir: sbml_files are all files with extension .sbml or .xml within dir
    #else: sbml = my_sbml.sbml or sbml= my_sbml1.sbml;my_sbml2.sml
    if os.path.isdir(sbml):
        sbml_files = [
            os.path.join(sbml, _f) for _f in next(os.walk(sbml))[2]
            if _f.endswith(".sbml") or _f.endswith(".xml")
        ]
    else:
        sbml_files = [sbml]

    #PadmetRef used for mapping and data standardization
    if padmetRef_file:
        padmetRef = PadmetRef(padmetRef_file)
    else:
        padmetRef = None

    # Update an existing padmet in place, or bootstrap a fresh PadmetSpec
    # named after the output file.
    if os.path.isfile(padmetSpec_file):
        padmet_to_update = PadmetSpec(padmetSpec_file)
    else:
        padmet_id = os.path.splitext(os.path.basename(output))[0]
        padmet_to_update = instantiate_padmet("PadmetSpec", padmetRef_file,
                                              padmet_id, db, version, verbose)

    #if sbml is a directory, recover all file path in a list. if no => only one file: create a list with only this file
    #sbml_mapping_dict = {'/path/to/my_sbml1.sbml': '/path/to/my_sbml1_dict.csv' // None}
    sbml_mapping_dict = {}
    if os.path.isdir(sbml):
        for sbml_file in sbml_files:
            # Look up the companion mapping file; without a padmetRef a
            # mapping cannot be applied, so it is ignored.
            mapping_file = os.path.splitext(sbml_file)[0] + mapping_tag
            if not os.path.isfile(mapping_file) or not padmetRef:
                mapping_file = None
            sbml_mapping_dict[sbml_file] = mapping_file
    else:
        sbml_mapping_dict[sbml] = mapping

    if len(list(sbml_mapping_dict.keys())) == 0:
        raise IOError("No sbml found based on %s" % sbml)

    for sbml_file, mapping_file in list(sbml_mapping_dict.items()):
        # Without a mapping file, ids are force-decoded by regular expression.
        if mapping_file:
            force = False
        else:
            force = True

        if verbose:
            if mapping_file:
                print("Updating %s from %s using mapping dictionnary %s" %
                      (os.path.basename(padmetSpec_file),
                       os.path.basename(sbml_file),
                       os.path.basename(mapping_file)))
            else:
                print("Updating %s from %s" %
                      (os.path.basename(padmetSpec_file),
                       os.path.basename(sbml_file)))

        padmet_to_update.updateFromSbml(sbml_file=sbml_file,
                                        padmetRef=padmetRef,
                                        mapping_file=mapping_file,
                                        verbose=verbose,
                                        force=force,
                                        source_category=source_category,
                                        source_tool=source_tool)

    padmet_to_update.generateFile(output)
Ejemplo n.º 26
0
def modelSeed_to_padmet(rxn_file, pwy_file, output, verbose=False):
    """
    Create a padmet file of reference from the ModelSEED database dumps.

    The biomass reaction (rxn12985) is skipped.

    Parameters
    ----------
    rxn_file: str
        path to the ModelSEED reactions JSON file (dict of rxn_id -> data).
    pwy_file: str
        path to the ModelSEED pathways TSV file with columns
        'Source ID', 'Name', 'Aliases' and 'Reactions' ('|'-separated ids).
    output: str
        path to output, the padmet file.
    verbose: bool
        if True print information
    """
    global list_of_relation
    now = datetime.now()
    today_date = now.strftime("%Y-%m-%d")
    # Relations allowed between node types in the padmet graph.
    policyInArray = [
        ['compound', 'has_name', 'name'],
        ['compound', 'has_xref', 'xref'],
        ['compound', 'has_suppData', 'suppData'],
        ['gene', 'has_name', 'name'],
        ['gene', 'has_xref', 'xref'],
        ['gene', 'has_suppData', 'suppData'],
        ['gene', 'codes_for', 'protein'],
        ['pathway', 'has_name', 'name'],
        ['pathway', 'has_xref', 'xref'],
        ['pathway', 'is_in_pathway', 'pathway'],
        ['protein', 'has_name', 'name'],
        ['protein', 'has_xref', 'xref'],
        ['protein', 'has_suppData', 'suppData'],
        ['protein', 'catalyses', 'reaction'],
        ['reaction', 'has_name', 'name'],
        ['reaction', 'has_xref', 'xref'],
        ['reaction', 'has_suppData', 'suppData'],
        ['reaction', 'has_reconstructionData', 'reconstructionData'],
        ['reaction', 'is_in_pathway', 'pathway'],
        ['reaction', 'consumes', 'class', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'produces', 'class', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'consumes', 'compound', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'produces', 'compound', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'consumes', 'protein', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'produces', 'protein', 'STOICHIOMETRY', 'X', 'COMPARTMENT', 'Y'],
        ['reaction', 'is_linked_to', 'gene', 'SOURCE:ASSIGNMENT', 'X:Y']
    ]
    dbNotes = {
        "PADMET": {
            "Creation": today_date,
            "version": "2.6"
        },
        "DB_info": {
            "DB": "MODELSEED",
            "version": "1.0"
        }
    }
    padmetRef = PadmetRef()
    if verbose: print("setting policy")
    padmetRef.setPolicy(policyInArray)
    if verbose: print("setting dbInfo")
    padmetRef.setInfo(dbNotes)
    list_of_relation = []

    # BUGFIX: json.load(open(...)) leaked the file handle; use a context
    # manager so the file is closed deterministically.
    with open(rxn_file) as rxn_json_file:
        rxn_data = json.load(rxn_json_file)
    # Remove the biomass reaction; tolerate dumps that do not contain it
    # (plain pop() would raise KeyError).
    rxn_data.pop("rxn12985", None)
    if verbose: print("updating padmet")
    count = 0
    for rxn_id, rxn_dict in list(rxn_data.items()):
        count += 1
        if verbose:
            print("reaction: %s, %s/%s" % (rxn_id, count, len(rxn_data)))
        # Skip reactions with no compounds (missing or empty "compound_ids").
        try:
            if not rxn_dict["compound_ids"]:
                raise KeyError
        except KeyError:
            print(rxn_id)
            continue
        if rxn_id not in list(padmetRef.dicOfNode.keys()):
            # ">" means irreversible left-to-right; anything else is treated
            # as reversible.
            if rxn_dict["reversibility"] == ">":
                rxn_direction = "LEFT-TO-RIGHT"
            else:
                rxn_direction = "REVERSIBLE"
            rxn_name = rxn_dict["name"]
            padmetRef.createNode("reaction", rxn_id, {
                "COMMON_NAME": [rxn_name],
                "DIRECTION": [rxn_direction]
            })

            # Stoichiometry string: ';'-separated entries of the form
            # stoich:cpd_id:compartment:"name" (ModelSEED encodes the quote
            # as "???").
            rxn_metabolites = rxn_dict["stoichiometry"].split(";")

            for metabo_data in rxn_metabolites:
                metabo_data = metabo_data.replace("???", "\"")
                try:
                    metabo_temp, metabo_name = metabo_data.split("\"")[:2]
                    metabo_stoich, metabo_id, metabo_compart = metabo_temp.split(
                        ":")[:3]
                except ValueError:
                    # Entry without a quoted name: all fields are ':'-separated.
                    metabo_stoich, metabo_id, metabo_compart, metabo_name = metabo_data.split(
                        ":")[:4]

                metabo_stoich = float(metabo_stoich)
                # Compartment codes from the ModelSEED github:
                # 0 = cytosol, 1 = extracellular, 2 = periplasm.
                if metabo_compart == "0":
                    metabo_compart = "c"
                elif metabo_compart == "1":
                    metabo_compart = "e"
                elif metabo_compart == "2":
                    metabo_compart = "p"
                try:
                    padmetRef.dicOfNode[metabo_id]
                except KeyError:
                    padmetRef.createNode("compound", metabo_id,
                                         {"COMMON_NAME": [metabo_name]})
                # The stoichiometry sign encodes the side of the reaction.
                if metabo_stoich < 0:
                    consumes_rlt = Relation(
                        rxn_id, "consumes", metabo_id, {
                            "STOICHIOMETRY": [abs(metabo_stoich)],
                            "COMPARTMENT": [metabo_compart]
                        })
                    list_of_relation.append(consumes_rlt)
                else:
                    produces_rlt = Relation(
                        rxn_id, "produces", metabo_id, {
                            "STOICHIOMETRY": [abs(metabo_stoich)],
                            "COMPARTMENT": [metabo_compart]
                        })
                    list_of_relation.append(produces_rlt)
        else:
            if verbose: print("%s already in padmet" % rxn_id)
            continue
    # Pathway file: create pathway nodes and link member reactions.
    with open(pwy_file) as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        pwy_raw_data = [row for row in reader]
    for pwy_raw in pwy_raw_data:
        pwy_id = pwy_raw["Source ID"]
        pwy_names = [pwy_raw["Name"], pwy_raw["Aliases"]]
        rxn_ids = pwy_raw["Reactions"].split("|")
        try:
            padmetRef.dicOfNode[pwy_id]
        except KeyError:
            padmetRef.createNode("pathway", pwy_id, {"COMMON_NAME": pwy_names})
        for rxn_id in rxn_ids:
            pwy_rlt = Relation(rxn_id, "is_in_pathway", pwy_id)
            list_of_relation.append(pwy_rlt)

    # Index all collected relations by their in/out node ids.
    if verbose: print("Adding all relations")
    count = 0
    for rlt in list_of_relation:
        count += 1
        if verbose: print("relation %s/%s" % (count, len(list_of_relation)))
        try:
            padmetRef.dicOfRelationIn[rlt.id_in].append(rlt)
        except KeyError:
            padmetRef.dicOfRelationIn[rlt.id_in] = [rlt]
        try:
            padmetRef.dicOfRelationOut[rlt.id_out].append(rlt)
        except KeyError:
            padmetRef.dicOfRelationOut[rlt.id_out] = [rlt]

    if verbose: print("Generating file: %s" % output)
    padmetRef.generateFile(output)