Example #1
0
def pgdb_to_sbml(pgdb_dir, output_dir, noorphan_bool, padmet_bool, sbml_level,
                 cpu):
    """Turn Pathway Tools PGDBs into SBML2 files using Padmet

    Args:
        pgdb_dir (str): PGDB directory
        output_dir (str): results directory
        noorphan_bool (bool): ignores orphan reactions if True
        padmet_bool (bool): also create padmet files if True
        sbml_level (int): SBML level
        cpu (int): number of CPU for multi-process

    Returns:
        sbml_dir (str): SBML directory if successful
    """

    logger.info('######### Creating SBML files #########')
    sbml_dir = os.path.join(output_dir, 'sbml')

    if padmet_bool:
        padmet_dir = os.path.join(output_dir, 'padmet')
        if not utils.is_valid_dir(padmet_dir):
            logger.critical('Impossible to access/create output directory')
            sys.exit(1)
    if not utils.is_valid_dir(sbml_dir):
        logger.critical('Impossible to access/create output directory')
        sys.exit(1)

    pgdb_to_sbml_pool = Pool(processes=cpu)

    multiprocess_data = []
    for species in os.listdir(pgdb_dir):
        pgdb_species_path = os.path.join(pgdb_dir, species)
        sbml_species_path = os.path.join(sbml_dir, species + '.sbml')
        if padmet_bool:
            # BUGFIX: the padmet path must only be built when padmet_bool is
            # True — padmet_dir is undefined otherwise, so computing it
            # unconditionally raised a NameError whenever padmet_bool was False.
            padmet_species_path = os.path.join(padmet_dir, species + '.padmet')
            multiprocess_data.append([
                pgdb_species_path, sbml_species_path, sbml_level,
                noorphan_bool, padmet_species_path
            ])
        else:
            multiprocess_data.append([
                pgdb_species_path, sbml_species_path, sbml_level,
                noorphan_bool, padmet_bool
            ])

    sbml_checks = pgdb_to_sbml_pool.map(run_pgdb_to_sbml, multiprocess_data)

    pgdb_to_sbml_pool.close()
    pgdb_to_sbml_pool.join()

    if all(sbml_checks):
        return sbml_dir
    else:
        logger.critical('Error during padmet/sbml creation.')
        sys.exit(1)
Example #2
0
def instance_community(sbml_dir, seeds, output_dir, targets_file=None, host_mn=None):
    """Create ASP instance for community analysis.

    Args:
        sbml_dir (str): directory of symbionts SBML files
        seeds (str): seeds SBML file
        output_dir (str): directory for results
        targets_file (str): targets file
        host_mn (str): metabolic network file for host

    Returns:
        str: instance filepath
    """
    logger.info(
            "######### Creating metabolic instance for the whole community #########"
        )
    miscoto_dir = os.path.join(output_dir, 'community_analysis')
    if not utils.is_valid_dir(miscoto_dir):
        logger.critical("Impossible to access/create output directory")
        sys.exit(1)

    # Create a named tempfile for the instance. mkstemp returns an open OS
    # file descriptor; close it immediately since only the path is needed —
    # previously the descriptor was leaked.
    fd, outputfile = tempfile.mkstemp(suffix='.lp', prefix='miscoto_', dir=miscoto_dir)
    os.close(fd)

    instance_filepath = run_instance(
        bacteria_dir=sbml_dir,
        seeds_file=seeds,
        host_file=host_mn,
        targets_file=targets_file,
        output=outputfile)

    logger.info("Created instance in " + instance_filepath)
    return instance_filepath
Example #3
0
def comm_scope_run(instance, output_dir, host_mn=None):
    """Run Miscoto_scope and analyse community metabolic capabilities

    Args:
        instance (str): instance filepath
        output_dir (str): directory for results
        host_mn (str): metabolic network file for host

    Returns:
        set: microbiota scope
    """
    miscoto_dir = os.path.join(output_dir, 'community_analysis')
    com_scopes_path = os.path.join(miscoto_dir, 'comm_scopes.json')

    if not utils.is_valid_dir(miscoto_dir):
        logger.critical('Impossible to access/create output directory')
        sys.exit(1)

    microbiota_scope = run_scopes(lp_instance_file=instance)

    # No host given: strip all host-related entries from the scope results.
    if host_mn is None:
        for host_key in ('host_prodtargets', 'host_unprodtargets',
                         'host_scope', 'comhost_scope'):
            del microbiota_scope[host_key]

    with open(com_scopes_path, 'w') as scope_file:
        json.dump(microbiota_scope, scope_file, indent=4)
    logger.info('Community scopes for all metabolic networks available in ' +
                com_scopes_path)

    return set(microbiota_scope['com_scope'])
Example #4
0
def addedvalue(iscope_rm, cscope_rm, out_dir):
    """Compute the added value of considering interaction with microbiota metabolism rather than individual metabolisms.

    Args:
        iscope_rm (set): union of metabolites in all individual scopes
        cscope_rm (set): metabolites reachable by community/microbiota
        out_dir (str): results directory

    Returns:
        set: set of metabolites that can only be reached by a community
    """
    # Community targets = what can be produced only if cooperation occurs between species
    newtargets = cscope_rm - iscope_rm
    logger.info("\nAdded value of cooperation over individual metabolism: " +
                str(len(newtargets)) + " newly reachable metabolites: \n")
    logger.info('\n'.join(newtargets))
    logger.info("\n")
    # Use os.path.join for path building, consistent with sibling functions.
    miscoto_dir = os.path.join(out_dir, "community_analysis")
    if not utils.is_valid_dir(miscoto_dir):
        logger.critical("Impossible to access/create output directory")
        sys.exit(1)
    addedvalue_json_path = os.path.join(miscoto_dir, "addedvalue.json")
    dict_av = {"addedvalue": list(newtargets)}
    # default= keeps serialization working even for non-JSON-native
    # metabolite objects — presumably entries are plain strings; verify.
    with open(addedvalue_json_path, 'w') as dumpfile:
        json.dump(dict_av, dumpfile, indent=4, default=lambda x: x.__dict__)
    logger.info(
        f"Added-value of cooperation written in {addedvalue_json_path}")
    return newtargets
Example #5
0
def check_sbml(inpt, outdir, folder=True):
    """Check whether one or several SBML level 3 files are in directory. If yes, convert them into a new directory and copy the SBML files that are correct into this same directory.

    Args:
        inpt (str): SBML file path or SBML files directory
        outdir (str): Results directory
        folder (bool): Defaults to True. Changes the function behavior depending on whether the input is a file or a folder

    Returns:
        str: filepath of file or directory, same as input if all SBMLs are level 2
    """
    if folder:
        # Keep only regular files with an SBML-like extension.
        all_files = [
            f for f in os.listdir(inpt)
            if os.path.isfile(os.path.join(inpt, f)) and utils.get_extension(
                os.path.join(inpt, f)).lower() in ["xml", "sbml"]
        ]
        sbml_levels = {}
        make_new_sbmls = False
        for f in all_files:
            sbml_levels[f] = sbml_management.get_sbml_level(
                os.path.join(inpt, f))
            if sbml_levels[f] != 2:
                make_new_sbmls = True
        if make_new_sbmls:
            # Use os.path.join for path building, consistent with the rest of
            # the module (also avoids the trailing-slash directory name).
            sbml_dir = os.path.join(outdir, "new_sbml")
            logger.warning(
                "At least one SBML has not a suitable level for the tools. They will be transformed and created in "
                + sbml_dir + ". The others will be copied in this directory")
            if not utils.is_valid_dir(sbml_dir):
                logger.critical("Impossible to write in output directory")
                sys.exit(1)
            for f in all_files:
                if sbml_levels[f] != 2:
                    # create level 2 SBML in sbml_dir
                    sbml_management.transform_sbml_lvl(
                        os.path.join(inpt, f), os.path.join(sbml_dir, f), 2)
                else:
                    # copy the original SBML in sbml_dir
                    copyfile(os.path.join(inpt, f), os.path.join(sbml_dir, f))
        else:
            sbml_dir = inpt
        return sbml_dir
    else:
        if not utils.is_valid_file(inpt):
            logger.critical(inpt + " is not a correct filepath")
            sys.exit(1)
        sbml_level = sbml_management.get_sbml_level(inpt)
        if sbml_level != 2:
            newsbml = os.path.join(outdir, utils.get_basename(inpt) + "_lvl2.sbml")
            logger.warning(inpt + " was not in a suitable level for analysis. A converted file is created in " + newsbml)
            sbml_management.transform_sbml_lvl(inpt, newsbml, 2)
        else:
            newsbml = inpt
        return newsbml
Example #6
0
def indiv_scope_run(sbml_dir, seeds, output_dir):
    """Run Menetools and analyse individual metabolic capabilities.

    Args:
        sbml_dir (str): directory of SBML files
        seeds (str): SBML seeds file
        output_dir (str): directory for results

    Returns:
        str: output file for Menetools analysis
    """
    logger.info('######### Running individual metabolic scopes #########')

    menetools_dir = os.path.join(output_dir, 'indiv_scopes')
    indiv_scopes_path = os.path.join(menetools_dir, 'indiv_scopes.json')

    if not utils.is_valid_dir(menetools_dir):
        logger.critical('Impossible to access/create output directory')
        sys.exit(1)

    # Keep only regular files with an SBML-like extension.
    all_files = [
        f for f in os.listdir(sbml_dir)
        if os.path.isfile(os.path.join(sbml_dir, f)) and utils.get_extension(
            os.path.join(sbml_dir, f)).lower() in ['xml', 'sbml']
    ]
    all_scopes = {}
    for f in all_files:
        bname = utils.get_basename(f)
        # Bare except is intentional: run_menescope may call sys.exit(1)
        # (SystemExit), which `except Exception` would not catch.
        try:
            all_scopes[bname] = run_menescope(draft_sbml=os.path.join(
                sbml_dir, f),
                                              seeds_sbml=seeds)
        except:
            traceback_str = traceback.format_exc()
            # Don't print the traceback if the error is linked to SystemExit as the error has been handled by menetools.
            if 'SystemExit: 1' not in traceback_str:
                logger.critical(traceback_str)
            # BUGFIX: the filename was previously embedded literally in the
            # message ('... on " + f + " ...') instead of being concatenated.
            logger.critical(
                '---------------Something went wrong running Menetools on ' +
                f + '---------------')
            sys.exit(1)

    with open(indiv_scopes_path, 'w') as dumpfile:
        json.dump(all_scopes, dumpfile, indent=4)

    return indiv_scopes_path
Example #7
0
def enumeration_analysis(sbml_folder,
                         target_folder_file,
                         seed_file,
                         output_dir,
                         host_file=None):
    """Run miscoto enumeration on input data

    Runs one enumeration per targets file found in target_folder_file and
    writes each result to <output_dir>/json/<target>.json. Targets whose
    JSON output already exists are skipped, not recomputed.

    Args:
        sbml_folder (str): sbml directory
        target_folder_file (str): targets file or folder containing multiple sbmls
        seed_file (str): seeds file
        output_dir (str): results directory
        host_file (str): metabolic network file for host

    Returns:
        str: path of the directory containing the enumeration JSON outputs
    """
    starttime = time.time()

    # {target_filename_without_extension: target_filepath}
    target_paths = utils.file_or_folder(target_folder_file)

    output_jsons = os.path.join(output_dir, 'json')
    if not utils.is_valid_dir(output_jsons):
        logger.critical("Impossible to access/create output directory")
        sys.exit(1)

    # NOTE(review): miscoto_jsons is filled but never returned or read —
    # confirm whether callers need the per-target JSON mapping.
    miscoto_jsons = {}
    for target_path in target_paths:
        logger.info('######### Enumeration of solution for: ' + target_path +
                    ' #########')
        target_pathname = target_paths[target_path]
        output_json = os.path.join(output_jsons, target_path + '.json')
        if os.path.exists(output_json):
            logger.info('######### Enumeration has already been done for ' +
                        target_path +
                        ', it will not be launched again. #########')
        else:
            miscoto_json = enumeration(sbml_folder, target_pathname, seed_file,
                                       output_json, host_file)
            miscoto_jsons[target_path] = miscoto_json

    logger.info("--- Enumeration runtime %.2f seconds ---\n" %
                (time.time() - starttime))

    return output_jsons
Example #8
0
def comm_scope_run(instance, output_dir):
    """Run Miscoto scopes and analyse community metabolic capabilities.

    Args:
        instance (str): ASP instance filepath
        output_dir (str): directory for results

    Returns:
        set: microbiota (community) scope
    """
    miscoto_dir = output_dir + "/community_analysis"
    if not utils.is_valid_dir(miscoto_dir):
        logger.critical("Impossible to access/create output directory")
        sys.exit(1)
    microbiota_scope = run_scopes(instance)
    with open(miscoto_dir + "/comm_scopes.json", 'w') as dumpfile:
        json.dump(microbiota_scope, dumpfile, indent=4)
    logger.info("Community scopes for all metabolic networks available in " +
                miscoto_dir + "/comm_scopes.json")
    return set(microbiota_scope['com_scope'])
Example #9
0
def compute_mincom(instancefile, output_dir):
    """Run minimal community selection and analysis.

    Args:
        instancefile (str): filepath to instance file
        output_dir (str): directory with results

    Returns:
        dict: results of miscoto_mincom analysis
    """
    miscoto_dir = output_dir + "/community_analysis"
    if not utils.is_valid_dir(miscoto_dir):
        logger.critical("Impossible to access/create output directory")
        sys.exit(1)

    # Ask miscoto for the optimal solution plus union and intersection of
    # all minimal communities in a single call.
    return run_mincom(option="soup",
                      lp_instance_file=instancefile,
                      optsol=True,
                      union=True,
                      intersection=True)
Example #10
0
def create_gml(json_paths, target_paths, output_dir, taxon_file=None):
    """Create solution graph from miscoto output and compute stats

    Args:
        json_paths (str): {target: path_to_corresponding_json}
        target_paths (str): {target: path_to_corresponding_sbml}
        output_dir (str): results directory
        taxon_file (str): mpwt taxon file for species in sbml folder
    """
    miscoto_stat_output = os.path.join(output_dir, "miscoto_stats.txt")
    key_species_stats_output = os.path.join(output_dir,
                                            "keystone_species_stats.tsv")
    key_species_supdata_output = os.path.join(output_dir,
                                              "keystone_species_supdata.tsv")

    gml_output = os.path.join(output_dir, "gml")

    if not utils.is_valid_dir(gml_output):
        logger.critical("Impossible to access/create output directory")
        sys.exit(1)

    len_min_sol = {}
    len_union = {}
    len_intersection = {}
    len_solution = {}
    len_target = {}

    # Target category -> compounds present in the corresponding targets SBML.
    target_categories = {}
    for target in target_paths:
        target_categories[target] = sbml_management.get_compounds(
            target_paths[target])

    if taxon_file:
        phylum_named_species, all_phylums = get_phylum(taxon_file)
    else:
        phylum_named_species = None
        all_phylums = None

    with open(key_species_stats_output, "w") as key_stats_file, open(
            key_species_supdata_output,
            "w") as key_sup_file, open(miscoto_stat_output,
                                       "w") as stats_output:
        keystone_stats_writer = csv.writer(key_stats_file, delimiter="\t")
        if all_phylums:
            keystone_stats_writer.writerow([
                "target_categories", "keystones_group", *sorted(all_phylums),
                "Sum"
            ])
        else:
            keystone_stats_writer.writerow(
                ["target_categories", "keystones_group", "data", "Sum"])
        keystone_sup_writer = csv.writer(key_sup_file, delimiter="\t")
        # BUGFIX: create the stats writer and write its header once, before
        # the per-target loop; previously both were recreated inside the loop
        # so the header row was repeated for every target category.
        statswriter = csv.writer(stats_output, delimiter="\t")
        statswriter.writerow([
            "categories", "nb_target", "size_min_sol", "size_union",
            "size_intersection", "size_enum"
        ])
        for target_category in target_categories:
            with open(json_paths[target_category]) as json_data:
                dicti = json.load(json_data)
            create_stat_species(target_category, dicti, keystone_stats_writer,
                                keystone_sup_writer, phylum_named_species,
                                all_phylums)
            G = nx.Graph()
            added_node = []
            species_weight = {}
            if dicti["still_unprod"] != []:
                print("ERROR ", dicti["still_unprod"], " is unproducible")
            len_target[target_category] = len(dicti["newly_prod"]) + len(
                dicti["still_unprod"])
            len_min_sol[target_category] = len(dicti["bacteria"])
            len_union[target_category] = len(dicti["union_bacteria"])
            len_intersection[target_category] = len(dicti["inter_bacteria"])
            len_solution[target_category] = len(dicti["enum_bacteria"])
            # Build the co-occurrence graph: one edge per species pair found
            # together in an enumerated community, weighted by pair frequency.
            for sol in dicti["enum_bacteria"]:
                for species_1, species_2 in combinations(
                        dicti["enum_bacteria"][sol], 2):
                    if species_1 not in added_node:
                        if taxon_file:
                            G.add_node(phylum_named_species[species_1])
                        else:
                            G.add_node(species_1)
                        added_node.append(species_1)
                    if species_2 not in added_node:
                        if taxon_file:
                            G.add_node(phylum_named_species[species_2])
                        else:
                            G.add_node(species_2)
                        added_node.append(species_2)
                    combination_species = "_".join(
                        sorted([species_1, species_2]))
                    if combination_species not in species_weight:
                        species_weight[combination_species] = 1
                    else:
                        species_weight[combination_species] += 1
                    if taxon_file:
                        G.add_edge(phylum_named_species[species_1],
                                   phylum_named_species[species_2],
                                   weight=species_weight[combination_species])
                    else:
                        G.add_edge(species_1,
                                   species_2,
                                   weight=species_weight[combination_species])

            statswriter.writerow([
                target_category,
                str(len_target[target_category]),
                str(len_min_sol[target_category]),
                str(len_union[target_category]),
                str(len_intersection[target_category]),
                str(len_solution[target_category])
            ])
            logger.info('######### Graph of ' + target_category + ' #########')
            logger.info('Number of nodes: ' + str(G.number_of_nodes()))
            logger.info('Number of edges: ' + str(G.number_of_edges()))
            nx.write_gml(G, os.path.join(gml_output, target_category + ".gml"))
Example #11
0
def main():
    """Run program: parse m2m_analysis command line and dispatch subcommands.
    """
    start_time = time.time()
    parser = argparse.ArgumentParser(
        "m2m_analysis",
        description=MESSAGE + " For specific help on each subcommand use: m2m_analysis {cmd} --help",
        epilog=REQUIRES, formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument(
        "-v",
        "--version",
        action="version",
        version="%(prog)s " + VERSION + "\n" + LICENSE)

    # parent parsers: shared options reused across subcommands
    parent_parser_q = argparse.ArgumentParser(add_help=False)
    parent_parser_q.add_argument(
        "-q",
        "--quiet",
        dest="quiet",
        help="quiet mode",
        required=False,
        action="store_true",
        default=None,
    )
    parent_parser_c = argparse.ArgumentParser(add_help=False)
    parent_parser_c.add_argument(
        "-c",
        "--cpu",
        help="cpu number for multi-process",
        required=False,
        type=int,
        default=1)
    parent_parser_o = argparse.ArgumentParser(add_help=False)
    parent_parser_o.add_argument(
        "-o",
        "--out",
        dest="out",
        required=True,
        help="output directory path",
        metavar="OUTPUT_DIR")
    parent_parser_s = argparse.ArgumentParser(add_help=False)
    parent_parser_s.add_argument(
        "-s",
        "--seeds",
        help="seeds (growth medium) for metabolic analysis",
        required=True)
    parent_parser_n = argparse.ArgumentParser(add_help=False)
    parent_parser_n.add_argument(
        "-n",
        "--networksdir",
        metavar="NETWORKS_DIR",
        help="Metabolic networks directory",
        required=True)
    parent_parser_t = argparse.ArgumentParser(add_help=False)
    parent_parser_t.add_argument(
        "-t",
        "--targets",
        metavar="TARGETS_DIR_OR_FILE",
        help="Folder containing sbml targets or single sbml file for metabolic analysis",
        required=True)
    parent_parser_m = argparse.ArgumentParser(add_help=False)
    parent_parser_m.add_argument(
        "-m",
        "--modelhost",
        help="Host metabolic model for community analysis",
        required=False,
        default=None)
    parent_parser_taxon = argparse.ArgumentParser(add_help=False)
    parent_parser_taxon.add_argument(
        "--taxon",
        help="Mpwt taxon file",
        required=False,
        default=None)
    parent_parser_j = argparse.ArgumentParser(add_help=False)
    parent_parser_j.add_argument(
        "-j",
        "--json",
        metavar="JSON_DIR_OR_FILE",
        help="Folder containing JSON files or single JSON file containing miscoto enumeration results",
        required=False,
        type=str)
    parent_parser_g = argparse.ArgumentParser(add_help=False)
    parent_parser_g.add_argument(
        "-g",
        "--graph",
        metavar="GML_DIR_OR_FILE",
        help="Folder containing Graph GML file or single GML file",
        required=True)
    parent_parser_jar = argparse.ArgumentParser(add_help=False)
    parent_parser_jar.add_argument(
        "--oog",
        help="OOG jar file for powergraph",
        required=True,
        type=str)

    # subparsers
    subparsers = parser.add_subparsers(
        title='subcommands',
        description='valid subcommands:',
        dest="cmd")
    enum_parser = subparsers.add_parser(
        "enum",
        help="enumeration using miscoto",
        parents=[
            parent_parser_s, parent_parser_n, parent_parser_t, parent_parser_m, parent_parser_o, parent_parser_q
        ],
        description=
        "Run miscoto enumeration on sbml species with seeds and targets"
    )
    stat_parser = subparsers.add_parser(
        "stats",
        help="statistics on keystone species",
        parents=[
            parent_parser_j, parent_parser_o, parent_parser_taxon, parent_parser_q
        ],
        description=
        "Compute statistics on keystone species in the community"
    )
    graph_parser = subparsers.add_parser(
        "graph",
        help="graph creation with enumeration solution",
        parents=[
            parent_parser_j, parent_parser_o, parent_parser_t, parent_parser_taxon, parent_parser_q
        ],
        description="Create the solution graph using the JSON from miscoto enumeration")
    powergraph_parser = subparsers.add_parser(
        "powergraph",
        help="powergraph creation and visualization",
        parents=[
            parent_parser_g, parent_parser_o, parent_parser_jar, parent_parser_q
        ],
        description=
        "Compress the GML graph of solution and create a powergraph (bbl) and a svg of the graph"
    )
    wkf_parser = subparsers.add_parser(
        "workflow",
        help="whole workflow",
        parents=[
            parent_parser_s, parent_parser_n, parent_parser_t, parent_parser_m, parent_parser_o, parent_parser_jar,
            parent_parser_taxon, parent_parser_q
        ],
        description=
        "Run the whole workflow: miscoto enumeration, statistics on keystone species, graph on solution and powergraph creation"
    )

    args = parser.parse_args()

    # If no argument print the help.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    # set up the logger
    if args.quiet:
        root_logger.setLevel(logging.CRITICAL)
    else:
        root_logger.setLevel(logging.INFO)

    # test writing in out_directory if a subcommand is given else print version and help
    if args.cmd:
        if not utils.is_valid_dir(args.out):
            logger.critical("Impossible to access/create output directory")
            sys.exit(1)
    else:
        logger.info("m2m_analysis " + VERSION + "\n" + LICENSE)
        parser.print_help()
        sys.exit()

    # if modelhost is given as an arg: check the SBML level and turn it into 2 if needed
    if args.cmd in ["workflow", "enum"]:
        if not os.path.isdir(args.networksdir):
            logger.critical(args.networksdir + " is not a correct directory path")
            sys.exit(1)
        network_dir = check_sbml(args.networksdir, args.out)

        if not utils.is_valid_file(args.seeds):
            logger.critical(args.seeds + " is not a correct filepath")
            sys.exit(1)
        if not utils.is_valid_file(args.targets) and not utils.is_valid_dir(args.targets):
            logger.critical(args.targets + " is not a correct filepath")
            sys.exit(1)
        if args.modelhost:
            new_arg_modelhost = check_sbml(args.modelhost, args.out, folder=False)
        else:
            new_arg_modelhost = None

    # deal with given subcommand
    if args.cmd == "workflow":
        main_analysis_workflow(network_dir, args.targets, args.seeds, args.out, args.taxon,
                                args.oog, new_arg_modelhost)
    elif args.cmd == "enum":
        main_enumeration(network_dir, args.targets, args.seeds, args.out, new_arg_modelhost)
    elif args.cmd == "stats":
        main_stat(args.json, args.out, args.taxon)
    elif args.cmd == "graph":
        main_graph(args.json, args.targets, args.out, args.taxon)
    elif args.cmd == "powergraph":
        main_powergraph(args.graph, args.out, args.oog)

    logger.info("--- Total runtime %.2f seconds ---" % (time.time() - start_time))
Example #12
0
def mincom(instance_w_targets, out_dir):
    """Compute minimal community selection and show analyses.

    Args:
        instance_w_targets (str): ASP instance filepath
        out_dir (str): results directory
    """
    start_time = time.time()
    miscoto_dir = os.path.join(out_dir, 'community_analysis')
    miscoto_mincom_path = os.path.join(miscoto_dir, 'mincom.json')
    if not utils.is_valid_dir(miscoto_dir):
        logger.critical('Impossible to access/create output directory')
        sys.exit(1)

    # Compute community selection
    logger.info('Running minimal community selection')
    all_results = compute_mincom(instance_w_targets, miscoto_dir)
    # Normalise every result entry into a plain list.
    all_results = {key: list(value) for key, value in all_results.items()}

    producible_targets = all_results['producible']
    unproducible_targets = all_results['still_unprod']
    logger.info('\nIn the initial and minimal communities ' +
                str(len(producible_targets)) + ' targets are producible and ' +
                str(len(unproducible_targets)) + ' remain unproducible.')
    logger.info('\n' + str(len(producible_targets)) + ' producible targets:')
    logger.info('\n'.join(producible_targets))
    logger.info('\n' + str(len(unproducible_targets)) +
                ' still unproducible targets:')
    logger.info('\n'.join(unproducible_targets))

    logger.info(
        f'\nMinimal communities are available in {miscoto_mincom_path} \n')

    # Report one arbitrary minimal community.
    one_sol_bact = list(all_results['bacteria'])
    logger.info('######### One minimal community #########')
    logger.info(
        '# One minimal community enabling the producibility of the target metabolites given as inputs'
    )
    logger.info('Minimal number of bacteria in communities => ' +
                str(len(one_sol_bact)) + '\n')
    logger.info("\n".join(one_sol_bact))

    # Report the union of all minimal communities (key species).
    union = all_results['union_bacteria']
    logger.info(
        '######### Key species: Union of minimal communities #########')
    logger.info(
        '# Bacteria occurring in at least one minimal community enabling the producibility of the target metabolites given as inputs'
    )
    logger.info('Number of key species => ' + str(len(union)) + "\n")
    logger.info("\n".join(union))

    # Report the intersection of all minimal communities (essential symbionts).
    intersection = all_results['inter_bacteria']
    logger.info(
        '######### Essential symbionts: Intersection of minimal communities #########'
    )
    logger.info(
        '# Bacteria occurring in ALL minimal communities enabling the producibility of the target metabolites given as inputs'
    )
    logger.info('Number of essential symbionts => ' + str(len(intersection)) +
                "\n")
    logger.info("\n".join(intersection))

    # Alternative symbionts: union minus intersection.
    alternative_symbionts = list(set(union) - set(intersection))
    logger.info(
        '######### Alternative symbionts: Difference between Union and Intersection #########'
    )
    logger.info(
        '# Bacteria occurring in at least one minimal community but not all minimal communities enabling the producibility of the target metabolites given as inputs'
    )
    logger.info('Number of alternative symbionts => ' +
                str(len(alternative_symbionts)) + '\n')
    logger.info('\n'.join(alternative_symbionts))
    logger.info('\n--- Mincom runtime %.2f seconds ---\n' %
                (time.time() - start_time))
Example #13
0
def main():
    """Run the m2m_analysis command-line program.

    Parses the command line, configures console and file logging (the log
    file is written into the output directory), validates the input/output
    paths for the chosen subcommand and dispatches to the subcommand
    implementations (workflow, enum, graph, powergraph).
    """
    start_time = time.time()
    parser = argparse.ArgumentParser(
        "m2m_analysis",
        description=MESSAGE +
        " For specific help on each subcommand use: m2m_analysis {cmd} --help",
        epilog=REQUIRES,
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("-v",
                        "--version",
                        action="version",
                        version="%(prog)s " + VERSION + "\n" + LICENSE)

    # Parent parsers: each holds one shared option and is combined into the
    # subcommand parsers below through the `parents` mechanism.
    parent_parser_q = argparse.ArgumentParser(add_help=False)
    parent_parser_q.add_argument(
        "-q",
        "--quiet",
        dest="quiet",
        help="quiet mode",
        required=False,
        action="store_true",
        default=None,
    )
    parent_parser_c = argparse.ArgumentParser(add_help=False)
    parent_parser_c.add_argument("-c",
                                 "--cpu",
                                 help="cpu number for multi-process",
                                 required=False,
                                 type=int,
                                 default=1)
    parent_parser_o = argparse.ArgumentParser(add_help=False)
    parent_parser_o.add_argument("-o",
                                 "--out",
                                 dest="out",
                                 required=True,
                                 help="output directory path",
                                 metavar="OUTPUT_DIR")
    parent_parser_s = argparse.ArgumentParser(add_help=False)
    parent_parser_s.add_argument(
        "-s",
        "--seeds",
        help="seeds (growth medium) for metabolic analysis",
        required=True)
    parent_parser_n = argparse.ArgumentParser(add_help=False)
    parent_parser_n.add_argument("-n",
                                 "--networksdir",
                                 metavar="NETWORKS_DIR",
                                 help="Metabolic networks directory",
                                 required=True)
    parent_parser_t = argparse.ArgumentParser(add_help=False)
    parent_parser_t.add_argument(
        "-t",
        "--targets",
        metavar="TARGETS_DIR_OR_FILE",
        help=
        "Folder containing sbml targets or a single sbml file for metabolic analysis",
        required=True)
    parent_parser_m = argparse.ArgumentParser(add_help=False)
    parent_parser_m.add_argument(
        "-m",
        "--modelhost",
        help="Host metabolic model for community analysis",
        required=False,
        default=None)
    parent_parser_taxon = argparse.ArgumentParser(add_help=False)
    parent_parser_taxon.add_argument("--taxon",
                                     help="Mpwt taxon file",
                                     required=False,
                                     default=None)
    parent_parser_j = argparse.ArgumentParser(add_help=False)
    parent_parser_j.add_argument(
        "-j",
        "--json",
        metavar="JSON_DIR_OR_FILE",
        help=
        "Folder containing JSON files or a single JSON file containing miscoto enumeration results",
        required=True,
        type=str)
    parent_parser_g = argparse.ArgumentParser(add_help=False)
    parent_parser_g.add_argument(
        "-g",
        "--gml",
        metavar="GML_DIR_OR_FILE",
        help=
        "Folder containing GML files or a single GML file containing m2m_analysis graph results",
        required=True)
    parent_parser_jar = argparse.ArgumentParser(add_help=False)
    parent_parser_jar.add_argument(
        "--oog",
        help=
        "OOG jar file for powergraph svg creation using Power Graph Command Line Tool",
        required=False,
        type=str)
    parent_parser_level = argparse.ArgumentParser(add_help=False)
    parent_parser_level.add_argument(
        "--level",
        help=
        "Taxonomy level, must be: phylum, class, order, family, genus or species. By default, it is phylum.",
        required=False,
        type=str)

    # subparsers
    subparsers = parser.add_subparsers(title='subcommands',
                                       description='valid subcommands:',
                                       dest="cmd")
    enum_parser = subparsers.add_parser(
        "enum",
        help="enumeration using miscoto",
        parents=[
            parent_parser_s, parent_parser_n, parent_parser_t, parent_parser_m,
            parent_parser_o, parent_parser_q
        ],
        description=
        "Run miscoto enumeration on sbml species with seeds and targets")
    graph_parser = subparsers.add_parser(
        "graph",
        help="graph creation with enumeration solution",
        parents=[
            parent_parser_j, parent_parser_o, parent_parser_t,
            parent_parser_taxon, parent_parser_q, parent_parser_level
        ],
        description=
        "Create the solution graph using the JSON from miscoto enumeration")
    powergraph_parser = subparsers.add_parser(
        "powergraph",
        help="powergraph creation and visualization",
        parents=[
            parent_parser_g, parent_parser_jar, parent_parser_q,
            parent_parser_taxon, parent_parser_level, parent_parser_o
        ],
        description=
        "Compress the GML graph of solution and create a powergraph (bbl), a website format of the powergraph and a svg of the graph (if you use the --oog option)"
    )
    wkf_parser = subparsers.add_parser(
        "workflow",
        help="whole workflow",
        parents=[
            parent_parser_s, parent_parser_n, parent_parser_t, parent_parser_m,
            parent_parser_o, parent_parser_jar, parent_parser_taxon,
            parent_parser_q, parent_parser_level
        ],
        description=
        "Run the whole workflow: miscoto enumeration, graph on solution and powergraph creation"
    )

    args = parser.parse_args()

    # If no argument print the help.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    # set up the logger
    if args.quiet:
        logger.setLevel(logging.CRITICAL)
    else:
        logger.setLevel(logging.INFO)

    # test writing in out_directory if a subcommand is given else print version and help
    if args.cmd:
        if not utils.is_valid_dir(args.out):
            logger.critical("Impossible to access/create output directory")
            sys.exit(1)
    else:
        logger.info("m2m_analysis " + VERSION + "\n" + LICENSE)
        parser.print_help()
        sys.exit()

    # add logger in file
    formatter = logging.Formatter('%(message)s')
    log_file_path = os.path.join(args.out, f'm2m_analysis_{args.cmd}.log')
    file_handler = logging.FileHandler(log_file_path, 'w+')
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    # set up the default console logger; quiet mode only shows warnings and up
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)
    if args.quiet:
        console_handler.setLevel(logging.WARNING)
    logger.addHandler(console_handler)

    # Check Oog.jar file
    if args.cmd in ["workflow", "powergraph"]:
        if args.oog:
            check_oog_jar_file(args.oog)

    # Validate networks dir, seeds and targets for the subcommands needing them.
    if args.cmd in ["workflow", "enum"]:
        if not os.path.isdir(args.networksdir):
            logger.critical(args.networksdir +
                            " is not a correct directory path")
            sys.exit(1)
        network_dir = args.networksdir

        if not utils.is_valid_file(args.seeds):
            logger.critical(args.seeds + " is not a correct filepath")
            sys.exit(1)
        if not utils.is_valid_file(args.targets) and not utils.is_valid_dir(
                args.targets):
            logger.critical(args.targets + " is not a correct filepath")
            sys.exit(1)
        if args.modelhost:
            new_arg_modelhost = args.modelhost
        else:
            new_arg_modelhost = None

    if args.cmd in ["workflow", "graph", "powergraph"]:
        if args.level:
            if args.level not in [
                    'phylum', 'class', 'order', 'family', 'genus', 'species'
            ]:
                logger.critical(
                    "Error with --level argument, it must be one among: phylum, class, order, family, genus or species"
                )
                sys.exit(1)
        if args.level is None:
            # Default taxonomy level when --level is not given.
            args.level = 'phylum'

    # deal with given subcommand
    if args.cmd == "workflow":
        main_analysis_workflow(network_dir, args.targets, args.seeds, args.out,
                               args.taxon, args.oog, new_arg_modelhost,
                               args.level)
    elif args.cmd == "enum":
        main_enumeration(network_dir, args.targets, args.seeds, args.out,
                         new_arg_modelhost)
    elif args.cmd == "graph":
        main_graph(args.json, args.targets, args.out, args.taxon, args.level)
    elif args.cmd == "powergraph":
        main_powergraph(args.gml, args.out, args.oog, args.taxon, args.level)

    logger.info("--- Total runtime %.2f seconds ---" %
                (time.time() - start_time))
Example #14
0
def genomes_to_pgdb(genomes_dir, output_dir, cpu, clean):
    """Run Pathway Tools on each genome of the repository.

    Args:
        genomes_dir (str): genome repository
        output_dir (str): output repository
        cpu (int): number of CPUs to use
        clean (bool): delete PGDBs in ptools-local corresponding to the input data

    Returns:
        pgdb_dir (str): pgdb repository
    """
    logger.info(
        "######### Running metabolic network reconstruction with Pathway Tools #########"
    )
    if not os.path.isdir(genomes_dir):
        logger.critical("Genomes directory path does not exist.")
        sys.exit(1)

    # Build output paths with os.path.join, consistent with the rest of the module.
    pgdb_dir = os.path.join(output_dir, "pgdb")
    log_dir = os.path.join(output_dir, "pgdb_log")
    if not utils.is_valid_dir(pgdb_dir):
        logger.critical("Impossible to access/create output directory")
        sys.exit(1)

    # Pathway Tools and BLAST must be callable, and a ~/.ncbirc must exist,
    # otherwise the reconstruction cannot run at all.
    if not utils.check_program("pathway-tools"):
        logger.critical(
            "Pathway Tools is not in the PATH, please fix it before using the program"
        )
        sys.exit(1)

    if not utils.check_program("blastp"):
        logger.critical(
            "blastp is not in the PATH, please fix it before using the program"
        )
        sys.exit(1)

    if not utils.is_valid_file(os.path.join(os.path.expanduser("~"), ".ncbirc")):
        logger.critical(
            "No ~/.ncbirc file, please fix it before using the program")
        sys.exit(1)

    # PGDB names expected by Pathway Tools: lowercased genome name + 'cyc'.
    genomes_pgdbs = [
        genome_dir.lower() + 'cyc' for genome_dir in os.listdir(genomes_dir)
    ]
    if clean:
        remove_pgdbs(to_delete_pgdbs=genomes_pgdbs, number_cpu=cpu)
        cleaning_input(genomes_dir, verbose=False)

    # Check whether PGDBs are already created. If yes and not --clean, pursue without running ptools again
    pgdb_dirs = [
        species_pgdb.lower() + 'cyc' for species_pgdb in os.listdir(pgdb_dir)
    ]
    if set(pgdb_dirs) == set(genomes_pgdbs):
        logger.warning(
            "PGDBs are already created and will be used. To overrun them, run m2m with --clean option"
        )
        return pgdb_dir

    # mpwt only needs a truthy flag here to know a taxon_id.tsv file is present.
    taxon_file = None
    if 'taxon_id.tsv' in set(next(os.walk(genomes_dir))[2]):
        taxon_file = True

    multiprocess_pwt(genomes_dir,
                     pgdb_dir,
                     patho_inference=True,
                     patho_hole_filler=False,
                     patho_operon_predictor=False,
                     no_download_articles=False,
                     dat_creation=True,
                     dat_extraction=True,
                     size_reduction=False,
                     number_cpu=cpu,
                     taxon_file=taxon_file,
                     patho_log=log_dir,
                     verbose=False)

    # One PGDB per genome is expected; any mismatch means a Pathway Tools failure.
    if len(os.listdir(pgdb_dir)) != len(os.listdir(genomes_dir)):
        error_log_path = os.path.join(log_dir, "log_error.txt")
        if os.path.exists(error_log_path):
            logger.critical(
                "Something went wrong running Pathway Tools. See the log file in "
                + error_log_path)
        else:
            logger.critical("Something went wrong running Pathway Tools.")
        sys.exit(1)

    return pgdb_dir
Example #15
0
def indiv_scope_run(sbml_dir, seeds, output_dir, cpu_number=1):
    """Run Menetools and analyse individual metabolic capabilities.

    Args:
        sbml_dir (str): directory of SBML files
        seeds (str): SBML seeds file
        output_dir (str): directory for results
        cpu_number (int): number of CPU to use for multiprocessing

    Returns:
        str: path to output file for scope from Menetools analysis
    """
    logger.info('######### Running individual metabolic scopes #########')

    menetools_dir = os.path.join(output_dir, 'indiv_scopes')
    indiv_scopes_path = os.path.join(menetools_dir, 'indiv_scopes.json')
    produced_seeds_path = os.path.join(menetools_dir,
                                       'indiv_produced_seeds.json')

    if not utils.is_valid_dir(menetools_dir):
        logger.critical('Impossible to access/create output directory')
        sys.exit(1)

    # Keep only the regular files with an SBML/XML extension.
    all_files = [
        f for f in os.listdir(sbml_dir)
        if os.path.isfile(os.path.join(sbml_dir, f)) and utils.get_extension(
            os.path.join(sbml_dir, f)).lower() in ['xml', 'sbml']
    ]
    # One (sbml_path, basename, seeds) tuple per species, fed to starmap.
    multiprocessing_indiv_scopes = [
        (os.path.join(sbml_dir, f), utils.get_basename(f), seeds)
        for f in all_files
    ]

    all_scopes = {}
    all_produced_seeds = {}

    menescope_pool = Pool(cpu_number)
    results = menescope_pool.starmap(indiv_scope_on_species,
                                     multiprocessing_indiv_scopes)
    # Each result is (error_flag, species_basename, menescope_results).
    for error, bname, menescope_results in results:
        if error is True:
            logger.critical(
                '------------An error occurred during M2M run of Menetools, M2M will stop-------------'
            )
            menescope_pool.close()
            menescope_pool.join()
            sys.exit(1)
        all_scopes[bname] = menescope_results['scope']
        all_produced_seeds[bname] = menescope_results['produced_seeds']

    menescope_pool.close()
    menescope_pool.join()

    with open(indiv_scopes_path, 'w') as dumpfile:
        json.dump(all_scopes, dumpfile, indent=4)

    with open(produced_seeds_path, 'w') as dumpfile:
        json.dump(all_produced_seeds, dumpfile, indent=4)

    return indiv_scopes_path
Example #16
0
def powergraph_analysis(gml_input_file_folder, output_folder, oog_jar=None, taxon_file=None, taxonomy_level="phylum"):
    """Run the graph compression and picture creation.

    For each input GML graph: compress it into a bbl powergraph, render an
    HTML visualization (plus a taxonomy-colored variant when a taxon file is
    given) and, if an OOG jar is provided, an SVG picture.

    Args:
        gml_input_file_folder (str): path to the gml folder or the gml file
        output_folder (str): path to the output folder
        oog_jar (str): path to OOG jar file
        taxon_file (str): mpwt taxon file for species
        taxonomy_level (str): taxonomy level, must be: phylum, class, order, family, genus or species.
    """
    starttime = time.time()

    gml_paths = utils.file_or_folder(gml_input_file_folder)

    bbl_path = os.path.join(output_folder, 'bbl')
    svg_path = os.path.join(output_folder, 'svg')
    html_output = os.path.join(output_folder, 'html')

    if not utils.is_valid_dir(bbl_path):
        logger.critical("Impossible to access/create output directory " + bbl_path)
        sys.exit(1)

    # The svg folder is only needed when an OOG jar is provided.
    if oog_jar:
        if not utils.is_valid_dir(svg_path):
            logger.critical("Impossible to access/create output directory " +  svg_path)
            sys.exit(1)

    if not utils.is_valid_dir(html_output):
        logger.critical("Impossible to access/create output directory " + html_output)
        sys.exit(1)

    # 26 colours from Alphabet project (minus white):
    # https://en.wikipedia.org/wiki/Help:Distinguishable_colors
    alphabet_project_distinct_hex_colors = ["#F0A3FF",
        "#0075DC", "#993F00", "#4C005C", "#191919",
        "#005C31", "#2BCE48", "#FFCC99", "#808080",
        "#94FFB5", "#8F7C00", "#9DCC00", "#C20088",
        "#003380", "#FFA405", "#FFA8BB", "#426600",
        "#FF0010", "#5EF1F2", "#00998F", "#E0FF66",
        "#740AFF", "#990000", "#FFFF80", "#FFFF00",
        "#FF5005"]

    # 269 colors from:
    # https://graphicdesign.stackexchange.com/a/3815
    hex_colors = ["#000000","#FFFF00","#1CE6FF","#FF34FF",
    "#FF4A46","#008941","#006FA6","#A30059","#FFDBE5","#7A4900",
    "#0000A6","#63FFAC","#B79762","#004D43","#8FB0FF","#997D87",
    "#5A0007","#809693","#FEFFE6","#1B4400","#4FC601","#3B5DFF",
    "#4A3B53","#FF2F80","#61615A","#BA0900","#6B7900","#00C2A0",
    "#FFAA92","#FF90C9","#B903AA","#D16100","#DDEFFF","#000035",
    "#7B4F4B","#A1C299","#300018","#0AA6D8","#013349","#00846F",
    "#372101","#FFB500","#C2FFED","#A079BF","#CC0744","#C0B9B2",
    "#C2FF99","#001E09","#00489C","#6F0062","#0CBD66","#EEC3FF",
    "#456D75","#B77B68","#7A87A1","#788D66","#885578","#FAD09F",
    "#FF8A9A","#D157A0","#BEC459","#456648","#0086ED","#886F4C",
    "#34362D","#B4A8BD","#00A6AA","#452C2C","#636375","#A3C8C9",
    "#FF913F","#938A81","#575329","#00FECF","#B05B6F","#8CD0FF",
    "#3B9700","#04F757","#C8A1A1","#1E6E00","#7900D7","#A77500",
    "#6367A9","#A05837","#6B002C","#772600","#D790FF","#9B9700",
    "#549E79","#FFF69F","#201625","#72418F","#BC23FF","#99ADC0",
    "#3A2465","#922329","#5B4534","#FDE8DC","#404E55","#0089A3",
    "#CB7E98","#A4E804","#324E72","#6A3A4C","#83AB58","#001C1E",
    "#D1F7CE","#004B28","#C8D0F6","#A3A489","#806C66","#222800",
    "#BF5650","#E83000","#66796D","#DA007C","#FF1A59","#8ADBB4",
    "#1E0200","#5B4E51","#C895C5","#320033","#FF6832","#66E1D3",
    "#CFCDAC","#D0AC94","#7ED379","#012C58","#7A7BFF","#D68E01",
    "#353339","#78AFA1","#FEB2C6","#75797C","#837393","#943A4D",
    "#B5F4FF","#D2DCD5","#9556BD","#6A714A","#001325","#02525F",
    "#0AA3F7","#E98176","#DBD5DD","#5EBCD1","#3D4F44","#7E6405",
    "#02684E","#962B75","#8D8546","#9695C5","#E773CE","#D86A78",
    "#3E89BE","#CA834E","#518A87","#5B113C","#55813B","#E704C4",
    "#00005F","#A97399","#4B8160","#59738A","#FF5DA7","#F7C9BF",
    "#643127","#513A01","#6B94AA","#51A058","#A45B02","#1D1702",
    "#E20027","#E7AB63","#4C6001","#9C6966","#64547B","#97979E",
    "#006A66","#391406","#F4D749","#0045D2","#006C31","#DDB6D0",
    "#7C6571","#9FB2A4","#00D891","#15A08A","#BC65E9","#FFFFFE",
    "#C6DC99","#203B3C","#671190","#6B3A64","#F5E1FF","#FFA0F2",
    "#CCAA35","#374527","#8BB400","#797868","#C6005A","#3B000A",
    "#C86240","#29607C","#402334","#7D5A44","#CCB87C","#B88183",
    "#AA5199","#B5D6C3","#A38469","#9F94F0","#A74571","#B894A6",
    "#71BB8C","#00B433","#789EC9","#6D80BA","#953F00","#5EFF03",
    "#E4FFFC","#1BE177","#BCB1E5","#76912F","#003109","#0060CD",
    "#D20096","#895563","#29201D","#5B3213","#A76F42","#89412E",
    "#1A3A2A","#494B5A","#A88C85","#F4ABAA","#A3F3AB","#00C6C8",
    "#EA8B66","#958A9F","#BDC9D2","#9FA064","#BE4700","#658188",
    "#83A485","#453C23","#47675D","#3A3F00","#061203","#DFFB71",
    "#868E7E","#98D058","#6C8F7D","#D7BFC2","#3C3E6E","#D83D66",
    "#2F5D9B","#6C5E46","#D25B88","#5B656C","#00B57F","#545C46",
    "#866097","#365D25","#252F99","#00CCFF","#674E60","#FC009C",
    "#92896B"]

    if taxon_file:
        taxonomy_output_file = os.path.join(output_folder, 'taxonomy_species.tsv')
        tree_output_file = os.path.join(output_folder, 'taxon_tree.txt')
        if not os.path.exists(taxonomy_output_file):
            extract_taxa(taxon_file, taxonomy_output_file, tree_output_file, taxonomy_level)

        taxon_species, all_taxons = get_taxon(taxonomy_output_file)

        taxon_colors = {}

        # Prefer the small maximally-distinguishable palette when it suffices.
        if len(all_taxons) <= 26:
            used_colors = alphabet_project_distinct_hex_colors
        else:
            used_colors = hex_colors

        for index, taxon in enumerate(all_taxons):
            # Wrap around if there are more taxa than colors so a very large
            # taxonomy cannot raise an IndexError (colors are then reused).
            taxon_colors[taxon] = used_colors[index % len(used_colors)]

    for target_name in gml_paths:
        bbl_output = os.path.join(bbl_path, target_name + '.bbl')

        html_target = os.path.join(html_output, target_name)
        if not utils.is_valid_dir(html_target):
            logger.critical("Impossible to access/create output directory " + html_target)
            sys.exit(1)

        # Compress gml file into a bbl file with PowerGrASP.
        gml_input_path = gml_paths[target_name]
        logger.info('######### Graph compression: ' + target_name + ' #########')
        compression(gml_input_path, bbl_output)
        logger.info('######### PowerGraph visualization: ' + target_name + ' #########')

        # Read gml file with networkx and extract the essential and alternative symbionts using the note of each node (organism).
        graph = nx.read_gml(gml_input_path)
        essentials = [organism for organism in graph.nodes if graph.nodes[organism]['note'] == 'ES']
        alternatives = [organism for organism in graph.nodes if graph.nodes[organism]['note'] == 'AS']
        if taxon_file:
            # Sanity check: node names must carry the same taxonomy level as
            # the extracted taxonomy file, otherwise coloring is meaningless.
            key_species = essentials + alternatives
            taxon_key_species = [organism.split('__')[0] for organism in key_species]
            if len(set(all_taxons).intersection(set(taxon_key_species))) == 0:
                logger.critical('Difference of taxonomy level between gml file ('+gml_input_path+') compared to '+taxonomy_output_file+'.')
                sys.exit(1)

        bbl_to_html(bbl_output, html_target)
        if taxon_file:
            # Duplicate the HTML tree and recolor the copy by taxonomy.
            if os.path.exists(html_target +'_taxon'):
                shutil.rmtree(html_target +'_taxon')
            shutil.copytree(html_target, html_target +'_taxon')
            update_js_taxonomy(html_target +'_taxon', taxon_colors)
            output_html_merged = os.path.join(html_output, target_name + '_powergraph_taxon.html')
            merge_html_css_js(html_target +'_taxon', output_html_merged)

        update_js(html_target, essentials, alternatives)
        output_html_merged = os.path.join(html_output, target_name + '_powergraph.html')
        merge_html_css_js(html_target, output_html_merged)

        if oog_jar:
            svg_file = os.path.join(svg_path, target_name + '.bbl.svg')
            # Remove any stale SVG so bbl_to_svg writes a fresh one.
            if os.path.exists(svg_file):
                os.remove(svg_file)
            bbl_to_svg(oog_jar, bbl_output, svg_path)
            if taxon_file:
                taxonomy_svg_file = os.path.join(svg_path, target_name + '_taxon.bbl.svg')
                if os.path.exists(taxonomy_svg_file):
                    os.remove(taxonomy_svg_file)
                shutil.copyfile(svg_file, taxonomy_svg_file)
                update_svg_taxonomy(taxonomy_svg_file, taxon_colors)
            update_svg(svg_file, essentials, alternatives)

    logger.info(
        "--- Powergraph runtime %.2f seconds ---\n" % (time.time() - starttime))
Example #17
0
def main():
    """Run programm.
    """
    start_time = time.time()
    parser = argparse.ArgumentParser(
        "m2m",
        description=MESSAGE +
        " For specific help on each subcommand use: m2m {cmd} --help",
        epilog=REQUIRES)
    parser.add_argument("-v",
                        "--version",
                        action="version",
                        version="%(prog)s " + VERSION + "\n" + LICENSE)

    # parent parser
    parent_parser_q = argparse.ArgumentParser(add_help=False)
    parent_parser_q.add_argument(
        "-q",
        "--quiet",
        dest="quiet",
        help="quiet mode",
        required=False,
        action="store_true",
        default=None,
    )
    parent_parser_c = argparse.ArgumentParser(add_help=False)
    parent_parser_c.add_argument("-c",
                                 "--cpu",
                                 help="cpu number for multi-process",
                                 required=False,
                                 type=int,
                                 default=1)
    parent_parser_o = argparse.ArgumentParser(add_help=False)
    parent_parser_o.add_argument("-o",
                                 "--out",
                                 dest="out",
                                 required=True,
                                 help="output directory path",
                                 metavar="OUPUT_DIR")

    parent_parser_no = argparse.ArgumentParser(add_help=False)
    parent_parser_no.add_argument(
        "--noorphan",
        help=
        "use this option to ignore reactions without gene or protein association",
        required=False,
        action="store_true",
        default=False,
    )
    parent_parser_s = argparse.ArgumentParser(add_help=False)
    parent_parser_s.add_argument(
        "-s",
        "--seeds",
        help="seeds (growth medium) for metabolic analysis",
        required=True)
    parent_parser_n = argparse.ArgumentParser(add_help=False)
    parent_parser_n.add_argument("-n",
                                 "--networksdir",
                                 metavar="NETWORKS_DIR",
                                 help="metabolic networks directory",
                                 required=True)
    parent_parser_g = argparse.ArgumentParser(add_help=False)
    parent_parser_g.add_argument("-g",
                                 "--genomes",
                                 help="annotated genomes directory",
                                 required=True)
    parent_parser_cl = argparse.ArgumentParser(add_help=False)
    parent_parser_cl.add_argument("--clean",
                                  help="clean PGDBs if already present",
                                  required=False,
                                  action="store_true",
                                  default=None)
    parent_parser_m = argparse.ArgumentParser(add_help=False)
    parent_parser_m.add_argument(
        "-m",
        "--modelhost",
        help="host metabolic model for community analysis",
        required=False,
        default=None)
    parent_parser_l = argparse.ArgumentParser(add_help=False)
    parent_parser_l.add_argument("-l",
                                 "--level",
                                 help="Level for SBML creation, 2 or 3",
                                 required=False,
                                 type=int,
                                 choices=[2, 3],
                                 default=2)
    parent_parser_p = argparse.ArgumentParser(add_help=False)
    parent_parser_p.add_argument("-p",
                                 "--padmet",
                                 help="create padmet files",
                                 required=False,
                                 action="store_true",
                                 default=None)
    parent_parser_t_required = argparse.ArgumentParser(add_help=False)
    parent_parser_t_required.add_argument(
        "-t",
        "--targets",
        help="targets for metabolic analysis",
        required=True)
    parent_parser_t_optional = argparse.ArgumentParser(add_help=False)
    parent_parser_t_optional.add_argument(
        "-t",
        "--targets",
        help=
        "Optional targets for metabolic analysis, if not used metage2metabo will use the addedvalue of the community",
        required=False)

    # subparsers
    subparsers = parser.add_subparsers(title='subcommands',
                                       description='valid subcommands:',
                                       dest="cmd")
    ptools_parser = subparsers.add_parser(
        "recon",
        help="metabolic network reconstruction",
        parents=[
            parent_parser_g, parent_parser_o, parent_parser_c, parent_parser_q,
            parent_parser_l, parent_parser_no, parent_parser_p,
            parent_parser_cl
        ],
        description=
        "Run metabolic network reconstruction for each annotated genome of the input directory, using Pathway Tools"
    )
    indivscope_parser = subparsers.add_parser(
        "iscope",
        help="individual scope computation",
        parents=[
            parent_parser_n, parent_parser_s, parent_parser_o, parent_parser_q
        ],
        description=
        "Compute individual scopes (reachable metabolites from seeds) for each metabolic network of the input directory"
    )
    comscope_parser = subparsers.add_parser(
        "cscope",
        help="community scope computation",
        parents=[
            parent_parser_n, parent_parser_s, parent_parser_o, parent_parser_m,
            parent_parser_q, parent_parser_t_optional
        ],
        description="Compute the community scope of all metabolic networks")
    added_value_parser = subparsers.add_parser(
        "addedvalue",
        help="added value of microbiota's metabolism over individual's",
        parents=[
            parent_parser_n, parent_parser_s, parent_parser_o, parent_parser_m,
            parent_parser_q
        ],
        description=
        "Compute metabolites that are reachable by the community/microbiota and not by individual organisms"
    )
    mincom_parser = subparsers.add_parser(
        "mincom",
        help="minimal communtity selection",
        parents=[
            parent_parser_n, parent_parser_s, parent_parser_o, parent_parser_m,
            parent_parser_q, parent_parser_t_required
        ],
        description=
        "Select minimal-size community to make reachable a set of metabolites")
    seeds_parser = subparsers.add_parser(
        "seeds",
        help="creation of seeds SBML file",
        parents=[parent_parser_o, parent_parser_q],
        description=
        "Create a SBML file starting for a simple text file with metabolic compounds identifiers"
    )
    seeds_parser.add_argument(
        "--metabolites",
        help=
        'metabolites file: one per line, encoded (XXX as in <species id="XXXX" .../> of SBML files)',
        required=True)
    wkf_parser = subparsers.add_parser(
        "workflow",
        help="whole workflow",
        parents=[
            parent_parser_g, parent_parser_s, parent_parser_m, parent_parser_o,
            parent_parser_c, parent_parser_q, parent_parser_no,
            parent_parser_p, parent_parser_t_optional, parent_parser_cl
        ],
        description=
        "Run the whole workflow: metabolic network reconstruction, individual and community scope analysis and community selection"
    )
    metacom_parser = subparsers.add_parser(
        "metacom",
        help="whole metabolism community analysis",
        parents=[
            parent_parser_n, parent_parser_s, parent_parser_m, parent_parser_o,
            parent_parser_t_optional, parent_parser_q
        ],
        description=
        "Run the whole metabolism community analysis: individual and community scope analysis and community selection"
    )
    test_parser = subparsers.add_parser(
        "test",
        help="test on sample data from rumen experiments",
        parents=[parent_parser_q, parent_parser_c, parent_parser_o],
        description="Test the whole workflow on a data sample")

    args = parser.parse_args()

    # If no argument print the help.
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    # test writing in out_directory if a subcommand is given else print version and help
    if args.cmd:
        if not utils.is_valid_dir(args.out):
            logger.critical("Impossible to access/create output directory")
            sys.exit(1)
    else:
        logger.info("m2m " + VERSION + "\n" + LICENSE)
        parser.print_help()
        sys.exit()

    # logger = logging.getLogger()    #TODO: get rid of it once mpwt's logger is fixed
    # logger.setLevel(logging.DEBUG)  #TODO: get rid of it once mpwt's logger is fixed
    # add logger in file
    formatter = logging.Formatter('%(message)s')
    log_file_path = os.path.join(args.out, f'm2m_{args.cmd}.log')
    file_handler = logging.FileHandler(log_file_path, 'w+')
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    # set up the default console logger
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)
    if args.quiet:
        console_handler.setLevel(logging.WARNING)
    logger.addHandler(console_handler)

    #if modelhost is given as an arg: check the SBML level and turn it into 2 if needed
    if args.cmd in ["workflow", "metacom", "mincom", "cscope", "addedvalue"
                    ] and args.modelhost:
        new_arg_modelhost = args.modelhost
        logger.warning(
            f"\n A metabolic model is given for an host. The metabolite producibility of the community will display metabolites that can be produced by the hsot or the microbiome, *not including* the metabolites that the host can produce by itself. If this is not what you want to do, you can consider placing the host in the same directory as the symbionts, which will lead to a complete community scope. \n"
        )
    else:
        new_arg_modelhost = None

    if "seeds" in args and args.seeds is not None:
        if not utils.is_valid_file(args.seeds):
            logger.critical(args.seeds + " is not a correct filepath")
            sys.exit(1)

    # deal with given subcommand
    if args.cmd == "workflow":
        main_workflow(args.genomes, args.out, args.cpu, args.clean, args.seeds,
                      args.noorphan, args.padmet, new_arg_modelhost,
                      args.targets)
    elif args.cmd in ["iscope", "cscope", "addedvalue", "mincom", "metacom"]:
        if not os.path.isdir(args.networksdir):
            logger.critical(args.networksdir +
                            " is not a correct directory path")
            sys.exit(1)
        network_dir = args.networksdir
        if "targets" in args and args.targets is not None:
            if not utils.is_valid_file(args.targets):
                logger.critical(args.targets + " is not a correct filepath")
                sys.exit(1)
            # test if some targets are seeds
            itsct_seeds_targets = sbml_management.compare_seeds_and_targets(
                args.seeds, args.targets)
            if itsct_seeds_targets != set():
                logger.warning(
                    f"\nWARNING: compounds {*list(itsct_seeds_targets),} are both in seeds and targets. Since they are in seeds, they will be in each organism's individual producibility scope (iscope), but not appear in the community scope (cscope). To be certain that they are produced (through an activable reaction and not just because they are seeds), check the output file: producibility_targets.json.\n"
                )
        if args.cmd == "iscope":
            main_iscope(network_dir, args.seeds, args.out)
        elif args.cmd == "cscope":
            main_cscope(network_dir, args.seeds, args.out, args.targets,
                        new_arg_modelhost)
        elif args.cmd == "addedvalue":
            main_added_value(network_dir, args.seeds, args.out,
                             new_arg_modelhost)
        elif args.cmd == "mincom":
            main_mincom(network_dir, args.seeds, args.out, args.targets,
                        new_arg_modelhost)
        elif args.cmd == "metacom":
            main_metacom(network_dir, args.out, args.seeds, new_arg_modelhost,
                         args.targets)
    elif args.cmd == "recon":
        main_recon(args.genomes, args.out, args.noorphan, args.padmet,
                   args.level, args.cpu, args.clean)
    elif args.cmd == "seeds":
        if not utils.is_valid_file(args.metabolites):
            logger.critical(args.metabolites + " is not a correct filepath")
            sys.exit(1)
        else:
            main_seeds(args.metabolites, args.out)
    elif args.cmd == 'test':
        main_test(args.out, args.cpu)

    logger.info("--- Total runtime %.2f seconds ---" %
                (time.time() - start_time))
    logger.warning(f'--- Logs written in {log_file_path} ---')
Example #18
0
def genomes_to_pgdb(genomes_dir, output_dir, cpu, clean, use_pwt_xml):
    """Run Pathway Tools on each genome of the repository
    
    Args:
        genomes_dir (str): genome repository
        output_dir (str): output repository
        cpu (int): number of CPUs to use
        clean (bool): delete PGDBs in ptools-local corresponding to the input data
        use_pwt_xml (bool): use Pathway Tools XML instead of creating them with padmet

    Returns:
        pgdb_dir (str): pgdb repository
    """
    logger.info(
        "######### Running metabolic network reconstruction with Pathway Tools #########"
    )
    if not os.path.isdir(genomes_dir):
        logger.critical("Genomes directory path does not exist.")
        sys.exit(1)

    pgdb_dir = os.path.join(output_dir, 'pgdb')
    log_dir = os.path.join(output_dir, 'pgdb_log')
    # Pathway Tools requires a ~/.ncbirc file pointing to the BLAST installation.
    ncbirc_path = os.path.join(os.path.expanduser('~'), '.ncbirc')
    log_path = os.path.join(log_dir, 'log_error.txt')

    if not utils.is_valid_dir(pgdb_dir):
        logger.critical('Impossible to access/create output directory')
        sys.exit(1)

    # Fail early if the external tools needed by the reconstruction are missing.
    if not utils.check_program('pathway-tools'):
        logger.critical(
            'Pathway Tools is not in the PATH, please fix it before using the program'
        )
        sys.exit(1)

    if not utils.check_program("blastp"):
        logger.critical(
            'blastp is not in the PATH, please fix it before using the program'
        )
        sys.exit(1)

    if not utils.is_valid_file(ncbirc_path):
        logger.critical(
            f'No {ncbirc_path} file, please fix it before using the program'
        )
        sys.exit(1)

    # Expected PGDB names: one per genome sub-directory, lowercased with the
    # 'cyc' suffix used by Pathway Tools. Only directories are genomes: plain
    # files such as taxon_id.tsv must not be counted (the genome count below
    # filters on isdir for the same reason).
    genomes_pgdbs = [
        genome_folder.lower() + 'cyc'
        for genome_folder in os.listdir(genomes_dir)
        if os.path.isdir(os.path.join(genomes_dir, genome_folder))
    ]
    if clean:
        remove_pgdbs(to_delete_pgdbs=genomes_pgdbs, number_cpu=cpu)
        cleaning_input(genomes_dir, verbose=False)

    # Check whether PGDBs are already created. If yes and not --clean, pursue without running ptools again
    existing_pgdbs = [species.lower() + 'cyc' for species in os.listdir(pgdb_dir)]
    if set(existing_pgdbs) == set(genomes_pgdbs):
        logger.warning("PGDBs are already created and will be used. To overrun them, run m2m with --clean option")
        return pgdb_dir

    # mpwt accepts taxon_file=True to pick up taxon_id.tsv from the input folder.
    taxon_file = None
    if 'taxon_id.tsv' in set(next(os.walk(genomes_dir))[2]):
        taxon_file = True

    # Extract either the .dat flat files (padmet route) or the Pathway Tools XML.
    if use_pwt_xml:
        move_dat = False
        move_xml = True
    else:
        move_dat = True
        move_xml = False

    multiprocess_pwt(genomes_dir, pgdb_dir,
                        patho_inference=True,
                        patho_hole_filler=False,
                        patho_operon_predictor=False,
                        no_download_articles=False,
                        flat_creation=True,
                        dat_extraction=move_dat,
                        xml_extraction=move_xml,
                        owl_extraction=False,
                        col_extraction=False,
                        size_reduction=False,
                        number_cpu=cpu,
                        taxon_file=taxon_file,
                        patho_log=log_dir,
                        verbose=False)

    # Sanity check: one output (directory of .dat files, or one XML file) per genome.
    nb_genomes_dir = len([folder for folder in os.listdir(genomes_dir) if os.path.isdir(os.path.join(genomes_dir, folder))])
    if use_pwt_xml:
        nb_pgdb_dir = len([folder for folder in os.listdir(pgdb_dir) if os.path.isfile(os.path.join(pgdb_dir, folder))])
    else:
        nb_pgdb_dir = len([folder for folder in os.listdir(pgdb_dir) if os.path.isdir(os.path.join(pgdb_dir, folder))])

    if nb_pgdb_dir != nb_genomes_dir:
        if os.path.exists(log_path):
            logger.critical("Something went wrong running Pathway Tools. See the log file in " + log_path)
        else:
            logger.critical("Something went wrong running Pathway Tools.")
        sys.exit(1)

    return pgdb_dir
Example #19
0
def mincom(instance_w_targets, out_dir):
    """Compute minimal community selection and show analyses.
    
    Args:
        instance_w_targets (str): ASP instance filepath
        out_dir (str): results directory
    """
    starttime = time.time()
    miscoto_dir = os.path.join(out_dir, "community_analysis")
    mincom_json_path = os.path.join(miscoto_dir, "mincom.json")
    if not utils.is_valid_dir(miscoto_dir):
        logger.critical("Impossible to access/create output directory")
        sys.exit(1)
    # Compute community selection
    logger.info("Running minimal community selection")
    all_results = compute_mincom(instance_w_targets, out_dir)

    # miscoto returns sets, which are not JSON-serializable: convert them to lists.
    for key in all_results:
        all_results[key] = list(all_results[key])
    with open(mincom_json_path, 'w') as dumpfile:
        json.dump(all_results,
                  dumpfile,
                  indent=4,
                  default=lambda x: x.__dict__)
    # Fixed message: this function writes mincom.json, not comm_scopes.json.
    logger.info("Minimal communities for all metabolic networks available in " +
                mincom_json_path)
    # Give one solution
    one_sol_bact = list(all_results['bacteria'])
    logger.info('######### One minimal community #########')
    logger.info(
        "# One minimal community enabling the producibility of the target metabolites given as inputs"
    )
    logger.info("Minimal number of bacteria in communities = " +
                str(len(one_sol_bact)))
    logger.info("\n".join(one_sol_bact))
    # Give union of solutions
    union = all_results['union_bacteria']
    logger.info(
        '######### Keystone species: Union of minimal communities #########')
    logger.info(
        "# Bacteria occurring in at least one minimal community enabling the producibility of the target metabolites given as inputs"
    )
    logger.info("Keystone species = " + str(len(union)))
    logger.info("\n".join(union))
    # Give intersection of solutions
    intersection = all_results['inter_bacteria']
    logger.info(
        '######### Essential symbionts: Intersection of minimal communities #########'
    )
    logger.info(
        "# Bacteria occurring in ALL minimal communities enabling the producibility of the target metabolites given as inputs"
    )
    logger.info("Essential symbionts = " + str(len(intersection)))
    logger.info("\n".join(intersection))
    # Give keystones, essential and alternative symbionts
    alternative_symbionts = list(set(union) - set(intersection))
    logger.info(
        '######### Alternative symbionts: Difference between Union and Intersection #########'
    )
    logger.info(
        "# Bacteria occurring in at least one minimal community but not all minimal communities enabling the producibility of the target metabolites given as inputs"
    )
    logger.info("Alternative symbionts = " + str(len(alternative_symbionts)))
    logger.info("\n".join(alternative_symbionts))
    logger.info("--- Mincom runtime %.2f seconds ---\n" %
                (time.time() - starttime))
Example #20
0
def create_gml(json_paths, target_paths, output_dir, taxon_file=None):
    """Create solution graph from miscoto output and compute stats

    Args:
        json_paths (str): {target: path_to_corresponding_json}
        target_paths (str): {target: path_to_corresponding_sbml}
        output_dir (str): results directory
        taxon_file (str): mpwt taxon file for species in sbml folder
    """
    miscoto_stat_output = os.path.join(output_dir, 'miscoto_stats.txt')
    key_species_stats_output = os.path.join(output_dir,
                                            'key_species_stats.tsv')
    key_species_json = os.path.join(output_dir, 'key_species.json')

    gml_output = os.path.join(output_dir, 'gml')

    if not utils.is_valid_dir(gml_output):
        logger.critical('Impossible to access/create output directory')
        sys.exit(1)

    # Per-target-category counters for the miscoto_stats.txt summary.
    len_min_sol = {}
    len_union = {}
    len_intersection = {}
    len_solution = {}
    len_target = {}

    target_categories = {}
    for target in target_paths:
        target_categories[target] = sbml_management.get_compounds(
            target_paths[target])

    if taxon_file:
        taxon_named_species, all_taxons = get_taxon(taxon_file)
    else:
        taxon_named_species = None
        all_taxons = None

    key_species_data = {}
    miscoto_stat_output_datas = []

    for target_category in target_categories:
        key_species_data[target_category] = {}
        key_species_data[target_category]['essential_symbionts'] = {}
        key_species_data[target_category]['alternative_symbionts'] = {}
        target_output_gml_path = os.path.join(gml_output,
                                              target_category + '.gml')
        with open(json_paths[target_category]) as json_data:
            dicti = json.load(json_data)

        G = nx.Graph()
        added_node = []
        species_weight = {}
        # Fixed: use logging's lazy %-formatting; the previous call passed the
        # unproducible list as a format argument to a message with no
        # placeholder, which broke the log record.
        if dicti['still_unprod'] != []:
            logger.warning('ERROR %s is unproducible', dicti['still_unprod'])
        len_target[target_category] = len(dicti['newly_prod']) + len(
            dicti['still_unprod'])
        len_min_sol[target_category] = len(dicti['bacteria'])
        len_union[target_category] = len(dicti['union_bacteria'])
        len_intersection[target_category] = len(dicti['inter_bacteria'])
        # ES = essential symbiont (in every minimal community),
        # AS = alternative symbiont (in at least one but not all).
        key_species_types = {
            organism: 'ES' if organism in dicti['inter_bacteria'] else 'AS'
            for organism in dicti['union_bacteria']
        }
        if taxon_file:
            for taxon in all_taxons:
                key_species_data[target_category]['essential_symbionts'][
                    taxon] = [
                        organism for organism in key_species_types
                        if key_species_types[organism] == 'ES' and
                        taxon_named_species[organism].split('__')[0] == taxon
                    ]
                key_species_data[target_category]['alternative_symbionts'][
                    taxon] = [
                        organism for organism in key_species_types
                        if key_species_types[organism] == 'AS' and
                        taxon_named_species[organism].split('__')[0] == taxon
                    ]
        else:
            key_species_data[target_category]['essential_symbionts'][
                'data'] = [
                    organism for organism in key_species_types
                    if key_species_types[organism] == 'ES'
                ]
            key_species_data[target_category]['alternative_symbionts'][
                'data'] = [
                    organism for organism in key_species_types
                    if key_species_types[organism] == 'AS'
                ]

        # Build the co-occurrence graph: one edge per pair of species that
        # appear together in an enumerated minimal community, weighted by the
        # number of communities in which the pair co-occurs.
        len_solution[target_category] = len(dicti['enum_bacteria'])
        for sol in dicti['enum_bacteria']:
            if len(dicti['enum_bacteria'][sol]) > 1:
                for species_1, species_2 in combinations(
                        dicti['enum_bacteria'][sol], 2):
                    if species_1 not in added_node:
                        if taxon_file:
                            G.add_node(taxon_named_species[species_1],
                                       note=key_species_types[species_1])
                        else:
                            G.add_node(species_1,
                                       note=key_species_types[species_1])
                        added_node.append(species_1)
                    if species_2 not in added_node:
                        if taxon_file:
                            G.add_node(taxon_named_species[species_2],
                                       note=key_species_types[species_2])
                        else:
                            G.add_node(species_2,
                                       note=key_species_types[species_2])
                        added_node.append(species_2)
                    combination_species = '_'.join(
                        sorted([species_1, species_2]))
                    if combination_species not in species_weight:
                        species_weight[combination_species] = 1
                    else:
                        species_weight[combination_species] += 1
                    if taxon_file:
                        G.add_edge(taxon_named_species[species_1],
                                   taxon_named_species[species_2],
                                   weight=species_weight[combination_species])
                    else:
                        G.add_edge(species_1,
                                   species_2,
                                   weight=species_weight[combination_species])
            elif len(dicti['enum_bacteria'][sol]) == 1:
                # Singleton community: add the lone species as an isolated node.
                species_1 = dicti['enum_bacteria'][sol][0]
                if species_1 not in added_node:
                    if taxon_file:
                        G.add_node(taxon_named_species[species_1],
                                   note=key_species_types[species_1])
                    else:
                        G.add_node(species_1,
                                   note=key_species_types[species_1])
                    added_node.append(species_1)

        # Check if all the nodes of G are not isolates.
        if len(G.nodes) == nx.number_of_isolates(G):
            logger.critical(
                r'/!\ Warning: All the nodes of the solution graph are isolated (they are not connected to other nodes). This leads to powergrasp creating an empty powergraph.'
            )
            logger.critical(
                'So m2m_analysis stops at the solution graph step.')
            sys.exit(1)

        miscoto_stat_output_datas.append([
            target_category,
            str(len_target[target_category]),
            str(len_min_sol[target_category]),
            str(len_union[target_category]),
            str(len_intersection[target_category]),
            str(len_solution[target_category])
        ])
        logger.info('######### Graph of ' + target_category + ' #########')
        logger.info('Number of nodes: ' + str(G.number_of_nodes()))
        logger.info('Number of edges: ' + str(G.number_of_edges()))
        nx.write_gml(G, target_output_gml_path)

    # Summary table: one row per target category.
    with open(miscoto_stat_output, 'w') as stats_output:
        statswriter = csv.writer(stats_output, delimiter="\t")
        statswriter.writerow([
            'categories', 'nb_target', 'size_min_sol', 'size_union',
            'size_intersection', 'size_enum'
        ])
        for miscoto_stat_output_data in miscoto_stat_output_datas:
            statswriter.writerow(miscoto_stat_output_data)

    with open(key_species_json, 'w') as json_output:
        json.dump(key_species_data, json_output, indent=4)

    # Key species counts per taxon (or a single 'data' column without taxonomy).
    with open(key_species_stats_output, 'w') as key_stat_output:
        key_stats_writer = csv.writer(key_stat_output, delimiter='\t')
        if all_taxons:
            key_stats_writer.writerow(
                ['target_categories', 'key_group', *sorted(all_taxons), 'Sum'])
        else:
            key_stats_writer.writerow(
                ['target_categories', 'key_group', 'data', 'Sum'])
        for target in key_species_data:
            if all_taxons:
                essential_counts = [
                    len(key_species_data[target]['essential_symbionts'][taxon])
                    for taxon in sorted(all_taxons)
                ]
                alternative_counts = [
                    len(key_species_data[target]['alternative_symbionts']
                        [taxon]) for taxon in sorted(all_taxons)
                ]
            else:
                essential_counts = [
                    len(key_species_data[target]['essential_symbionts']
                        ['data'])
                ]
                alternative_counts = [
                    len(key_species_data[target]['alternative_symbionts']
                        ['data'])
                ]
            # key species = essential + alternative, summed element-wise.
            key_counts = list(map(add, essential_counts, alternative_counts))
            key_stats_writer.writerow(
                [target, 'key_species', *key_counts,
                 sum(key_counts)])
            key_stats_writer.writerow([
                target, 'essential_symbionts', *essential_counts,
                sum(essential_counts)
            ])
            key_stats_writer.writerow([
                target, 'alternative_symbionts', *alternative_counts,
                sum(alternative_counts)
            ])