def setup_logger(logger_name, file_path, level, to_screen=False):
    '''
    Function to initialize and configure a logger that can write to file
    and (optionally) the screen.

    Parameters
    ----------
    logger_name : string
        name of the logger
    file_path : string
        file path to the log file on disk
    level : integer
        indicates the level at which the logger should log; this is
        controlled by integers that come with the python logging
        package. (e.g. logging.INFO=20, logging.DEBUG=10)
    to_screen : boolean (optional)
        flag to indicate whether to enable logging to the screen

    Returns
    -------
    logger : logging.Logger object
        Python logging.Logger object which is capable of logging run-
        time information about the program to file and/or screen
    '''

    # Import packages
    import logging

    # Init logger, formatter, filehandler, streamhandler
    logger = logging.getLogger(logger_name)
    logger.setLevel(level)
    formatter = logging.Formatter('%(asctime)s : %(message)s')

    # Write logs to file
    fileHandler = logging.FileHandler(file_path)
    fileHandler.setFormatter(formatter)
    logger.addHandler(fileHandler)

    # Write to screen, if desired
    if to_screen:
        streamHandler = logging.StreamHandler()
        streamHandler.setFormatter(formatter)
        logger.addHandler(streamHandler)

    # Return the logger
    return logger
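
# Minimal usage sketch for setup_logger (the logger name and log-file path
# below are hypothetical placeholders, not values used by the pipelines in
# this file).
def _demo_setup_logger():
    import logging

    logger = setup_logger("demo_logger", "demo_run.log", logging.INFO,
                          to_screen=True)
    logger.info("Logger configured; messages go to demo_run.log and the screen.")
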
def build_collect_workflow(args, retval):
    import os
    import glob
    import warnings

    warnings.filterwarnings("ignore")
    import ast
    import pkg_resources
    from pathlib import Path
    import yaml
    import uuid
    from time import strftime
    import shutil

    try:
        import pynets

        print(f"\n\nPyNets Version:\n{pynets.__version__}\n\n")
    except ImportError:
        print("PyNets not installed! Ensure that you are using the correct"
              " python version.")

    # Set Arguments to global variables
    resources = args.pm
    if resources == "auto":
        from multiprocessing import cpu_count
        import psutil

        nthreads = cpu_count() - 1
        procmem = [
            int(nthreads),
            # Free system memory, converted from bytes to GB
            int(list(psutil.virtual_memory())[4] / 1000000000)
        ]
    else:
        procmem = list(eval(str(resources)))
    plugin_type = args.plug
    if isinstance(plugin_type, list):
        plugin_type = plugin_type[0]
    verbose = args.v
    working_path = args.basedir
    work_dir = args.work
    modality = args.modality
    drop_cols = args.dc
    if isinstance(modality, list):
        modality = modality[0]

    if os.path.isdir(work_dir):
        shutil.rmtree(work_dir)

    os.makedirs(f"{str(Path(working_path))}/{modality}_group_topology_auc",
                exist_ok=True)

    wf = collect_all(working_path, modality, drop_cols)

    with open(pkg_resources.resource_filename("pynets", "runconfig.yaml"),
              "r") as stream:
        try:
            # Explicit Loader is required by recent PyYAML versions
            hardcoded_params = yaml.load(stream, Loader=yaml.FullLoader)
            runtime_dict = {}
            execution_dict = {}
            for i in range(len(hardcoded_params["resource_dict"])):
                runtime_dict[
                    list(hardcoded_params["resource_dict"][i].keys())[0]
                ] = ast.literal_eval(
                    list(hardcoded_params["resource_dict"][i].values())[0][0])
            for i in range(len(hardcoded_params["execution_dict"])):
                execution_dict[
                    list(hardcoded_params["execution_dict"][i].keys())[0]
                ] = list(hardcoded_params["execution_dict"][i].values())[0][0]
        except FileNotFoundError:
            print("Failed to parse runconfig.yaml")

    run_uuid = f"{strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4()}"
    os.makedirs(f"{work_dir}/pynets_out_collection{run_uuid}", exist_ok=True)
    wf.base_dir = f"{work_dir}/pynets_out_collection{run_uuid}"

    if verbose is True:
        from nipype import config, logging

        cfg_v = dict(
            logging={
                "workflow_level": "DEBUG",
                "utils_level": "DEBUG",
                "interface_level": "DEBUG",
                "filemanip_level": "DEBUG",
                "log_directory": str(wf.base_dir),
                "log_to_file": True,
            },
            monitoring={
                "enabled": True,
                "sample_frequency": "0.1",
                "summary_append": True,
                "summary_file": str(wf.base_dir),
            },
        )
        logging.update_logging(config)
        config.update_config(cfg_v)
        config.enable_debug_mode()
        config.enable_resource_monitor()

        # Re-import the standard-library logging module (shadows nipype's
        # logging imported above) to set up the callback log handler
        import logging

        callback_log_path = f"{wf.base_dir}/run_stats.log"
        logger = logging.getLogger("callback")
        logger.setLevel(logging.DEBUG)
        handler = logging.FileHandler(callback_log_path)
        logger.addHandler(handler)

    execution_dict["crashdump_dir"] = str(wf.base_dir)
    execution_dict["plugin"] = str(plugin_type)
    cfg = dict(execution=execution_dict)

    for key in cfg.keys():
        for setting, value in cfg[key].items():
            wf.config[key][setting] = value
    try:
        wf.write_graph(graph2use="colored", format="png")
    except BaseException:
        pass
    if verbose is True:
        from nipype.utils.profiler import log_nodes_cb

        plugin_args = {
            "n_procs": int(procmem[0]),
            "memory_gb": int(procmem[1]),
            "status_callback": log_nodes_cb,
            "scheduler": "mem_thread",
        }
    else:
        plugin_args = {
            "n_procs": int(procmem[0]),
            "memory_gb": int(procmem[1]),
            "scheduler": "mem_thread",
        }
    print("%s%s%s" % ("\nRunning with ", str(plugin_args), "\n"))
    wf.run(plugin=plugin_type, plugin_args=plugin_args)
    if verbose is True:
        from nipype.utils.draw_gantt_chart import generate_gantt_chart

        print("Plotting resource profile from run...")
        generate_gantt_chart(callback_log_path, cores=int(procmem[0]))
        handler.close()
        logger.removeHandler(handler)
    return
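
# Hypothetical invocation sketch for build_collect_workflow above: the
# attribute names mirror the argparse fields the function reads (pm, plug, v,
# basedir, work, modality, dc); the paths and modality value are placeholders.
def _demo_build_collect_workflow():
    from types import SimpleNamespace

    args = SimpleNamespace(pm="auto", plug="MultiProc", v=False,
                           basedir="/data/pynets_outputs",
                           work="/tmp/pynets_work",
                           modality="func", dc=[])
    build_collect_workflow(args, retval={})
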
def run(self, config_file=None, partic_list=None):
    """Establish where and how we're running the pipeline and set up the run.
    (Entry point)

    - This is the entry point for pipeline building and connecting.
      Depending on the inputs, the appropriate workflow runner will
      be selected and executed.

    :type config_file: str
    :param config_file: Filepath to the pipeline configuration file in
                        YAML format.
    :type partic_list: str
    :param partic_list: Filepath to the participant list file in YAML format.
    """

    from time import strftime
    from qap_utils import raise_smart_exception, check_config_settings

    # in case we are overloading
    if config_file:
        from qap.script_utils import read_yml_file
        self._config = read_yml_file(config_file)
        self.validate_config_dict()
        self._config["pipeline_config_yaml"] = config_file

    if not self._config:
        raise Exception("config not found!")

    if partic_list:
        self._config["subject_list"] = partic_list

    # Get configurations and settings
    check_config_settings(self._config, "num_processors")
    check_config_settings(self._config, "num_sessions_at_once")
    check_config_settings(self._config, "available_memory")
    check_config_settings(self._config, "output_directory")
    check_config_settings(self._config, "working_directory")

    self._num_bundles_at_once = 1
    write_report = self._config.get('write_report', False)

    if "cluster_system" in self._config.keys() and not self._bundle_idx:
        res_mngr = self._config["cluster_system"]
        if (res_mngr is None) or ("None" in res_mngr) or \
                ("none" in res_mngr):
            self._platform = None
        else:
            platforms = ["SGE", "PBS", "SLURM"]
            self._platform = str(res_mngr).upper()
            if self._platform not in platforms:
                msg = "The resource manager %s provided in the pipeline " \
                      "configuration file is not one of the valid " \
                      "choices. It must be one of the following:\n%s" \
                      % (self._platform, str(platforms))
                raise_smart_exception(locals(), msg)
    else:
        self._platform = None

    # Create output directory
    try:
        os.makedirs(self._config["output_directory"])
    except:
        if not op.isdir(self._config["output_directory"]):
            err = "[!] Output directory unable to be created.\n" \
                  "Path: %s\n\n" % self._config["output_directory"]
            raise Exception(err)
        else:
            pass

    # Create working directory
    try:
        os.makedirs(self._config["working_directory"])
    except:
        if not op.isdir(self._config["working_directory"]):
            err = "[!] Working directory unable to be created.\n" \
                  "Path: %s\n\n" % self._config["working_directory"]
            raise Exception(err)
        else:
            pass

    results = []

    # set up callback logging
    import logging
    from nipype.pipeline.plugins.callback_log import log_nodes_cb

    cb_log_filename = os.path.join(self._config["output_directory"],
                                   "callback.log")

    # Add handler to callback log file
    cb_logger = logging.getLogger('callback')
    cb_logger.setLevel(logging.DEBUG)
    handler = logging.FileHandler(cb_log_filename)
    cb_logger.addHandler(handler)

    # settle run arguments (plugins)
    self.runargs = {}
    self.runargs['plugin'] = 'MultiProc'
    self.runargs['plugin_args'] = \
        {'memory_gb': int(self._config["available_memory"]),
         'status_callback': log_nodes_cb}
    n_procs = {'n_procs': self._config["num_processors"]}
    self.runargs['plugin_args'].update(n_procs)

    # load the participant list file into dictionary
    subdict = self.load_sublist()

    # flatten the participant dictionary
    self._sub_dict = self.create_session_dict(subdict)

    # create the list of bundles
    self._bundles_list = self.create_bundles()
    num_bundles = len(self._bundles_list)

    if not self._bundle_idx:
        # want to initialize the run-level log directory (not the bundle-
        # level) only the first time we run the script, due to the
        # timestamp. if sub-nodes are being kicked off by a batch file on
        # a cluster, we don't want a new timestamp for every new node run
        self._run_log_dir = op.join(
            self._config['output_directory'],
            '_'.join([self._run_name, "logs"]),
            '_'.join([strftime("%Y%m%d_%H_%M_%S"),
                      "%dbundles" % num_bundles]))

    if self._run_log_dir:
        if not os.path.isdir(self._run_log_dir):
            try:
                os.makedirs(self._run_log_dir)
            except:
                if not op.isdir(self._run_log_dir):
                    err = "[!] Log directory unable to be created.\n" \
                          "Path: %s\n\n" % self._run_log_dir
                    raise Exception(err)
                else:
                    pass

    if num_bundles == 1:
        self._config["num_sessions_at_once"] = \
            len(self._bundles_list[0])

    # Start the magic
    if not self._platform and not self._bundle_idx:
        # not a cluster/grid run
        for idx in range(1, num_bundles + 1):
            results.append(self.run_one_bundle(idx))
    elif not self._bundle_idx:
        # there is a self._bundle_idx only if the pipeline runner is run
        # with bundle_idx as a parameter - only happening either manually,
        # or when running on a cluster
        self.submit_cluster_batch_file(num_bundles)
    else:
        # if there is a bundle_idx supplied to the runner
        results = self.run_one_bundle(self._bundle_idx)
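
# Stand-alone sketch of the callback-logging pattern used in run() above:
# attach a FileHandler to nipype's "callback" logger and hand log_nodes_cb to
# the MultiProc plugin as status_callback. The workflow object "wf" and
# output_dir are placeholders; log_nodes_cb is imported from
# nipype.utils.profiler here, which assumes a recent nipype layout.
def _demo_callback_logging(wf, output_dir, n_procs=2, memory_gb=4):
    import os
    import logging

    from nipype.utils.profiler import log_nodes_cb

    cb_log = os.path.join(output_dir, "callback.log")
    cb_logger = logging.getLogger("callback")
    cb_logger.setLevel(logging.DEBUG)
    handler = logging.FileHandler(cb_log)
    cb_logger.addHandler(handler)
    try:
        wf.run(plugin="MultiProc",
               plugin_args={"n_procs": n_procs,
                            "memory_gb": memory_gb,
                            "status_callback": log_nodes_cb})
    finally:
        # Detach the handler so repeated runs do not duplicate log lines
        handler.close()
        cb_logger.removeHandler(handler)
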
def build_collect_workflow(args, retval):
    import os
    import re
    import glob
    import warnings

    warnings.filterwarnings("ignore")
    import ast
    import pkg_resources
    from pathlib import Path
    import pandas as pd
    import yaml

    try:
        import pynets

        print(f"\n\nPyNets Version:\n{pynets.__version__}\n\n")
    except ImportError:
        print(
            "PyNets not installed! Ensure that you are using the correct"
            " python version."
        )

    # Set Arguments to global variables
    resources = args.pm
    if resources:
        procmem = list(eval(str(resources)))
    else:
        from multiprocessing import cpu_count

        nthreads = cpu_count()
        procmem = [int(nthreads), int(float(nthreads) * 2)]
    plugin_type = args.plug
    if isinstance(plugin_type, list):
        plugin_type = plugin_type[0]
    verbose = args.v
    working_path = args.basedir
    work_dir = args.work
    modality = args.modality

    os.makedirs(f"{str(Path(working_path).parent)}/all_visits_netmets_auc",
                exist_ok=True)

    wf = collect_all(working_path, modality)

    with open(pkg_resources.resource_filename("pynets", "runconfig.yaml"),
              "r") as stream:
        try:
            # Explicit Loader is required by recent PyYAML versions
            hardcoded_params = yaml.load(stream, Loader=yaml.FullLoader)
            runtime_dict = {}
            execution_dict = {}
            for i in range(len(hardcoded_params["resource_dict"])):
                runtime_dict[
                    list(hardcoded_params["resource_dict"][i].keys())[0]
                ] = ast.literal_eval(
                    list(hardcoded_params["resource_dict"][i].values())[0][0])
            for i in range(len(hardcoded_params["execution_dict"])):
                execution_dict[
                    list(hardcoded_params["execution_dict"][i].keys())[0]
                ] = list(hardcoded_params["execution_dict"][i].values())[0][0]
        except FileNotFoundError:
            print("Failed to parse runconfig.yaml")

    os.makedirs(f"{work_dir}/pynets_out_collection", exist_ok=True)
    wf.base_dir = f"{work_dir}/pynets_out_collection"

    if verbose is True:
        from nipype import config, logging

        cfg_v = dict(
            logging={
                "workflow_level": "DEBUG",
                "utils_level": "DEBUG",
                "interface_level": "DEBUG",
                "filemanip_level": "DEBUG",
                "log_directory": str(wf.base_dir),
                "log_to_file": True,
            },
            monitoring={
                "enabled": True,
                "sample_frequency": "0.1",
                "summary_append": True,
                "summary_file": str(wf.base_dir),
            },
        )
        logging.update_logging(config)
        config.update_config(cfg_v)
        config.enable_debug_mode()
        config.enable_resource_monitor()

        # Re-import the standard-library logging module (shadows nipype's
        # logging imported above) to set up the callback log handler
        import logging

        callback_log_path = f"{wf.base_dir}/run_stats.log"
        logger = logging.getLogger("callback")
        logger.setLevel(logging.DEBUG)
        handler = logging.FileHandler(callback_log_path)
        logger.addHandler(handler)

    execution_dict["crashdump_dir"] = str(wf.base_dir)
    execution_dict["plugin"] = str(plugin_type)
    cfg = dict(execution=execution_dict)

    for key in cfg.keys():
        for setting, value in cfg[key].items():
            wf.config[key][setting] = value
    try:
        wf.write_graph(graph2use="colored", format="png")
    except BaseException:
        pass
    if verbose is True:
        from nipype.utils.profiler import log_nodes_cb

        plugin_args = {
            "n_procs": int(procmem[0]),
            "memory_gb": int(procmem[1]),
            "status_callback": log_nodes_cb,
            "scheduler": "mem_thread",
        }
    else:
        plugin_args = {
            "n_procs": int(procmem[0]),
            "memory_gb": int(procmem[1]),
            "scheduler": "mem_thread",
        }
    print("%s%s%s" % ("\nRunning with ", str(plugin_args), "\n"))
    wf.run(plugin=plugin_type, plugin_args=plugin_args)
    if verbose is True:
        from nipype.utils.draw_gantt_chart import generate_gantt_chart

        print("Plotting resource profile from run...")
        generate_gantt_chart(callback_log_path, cores=int(procmem[0]))
        handler.close()
        logger.removeHandler(handler)

    files_ = glob.glob(
        f"{str(Path(working_path).parent)}/all_visits_netmets_auc/*clean.csv"
    )

    print("Aggregating dataframes...")
    dfs = []
    for file_ in files_:
        df = pd.read_csv(file_, chunksize=100000).read()
        try:
            df.drop(df.filter(regex="Unname"), axis=1, inplace=True)
        except BaseException:
            pass
        dfs.append(df)
        del df
    df_concat(dfs, working_path)
    return
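
# Minimal sketch of the CSV aggregation step above, shown in isolation: read
# each "*clean.csv" file, drop pandas' auto-generated "Unnamed" index columns,
# and concatenate the results. The file list is a placeholder; the df_concat
# helper used above is assumed to handle the final write-out and is not
# reproduced here.
def _demo_aggregate_clean_csvs(csv_files):
    import pandas as pd

    dfs = []
    for file_ in csv_files:
        df = pd.read_csv(file_)
        # Columns named "Unnamed: 0" etc. appear when the index was saved to CSV
        df = df.drop(columns=df.filter(regex="Unname").columns)
        dfs.append(df)
    return pd.concat(dfs, ignore_index=True)
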
def run(self, number_of_cores=1, memory=None, save_profiler_log=False):
    """Execute the workflow of the super-resolution reconstruction pipeline.

    Nipype execution engine will take care of the management and execution of
    all processing steps involved in the super-resolution reconstruction
    pipeline. Note that the complete execution graph is saved as a PNG image
    to support transparency on the whole processing.

    Parameters
    ----------
    number_of_cores : int
        Number of cores / CPUs used by the workflow
    memory : int
        Maximal memory used by the workflow
    save_profiler_log : bool
        If `True`, generates the profiling callback log (Default: `False`)
    """
    from nipype import logging as nipype_logging

    # Use nipype.interface logger to print some information messages
    iflogger = nipype_logging.getLogger('nipype.interface')
    iflogger.info("**** Workflow graph creation ****")
    self.wf.write_graph(dotfilename='graph.dot',
                        graph2use='colored',
                        format='png',
                        simple_form=True)

    # Copy and rename the generated "graph.png" image
    src = os.path.join(self.wf.base_dir, self.wf.name, 'graph.png')
    if self.session is not None:
        dst = os.path.join(
            self.output_dir, '-'.join(["pymialsrtk", __version__]),
            self.subject, self.session, 'figures',
            f'{self.subject}_{self.session}_rec-SR_id-{self.sr_id}_desc-processing_graph.png'
        )
    else:
        dst = os.path.join(
            self.output_dir, '-'.join(["pymialsrtk", __version__]),
            self.subject, 'figures',
            f'{self.subject}_rec-SR_id-{self.sr_id}_desc-processing_graph.png'
        )

    # Create the figures/ and parent directories if they do not exist
    figures_dir = os.path.dirname(dst)
    os.makedirs(figures_dir, exist_ok=True)

    # Make the copy
    iflogger.info(f'\t > Copy {src} to {dst}...')
    shutil.copy(src=src, dst=dst)

    # Create dictionary of arguments passed to plugin_args
    args_dict = {
        'maxtasksperchild': 1,
        'raise_insufficient': False,
        'n_procs': number_of_cores
    }

    if (memory is not None) and (memory > 0):
        args_dict['memory_gb'] = memory

    if save_profiler_log:
        args_dict['status_callback'] = log_nodes_cb

        # Set path to log file and create callback logger
        callback_log_path = os.path.join(self.wf.base_dir, self.wf.name,
                                         'run_stats.log')
        import logging
        import logging.handlers
        logger = logging.getLogger('callback')
        logger.setLevel(logging.DEBUG)
        handler = logging.FileHandler(callback_log_path)
        logger.addHandler(handler)

    iflogger.info("**** Processing ****")

    # datetime object containing current start date and time
    start = datetime.now()
    self.run_start_time = start.strftime("%B %d, %Y / %H:%M:%S")
    print(f" Start date / time : {self.run_start_time}")

    # Execute the workflow
    res = self.wf.run(plugin='MultiProc', plugin_args=args_dict)

    # Copy and rename the workflow execution log
    src = os.path.join(self.wf.base_dir, "pypeline.log")
    if self.session is not None:
        dst = os.path.join(
            self.output_dir, '-'.join(["pymialsrtk", __version__]),
            self.subject, self.session, 'logs',
            f'{self.subject}_{self.session}_rec-SR_id-{self.sr_id}_log.txt'
        )
    else:
        dst = os.path.join(
            self.output_dir, '-'.join(["pymialsrtk", __version__]),
            self.subject, 'logs',
            f'{self.subject}_rec-SR_id-{self.sr_id}_log.txt')

    # Create the logs/ and parent directories if they do not exist
    logs_dir = os.path.dirname(dst)
    os.makedirs(logs_dir, exist_ok=True)

    # Make the copy
    iflogger.info(f'\t > Copy {src} to {dst}...')
    shutil.copy(src=src, dst=dst)

    # datetime object containing current end date and time
    end = datetime.now()
    self.run_end_time = end.strftime("%B %d, %Y / %H:%M:%S")
    print(f" End date / time : {self.run_end_time}")

    # Compute elapsed running time in minutes and seconds
    duration = end - start
    (minutes, seconds) = divmod(duration.total_seconds(), 60)
    self.run_elapsed_time = f'{int(minutes)} minutes and {int(seconds)} seconds'
    print(f" Elapsed time: {self.run_elapsed_time}")

    iflogger.info("**** Write dataset derivatives description ****")
    for toolbox in ["pymialsrtk", "nipype"]:
        write_bids_derivative_description(bids_dir=self.bids_dir,
                                          deriv_dir=self.output_dir,
                                          pipeline_name=toolbox)

    if save_profiler_log:
        iflogger.info("**** Workflow execution profiling ****")
        iflogger.info('\t > Creation of report...')
        generate_gantt_chart(logfile=callback_log_path,
                             cores=number_of_cores,
                             minute_scale=10,
                             space_between_minutes=50,
                             pipeline_name=os.path.join(
                                 self.wf.base_dir, self.wf.name))

        # Copy and rename the computational resources log
        src = os.path.join(self.wf.base_dir, self.wf.name,
                           "run_stats.log.html")
        if self.session is not None:
            dst = os.path.join(
                self.output_dir, '-'.join(["pymialsrtk", __version__]),
                self.subject, self.session, 'logs',
                f'{self.subject}_{self.session}_rec-SR_id-{self.sr_id}_desc-profiling_log.html'
            )
        else:
            dst = os.path.join(
                self.output_dir, '-'.join(["pymialsrtk", __version__]),
                self.subject, 'logs',
                f'{self.subject}_rec-SR_id-{self.sr_id}_desc-profiling_log.html'
            )

        # Make the copy
        iflogger.info(f'\t > Copy {src} to {dst}...')
        shutil.copy(src=src, dst=dst)

    iflogger.info("**** Super-resolution HTML report creation ****")
    self.create_subject_report()

    return res
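
# Minimal sketch of the post-run profiling report step used above: render the
# resource Gantt chart from an existing callback log. generate_gantt_chart
# writes an HTML report ("run_stats.log.html") next to the log file; the path
# and core count below are placeholders.
def _demo_profiling_report(callback_log_path, cores=2):
    from nipype.utils.draw_gantt_chart import generate_gantt_chart

    generate_gantt_chart(logfile=callback_log_path, cores=cores,
                         minute_scale=10, space_between_minutes=50)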