def run(args):
    files = glob(args.file_regex)
    out_dir = os.path.abspath(args.out_dir)
    logger = Logger(out_dir,
                    active_file='extraction_log',
                    overwrite_existing=args.overwrite,
                    print_calling_method=False)
    logger("Args dump: {}".format(vars(args)))
    logger("Found {} files matching glob statement".format(len(files)))
    if len(files) == 0:
        return
    channels = ChannelMontageTuple(args.channels, relax=True)
    renamed_channels = args.rename_channels
    if renamed_channels and (len(renamed_channels) != len(channels)):
        raise ValueError("--rename_channels argument must have the same number"
                         " of elements as --channels. Got {} and {}.".format(
                             len(channels), len(renamed_channels)
                         ))
    logger("Extracting channels {}".format(channels.names))
    if renamed_channels:
        logger("Saving channels under names {}".format(renamed_channels))
    logger("Saving .h5 files to '{}'".format(out_dir))
    logger("Re-sampling: {}".format(args.resample))
    logger("-*--*--*--*--*--*--*--*--*--*--*--*--*--*--*--*-")
    extract(files=files,
            out_dir=out_dir,
            channels=channels,
            renamed_channels=renamed_channels,
            logger=logger,
            args=args)
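# ---------------------------------------------------------------------------
# Hypothetical usage sketch (not part of the original script) for the
# extraction run() defined directly above. It assumes an argparse.Namespace
# exposing the attributes the function reads (file_regex, out_dir, channels,
# rename_channels, resample, overwrite); all example values are placeholders,
# not defaults of the real CLI.
# ---------------------------------------------------------------------------
def _example_run_extraction():
    from argparse import Namespace
    example_args = Namespace(
        file_regex="./data/*.edf",                  # placeholder glob pattern
        out_dir="./extracted_h5",                   # placeholder output folder
        channels=["EEG Fpz-Cz", "EOG horizontal"],  # placeholder channel names
        rename_channels=None,                       # or a list matching 'channels'
        resample=None,                              # or a target sample rate
        overwrite=False,
    )
    # Calls the extraction run() defined directly above
    run(example_args)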
def get_logger(out_dir, overwrite, name="evaluation_log"):
    """
    Returns a Logger object for the given out_dir.
    The Logger raises an OSError if a logfile already exists at the location
    and overwrite=False, in which case the script terminates with a printed
    message.
    """
    from mpunet.logging import Logger
    try:
        logger = Logger(out_dir,
                        active_file=name,
                        overwrite_existing=overwrite,
                        no_sub_folder=True)
    except OSError:
        from sys import exit
        print("[*] A logging file 'logs/{}' already exists. "
              "If you wish to overwrite this logfile, set the --overwrite "
              "flag.".format(name))
        exit(0)
    return logger
def get_logger(project_dir, overwrite_existing):
    """
    Initialises and returns a Logger object for a given project directory.
    If a logfile already exists at the specified location, it is overwritten
    when overwrite_existing == True; otherwise a RuntimeError is raised.

    Args:
        project_dir:        Path to a mpunet project folder
        overwrite_existing: Whether to overwrite an existing logfile in
                            project_dir

    Returns:
        A mpunet Logger object initialized in project_dir
    """
    # Define Logger object
    from mpunet.logging import Logger
    try:
        logger = Logger(base_path=project_dir,
                        print_to_screen=True,
                        overwrite_existing=overwrite_existing)
    except OSError as e:
        raise RuntimeError("[*] A training session at '%s' already exists."
                           "\n    Use the --overwrite flag to "
                           "overwrite." % project_dir) from e
    return logger
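# ---------------------------------------------------------------------------
# Hypothetical usage sketch (not in the original sources): the two get_logger
# helpers above come from different scripts and differ in failure behaviour --
# the first exits the process with a printed message, the second raises a
# RuntimeError for the caller to handle. Paths and names are placeholders.
# ---------------------------------------------------------------------------
def _example_get_logger(project_dir="./my_project", overwrite=False):
    try:
        logger = get_logger(project_dir, overwrite_existing=overwrite)
    except RuntimeError as e:
        # E.g. surface the message and abort gracefully instead of crashing
        print(str(e))
        return None
    logger("Logger initialised at '{}'".format(project_dir))
    return logger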
def entry_func(args=None):
    # Get parser
    parser = vars(get_parser().parse_args(args))

    # Get parser arguments
    cv_dir = os.path.abspath(parser["CV_dir"])
    out_dir = os.path.abspath(parser["out_dir"])
    create_folders(out_dir)
    await_PID = parser["wait_for"]
    run_split = parser["run_on_split"]
    start_from = parser["start_from"] or 0
    num_jobs = parser["num_jobs"] or 1

    # GPU settings
    num_GPUs = parser["num_GPUs"]
    force_GPU = parser["force_GPU"]
    ignore_GPU = parser["ignore_GPU"]
    monitor_GPUs_every = parser["monitor_GPUs_every"]

    # User input assertions
    _assert_force_and_ignore_gpus(force_GPU, ignore_GPU)
    if run_split:
        _assert_run_split(start_from, monitor_GPUs_every, num_jobs)

    # Wait for PID?
    if await_PID:
        from mpunet.utils import await_PIDs
        await_PIDs(await_PID)

    # Get file paths
    script = os.path.abspath(parser["script_prototype"])
    hparams = os.path.abspath(parser["hparams_prototype"])
    no_hparams = parser["no_hparams"]

    # Get list of folders of CV data to run on
    cv_folders = get_CV_folders(cv_dir)
    if run_split is not None:
        if run_split < 0 or run_split >= len(cv_folders):
            raise ValueError("--run_on_split should be in range [0-{}], "
                             "got {}".format(len(cv_folders) - 1, run_split))
        cv_folders = [cv_folders[run_split]]
        log_appendix = "_split{}".format(run_split)
    else:
        log_appendix = ""

    # Get a logger object
    logger = Logger(base_path="./",
                    active_file="output" + log_appendix,
                    print_calling_method=False,
                    overwrite_existing=True)

    if force_GPU:
        # Only these GPUs will be chosen from
        from mpunet.utils import set_gpu
        set_gpu(force_GPU)
    if num_GPUs:
        # Get GPU sets (up to the number of splits)
        gpu_sets = get_free_GPU_sets(num_GPUs, ignore_GPU)[:len(cv_folders)]
    elif not num_jobs or num_jobs < 0:
        raise ValueError("Should specify a number of jobs to run in parallel "
                         "with the --num_jobs flag when using 0 GPUs per "
                         "process (--num_GPUs=0 was set).")
    else:
        gpu_sets = ["''"] * parser["num_jobs"]

    # Get process pool, lock and GPU queue objects
    lock = Lock()
    gpu_queue = Queue()
    for gpu in gpu_sets:
        gpu_queue.put(gpu)

    procs = []
    if monitor_GPUs_every is not None and monitor_GPUs_every:
        logger("\nOBS: Monitoring GPU pool every %i seconds\n" % monitor_GPUs_every)
        # Start a process monitoring new GPU availability over time
        stop_event = Event()
        t = Process(target=monitor_GPUs,
                    args=(monitor_GPUs_every, gpu_queue, num_GPUs,
                          ignore_GPU, gpu_sets, stop_event))
        t.start()
        procs.append(t)
    else:
        stop_event = None

    try:
        for cv_folder in cv_folders[start_from:]:
            gpus = gpu_queue.get()
            t = Process(target=run_sub_experiment,
                        args=(cv_folder, out_dir, script, hparams,
                              no_hparams, gpus, gpu_queue, lock, logger))
            t.start()
            procs.append(t)
            for t in procs:
                if not t.is_alive():
                    t.join()
    except KeyboardInterrupt:
        for t in procs:
            t.terminate()
    if stop_event is not None:
        stop_event.set()
    for t in procs:
        t.join()
def entry_func(args=None):
    # Project base path
    args = vars(get_argparser().parse_args(args))
    basedir = os.path.abspath(args["project_dir"])
    overwrite = args["overwrite"]
    continue_training = args["continue_training"]
    eval_prob = args["eval_prob"]
    await_PID = args["wait_for"]
    dice_weight = args["dice_weight"]
    print("Fitting fusion model for project-folder: %s" % basedir)

    # Minimum images in validation set before also using training images
    min_val_images = 15

    # Fusion model training params
    epochs = args['epochs']
    fm_batch_size = args["batch_size"]

    # Early stopping params
    early_stopping = args["early_stopping"]

    # Wait for PID?
    if await_PID:
        from mpunet.utils import await_PIDs
        await_PIDs(await_PID)

    # Fetch GPU(s)
    num_GPUs = args["num_GPUs"]
    force_gpu = args["force_GPU"]
    # Wait for free GPU
    if not force_gpu:
        await_and_set_free_gpu(N=num_GPUs, sleep_seconds=120)
    else:
        set_gpu(force_gpu)

    # Get logger
    logger = Logger(base_path=basedir,
                    active_file="train_fusion",
                    overwrite_existing=overwrite)

    # Get YAML hyperparameters
    hparams = YAMLHParams(os.path.join(basedir, "train_hparams.yaml"))

    # Get some key settings
    n_classes = hparams["build"]["n_classes"]
    if hparams["build"]["out_activation"] == "linear":
        # Trained with logit targets?
        hparams["build"]["out_activation"] = "softmax" if n_classes > 1 else "sigmoid"

    # Get views
    views = np.load("%s/views.npz" % basedir)["arr_0"]
    del hparams["fit"]["views"]

    # Get weights and set fusion (output) path
    weights = get_best_model("%s/model" % basedir)
    weights_name = os.path.splitext(os.path.split(weights)[-1])[0]
    fusion_weights = "%s/model/fusion_weights/" \
                     "%s_fusion_weights.h5" % (basedir, weights_name)
    create_folders(os.path.split(fusion_weights)[0])

    # Log a few things
    log(logger, hparams, views, weights, fusion_weights)

    # Check if exists already...
    if not overwrite and os.path.exists(fusion_weights):
        from sys import exit
        print("\n[*] A fusion weights file already exists at '%s'."
              "\n    Use the --overwrite flag to overwrite." % fusion_weights)
        exit(0)

    # Load validation data
    images = ImagePairLoader(**hparams["val_data"], logger=logger)
    is_validation = {m.identifier: True for m in images}

    # Define random sets of images to train on simultaneously
    # (cannot be all images at once due to memory constraints)
    image_IDs = [m.identifier for m in images]

    if len(images) < min_val_images:
        # Pick N random training images
        diff = min_val_images - len(images)
        logger("Adding %i training images to set" % diff)

        # Load the training data and pick diff images
        train = ImagePairLoader(**hparams["train_data"], logger=logger)
        indx = np.random.choice(np.arange(len(train)), diff,
                                replace=diff > len(train))

        # Add the images to the image set
        train_add = [train[i] for i in indx]
        for m in train_add:
            is_validation[m.identifier] = False
            image_IDs.append(m.identifier)
        images.add_images(train_add)

    # Pad image_IDs so that len(image_IDs) % sub_size == 0
    sub_size = args["images_per_round"]
    rest = int(sub_size * np.ceil(len(image_IDs) / sub_size)) - len(image_IDs)
    if rest:
        image_IDs += list(np.random.choice(image_IDs, rest, replace=False))

    # Shuffle and split
    random.shuffle(image_IDs)
    sets = [set(s) for s in np.array_split(image_IDs,
                                           len(image_IDs) / sub_size)]
    assert contains_all_images(sets, image_IDs)

    # Define fusion model (named 'org' to store a reference to the original
    # model if a multi-GPU model is created below)
    fusion_model = FusionModel(n_inputs=len(views), n_classes=n_classes,
                               weight=dice_weight,
                               logger=logger, verbose=False)

    if continue_training:
        fusion_model.load_weights(fusion_weights)
        print("\n[OBS] CONTINUED TRAINING FROM:\n", fusion_weights)

    import tensorflow as tf
    with tf.distribute.MirroredStrategy().scope():
        # Define model
        unet = init_model(hparams["build"], logger)
        print("\n[*] Loading weights: %s\n" % weights)
        unet.load_weights(weights, by_name=True)

    # Compile the model
    logger("Compiling...")
    metrics = ["sparse_categorical_accuracy",
               sparse_fg_precision,
               sparse_fg_recall]
    fusion_model.compile(optimizer=Adam(lr=1e-3),
                         loss=fusion_model.loss,
                         metrics=metrics)
    fusion_model._log()

    try:
        _run_fusion_training(sets, logger, hparams, min_val_images,
                             is_validation, views, n_classes, unet,
                             fusion_model, early_stopping, fm_batch_size,
                             epochs, eval_prob, fusion_weights)
    except KeyboardInterrupt:
        pass
    finally:
        if not os.path.exists(os.path.split(fusion_weights)[0]):
            os.mkdir(os.path.split(fusion_weights)[0])

        # Save fusion model weights
        # OBS: Must be the original model if multi-GPU training was performed!
        fusion_model.save_weights(fusion_weights)
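# ---------------------------------------------------------------------------
# Small illustration (not from the original script) of the padding step used
# in entry_func() above: image_IDs is padded with randomly re-sampled IDs so
# its length becomes a multiple of sub_size, letting np.array_split produce
# equally sized sets. The IDs and sub_size below are placeholders.
# ---------------------------------------------------------------------------
def _example_pad_to_multiple():
    import numpy as np
    image_IDs = ["img_%i" % i for i in range(7)]   # placeholder identifiers
    sub_size = 3
    # 3 * ceil(7/3) - 7 == 2 extra IDs needed
    rest = int(sub_size * np.ceil(len(image_IDs) / sub_size)) - len(image_IDs)
    if rest:
        image_IDs += list(np.random.choice(image_IDs, rest, replace=False))
    sets = [set(s) for s in np.array_split(image_IDs,
                                           len(image_IDs) / sub_size)]
    # Each split holds at most sub_size unique IDs (duplicates collapse in set())
    assert all(len(s) <= sub_size for s in sets)
    return sets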
def run(args, gpu_mon):
    """
    Run the script according to args - Please refer to the argparser.

    args:
        args:    (Namespace)  command-line arguments
        gpu_mon: (GPUMonitor) Initialized mpunet GPUMonitor object
    """
    assert_args(args)
    from mpunet.logging import Logger
    from utime.train import Trainer
    from utime.hyperparameters import YAMLHParams
    from utime.utils.scriptutils import (assert_project_folder,
                                         make_multi_gpu_model)
    from utime.utils.scriptutils.train import (get_train_and_val_datasets,
                                               get_h5_train_and_val_datasets,
                                               get_data_queues,
                                               get_generators,
                                               find_and_set_gpus,
                                               get_samples_per_epoch,
                                               save_final_weights)

    project_dir = os.path.abspath("./")
    assert_project_folder(project_dir)
    if args.overwrite and not args.continue_training:
        from mpunet.bin.train import remove_previous_session
        remove_previous_session(project_dir)

    # Get logger object
    logger = Logger(project_dir,
                    overwrite_existing=args.overwrite,
                    append_existing=args.continue_training,
                    log_prefix=args.log_file_prefix)
    logger("Args dump: {}".format(vars(args)))

    # Settings depending on the --preprocessed flag.
    if args.preprocessed:
        yaml_path = utime.Defaults.get_pre_processed_hparams_path(project_dir)
        dataset_func = get_h5_train_and_val_datasets
        train_queue_type = 'eager'
        val_queue_type = 'eager'
    else:
        yaml_path = utime.Defaults.get_hparams_path(project_dir)
        dataset_func = get_train_and_val_datasets
        train_queue_type = args.train_queue_type
        val_queue_type = args.val_queue_type

    # Load hparams
    hparams = YAMLHParams(yaml_path, logger=logger)
    update_hparams_with_command_line_arguments(hparams, args)

    # Initialize and load (potentially multiple) datasets
    train_datasets, val_datasets = dataset_func(hparams,
                                                args.no_val,
                                                args.train_on_val,
                                                logger)
    if args.just:
        keep_n_random(*train_datasets, *val_datasets,
                      keep=args.just, logger=logger)

    # Get a data loader queue object for each dataset
    train_datasets_queues = get_data_queues(
        datasets=train_datasets,
        queue_type=train_queue_type,
        max_loaded_per_dataset=args.max_loaded_per_dataset,
        num_access_before_reload=args.num_access_before_reload,
        logger=logger
    )
    if val_datasets:
        val_dataset_queues = get_data_queues(
            datasets=val_datasets,
            queue_type=val_queue_type,
            max_loaded_per_dataset=args.max_loaded_per_dataset,
            num_access_before_reload=args.num_access_before_reload,
            study_loader=getattr(train_datasets_queues[0], 'study_loader', None),
            logger=logger
        )
    else:
        val_dataset_queues = None

    # Get sequence generators for all datasets
    train_seq, val_seq = get_generators(train_datasets_queues,
                                        val_dataset_queues=val_dataset_queues,
                                        hparams=hparams)

    # Add additional (inferred) parameters to parameter file
    hparams.set_value("build", "n_classes", train_seq.n_classes,
                      overwrite=True)
    hparams.set_value("build", "batch_shape", train_seq.batch_shape,
                      overwrite=True)
    hparams.save_current()

    if args.continue_training:
        # Prepare the project directory for continued training.
        # Please refer to the function docstring for details.
        from utime.models.model_init import prepare_for_continued_training
        parameter_file = prepare_for_continued_training(hparams=hparams,
                                                        project_dir=project_dir,
                                                        logger=logger)
    else:
        parameter_file = args.initialize_from  # most often None

    # Set the GPU visibility
    num_GPUs = find_and_set_gpus(gpu_mon, args.force_GPU, args.num_GPUs)

    # Initialize and potentially load parameters into the model
    from utime.models.model_init import init_model, load_from_file
    org_model = init_model(hparams["build"], logger)
    if parameter_file:
        load_from_file(org_model, parameter_file, logger, by_name=True)
    model, org_model = make_multi_gpu_model(org_model, num_GPUs)

    # Prepare a trainer object. Takes care of compiling and training.
    trainer = Trainer(model, org_model=org_model, logger=logger)
    import tensorflow as tf
    trainer.compile_model(n_classes=hparams["build"].get("n_classes"),
                          reduction=tf.keras.losses.Reduction.NONE,
                          **hparams["fit"])

    # Fit the model on a number of samples as specified in args
    samples_pr_epoch = get_samples_per_epoch(train_seq,
                                             args.max_train_samples_per_epoch)
    _ = trainer.fit(train=train_seq,
                    val=val_seq,
                    train_samples_per_epoch=samples_pr_epoch,
                    **hparams["fit"])

    # Save weights to project_dir/model/{final_weights_file_name}.h5
    # Note: these weights are rarely used, as a checkpoint callback also
    # saves weights to this directory throughout training
    save_final_weights(project_dir,
                       model=model,
                       file_name=args.final_weights_file_name,
                       logger=logger)
def run(args, gpu_mon):
    """
    Run the script according to args - Please refer to the argparser.

    args:
        args:    (Namespace)  command-line arguments
        gpu_mon: (GPUMonitor) Initialized MultiPlanarUNet GPUMonitor object
    """
    assert_args(args)
    from mpunet.logging import Logger
    from utime.train import Trainer
    from utime.hyperparameters import YAMLHParams
    from utime.utils.scriptutils import (assert_project_folder,
                                         make_multi_gpu_model)
    from utime.utils.scriptutils.train import (get_train_and_val_datasets,
                                               get_generators,
                                               find_and_set_gpus,
                                               get_samples_per_epoch,
                                               save_final_weights)

    project_dir = os.path.abspath("./")
    assert_project_folder(project_dir)
    if args.overwrite and not args.continue_training:
        from mpunet.bin.train import remove_previous_session
        remove_previous_session(project_dir)

    # Get logger object
    logger = Logger(project_dir,
                    overwrite_existing=args.overwrite or args.continue_training,
                    log_prefix=args.log_file_prefix)
    logger("Args dump: {}".format(vars(args)))

    # Load hparams
    hparams = YAMLHParams(os.path.join(project_dir, "hparams.yaml"),
                          logger=logger)
    update_hparams_with_command_line_arguments(hparams, args)

    # Initialize and load (potentially multiple) datasets
    datasets, no_val = get_train_and_val_datasets(hparams, args.no_val,
                                                  args.train_on_val, logger)

    # Load data in all datasets
    for data in datasets:
        for d in data:
            d.load(1 if args.just_one else None)
            d.pairs = d.loaded_pairs  # remove the other pairs

    # Get sequence generators for all datasets
    train_seq, val_seq = get_generators(datasets, hparams, no_val)

    # Add additional (inferred) parameters to parameter file
    hparams.set_value("build", "n_classes", train_seq.n_classes,
                      overwrite=True)
    hparams.set_value("build", "batch_shape", train_seq.batch_shape,
                      overwrite=True)
    hparams.save_current()

    if args.continue_training:
        # Prepare the project directory for continued training.
        # Please refer to the function docstring for details.
        from utime.models.model_init import prepare_for_continued_training
        parameter_file = prepare_for_continued_training(hparams=hparams,
                                                        project_dir=project_dir,
                                                        logger=logger)
    else:
        parameter_file = args.initialize_from  # most often None

    # Set the GPU visibility
    num_GPUs = find_and_set_gpus(gpu_mon, args.force_GPU, args.num_GPUs)

    # Initialize and potentially load parameters into the model
    from utime.models.model_init import init_model, load_from_file
    org_model = init_model(hparams["build"], logger)
    if parameter_file:
        load_from_file(org_model, parameter_file, logger, by_name=True)
    model, org_model = make_multi_gpu_model(org_model, num_GPUs)

    # Prepare a trainer object. Takes care of compiling and training.
    trainer = Trainer(model, org_model=org_model, logger=logger)
    trainer.compile_model(n_classes=hparams["build"].get("n_classes"),
                          **hparams["fit"])

    # Fit the model on a number of samples as specified in args
    samples_pr_epoch = get_samples_per_epoch(train_seq,
                                             args.max_train_samples_per_epoch,
                                             args.val_samples_per_epoch)
    _ = trainer.fit(train=train_seq,
                    val=val_seq,
                    train_samples_per_epoch=samples_pr_epoch[0],
                    val_samples_per_epoch=samples_pr_epoch[1],
                    **hparams["fit"])

    # Save weights to project_dir/model/{final_weights_file_name}.h5
    # Note: these weights are rarely used, as a checkpoint callback also
    # saves weights to this directory throughout training
    save_final_weights(project_dir,
                       model=model,
                       file_name=args.final_weights_file_name,
                       logger=logger)
def run(args):
    cv_dir = os.path.abspath(args.CV_dir)
    # Get list of folders of CV data to run on
    cv_folders = get_CV_folders(cv_dir)
    assert_args(args, n_splits=len(cv_folders))
    out_dir = os.path.abspath(args.out_dir)
    hparams_dir = os.path.abspath(args.hparams_prototype_dir)
    prepare_hparams_dir(hparams_dir)
    create_folders(out_dir)

    # Wait for PID?
    if args.wait_for:
        from mpunet.utils import await_PIDs
        await_PIDs(args.wait_for)

    if args.run_on_split is not None:
        cv_folders = [cv_folders[args.run_on_split]]
        log_appendix = "_split{}".format(args.run_on_split)
    else:
        log_appendix = ""

    # Get a logger object
    logger = Logger(base_path="./",
                    active_file="output" + log_appendix,
                    print_calling_method=False,
                    overwrite_existing=True)

    if args.force_GPU:
        # Only these GPUs will be chosen from
        from mpunet.utils import set_gpu
        set_gpu(args.force_GPU)
    if args.num_GPUs:
        # Get GPU sets (up to the number of splits)
        gpu_sets = get_free_GPU_sets(args.num_GPUs,
                                     args.ignore_GPU)[:len(cv_folders)]
    elif not args.num_jobs or args.num_jobs < 0:
        raise ValueError("Should specify a number of jobs to run in parallel "
                         "with the --num_jobs flag when using 0 GPUs per "
                         "process (--num_GPUs=0 was set).")
    else:
        gpu_sets = ["''"] * args.num_jobs

    # Get process pool, lock and GPU queue objects
    lock = Lock()
    gpu_queue = Queue()
    for gpu in gpu_sets:
        gpu_queue.put(gpu)

    # Get file paths
    script = os.path.abspath(args.script_prototype)

    # Get GPU monitor process
    running_processes, stop_event = start_gpu_monitor_process(args, gpu_queue,
                                                              gpu_sets, logger)
    try:
        for cv_folder in cv_folders[args.start_from:]:
            gpus = gpu_queue.get()
            t = Process(target=run_sub_experiment,
                        args=(cv_folder, out_dir, script, hparams_dir,
                              args.no_hparams, gpus, gpu_queue, lock, logger))
            t.start()
            running_processes.append(t)
            for t in running_processes:
                if not t.is_alive():
                    t.join()
    except KeyboardInterrupt:
        for t in running_processes:
            t.terminate()
    if stop_event is not None:
        stop_event.set()
    for t in running_processes:
        t.join()
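# ---------------------------------------------------------------------------
# Self-contained sketch (not from the original scripts) of the GPU-queue
# pattern used by the two CV-experiment runners above: available GPU sets are
# placed on a multiprocessing.Queue, each worker process takes one set, runs
# its job, and hands the set back so the next split can reuse it. The worker
# body and GPU strings below are placeholders.
# ---------------------------------------------------------------------------
def _example_gpu_worker(job_id, gpus, gpu_queue, lock):
    import time
    with lock:
        print("Job {} running on GPUs '{}'".format(job_id, gpus))
    time.sleep(0.1)          # placeholder for the actual sub-experiment
    gpu_queue.put(gpus)      # return the GPU set to the pool


def _example_gpu_queue_pool(n_jobs=4):
    from multiprocessing import Process, Queue, Lock
    gpu_sets = ["0", "1"]    # placeholder GPU set strings
    gpu_queue, lock = Queue(), Lock()
    for g in gpu_sets:
        gpu_queue.put(g)
    procs = []
    for job_id in range(n_jobs):
        gpus = gpu_queue.get()  # blocks until a GPU set is handed back
        p = Process(target=_example_gpu_worker,
                    args=(job_id, gpus, gpu_queue, lock))
        p.start()
        procs.append(p)
    for p in procs:
        p.join()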
def run(args): """ Run the script according to args - Please refer to the argparser. args: args: (Namespace) command-line arguments """ from mpunet.logging import Logger from utime.hyperparameters import YAMLHParams from utime.utils.scriptutils import assert_project_folder from utime.utils.scriptutils import get_splits_from_all_datasets project_dir = os.path.abspath("./") assert_project_folder(project_dir) # Get logger object logger = Logger(project_dir + "/preprocessing_logs", active_file='preprocessing', overwrite_existing=args.overwrite, no_sub_folder=True) logger("Args dump: {}".format(vars(args))) # Load hparams hparams = YAMLHParams(Defaults.get_hparams_path(project_dir), logger=logger, no_version_control=True) # Initialize and load (potentially multiple) datasets datasets = get_splits_from_all_datasets(hparams, splits_to_load=args.dataset_splits, logger=logger, return_data_hparams=True) # Check if file exists, and overwrite if specified if os.path.exists(args.out_path): if args.overwrite: os.remove(args.out_path) else: from sys import exit logger("Out file at {} exists, and --overwrite was not set." "".format(args.out_path)) exit(0) # Create dataset hparams output directory out_dir = Defaults.get_pre_processed_data_configurations_dir(project_dir) if not os.path.exists(out_dir): os.mkdir(out_dir) with ThreadPoolExecutor(args.num_threads) as pool: with h5py.File(args.out_path, "w") as h5_file: for dataset, dataset_hparams in datasets: # Create a new version of the dataset-specific hyperparameters # that contain only the fields needed for pre-processed data name = dataset[0].identifier.split("/")[0] hparams_out_path = os.path.join(out_dir, name + ".yaml") copy_dataset_hparams(dataset_hparams, hparams_out_path) # Update paths to dataset hparams in main hparams file hparams.set_value(subdir='datasets', name=name, value=hparams_out_path, overwrite=True) # Save the hyperparameters to the pre-processed main hparams hparams.save_current( Defaults.get_pre_processed_hparams_path(project_dir)) # Process each dataset for split in dataset: # Add this split to the dataset-specific hparams add_dataset_entry(hparams_out_path, args.out_path, split.identifier.split("/")[-1].lower(), split.period_length_sec) # Overwrite potential load time channel sampler to None split.set_load_time_channel_sampling_groups(None) # Create dataset group split_group = h5_file.create_group(split.identifier) # Run the preprocessing process_func = partial(preprocess_study, split_group) logger.print_to_screen = True logger("Preprocessing dataset:", split) logger.print_to_screen = False n_pairs = len(split.pairs) for i, _ in enumerate(pool.map(process_func, split.pairs)): print(" {}/{}".format(i + 1, n_pairs), end='\r', flush=True) print("")