def get_image_pair_loader(args, hparams, out_dir):
    """
    Build the ImagePairLoader to predict on and index its images by ID.

    Args:
        args:    Parsed command line arguments. The attributes 'f', 'l',
                 'dataset', 'no_eval' and 'continue' are read.
        hparams: Hyperparameter mapping. When no single file is given, the
                 '<dataset>_data' group is passed as keyword arguments to
                 ImagePairLoader.
        out_dir: Output directory, forwarded to remove_already_predicted
                 when the 'continue' flag is set.

    Returns:
        image_pair_loader: The ImagePairLoader object
        image_pair_dict:   A dict mapping image identifier -> image pair.
                           Already predicted images are filtered out when
                           the 'continue' flag is set.
    """
    from mpunet.image import ImagePairLoader, ImagePair

    if args.f:
        # A single file was passed with the -f flag. Without a label file
        # (-l) the loader is put in predict mode.
        loader = ImagePairLoader(predict_mode=not bool(args.l),
                                 initialize_empty=True)
        loader.add_image(ImagePair(args.f, args.l))
    else:
        # Accept both '<name>' and '<name>_data' as the dataset argument
        group_name = args.dataset.replace("_data", "") + "_data"
        loader = ImagePairLoader(predict_mode=args.no_eval,
                                 **hparams[group_name])

    # Keep the image pairs in a separate dict to gain more control with
    # garbage collection
    pairs_by_id = {}
    for pair in loader.images:
        pairs_by_id[pair.identifier] = pair

    # 'continue' is a reserved word, so the flag is read through vars()
    if vars(args)["continue"]:
        # Drop the images that were already predicted
        pairs_by_id = remove_already_predicted(pairs_by_id, out_dir)
    return loader, pairs_by_id
def entry_func(args=None):
    """
    Script entry point: fit the fusion model of a project.

    Parses the command line arguments, selects GPU(s), loads the project
    hyperparameters ('train_hparams.yaml'), the views ('views.npz') and the
    best model weights found under '<project_dir>/model'. The images are
    then split into random rounds of 'images_per_round' images, which are
    handed to _run_fusion_training. The fusion weights are always written
    to '<project_dir>/model/fusion_weights/' on the way out, also when
    training is interrupted or fails.

    Args:
        args: Optional list of command line arguments passed on to the
              argument parser (None lets the parser use its default
              source).
    """
    # Project base path
    args = vars(get_argparser().parse_args(args))
    basedir = os.path.abspath(args["project_dir"])
    overwrite = args["overwrite"]
    continue_training = args["continue_training"]
    eval_prob = args["eval_prob"]
    await_PID = args["wait_for"]
    dice_weight = args["dice_weight"]
    print("Fitting fusion model for project-folder: %s" % basedir)

    # Minimum images in validation set before also using training images
    min_val_images = 15

    # Fusion model training params
    epochs = args['epochs']
    fm_batch_size = args["batch_size"]

    # Early stopping params
    early_stopping = args["early_stopping"]

    # Wait for PID?
    if await_PID:
        from mpunet.utils import await_PIDs
        await_PIDs(await_PID)

    # Fetch GPU(s)
    num_GPUs = args["num_GPUs"]
    force_gpu = args["force_GPU"]
    if not force_gpu:
        # Wait for free GPU
        await_and_set_free_gpu(N=num_GPUs, sleep_seconds=120)
    else:
        set_gpu(force_gpu)

    # Get logger
    logger = Logger(base_path=basedir, active_file="train_fusion",
                    overwrite_existing=overwrite)

    # Get YAML hyperparameters
    hparams = YAMLHParams(os.path.join(basedir, "train_hparams.yaml"))

    # Get some key settings
    n_classes = hparams["build"]["n_classes"]

    if hparams["build"]["out_activation"] == "linear":
        # Trained with logit targets? Swap in a probability activation.
        hparams["build"]["out_activation"] = \
            "softmax" if n_classes > 1 else "sigmoid"

    # Get views. The 'views' entry is removed from the fit group because
    # the loaded views are passed explicitly alongside **hparams["fit"]
    # in _run_fusion_training.
    views = np.load("%s/views.npz" % basedir)["arr_0"]
    del hparams["fit"]["views"]

    # Get weights and set fusion (output) path
    weights = get_best_model("%s/model" % basedir)
    weights_name = os.path.splitext(os.path.split(weights)[-1])[0]
    fusion_weights = "%s/model/fusion_weights/" \
                     "%s_fusion_weights.h5" % (basedir, weights_name)
    fusion_weights_dir = os.path.split(fusion_weights)[0]
    create_folders(fusion_weights_dir)

    # Log a few things
    log(logger, hparams, views, weights, fusion_weights)

    # Check if exists already...
    if not overwrite and os.path.exists(fusion_weights):
        from sys import exit
        print("\n[*] A fusion weights file already exists at '%s'."
              "\n Use the --overwrite flag to overwrite." % fusion_weights)
        exit(0)

    # Load validation data
    images = ImagePairLoader(**hparams["val_data"], logger=logger)
    is_validation = {m.identifier: True for m in images}

    # Define random sets of images to train on simultaneously (cannot be
    # all of them due to memory constraints)
    image_IDs = [m.identifier for m in images]

    if len(images) < min_val_images:
        # Pick N random training images
        diff = min_val_images - len(images)
        logger("Adding %i training images to set" % diff)

        # Load the training data and pick diff images. Sampling is only
        # done with replacement when there are too few training images.
        train = ImagePairLoader(**hparams["train_data"], logger=logger)
        indx = np.random.choice(np.arange(len(train)), diff,
                                replace=diff > len(train))

        # Add the images to the image set
        train_add = [train[i] for i in indx]
        for m in train_add:
            is_validation[m.identifier] = False
            image_IDs.append(m.identifier)
        images.add_images(train_add)

    # Pad the ID list so that its length is a multiple of sub_size
    sub_size = args["images_per_round"]
    rest = -len(image_IDs) % sub_size
    if rest:
        # Sampling without replacement raises a ValueError when more IDs
        # are requested than exist (images_per_round much larger than the
        # number of images), so fall back to sampling with replacement
        # in that case only.
        image_IDs += list(np.random.choice(image_IDs, rest,
                                           replace=rest > len(image_IDs)))

    # Shuffle and split into rounds of sub_size IDs each
    random.shuffle(image_IDs)
    n_rounds = len(image_IDs) // sub_size
    sets = [set(s) for s in np.array_split(image_IDs, n_rounds)]
    assert (contains_all_images(sets, image_IDs))

    # Define fusion model
    fusion_model = FusionModel(n_inputs=len(views), n_classes=n_classes,
                               weight=dice_weight, logger=logger,
                               verbose=False)

    if continue_training:
        fusion_model.load_weights(fusion_weights)
        print("\n[OBS] CONTINUED TRAINING FROM:\n", fusion_weights)

    import tensorflow as tf
    with tf.distribute.MirroredStrategy().scope():
        # Define model
        unet = init_model(hparams["build"], logger)
        print("\n[*] Loading weights: %s\n" % weights)
        unet.load_weights(weights, by_name=True)

    # Compile the model
    logger("Compiling...")
    metrics = ["sparse_categorical_accuracy",
               sparse_fg_precision,
               sparse_fg_recall]
    fusion_model.compile(optimizer=Adam(lr=1e-3),
                         loss=fusion_model.loss,
                         metrics=metrics)
    fusion_model._log()

    try:
        _run_fusion_training(sets, logger, hparams, min_val_images,
                             is_validation, views, n_classes, unet,
                             fusion_model, early_stopping, fm_batch_size,
                             epochs, eval_prob, fusion_weights)
    except KeyboardInterrupt:
        # A manual interrupt stops training, but the weights are still
        # saved below
        pass
    finally:
        # Make sure the output folder (still) exists before saving
        os.makedirs(fusion_weights_dir, exist_ok=True)
        # Save fusion model weights
        fusion_model.save_weights(fusion_weights)
def _run_fusion_training(sets, logger, hparams, min_val_images,
                         is_validation, views, n_classes, unet,
                         fusion_model, early_stopping, fm_batch_size,
                         epochs, eval_prob, fusion_weights_path):
    """
    Train the fusion model in rounds, one round per set of image IDs.

    In every round the image data is reloaded, the images of the current
    set are predicted on with 'unet' along all views, and the resulting
    points are stacked, shuffled and split 20/80 into a validation and a
    training part which the fusion model is fit on. The fusion weights are
    saved after every round.

    Args:
        sets:                List of sets of image identifiers, one set
                             per training round
        logger:              Callable logger object
        hparams:             Hyperparameter object; the 'val_data',
                             'train_data', 'fit' and 'build' groups are
                             read
        min_val_images:      Training images are also loaded when fewer
                             validation images than this are found
        is_validation:       Dict mapping image identifier -> bool, only
                             used to label the log output
        views:               Views to predict along
        n_classes:           Number of classes predicted by 'unet'
        unet:                The trained model used to generate the points
        fusion_model:        The fusion model to fit
        early_stopping:      Patience passed to the EarlyStopping callback
        fm_batch_size:       Batch size used when fitting the fusion model
        epochs:              Number of epochs per round
        eval_prob:           Forwarded to predict_and_map
        fusion_weights_path: Path the fusion weights are saved to
    """
    n_rounds = len(sets)
    for round_ind, id_set in enumerate(sets):
        header = "Set %i/%i:\n%s" % (round_ind + 1, n_rounds, id_set)
        logger("\n%s" % highlighted(header))

        # Reload data
        images = ImagePairLoader(**hparams["val_data"])
        if len(images) < min_val_images:
            images.add_images(ImagePairLoader(**hparams["train_data"]))

        # Select the ImagePair objects of the current set
        round_images = {}
        for pair in images:
            if pair.identifier in id_set:
                round_images[pair.identifier] = pair

        # Set scaler and bg values
        bg_value = hparams.get_from_anywhere('bg_value')
        scaler = hparams.get_from_anywhere('scaler')
        images.set_scaler_and_bg_values(bg_value=bg_value,
                                        scaler=scaler,
                                        compute_now=False)

        # Init LazyQueue and get its sequencer
        from mpunet.sequences.utils import get_sequence
        seq = get_sequence(data_queue=images,
                           is_validation=True,
                           views=views,
                           **hparams["fit"], **hparams["build"])

        # Fetch points from the set images
        all_points, all_targets = [], []
        n_images = len(round_images)
        for image_num, image_id in enumerate(round_images, start=1):
            split_name = "val" if is_validation[image_id] else "train"
            logger("")
            logger(highlighted("(%i/%i) Running on %s (%s)" %
                               (image_num, n_images, image_id, split_name)))

            with seq.image_pair_queue.get_image_by_id(image_id) as image:
                # Get voxel grid in real space
                voxel_grid_real_space = get_voxel_grid_real_space(image)

                # NaN-initialized array storing the predictions across
                # all views
                targets = image.labels.reshape(-1, 1)
                points = np.full(shape=(len(targets), len(views), n_classes),
                                 fill_value=np.nan,
                                 dtype=np.float32)

                # Predict on all views
                for view_ind, view in enumerate(views):
                    print("\nView: %s" % view)
                    mapped = predict_and_map(
                        model=unet,
                        seq=seq,
                        image=image,
                        view=view,
                        voxel_grid_real_space=voxel_grid_real_space,
                        n_planes='same+20',
                        targets=targets,
                        eval_prob=eval_prob
                    )
                    points[:, view_ind, :] = mapped.reshape(-1, n_classes)

                # Add to collections
                all_points.append(points)
                all_targets.append(targets)
            print(image.is_loaded)

        # Stack points into one matrix
        logger("Stacking points...")
        X, y = stack_collections(all_points, all_targets)

        # Shuffle train
        print("Shuffling points...")
        X, y = shuffle(X, y)

        # The first 20% of the shuffled points are held out for validation
        print("Getting validation set...")
        n_val = int(0.20 * X.shape[0])
        X_val, X = X[:n_val], X[n_val:]
        y_val, y = y[:n_val], y[n_val:]

        # Callbacks: validation dice scores, CSV log, weight printing and
        # early stopping on the validation dice
        callbacks = [
            ValDiceScores((X_val, y_val), n_classes, 50000, logger),
            CSVLogger(filename="logs/fusion_training.csv",
                      separator=",", append=True),
            PrintLayerWeights(fusion_model.layers[-1], every=1,
                              first=1000, per_epoch=True, logger=logger),
            EarlyStopping(monitor='val_dice', min_delta=0.0,
                          patience=early_stopping, verbose=1, mode='max'),
        ]

        # Start training; a manual interrupt only ends the current fit,
        # the weights are saved regardless
        try:
            fusion_model.fit(X, y,
                             batch_size=fm_batch_size,
                             epochs=epochs,
                             callbacks=callbacks,
                             verbose=1)
        except KeyboardInterrupt:
            pass
        fusion_model.save_weights(fusion_weights_path)
def predict_single(image, model, hparams, verbose=1):
    """
    A generic prediction function that sets up a ImagePairLoader object for
    the given image, prepares the image and predicts.

    Note that this function should only be used for convenience in scripts
    that work on single images at a time anyway, as batch-preparing the
    entire ImagePairLoader object prior to prediction is faster.

    NOTE: Only works with iso_live intrp modes at this time

    Args:
        image:   The image pair object to predict on
        model:   The model used for prediction
        hparams: Hyperparameter object; the 'fit' and 'build' groups are
                 read (but not modified) and, for 'iso_live',
                 hparams.project_path is used to locate 'views.npz'
        verbose: Verbosity passed to the sequencer as 'verbose'

    Returns:
        For 'iso_live': a float32 array of shape
        [n_views, d0, d1, d2, n_classes], where d0-d2 are the first three
        entries of image.image.shape. For 'iso_live_3d': the output of
        pred_3D_iso.
    """
    mode = hparams["fit"]["intrp_style"].lower()
    assert mode in ("iso_live", "iso_live_3d")

    # Merge the 'fit' and 'build' groups into a private dict ('build'
    # wins on key clashes). Working on a copy keeps the caller's hparams
    # untouched: no build keys or views leak into hparams["fit"], and the
    # original 'verbose' value needs no restoring, not even on errors.
    kwargs = dict(hparams["fit"])
    kwargs.update(hparams["build"])
    kwargs["verbose"] = verbose

    # Create a ImagePairLoader with only the given file
    # NOTE(review): no_log=bool(verbose) disables logging exactly when
    # verbose is truthy, which looks inverted - confirm the intent.
    from mpunet.image import ImagePairLoader
    image_pair_loader = ImagePairLoader(predict_mode=True,
                                        initialize_empty=True,
                                        no_log=bool(verbose))
    image_pair_loader.add_image(image)

    # Get N classes
    n_classes = kwargs["n_classes"]

    if mode == "iso_live":
        # Add views if SMMV model
        views = np.load(hparams.project_path + "/views.npz")["arr_0"]
        kwargs["views"] = views

        # Get sequence object
        sequence = image_pair_loader.get_sequencer(**kwargs)

        # Get voxel grid in real space
        voxel_grid_real_space = get_voxel_grid_real_space(image)

        # Prepare tensor to store combined prediction
        d = image.image.shape
        predicted = np.empty(shape=(len(views), d[0], d[1], d[2], n_classes),
                             dtype=np.float32)
        print("Predicting on brain hyper-volume of shape:", predicted.shape)

        for n_view, v in enumerate(views):
            print("\nView %i/%i: %s" % (n_view + 1, len(views), v))
            # Sample the volume along the view
            X, y, grid, inv_basis = sequence.get_view_from(
                image.id, v, n_planes="same+20"
            )

            # Predict on volume using model
            pred = predict_volume(model, X, axis=2)

            # Map the real space coordinate predictions to nearest
            # real space coordinates defined on voxel grid
            predicted[n_view] = map_real_space_pred(pred, grid, inv_basis,
                                                    voxel_grid_real_space,
                                                    method="nearest")
    else:
        predicted = pred_3D_iso(
            model=model,
            sequence=image_pair_loader.get_sequencer(**kwargs),
            image=image,
            extra_boxes="3x",
            min_coverage=None
        )
    return predicted
def entry_func(args=None):
    """
    Script entry point: predict on (and optionally evaluate) 3D images.

    Parses the command line arguments, loads the project hyperparameters
    ('train_hparams.yaml') and the best model found under
    '<project_dir>/model', and predicts on either a single file (-f, with
    an optional label file -l) or on the test data set. Predictions are
    saved under '<out_dir>/nii_files'. Unless running in predict mode,
    dice scores are computed per image and stored with save_all_3D.

    Args:
        args: Optional list of command line arguments passed on to the
              argument parser (None lets the parser use its default
              source).
    """
    # Get command line arguments
    args = vars(get_argparser().parse_args(args))
    base_dir = os.path.abspath(args["project_dir"])
    _file = args["f"]
    label = args["l"]
    N_extra = args["extra"]
    try:
        N_extra = int(N_extra)
    except (TypeError, ValueError):
        # Not an integer (e.g. a string specification or None);
        # pass the value on unchanged
        pass

    # Get settings from YAML file
    from mpunet.hyperparameters import YAMLHParams
    hparams = YAMLHParams(os.path.join(base_dir, "train_hparams.yaml"))

    # Set strides
    hparams["fit"]["strides"] = args["strides"]

    if not _file:
        try:
            # Data specified from command line?
            data_dir = os.path.abspath(args["data_dir"])

            # Set with default sub dirs
            hparams["test_data"] = {"base_dir": data_dir,
                                    "img_subdir": "images",
                                    "label_subdir": "labels"}
        except (AttributeError, TypeError):
            # No data dir was given; use the one from the hparams file
            data_dir = hparams["test_data"]["base_dir"]
    else:
        data_dir = False
    out_dir = os.path.abspath(args["out_dir"])
    overwrite = args["overwrite"]
    predict_mode = args["no_eval"]
    save_only_pred = args["save_only_pred"]

    # Check if valid dir structures
    validate_folders(base_dir, data_dir, out_dir, overwrite)

    # Import all needed modules (folder is valid at this point)
    import numpy as np
    from mpunet.image import ImagePairLoader, ImagePair
    from mpunet.utils import get_best_model, create_folders, \
        pred_to_class, await_and_set_free_gpu, set_gpu
    from mpunet.utils.fusion import predict_3D_patches, \
        predict_3D_patches_binary, pred_3D_iso
    from mpunet.logging import init_result_dict_3D, save_all_3D
    from mpunet.evaluate import dice_all
    from mpunet.bin.predict import save_nii_files

    # Fetch GPU(s)
    num_GPUs = args["num_GPUs"]
    force_gpu = args["force_GPU"]
    if force_gpu == -1:
        # Wait for free GPU
        await_and_set_free_gpu(N=num_GPUs, sleep_seconds=240)
    else:
        set_gpu(force_gpu)

    # Read settings from the project hyperparameter file
    n_classes = hparams["build"]["n_classes"]
    mode = hparams["fit"]["intrp_style"]

    # Set ImagePairLoader object
    if not _file:
        image_pair_loader = ImagePairLoader(predict_mode=predict_mode,
                                            **hparams["test_data"])
    else:
        # Single file: evaluate only if a label file was given as well
        predict_mode = not bool(label)
        image_pair_loader = ImagePairLoader(predict_mode=predict_mode,
                                            initialize_empty=True)
        image_pair_loader.add_image(ImagePair(_file, label))
    all_images = {image.identifier: image
                  for image in image_pair_loader.images}

    # Set scaler and bg values
    image_pair_loader.set_scaler_and_bg_values(
        bg_value=hparams.get_from_anywhere('bg_value'),
        scaler=hparams.get_from_anywhere('scaler'),
        compute_now=False
    )

    # Init LazyQueue and get its sequencer
    from mpunet.sequences.utils import get_sequence
    seq = get_sequence(data_queue=image_pair_loader,
                       is_validation=True,
                       **hparams["fit"], **hparams["build"])

    # Define UNet model
    from mpunet.models import model_initializer
    hparams["build"]["batch_size"] = 1
    unet = model_initializer(hparams, False, base_dir)
    model_path = get_best_model(base_dir + "/model")
    unet.load_weights(model_path)

    # Evaluate?
    if not predict_mode:
        # Prepare dictionary to store results in pd df
        results, detailed_res = init_result_dict_3D(all_images, n_classes)

        # Save to check correct format
        save_all_3D(results, detailed_res, out_dir)

    # Define result paths
    nii_res_dir = os.path.join(out_dir, "nii_files")
    create_folders(nii_res_dir)

    for image_id in sorted(all_images):
        print("\n[*] Running on: %s" % image_id)

        with seq.image_pair_queue.get_image_by_id(image_id) as image_pair:
            if mode.lower() == "iso_live_3d":
                pred = pred_3D_iso(model=unet, sequence=seq,
                                   image=image_pair, extra_boxes=N_extra,
                                   min_coverage=None)
            elif n_classes > 1:
                # Predict on volume using model
                pred = predict_3D_patches(model=unet, patches=seq,
                                          image=image_pair,
                                          N_extra=N_extra)
            else:
                pred = predict_3D_patches_binary(model=unet, patches=seq,
                                                 image_id=image_id,
                                                 N_extra=N_extra)

            # Convert to a class map here, before the evaluation branch:
            # it is needed for saving in predict mode as well (it was
            # previously only defined when evaluating, which raised a
            # NameError when saving in predict mode)
            p = pred_to_class(pred, img_dims=3, has_batch_dim=False)

            if not predict_mode:
                # Get the labels for the current image
                y = image_pair.labels

                # Calculate dice score
                print("Mean dice: ", end="", flush=True)
                dices = dice_all(y, p, n_classes=n_classes, ignore_zero=True)
                # Classes with a NaN dice are left out of the mean
                mean_dice = dices[~np.isnan(dices)].mean()
                print("Dices: ", dices)
                print("%s (n=%i)" % (mean_dice, len(dices)))

                # Add to results
                results[image_id] = [mean_dice]
                detailed_res[image_id] = dices

                # Overwrite with so-far results
                save_all_3D(results, detailed_res, out_dir)

            # Save results
            save_nii_files(p, image_pair, nii_res_dir, save_only_pred)

    if not predict_mode:
        # Write final results
        save_all_3D(results, detailed_res, out_dir)
def predict_single(image, model, hparams, verbose=1):
    """
    Predict on a single image using a dedicated ImagePairLoader.

    An empty ImagePairLoader is created, the image is added to it and a
    sequence object is built on top of it before predicting. This is a
    convenience function for scripts that work on one image at a time;
    batch-preparing a full ImagePairLoader prior to prediction is faster.

    NOTE: Only works with iso_live intrp modes at this time

    Args:
        image:   The image pair object to predict on
        model:   The model used for prediction
        hparams: Hyperparameter object; the 'fit' and 'build' groups are
                 passed to get_sequence and, for 'iso_live',
                 hparams.project_path is used to locate 'views.npz'
        verbose: Only used to set the loader's 'no_log' flag

    Returns:
        For 'iso_live': a float32 array of shape
        [n_views, d0, d1, d2, n_classes], where d0-d2 are the first three
        entries of the loaded image's shape. For 'iso_live_3d': the output
        of pred_3D_iso.
    """
    mode = hparams["fit"]["intrp_style"].lower()
    assert mode in ("iso_live", "iso_live_3d")

    # Loader holding only the given image
    from mpunet.image import ImagePairLoader
    loader = ImagePairLoader(predict_mode=True,
                             initialize_empty=True,
                             no_log=bool(verbose))
    loader.add_image(image)

    # Set scaler and bg values
    bg_value = hparams.get_from_anywhere('bg_value')
    scaler = hparams.get_from_anywhere('scaler')
    loader.set_scaler_and_bg_values(bg_value=bg_value,
                                    scaler=scaler,
                                    compute_now=False)

    if mode != "iso_live":
        # 3D model: init LazyQueue, get its sequencer and predict directly
        seq = get_sequence(data_queue=loader,
                           is_validation=True,
                           **hparams["fit"], **hparams["build"])
        return pred_3D_iso(model=model, sequence=seq, image=image,
                           extra_boxes="3x", min_coverage=None)

    # Multi-view model: init LazyQueue and get its sequencer with the
    # views stored in the project folder
    seq = get_sequence(
        data_queue=loader,
        views=np.load(hparams.project_path + "/views.npz")["arr_0"],
        is_validation=True,
        **hparams["fit"], **hparams["build"]
    )

    with seq.image_pair_queue.get_image_by_id(image.identifier) as loaded:
        # Get voxel grid in real space
        voxel_grid = get_voxel_grid_real_space(loaded)

        # Tensor storing the predictions of all views
        dims = loaded.image.shape
        n_views = len(seq.views)
        predicted = np.empty(
            shape=(n_views, dims[0], dims[1], dims[2], seq.n_classes),
            dtype=np.float32
        )
        print("Predicting on brain hyper-volume of shape:", predicted.shape)

        for view_ind, view in enumerate(seq.views):
            print("\nView %i/%i: %s" % (view_ind + 1, len(seq.views), view))

            # Sample the volume along the view
            X, y, grid, inv_basis = seq.get_view_from(loaded, view,
                                                      n_planes="same+20")

            # Predict on the sampled volume
            view_pred = predict_volume(model, X, axis=2)

            # Map the real space coordinate predictions to the nearest
            # real space coordinates defined on the voxel grid
            predicted[view_ind] = map_real_space_pred(view_pred, grid,
                                                      inv_basis, voxel_grid,
                                                      method="nearest")
    return predicted
def _base_loader_func(hparams, just_one, no_val, logger, mtype):
    """
    Base loader function used for all models.

    The following steps are performed in order:
      1) Train and validation ImagePairLoaders are created from the
         'train_data' and 'val_data' groups of hparams
      2) An Auditor is created over the training + validation images
      3) auditor.fill(hparams, mtype) is called to fill hparams with
         audited values where not specified manually
      4) If hparams has an 'aug_data' group with a truthy 'include' key,
         the augmented images are added to the training data
      5) If just_one, all but the first training and validation images
         are discarded
      6) If no_val, all validation images are discarded
      7) set_queue is called on both loaders with their 'max_load' values

    Args:
        hparams:  A mpunet.train.YAMLHParams object
        just_one: A bool specifying whether to keep only the first train
                  and validation samples (for quick testing purposes)
        no_val:   A bool specifying whether to omit validation data
                  entirely. Note: This setting applies even if validation
                  data is specified in the YAMLHparams object
        logger:   A mpunet.logger object; a ScreenLogger is used when a
                  falsy value is passed
        mtype:    A string identifier for the dimensionality of the model,
                  currently either '2d', '3d' (upper/lower ignored)

    Returns:
        train_data: An ImagePairLoader object storing the training images
        val_data:   An ImagePairLoader object storing the validation
                    images, or an 'empty' ImagePairLoader storing no
                    images if no_val=True
        logger:     The passed logger object or a ScreenLogger object
        auditor:    An auditor object storing statistics on the training
                    data
    """
    # Fall back to a basic ScreenLogger
    if not logger:
        logger = ScreenLogger()
    logger("Looking for images...")

    # Get data loaders
    train_data = ImagePairLoader(logger=logger, **hparams["train_data"])
    val_data = ImagePairLoader(logger=logger, **hparams["val_data"])

    # Audit over the combined training + validation images
    label_paths = train_data.label_paths + val_data.label_paths
    image_paths = train_data.image_paths + val_data.image_paths
    auditor = Auditor(image_paths,
                      nii_lab_paths=label_paths,
                      logger=logger,
                      dim_3d=hparams.get_from_anywhere("dim") or 64,
                      hparams=hparams)

    # Fill hparams with audited values, if not specified manually
    auditor.fill(hparams, mtype)

    # Add augmented data?
    if hparams.get("aug_data"):
        aug_group = hparams["aug_data"]
        if "include" not in aug_group:
            logger.warn("Found 'aug_data' group, but the group does not "
                        "contain the key 'include', which is required in "
                        "version 2.0 and above. OBS: Not including aug data!")
        elif aug_group["include"]:
            logger("\n[*] Adding augmented data with weight ",
                   aug_group["sample_weight"])
            aug_loader = ImagePairLoader(logger=logger, **aug_group)
            train_data.add_images(aug_loader)

    if just_one:
        # For testing purposes: keep one train and one val image only
        logger("[**NOTTICE**] Only running on first train & val samples.")
        first_train = train_data.images[0]
        train_data.images = [first_train]
        first_val = val_data.images[0]
        val_data.images = [first_val]

    if no_val:
        # Run without validation (even if specified in the param file)
        val_data.images = []

    # Set queue object if necessary
    for group_name, loader in (("train_data", train_data),
                               ("val_data", val_data)):
        loader.set_queue(hparams[group_name].get("max_load"))

    return train_data, val_data, logger, auditor