def test_get_optimizer_not_dict():
    """Ensure get_optimizer rejects a non-dict config with AssertionError."""
    bad_config = ["name"]  # a list, not the required dict
    with pytest.raises(AssertionError):
        optimizer.get_optimizer(bad_config)
def test_get_optimizer_error():
    """Ensure get_optimizer raises ValueError for an unknown optimizer name."""
    unknown_config = {"name": "random"}  # "random" is not a supported optimizer
    with pytest.raises(ValueError):
        optimizer.get_optimizer(unknown_config)
def test_get_optimizer_adam():
    """Check that the "adam" config name yields a Keras Adam optimizer."""
    built = optimizer.get_optimizer({"name": "adam", "adam": {}})
    assert isinstance(built, tensorflow.python.keras.optimizer_v2.adam.Adam)
def test_get_optimizer_rms():
    """Check that the "rms" config name yields a Keras RMSprop optimizer."""
    built = optimizer.get_optimizer({"name": "rms", "rms": {}})
    assert isinstance(built, tensorflow.python.keras.optimizer_v2.rmsprop.RMSprop)
def test_get_optimizer_sgd():
    """Check that the "sgd" config name yields a Keras SGD optimizer."""
    built = optimizer.get_optimizer({"name": "sgd", "sgd": {}})
    assert isinstance(
        built, tensorflow.python.keras.optimizer_v2.gradient_descent.SGD
    )
def main(gpu, gpu_allow_growth, ckpt_path, mode, batch_size, log):
    """
    Evaluate a trained registration model and save its predictions.

    :param gpu: str, value for CUDA_VISIBLE_DEVICES, selects the local GPU(s)
    :param gpu_allow_growth: bool, whether TF may grow GPU memory on demand
    :param ckpt_path: str, checkpoint to load, should be like log_folder/save/xxx.ckpt
    :param mode: data split passed to the data loader (e.g. train/test)
    :param batch_size: int, batch size used for prediction
    :param log: str, log folder name; empty string means use a timestamp
    :raises ValueError: if ckpt_path does not end with ".ckpt"
    """
    # sanity check
    if not ckpt_path.endswith(".ckpt"):  # should be like log_folder/save/xxx.ckpt
        raise ValueError("checkpoint path should end with .ckpt")

    # env vars — must be set before TF touches the GPU
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    # BUG FIX: the flag was inverted ("false" if gpu_allow_growth else "true"),
    # which disabled growth exactly when the caller requested it; the other
    # entry points in this file set "true" when gpu_allow_growth is True.
    os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true" if gpu_allow_growth else "false"

    # load config saved next to the checkpoint (log_folder/config.yaml)
    config = config_parser.load("/".join(ckpt_path.split("/")[:-2]) + "/config.yaml")
    data_config = config["data"]
    if data_config["name"] == "mr_us":
        # evaluate on all labels for this dataset
        data_config["sample_label"]["train"] = "all"
        data_config["sample_label"]["test"] = "all"
    tf_data_config = config["tf"]["data"]
    tf_data_config["batch_size"] = batch_size
    tf_opt_config = config["tf"]["opt"]
    tf_model_config = config["tf"]["model"]
    tf_loss_config = config["tf"]["loss"]
    log_folder_name = log if log != "" else datetime.now().strftime("%Y%m%d-%H%M%S")
    # strip a trailing "/" from the configured log dir before joining
    log_dir = config["log_dir"][:-1] if config["log_dir"][-1] == "/" else config["log_dir"]
    log_dir = log_dir + "/" + log_folder_name

    # data
    data_loader = load.get_data_loader(data_config, mode)
    dataset = data_loader.get_dataset_and_preprocess(
        training=False, repeat=False, **tf_data_config)

    # optimizer — required only so the model can be compiled before loading weights
    optimizer = opt.get_optimizer(tf_opt_config)

    # model
    model = network.build_model(
        moving_image_size=data_loader.moving_image_shape,
        fixed_image_size=data_loader.fixed_image_shape,
        index_size=data_loader.num_indices,
        batch_size=tf_data_config["batch_size"],
        tf_model_config=tf_model_config,
        tf_loss_config=tf_loss_config)

    # metrics
    model.compile(
        optimizer=optimizer,
        loss=label_loss.get_similarity_fn(
            config=tf_loss_config["similarity"]["label"]),
        metrics=[
            metric.MeanDiceScore(),
            metric.MeanCentroidDistance(grid_size=data_loader.fixed_image_shape),
        ])

    # load weights
    model.load_weights(ckpt_path)

    # predict
    fixed_grid_ref = layer_util.get_reference_grid(
        grid_size=data_loader.fixed_image_shape)
    predict(data_loader=data_loader,
            dataset=dataset,
            fixed_grid_ref=fixed_grid_ref,
            model=model,
            save_dir=log_dir + "/test")
def train(gpu: str, config_path: list, gpu_allow_growth: bool, ckpt_path: str,
          log_dir: str):
    """
    Build, compile and fit a registration model, saving checkpoints and logs.

    :param gpu: which local gpu to use to train (value for CUDA_VISIBLE_DEVICES)
    :param config_path: path to configuration set up
        (NOTE(review): annotated as list but documented as str — confirm what init() expects)
    :param gpu_allow_growth: bool, whether or not to allocate whole GPU memory to training
    :param ckpt_path: str, where to store training ckpts; "" means train from scratch
    :param log_dir: str, where to store logs in training
    :raises ValueError: if the training data loader cannot be constructed
    """
    # env vars — set before TF initializes the GPU
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    os.environ[
        "TF_FORCE_GPU_ALLOW_GROWTH"] = "true" if gpu_allow_growth else "false"

    # load config
    config, log_dir = init(config_path, log_dir, ckpt_path)
    dataset_config = config["dataset"]
    preprocess_config = config["train"]["preprocess"]
    optimizer_config = config["train"]["optimizer"]
    model_config = config["train"]["model"]
    loss_config = config["train"]["loss"]
    num_epochs = config["train"]["epochs"]
    save_period = config["train"]["save_period"]
    # histograms are logged at the same cadence as checkpoints
    histogram_freq = save_period

    # data
    data_loader_train = get_data_loader(dataset_config, "train")
    if data_loader_train is None:
        raise ValueError(
            "Training data loader is None. Probably the data dir path is not defined."
        )
    # validation is optional: data_loader_val may be None and everything
    # validation-related below degrades to None accordingly
    data_loader_val = get_data_loader(dataset_config, "valid")
    dataset_train = data_loader_train.get_dataset_and_preprocess(
        training=True, repeat=True, **preprocess_config)
    dataset_val = (data_loader_val.get_dataset_and_preprocess(
        training=False, repeat=True, **preprocess_config)
                   if data_loader_val is not None else None)
    dataset_size_train = data_loader_train.num_samples
    dataset_size_val = (data_loader_val.num_samples
                        if data_loader_val is not None else None)
    # guarantee at least one step per epoch even when dataset < batch_size
    steps_per_epoch_train = max(
        dataset_size_train // preprocess_config["batch_size"], 1)
    steps_per_epoch_valid = (max(
        dataset_size_val // preprocess_config["batch_size"], 1)
                             if data_loader_val is not None else None)

    # build and compile inside the distribution strategy scope so variables
    # are mirrored across the visible GPUs
    strategy = tf.distribute.MirroredStrategy()
    with strategy.scope():
        # model
        model = build_model(
            moving_image_size=data_loader_train.moving_image_shape,
            fixed_image_size=data_loader_train.fixed_image_shape,
            index_size=data_loader_train.num_indices,
            labeled=dataset_config["labeled"],
            batch_size=preprocess_config["batch_size"],
            model_config=model_config,
            loss_config=loss_config,
        )

        # compile
        optimizer = opt.get_optimizer(optimizer_config)
        model.compile(optimizer=optimizer)

        # load weights to warm-start when a checkpoint is given
        if ckpt_path != "":
            model.load_weights(ckpt_path)

    # train
    # callbacks
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=log_dir, histogram_freq=histogram_freq)
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=log_dir + "/save/weights-epoch{epoch:d}.ckpt",
        save_weights_only=True,
        period=save_period,
    )
    # it's necessary to define the steps_per_epoch and validation_steps to prevent errors like
    # BaseCollectiveExecutor::StartAbort Out of range: End of sequence
    model.fit(
        x=dataset_train,
        steps_per_epoch=steps_per_epoch_train,
        epochs=num_epochs,
        validation_data=dataset_val,
        validation_steps=steps_per_epoch_valid,
        callbacks=[tensorboard_callback, checkpoint_callback],
    )

    # release data loader resources (e.g. open files)
    data_loader_train.close()
    if data_loader_val is not None:
        data_loader_val.close()
def predict(
    gpu,
    gpu_allow_growth,
    ckpt_path,
    mode,
    batch_size,
    log_dir,
    sample_label,
    config_path,
):
    """
    Function to predict some metrics from the saved model and logging results.

    :param gpu: str, which env gpu to use.
    :param gpu_allow_growth: bool, whether to allow gpu growth or not
    :param ckpt_path: str, where model is stored, should be like log_folder/save/xxx.ckpt
    :param mode: data split to load, passed to the data loader
    :param batch_size: int, batch size to perform predictions in
    :param log_dir: str, path to store logs
    :param sample_label: currently unused — see the logged TODO below
    :param config_path: to overwrite the default config
    :raises ValueError: if the data loader cannot be constructed
    """
    logging.error("TODO sample_label is not used in predict")

    # env vars — set before TF initializes the GPU
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    # BUG FIX: the flag was inverted ("false" if gpu_allow_growth else "true"),
    # which disabled growth exactly when the caller requested it; train() in
    # this file sets "true" when gpu_allow_growth is True.
    os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true" if gpu_allow_growth else "false"

    # load config
    config, log_dir = init(log_dir, ckpt_path, config_path)
    dataset_config = config["dataset"]
    preprocess_config = config["train"]["preprocess"]
    preprocess_config["batch_size"] = batch_size
    optimizer_config = config["train"]["optimizer"]
    model_config = config["train"]["model"]
    loss_config = config["train"]["loss"]

    # data
    data_loader = load.get_data_loader(dataset_config, mode)
    if data_loader is None:
        raise ValueError(
            "Data loader for prediction is None. Probably the data dir path is not defined."
        )
    dataset = data_loader.get_dataset_and_preprocess(
        training=False, repeat=False, **preprocess_config)

    # optimizer — required only so the model can be compiled before loading weights
    optimizer = opt.get_optimizer(optimizer_config)

    # model
    model = build_model(
        moving_image_size=data_loader.moving_image_shape,
        fixed_image_size=data_loader.fixed_image_shape,
        index_size=data_loader.num_indices,
        labeled=dataset_config["labeled"],
        batch_size=preprocess_config["batch_size"],
        model_config=model_config,
        loss_config=loss_config,
    )

    # metrics
    model.compile(optimizer=optimizer)

    # load weights
    # https://stackoverflow.com/questions/58289342/tf2-0-translation-model-error-when-restoring-the-saved-model-unresolved-objec
    model.load_weights(ckpt_path).expect_partial()

    # predict
    fixed_grid_ref = layer_util.get_reference_grid(
        grid_size=data_loader.fixed_image_shape)
    predict_on_dataset(
        dataset=dataset,
        fixed_grid_ref=fixed_grid_ref,
        model=model,
        save_dir=log_dir + "/test",
    )

    # release data loader resources (e.g. open files)
    data_loader.close()
def main(gpu, config_path, gpu_allow_growth, ckpt_path, log):
    """
    Train a registration model from a config file, saving checkpoints and logs.

    :param gpu: str, value for CUDA_VISIBLE_DEVICES, selects the local GPU(s)
    :param config_path: str, path to the configuration file to load
    :param gpu_allow_growth: bool, whether TF may grow GPU memory on demand
    :param ckpt_path: str, checkpoint to warm-start from; "" means from scratch
    :param log: str, log folder name; empty string means use a timestamp
    :raises ValueError: if ckpt_path is non-empty but does not end with ".ckpt"
    """
    # env vars — set before TF initializes the GPU
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true" if gpu_allow_growth else "false"

    # load config
    config = config_parser.load(config_path)
    data_config = config["data"]
    tf_data_config = config["tf"]["data"]
    tf_opt_config = config["tf"]["opt"]
    tf_model_config = config["tf"]["model"]
    tf_loss_config = config["tf"]["loss"]
    num_epochs = config["tf"]["epochs"]
    save_period = config["tf"]["save_period"]
    histogram_freq = config["tf"]["histogram_freq"]
    # strip a trailing "/" from the configured log dir before joining
    log_dir = config["log_dir"][:-1] if config["log_dir"][-1] == "/" else config["log_dir"]

    # output
    log_folder_name = log if log != "" else datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = log_dir + "/" + log_folder_name
    checkpoint_init_path = ckpt_path
    if checkpoint_init_path != "":
        if not checkpoint_init_path.endswith(".ckpt"):
            raise ValueError("checkpoint path should end with .ckpt")

    # backup config alongside the run's logs for reproducibility
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    config_parser.save(config=config, out_dir=log_dir)

    # data
    data_loader_train = load.get_data_loader(data_config, "train")
    data_loader_val = load.get_data_loader(data_config, "valid")
    dataset_train = data_loader_train.get_dataset_and_preprocess(
        training=True, repeat=True, **tf_data_config)
    dataset_val = data_loader_val.get_dataset_and_preprocess(
        training=False, repeat=True, **tf_data_config)
    dataset_size_train = data_loader_train.num_images
    dataset_size_val = data_loader_val.num_images

    # optimizer
    optimizer = opt.get_optimizer(tf_opt_config)

    # callbacks
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=log_dir, histogram_freq=histogram_freq)
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=log_dir + "/save/weights-epoch{epoch:d}.ckpt",
        save_weights_only=True,
        period=save_period)

    # build, compile and fit inside the distribution strategy scope so
    # variables are mirrored across the visible GPUs
    strategy = tf.distribute.MirroredStrategy()
    with strategy.scope():
        # model
        model = network.build_model(
            moving_image_size=data_loader_train.moving_image_shape,
            fixed_image_size=data_loader_train.fixed_image_shape,
            index_size=data_loader_train.num_indices,
            batch_size=tf_data_config["batch_size"],
            tf_model_config=tf_model_config,
            tf_loss_config=tf_loss_config)
        model.summary()

        # metrics
        model.compile(
            optimizer=optimizer,
            loss=label_loss.get_similarity_fn(
                config=tf_loss_config["similarity"]["label"]),
            metrics=[
                metric.MeanDiceScore(),
                metric.MeanCentroidDistance(
                    grid_size=data_loader_train.fixed_image_shape),
                metric.MeanForegroundProportion(pred=False),
                metric.MeanForegroundProportion(pred=True),
            ])
        # BUG FIX: was print(model.summary()) — Model.summary() prints to
        # stdout and returns None, so the old call printed a stray "None".
        model.summary()

        # load weights
        if checkpoint_init_path != "":
            model.load_weights(checkpoint_init_path)

        # train
        # it's necessary to define the steps_per_epoch and validation_steps to prevent errors like
        # BaseCollectiveExecutor::StartAbort Out of range: End of sequence
        model.fit(
            x=dataset_train,
            steps_per_epoch=dataset_size_train // tf_data_config["batch_size"],
            epochs=num_epochs,
            validation_data=dataset_val,
            validation_steps=dataset_size_val // tf_data_config["batch_size"],
            callbacks=[tensorboard_callback, checkpoint_callback],
        )