def main(args):
    # Grid search
    model_output = []
    for batch_size in args.batch_size:
        logger.info("Generating data with batch of %s images...", batch_size)
        # Data generator building
        prepro_folder = utils.prepare_preprocessed_folder(
            args.datapath, args.dataset, args.image_size)
        nb_labels, train_gen, valid_gen = get_data(
            prepro_folder,
            args.dataset,
            args.model,
            args.image_size,
            batch_size,
        )
        for parameters in itertools.product(
            args.dropout,
            args.network,
            args.learning_rate,
            args.learning_rate_decay,
        ):
            logger.info("Instance: %s", utils.list_to_str(parameters))
            # Data path and repository management
            dropout, network, learning_rate, learning_rate_decay = parameters
            instance_args = [
                args.name,
                args.image_size,
                network,
                batch_size,
                dropout,
                learning_rate,
                learning_rate_decay,
            ]
            instance_name = utils.list_to_str(instance_args, "_")
            output_folder = utils.prepare_output_folder(
                args.datapath, args.dataset, args.model, instance_name)
            # Model running
            model_output.append(
                run_model(train_gen, valid_gen, args.model, output_folder,
                          instance_name, args.image_size, nb_labels,
                          args.nb_epochs, args.nb_training_image,
                          args.nb_validation_image, batch_size, *parameters))
            logger.info("Instance result: %s", model_output[-1])

    # Recover the best instance, based on validation accuracy
    best_instance = max(model_output, key=lambda x: x["val_acc"])

    # Save the best model
    output_folder = utils.prepare_output_folder(
        args.datapath, args.dataset, args.model)
    instance_name = os.path.join(
        output_folder,
        "best-{}-" + str(args.image_size) + ".{}",
    )
    best_instance["model"].save(instance_name.format("model", "h5"))
    with open(instance_name.format("instance", "json"), "w") as fobj:
        json.dump(
            {
                key: best_instance[key]
                for key in best_instance
                if key != "model"
            },
            fobj,
        )

    backend.clear_session()
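# Usage sketch (hypothetical values, not part of the original script): `main`
# expects the argparse namespace built in the __main__ block below, with each
# hyperparameter passed as a list so that the nested loops and
# `itertools.product` cover the whole grid (here 1 batch size x 2 dropouts
# x 1 network x 1 learning rate x 1 decay = 2 training instances). The dataset
# and model names are illustrative only.
def _example_grid_search():
    example_args = argparse.Namespace(
        name="cnn",
        datapath="./data",
        dataset="mapillary",          # assumed dataset name
        model="feature_detection",
        image_size=224,
        batch_size=[50],
        dropout=[0.5, 0.75],
        network=["simple"],
        learning_rate=[1e-3],
        learning_rate_decay=[1e-4],
        nb_epochs=5,
        nb_training_image=5000,
        nb_validation_image=1000,
    )
    main(example_args)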
if __name__ == '__main__':
    # Parse command-line arguments
    parser = argparse.ArgumentParser(
        description="Convolutional Neural Network on street-scene images")
    parser = add_instance_arguments(parser)
    parser = add_hyperparameters(parser)
    parser = add_training_arguments(parser)
    args = parser.parse_args()

    # Data path and repository management
    aggregate_value = "full" if not args.aggregate_label else "aggregated"
    instance_args = [args.name, args.image_size, args.network,
                     args.batch_size, aggregate_value, args.dropout,
                     args.learning_rate, args.learning_rate_decay]
    instance_name = utils.list_to_str(instance_args, "_")
    prepro_folder = utils.prepare_preprocessed_folder(
        args.datapath, args.dataset, args.image_size, aggregate_value)

    # Data gathering
    if (os.path.isfile(prepro_folder["training_config"])
            and os.path.isfile(prepro_folder["validation_config"])
            and os.path.isfile(prepro_folder["testing_config"])):
        train_config = utils.read_config(prepro_folder["training_config"])
        label_ids = [x['id'] for x in train_config['labels']
                     if x['is_evaluate']]
        train_generator = generator.create_generator(
            args.dataset,
            args.model,
            prepro_folder["training"],
            args.image_size,
            args.batch_size,
def predict(
    filenames,
    dataset,
    problem,
    datapath="./data",
    aggregate=False,
    name=None,
    network=None,
    batch_size=None,
    dropout=None,
    learning_rate=None,
    learning_rate_decay=None,
    output_dir="/tmp/deeposlandia/predicted",
):
    """Make label predictions on the images indicated by `filenames`,
    according to the considered `problem`

    Parameters
    ----------
    filenames : list of str
        Names (or glob patterns) of the image files on the file system
    dataset : str
        Name of the dataset
    problem : str
        Name of the considered model, either `feature_detection` or
        `semantic_segmentation`
    datapath : str
        Relative path of the dataset repository
    aggregate : bool
        Whether or not the labels are aggregated
    name : str
        Name of the saved network
    network : str
        Name of the chosen architecture, either `simple`, `vgg` or `inception`
    batch_size : integer
        Batch size used for training the model
    dropout : float
        Dropout rate used for training the model
    learning_rate : float
        Learning rate used for training the model
    learning_rate_decay : float
        Learning rate decay used for training the model
    output_dir : str
        Path of the output directory, where labelled images will be stored
        (useful only if `problem=semantic_segmentation`)

    Returns
    -------
    dict
        Predictions (floats between 0 and 1, read as percentages) for each
        label
    """
    # `image_paths` is first built as
    # [[image1, ..., image_i], [image_j, ..., image_n]]
    image_paths = [glob.glob(f) for f in filenames]
    # then it is flattened to get a simple list
    flattened_image_paths = sum(image_paths, [])
    images = extract_images(flattened_image_paths)
    model_input_size = images.shape[1]
    if dataset == "aerial":
        tile_size = utils.get_tile_size_from_image(model_input_size)
    else:
        tile_size = model_input_size

    aggregate_value = "full" if not aggregate else "aggregated"
    instance_args = [
        name,
        tile_size,
        network,
        batch_size,
        aggregate_value,
        dropout,
        learning_rate,
        learning_rate_decay,
    ]
    instance_name = utils.list_to_str(instance_args, "_")

    prepro_folder = utils.prepare_preprocessed_folder(
        datapath, dataset, tile_size, aggregate_value
    )

    if os.path.isfile(prepro_folder["training_config"]):
        train_config = utils.read_config(prepro_folder["training_config"])
        label_ids = [
            x["id"] for x in train_config["labels"] if x["is_evaluate"]
        ]
        nb_labels = len(label_ids)
    else:
        logger.error(
            "There is no training data with the given parameters. "
            "Please generate a valid dataset before calling the program."
        )
        sys.exit(1)

    if any([arg is None for arg in instance_args]):
        logger.info("Some arguments are None, the best model is considered.")
        output_folder = utils.prepare_output_folder(datapath, dataset, problem)
        instance_filename = (
            "best-instance-" + str(tile_size) + "-" + aggregate_value + ".json"
        )
        instance_path = os.path.join(output_folder, instance_filename)
        dropout, network = utils.recover_instance(instance_path)
        model = init_model(
            problem,
            instance_name,
            model_input_size,
            nb_labels,
            dropout,
            network,
        )
        checkpoint_filename = (
            "best-model-" + str(tile_size) + "-" + aggregate_value + ".h5"
        )
        checkpoint_full_path = os.path.join(output_folder, checkpoint_filename)
        if os.path.isfile(checkpoint_full_path):
            logger.info("Checkpoint full path : %s", checkpoint_full_path)
            model.load_weights(checkpoint_full_path)
            logger.info(
                "Model weights have been recovered from %s",
                checkpoint_full_path,
            )
        else:
            logger.info(
                "No available trained model for this image size with "
                "optimized hyperparameters. The inference will be done "
                "on an untrained model."
            )
    else:
        logger.info("All instance arguments are filled out.")
        output_folder = utils.prepare_output_folder(
            datapath, dataset, problem, instance_name
        )
        model = init_model(
            problem,
            instance_name,
            model_input_size,
            nb_labels,
            dropout,
            network,
        )
        checkpoints = [
            item
            for item in os.listdir(output_folder)
            if "checkpoint-epoch" in item
        ]
        if len(checkpoints) > 0:
            model_checkpoint = max(checkpoints)
            checkpoint_full_path = os.path.join(
                output_folder, model_checkpoint
            )
            model.load_weights(checkpoint_full_path)
            logger.info(
                "Model weights have been recovered from %s",
                checkpoint_full_path,
            )
        else:
            logger.info(
                "No available checkpoint for this configuration. "
                "The model will be trained from scratch."
            )

    y_raw_pred = model.predict(images)

    result = {}
    if problem == "feature_detection":
        label_info = [
            (i["category"], utils.GetHTMLColor(i["color"]))
            for i in train_config["labels"]
        ]
        for filename, prediction in zip(flattened_image_paths, y_raw_pred):
            result[filename] = [
                (i[0], 100 * round(float(j), 2), i[1])
                for i, j in zip(label_info, prediction)
            ]
        return result
    elif problem == "semantic_segmentation":
        os.makedirs(output_dir, exist_ok=True)
        predicted_labels = np.argmax(y_raw_pred, axis=3)
        encountered_labels = np.unique(predicted_labels)
        meaningful_labels = [
            x
            for i, x in enumerate(train_config["labels"])
            if i in encountered_labels
        ]
        labelled_images = np.zeros(
            shape=np.append(predicted_labels.shape, 3), dtype=np.int8
        )
        for i in range(nb_labels):
            labelled_images[predicted_labels == i] = (
                train_config["labels"][i]["color"]
            )
        for predicted_labels, filename in zip(
            labelled_images, flattened_image_paths
        ):
            predicted_image = Image.fromarray(predicted_labels, "RGB")
            filename = filename.replace(".jpg", ".png")
            predicted_image_path = os.path.join(
                output_dir, os.path.basename(filename)
            )
            predicted_image.save(predicted_image_path)
            result[filename] = os.path.basename(filename)
        return {
            "labels": summarize_config(meaningful_labels),
            "label_images": result,
        }
    else:
        logger.error(
            "Unknown model argument. Please use "
            "'feature_detection' or 'semantic_segmentation'."
        )
        sys.exit(1)
def predict(
    filenames,
    dataset,
    problem,
    datapath="./data",
    name=None,
    network=None,
    batch_size=None,
    dropout=None,
    learning_rate=None,
    learning_rate_decay=None,
    output_dir="/tmp/deeposlandia/predicted",
):
    """Make label predictions on the images indicated by `filenames`,
    according to the considered `problem`

    Parameters
    ----------
    filenames : list of str
        Names (or glob patterns) of the image files on the file system
    dataset : str
        Name of the dataset
    problem : str
        Name of the considered model, either `featdet` or `semseg`
    datapath : str
        Relative path of the dataset repository
    name : str
        Name of the saved network
    network : str
        Name of the chosen architecture, either `simple`, `vgg` or `inception`
    batch_size : integer
        Batch size used for training the model
    dropout : float
        Dropout rate used for training the model
    learning_rate : float
        Learning rate used for training the model
    learning_rate_decay : float
        Learning rate decay used for training the model
    output_dir : str
        Path of the output directory, where labelled images will be stored
        (useful only if `problem=semseg`)

    Returns
    -------
    dict
        Predictions (floats between 0 and 1, read as percentages) for each
        label
    """
    # `image_paths` is first built as
    # [[image1, ..., image_i], [image_j, ..., image_n]]
    image_paths = [glob.glob(f) for f in filenames]
    # then it is flattened to get a simple list
    flattened_image_paths = sum(image_paths, [])
    images = extract_images(flattened_image_paths)
    model_input_size = images.shape[1]

    instance_args = [
        name,
        model_input_size,
        network,
        batch_size,
        dropout,
        learning_rate,
        learning_rate_decay,
    ]
    instance_name = utils.list_to_str(instance_args, "_")

    prepro_folder = utils.prepare_preprocessed_folder(
        datapath, dataset, model_input_size)

    if os.path.isfile(prepro_folder["training_config"]):
        train_config = utils.read_config(prepro_folder["training_config"])
        label_ids = [
            x["id"] for x in train_config["labels"] if x["is_evaluate"]
        ]
        nb_labels = len(label_ids)
    else:
        raise FileNotFoundError(
            "There is no training data with the given parameters. "
            "Please generate a valid dataset before calling the program.")

    output_folder = utils.prepare_output_folder(
        datapath, dataset, model_input_size, problem)
    instance_path = output_folder["best-instance"]
    dropout, network = utils.recover_instance(instance_path)
    model = init_model(
        problem,
        instance_name,
        model_input_size,
        nb_labels,
        dropout,
        network,
    )
    if os.path.isfile(output_folder["best-model"]):
        model.load_weights(output_folder["best-model"])
        logger.info(
            "Model weights have been recovered from %s",
            output_folder["best-model"],
        )
    else:
        logger.info(
            "No available trained model for this image size with optimized "
            "hyperparameters. The inference will be done on an untrained "
            "model.")

    y_raw_pred = model.predict(images, batch_size=2, verbose=1)

    result = {}
    if problem == "featdet":
        label_info = [
            (i["category"], utils.GetHTMLColor(i["color"]))
            for i in train_config["labels"]
        ]
        for filename, prediction in zip(flattened_image_paths, y_raw_pred):
            result[filename] = [
                (i[0], 100 * round(float(j), 2), i[1])
                for i, j in zip(label_info, prediction)
            ]
        return result
    elif problem == "semseg":
        os.makedirs(output_dir, exist_ok=True)
        predicted_labels = np.argmax(y_raw_pred, axis=3)
        encountered_labels = np.unique(predicted_labels)
        meaningful_labels = [
            x
            for i, x in enumerate(train_config["labels"])
            if i in encountered_labels
        ]
        labelled_images = np.zeros(
            shape=np.append(predicted_labels.shape, 3), dtype=np.int8
        )
        for i in range(nb_labels):
            labelled_images[predicted_labels == i] = (
                train_config["labels"][i]["color"]
            )
        for predicted_labels, filename in zip(
            labelled_images, flattened_image_paths
        ):
            predicted_image = Image.fromarray(predicted_labels, "RGB")
            filename = filename.replace(".jpg", ".png")
            predicted_image_path = os.path.join(
                output_dir, os.path.basename(filename)
            )
            predicted_image.save(predicted_image_path)
            result[filename] = os.path.basename(filename)
        return {
            "labels": summarize_config(meaningful_labels),
            "label_images": result,
        }
    else:
        raise ValueError(
            "Unknown model argument. Please use 'featdet' or 'semseg'.")
# Grid search
model_output = []
for batch_size in args.batch_size:
    logger.info("Generating data with batch of %s images...", batch_size)
    # Data generator building
    prepro_folder = utils.prepare_preprocessed_folder(
        args.datapath, args.dataset, args.image_size, aggregate_value)
    nb_labels, train_gen, valid_gen = get_data(
        prepro_folder, args.dataset, args.model, model_input_size, batch_size)
    for parameters in itertools.product(args.dropout,
                                        args.network,
                                        args.learning_rate,
                                        args.learning_rate_decay):
        logger.info(utils.list_to_str(parameters))
        # Data path and repository management
        dropout, network, learning_rate, learning_rate_decay = parameters
        instance_args = [
            args.name, args.image_size, network, batch_size,
            aggregate_value, dropout, learning_rate, learning_rate_decay
        ]
        instance_name = utils.list_to_str(instance_args, "_")
        output_folder = utils.prepare_output_folder(
            args.datapath, args.dataset, args.model, instance_name)
        # Model running
        model_output.append(
            run_model(train_gen, valid_gen, args.model, output_folder,
                      instance_name, model_input_size, aggregate_value,
                      nb_labels, args.nb_epochs, args.nb_training_image,
                      args.nb_validation_image, batch_size, *parameters))
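# Follow-up sketch, mirroring the earlier version of `main` above (assuming
# `run_model` still returns a dict holding a "val_acc" score): keep the
# grid-search instance with the highest validation accuracy.
best_instance = max(model_output, key=lambda x: x["val_acc"])
logger.info("Best validation accuracy: %s", best_instance["val_acc"])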