def test_wrong_model_dataset_generator(shapes_sample_config):
    """Check that unknown dataset and model names are rejected

    Both calls to `generator.create_generator` are expected to raise a
    `ValueError` carrying a dedicated message.
    """
    unknown_dataset = "fake"
    unknown_model = "conquer_the_world"
    side = 10
    batch = 10
    data_dir = "./tests/data/" + unknown_dataset + "/training"
    labels = utils.read_config(shapes_sample_config)["labels"]

    # Unknown dataset name, associated with a known model
    with pytest.raises(ValueError) as dataset_error:
        generator.create_generator(
            unknown_dataset, "featdet", data_dir, side, batch, labels
        )
    expected_dataset_msg = "Wrong dataset name {}".format(unknown_dataset)
    assert str(dataset_error.value) == expected_dataset_msg

    # Known dataset name, associated with an unknown model
    with pytest.raises(ValueError) as model_error:
        generator.create_generator(
            "shapes", unknown_model, data_dir, side, batch, labels
        )
    expected_model_msg = "Wrong model name {} (choose amongst {})".format(
        unknown_model, AVAILABLE_MODELS
    )
    assert str(model_error.value) == expected_model_msg
def get_data(folders, dataset, model, image_size, batch_size):
    """Build the training and validation generators of `dataset` for `model`

    The label description is read from the training configuration file; the
    very same labels feed both generators.

    Parameters
    ----------
    folders : dict
        Paths to the data; the keys `training_config`, `training`,
        `validation_config` and `validation` are read here
    dataset : str
        Name of the dataset, passed to `generator.create_generator`
    model : str
        Name of the addressed research problem, passed to
        `generator.create_generator`
    image_size : int
        Size of the images, in pixel (height=width)
    batch_size : int
        Number of images in each batch

    Returns
    -------
    tuple
        Number of labels flagged with `is_evaluate`, then the training and
        validation data generators

    Raises
    ------
    FileNotFoundError
        If the training or the validation configuration file is missing
    """
    # Training data: configuration first, then the generator
    if not os.path.isfile(folders["training_config"]):
        raise FileNotFoundError(
            "There is no training data with the given parameters. Please "
            "generate a valid dataset before calling the training program.")
    labels = utils.read_config(folders["training_config"])["labels"]
    evaluated_ids = [label["id"] for label in labels if label["is_evaluate"]]
    train_generator = generator.create_generator(
        dataset,
        model,
        folders["training"],
        image_size,
        batch_size,
        labels,
        seed=SEED,
    )

    # Validation data: only checked once the training generator exists
    if not os.path.isfile(folders["validation_config"]):
        raise FileNotFoundError(
            "There is no validation data with the given parameters. Please "
            "generate a valid dataset before calling the training program.")
    validation_generator = generator.create_generator(
        dataset,
        model,
        folders["validation"],
        image_size,
        batch_size,
        labels,
        seed=SEED,
    )

    return len(evaluated_ids), train_generator, validation_generator
def test_semseg_tanzania_generator(tanzania_image_size, tanzania_sample,
                                   tanzania_sample_config, nb_channels):
    """Check the batch shapes produced for the Open AI Tanzania dataset
    (semantic segmentation)
    """
    batch_size = 3
    labels = utils.read_config(tanzania_sample_config)["labels"]
    expected_ids = [label["id"] for label in labels]
    side = tanzania_image_size

    batches = generator.create_generator(
        "tanzania", "semseg", tanzania_sample, side, batch_size, labels
    )
    batch = next(batches)

    # A batch is an (images, labels) pair
    assert len(batch) == 2
    images, masks = batch
    assert images.shape == (batch_size, side, side, nb_channels)
    assert masks.shape == (batch_size, side, side, len(expected_ids))
def test_semseg_aerial_generator(aerial_image_size, aerial_sample,
                                 aerial_sample_config, nb_channels):
    """Check the batch shapes produced for the AerialImage dataset
    (semantic segmentation)
    """
    batch_size = 4
    labels = utils.read_config(aerial_sample_config)["labels"]
    expected_ids = [label["id"] for label in labels]
    side = aerial_image_size

    batches = generator.create_generator(
        "aerial", "semseg", aerial_sample, side, batch_size, labels
    )
    batch = next(batches)

    # A batch is an (images, labels) pair
    assert len(batch) == 2
    images, masks = batch
    assert images.shape == (batch_size, side, side, nb_channels)
    assert masks.shape == (batch_size, side, side, len(expected_ids))
def test_semseg_mapillary_generator(
    mapillary_image_size,
    mapillary_sample,
    mapillary_sample_config,
    nb_channels,
):
    """Check the batch shapes produced for the Mapillary dataset
    (semantic segmentation)
    """
    batch_size = 10
    labels = utils.read_config(mapillary_sample_config)["labels"]
    expected_ids = [label["id"] for label in labels]
    side = mapillary_image_size

    batches = generator.create_generator(
        "mapillary", "semseg", mapillary_sample, side, batch_size, labels
    )
    batch = next(batches)

    # A batch is an (images, labels) pair
    assert len(batch) == 2
    images, masks = batch
    assert images.shape == (batch_size, side, side, nb_channels)
    assert masks.shape == (batch_size, side, side, len(expected_ids))
def test_featdet_shape_generator(shapes_image_size, shapes_sample,
                                 shapes_sample_config, nb_channels):
    """Check the batch shapes produced for the shape dataset
    (feature detection)
    """
    batch_size = 10
    labels = utils.read_config(shapes_sample_config)["labels"]
    expected_ids = [label["id"] for label in labels]
    side = shapes_image_size

    batches = generator.create_generator(
        "shapes", "featdet", shapes_sample, side, batch_size, labels
    )
    batch = next(batches)

    # A batch is an (images, labels) pair; labels hold one value per label id
    assert len(batch) == 2
    images, targets = batch
    assert images.shape == (batch_size, side, side, nb_channels)
    assert targets.shape == (batch_size, len(expected_ids))
def test_wrong_model_dataset_generator(shapes_sample_config):
    """Check that unknown dataset and model names are rejected

    Both calls to `generator.create_generator` are expected to raise a
    `ValueError` carrying a dedicated message.
    """
    unknown_dataset = "fake"
    unknown_model = "conquer_the_world"
    side = 10
    batch = 10
    data_dir = "./tests/data/" + unknown_dataset + "/training"
    labels = utils.read_config(shapes_sample_config)["labels"]

    # Unknown dataset name, associated with a known model
    with pytest.raises(ValueError) as dataset_error:
        generator.create_generator(
            unknown_dataset, "feature_detection", data_dir, side, batch, labels
        )
    expected_dataset_msg = "Wrong dataset name {}".format(unknown_dataset)
    assert str(dataset_error.value) == expected_dataset_msg

    # Known dataset name, associated with an unknown model
    with pytest.raises(ValueError) as model_error:
        generator.create_generator(
            "shapes", unknown_model, data_dir, side, batch, labels
        )
    expected_model_msg = "Wrong model name {}".format(unknown_model)
    assert str(model_error.value) == expected_model_msg
def test_model_training(
    shapes_image_size,
    shapes_sample,
    shapes_sample_config,
    shapes_temp_dir,
    shapes_nb_images,
):
    """Train a small feature detection network with the Keras API, then
    check inference and model backup

    Everything lives in a single test function so that the (possibly long)
    training step is run only once.
    """
    batch_size = 10
    nb_epochs = 1
    steps_per_epoch = shapes_nb_images // batch_size

    labels = read_config(shapes_sample_config)["labels"]
    evaluated_ids = [label["id"] for label in labels if label["is_evaluate"]]
    nb_labels = len(evaluated_ids)

    batches = create_generator(
        "shapes", "featdet", shapes_sample, shapes_image_size, batch_size,
        labels,
    )
    network = FeatureDetectionNetwork(
        "test", image_size=shapes_image_size, nb_labels=nb_labels
    )
    model = Model(network.X, network.Y)
    model.compile(
        loss="binary_crossentropy", optimizer="adam", metrics=["acc"]
    )

    # Training: the history must expose exactly the loss and the accuracy
    training = model.fit_generator(
        batches, epochs=nb_epochs, steps_per_epoch=steps_per_epoch
    )
    history = training.history
    assert len(history) == 2
    assert all(metric in history for metric in ("acc", "loss"))
    first_epoch_acc = history["acc"][0]
    assert 0 <= first_epoch_acc <= 1

    # Inference on random images: one score per label, each within [0, 1]
    random_images = np.random.randint(
        0, 255, [batch_size, shapes_image_size, shapes_image_size, 3]
    )
    predictions = model.predict(random_images)
    assert predictions.shape == (batch_size, nb_labels)
    assert all(0 <= value <= 1 for value in predictions.ravel())

    # Backup of the trained model on the file system
    backup_path = os.path.join(
        str(shapes_temp_dir),
        "checkpoints",
        "test_model_{:02d}.h5".format(nb_epochs),
    )
    model.save(backup_path)
    assert os.path.isfile(backup_path)
def test_featdet_mapillary_generator(mapillary_image_size, mapillary_sample,
                                     mapillary_sample_config, nb_channels):
    """Check the batch shapes produced for the Mapillary dataset
    (feature detection)
    """
    batch_size = 10
    labels = utils.read_config(mapillary_sample_config)["labels"]
    expected_ids = [label["id"] for label in labels]
    side = mapillary_image_size

    batches = generator.create_generator(
        "mapillary", "feature_detection", mapillary_sample, side, batch_size,
        labels,
    )
    batch = next(batches)

    # A batch is an (images, labels) pair; labels hold one value per label id
    assert len(batch) == 2
    images, targets = batch
    assert images.shape == (batch_size, side, side, nb_channels)
    assert targets.shape == (batch_size, len(expected_ids))
def test_semseg_shape_generator(shapes_image_size, shapes_sample,
                                shapes_sample_config, nb_channels):
    """Check the batch shapes produced for the shape dataset
    (semantic segmentation)
    """
    batch_size = 10
    labels = utils.read_config(shapes_sample_config)["labels"]
    expected_ids = [label["id"] for label in labels]
    side = shapes_image_size

    batches = generator.create_generator(
        "shapes", "semantic_segmentation", shapes_sample, side, batch_size,
        labels,
    )
    batch = next(batches)

    # A batch is an (images, labels) pair
    assert len(batch) == 2
    images, masks = batch
    assert images.shape == (batch_size, side, side, nb_channels)
    assert masks.shape == (batch_size, side, side, len(expected_ids))
aggregate_value, args.dropout, args.learning_rate, args.learning_rate_decay] instance_name = utils.list_to_str(instance_args, "_") prepro_folder = utils.prepare_preprocessed_folder(args.datapath, args.dataset, args.image_size, aggregate_value) # Data gathering if (os.path.isfile(prepro_folder["training_config"]) and os.path.isfile(prepro_folder["validation_config"]) and os.path.isfile(prepro_folder["testing_config"])): train_config = utils.read_config(prepro_folder["training_config"]) label_ids = [x['id'] for x in train_config['labels'] if x['is_evaluate']] train_generator = generator.create_generator( args.dataset, args.model, prepro_folder["training"], args.image_size, args.batch_size, label_ids, seed=SEED) validation_generator = generator.create_generator( args.dataset, args.model, prepro_folder["validation"], args.image_size, args.batch_size, label_ids, seed=SEED) test_generator = generator.create_generator( args.dataset, args.model, prepro_folder["testing"],
aggregate_value) if args.dataset == 'aerial': model_input_size = utils.get_image_size_from_tile(args.image_size) else: model_input_size = args.image_size if os.path.isfile(prepro_folder["training_config"]): train_config = utils.read_config(prepro_folder["training_config"]) label_ids = [ x['id'] for x in train_config['labels'] if x['is_evaluate'] ] train_generator = generator.create_generator(args.dataset, args.model, prepro_folder["training"], model_input_size, args.batch_size, train_config['labels'], seed=SEED) else: logger.error(("There is no training data with the given " "parameters. Please generate a valid dataset " "before calling the training program.")) sys.exit(1) if os.path.isfile(prepro_folder["validation_config"]): validation_generator = generator.create_generator( args.dataset, args.model, prepro_folder["validation"], model_input_size,
def get_data(folders, dataset, model, image_size, batch_size):
    """Build the training, validation and testing generators of `dataset`
    for `model`

    The label description is read from the training configuration file; the
    very same labels feed the three generators. When any of the three
    configuration files is missing, an error is logged and the program exits
    with status 1.

    Parameters
    ----------
    folders : dict
        Paths to the data; the keys `training_config`, `validation_config`,
        `testing_config`, `training`, `validation` and `testing` are read here
    dataset : str
        Name of the dataset, passed to `generator.create_generator`
    model : str
        Name of the addressed research problem, passed to
        `generator.create_generator`
    image_size : int
        Size of the images, in pixel (height=width)
    batch_size : int
        Number of images in each batch

    Returns
    -------
    tuple
        Number of labels flagged with `is_evaluate`, then the training,
        validation and testing data generators
    """
    config_keys = ("training_config", "validation_config", "testing_config")
    # Lazy evaluation keeps the original short-circuit order of the checks
    if not all(os.path.isfile(folders[key]) for key in config_keys):
        utils.logger.error((
            "There is no valid data with the specified parameters. "
            "Please generate a valid dataset before calling the training program."
        ))
        sys.exit(1)

    labels = utils.read_config(folders["training_config"])["labels"]
    evaluated_ids = [label["id"] for label in labels if label["is_evaluate"]]

    train_generator = generator.create_generator(
        dataset, model, folders["training"], image_size, batch_size, labels,
        seed=SEED,
    )
    validation_generator = generator.create_generator(
        dataset, model, folders["validation"], image_size, batch_size, labels,
        seed=SEED,
    )
    # The testing generator is the only one created in inference mode
    test_generator = generator.create_generator(
        dataset, model, folders["testing"], image_size, batch_size, labels,
        inference=True, seed=SEED,
    )

    return (
        len(evaluated_ids),
        train_generator,
        validation_generator,
        test_generator,
    )