def test_wrong_model_dataset_generator(shapes_sample_config):
    """Check that unknown dataset and model names are rejected

    Each call to `create_generator` below is expected to raise a `ValueError`
    whose message is compared verbatim with the expected text.
    """
    unknown_dataset = "fake"
    unknown_model = "conquer_the_world"
    image_size = 10
    batch_size = 10
    training_path = "./tests/data/{}/training".format(unknown_dataset)
    labels = utils.read_config(shapes_sample_config)["labels"]

    # An unknown dataset name combined with a valid model name
    with pytest.raises(ValueError) as dataset_error:
        generator.create_generator(unknown_dataset, "featdet", training_path,
                                   image_size, batch_size, labels)
    expected_dataset_msg = "Wrong dataset name {}".format(unknown_dataset)
    assert str(dataset_error.value) == expected_dataset_msg

    # A valid dataset name combined with an unknown model name
    with pytest.raises(ValueError) as model_error:
        generator.create_generator(
            "shapes",
            unknown_model,
            training_path,
            image_size,
            batch_size,
            labels,
        )
    expected_model_msg = "Wrong model name {} (choose amongst {})".format(
        unknown_model, AVAILABLE_MODELS)
    assert str(model_error.value) == expected_model_msg
Exemple #2
0
def get_data(folders, dataset, model, image_size, batch_size):
    """Build the training and validation generators of `dataset` for `model`

    Parameters
    ----------
    folders : dict
        Dictionary of useful folders that indicates paths to data; the keys
        `training_config`, `training`, `validation_config` and `validation`
        are read here
    dataset : str
        Name of the used dataset (*e.g.* `shapes` or `mapillary`)
    model : str
        Name of the addressed research problem (*e.g.* `feature_detection`
        or `semantic_segmentation`)
    image_size : int
        Size of the images, in pixel (height=width)
    batch_size : int
        Number of images in each batch

    Returns
    -------
    tuple
        Number of labels flagged `is_evaluate` in the training configuration,
        followed by the training and validation data generators

    Raises
    ------
    FileNotFoundError
        If the training or the validation configuration file does not exist

    """
    # Training data: the configuration file must exist before anything else
    if not os.path.isfile(folders["training_config"]):
        raise FileNotFoundError(
            "There is no training data with the given parameters. Please "
            "generate a valid dataset before calling the training program.")
    labels = utils.read_config(folders["training_config"])["labels"]
    nb_labels = len([x["id"] for x in labels if x["is_evaluate"]])

    def build_generator(split):
        # Both generators share the labels read from the training config
        return generator.create_generator(dataset, model, folders[split],
                                          image_size, batch_size, labels,
                                          seed=SEED)

    train_generator = build_generator("training")

    # Validation data: checked only once the training generator exists
    if not os.path.isfile(folders["validation_config"]):
        raise FileNotFoundError(
            "There is no validation data with the given parameters. Please "
            "generate a valid dataset before calling the training program.")
    validation_generator = build_generator("validation")

    return nb_labels, train_generator, validation_generator
def test_semseg_tanzania_generator(
    tanzania_image_size,
    tanzania_sample,
    tanzania_sample_config,
    nb_channels,
):
    """Check the shapes of a semantic segmentation batch for the Open AI
    Tanzania dataset

    The first generated item must hold two elements: the images, with
    `nb_channels` as last dimension, and the labels, with one entry per
    configured label as last dimension.
    """
    batch_size = 3
    labels = utils.read_config(tanzania_sample_config)["labels"]
    nb_labels = len([x["id"] for x in labels])
    batches = generator.create_generator("tanzania", "semseg",
                                         tanzania_sample, tanzania_image_size,
                                         batch_size, labels)
    batch = next(batches)
    assert len(batch) == 2

    side = tanzania_image_size
    expected_image_shape = (batch_size, side, side, nb_channels)
    assert batch[0].shape == expected_image_shape
    expected_label_shape = (batch_size, side, side, nb_labels)
    assert batch[1].shape == expected_label_shape
def test_semseg_aerial_generator(
    aerial_image_size,
    aerial_sample,
    aerial_sample_config,
    nb_channels,
):
    """Check the shapes of a semantic segmentation batch for the AerialImage
    dataset

    The first generated item must hold two elements: the images, with
    `nb_channels` as last dimension, and the labels, with one entry per
    configured label as last dimension.
    """
    batch_size = 4
    labels = utils.read_config(aerial_sample_config)["labels"]
    nb_labels = len([x["id"] for x in labels])
    batches = generator.create_generator("aerial", "semseg", aerial_sample,
                                         aerial_image_size, batch_size,
                                         labels)
    batch = next(batches)
    assert len(batch) == 2

    side = aerial_image_size
    expected_image_shape = (batch_size, side, side, nb_channels)
    assert batch[0].shape == expected_image_shape
    expected_label_shape = (batch_size, side, side, nb_labels)
    assert batch[1].shape == expected_label_shape
def test_semseg_mapillary_generator(mapillary_image_size, mapillary_sample,
                                    mapillary_sample_config, nb_channels):
    """Check the shapes of a semantic segmentation batch for the Mapillary
    dataset

    The first generated item must hold two elements: the images, with
    `nb_channels` as last dimension, and the labels, with one entry per
    configured label as last dimension.
    """
    batch_size = 10
    labels = utils.read_config(mapillary_sample_config)["labels"]
    nb_labels = len([x["id"] for x in labels])
    batches = generator.create_generator("mapillary", "semseg",
                                         mapillary_sample,
                                         mapillary_image_size, batch_size,
                                         labels)
    batch = next(batches)
    assert len(batch) == 2

    side = mapillary_image_size
    expected_image_shape = (batch_size, side, side, nb_channels)
    assert batch[0].shape == expected_image_shape
    expected_label_shape = (batch_size, side, side, nb_labels)
    assert batch[1].shape == expected_label_shape
def test_featdet_shape_generator(
    shapes_image_size,
    shapes_sample,
    shapes_sample_config,
    nb_channels,
):
    """Check the shapes of a feature detection batch for the shape dataset

    The first generated item must hold two elements: the images, with
    `nb_channels` as last dimension, and one row of labels per image, with one
    column per configured label.
    """
    batch_size = 10
    labels = utils.read_config(shapes_sample_config)["labels"]
    nb_labels = len([x["id"] for x in labels])
    batches = generator.create_generator("shapes", "featdet", shapes_sample,
                                         shapes_image_size, batch_size,
                                         labels)
    batch = next(batches)
    assert len(batch) == 2

    expected_image_shape = (batch_size, shapes_image_size, shapes_image_size,
                            nb_channels)
    assert batch[0].shape == expected_image_shape
    assert batch[1].shape == (batch_size, nb_labels)
def test_wrong_model_dataset_generator(shapes_sample_config):
    """Check that unknown dataset and model names are rejected

    Each call to `create_generator` below is expected to raise a `ValueError`
    whose message is compared verbatim with the expected text.
    """
    unknown_dataset = "fake"
    unknown_model = "conquer_the_world"
    image_size = 10
    batch_size = 10
    training_path = "./tests/data/{}/training".format(unknown_dataset)
    labels = utils.read_config(shapes_sample_config)["labels"]

    # An unknown dataset name combined with a valid model name
    with pytest.raises(ValueError) as dataset_error:
        generator.create_generator(
            unknown_dataset,
            "feature_detection",
            training_path,
            image_size,
            batch_size,
            labels,
        )
    expected_dataset_msg = "Wrong dataset name {}".format(unknown_dataset)
    assert str(dataset_error.value) == expected_dataset_msg

    # A valid dataset name combined with an unknown model name
    with pytest.raises(ValueError) as model_error:
        generator.create_generator(
            "shapes",
            unknown_model,
            training_path,
            image_size,
            batch_size,
            labels,
        )
    expected_model_msg = "Wrong model name {}".format(unknown_model)
    assert str(model_error.value) == expected_model_msg
def test_model_training(shapes_image_size, shapes_sample,
                        shapes_sample_config, shapes_temp_dir,
                        shapes_nb_images):
    """Train a small feature detection network with the Keras API, then check
    inference and model backup

    Training, prediction and saving are kept in a single test so that the
    (potentially long) training step runs only once.
    """
    batch_size = 10
    nb_epochs = 1
    steps_per_epoch = shapes_nb_images // batch_size
    labels = read_config(shapes_sample_config)["labels"]
    nb_labels = len([x["id"] for x in labels if x["is_evaluate"]])
    batches = create_generator("shapes", "featdet", shapes_sample,
                               shapes_image_size, batch_size, labels)

    # Training
    network = FeatureDetectionNetwork(
        "test",
        image_size=shapes_image_size,
        nb_labels=nb_labels,
    )
    model = Model(network.X, network.Y)
    model.compile(
        loss="binary_crossentropy",
        optimizer="adam",
        metrics=["acc"],
    )
    training = model.fit_generator(batches,
                                   epochs=nb_epochs,
                                   steps_per_epoch=steps_per_epoch)
    metrics = training.history
    assert len(metrics) == 2
    assert all(name in metrics.keys() for name in ["acc", "loss"])
    assert 0 <= metrics["acc"][0] <= 1

    # Inference on random images: one score per image and per label
    random_images = np.random.randint(
        0, 255, [batch_size, shapes_image_size, shapes_image_size, 3])
    scores = model.predict(random_images)
    assert scores.shape == (batch_size, nb_labels)
    assert all(0 <= value <= 1 for value in scores.ravel())

    # Backup of the trained model on the file system
    backup_path = os.path.join(str(shapes_temp_dir), "checkpoints",
                               "test_model_{:02d}.h5".format(nb_epochs))
    model.save(backup_path)
    assert os.path.isfile(backup_path)
def test_featdet_mapillary_generator(
    mapillary_image_size,
    mapillary_sample,
    mapillary_sample_config,
    nb_channels,
):
    """Check the shapes of a feature detection batch for the Mapillary dataset

    The first generated item must hold two elements: the images, with
    `nb_channels` as last dimension, and one row of labels per image, with one
    column per configured label.
    """
    batch_size = 10
    labels = utils.read_config(mapillary_sample_config)["labels"]
    nb_labels = len([x["id"] for x in labels])
    batches = generator.create_generator(
        "mapillary",
        "feature_detection",
        mapillary_sample,
        mapillary_image_size,
        batch_size,
        labels,
    )
    batch = next(batches)
    assert len(batch) == 2

    expected_image_shape = (
        batch_size,
        mapillary_image_size,
        mapillary_image_size,
        nb_channels,
    )
    assert batch[0].shape == expected_image_shape
    assert batch[1].shape == (batch_size, nb_labels)
def test_semseg_shape_generator(
    shapes_image_size,
    shapes_sample,
    shapes_sample_config,
    nb_channels,
):
    """Check the shapes of a semantic segmentation batch for the shape dataset

    The first generated item must hold two elements: the images, with
    `nb_channels` as last dimension, and the labels, with one entry per
    configured label as last dimension.
    """
    batch_size = 10
    labels = utils.read_config(shapes_sample_config)["labels"]
    nb_labels = len([x["id"] for x in labels])
    batches = generator.create_generator(
        "shapes",
        "semantic_segmentation",
        shapes_sample,
        shapes_image_size,
        batch_size,
        labels,
    )
    batch = next(batches)
    assert len(batch) == 2

    side = shapes_image_size
    expected_image_shape = (batch_size, side, side, nb_channels)
    assert batch[0].shape == expected_image_shape
    expected_label_shape = (batch_size, side, side, nb_labels)
    assert batch[1].shape == expected_label_shape
Exemple #11
0
                     aggregate_value, args.dropout,
                     args.learning_rate, args.learning_rate_decay]
    instance_name = utils.list_to_str(instance_args, "_")
    prepro_folder = utils.prepare_preprocessed_folder(args.datapath, args.dataset,
                                                      args.image_size,
                                                      aggregate_value)

    # Data gathering
    if (os.path.isfile(prepro_folder["training_config"]) and os.path.isfile(prepro_folder["validation_config"])
        and os.path.isfile(prepro_folder["testing_config"])):
        train_config = utils.read_config(prepro_folder["training_config"])
        label_ids = [x['id'] for x in train_config['labels'] if x['is_evaluate']]
        train_generator = generator.create_generator(
            args.dataset,
            args.model,
            prepro_folder["training"],
            args.image_size,
            args.batch_size,
            label_ids,
            seed=SEED)
        validation_generator = generator.create_generator(
            args.dataset,
            args.model,
            prepro_folder["validation"],
            args.image_size,
            args.batch_size,
            label_ids,
            seed=SEED)
        test_generator = generator.create_generator(
            args.dataset,
            args.model,
            prepro_folder["testing"],
Exemple #12
0
                                                      aggregate_value)

    if args.dataset == 'aerial':
        model_input_size = utils.get_image_size_from_tile(args.image_size)
    else:
        model_input_size = args.image_size

    if os.path.isfile(prepro_folder["training_config"]):
        train_config = utils.read_config(prepro_folder["training_config"])
        label_ids = [
            x['id'] for x in train_config['labels'] if x['is_evaluate']
        ]
        train_generator = generator.create_generator(args.dataset,
                                                     args.model,
                                                     prepro_folder["training"],
                                                     model_input_size,
                                                     args.batch_size,
                                                     train_config['labels'],
                                                     seed=SEED)
    else:
        logger.error(("There is no training data with the given "
                      "parameters. Please generate a valid dataset "
                      "before calling the training program."))
        sys.exit(1)

    if os.path.isfile(prepro_folder["validation_config"]):
        validation_generator = generator.create_generator(
            args.dataset,
            args.model,
            prepro_folder["validation"],
            model_input_size,
Exemple #13
0
def get_data(folders, dataset, model, image_size, batch_size):
    """Build the training, validation and testing generators of `dataset` for
    `model`

    Parameters
    ----------
    folders : dict
        Dictionary of useful folders that indicates paths to data; the keys
        `training_config`, `validation_config`, `testing_config`, `training`,
        `validation` and `testing` are read here
    dataset : str
        Name of the used dataset (*e.g.* `shapes` or `mapillary`)
    model : str
        Name of the addressed research problem (*e.g.* `feature_detection`
        or `semantic_segmentation`)
    image_size : int
        Size of the images, in pixel (height=width)
    batch_size : int
        Number of images in each batch

    Returns
    -------
    tuple
        Number of labels flagged `is_evaluate` in the training configuration,
        followed by the training, validation and testing data generators

    Notes
    -----
    When one of the three configuration files is missing, an error is logged
    and the program exits with status 1 (`sys.exit`).

    """
    # The three configuration files are checked in this order, stopping at
    # the first missing one
    config_keys = ("training_config", "validation_config", "testing_config")
    if not all(os.path.isfile(folders[key]) for key in config_keys):
        utils.logger.error((
            "There is no valid data with the specified parameters. "
            "Please generate a valid dataset before calling the training program."
        ))
        sys.exit(1)

    labels = utils.read_config(folders["training_config"])["labels"]
    nb_labels = len([x['id'] for x in labels if x['is_evaluate']])

    def build_generator(split, **options):
        # Every generator shares the labels read from the training config
        return generator.create_generator(dataset, model, folders[split],
                                          image_size, batch_size, labels,
                                          seed=SEED, **options)

    train_generator = build_generator("training")
    validation_generator = build_generator("validation")
    # Only the testing generator is created with the `inference` flag
    test_generator = build_generator("testing", inference=True)

    return nb_labels, train_generator, validation_generator, test_generator