Example #1
def test_wrong_model_dataset_generator(shapes_sample_config):
    """Test a wrong model and wrong dataset
    """
    dataset = "fake"
    model = "conquer_the_world"
    IMAGE_SIZE = 10
    BATCH_SIZE = 10
    datapath = "./tests/data/" + dataset + "/training"
    config = utils.read_config(shapes_sample_config)

    # wrong dataset name
    with pytest.raises(ValueError) as excinfo:
        generator.create_generator(
            dataset,
            "featdet",
            datapath,
            IMAGE_SIZE,
            BATCH_SIZE,
            config["labels"],
        )
    assert str(excinfo.value) == "Wrong dataset name {}".format(dataset)

    # wrong model name
    with pytest.raises(ValueError) as excinfo:
        generator.create_generator("shapes", model, datapath, IMAGE_SIZE,
                                   BATCH_SIZE, config["labels"])
    expected_failure_msg = "Wrong model name {} (choose amongst {})".format(
        model, AVAILABLE_MODELS)
    assert str(excinfo.value) == expected_failure_msg
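
The exact messages asserted above come from the argument checks inside `generator.create_generator`. A minimal, hypothetical sketch of such a check (the tuples of valid names below are assumptions, not the library's actual constants):

# Hypothetical validation sketch; AVAILABLE_MODELS and AVAILABLE_DATASETS are
# assumed values, only the raised messages mirror the assertions above.
AVAILABLE_MODELS = ("featdet", "semseg")
AVAILABLE_DATASETS = ("shapes", "mapillary", "aerial", "tanzania")


def check_generator_args(dataset, model):
    if dataset not in AVAILABLE_DATASETS:
        raise ValueError("Wrong dataset name {}".format(dataset))
    if model not in AVAILABLE_MODELS:
        raise ValueError("Wrong model name {} (choose amongst {})".format(
            model, AVAILABLE_MODELS))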
Example #2
def test_semseg_aerial_generator(aerial_image_size, aerial_sample,
                                 aerial_sample_config, nb_channels):
    """Test the data generator for the AerialImage dataset
    """
    BATCH_SIZE = 4
    config = utils.read_config(aerial_sample_config)
    label_ids = [x["id"] for x in config["labels"]]
    gen = generator.create_generator(
        "aerial",
        "semseg",
        aerial_sample,
        aerial_image_size,
        BATCH_SIZE,
        config["labels"],
    )
    item = next(gen)
    assert len(item) == 2
    im_shape = item[0].shape
    assert im_shape == (
        BATCH_SIZE,
        aerial_image_size,
        aerial_image_size,
        nb_channels,
    )
    label_shape = item[1].shape
    assert label_shape == (
        BATCH_SIZE,
        aerial_image_size,
        aerial_image_size,
        len(label_ids),
    )
Example #3
def test_semseg_tanzania_generator(tanzania_image_size, tanzania_sample,
                                   tanzania_sample_config, nb_channels):
    """Test the data generator for the Open AI Tanzania dataset
    """
    BATCH_SIZE = 3
    config = utils.read_config(tanzania_sample_config)
    label_ids = [x["id"] for x in config["labels"]]
    gen = generator.create_generator(
        "tanzania",
        "semseg",
        tanzania_sample,
        tanzania_image_size,
        BATCH_SIZE,
        config["labels"],
    )
    item = next(gen)
    assert len(item) == 2
    im_shape = item[0].shape
    assert im_shape == (
        BATCH_SIZE,
        tanzania_image_size,
        tanzania_image_size,
        nb_channels,
    )
    label_shape = item[1].shape
    assert label_shape == (
        BATCH_SIZE,
        tanzania_image_size,
        tanzania_image_size,
        len(label_ids),
    )
Example #4
def test_featdet_shape_generator(shapes_image_size, shapes_sample,
                                 shapes_sample_config, nb_channels):
    """Test the data generator for the shape dataset
    """
    BATCH_SIZE = 10
    config = utils.read_config(shapes_sample_config)
    label_ids = [x["id"] for x in config["labels"]]
    gen = generator.create_generator(
        "shapes",
        "featdet",
        shapes_sample,
        shapes_image_size,
        BATCH_SIZE,
        config["labels"],
    )
    item = next(gen)
    assert len(item) == 2
    im_shape = item[0].shape
    assert im_shape == (
        BATCH_SIZE,
        shapes_image_size,
        shapes_image_size,
        nb_channels,
    )
    label_shape = item[1].shape
    assert label_shape == (BATCH_SIZE, len(label_ids))
Example #5
def test_semseg_mapillary_generator(
    mapillary_image_size,
    mapillary_sample,
    mapillary_sample_config,
    nb_channels,
):
    """Test the data generator for the Mapillary dataset
    """
    BATCH_SIZE = 10
    config = utils.read_config(mapillary_sample_config)
    label_ids = [x["id"] for x in config["labels"]]
    gen = generator.create_generator(
        "mapillary",
        "semseg",
        mapillary_sample,
        mapillary_image_size,
        BATCH_SIZE,
        config["labels"],
    )
    item = next(gen)
    assert len(item) == 2
    im_shape = item[0].shape
    assert im_shape == (
        BATCH_SIZE,
        mapillary_image_size,
        mapillary_image_size,
        nb_channels,
    )
    label_shape = item[1].shape
    assert label_shape == (
        BATCH_SIZE,
        mapillary_image_size,
        mapillary_image_size,
        len(label_ids),
    )
Example #6
    def build_glossary(self, config_filename):
        """Read the Mapillary glossary stored as a json file at the data
        repository root

        Parameters
        ----------
        config_filename : str
            String designating the relative path of the dataset glossary
        (based on the Mapillary dataset)
        """
        glossary = utils.read_config(config_filename)
        if "labels" not in glossary:
            logger.error("There is no 'label' key in the provided glossary.")
            return None
        for lab_id, label in enumerate(glossary["labels"]):
            if "aggregate" in config_filename:
                self.add_label(
                    lab_id,
                    label["name"],
                    label["color"],
                    label["evaluate"],
                    label["family"],
                    label["contains_id"],
                    label["contains"],
                )
            else:
                name_items = label["name"].split("--")
                self.add_label(
                    lab_id,
                    name_items[-1],
                    label["color"],
                    label["evaluate"],
                    name_items[0],
                )
Example #7
def get_labels(datapath, dataset, tile_size):
    """Extract labels from the `dataset` glossary, according to the
    preprocessed version of the dataset

    Parameters
    ----------
    datapath : str
        Path of the data on the file system
    dataset : str
        Name of the dataset
    tile_size : int
        Size of preprocessed images, in pixels

    Returns
    -------
    list
        List of dictionaries that describe the dataset labels
    """
    prepro_folder = utils.prepare_preprocessed_folder(
        datapath,
        dataset,
        tile_size,
    )
    if os.path.isfile(prepro_folder["testing_config"]):
        test_config = utils.read_config(prepro_folder["testing_config"])
    else:
        raise ValueError(("There is no testing data with the given "
                          "parameters. Please generate a valid dataset "
                          "before calling the program."))
    return [
        label for label in test_config["labels"] if label["is_evaluate"]
    ]
Example #8
def test_model_backup_loading(shapes_image_size, shapes_sample_config,
                              shapes_temp_dir):
    """Test the model checkpoint recovering
    """
    config = read_config(shapes_sample_config)
    label_ids = [x['id'] for x in config['labels'] if x['is_evaluate']]

    cnn = FeatureDetectionNetwork("test",
                                  image_size=shapes_image_size,
                                  nb_labels=len(label_ids))
    model = Model(cnn.X, cnn.Y)
    old_weights = model.get_weights()
    checkpoint_path = os.path.join(str(shapes_temp_dir), "checkpoints")
    trained_model_epoch = 0
    if os.path.isdir(checkpoint_path):
        checkpoints = os.listdir(checkpoint_path)
        if len(checkpoints) > 0:
            model_checkpoint = max(checkpoints)
            trained_model_epoch = int(model_checkpoint[-5:-3])
            checkpoint_complete_path = os.path.join(checkpoint_path,
                                                    model_checkpoint)
            model.load_weights(checkpoint_complete_path)
    new_weights = model.get_weights()
    assert trained_model_epoch > 0
    assert len(old_weights) == len(new_weights)
    assert old_weights[0].shape == new_weights[0].shape
    # Test if old and new weights are different (at least for one layer)
    assert any(not np.allclose(lhs, rhs)
               for lhs, rhs in zip(old_weights, new_weights))
Example #9
def get_data(folders, dataset, model, image_size, batch_size):
    """On the file system, recover `dataset` that can solve `model` problem

    Parameters
    ----------
    folders : dict
        Dictionary of useful folders that indicates paths to data
    dataset : str
        Name of the used dataset (*e.g.* `shapes` or `mapillary`)
    model : str
        Name of the addressed research problem (*e.g.* `feature_detection` or
    `semantic_segmentation`)
    image_size : int
        Size of the images, in pixels (height=width)
    batch_size : int
        Number of images in each batch

    Returns
    -------
    tuple
        Number of labels in the dataset, as well as training and validation
    data generators

    """
    # Data gathering
    if os.path.isfile(folders["training_config"]):
        train_config = utils.read_config(folders["training_config"])
        label_ids = [
            x["id"] for x in train_config["labels"] if x["is_evaluate"]
        ]
        train_generator = generator.create_generator(
            dataset,
            model,
            folders["training"],
            image_size,
            batch_size,
            train_config["labels"],
            seed=SEED,
        )
    else:
        raise FileNotFoundError(
            "There is no training data with the given parameters. Please "
            "generate a valid dataset before calling the training program.")
    if os.path.isfile(folders["validation_config"]):
        validation_generator = generator.create_generator(
            dataset,
            model,
            folders["validation"],
            image_size,
            batch_size,
            train_config["labels"],
            seed=SEED,
        )
    else:
        raise FileNotFoundError(
            "There is no validation data with the given parameters. Please "
            "generate a valid dataset before calling the training program.")
    nb_labels = len(label_ids)
    return nb_labels, train_generator, validation_generator
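
A hypothetical call of this helper (the folder dictionary would typically come from `utils.prepare_preprocessed_folder`, whose signature varies across the examples on this page; all literal values below are assumptions):

# Hypothetical usage; dataset name, model name and sizes are example values.
folders = utils.prepare_preprocessed_folder("./data", "shapes", 64)
nb_labels, train_gen, valid_gen = get_data(
    folders, "shapes", "featdet", image_size=64, batch_size=10
)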
Example #10
def test_model_training(
    shapes_image_size,
    shapes_sample,
    shapes_sample_config,
    shapes_temp_dir,
    shapes_nb_images,
):
    """Test the training of a simple neural network with Keras API, as well as
    model inference and trained model backup

    One big test function to avoid duplicating the training operations (that
    can be long)
    """
    BATCH_SIZE = 10
    NB_EPOCHS = 1
    NB_STEPS = shapes_nb_images // BATCH_SIZE
    config = read_config(shapes_sample_config)
    label_ids = [x["id"] for x in config["labels"] if x["is_evaluate"]]
    gen = create_generator(
        "shapes",
        "featdet",
        shapes_sample,
        shapes_image_size,
        BATCH_SIZE,
        config["labels"],
    )
    cnn = FeatureDetectionNetwork("test",
                                  image_size=shapes_image_size,
                                  nb_labels=len(label_ids))
    model = Model(cnn.X, cnn.Y)
    model.compile(loss="binary_crossentropy",
                  optimizer="adam",
                  metrics=["acc"])
    hist = model.fit_generator(gen, epochs=NB_EPOCHS, steps_per_epoch=NB_STEPS)
    assert len(hist.history) == 2
    assert all(k in hist.history.keys() for k in ["acc", "loss"])
    assert hist.history["acc"][0] >= 0 and hist.history["acc"][0] <= 1

    test_image = np.random.randint(
        0, 255, [BATCH_SIZE, shapes_image_size, shapes_image_size, 3])
    score = model.predict(test_image)
    assert score.shape == (BATCH_SIZE, len(label_ids))
    assert all(0 <= s and s <= 1 for s in score.ravel())

    BACKUP_FILENAME = os.path.join(
        str(shapes_temp_dir),
        "checkpoints",
        "test_model_{:02d}.h5".format(NB_EPOCHS),
    )
    model.save(BACKUP_FILENAME)
    assert os.path.isfile(BACKUP_FILENAME)
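
    # The backup written above can be restored later for inference; a minimal
    # sketch that could be appended to the test body, assuming a plain Keras
    # installation and the BACKUP_FILENAME / test_image names defined above.
    from keras.models import load_model

    restored_model = load_model(BACKUP_FILENAME)
    restored_score = restored_model.predict(test_image)
    assert restored_score.shape == (BATCH_SIZE, len(label_ids))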
Example #11
def main(datadir):
    """Generate a new config.json file with aggregated labels.

    Parameters
    ----------
    datadir : str
        Path of the directory that contains the `config.json` glossary

    Returns
    -------
    dict
        Aggregated label configuration
    """
    config = utils.read_config(os.path.join(datadir, 'config.json'))
    df = config_as_dataframe(config)
    agg_config = aggregate_config(config, df)
    return agg_config
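
A hypothetical invocation (the data directory and output file name are assumptions), dumping the aggregated glossary back to disk:

# Hypothetical usage of main(); directory and file name are made up.
import json
import os

agg_config = main("./data/mapillary")
output_path = os.path.join("./data/mapillary", "config_aggregate.json")
with open(output_path, "w") as fobj:
    json.dump(agg_config, fobj)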
Example #12
def test_featdet_mapillary_generator(mapillary_image_size, mapillary_sample,
                                     mapillary_sample_config, nb_channels):
    """Test the data generator for the Mapillary dataset
    """
    BATCH_SIZE = 10
    config = utils.read_config(mapillary_sample_config)
    label_ids = [x['id'] for x in config["labels"]]
    gen = generator.create_generator("mapillary", "feature_detection",
                                     mapillary_sample, mapillary_image_size,
                                     BATCH_SIZE, config["labels"])
    item = next(gen)
    assert len(item) == 2
    im_shape = item[0].shape
    assert im_shape == (BATCH_SIZE, mapillary_image_size, mapillary_image_size,
                        nb_channels)
    label_shape = item[1].shape
    assert label_shape == (BATCH_SIZE, len(label_ids))
Example #13
def test_semseg_shape_generator(shapes_image_size, shapes_sample,
                                shapes_sample_config, nb_channels):
    """Test the data generator for the shape dataset
    """
    BATCH_SIZE = 10
    config = utils.read_config(shapes_sample_config)
    label_ids = [x['id'] for x in config["labels"]]
    gen = generator.create_generator("shapes", "semantic_segmentation",
                                     shapes_sample, shapes_image_size,
                                     BATCH_SIZE, config["labels"])
    item = next(gen)
    assert len(item) == 2
    im_shape = item[0].shape
    assert im_shape == (BATCH_SIZE, shapes_image_size, shapes_image_size,
                        nb_channels)
    label_shape = item[1].shape
    assert label_shape == (BATCH_SIZE, shapes_image_size, shapes_image_size,
                           len(label_ids))
Example #14
def test_wrong_model_dataset_generator(shapes_sample_config):
    """Test a wrong model and wrong dataset
    """
    dataset = "fake"
    model = "conquer_the_world"
    IMAGE_SIZE = 10
    BATCH_SIZE = 10
    datapath = ("./tests/data/" + dataset + "/training")
    config = utils.read_config(shapes_sample_config)

    # wrong dataset name
    with pytest.raises(ValueError) as excinfo:
        generator.create_generator(dataset, 'feature_detection', datapath,
                                   IMAGE_SIZE, BATCH_SIZE, config["labels"])
    assert str(excinfo.value) == "Wrong dataset name {}".format(dataset)

    # wrong model name
    with pytest.raises(ValueError) as excinfo:
        generator.create_generator('shapes', model, datapath, IMAGE_SIZE,
                                   BATCH_SIZE, config["labels"])
    assert str(excinfo.value) == "Wrong model name {}".format(model)
Example #15
def predict(
    filenames,
    dataset,
    problem,
    datapath="./data",
    name=None,
    network=None,
    batch_size=None,
    dropout=None,
    learning_rate=None,
    learning_rate_decay=None,
    output_dir="/tmp/deeposlandia/predicted",
):
    """Make label prediction on image indicated by ̀filename`, according to
    considered `problem`

    Parameters
    ----------
    filenames : list
        Names (or glob patterns) of the image files on the file system
    dataset : str
        Name of the dataset
    problem : str
        Name of the considered model, either `featdet` or `semseg`
    datapath : str
        Relative path of dataset repository
    name : str
        Name of the saved network
    network : str
        Name of the chosen architecture, either `simple`, `vgg` or `inception`
    batch_size : integer
        Batch size used for training the model
    dropout : float
        Dropout rate used for training the model
    learning_rate : float
        Learning rate used for training the model
    learning_rate_decay : float
        Learning rate decay used for training the model
    output_dir : str
        Path of the output directory, where labelled images will be stored
    (useful only if `problem=semseg`)

    Returns
    -------
    dict
        Predictions for each label, expressed as floats between 0 and 1
    (acting as percentages)

    """
    # `image_paths` is first got as
    # [[image1, ..., image_i], [image_j, ..., image_n]]
    image_paths = [glob.glob(f) for f in filenames]
    # then it is flattened to get a simple list
    flattened_image_paths = sum(image_paths, [])
    images = extract_images(flattened_image_paths)
    model_input_size = images.shape[1]

    instance_args = [
        name,
        model_input_size,
        network,
        batch_size,
        dropout,
        learning_rate,
        learning_rate_decay,
    ]
    instance_name = utils.list_to_str(instance_args, "_")

    prepro_folder = utils.prepare_preprocessed_folder(datapath, dataset,
                                                      model_input_size)

    if os.path.isfile(prepro_folder["training_config"]):
        train_config = utils.read_config(prepro_folder["training_config"])
        label_ids = [
            x["id"] for x in train_config["labels"] if x["is_evaluate"]
        ]
        nb_labels = len(label_ids)
    else:
        raise FileNotFoundError(
            "There is no training data with the given parameters. "
            "Please generate a valid dataset before calling the program.")

    output_folder = utils.prepare_output_folder(datapath, dataset,
                                                model_input_size, problem)
    instance_path = os.path.join(output_folder, output_folder["best-instance"])
    dropout, network = utils.recover_instance(instance_path)
    model = init_model(
        problem,
        instance_name,
        model_input_size,
        nb_labels,
        dropout,
        network,
    )
    if os.path.isfile(output_folder["best-model"]):
        model.load_weights(output_folder["best-model"])
        logger.info(
            "Model weights have been recovered from %s",
            output_folder["best-model"],
        )
    else:
        logger.info(
            "No available trained model for this image size with optimized hyperparameters. "
            "The inference will be done on an untrained model")

    y_raw_pred = model.predict(images, batch_size=2, verbose=1)

    result = {}
    if problem == "featdet":
        label_info = [(i["category"], utils.GetHTMLColor(i["color"]))
                      for i in train_config["labels"]]
        for filename, prediction in zip(flattened_image_paths, y_raw_pred):
            result[filename] = [(i[0], 100 * round(float(j), 2), i[1])
                                for i, j in zip(label_info, prediction)]
        return result
    elif problem == "semseg":
        os.makedirs(output_dir, exist_ok=True)
        predicted_labels = np.argmax(y_raw_pred, axis=3)
        encountered_labels = np.unique(predicted_labels)
        meaningful_labels = [
            x for i, x in enumerate(train_config["labels"])
            if i in encountered_labels
        ]
        labelled_images = np.zeros(shape=np.append(predicted_labels.shape, 3),
                                   dtype=np.int8)
        for i in range(nb_labels):
            labelled_images[predicted_labels ==
                            i] = train_config["labels"][i]["color"]
        for predicted_labels, filename in zip(labelled_images,
                                              flattened_image_paths):
            predicted_image = Image.fromarray(predicted_labels, "RGB")
            filename = filename.replace(".jpg", ".png")
            predicted_image_path = os.path.join(output_dir,
                                                os.path.basename(filename))
            predicted_image.save(predicted_image_path)
            result[filename] = os.path.basename(filename)
        return {
            "labels": summarize_config(meaningful_labels),
            "label_images": result,
        }
    else:
        raise ValueError(
            "Unknown model argument. Please use 'featdet' or 'semseg'.")
Example #16
def predict(
    filenames,
    dataset,
    problem,
    datapath="./data",
    aggregate=False,
    name=None,
    network=None,
    batch_size=None,
    dropout=None,
    learning_rate=None,
    learning_rate_decay=None,
    output_dir="/tmp/deeposlandia/predicted",
):
    """Make label prediction on image indicated by ̀filename`, according to
    considered `problem`

    Parameters
    ----------
    filenames : list
        Names (or glob patterns) of the image files on the file system
    dataset : str
        Name of the dataset
    problem : str
        Name of the considered model, either `feature_detection` or
    `semantic_segmentation`
    datapath : str
        Relative path of dataset repository
    aggregate : bool
        Whether or not the labels are aggregated
    name : str
        Name of the saved network
    network : str
        Name of the chosen architecture, either `simple`, `vgg` or `inception`
    batch_size : integer
        Batch size used for training the model
    dropout : float
        Dropout rate used for training the model
    learning_rate : float
        Learning rate used for training the model
    learning_rate_decay : float
        Learning rate decay used for training the model
    output_dir : str
        Path of the output directory, where labelled images will be stored
    (useful only if `problem=semantic_segmentation`)

    Returns
    -------
    dict
        Predictions for each label, expressed as floats between 0 and 1
    (acting as percentages)

    """
    # `image_paths` is first got as
    # [[image1, ..., image_i], [image_j, ..., image_n]]
    image_paths = [glob.glob(f) for f in filenames]
    # then it is flattened to get a simple list
    flattened_image_paths = sum(image_paths, [])
    images = extract_images(flattened_image_paths)
    model_input_size = images.shape[1]
    if dataset == "aerial":
        tile_size = utils.get_tile_size_from_image(model_input_size)
    else:
        tile_size = model_input_size

    aggregate_value = "full" if not aggregate else "aggregated"
    instance_args = [
        name,
        tile_size,
        network,
        batch_size,
        aggregate_value,
        dropout,
        learning_rate,
        learning_rate_decay,
    ]
    instance_name = utils.list_to_str(instance_args, "_")

    prepro_folder = utils.prepare_preprocessed_folder(
        datapath, dataset, tile_size, aggregate_value
    )

    if os.path.isfile(prepro_folder["training_config"]):
        train_config = utils.read_config(prepro_folder["training_config"])
        label_ids = [
            x["id"] for x in train_config["labels"] if x["is_evaluate"]
        ]
        nb_labels = len(label_ids)
    else:
        logger.error(
            (
                "There is no training data with the given "
                "parameters. Please generate a valid dataset "
                "before calling the program."
            )
        )
        sys.exit(1)

    if any([arg is None for arg in instance_args]):
        logger.info(
            ("Some arguments are None, " "the best model is considered.")
        )
        output_folder = utils.prepare_output_folder(datapath, dataset, problem)
        instance_filename = (
            "best-instance-" + str(tile_size) + "-" + aggregate_value + ".json"
        )
        instance_path = os.path.join(output_folder, instance_filename)
        dropout, network = utils.recover_instance(instance_path)
        model = init_model(
            problem,
            instance_name,
            model_input_size,
            nb_labels,
            dropout,
            network,
        )
        checkpoint_filename = (
            "best-model-" + str(tile_size) + "-" + aggregate_value + ".h5"
        )
        checkpoint_full_path = os.path.join(output_folder, checkpoint_filename)
        if os.path.isfile(checkpoint_full_path):
            logger.info("Checkpoint full path : %s", checkpoint_full_path)
            model.load_weights(checkpoint_full_path)
            logger.info(
                "Model weights have been recovered from %s",
                checkpoint_full_path,
            )
        else:
            logger.info(
                (
                    "No available trained model for this image size"
                    " with optimized hyperparameters. The "
                    "inference will be done on an untrained model"
                )
            )
    else:
        logger.info("All instance arguments are filled out.")
        output_folder = utils.prepare_output_folder(
            datapath, dataset, problem, instance_name
        )
        model = init_model(
            problem,
            instance_name,
            model_input_size,
            nb_labels,
            dropout,
            network,
        )
        checkpoints = [
            item
            for item in os.listdir(output_folder)
            if "checkpoint-epoch" in item
        ]
        if len(checkpoints) > 0:
            model_checkpoint = max(checkpoints)
            checkpoint_full_path = os.path.join(
                output_folder, model_checkpoint
            )
            model.load_weights(checkpoint_full_path)
            logger.info(
                "Model weights have been recovered from %s",
                checkpoint_full_path,
            )
        else:
            logger.info(
                (
                    "No available checkpoint for this configuration. "
                    "The model will be trained from scratch."
                )
            )

    y_raw_pred = model.predict(images)

    result = {}
    if problem == "feature_detection":
        label_info = [
            (i["category"], utils.GetHTMLColor(i["color"]))
            for i in train_config["labels"]
        ]
        for filename, prediction in zip(flattened_image_paths, y_raw_pred):
            result[filename] = [
                (i[0], 100 * round(float(j), 2), i[1])
                for i, j in zip(label_info, prediction)
            ]
        return result
    elif problem == "semantic_segmentation":
        os.makedirs(output_dir, exist_ok=True)
        predicted_labels = np.argmax(y_raw_pred, axis=3)
        encountered_labels = np.unique(predicted_labels)
        meaningful_labels = [
            x
            for i, x in enumerate(train_config["labels"])
            if i in encountered_labels
        ]
        labelled_images = np.zeros(
            shape=np.append(predicted_labels.shape, 3), dtype=np.int8
        )
        for i in range(nb_labels):
            labelled_images[predicted_labels == i] = train_config["labels"][i][
                "color"
            ]
        for predicted_labels, filename in zip(
            labelled_images, flattened_image_paths
        ):
            predicted_image = Image.fromarray(predicted_labels, "RGB")
            filename = filename.replace(".jpg", ".png")
            predicted_image_path = os.path.join(
                output_dir, os.path.basename(filename)
            )
            predicted_image.save(predicted_image_path)
            result[filename] = os.path.basename(filename)
        return {
            "labels": summarize_config(meaningful_labels),
            "label_images": result,
        }
    else:
        logger.error(
            (
                "Unknown model argument. Please use "
                "'feature_detection' or 'semantic_segmentation'."
            )
        )
        sys.exit(1)
Example #17
    args = parser.parse_args()

    # Data path and repository management
    aggregate_value = "full" if not args.aggregate_label else "aggregated"
    instance_args = [args.name, args.image_size, args.network, args.batch_size,
                     aggregate_value, args.dropout,
                     args.learning_rate, args.learning_rate_decay]
    instance_name = utils.list_to_str(instance_args, "_")
    prepro_folder = utils.prepare_preprocessed_folder(args.datapath, args.dataset,
                                                      args.image_size,
                                                      aggregate_value)

    # Data gathering
    if (os.path.isfile(prepro_folder["training_config"])
            and os.path.isfile(prepro_folder["validation_config"])
            and os.path.isfile(prepro_folder["testing_config"])):
        train_config = utils.read_config(prepro_folder["training_config"])
        label_ids = [x['id'] for x in train_config['labels'] if x['is_evaluate']]
        train_generator = generator.create_generator(
            args.dataset,
            args.model,
            prepro_folder["training"],
            args.image_size,
            args.batch_size,
            label_ids,
            seed=SEED)
        validation_generator = generator.create_generator(
            args.dataset,
            args.model,
            prepro_folder["validation"],
            args.image_size,
            args.batch_size,
Example #18
def get_data(folders, dataset, model, image_size, batch_size):
    """On the file system, recover `dataset` that can solve `model` problem

    Parameters
    ----------
    folders : dict
        Dictionary of useful folders that indicates paths to data
    dataset : str
        Name of the used dataset (*e.g.* `shapes` or `mapillary`)
    model : str
        Name of the addressed research problem (*e.g.* `feature_detection` or `semantic_segmentation`)
    image_size : int
        Size of the images, in pixels (height=width)
    batch_size : int
        Number of images in each batch

    Returns
    -------
    tuple
        Number of labels in the dataset, as well as training, validation and testing data generators

    """
    # Data gathering
    if (os.path.isfile(folders["training_config"])
            and os.path.isfile(folders["validation_config"])
            and os.path.isfile(folders["testing_config"])):
        train_config = utils.read_config(folders["training_config"])
        label_ids = [
            x['id'] for x in train_config['labels'] if x['is_evaluate']
        ]
        train_generator = generator.create_generator(dataset,
                                                     model,
                                                     folders["training"],
                                                     image_size,
                                                     batch_size,
                                                     train_config["labels"],
                                                     seed=SEED)
        validation_generator = generator.create_generator(
            dataset,
            model,
            folders["validation"],
            image_size,
            batch_size,
            train_config["labels"],
            seed=SEED)
        test_generator = generator.create_generator(dataset,
                                                    model,
                                                    folders["testing"],
                                                    image_size,
                                                    batch_size,
                                                    train_config["labels"],
                                                    inference=True,
                                                    seed=SEED)
    else:
        utils.logger.error((
            "There is no valid data with the specified parameters. "
            "Please generate a valid dataset before calling the training program."
        ))
        sys.exit(1)
    nb_labels = len(label_ids)
    return nb_labels, train_generator, validation_generator, test_generator