Exemple #1
0
def main(args):
    """Run a hyperparameter grid search and save the best instance.

    One model instance is run for every combination of ``args.batch_size``,
    ``args.dropout``, ``args.network``, ``args.learning_rate`` and
    ``args.learning_rate_decay``. The instance with the highest ``"val_acc"``
    entry is kept: its ``"model"`` entry is saved as
    ``best-model-<image_size>.h5`` and every other entry is dumped as JSON
    into ``best-instance-<image_size>.json``, both in the folder returned by
    ``utils.prepare_output_folder(args.datapath, args.dataset, args.model)``.

    Parameters
    ----------
    args : object
        Parsed command-line arguments. The attributes read here are
        ``batch_size``, ``dropout``, ``network``, ``learning_rate``,
        ``learning_rate_decay`` (iterables of candidate values), ``name``,
        ``datapath``, ``dataset``, ``model``, ``image_size``, ``nb_epochs``,
        ``nb_training_image`` and ``nb_validation_image``.

    Raises
    ------
    ValueError
        If the hyperparameter grid is empty, i.e. no instance has been run.
    """
    # Grid search
    model_output = []
    for batch_size in args.batch_size:
        logger.info("Generating data with batch of %s images...", batch_size)
        # Data generator building (the generators depend on the batch size)
        prepro_folder = utils.prepare_preprocessed_folder(
            args.datapath, args.dataset, args.image_size)
        nb_labels, train_gen, valid_gen = get_data(
            prepro_folder,
            args.dataset,
            args.model,
            args.image_size,
            batch_size,
        )
        for parameters in itertools.product(
                args.dropout,
                args.network,
                args.learning_rate,
                args.learning_rate_decay,
        ):
            logger.info("Instance: %s", utils.list_to_str(parameters))
            # Data path and repository management
            dropout, network, learning_rate, learning_rate_decay = parameters
            instance_args = [
                args.name,
                args.image_size,
                network,
                batch_size,
                dropout,
                learning_rate,
                learning_rate_decay,
            ]
            instance_name = utils.list_to_str(instance_args, "_")
            output_folder = utils.prepare_output_folder(
                args.datapath, args.dataset, args.model, instance_name)
            # Model running
            model_output.append(
                run_model(train_gen, valid_gen, args.model, output_folder,
                          instance_name, args.image_size, nb_labels,
                          args.nb_epochs, args.nb_training_image,
                          args.nb_validation_image, batch_size, *parameters))
            logger.info("Instance result: %s", model_output[-1])

    # Fail with an explicit message instead of the opaque
    # "max() arg is an empty sequence" when nothing has been run
    if not model_output:
        raise ValueError(
            "The hyperparameter grid is empty: provide at least one value "
            "for batch_size, dropout, network, learning_rate and "
            "learning_rate_decay.")

    # Recover best instance starting from validation accuracy
    best_instance = max(model_output, key=lambda x: x["val_acc"])

    # Save best model
    output_folder = utils.prepare_output_folder(args.datapath, args.dataset,
                                                args.model)
    # Only the file name is formatted, so that curly braces in the output
    # folder path can not be mistaken for replacement fields
    filename_template = "best-{}-" + str(args.image_size) + ".{}"
    model_path = os.path.join(
        output_folder, filename_template.format("model", "h5"))
    instance_path = os.path.join(
        output_folder, filename_template.format("instance", "json"))
    best_instance["model"].save(model_path)
    with open(instance_path, "w") as fobj:
        # The model object itself is stored in the h5 file, not in the JSON
        json.dump(
            {
                key: value
                for key, value in best_instance.items() if key != "model"
            },
            fobj,
        )

    backend.clear_session()
Exemple #2
0
# Script entry point: parse the command line, locate the preprocessed data,
# then build the data generators. NOTE(review): this excerpt stops in the
# middle of the training generator creation.
if __name__=='__main__':

    # Parse command-line arguments; each `add_*` helper receives the parser
    # and returns it
    parser = argparse.ArgumentParser(description=("Convolutional Neural Netw"
                                                  "ork on street-scene images"))
    parser = add_instance_arguments(parser)
    parser = add_hyperparameters(parser)
    parser = add_training_arguments(parser)
    args = parser.parse_args()

    # Data path and repository management
    # "aggregated" when the `aggregate_label` argument is truthy, "full"
    # otherwise
    aggregate_value = "full" if not args.aggregate_label else "aggregated"
    # The instance name joins the instance arguments with "_"
    instance_args = [args.name, args.image_size, args.network, args.batch_size,
                     aggregate_value, args.dropout,
                     args.learning_rate, args.learning_rate_decay]
    instance_name = utils.list_to_str(instance_args, "_")
    prepro_folder = utils.prepare_preprocessed_folder(args.datapath, args.dataset,
                                                      args.image_size,
                                                      aggregate_value)

    # Data gathering: only when the training, validation and testing
    # configuration files all exist
    if (os.path.isfile(prepro_folder["training_config"]) and os.path.isfile(prepro_folder["validation_config"])
        and os.path.isfile(prepro_folder["testing_config"])):
        train_config = utils.read_config(prepro_folder["training_config"])
        # Keep the ids of the labels flagged with `is_evaluate`
        label_ids = [x['id'] for x in train_config['labels'] if x['is_evaluate']]
        train_generator = generator.create_generator(
            args.dataset,
            args.model,
            prepro_folder["training"],
            args.image_size,
            args.batch_size,
Exemple #3
0
def predict(
    filenames,
    dataset,
    problem,
    datapath="./data",
    aggregate=False,
    name=None,
    network=None,
    batch_size=None,
    dropout=None,
    learning_rate=None,
    learning_rate_decay=None,
    output_dir="/tmp/deeposlandia/predicted",
):
    """Make label prediction on images indicated by `filenames`, according to
    considered `problem`

    If at least one instance argument is None, the best saved model is
    considered (its dropout and network are recovered from the best instance
    file); otherwise the checkpoint of the fully described instance is used.
    In both cases, if no weight file is found the prediction is done with
    the model as returned by `init_model`.

    Parameters
    ----------
    filenames : list of str
        Glob patterns designating the image files on the file system; each
        pattern is expanded with `glob.glob`
    dataset : str
        Name of the dataset
    problem : str
        Name of the considered model, either `feature_detection` or
        `semantic_segmentation`
    datapath : str
        Relative path of dataset repository
    aggregate : bool
        Either or not the labels are aggregated
    name : str
        Name of the saved network
    network : str
        Name of the chosen architecture, either `simple`, `vgg` or `inception`
    batch_size : integer
        Batch size used for training the model
    dropout : float
        Dropout rate used for training the model
    learning_rate : float
        Learning rate used for training the model
    learning_rate_decay : float
        Learning rate decay used for training the model
    output_dir : str
        Path of the output directory, where labelled images will be stored
        (useful only if `problem=semantic_segmentation`)

    Returns
    -------
    dict
        If `problem=feature_detection`, a dictionary that associates each
        image path with a list of `(category, percentage, color)` tuples, one
        per label, where `color` is the output of `utils.GetHTMLColor`.
        If `problem=semantic_segmentation`, a dictionary with two keys:
        `labels` (summary of the labels encountered in the predictions) and
        `label_images` (image path -> base name of the labelled image written
        in `output_dir`).

    Raises
    ------
    SystemExit
        If there is no training configuration file for the given parameters,
        or if `problem` is unknown (`sys.exit(1)` is called in both cases)

    """
    # Every pattern may match several files: the matches are gathered into a
    # single flat list, in pattern order
    flattened_image_paths = [
        path for pattern in filenames for path in glob.glob(pattern)
    ]
    images = extract_images(flattened_image_paths)
    model_input_size = images.shape[1]
    if dataset == "aerial":
        tile_size = utils.get_tile_size_from_image(model_input_size)
    else:
        tile_size = model_input_size

    aggregate_value = "full" if not aggregate else "aggregated"
    instance_args = [
        name,
        tile_size,
        network,
        batch_size,
        aggregate_value,
        dropout,
        learning_rate,
        learning_rate_decay,
    ]
    instance_name = utils.list_to_str(instance_args, "_")

    prepro_folder = utils.prepare_preprocessed_folder(
        datapath, dataset, tile_size, aggregate_value
    )

    if os.path.isfile(prepro_folder["training_config"]):
        train_config = utils.read_config(prepro_folder["training_config"])
        label_ids = [
            x["id"] for x in train_config["labels"] if x["is_evaluate"]
        ]
        nb_labels = len(label_ids)
    else:
        logger.error(
            (
                "There is no training data with the given "
                "parameters. Please generate a valid dataset "
                "before calling the program."
            )
        )
        sys.exit(1)

    if any(arg is None for arg in instance_args):
        logger.info(
            ("Some arguments are None, " "the best model is considered.")
        )
        output_folder = utils.prepare_output_folder(datapath, dataset, problem)
        instance_filename = (
            "best-instance-" + str(tile_size) + "-" + aggregate_value + ".json"
        )
        instance_path = os.path.join(output_folder, instance_filename)
        # The hyperparameters of the best instance replace the function
        # arguments
        dropout, network = utils.recover_instance(instance_path)
        model = init_model(
            problem,
            instance_name,
            model_input_size,
            nb_labels,
            dropout,
            network,
        )
        checkpoint_filename = (
            "best-model-" + str(tile_size) + "-" + aggregate_value + ".h5"
        )
        checkpoint_full_path = os.path.join(output_folder, checkpoint_filename)
        if os.path.isfile(checkpoint_full_path):
            logger.info("Checkpoint full path : %s", checkpoint_full_path)
            model.load_weights(checkpoint_full_path)
            logger.info(
                "Model weights have been recovered from %s",
                checkpoint_full_path,
            )
        else:
            logger.info(
                (
                    "No available trained model for this image size"
                    " with optimized hyperparameters. The "
                    "inference will be done on an untrained model"
                )
            )
    else:
        logger.info("All instance arguments are filled out.")
        output_folder = utils.prepare_output_folder(
            datapath, dataset, problem, instance_name
        )
        model = init_model(
            problem,
            instance_name,
            model_input_size,
            nb_labels,
            dropout,
            network,
        )
        checkpoints = [
            item
            for item in os.listdir(output_folder)
            if "checkpoint-epoch" in item
        ]
        if len(checkpoints) > 0:
            # `max` compares the file names as strings.
            # NOTE(review): this designates the latest epoch only if epoch
            # numbers are zero-padded in the file names -- to be confirmed
            model_checkpoint = max(checkpoints)
            checkpoint_full_path = os.path.join(
                output_folder, model_checkpoint
            )
            model.load_weights(checkpoint_full_path)
            logger.info(
                "Model weights have been recovered from %s",
                checkpoint_full_path,
            )
        else:
            logger.info(
                (
                    "No available checkpoint for this configuration. "
                    "The model will be trained from scratch."
                )
            )

    y_raw_pred = model.predict(images)

    result = {}
    if problem == "feature_detection":
        label_info = [
            (i["category"], utils.GetHTMLColor(i["color"]))
            for i in train_config["labels"]
        ]
        for filename, prediction in zip(flattened_image_paths, y_raw_pred):
            result[filename] = [
                (i[0], 100 * round(float(j), 2), i[1])
                for i, j in zip(label_info, prediction)
            ]
        return result
    elif problem == "semantic_segmentation":
        os.makedirs(output_dir, exist_ok=True)
        # The predicted label of a pixel is the index of its highest score
        # along the last axis of the raw predictions
        predicted_labels = np.argmax(y_raw_pred, axis=3)
        encountered_labels = np.unique(predicted_labels)
        meaningful_labels = [
            x
            for i, x in enumerate(train_config["labels"])
            if i in encountered_labels
        ]
        # Color channels range from 0 to 255: the buffer must be unsigned,
        # a signed 8-bit integer can not hold values above 127
        labelled_images = np.zeros(
            shape=predicted_labels.shape + (3,), dtype=np.uint8
        )
        for i in range(nb_labels):
            labelled_images[predicted_labels == i] = train_config["labels"][i][
                "color"
            ]
        for labelled_image, filename in zip(
            labelled_images, flattened_image_paths
        ):
            predicted_image = Image.fromarray(labelled_image, "RGB")
            filename = filename.replace(".jpg", ".png")
            predicted_image_path = os.path.join(
                output_dir, os.path.basename(filename)
            )
            predicted_image.save(predicted_image_path)
            result[filename] = os.path.basename(filename)
        return {
            "labels": summarize_config(meaningful_labels),
            "label_images": result,
        }
    else:
        logger.error(
            (
                "Unknown model argument. Please use "
                "'feature_detection' or 'semantic_segmentation'."
            )
        )
        sys.exit(1)
Exemple #4
0
def predict(
    filenames,
    dataset,
    problem,
    datapath="./data",
    name=None,
    network=None,
    batch_size=None,
    dropout=None,
    learning_rate=None,
    learning_rate_decay=None,
    output_dir="/tmp/deeposlandia/predicted",
):
    """Make label prediction on images indicated by `filenames`, according to
    considered `problem`

    The prediction is always done with the best saved model: `dropout` and
    `network` are recovered from the best instance file, whatever the values
    passed as arguments. If the best model weight file does not exist, the
    prediction is done with the model as returned by `init_model`.

    Parameters
    ----------
    filenames : list of str
        Glob patterns designating the image files on the file system; each
        pattern is expanded with `glob.glob`
    dataset : str
        Name of the dataset
    problem : str
        Name of the considered model, either `featdet` or `semseg`
    datapath : str
        Relative path of dataset repository
    name : str
        Name of the saved network
    network : str
        Name of the chosen architecture, either `simple`, `vgg` or `inception`
    batch_size : integer
        Batch size used for training the model
    dropout : float
        Dropout rate used for training the model
    learning_rate : float
        Learning rate used for training the model
    learning_rate_decay : float
        Learning rate decay used for training the model
    output_dir : str
        Path of the output directory, where labelled images will be stored
        (useful only if `problem=semseg`)

    Returns
    -------
    dict
        If `problem=featdet`, a dictionary that associates each image path
        with a list of `(category, percentage, color)` tuples, one per label,
        where `color` is the output of `utils.GetHTMLColor`.
        If `problem=semseg`, a dictionary with two keys: `labels` (summary of
        the labels encountered in the predictions) and `label_images` (image
        path -> base name of the labelled image written in `output_dir`).

    Raises
    ------
    FileNotFoundError
        If there is no training configuration file for the given parameters
    ValueError
        If `problem` is neither `featdet` nor `semseg`

    """
    # Every pattern may match several files: the matches are gathered into a
    # single flat list, in pattern order
    flattened_image_paths = [
        path for pattern in filenames for path in glob.glob(pattern)
    ]
    images = extract_images(flattened_image_paths)
    model_input_size = images.shape[1]

    instance_args = [
        name,
        model_input_size,
        network,
        batch_size,
        dropout,
        learning_rate,
        learning_rate_decay,
    ]
    instance_name = utils.list_to_str(instance_args, "_")

    prepro_folder = utils.prepare_preprocessed_folder(datapath, dataset,
                                                      model_input_size)

    if os.path.isfile(prepro_folder["training_config"]):
        train_config = utils.read_config(prepro_folder["training_config"])
        label_ids = [
            x["id"] for x in train_config["labels"] if x["is_evaluate"]
        ]
        nb_labels = len(label_ids)
    else:
        raise FileNotFoundError(
            "There is no training data with the given parameters. "
            "Please generate a valid dataset before calling the program.")

    output_folder = utils.prepare_output_folder(datapath, dataset,
                                                model_input_size, problem)
    # `output_folder` is indexed by key below, it is not a path itself:
    # joining it as a path component raises a TypeError. Its entries are used
    # directly as file paths (see "best-model").
    instance_path = output_folder["best-instance"]
    # The hyperparameters of the best instance replace the function arguments
    dropout, network = utils.recover_instance(instance_path)
    model = init_model(
        problem,
        instance_name,
        model_input_size,
        nb_labels,
        dropout,
        network,
    )
    if os.path.isfile(output_folder["best-model"]):
        model.load_weights(output_folder["best-model"])
        logger.info(
            "Model weights have been recovered from %s",
            output_folder["best-model"],
        )
    else:
        logger.info(
            "No available trained model for this image size with optimized hyperparameters. "
            "The inference will be done on an untrained model")

    y_raw_pred = model.predict(images, batch_size=2, verbose=1)

    result = {}
    if problem == "featdet":
        label_info = [(i["category"], utils.GetHTMLColor(i["color"]))
                      for i in train_config["labels"]]
        for filename, prediction in zip(flattened_image_paths, y_raw_pred):
            result[filename] = [(i[0], 100 * round(float(j), 2), i[1])
                                for i, j in zip(label_info, prediction)]
        return result
    elif problem == "semseg":
        os.makedirs(output_dir, exist_ok=True)
        # The predicted label of a pixel is the index of its highest score
        # along the last axis of the raw predictions
        predicted_labels = np.argmax(y_raw_pred, axis=3)
        encountered_labels = np.unique(predicted_labels)
        meaningful_labels = [
            x for i, x in enumerate(train_config["labels"])
            if i in encountered_labels
        ]
        # Color channels range from 0 to 255: the buffer must be unsigned,
        # a signed 8-bit integer can not hold values above 127
        labelled_images = np.zeros(shape=predicted_labels.shape + (3,),
                                   dtype=np.uint8)
        for i in range(nb_labels):
            labelled_images[predicted_labels ==
                            i] = train_config["labels"][i]["color"]
        for labelled_image, filename in zip(labelled_images,
                                            flattened_image_paths):
            predicted_image = Image.fromarray(labelled_image, "RGB")
            filename = filename.replace(".jpg", ".png")
            predicted_image_path = os.path.join(output_dir,
                                                os.path.basename(filename))
            predicted_image.save(predicted_image_path)
            result[filename] = os.path.basename(filename)
        return {
            "labels": summarize_config(meaningful_labels),
            "label_images": result,
        }
    else:
        raise ValueError(
            "Unknown model argument. Please use 'featdet' or 'semseg'.")
Exemple #5
0
    # Grid search: one model instance is run per combination of batch size,
    # dropout, network, learning rate and learning rate decay, and every
    # result is appended to `model_output`.
    # NOTE(review): `aggregate_value` and `model_input_size` are defined
    # before this excerpt.
    model_output = []
    for batch_size in args.batch_size:
        logger.info("Generating data with batch of %s images...", batch_size)
        # Data generator building (the generators depend on the batch size)
        prepro_folder = utils.prepare_preprocessed_folder(
            args.datapath, args.dataset, args.image_size, aggregate_value)
        nb_labels, train_gen, valid_gen = get_data(prepro_folder, args.dataset,
                                                   args.model,
                                                   model_input_size,
                                                   batch_size)
        # Cartesian product of the remaining hyperparameter value lists
        for parameters in itertools.product(args.dropout, args.network,
                                            args.learning_rate,
                                            args.learning_rate_decay):
            logger.info(utils.list_to_str(parameters))
            # Data path and repository management
            dropout, network, learning_rate, learning_rate_decay = parameters
            # The instance name joins the instance arguments with "_"
            instance_args = [
                args.name, args.image_size, network, batch_size,
                aggregate_value, dropout, learning_rate, learning_rate_decay
            ]
            instance_name = utils.list_to_str(instance_args, "_")
            output_folder = utils.prepare_output_folder(
                args.datapath, args.dataset, args.model, instance_name)
            # Model running: `parameters` is unpacked as the trailing
            # positional arguments (dropout, network, learning rate, learning
            # rate decay)
            model_output.append(
                run_model(train_gen, valid_gen, args.model, output_folder,
                          instance_name, model_input_size, aggregate_value,
                          nb_labels, args.nb_epochs, args.nb_training_image,
                          args.nb_validation_image, batch_size, *parameters))