Ejemplo n.º 1
0
def recover_image_info(dataset, filename):
    """Recover the image file names used by the application, as well as the
    ground-truth labels that actually appear on the labelled image

    Parameters
    ----------
    dataset : str
        Name of the dataset, either `mapillary`, `aerial`, `tanzania` or
    `shapes`
    filename : str
        Base name of the image, without extension

    Returns
    -------
    dict
        Dictionary that contains the raw image path (`image_file`), the
    labelled image path (`label_file`, resolved against the application static
    folder when the labelled image is read) and the encountered labels
    (`labels`) as a list of `(category, HTML color)` tuples

    Raises
    ------
    ValueError
        If `dataset` is not one of the supported datasets

    """
    # Preprocessed folder name of each supported dataset
    size_aggregations = {
        "mapillary": "400_aggregated",
        "aerial": "250_full",
        "tanzania": "512_full",
        "shapes": "64_full",
    }
    # Validate the dataset before touching the file system, so that an
    # unsupported name is reported as such rather than as a missing file
    if dataset not in size_aggregations:
        raise ValueError(("Unknown dataset. Please choose 'mapillary', "
                          "'aerial', 'tanzania' or 'shapes'."))
    size_aggregation = size_aggregations[dataset]

    # Build both paths explicitly: rewriting one path into the other with
    # `str.replace` corrupts any file name that contains "images" or ".png"
    is_mapillary = dataset == "mapillary"
    dataset_code = dataset + "_agg" if is_mapillary else dataset
    image_extension = ".jpg" if is_mapillary else ".png"
    image_file = os.path.join(dataset_code, "images",
                              filename + image_extension)
    label_file = os.path.join(dataset_code, "labels", filename + ".png")

    server_label_filename = os.path.join(app.static_folder, label_file)
    # The context manager releases the file handle once pixels are copied
    with Image.open(server_label_filename) as label_image:
        server_label_image = np.array(label_image)

    config_path = os.path.join("data", dataset, "preprocessed",
                               size_aggregation, "validation.json")
    with open(config_path) as fobj:
        config = json.load(fobj)

    if dataset == "aerial":
        # Labels are compared to the raw pixel values
        actual_labels = np.unique(server_label_image).tolist()
    else:
        # Labels are compared to the distinct 3-component pixels
        actual_labels = np.unique(server_label_image.reshape([-1, 3]),
                                  axis=0).tolist()
    printed_labels = [(item["category"], utils.GetHTMLColor(item["color"]))
                      for item in config["labels"]
                      if item["color"] in actual_labels]
    return {
        "image_file": image_file,
        "label_file": label_file,
        "labels": printed_labels,
    }
Ejemplo n.º 2
0
def summarize_config(config):
    """Turn label descriptions into `(name, color)` pairs that are ready to be
    printed in a HTML page

    Parameters
    ----------
    config : iterable of dict
        Label descriptions; each of them is read through its `category` and
    `color` keys

    Returns
    -------
    list of tuple
        One `(category, HTML color)` pair per label, in the input order
    """
    summary = []
    for label in config:
        category = label["category"]
        html_color = utils.GetHTMLColor(label["color"])
        summary.append((category, html_color))
    return summary
Ejemplo n.º 3
0
def predict(
    filenames,
    dataset,
    problem,
    datapath="./data",
    name=None,
    network=None,
    batch_size=None,
    dropout=None,
    learning_rate=None,
    learning_rate_decay=None,
    output_dir="/tmp/deeposlandia/predicted",
):
    """Make label prediction on images indicated by `filenames`, according to
    considered `problem`

    Parameters
    ----------
    filenames : str or list of str
        Glob pattern(s) of the image files on the file system
    dataset : str
        Name of the dataset
    problem : str
        Name of the considered model, either `featdet` or `semseg`
    datapath : str
        Relative path of dataset repository
    name : str
        Name of the saved network
    network : str
        Name of the chosen architecture; it is overridden by the value
    recovered from the best instance
    batch_size : integer
        Batch size used for training the model (only used for building the
    instance name)
    dropout : float
        Dropout rate used for training the model; it is overridden by the
    value recovered from the best instance
    learning_rate : float
        Learning rate used for training the model (only used for building the
    instance name)
    learning_rate_decay : float
        Learning rate decay used for training the model (only used for
    building the instance name)
    output_dir : str
        Path of the output directory, where labelled images will be stored
    (useful only if `problem=semseg`)

    Returns
    -------
    dict
        If `problem=featdet`, a dictionary that associates each image path
    with a list of `(category, percentage, HTML color)` tuples. If
    `problem=semseg`, a dictionary with the encountered labels (`labels`) and
    the saved labelled images (`label_images`)

    Raises
    ------
    FileNotFoundError
        If there is no training configuration for the given parameters
    ValueError
        If `problem` is neither `featdet` nor `semseg`

    """
    # A single pattern is accepted as well; iterating over a bare string
    # would glob each of its characters
    if isinstance(filenames, str):
        filenames = [filenames]
    # Each pattern may match several files: gather them into one flat list
    flattened_image_paths = [
        path for pattern in filenames for path in glob.glob(pattern)
    ]
    images = extract_images(flattened_image_paths)
    model_input_size = images.shape[1]

    instance_args = [
        name,
        model_input_size,
        network,
        batch_size,
        dropout,
        learning_rate,
        learning_rate_decay,
    ]
    instance_name = utils.list_to_str(instance_args, "_")

    prepro_folder = utils.prepare_preprocessed_folder(datapath, dataset,
                                                      model_input_size)

    if not os.path.isfile(prepro_folder["training_config"]):
        raise FileNotFoundError(
            "There is no training data with the given parameters. "
            "Please generate a valid dataset before calling the program.")
    train_config = utils.read_config(prepro_folder["training_config"])
    label_ids = [
        x["id"] for x in train_config["labels"] if x["is_evaluate"]
    ]
    nb_labels = len(label_ids)

    output_folder = utils.prepare_output_folder(datapath, dataset,
                                                model_input_size, problem)
    # `output_folder` is subscripted like a mapping and its "best-model" entry
    # is used as a file path below: the "best-instance" entry is used the same
    # way, as the mapping itself is not a valid path component
    instance_path = output_folder["best-instance"]
    dropout, network = utils.recover_instance(instance_path)
    model = init_model(
        problem,
        instance_name,
        model_input_size,
        nb_labels,
        dropout,
        network,
    )
    if os.path.isfile(output_folder["best-model"]):
        model.load_weights(output_folder["best-model"])
        logger.info(
            "Model weights have been recovered from %s",
            output_folder["best-model"],
        )
    else:
        logger.info(
            "No available trained model for this image size with optimized hyperparameters. "
            "The inference will be done on an untrained model")

    y_raw_pred = model.predict(images, batch_size=2, verbose=1)

    result = {}
    if problem == "featdet":
        label_info = [(i["category"], utils.GetHTMLColor(i["color"]))
                      for i in train_config["labels"]]
        for filename, prediction in zip(flattened_image_paths, y_raw_pred):
            result[filename] = [(i[0], 100 * round(float(j), 2), i[1])
                                for i, j in zip(label_info, prediction)]
        return result
    elif problem == "semseg":
        os.makedirs(output_dir, exist_ok=True)
        predicted_labels = np.argmax(y_raw_pred, axis=3)
        encountered_labels = set(np.unique(predicted_labels).tolist())
        meaningful_labels = [
            x for i, x in enumerate(train_config["labels"])
            if i in encountered_labels
        ]
        # Color components range from 0 to 255, hence the unsigned 8-bit
        # buffer (a signed one overflows for components larger than 127)
        labelled_images = np.zeros(shape=predicted_labels.shape + (3, ),
                                   dtype=np.uint8)
        # NOTE(review): predicted indices are mapped to labels by position in
        # the whole label list, whereas `nb_labels` only counts evaluated
        # labels; confirm that non-evaluated labels come last in the config
        for i in range(nb_labels):
            labelled_images[predicted_labels ==
                            i] = train_config["labels"][i]["color"]
        for labelled_image, filename in zip(labelled_images,
                                            flattened_image_paths):
            predicted_image = Image.fromarray(labelled_image, "RGB")
            # Only the extension is switched to PNG; the remainder of the
            # path is left untouched
            root, extension = os.path.splitext(filename)
            if extension == ".jpg":
                filename = root + ".png"
            predicted_image_path = os.path.join(output_dir,
                                                os.path.basename(filename))
            predicted_image.save(predicted_image_path)
            result[filename] = os.path.basename(filename)
        return {
            "labels": summarize_config(meaningful_labels),
            "label_images": result,
        }
    else:
        raise ValueError(
            "Unknown model argument. Please use 'featdet' or 'semseg'.")
Ejemplo n.º 4
0
def predict(
    filenames,
    dataset,
    problem,
    datapath="./data",
    aggregate=False,
    name=None,
    network=None,
    batch_size=None,
    dropout=None,
    learning_rate=None,
    learning_rate_decay=None,
    output_dir="/tmp/deeposlandia/predicted",
):
    """Make label prediction on images indicated by `filenames`, according to
    considered `problem`

    If one of the instance arguments is `None`, the best saved instance is
    used; otherwise the most recent checkpoint of the described instance is
    loaded. The process exits with status 1 if there is no training
    configuration for the given parameters, or if `problem` is unknown.

    Parameters
    ----------
    filenames : str or list of str
        Glob pattern(s) of the image files on the file system
    dataset : str
        Name of the dataset
    problem : str
        Name of the considered model, either `feature_detection` or
    `semantic_segmentation`
    datapath : str
        Relative path of dataset repository
    aggregate : bool
        Either or not the labels are aggregated
    name : str
        Name of the saved network
    network : str
        Name of the chosen architecture, either `simple`, `vgg` or `inception`
    batch_size : integer
        Batch size used for training the model
    dropout : float
        Dropout rate used for training the model
    learning_rate : float
        Learning rate used for training the model
    learning_rate_decay : float
        Learning rate decay used for training the model
    output_dir : str
        Path of the output directory, where labelled images will be stored
    (useful only if `problem=semantic_segmentation`)

    Returns
    -------
    dict
        If `problem=feature_detection`, a dictionary that associates each
    image path with a list of `(category, percentage, HTML color)` tuples. If
    `problem=semantic_segmentation`, a dictionary with the encountered labels
    (`labels`) and the saved labelled images (`label_images`)

    """
    # A single pattern is accepted as well; iterating over a bare string
    # would glob each of its characters
    if isinstance(filenames, str):
        filenames = [filenames]
    # Each pattern may match several files: gather them into one flat list
    flattened_image_paths = [
        path for pattern in filenames for path in glob.glob(pattern)
    ]
    images = extract_images(flattened_image_paths)
    model_input_size = images.shape[1]
    if dataset == "aerial":
        tile_size = utils.get_tile_size_from_image(model_input_size)
    else:
        tile_size = model_input_size

    aggregate_value = "aggregated" if aggregate else "full"
    instance_args = [
        name,
        tile_size,
        network,
        batch_size,
        aggregate_value,
        dropout,
        learning_rate,
        learning_rate_decay,
    ]
    instance_name = utils.list_to_str(instance_args, "_")

    prepro_folder = utils.prepare_preprocessed_folder(
        datapath, dataset, tile_size, aggregate_value
    )

    if os.path.isfile(prepro_folder["training_config"]):
        train_config = utils.read_config(prepro_folder["training_config"])
        label_ids = [
            x["id"] for x in train_config["labels"] if x["is_evaluate"]
        ]
        nb_labels = len(label_ids)
    else:
        logger.error(
            (
                "There is no training data with the given "
                "parameters. Please generate a valid dataset "
                "before calling the program."
            )
        )
        sys.exit(1)

    if any(arg is None for arg in instance_args):
        logger.info(
            ("Some arguments are None, " "the best model is considered.")
        )
        output_folder = utils.prepare_output_folder(datapath, dataset, problem)
        instance_filename = (
            "best-instance-" + str(tile_size) + "-" + aggregate_value + ".json"
        )
        instance_path = os.path.join(output_folder, instance_filename)
        # Missing hyperparameters are taken from the best saved instance
        dropout, network = utils.recover_instance(instance_path)
        model = init_model(
            problem,
            instance_name,
            model_input_size,
            nb_labels,
            dropout,
            network,
        )
        checkpoint_filename = (
            "best-model-" + str(tile_size) + "-" + aggregate_value + ".h5"
        )
        checkpoint_full_path = os.path.join(output_folder, checkpoint_filename)
        if os.path.isfile(checkpoint_full_path):
            logger.info("Checkpoint full path : %s", checkpoint_full_path)
            model.load_weights(checkpoint_full_path)
            logger.info(
                "Model weights have been recovered from %s",
                checkpoint_full_path,
            )
        else:
            logger.info(
                (
                    "No available trained model for this image size"
                    " with optimized hyperparameters. The "
                    "inference will be done on an untrained model"
                )
            )
    else:
        logger.info("All instance arguments are filled out.")
        output_folder = utils.prepare_output_folder(
            datapath, dataset, problem, instance_name
        )
        model = init_model(
            problem,
            instance_name,
            model_input_size,
            nb_labels,
            dropout,
            network,
        )
        checkpoints = [
            item
            for item in os.listdir(output_folder)
            if "checkpoint-epoch" in item
        ]
        if checkpoints:
            # NOTE(review): `max` compares file names as strings; this picks
            # the last epoch only if epoch numbers are zero-padded - confirm
            # against the checkpoint naming used at training time
            model_checkpoint = max(checkpoints)
            checkpoint_full_path = os.path.join(
                output_folder, model_checkpoint
            )
            model.load_weights(checkpoint_full_path)
            logger.info(
                "Model weights have been recovered from %s",
                checkpoint_full_path,
            )
        else:
            # No training happens in this function: the message states what
            # is actually done with the freshly initialized model
            logger.info(
                (
                    "No available checkpoint for this configuration. "
                    "The inference will be done on an untrained model."
                )
            )

    y_raw_pred = model.predict(images)

    result = {}
    if problem == "feature_detection":
        label_info = [
            (i["category"], utils.GetHTMLColor(i["color"]))
            for i in train_config["labels"]
        ]
        for filename, prediction in zip(flattened_image_paths, y_raw_pred):
            result[filename] = [
                (i[0], 100 * round(float(j), 2), i[1])
                for i, j in zip(label_info, prediction)
            ]
        return result
    elif problem == "semantic_segmentation":
        os.makedirs(output_dir, exist_ok=True)
        predicted_labels = np.argmax(y_raw_pred, axis=3)
        encountered_labels = set(np.unique(predicted_labels).tolist())
        meaningful_labels = [
            x
            for i, x in enumerate(train_config["labels"])
            if i in encountered_labels
        ]
        # Color components range from 0 to 255, hence the unsigned 8-bit
        # buffer (a signed one overflows for components larger than 127)
        labelled_images = np.zeros(
            shape=predicted_labels.shape + (3,), dtype=np.uint8
        )
        # NOTE(review): predicted indices are mapped to labels by position in
        # the whole label list, whereas `nb_labels` only counts evaluated
        # labels; confirm that non-evaluated labels come last in the config
        for i in range(nb_labels):
            labelled_images[predicted_labels == i] = train_config["labels"][i][
                "color"
            ]
        for labelled_image, filename in zip(
            labelled_images, flattened_image_paths
        ):
            predicted_image = Image.fromarray(labelled_image, "RGB")
            # Only the extension is switched to PNG; the remainder of the
            # path is left untouched
            root, extension = os.path.splitext(filename)
            if extension == ".jpg":
                filename = root + ".png"
            predicted_image_path = os.path.join(
                output_dir, os.path.basename(filename)
            )
            predicted_image.save(predicted_image_path)
            result[filename] = os.path.basename(filename)
        return {
            "labels": summarize_config(meaningful_labels),
            "label_images": result,
        }
    else:
        logger.error(
            (
                "Unknown model argument. Please use "
                "'feature_detection' or 'semantic_segmentation'."
            )
        )
        sys.exit(1)