Example #1
def detect_objects(input_images, model, confidence_thresh):
    """

    :param input_images:
    :param model:
    :return:
    """

    y_predicted = model.predict(input_images)
    y_predicted_decoded = decode_y(y_predicted,
                                   confidence_thresh=confidence_thresh,
                                   iou_threshold=0.45,
                                   top_k=200,
                                   input_coords='centroids',
                                   normalize_coords=True,
                                   img_height=IMAGE_HEIGHT,
                                   img_width=IMAGE_WIDTH)

    return y_predicted_decoded
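
For context, a minimal usage sketch of the function above; the dummy batch, the 300x300 input size, and the already-loaded `model` are assumptions for illustration:

import numpy as np

IMAGE_HEIGHT, IMAGE_WIDTH = 300, 300  # must match the model's expected input size

# Two dummy RGB images standing in for real inputs.
batch = np.random.rand(2, IMAGE_HEIGHT, IMAGE_WIDTH, 3)

detections = detect_objects(batch, model, confidence_thresh=0.5)
for boxes in detections:
    # Each row is (class_id, confidence, xmin, ymin, xmax, ymax).
    print(boxes)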
Example #2
from math import ceil

import numpy as np

def _predict_image(self, image):
    """Slides a 300x300 window over the image in 150-pixel steps and collects the decoded predictions."""
    predictions_appender = []
    shaper = image.shape
    x_num_roll = ceil(shaper[0] / 150)
    y_num_roll = ceil(shaper[1] / 150)
    for x in range(x_num_roll):
        for y in range(y_num_roll):
            # Roll the image so that the current window lands in the top-left corner.
            X = np.roll(image, -150 * x, axis=0)
            X = np.roll(X, -150 * y, axis=1)
            # Add a batch dimension before predicting on the 300x300 window.
            y_pred = model.predict(np.expand_dims(X[0:300, 0:300], axis=0))
            # 4: Decode the raw prediction `y_pred`
            y_pred_decoded = decode_y(y_pred,
                                      confidence_thresh=0.01,
                                      iou_threshold=0.45,
                                      top_k='all',
                                      input_coords='centroids',
                                      normalize_coords=normalize_coords,
                                      img_height=img_height,
                                      img_width=img_width)
            # Shift the box coordinates from window space back to full-image space.
            boxes = y_pred_decoded[0]
            boxes[:, [3, 5]] += 150 * x  # ymin, ymax: offset along axis 0 (rows)
            boxes[:, [2, 4]] += 150 * y  # xmin, xmax: offset along axis 1 (columns)
            predictions_appender.append(boxes)
    # Stack the per-window predictions into one (n_boxes, 6) array.
    return np.concatenate(predictions_appender, axis=0)
Example #3
# 3: Make a prediction

y_pred = model.predict(X)

# Now let's decode the raw prediction `y_pred`. The function `decode_y()` with the arguments set as below
# follows the procedure of the original implementation: first, a very low confidence threshold of 0.01 is
# applied to filter out the majority of the predicted boxes, then greedy non-maximum suppression is
# performed per class with an intersection-over-union threshold of 0.45, and out of what is left after
# that, the top 200 highest-confidence boxes are returned. I don't understand why you would want to return
# 200 boxes when there are about two or three objects in a given image on average, but that's what the
# paper says.
#
# The function `decode_y2()` performs an alternative procedure that is much more efficient and yields
# better results, so feel free to use that if you like. The documentation explains how it differs from
# `decode_y()`.
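#
# For intuition, here is a minimal sketch of the greedy NMS step described above. It is an illustration
# only, not the actual `decode_y()` implementation; it assumes the boxes of a single class are given as
# rows of (confidence, xmin, ymin, xmax, ymax).

import numpy as np

def greedy_nms_sketch(boxes, iou_threshold=0.45):
    '''Keeps the highest-confidence box, drops all boxes overlapping it by more than `iou_threshold`, repeats.'''
    boxes = boxes[boxes[:, 0].argsort()[::-1]]  # sort by confidence, descending
    keep = []
    while len(boxes) > 0:
        best, rest = boxes[0], boxes[1:]
        keep.append(best)
        # Intersection areas between `best` and all remaining boxes.
        ixmin = np.maximum(rest[:, 1], best[1])
        iymin = np.maximum(rest[:, 2], best[2])
        ixmax = np.minimum(rest[:, 3], best[3])
        iymax = np.minimum(rest[:, 4], best[4])
        intersection = np.clip(ixmax - ixmin, 0, None) * np.clip(iymax - iymin, 0, None)
        area_best = (best[3] - best[1]) * (best[4] - best[2])
        area_rest = (rest[:, 3] - rest[:, 1]) * (rest[:, 4] - rest[:, 2])
        iou = intersection / (area_best + area_rest - intersection)
        boxes = rest[iou <= iou_threshold]  # discard boxes that overlap `best` too much
    return np.array(keep)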

# In[ ]:

# 4: Decode the raw prediction `y_pred`

y_pred_decoded = decode_y(y_pred,
                          confidence_thresh=0.01,
                          iou_threshold=0.45,
                          top_k=200,
                          input_coords='centroids',
                          normalize_coords=normalize_coords,
                          img_height=img_height,
                          img_width=img_width)

print("Predicted boxes:\n")
print(y_pred_decoded[i])

# Finally, let's draw the predicted boxes onto the image in blue to visualize the result. Each predicted
# box shows its confidence next to the category name. The ground truth boxes are also drawn onto the
# image in green for comparison.

# In[ ]:

# 5: Draw the predicted boxes onto the image

plt.figure(figsize=(20, 12))
plt.imshow(X[i])
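
# The listing stops after `plt.imshow()`. A sketch of the drawing loop described in the comment above
# might look like the following. It assumes the decoded box layout (class_id, conf, xmin, ymin, xmax, ymax),
# a `classes` list mapping class IDs to names, and ground truth boxes `y_true[i]` laid out as
# (class_id, xmin, ymin, xmax, ymax); adapt these to your data.

current_axis = plt.gca()

# Predicted boxes in blue, each labeled with its category name and confidence.
for box in y_pred_decoded[i]:
    xmin, ymin, xmax, ymax = box[2], box[3], box[4], box[5]
    label = '{}: {:.2f}'.format(classes[int(box[0])], box[1])
    current_axis.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                         color='blue', fill=False, linewidth=2))
    current_axis.text(xmin, ymin, label, size='x-large', color='white',
                      bbox={'facecolor': 'blue', 'alpha': 1.0})

# Ground truth boxes in green for comparison.
for box in y_true[i]:
    xmin, ymin, xmax, ymax = box[1], box[2], box[3], box[4]
    current_axis.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                         color='green', fill=False, linewidth=2))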
Example #4
import sys
from math import ceil

from tqdm import trange

# `decode_y` is assumed to be imported from the surrounding project's SSD box decoding utilities.

def predict_all_to_txt(model,
                       img_height,
                       img_width,
                       batch_generator,
                       batch_size,
                       batch_generator_mode='resize',
                       classes=['background',
                                'aeroplane', 'bicycle', 'bird', 'boat',
                                'bottle', 'bus', 'car', 'cat',
                                'chair', 'cow', 'diningtable', 'dog',
                                'horse', 'motorbike', 'person', 'pottedplant',
                                'sheep', 'sofa', 'train', 'tvmonitor'],
                       out_file_prefix='comp3_det_test_',
                       confidence_thresh=0.01,
                       iou_threshold=0.45,
                       top_k=200,
                       pred_coords='centroids',
                       normalize_coords=True):
    '''
    Runs detection predictions over the whole dataset given a model and saves them in a text file
    in the Pascal VOC detection results format, i.e. the format in which the Pascal VOC test server
    expects results.
    This will result in one text file per positive class, where each file contains the predictions for that class.
    Arguments:
        model (Keras model): A Keras SSD model object.
        img_height (int): The input image height for the model.
        img_width (int): The input image width for the model.
        batch_generator (BatchGenerator): A `BatchGenerator` object with the evaluation dataset.
        batch_size (int): The batch size for the evaluation.
        batch_generator_mode (str, optional): Either 'resize' or 'pad'. If 'resize', the input images will
            be resized (i.e. warped) to `(img_height, img_width)`. This mode does not preserve the aspect ratios of the images.
            If 'pad', the input images will be first padded so that they have the aspect ratio defined by `img_height`
            and `img_width` and then resized to `(img_height, img_width)`. This mode preserves the aspect ratios of the images.
        classes (list or dict, optional): A list or dictionary that maps the consecutive class IDs predicted by the model
            to their respective name strings. The list must contain the background class for class ID zero.
        out_file_prefix (str, optional): A prefix for the output text file names. The suffix to each output text file name will
            be the respective class name followed by the `.txt` file extension. This string is also how you specify the directory
            in which the results are to be saved.
        confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
            positive class that a box must have in order to be considered for the non-maximum suppression stage for the respective class.
            A lower value will result in a larger part of the selection process being done by the non-maximum suppression
            stage, while a larger value will result in a larger part of the selection process happening in the confidence
            thresholding stage.
        iou_threshold (float, optional): A float in [0,1]. All boxes with a Jaccard similarity of greater than `iou_threshold`
            with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers
            to the box score.
        top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the
            non-maximum suppression stage. Defaults to 200, following the paper.
        pred_coords (str, optional): The box coordinate format that the model outputs. Can be either 'centroids'
            for the format `(cx, cy, w, h)` (box center coordinates, width, and height), 'minmax' for the format
            `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`.
        normalize_coords (bool, optional): Set to `True` if the model outputs relative coordinates (i.e. coordinates in [0,1])
            and you wish to transform these relative coordinates back to absolute coordinates. If the model outputs
            relative coordinates, but you do not want to convert them back to absolute coordinates, set this to `False`.
            Do not set this to `True` if the model already outputs absolute coordinates, as that would result in incorrect
            coordinates. Requires `img_height` and `img_width` if set to `True`.
    Returns:
        None.
    '''

    if batch_generator_mode == 'resize':
        random_pad_and_resize = False
        resize = (img_height, img_width)
    elif batch_generator_mode == 'pad':
        random_pad_and_resize = (img_height, img_width, 0, 3, 1.0)
        resize = False
    else:
        raise ValueError("Unexpected argument value: `batch_generator_mode` can be either of 'resize' or 'pad', but received '{}'.".format(batch_generator_mode))

    # Set the generator parameters.
    generator = batch_generator.generate(batch_size=batch_size,
                                         shuffle=False,
                                         train=False,
                                         returns={'processed_images', 'image_ids', 'inverse_transform'},
                                         convert_to_3_channels=True,
                                         random_pad_and_resize=random_pad_and_resize,
                                         resize=resize,
                                         limit_boxes=False,
                                         keep_images_without_gt=True)

    # We have to generate a separate results file for each class.
    results = []
    for i in range(1, len(classes)):
        # Create one text file per class and put it in our results list.
        results.append(open('{}{}.txt'.format(out_file_prefix, classes[i]), 'w'))

    # Compute the number of batches to iterate over the entire dataset.
    n_images = batch_generator.get_n_samples()
    print("Number of images in the evaluation dataset: {}".format(n_images))
    n_batches = int(ceil(n_images / batch_size))
    # Loop over all batches.
    tr = trange(n_batches, file=sys.stdout)
    tr.set_description('Producing results file')
    for j in tr:
        # Generate batch.
        batch_X, batch_image_ids, batch_inverse_coord_transform = next(generator)
        # Predict.
        y_pred = model.predict(batch_X)
        # Decode.
        y_pred_decoded = decode_y(y_pred,
                                  confidence_thresh=confidence_thresh,
                                  iou_threshold=iou_threshold,
                                  top_k=top_k,
                                  input_coords=pred_coords,
                                  normalize_coords=normalize_coords,
                                  img_height=img_height,
                                  img_width=img_width)
        # Convert each predicted box into the results format.
        for k, batch_item in enumerate(y_pred_decoded):
            # The box coordinates were predicted for the transformed
            # (resized, cropped, padded, etc.) image. We now have to
            # transform these coordinates back to what they would be
            # in the original images.
            batch_item[:,2:] *= batch_inverse_coord_transform[k,:,1]
            batch_item[:,2:] += batch_inverse_coord_transform[k,:,0]
            for box in batch_item:
                image_id = batch_image_ids[k]
                class_id = int(box[0])
                # Round the box coordinates to reduce the file size.
                confidence = str(round(box[1], 4))
                xmin = str(round(box[2], 1))
                ymin = str(round(box[3], 1))
                xmax = str(round(box[4], 1))
                ymax = str(round(box[5], 1))
                prediction = [image_id, confidence, xmin, ymin, xmax, ymax]
                prediction_txt = ' '.join(prediction) + '\n'
                # Write the predicted box to the text file for its class.
                results[class_id - 1].write(prediction_txt)

    # Close all the files.
    for results_file in results:
        results_file.close()

    print("All results files saved.")
Example #5
import json
import sys
from math import ceil

from tqdm import trange

# `decode_y` is assumed to be imported from the surrounding project's SSD box decoding utilities.

def predict_all_to_json(out_file,
                        model,
                        img_height,
                        img_width,
                        classes_to_cats,
                        batch_generator,
                        batch_size,
                        batch_generator_mode='resize',
                        confidence_thresh=0.01,
                        iou_threshold=0.45,
                        top_k=200,
                        pred_coords='centroids',
                        normalize_coords=True):
    '''
    Runs detection predictions over the whole dataset given a model and saves them in a JSON file
    in the MS COCO detection results format.

    Arguments:
        out_file (str): The file name (full path) under which to save the results JSON file.
        model (Keras model): A Keras SSD model object.
        img_height (int): The input image height for the model.
        img_width (int): The input image width for the model.
        classes_to_cats (dict): A dictionary that maps the consecutive class IDs predicted by the model
            to the non-consecutive original MS COCO category IDs.
        batch_generator (BatchGenerator): A `BatchGenerator` object with the evaluation dataset.
        batch_size (int): The batch size for the evaluation.
        batch_generator_mode (str, optional): Either 'resize' or 'pad'. If 'resize', the input images will
            be resized (i.e. warped) to `(img_height, img_width)`. This mode does not preserve the aspect ratios of the images.
            If 'pad', the input images will be first padded so that they have the aspect ratio defined by `img_height`
            and `img_width` and then resized to `(img_height, img_width)`. This mode preserves the aspect ratios of the images.
        confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
            positive class that a box must have in order to be considered for the non-maximum suppression stage for the respective class.
            A lower value will result in a larger part of the selection process being done by the non-maximum suppression
            stage, while a larger value will result in a larger part of the selection process happening in the confidence
            thresholding stage.
        iou_threshold (float, optional): A float in [0,1]. All boxes with a Jaccard similarity of greater than `iou_threshold`
            with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers
            to the box score.
        top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the
            non-maximum suppression stage. Defaults to 200, following the paper.
        pred_coords (str, optional): The box coordinate format that the model outputs. Can be either 'centroids'
            for the format `(cx, cy, w, h)` (box center coordinates, width, and height), 'minmax' for the format
            `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`.
        normalize_coords (bool, optional): Set to `True` if the model outputs relative coordinates (i.e. coordinates in [0,1])
            and you wish to transform these relative coordinates back to absolute coordinates. If the model outputs
            relative coordinates, but you do not want to convert them back to absolute coordinates, set this to `False`.
            Do not set this to `True` if the model already outputs absolute coordinates, as that would result in incorrect
            coordinates. Requires `img_height` and `img_width` if set to `True`.

    Returns:
        None.
    '''

    if batch_generator_mode == 'resize':
        random_pad_and_resize = False
        resize = (img_height, img_width)
    elif batch_generator_mode == 'pad':
        random_pad_and_resize = (img_height, img_width, 0, 3, 1.0)
        resize = False
    else:
        raise ValueError(
            "Unexpected argument value: `batch_generator_mode` must be either 'resize' or 'pad', but received '{}'."
            .format(batch_generator_mode))

    # Set the generator parameters.
    generator = batch_generator.generate(
        batch_size=batch_size,
        shuffle=False,
        train=False,
        returns={'processed_images', 'image_ids', 'inverse_transform'},
        convert_to_3_channels=True,
        random_pad_and_resize=random_pad_and_resize,
        resize=resize,
        limit_boxes=False,
        keep_images_without_gt=True)
    # Put the results in this list.
    results = []
    # Compute the number of batches to iterate over the entire dataset.
    n_images = batch_generator.get_n_samples()
    print("Number of images in the evaluation dataset: {}".format(n_images))
    n_batches = int(ceil(n_images / batch_size))
    # Loop over all batches.
    tr = trange(n_batches, file=sys.stdout)
    tr.set_description('Producing results file')
    for i in tr:
        # Generate batch.
        batch_X, batch_image_ids, batch_inverse_coord_transform = next(
            generator)
        # Predict.
        y_pred = model.predict(batch_X)
        # Decode.
        y_pred_decoded = decode_y(y_pred,
                                  confidence_thresh=confidence_thresh,
                                  iou_threshold=iou_threshold,
                                  top_k=top_k,
                                  input_coords=pred_coords,
                                  normalize_coords=normalize_coords,
                                  img_height=img_height,
                                  img_width=img_width)
        # Convert each predicted box into the results format.
        for k, batch_item in enumerate(y_pred_decoded):
            # The box coordinates were predicted for the transformed
            # (resized, cropped, padded, etc.) image. We now have to
            # transform these coordinates back to what they would be
            # in the original images.
            batch_item[:, 2:] *= batch_inverse_coord_transform[k, :, 1]
            batch_item[:, 2:] += batch_inverse_coord_transform[k, :, 0]
            for box in batch_item:
                class_id = int(box[0])
                # Transform the consecutive class IDs back to the original COCO category IDs.
                cat_id = classes_to_cats[class_id]
                # Round the box coordinates to reduce the JSON file size and cast the
                # NumPy floats to plain Python floats so that `json.dump()` can serialize them.
                xmin = float(round(box[2], 1))
                ymin = float(round(box[3], 1))
                xmax = float(round(box[4], 1))
                ymax = float(round(box[5], 1))
                width = xmax - xmin
                height = ymax - ymin
                bbox = [xmin, ymin, width, height]
                result = {}
                result['image_id'] = batch_image_ids[k]
                result['category_id'] = cat_id
                result['score'] = float(round(box[1], 3))
                result['bbox'] = bbox
                results.append(result)

    with open(out_file, 'w') as f:
        json.dump(results, f)

    print("Prediction results saved in '{}'".format(out_file))
Example #6
                             limit_boxes=True,
                             include_thresh=0.40)

# Generate samples
from matplotlib import pyplot as plt
plt.switch_backend('TkAgg')

max_ite = int(ceil(dataset.get_n_samples() / batch_size))
ite = 0
while ite < max_ite:
    ite += 1
    r = next(generator)
    y_pred = model.predict(r[0])
    # Decode the whole batch once rather than re-decoding it for every batch item.
    y_pred_decoded = decode_y(y_pred,
                              confidence_thresh=0.95,
                              iou_threshold=0.4,
                              top_k=200,
                              input_coords='centroids',
                              normalize_coords=normalize_coords,
                              img_height=1080,  # img_height
                              img_width=1920)  # img_width

    np.set_printoptions(precision=2, suppress=True, linewidth=90)

    for myindex in range(len(r[0])):
        print("Predicted boxes:\n")
        print('   class   conf xmin   ymin   xmax   ymax')
        print(y_pred_decoded[myindex])

        # Visualize the predictions.
        plt.figure(figsize=(20, 12))