Example #1
    def preprocess_image(self, inputs):
        """
        Takes an image as input and prepares it to be passed through the network.
        """
        return preprocess_image(inputs, mode='caffe')
    def detectObjectsFromImage_Me(self, input_image="", output_image_path="", input_type="file", output_type="file",
                               extract_detected_objects=False, minimum_percentage_probability=50,
                               display_percentage_probability=True, display_object_name=True,
                               display_box=True, thread_safe=False, custom_objects=None):
        """
            The 'detectObjectsFromImage_Me()' function is used to detect objects observable in the given image:
                    * input_image , which can be a filepath, image numpy array or image file stream
                    * output_image_path (only if output_type = "file") , file path to the output image that will contain the detection boxes and labels
                    * input_type (optional) , the type of the input: file path, numpy array or image file stream. Acceptable values are "file", "array" and "stream"
                    * output_type (optional) , the type of the output: file path or numpy array. Acceptable values are "file" and "array"
                    * extract_detected_objects (optional) , option to save each object detected individually as an image and return an array of the objects' image path.
                    * minimum_percentage_probability (optional, 50 by default) , option to set the minimum percentage probability for nominating a detected object for output.
                    * display_percentage_probability (optional, True by default), option to show or hide the percentage probability of each object in the saved/returned detected image
                    * display_object_name (optional, True by default), option to show or hide the name of each object in the saved/returned detected image
                    * thread_safe (optional, False by default), if set to True, ensures the loaded detection model works across all threads by forcing all Tensorflow inference to run on the default graph.
            The values returned by this function depend on the parameters passed. The possible return
            values are stated below:
            - If extract_detected_objects = False or at its default value and output_type = 'file' or
                at its default value, you must pass in the 'output_image_path' as a string to the path you want
                the detected image to be saved. Then the function will return:
                1. an array of dictionaries, with each dictionary corresponding to an object
                    detected in the image. Each dictionary contains the following properties:
                    * name (string)
                    * percentage_probability (float)
                    * box_points (list of x1,y1,x2 and y2 coordinates)
            - If extract_detected_objects = False or at its default value and output_type = 'array',
              then the function will return:
                1. a numpy array of the detected image
                2. an array of dictionaries, with each dictionary corresponding to an object
                    detected in the image. Each dictionary contains the following properties:
                    * name (string)
                    * percentage_probability (float)
                    * box_points (list of x1,y1,x2 and y2 coordinates)
            - If extract_detected_objects = True and output_type = 'file' or
                at its default value, you must pass in the 'output_image_path' as a string to the path you want
                the detected image to be saved. Then the function will return:
                1. an array of dictionaries, with each dictionary corresponding to an object
                    detected in the image. Each dictionary contains the following properties:
                    * name (string)
                    * percentage_probability (float)
                    * box_points (list of x1,y1,x2 and y2 coordinates)
                2. an array of string paths to the image of each object extracted from the image
            - If extract_detected_objects = True and output_type = 'array', then the function will return:
                1. a numpy array of the detected image
                2. an array of dictionaries, with each dictionary corresponding to an object
                    detected in the image. Each dictionary contains the following properties:
                    * name (string)
                    * percentage_probability (float)
                    * box_points (list of x1,y1,x2 and y2 coordinates)
                3. an array of numpy arrays of each object detected in the image
            :param input_image:
            :param output_image_path:
            :param input_type:
            :param output_type:
            :param extract_detected_objects:
            :param minimum_percentage_probability:
            :param display_percentage_probability:
            :param display_object_name:
            :param thread_safe:
            :return image_frame:
            :return output_objects_array:
            :return detected_objects_image_array:
        """

        if not self.__modelLoaded:
            raise ValueError("You must call the loadModel() function before performing object detection.")
        else:
            try:
            try:

                model_detections = list()
                detections = list()
                image_copy = None

                detected_objects_image_array = []
                min_probability = minimum_percentage_probability / 100

                if input_type == "file":
                    input_image = cv2.imread(input_image)
                elif input_type == "array":
                    input_image = np.array(input_image)

                # work on copies so drawing does not modify the caller's array
                detected_copy = input_image.copy()
                image_copy = input_image.copy()

                if self.__modelType in ("yolov3", "tinyyolov3"):

                    image_h, image_w, _ = detected_copy.shape
                    detected_copy = preprocess_input(detected_copy, self.__yolo_model_image_size)

                    model = self.__model_collection[0]
                    yolo_result = model.predict(detected_copy)

                    model_detections = retrieve_yolo_detections(yolo_result,
                            self.__yolo_anchors,
                            min_probability,
                            self.__nms_thresh,
                            self.__yolo_model_image_size,
                            (image_w, image_h),
                            self.numbers_to_names)
                            
                elif (self.__modelType == "retinanet"):
                    detected_copy = preprocess_image(detected_copy)
                    detected_copy, scale = resize_image(detected_copy)

                    model = self.__model_collection[0]
                    boxes, scores, labels = model.predict_on_batch(np.expand_dims(detected_copy, axis=0))

                    
                    boxes /= scale

                    for box, score, label in zip(boxes[0], scores[0], labels[0]):
                        # scores are sorted so we can break
                        if score < min_probability:
                            break

                        detection_dict = dict()
                        detection_dict["name"] = self.numbers_to_names[label]
                        detection_dict["percentage_probability"] = score * 100
                        detection_dict["box_points"] = box.astype(int).tolist()
                        model_detections.append(detection_dict)

                counting = 0
                objects_dir = output_image_path + "-objects"

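                # Walk the raw model detections: apply the optional custom_objects
                # filter, draw each box/label on the working copy, and optionally
                # crop each detection out of the image.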
                for detection in model_detections:
                    counting += 1
                    label = detection["name"]
                    percentage_probability = detection["percentage_probability"]
                    box_points = detection["box_points"]

                    if custom_objects is not None:
                        if custom_objects.get(label) != "valid":
                            continue
                    
                    detections.append(detection)

                    if not display_object_name:
                        label = None

                    if not display_percentage_probability:
                        percentage_probability = None

                    
                    image_copy = draw_boxes(image_copy, 
                                    box_points,
                                    display_box,
                                    label, 
                                    percentage_probability, 
                                    self.__box_color)
                    
                    

                    if extract_detected_objects:
                        splitted_copy = image_copy.copy()[box_points[1]:box_points[3],
                                        box_points[0]:box_points[2]]
                        if (output_type == "file"):
                            os.makedirs(objects_dir, exist_ok=True)
                            splitted_image_path = os.path.join(objects_dir,
                                                                detection["name"] + "-" + str(
                                                                    counting) + ".jpg")
                            cv2.imwrite(splitted_image_path, splitted_copy)
                            detected_objects_image_array.append(splitted_image_path)
                        elif (output_type == "array"):
                            detected_objects_image_array.append(splitted_copy)

                image_copy = padded_fragment(image_copy)
                
                if (output_type == "file"):
                    cv2.imwrite(output_image_path, image_copy)

                if extract_detected_objects:
                    if (output_type == "file"):
                        return detections, detected_objects_image_array
                    elif (output_type == "array"):
                        return image_copy, detections, detected_objects_image_array

                else:
                    if (output_type == "file"):
                        return detections
                    elif (output_type == "array"):
                        return image_copy, detections

            except Exception as e:
                raise ValueError(
                    "Ensure you specified a correct input image, input type, output type and/or output image path.") from e
Example #3
def main(args):
    import itertools  # hoisted; used below to flatten (category, score) pairs

    categories = cfg.CATEGORIES
    num_classes = len(categories)

    device = torch.device(args.device)
    model = vgg19(pretrained=True).train().to(device)

    # change the last layer for finetuning
    classifier = model.classifier
    num_ftrs = classifier[-1].in_features
    new_classifier = torch.nn.Sequential(
        *(list(model.classifier.children())[:-1]),
        nn.Linear(num_ftrs, num_classes).to(device))
    model.classifier = new_classifier
    model.train()

    batch_size = args.batch_size
    epoch_size = args.epoch_size

    # use these values to normalize the input, as in the torchvision ImageNet guide
    mean = cfg.MEAN
    std = cfg.STD

    batch_size_dict = {'train': batch_size, 'test': batch_size}
    rds = data.RawDataset(root_dir=args.dataset_path,
                          num_workers=args.workers_num,
                          output_dims=cfg.INPUT_DIMS,
                          batch_size_dict=batch_size_dict)

    test_first = bool(args.test_first)

    cl_factor = args.cl_loss_factor
    am_factor = args.am_loss_factor

    epochs = args.nepoch
    loss_fn = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    # a GAIN wrapper that stores the chosen classification model, computes the
    # gradients w.r.t. grad_layer, and runs the GAIN algorithm
    gain = batch_GAIN_VOC(model=model,
                          grad_layer='features',
                          num_classes=num_classes,
                          pretraining_epochs=args.npretrain,
                          test_first=test_first,
                          grads_off=bool(args.grads_off),
                          grads_magnitude=args.grads_magnitude,
                          device=device)

    i = 0
    num_train_samples = 0

    chkpnt_epoch = 0
    if len(args.checkpoint_file_path_load) > 0:
        checkpoint = torch.load(args.checkpoint_file_path_load)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        chkpnt_epoch = checkpoint['epoch'] + 1
        i = checkpoint['iteration'] + 1
        num_train_samples = checkpoint['num_train_samples']

    writer = SummaryWriter(
        args.logging_path + args.logging_name + '_' +
        datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))

    writer.add_text('Start', 'start')

    print('Started')

    for epoch in range(chkpnt_epoch, epochs):

        total_train_single_accuracy = 0
        total_test_single_accuracy = 0

        epoch_train_cl_loss = 0

        model.train(True)

        if not test_first or epoch != 0:

            total_train_single_accuracy = 0
            total_test_single_accuracy = 0

            epoch_train_am_loss = 0
            epoch_train_cl_loss = 0
            epoch_train_total_loss = 0

            for sample in rds.datasets['rnd_train']:
                augmented_batch = []
                batch, augmented = preprocess_image(
                    sample[0][0].squeeze().cpu().detach().numpy(),
                    train=True,
                    mean=mean,
                    std=std)
                augmented_batch.append(augmented)
                for img in sample[0][1:]:
                    input_tensor, augmented_image = preprocess_image(
                        img.squeeze().cpu().detach().numpy(),
                        train=True,
                        mean=mean,
                        std=std)
                    batch = torch.cat((batch, input_tensor), dim=0)
                    augmented_batch.append(augmented_image)
                batch = batch.to(device)

                optimizer.zero_grad()

                labels = sample[2]

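                # Forward pass through the GAIN wrapper: classification logits,
                # logits for the attention-masked image, the heatmap, the masked
                # input and the soft mask, unpacked in that order below.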
                logits_cl, logits_am, heatmap, masked_image, mask = gain(
                    batch, sample[1])

                class_onehot = torch.stack(sample[1]).float()

                cl_loss = loss_fn(logits_cl, class_onehot)

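                # Attention-mining loss: softmax the masked-image logits, then
                # average the score the masked image still receives for its
                # ground-truth labels; minimizing it drives that score down.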
                am_scores = nn.Softmax(dim=1)(logits_am)
                batch_am_labels = []
                batch_am_labels_scores = []
                for k in range(len(batch)):
                    num_of_labels = len(sample[2][k])
                    _, am_labels = am_scores[k].topk(num_of_labels)
                    batch_am_labels.append(am_labels)
                    am_labels_scores = am_scores[k].view(-1)[
                        labels[k]].sum() / num_of_labels
                    batch_am_labels_scores.append(am_labels_scores)
                am_loss = sum(batch_am_labels_scores) / batch_size

                # g = make_dot(am_loss, dict(gain.named_parameters()), show_attrs = True, show_saved = True)
                # g.save('grad_viz', train_path)

                total_loss = cl_loss * cl_factor + am_loss * am_factor

                epoch_train_am_loss += (am_loss *
                                        am_factor).detach().cpu().item()
                epoch_train_cl_loss += (cl_loss *
                                        cl_factor).detach().cpu().item()
                epoch_train_total_loss += total_loss.detach().cpu().item()

                writer.add_scalar('Per_Step/train/cl_loss',
                                  (cl_loss * cl_factor).detach().cpu().item(),
                                  i)
                writer.add_scalar('Per_Step/train/am_loss',
                                  (am_loss * am_factor).detach().cpu().item(),
                                  i)
                writer.add_scalar('Per_Step/train/total_loss',
                                  total_loss.detach().cpu().item(), i)

                loss = cl_loss * cl_factor
                if gain.AM_enabled():
                    loss += am_loss * am_factor

                loss.backward()
                optimizer.step()

                # Single label evaluation
                for k in range(len(batch)):
                    num_of_labels = len(sample[2][k])
                    _, y_pred = logits_cl[k].detach().topk(k=num_of_labels)
                    y_pred = y_pred.view(-1)
                    gt = torch.tensor(sorted(sample[2][k]), device=device)

                    acc = (y_pred == gt).sum()
                    total_train_single_accuracy += acc.detach().cpu()

                # Multi label evaluation
                #_, y_pred_multi = logits_cl.detach().topk(num_of_labels)
                #y_pred_multi = y_pred_multi.view(-1)
                #acc_multi = (y_pred_multi == gt).sum() / num_of_labels
                #total_train_multi_accuracy += acc_multi.detach().cpu()

                if i % args.record_itr_train == 0:
                    for t in range(args.nrecord):
                        num_of_labels = len(sample[2][t])
                        one_heatmap = heatmap[t].squeeze().cpu().detach().numpy()

                        one_augmented_im = torch.tensor(
                            np.array(
                                augmented_batch[t])).to(device).unsqueeze(0)
                        one_masked_image = masked_image[t].detach().squeeze()
                        htm = deprocess_image(one_heatmap)
                        visualization, red_htm = show_cam_on_image(
                            one_augmented_im.cpu().detach().numpy(), htm, True)

                        viz = torch.from_numpy(visualization).to(device)
                        masked_im = denorm(one_masked_image, mean, std)
                        masked_im = (masked_im.squeeze().permute([1, 2, 0])
                            .cpu().detach().numpy() * 255).round()\
                            .astype(np.uint8)

                        orig = sample[0][t].unsqueeze(0)
                        masked_im = torch.from_numpy(masked_im).unsqueeze(
                            0).to(device)
                        orig_viz = torch.cat(
                            (orig, one_augmented_im, viz, masked_im), 0)
                        grid = torchvision.utils.make_grid(
                            orig_viz.permute([0, 3, 1, 2]))
                        gt = [categories[x] for x in labels[t]]
                        writer.add_image(tag='Train_Heatmaps/image_' + str(i) +
                                         '_' + str(t) + '_' + '_'.join(gt),
                                         img_tensor=grid,
                                         global_step=epoch,
                                         dataformats='CHW')
                        y_scores = nn.Softmax(dim=0)(logits_cl[t].detach())
                        _, predicted_categories = y_scores.topk(num_of_labels)
                        predicted_cl = [(categories[x],
                                         format(y_scores.view(-1)[x], '.4f'))
                                        for x in predicted_categories.view(-1)]
                        labels_cl = [(categories[x],
                                      format(y_scores.view(-1)[x], '.4f'))
                                     for x in labels[t]]
                        predicted_cl = list(itertools.chain(*predicted_cl))
                        labels_cl = list(itertools.chain(*labels_cl))
                        cl_text = 'cl_gt_' + '_'.join(
                            labels_cl) + '_pred_' + '_'.join(predicted_cl)

                        predicted_am = [(categories[x],
                                         format(am_scores[t].view(-1)[x],
                                                '.4f'))
                                        for x in batch_am_labels[t].view(-1)]
                        labels_am = [(categories[x],
                                      format(am_scores[t].view(-1)[x],
                                             '.4f'))
                                     for x in labels[t]]
                        predicted_am = list(itertools.chain(*predicted_am))
                        labels_am = list(itertools.chain(*labels_am))
                        am_text = '_am_gt_' + '_'.join(
                            labels_am) + '_pred_' + '_'.join(predicted_am)

                        writer.add_text('Train_Heatmaps_Description/image_' +
                                        str(i) + '_' + str(t) + '_' +
                                        '_'.join(gt),
                                        cl_text + am_text,
                                        global_step=epoch)
                i += 1

                # count each sample once, during the first epoch that trains
                if (epoch == 0 and not test_first) or (epoch == 1 and test_first):
                    num_train_samples += 1

                if i % epoch_size == 0:
                    break

        model.train(False)
        j = 0

        for sample in rds.datasets['seq_test']:

            batch, _ = preprocess_image(
                sample[0][0].squeeze().cpu().detach().numpy(),
                train=False,
                mean=mean,
                std=std)
            for img in sample[0][1:]:
                input_tensor, input_image = preprocess_image(
                    img.squeeze().cpu().detach().numpy(),
                    train=False,
                    mean=mean,
                    std=std)
                batch = torch.cat((batch, input_tensor), dim=0)
            batch = batch.to(device)
            labels = sample[2]

            logits_cl, logits_am, heatmap, masked_image, mask = gain(
                batch, sample[1])

            am_scores = nn.Softmax(dim=1)(logits_am)
            batch_am_labels = []
            for k in range(len(batch)):
                num_of_labels = len(sample[2][k])
                _, am_labels = am_scores[k].topk(num_of_labels)
                batch_am_labels.append(am_labels)

            # Single label evaluation
            for k in range(len(batch)):
                num_of_labels = len(sample[2][k])
                _, y_pred = logits_cl[k].detach().topk(k=num_of_labels)
                y_pred = y_pred.view(-1)
                gt = torch.tensor(sorted(sample[2][k]), device=device)

                acc = (y_pred == gt).sum()
                total_test_single_accuracy += acc.detach().cpu()

            if j % args.record_itr_test == 0:
                for t in range(args.nrecord):
                    num_of_labels = len(sample[2][t])
                    one_heatmap = heatmap[t].squeeze().cpu().detach().numpy()
                    one_input_image = sample[0][t].cpu().detach().numpy()
                    one_masked_image = masked_image[t].detach().squeeze()
                    htm = deprocess_image(one_heatmap)
                    # avoid clobbering the heatmap tensor indexed as heatmap[t]
                    visualization, red_htm = show_cam_on_image(
                        one_input_image, htm, True)
                    viz = torch.from_numpy(visualization).unsqueeze(0).to(
                        device)
                    augmented = torch.tensor(one_input_image).unsqueeze(0).to(
                        device)
                    # use a new name so masked_image[t] stays valid for later t
                    masked_im = denorm(one_masked_image, mean, std)
                    masked_im = (masked_im.squeeze().permute(
                        [1, 2, 0]).cpu().detach().numpy() *
                                 255).round().astype(np.uint8)
                    orig = sample[0][t].unsqueeze(0)
                    masked_im = torch.from_numpy(masked_im).unsqueeze(
                        0).to(device)
                    orig_viz = torch.cat((orig, augmented, viz, masked_im),
                                         0)
                    grid = torchvision.utils.make_grid(
                        orig_viz.permute([0, 3, 1, 2]))
                    gt = [categories[x] for x in labels[t]]
                    writer.add_image(tag='Test_Heatmaps/image_' + str(j) +
                                     '_' + '_'.join(gt),
                                     img_tensor=grid,
                                     global_step=epoch,
                                     dataformats='CHW')
                    y_scores = nn.Softmax(dim=0)(logits_cl[t].detach())
                    _, predicted_categories = y_scores.topk(num_of_labels)
                    predicted_cl = [(categories[x],
                                     format(y_scores.view(-1)[x], '.4f'))
                                    for x in predicted_categories.view(-1)]
                    labels_cl = [(categories[x],
                                  format(y_scores.view(-1)[x], '.4f'))
                                 for x in labels[t]]
                    predicted_cl = list(itertools.chain(*predicted_cl))
                    labels_cl = list(itertools.chain(*labels_cl))
                    cl_text = 'cl_gt_' + '_'.join(
                        labels_cl) + '_pred_' + '_'.join(predicted_cl)

                    predicted_am = [(categories[x],
                                     format(am_scores[t].view(-1)[x], '.4f'))
                                    for x in batch_am_labels[t].view(-1)]
                    labels_am = [(categories[x],
                                  format(am_scores[t].view(-1)[x], '.4f'))
                                 for x in labels[t]]
                    predicted_am = list(itertools.chain(*predicted_am))
                    labels_am = list(itertools.chain(*labels_am))
                    am_text = '_am_gt_' + '_'.join(
                        labels_am) + '_pred_' + '_'.join(predicted_am)

                    writer.add_text('Test_Heatmaps_Description/image_' +
                                    str(j) + '_' + '_'.join(gt),
                                    cl_text + am_text,
                                    global_step=epoch)

            j += 1

        num_test_samples = len(rds.datasets['seq_test']) * batch_size
        print('finished epoch number:', epoch)

        if (test_first and epoch > 0) or not test_first:
            writer.add_scalar(
                'Loss/train/cl_total_loss',
                epoch_train_cl_loss / (num_train_samples * batch_size), epoch)
            writer.add_scalar('Loss/train/am_total_loss',
                              epoch_train_am_loss / num_train_samples, epoch)
            writer.add_scalar('Loss/train/combined_total_loss',
                              epoch_train_total_loss / num_train_samples,
                              epoch)
            writer.add_scalar(
                'Accuracy/train/cl_accuracy',
                total_train_single_accuracy / (num_train_samples * batch_size),
                epoch)

        writer.add_scalar('Accuracy/test/cl_accuracy',
                          total_test_single_accuracy / num_test_samples, epoch)

        gain.increase_epoch_count()

        if len(args.checkpoint_file_path_save) > 0 and epoch % args.checkpoint_nepoch == 0:
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'iteration': i,
                    'num_train_samples': num_train_samples
                }, args.checkpoint_file_path_save +
                datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
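
The attention-mining term is the least standard piece of the loss above; here is a self-contained sketch of the same computation (the function name and toy tensors are illustrative, and the training loop above divides by the configured batch_size rather than the list length):

import torch
import torch.nn as nn

def attention_mining_loss(logits_am, labels_per_sample):
    """Mean ground-truth-class softmax score of the attention-masked images.

    logits_am: (batch, num_classes) logits for the masked inputs.
    labels_per_sample: per-sample lists of label indices, as in sample[2].
    """
    am_scores = nn.Softmax(dim=1)(logits_am)
    per_sample = [am_scores[k][labels].sum() / len(labels)
                  for k, labels in enumerate(labels_per_sample)]
    return sum(per_sample) / len(per_sample)

# toy check: two samples, three classes
logits = torch.tensor([[2.0, 0.1, -1.0], [0.0, 0.0, 3.0]])
print(attention_mining_loss(logits, [[0], [2]]))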
Example #4
voc_classes = {
    'aeroplane': 0,
    'bicycle': 1,
    'bird': 2,
    'boat': 3,
    'bottle': 4,
    'bus': 5,
    'car': 6,
    'cat': 7,
    'chair': 8,
    'cow': 9,
    'diningtable': 10,
    'dog': 11,
    'horse': 12,
    'motorbike': 13,
    'person': 14,
    'pottedplant': 15,
    'sheep': 16,
    'sofa': 17,
    'train': 18,
    'tvmonitor': 19
}
labels_to_names = {value: key for key, value in voc_classes.items()}
# load the test images
image_paths = glob.glob('datasets/voc_test/VOC2007/JPEGImages/*.jpg')
for image_path in image_paths:
    image = read_image_bgr(image_path)

    # copy to draw on
    draw = image.copy()

    # preprocess image for network
    image = preprocess_image(image)
    image, scale = resize_image(image)

    # process image
    start = time.time()
    # locations, feature_shapes = model.predict_on_batch(np.expand_dims(image, axis=0))
    boxes, scores, labels = model.predict_on_batch(
        np.expand_dims(image, axis=0))
    print("processing time: ", time.time() - start)

    # correct for image scale
    boxes /= scale
    labels_to_locations = {}
    # visualize detections
    for box, score, label in zip(boxes[0], scores[0], labels[0]):
        # scores are sorted so we can break; the 0.5 cutoff is an assumed
        # threshold, matching the other examples on this page
        if score < 0.5:
            break
        # record where each class was seen (box is [x1, y1, x2, y2])
        labels_to_locations.setdefault(labels_to_names[label], []).append(
            box.astype(int).tolist())
Example #5
    def preprocess_image(self, image):
        return preprocess_image(image)
    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        """
        Produce image detection predictions.

        Parameters
        ----------
            inputs  : numpy ndarray of size (n_images, dimension) containing the d3m Index, image name, 
                      and bounding box for each image.

        Returns
        -------
            outputs : A d3m dataframe container with the d3m index, image name, bounding boxes as 
                      a string (8 coordinate format), and confidence scores.
        """
        iou_threshold = 0.5     # Bounding box overlap threshold for false positive or true positive
        score_threshold = 0.05  # The score confidence threshold to use for detections
        max_detections = 100    # Maximum number of detections to use per image

        # Convert training model to inference model
        inference_model = models.convert_model(self.training_model)

        # Generate image paths
        image_cols = inputs.metadata.get_columns_with_semantic_type('https://metadata.datadrivendiscovery.org/types/FileName')
        self.base_dir = [inputs.metadata.query((metadata_base.ALL_ELEMENTS, t))['location_base_uris'][0].replace('file:///', '/') for t in image_cols]
        self.image_paths = np.array([[os.path.join(base_dir, filename) for filename in inputs.iloc[:, col]] for base_dir, col in zip(self.base_dir, image_cols)]).flatten()
        self.image_paths = pd.Series(self.image_paths)

        # Initialize output objects
        box_list = []
        score_list = []
        image_name_list = []

        # Predict bounding boxes and confidence scores for each image
        # keep the first occurrence of each image path, preserving order
        image_list = list(dict.fromkeys(self.image_paths.tolist()))

        start_time = time.time()
        print('Starting testing...', file = sys.__stdout__)

        for i in image_list:
            image = read_image_bgr(i)

            # preprocess image for network
            image = preprocess_image(image)
            image, scale = resize_image(image)

            boxes, scores, labels = inference_model.predict_on_batch(np.expand_dims(image, axis = 0))

            # correct for image scale
            boxes /= scale

            for box, score in zip(boxes[0], scores[0]):
                if score < 0.5:
                    break
    
                b = box.astype(int)
                box_list.append(b)
                score_list.append(score)
                image_name_list.append(i)  # one image path per predicted box

        print(f'Testing complete. Testing took {time.time()-start_time} seconds.', file = sys.__stdout__)
        
        ## Convert predicted boxes from a list of arrays to a list of strings
        boxes = np.array(box_list).tolist()
        boxes = list(map(lambda x : [x[0], x[1], x[0], x[3], x[2], x[3], x[2], x[1]], boxes))  # Convert to 8 coordinate format for D3M            
        boxes = list(map(lambda x : ",".join(map(str, x)), boxes))

        # Create mapping between image names and D3M index
        input_df = pd.DataFrame({
            'd3mIndex': inputs.d3mIndex,
            'image': [os.path.basename(path) for path in self.image_paths]
        })

        d3mIdx_image_mapping = input_df.set_index('image').T.to_dict('list')

        # Extract values for image name keys and get missing image predictions (if they exist)
        image_name_list = [os.path.basename(path) for path in image_name_list]
        d3mIdx = [d3mIdx_image_mapping.get(key) for key in image_name_list]
        empty_predictions_image_names = [k for k,v in d3mIdx_image_mapping.items() if v not in d3mIdx]
        d3mIdx = [item for sublist in d3mIdx for item in sublist]   # Flatten list of lists

        ## Assemble in a Pandas DataFrame
        results = pd.DataFrame({
            'd3mIndex': d3mIdx,
            'bounding_box': boxes,
            'confidence': score_list
        })

        # D3M metrics evaluator needs at least one prediction per image. If RetinaNet does not return 
        # predictions for an image, create a dummy empty prediction row to add to results_df for that
        # missing image.
        if len(empty_predictions_image_names) != 0:
            # Create a data frame of empty predictions for each missing image and
            # concat with results. Sort results_df.
            empty_predictions_df = self._fill_empty_predictions(empty_predictions_image_names, d3mIdx_image_mapping)
            results_df = pd.concat([results, empty_predictions_df]).sort_values('d3mIndex')
        else:
            results_df = results

        # Convert to DataFrame container
        results_df = d3m_DataFrame(results_df)
        
        ## Assemble first output column ('d3mIndex')
        col_dict = dict(results_df.metadata.query((metadata_base.ALL_ELEMENTS, 0)))
        col_dict['structural_type'] = type("1")
        col_dict['name'] = 'd3mIndex'
        col_dict['semantic_types'] = ('http://schema.org/Integer', 
                                      'https://metadata.datadrivendiscovery.org/types/PrimaryKey')
        results_df.metadata = results_df.metadata.update((metadata_base.ALL_ELEMENTS, 0), col_dict)

        ## Assemble second output column ('bounding_box')
        col_dict = dict(results_df.metadata.query((metadata_base.ALL_ELEMENTS, 1)))
        col_dict['structural_type'] = type("1")
        col_dict['name'] = 'bounding_box'
        col_dict['semantic_types'] = ('http://schema.org/Text', 
                                      'https://metadata.datadrivendiscovery.org/types/PredictedTarget', 
                                      'https://metadata.datadrivendiscovery.org/types/BoundingPolygon')
        results_df.metadata = results_df.metadata.update((metadata_base.ALL_ELEMENTS, 1), col_dict)

        ## Assemble third output column ('confidence')
        col_dict = dict(results_df.metadata.query((metadata_base.ALL_ELEMENTS, 2)))
        col_dict['structural_type'] = type("1")
        col_dict['name'] = 'confidence'
        col_dict['semantic_types'] = ('http://schema.org/Integer', 
                                      'https://metadata.datadrivendiscovery.org/types/Score')
        results_df.metadata = results_df.metadata.update((metadata_base.ALL_ELEMENTS, 2), col_dict) 
        
        return CallResult(results_df)
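
The 8-coordinate 'bounding_box' string produced above can be illustrated in isolation; this helper mirrors the two map() calls in produce() (the function name is illustrative):

def box_to_d3m_string(box):
    """Convert [x1, y1, x2, y2] to the 8-coordinate (4-corner) D3M string."""
    x1, y1, x2, y2 = box
    # corner order: top-left, bottom-left, bottom-right, top-right
    corners = [x1, y1, x1, y2, x2, y2, x2, y1]
    return ",".join(map(str, corners))

print(box_to_d3m_string([10, 20, 110, 220]))
# -> '10,20,10,220,110,220,110,20'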