Example #1
import tensorflow as tf
from keras import backend as K


def non_max_suppression(scores, boxes, classes, max_boxes=10, iou_threshold=0.5, score_threshold=0.3):
    """
    Applies Non-max suppression (NMS) to a set of boxes

    Arguments:
    scores -- tensor of shape (None,), output of yolo_filter_boxes()
    boxes -- tensor of shape (None, 4), output of yolo_filter_boxes() that have been scaled to the image size (see later)
    classes -- tensor of shape (None,), output of yolo_filter_boxes()
    max_boxes -- integer, maximum number of predicted boxes you'd like
    iou_threshold -- real value, "intersection over union" threshold used for NMS filtering
    score_threshold -- real value, minimum score used for NMS filtering

    Returns:
    scores -- tensor of shape (None,), predicted score for each box
    boxes -- tensor of shape (None, 4), predicted box coordinates
    classes -- tensor of shape (None,), predicted class for each box

    Note: The "None" dimension of the output tensors is at most max_boxes, since
    tf.image.non_max_suppression returns no more than max_boxes indices.
    """

    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    # Use tf.image.non_max_suppression() to get the list of indices corresponding to boxes you keep
    nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes_tensor,
                                               iou_threshold=iou_threshold,
                                               score_threshold=score_threshold)

    # Use K.gather() to select only nms_indices from scores, boxes and classes
    scores = K.gather(scores, nms_indices)
    boxes = K.gather(boxes, nms_indices)
    classes = K.gather(classes, nms_indices)

    return scores, boxes, classes
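
A minimal usage sketch (TF 1.x graph mode, using the imports above; the tensor values are made up for illustration):

example_scores = K.constant([0.9, 0.8, 0.2])
example_boxes = K.constant([[0., 0., 10., 10.], [1., 1., 11., 11.], [20., 20., 30., 30.]])
example_classes = K.constant([0., 0., 1.])
out_scores, out_boxes, out_classes = non_max_suppression(
    example_scores, example_boxes, example_classes, max_boxes=2)
# The first two boxes overlap heavily (IoU ~0.68), so NMS keeps only the
# higher-scoring one; the third box falls below score_threshold=0.3.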
Example #2
    def call(self, inputs, training=None):
        class_labels = K.squeeze(inputs[1], axis=1)
        inputs = inputs[0]
        input_shape = K.int_shape(inputs)
        # Axes a normalization reduction would use; unused below, since this
        # layer only applies the class-conditional scale and shift.
        reduction_axes = list(range(0, len(input_shape)))

        if self.axis is not None:
            del reduction_axes[self.axis]

        del reduction_axes[0]

        normed = inputs

        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[0] = K.shape(inputs)[0]
        if self.axis is not None:
            broadcast_shape[self.axis] = input_shape[self.axis]

        if self.scale:
            broadcast_gamma = K.reshape(K.gather(self.gamma, class_labels), broadcast_shape)
            normed = normed * broadcast_gamma
        if self.center:
            broadcast_beta = K.reshape(K.gather(self.beta, class_labels), broadcast_shape)
            normed = normed + broadcast_beta
        return normed
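
For reference, K.gather indexes along the first axis, so gathering the per-class parameters with the batch's class labels yields one parameter row per sample (hypothetical shapes):

gamma = K.constant([[1., 2.], [3., 4.]])            # (num_classes=2, channels=2)
class_labels = K.constant([1, 0, 1], dtype='int32')
per_sample_gamma = K.gather(gamma, class_labels)    # shape (3, 2)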
Example #3
    def call(self, inputs, **kwargs):
        backend = retina_net_tensorflow_backend()

        boxes, classification, detections = inputs

        # TODO: support batch size > 1.
        boxes = boxes[0]
        classification = classification[0]
        detections = detections[0]

        scores = K.max(classification, axis=1)

        # selecting only the best anchors improves speed at the cost of a small drop in accuracy
        if self.top_k:
            scores, indices = backend.top_k(scores, self.top_k, sorted=False)
            boxes = K.gather(boxes, indices)
            classification = K.gather(classification, indices)
            detections = K.gather(detections, indices)

        indices = backend.non_max_suppression(boxes,
                                              scores,
                                              max_output_size=self.max_boxes,
                                              iou_threshold=self.nms_threshold)

        detections = K.gather(detections, indices)
        return K.expand_dims(detections, axis=0)
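
The top-k pre-filter used above can be sketched with the plain TensorFlow op (standalone, rather than the repo's backend wrapper):

anchor_scores = K.constant([0.1, 0.9, 0.4, 0.7])
top_scores, top_indices = tf.nn.top_k(anchor_scores, k=2, sorted=False)
# K.gather(boxes, top_indices) then keeps only the two best anchors.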
Example #4
    def call(self, inputs):
        def apply_separate_filter_for_each_batch(inputs):
            kernel = inputs[1]
            x = K.expand_dims(inputs[0], axis=0)
            outputs = K.conv2d(
                        x,
                        kernel,
                        strides=self.strides,
                        padding=self.padding,
                        data_format=self.data_format,
                        dilation_rate=self.dilation_rate)
            if self.bias is not None:
                bias = inputs[2]
                outputs = K.bias_add(outputs, bias, data_format=self.data_format)
            return K.squeeze(outputs, axis=0)
        x = inputs[0]
        classes = K.squeeze(inputs[1], axis=1)

        if self.bias is not None:
            outputs = K.map_fn(apply_separate_filter_for_each_batch,
                               [x, K.gather(self.kernel, classes), K.gather(self.bias, classes)],
                               dtype='float32')
        else:
            outputs = K.map_fn(apply_separate_filter_for_each_batch,
                               [x, K.gather(self.kernel, classes)],
                               dtype='float32')

        if self.activation is not None:
            return self.activation(outputs)
        return outputs
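
K.map_fn iterates over the leading axis, handing the mapped function one slice from each tensor in the list per step; a minimal sketch of that pattern (dummy values, assuming the Keras 2.x backend):

xs = K.constant([[1., 2.], [3., 4.]])
ks = K.constant([[10.], [100.]])
out = K.map_fn(lambda t: t[0] * t[1][0], [xs, ks], dtype='float32')
# out -> [[10., 20.], [300., 400.]]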
Example #5
def YOLOEval(yolo_outputs,
             anchors,
             num_classes,
             image_shape,
             max_boxes=20,
             score_threshold=.6,
             iou_threshold=.5):
    '''Returns evaluated filtered boxes based on given input.'''
    num_layers = len(yolo_outputs)
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 \
        else [[3, 4, 5], [1, 2, 3]]
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
    boxes = []
    box_scores = []

    for i in range(num_layers):
        _boxes, _box_scores = YOLOBoxesAndScores(yolo_outputs[i],
                                                 anchors[anchor_mask[i]],
                                                 num_classes, input_shape,
                                                 image_shape)

        boxes.append(_boxes)
        box_scores.append(_box_scores)

    boxes = K.concatenate(boxes, axis=0)
    box_scores = K.concatenate(box_scores, axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')

    boxes_, scores_, classes_ = [], [], []

    for i in range(num_classes):
        _class_boxes = tf.boolean_mask(boxes, mask[:, i])
        _class_boxes_scores = tf.boolean_mask(box_scores[:, i], mask[:, i])

        _nms_index = tf.image.non_max_suppression(_class_boxes,
                                                  _class_boxes_scores,
                                                  max_boxes_tensor,
                                                  iou_threshold=iou_threshold)

        _class_boxes = K.gather(_class_boxes, _nms_index)
        _class_boxes_scores = K.gather(_class_boxes_scores, _nms_index)
        _classes = K.ones_like(_class_boxes_scores, dtype='int32') * i

        boxes_.append(_class_boxes)
        scores_.append(_class_boxes_scores)
        classes_.append(_classes)

    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)

    return boxes_, scores_, classes_
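
tf.boolean_mask, which drives the per-class split above, keeps only the rows where the mask is True:

vals = K.constant([[1., 2.], [3., 4.], [5., 6.]])
keep = K.constant([True, False, True], dtype='bool')
kept = tf.boolean_mask(vals, keep)   # [[1., 2.], [5., 6.]]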
Example #6
def yolo_eval(yolo_outputs,
              image_shape,
              max_boxes=10,
              score_threshold=.6,
              iou_threshold=.5):
    """ Evaluate YOLO model on given input batch and return filtered boxes.

    Parameters
    ----------
    yolo_outputs: Tuple
        Contains the box_confidence, box_xy, box_wh and box_class_probs
        tensors from the yolo_head function
    image_shape: tf.Tensor
        A tensor containing the input image shape
    max_boxes: int
        Maximum number of predicted boxes
    score_threshold: float
        Probability threshold value
    iou_threshold: float
        IoU threshold value

    Returns
    -------
    boxes, scores, classes: (tf.Tensor, tf.Tensor, tf.Tensor)
        A tuple of tensors containing the boxes, scores and classes.

    """

    box_confidence, box_xy, box_wh, box_class_probs = yolo_outputs
    boxes = boxes_to_corners(box_xy, box_wh)
    boxes, scores, classes = filter_boxes(box_confidence,
                                          boxes,
                                          box_class_probs,
                                          threshold=score_threshold)

    # Scale boxes back to original image shape.
    height, width = image_shape[0], image_shape[1]
    image_dims = K.stack([height, width, height, width])
    image_dims = K.reshape(image_dims, [1, 4])
    boxes = boxes * image_dims

    # TODO: Something must be done about this ugly hack!
    max_boxes_tensor = K.variable(max_boxes, dtype='int32')
    K.get_session().run(tf.variables_initializer([max_boxes_tensor]))
    nms_index = tf.image.non_max_suppression(boxes,
                                             scores,
                                             max_boxes_tensor,
                                             iou_threshold=iou_threshold)
    boxes = K.gather(boxes, nms_index)
    scores = K.gather(scores, nms_index)
    classes = K.gather(classes, nms_index)

    return boxes, scores, classes
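
Note that the variable-initialization hack flagged by the TODO predates the simpler pattern used elsewhere on this page; tf.image.non_max_suppression also accepts a plain constant:

max_boxes_tensor = K.constant(max_boxes, dtype='int32')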
Example #7
def yolo_eval(yolo_outputs,  # produce relative-size predicted boxes via NMS
              anchors,
              num_classes,
              image_shape,
              max_boxes=50,
              score_threshold=.6,
              iou_threshold=.5):
    """Evaluate YOLO model on given input and return filtered boxes."""
    num_layers = len(yolo_outputs)
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 \
        else [[3, 4, 5], [1, 2, 3]]  # default setting
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
    boxes = []
    box_scores = []
    for l in range(num_layers):
        _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
                                                    anchors[anchor_mask[l]],
                                                    num_classes, input_shape,
                                                    image_shape)
        boxes.append(_boxes)
        box_scores.append(_box_scores)
    boxes = K.concatenate(boxes, axis=0)
    box_scores = K.concatenate(box_scores, axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    boxes_ = []
    scores_ = []
    classes_ = []
    for c in range(num_classes):
        # TODO: use keras backend instead of tf.
        class_boxes = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
        nms_index = tf.image.non_max_suppression(class_boxes,
                                                 class_box_scores,
                                                 max_boxes_tensor,
                                                 iou_threshold=iou_threshold)
        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        classes = K.ones_like(class_box_scores, 'int32') * c
        boxes_.append(class_boxes)
        scores_.append(class_box_scores)
        classes_.append(classes)
    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)

    return boxes_, scores_, classes_
Example #8
    def call(self, inputs):
        data_format = self.data_format
        if data_format not in {'channels_first', 'channels_last'}:
            raise ValueError('Unknown data_format ' + str(data_format))

        strides = (1,) + self.strides + (1,)

        x = inputs[0]
        cls = K.squeeze(inputs[1], axis=-1)

        #Kernel preprocess
        kernel = K.gather(self.kernel, cls)
        #(bs, w, h, c)
        kernel = tf.transpose(kernel, [1, 2, 3, 0])
        #(w, h, c, bs)
        kernel = K.reshape(kernel, (self.kernel_size[0], self.kernel_size[1], -1))
        #(w, h, c * bs)
        kernel = K.expand_dims(kernel, axis=-1)
        #(w, h, c * bs, 1)

        if self.data_format == 'channels_first':
            x = tf.transpose(x, [0, 2, 3, 1])
        bs, w, h, c = K.int_shape(x)
        #(bs, w, h, c)
        x = tf.transpose(x, [1, 2, 3, 0])
        #(w, h, c, bs)
        x = K.reshape(x, (w, h, -1))
        #(w, h, c * bs)
        x = K.expand_dims(x, axis=0)
        #(1, w, h, c * bs)

        padding = _preprocess_padding(self.padding)

        outputs = tf.nn.depthwise_conv2d(x, kernel,
                                         strides=strides,
                                         padding=padding,
                                         rate=self.dilation_rate)
        #(1, w, h, c * bs)
        _, w, h, _ = K.int_shape(outputs)
        outputs = K.reshape(outputs, [w, h, self.filters, -1])
        #(w, h, c, bs)
        outputs = tf.transpose(outputs, [3, 0, 1, 2])
        #(bs, w, h, c)

        if self.bias is not None:
            #(num_cls, out)
            bias = tf.gather(self.bias, cls)
            #(bs, bias)
            bias = tf.expand_dims(bias, axis=1)
            bias = tf.expand_dims(bias, axis=1)
            #(bs, bias, 1, 1)
            outputs += bias

        if self.data_format == 'channels_first':
            outputs = tf.transpose(outputs, [0, 3, 1, 2])

        if self.activation is not None:
            return self.activation(outputs)
        return outputs
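
Example #8 folds the batch into the channel axis so that tf.nn.depthwise_conv2d applies a different kernel to every sample; a minimal shape sketch of the same trick (dummy zero tensors, hypothetical sizes):

x = tf.zeros((2, 8, 8, 4))                           # (bs, w, h, c)
kernels = tf.zeros((3, 3, 4 * 2, 1))                 # (kh, kw, c * bs, 1)
x_flat = tf.reshape(tf.transpose(x, [1, 2, 3, 0]), (1, 8, 8, -1))
y = tf.nn.depthwise_conv2d(x_flat, kernels, strides=(1, 1, 1, 1), padding='SAME')
# y has shape (1, 8, 8, c * bs) and is unfolded back into (bs, w, h, c).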
Example #9
    def call(self, inputs, training=None):
        if K.dtype(inputs) != 'int32':
            inputs = K.cast(inputs, 'int32')

        def _l2normalize(v, eps=1e-12):
            return v / (K.sum(v ** 2) ** 0.5 + eps)

        def power_iteration(W, u):
            # According to the paper, one power-iteration step is enough.
            _u = u
            _v = _l2normalize(K.dot(_u, K.transpose(W)))
            _u = _l2normalize(K.dot(_v, W))
            return _u, _v

        W_shape = self.embeddings.shape.as_list()
        # Flatten the tensor
        W_reshaped = K.reshape(self.embeddings, [-1, W_shape[-1]])
        _u, _v = power_iteration(W_reshaped, self.u)
        # Calculate sigma, the spectral norm estimate
        sigma = K.dot(_v, W_reshaped)
        sigma = K.dot(sigma, K.transpose(_u))
        # Normalize the weight by sigma
        W_bar = W_reshaped / sigma
        # Reshape the weight tensor
        if training in {0, False}:
            W_bar = K.reshape(W_bar, W_shape)
        else:
            with tf.control_dependencies([self.u.assign(_u)]):
                W_bar = K.reshape(W_bar, W_shape)
        self.embeddings = W_bar

        out = K.gather(self.embeddings, inputs)
        return out
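
The power iteration above estimates the largest singular value of the flattened embedding matrix; a quick standalone check of the same recurrence with NumPy (made-up values):

import numpy as np
W = np.array([[3., 0.], [0., 1.]])        # singular values are 3 and 1
u = np.array([[1., 1.]]) / np.sqrt(2.)
for _ in range(50):
    v = u @ W.T; v /= np.linalg.norm(v)
    u = v @ W;  u /= np.linalg.norm(u)
sigma = (v @ W @ u.T).item()              # -> approx 3.0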
Example #10
def compute_mask_loss(boxes,
                      masks,
                      annotations,
                      masks_target,
                      width,
                      height,
                      iou_threshold=0.5,
                      mask_size=(28, 28)):
    """compute overlap of boxes with annotations"""
    iou = overlap(boxes, annotations)
    argmax_overlaps_inds = K.argmax(iou, axis=1)
    max_iou = K.max(iou, axis=1)

    # keep only detections with IoU >= iou_threshold
    indices = tf.where(K.greater_equal(max_iou, iou_threshold))
    boxes = tf.gather_nd(boxes, indices)
    masks = tf.gather_nd(masks, indices)
    argmax_overlaps_inds = K.cast(tf.gather_nd(argmax_overlaps_inds, indices), 'int32')
    labels = K.cast(K.gather(annotations[:, 4], argmax_overlaps_inds), 'int32')

    # make normalized boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    boxes = K.stack([
        y1 / (K.cast(height, dtype=K.floatx()) - 1),
        x1 / (K.cast(width, dtype=K.floatx()) - 1),
        (y2 - 1) / (K.cast(height, dtype=K.floatx()) - 1),
        (x2 - 1) / (K.cast(width, dtype=K.floatx()) - 1),
    ], axis=1)

    # crop and resize masks_target
    # append a fake channel dimension
    masks_target = K.expand_dims(masks_target, axis=3)
    masks_target = tf.image.crop_and_resize(
        masks_target,
        boxes,
        argmax_overlaps_inds,
        mask_size
    )
    masks_target = masks_target[:, :, :, 0]  # remove fake channel dimension

    # gather the predicted masks using the annotation label
    masks = tf.transpose(masks, (0, 3, 1, 2))
    label_indices = K.stack([tf.range(K.shape(labels)[0]), labels], axis=1)

    masks = tf.gather_nd(masks, label_indices)

    # compute mask loss
    mask_loss = K.binary_crossentropy(masks_target, masks)
    normalizer = K.shape(masks)[0] * K.shape(masks)[1] * K.shape(masks)[2]
    normalizer = K.maximum(K.cast(normalizer, K.floatx()), 1)
    mask_loss = K.sum(mask_loss) / normalizer

    return mask_loss
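
tf.image.crop_and_resize expects normalized [y1, x1, y2, x2] boxes plus an int32 box-to-image index per box, which is why the coordinates are reordered and rescaled above; a minimal sketch with dummy data:

img = tf.zeros((1, 64, 64, 1))
norm_boxes = K.constant([[0.25, 0.25, 0.75, 0.75]])
crops = tf.image.crop_and_resize(img, norm_boxes, tf.zeros((1,), dtype='int32'), (28, 28))
# crops has shape (1, 28, 28, 1)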
Example #11
    def call(self, inputs):
        classes = K.squeeze(inputs[1], axis=1)
        kernel = K.gather(self.kernel, classes)
        #(bs, in, out)

        x = K.expand_dims(inputs[0], axis=1)
        #(bs, 1, in)
        output = tf.matmul(x, kernel)
        #(bs, 1, out)
        output = K.squeeze(output, axis=1)
        #(bs, out)

        if self.bias is not None:
            b = K.gather(self.bias, classes)
            output += b

        if self.activation is not None:
            return self.activation(output)
        return output
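
A shape sketch for the class-conditional dense step above (dummy zero tensors):

x = tf.zeros((3, 1, 5))         # (bs, 1, in)
kernel = tf.zeros((3, 5, 7))    # (bs, in, out), one kernel per sample
y = tf.matmul(x, kernel)        # (bs, 1, out), squeezed to (bs, out) above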
Example #12
    def _filter_detections(scores, labels):
        # Note: boxes, nms, score_threshold, max_detections and nms_threshold
        # are closed over from the enclosing filter_detections function,
        # shown in full in Example #17.
        # threshold based on score
        indices = tf.where(K.greater(scores, score_threshold))

        if nms:
            filtered_boxes = tf.gather_nd(boxes, indices)
            filtered_scores = K.gather(scores, indices)[:, 0]

            # perform NMS
            nms_indices = tf.image.non_max_suppression(
                filtered_boxes,
                filtered_scores,
                max_output_size=max_detections,
                iou_threshold=nms_threshold)

            # filter indices based on NMS
            indices = K.gather(indices, nms_indices)

        # add indices to list of all indices
        labels = tf.gather_nd(labels, indices)
        indices = K.stack([indices[:, 0], labels], axis=1)

        return indices
Example #13
def get_total_loss(content_losses, style_losses, total_var_loss,
                   content_weights, style_weights, tv_weights, class_targets):
    total_loss = K.variable(0.)
    weighted_content_losses = []
    weighted_style_losses = []

    # Compute content losses
    for loss in content_losses:
        weighted_loss = K.mean(K.gather(content_weights, class_targets) * loss)
        weighted_content_losses.append(weighted_loss)
        total_loss += weighted_loss

    # Compute style losses
    for loss in style_losses:
        weighted_loss = K.mean(K.gather(style_weights, class_targets) * loss)
        weighted_style_losses.append(weighted_loss)
        total_loss += weighted_loss

    # Compute tv loss
    weighted_tv_loss = K.mean(
        K.gather(tv_weights, class_targets) * total_var_loss)
    total_loss += weighted_tv_loss

    return (total_loss, weighted_content_losses, weighted_style_losses,
            weighted_tv_loss)
Example #14
    def call(self, inputs):
        if K.dtype(inputs) != 'int32':
            inputs = K.cast(inputs, 'int32')
        if self.dropout < 0. or self.dropout > 1.:
            logging.warning('WARNING: value of dropout not in [0, 1), '
                            'automatically set to 0.')
            self.dropout = 0.
        if 0. < self.dropout < 1. and self.training:
            retain_p = 1. - self.dropout
            # Bernoulli mask over rows of the embedding matrix (inverted dropout)
            self.B = K.random_binomial((self.input_dim,), p=retain_p) * \
                     (1. / retain_p)
            self.B = K.expand_dims(self.B)
            self.W = self.embeddings * self.B
        else:
            self.W = self.embeddings
        out = K.gather(self.W, inputs)
        return out
Example #15
    def call(self, inputs, training=None):
        class_labels = K.squeeze(inputs[1], axis=1)
        inputs = inputs[0]
        input_shape = K.int_shape(inputs)
        # Prepare broadcasting shape.
        ndim = len(input_shape)
        reduction_axes = list(range(len(input_shape)))
        del reduction_axes[self.axis]
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]

        # Determines whether broadcasting is needed.
        needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1])

        param_broadcast = [1] * len(input_shape)
        param_broadcast[self.axis] = input_shape[self.axis]
        param_broadcast[0] = K.shape(inputs)[0]
        if self.scale:
            broadcast_gamma = K.reshape(K.gather(self.gamma, class_labels),
                                        param_broadcast)
        else:
            broadcast_gamma = None

        if self.center:
            broadcast_beta = K.reshape(K.gather(self.beta, class_labels),
                                       param_broadcast)
        else:
            broadcast_beta = None

        normed, mean, variance = K.normalize_batch_in_training(
            inputs,
            gamma=None,
            beta=None,
            reduction_axes=reduction_axes,
            epsilon=self.epsilon)

        if training in {0, False}:
            return normed
        else:
            self.add_update([
                K.moving_average_update(self.moving_mean, mean, self.momentum),
                K.moving_average_update(self.moving_variance, variance,
                                        self.momentum)
            ], inputs)

            def normalize_inference():
                if needs_broadcasting:
                    # In this case we must explicitly broadcast all parameters.
                    broadcast_moving_mean = K.reshape(self.moving_mean,
                                                      broadcast_shape)
                    broadcast_moving_variance = K.reshape(
                        self.moving_variance, broadcast_shape)
                    return K.batch_normalization(inputs,
                                                 broadcast_moving_mean,
                                                 broadcast_moving_variance,
                                                 beta=None,
                                                 gamma=None,
                                                 epsilon=self.epsilon)
                else:
                    return K.batch_normalization(inputs,
                                                 self.moving_mean,
                                                 self.moving_variance,
                                                 beta=None,
                                                 gamma=None,
                                                 epsilon=self.epsilon)

        # Pick the normalized form corresponding to the training phase.
        out = K.in_train_phase(normed, normalize_inference, training=training)
        if broadcast_gamma is not None:
            out = out * broadcast_gamma
        if broadcast_beta is not None:
            out = out + broadcast_beta
        return out
Example #16
        def _mask(y_true, y_pred, iou_threshold=0.5, mask_size=(28, 28)):
            # split up the different predicted blobs
            boxes = y_pred[:, :, :4]
            masks = y_pred[:, :, 4:]

            # split up the different blobs
            annotations = y_true[:, :, :5]
            width = K.cast(y_true[0, 0, 5], dtype='int32')
            height = K.cast(y_true[0, 0, 6], dtype='int32')
            masks_target = y_true[:, :, 7:]

            # reshape the masks back to their original size
            masks_target = K.reshape(masks_target,
                                     (K.shape(masks_target)[0] *
                                      K.shape(masks_target)[1], height, width))
            masks = K.reshape(masks, (K.shape(masks)[0] * K.shape(masks)[1],
                                      mask_size[0], mask_size[1], -1))

            # batch size > 1 fix
            boxes = K.reshape(boxes, (-1, K.shape(boxes)[2]))
            annotations = K.reshape(annotations, (-1, K.shape(annotations)[2]))

            # compute overlap of boxes with annotations
            iou = overlap(boxes, annotations)
            argmax_overlaps_inds = K.argmax(iou, axis=1)
            max_iou = K.max(iou, axis=1)

            # keep only detections with IoU >= iou_threshold
            indices = tf.where(K.greater_equal(max_iou, iou_threshold))
            boxes = tf.gather_nd(boxes, indices)
            masks = tf.gather_nd(masks, indices)
            argmax_overlaps_inds = tf.gather_nd(argmax_overlaps_inds, indices)
            argmax_overlaps_inds = K.cast(argmax_overlaps_inds, 'int32')
            labels = K.gather(annotations[:, 4], argmax_overlaps_inds)
            labels = K.cast(labels, 'int32')

            # make normalized boxes
            x1 = boxes[:, 0]
            y1 = boxes[:, 1]
            x2 = boxes[:, 2]
            y2 = boxes[:, 3]
            boxes = K.stack([
                y1 / (K.cast(height, dtype=K.floatx()) - 1),
                x1 / (K.cast(width, dtype=K.floatx()) - 1),
                (y2 - 1) / (K.cast(height, dtype=K.floatx()) - 1),
                (x2 - 1) / (K.cast(width, dtype=K.floatx()) - 1),
            ],
                            axis=1)

            # crop and resize masks_target
            # append a fake channel dimension
            masks_target = K.expand_dims(masks_target, axis=3)
            masks_target = tf.image.crop_and_resize(masks_target, boxes,
                                                    argmax_overlaps_inds,
                                                    mask_size)

            # remove fake channel dimension
            masks_target = masks_target[:, :, :, 0]

            # gather the predicted masks using the annotation label
            masks = tf.transpose(masks, (0, 3, 1, 2))
            label_indices = K.stack([tf.range(K.shape(labels)[0]), labels],
                                    axis=1)
            masks = tf.gather_nd(masks, label_indices)

            # compute mask loss
            mask_loss = K.binary_crossentropy(masks_target, masks)
            normalizer = K.shape(masks)[0] * K.shape(masks)[1] * K.shape(
                masks)[2]
            normalizer = K.maximum(K.cast(normalizer, K.floatx()), 1)
            mask_loss = K.sum(mask_loss) / normalizer

            return mask_loss
Example #17
def filter_detections(boxes,
                      classification,
                      other=[],
                      class_specific_filter=True,
                      nms=True,
                      score_threshold=0.05,
                      max_detections=300,
                      nms_threshold=0.5):
    """Filter detections using the boxes and classification values.

    Args:
        boxes: Tensor of shape (num_boxes, 4) containing the boxes in
            (x1, y1, x2, y2) format.
        classification: Tensor of shape (num_boxes, num_classes) containing
            the classification scores.
        other: List of tensors of shape (num_boxes, ...) to filter along
            with the boxes and classification scores.
        class_specific_filter: Whether to perform filtering per class,
            or take the best scoring class and filter those.
        nms: Flag to enable/disable non maximum suppression.
        score_threshold: Threshold used to prefilter the boxes with.
        max_detections: Maximum number of detections to keep.
        nms_threshold: Threshold for the IoU value to determine when a box
            should be suppressed.

    Returns:
        A list of [boxes, scores, labels, other[0], other[1], ...].
        boxes is shaped (max_detections, 4) and contains the (x1, y1, x2, y2)
            of the non-suppressed boxes.
        scores is shaped (max_detections,) and contains the scores of the
            predicted class.
        labels is shaped (max_detections,) and contains the predicted label.
        other[i] is shaped (max_detections, ...) and contains the filtered
            other[i] data.
        In case there are fewer than max_detections detections,
            the tensors are padded with -1's.
    """
    def _filter_detections(scores, labels):
        # threshold based on score
        indices = tf.where(K.greater(scores, score_threshold))

        if nms:
            filtered_boxes = tf.gather_nd(boxes, indices)
            filtered_scores = K.gather(scores, indices)[:, 0]

            # perform NMS
            nms_indices = tf.image.non_max_suppression(
                filtered_boxes,
                filtered_scores,
                max_output_size=max_detections,
                iou_threshold=nms_threshold)

            # filter indices based on NMS
            indices = K.gather(indices, nms_indices)

        # add indices to list of all indices
        labels = tf.gather_nd(labels, indices)
        indices = K.stack([indices[:, 0], labels], axis=1)

        return indices

    if class_specific_filter:
        all_indices = []
        # perform per class filtering
        for c in range(K.int_shape(classification)[1]):
            scores = classification[:, c]
            labels = c * tf.ones((K.shape(scores)[0], ), dtype='int64')
            all_indices.append(_filter_detections(scores, labels))

        # concatenate indices to single tensor
        indices = K.concatenate(all_indices, axis=0)
    else:
        scores = K.max(classification, axis=1)
        labels = K.argmax(classification, axis=1)
        indices = _filter_detections(scores, labels)

    # select top k
    scores = tf.gather_nd(classification, indices)
    labels = indices[:, 1]
    scores, top_indices = tf.nn.top_k(scores,
                                      k=K.minimum(max_detections,
                                                  K.shape(scores)[0]))

    # filter input using the final set of indices
    indices = K.gather(indices[:, 0], top_indices)
    boxes = K.gather(boxes, indices)
    labels = K.gather(labels, top_indices)
    other_ = [K.gather(o, indices) for o in other]

    # zero pad the outputs
    pad_size = K.maximum(0, max_detections - K.shape(scores)[0])
    boxes = tf.pad(boxes, [[0, pad_size], [0, 0]], constant_values=-1)
    scores = tf.pad(scores, [[0, pad_size]], constant_values=-1)
    labels = tf.pad(labels, [[0, pad_size]], constant_values=-1)
    labels = K.cast(labels, 'int32')
    pads = lambda x: [[0, pad_size]] + [[0, 0] for _ in range(1, K.ndim(x))]
    other_ = [tf.pad(o, pads(o), constant_values=-1) for o in other_]

    # set shapes, since we know what they are
    boxes.set_shape([max_detections, 4])
    scores.set_shape([max_detections])
    labels.set_shape([max_detections])
    for o, s in zip(other_, [list(K.int_shape(o)) for o in other]):
        o.set_shape([max_detections] + s[1:])

    return [boxes, scores, labels] + other_
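
A hedged usage sketch with dummy tensors (two boxes, two classes, defaults as above):

boxes = K.constant([[0., 0., 10., 10.], [20., 20., 30., 30.]])
classification = K.constant([[0.9, 0.1], [0.2, 0.8]])
detections = filter_detections(boxes, classification, max_detections=4)
# detections = [boxes, scores, labels], each padded with -1 up to 4 entries.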