Example #1
def yolo_head(graph, feats, anchors, num_classes):
    with graph.as_default():
        num_anchors = len(anchors)
        anchors_tensor = K.reshape(K.variable(anchors),
                                   [1, 1, 1, num_anchors, 2])

        # Build a grid of per-cell coordinates from K.arange so the predicted
        # x/y offsets can be shifted to the cell they belong to.
        conv_dims = K.shape(feats)[1:3]
        conv_height_index = K.arange(0, stop=conv_dims[0])
        conv_width_index = K.arange(0, stop=conv_dims[1])
        conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

        conv_width_index = K.tile(K.expand_dims(conv_width_index, 0),
                                  [conv_dims[0], 1])
        conv_width_index = K.flatten(K.transpose(conv_width_index))
        conv_index = K.transpose(K.stack([conv_height_index,
                                          conv_width_index]))
        conv_index = K.reshape(conv_index,
                               [1, conv_dims[0], conv_dims[1], 1, 2])
        conv_index = K.cast(conv_index, K.dtype(feats))

        feats = K.reshape(
            feats,
            [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
        conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]),
                           K.dtype(feats))

        # Decode raw outputs: sigmoid for the x/y offsets and the confidence,
        # exp for the w/h scales, softmax over the class scores.
        box_xy = K.sigmoid(feats[..., :2])
        box_wh = K.exp(feats[..., 2:4])
        box_confidence = K.sigmoid(feats[..., 4:5])
        box_class_probs = K.softmax(feats[..., 5:])

        box_xy = (box_xy + conv_index) / conv_dims
        box_wh = box_wh * anchors_tensor / conv_dims

        return box_xy, box_wh, box_confidence, box_class_probs
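
The stretch of K.arange, K.tile and K.reshape calls above only serves to build a per-cell coordinate grid (shape [1, H, W, 1, 2]) that is added to the sigmoid x/y offsets. As a rough NumPy illustration of that idea, written in the straightforward row/column form rather than the exact tile/transpose sequence used in the snippet (toy 3x3 grid, names are mine):

import numpy as np

H = W = 3  # toy square grid, as in YOLO's 13x13 case
rows = np.tile(np.arange(H).reshape(-1, 1), (1, W))   # row index of every cell
cols = np.tile(np.arange(W).reshape(1, -1), (H, 1))   # column index of every cell
grid = np.stack([cols, rows], axis=-1)                # (H, W, 2): (x, y) offset per cell
print(grid[1, 2])  # -> [2 1]: the cell in row 1, column 2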
Example #2
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
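
The decoding itself is just (sigmoid(t_xy) + grid) / grid_shape for the box center and exp(t_wh) * anchor / input_shape for the box size, both normalized to [0, 1]. A toy NumPy check of those two formulas (numbers invented for illustration, not taken from any model):

import numpy as np

t_xy = np.array([0.2, -0.1])      # raw x/y logits for one cell
t_wh = np.array([0.05, -0.3])     # raw w/h logits
grid_xy = np.array([4.0, 7.0])    # the cell's (x, y) position in a 13x13 grid
anchor = np.array([116.0, 90.0])  # anchor size in input pixels, input 416x416

box_xy = (1.0 / (1.0 + np.exp(-t_xy)) + grid_xy) / 13.0  # normalized box center
box_wh = np.exp(t_wh) * anchor / 416.0                   # normalized box size
print(box_xy, box_wh)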
Example #3
 def _attention_regularizer(self, attention):
     # Penalty that pushes attention · attentionᵀ towards the identity matrix,
     # so that different positions attend to distinct parts of the sequence.
     batch_size = K.cast(K.shape(attention)[0], K.floatx())
     input_len = K.shape(attention)[-1]
     indices = K.expand_dims(K.arange(0, input_len), axis=0)
     diagonal = K.expand_dims(K.arange(0, input_len), axis=-1)
     eye = K.cast(K.equal(indices, diagonal), K.floatx())
     return self.attention_regularizer_weight * K.sum(
         K.square(
             K.batch_dot(attention,
                         K.permute_dimensions(attention, (0, 2, 1))) -
             eye)) / batch_size
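
The expand_dims/equal trick on K.arange above simply materializes an identity matrix of the (dynamic) sequence length; the regularizer then measures how far attention · attentionᵀ drifts from it, averaged over the batch and scaled by attention_regularizer_weight. A rough NumPy equivalent with a made-up 3-step attention matrix:

import numpy as np

attention = np.array([[[0.9, 0.1, 0.0],
                       [0.2, 0.7, 0.1],
                       [0.0, 0.3, 0.7]]])   # (batch=1, seq, seq), toy values
n = attention.shape[-1]
eye = (np.arange(n)[None, :] == np.arange(n)[:, None]).astype(float)  # identity via arange/equal
penalty = np.sum((attention @ attention.transpose(0, 2, 1) - eye) ** 2) / attention.shape[0]
print(penalty)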
Example #4
 def test_arange(self):
     for test_value in (-20, 0, 1, 10):
         t_a = KTF.arange(test_value)
         a = KTF.eval(t_a)
         assert np.array_equal(a, np.arange(test_value))
         t_b = KTH.arange(test_value)
         b = KTH.eval(t_b)
         assert np.array_equal(b, np.arange(test_value))
         assert np.array_equal(a, b)
         assert KTF.dtype(t_a) == KTH.dtype(t_b)
     for start, stop, step in ((0, 5, 1), (-5, 5, 2), (0, 1, 2)):
         a = KTF.eval(KTF.arange(start, stop, step))
         assert np.array_equal(a, np.arange(start, stop, step))
         b = KTH.eval(KTH.arange(start, stop, step))
         assert np.array_equal(b, np.arange(start, stop, step))
         assert np.array_equal(a, b)
     for dtype in ('int32', 'int64', 'float32', 'float64'):
         for backend in (KTF, KTH):
             t = backend.arange(10, dtype=dtype)
             assert backend.dtype(t) == dtype
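
For reference, these are the NumPy values the assertions above compare against; both backends are expected to reproduce them exactly, including the empty result for a negative stop and the explicitly forced dtypes:

import numpy as np

print(np.arange(-20))       # -> [] : negative stop with default start 0 gives an empty range
print(np.arange(0, 5, 1))   # -> [0 1 2 3 4]
print(np.arange(-5, 5, 2))  # -> [-5 -3 -1  1  3]
print(np.arange(0, 1, 2))   # -> [0]
print(np.arange(10, dtype='float64').dtype)  # -> float64, dtype can be forced explicitly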
Example #5
    def call(self, inputs, mask=None, **kwargs):
        input_len = K.shape(inputs)[1]

        if self.attention_type == Attention.ATTENTION_TYPE_ADD:
            e = self._call_additive_emission(inputs)
        elif self.attention_type == Attention.ATTENTION_TYPE_MUL:
            e = self._call_multiplicative_emission(inputs)

        if self.attention_activation is not None:
            e = self.attention_activation(e)
        if self.attention_width is not None:
            if self.history_only:
                lower = K.arange(0, input_len) - (self.attention_width - 1)
            else:
                lower = K.arange(0, input_len) - self.attention_width // 2
            lower = K.expand_dims(lower, axis=-1)
            upper = lower + self.attention_width
            indices = K.expand_dims(K.arange(0, input_len), axis=0)
            e -= 10000.0 * (1.0 - K.cast(lower <= indices, K.floatx()) *
                            K.cast(indices < upper, K.floatx()))
        if mask is not None:
            mask = K.expand_dims(K.cast(mask, K.floatx()), axis=-1)
            e -= 10000.0 * ((1.0 - mask) *
                            (1.0 - K.permute_dimensions(mask, (0, 2, 1))))

        # a_{t} = \text{softmax}(e_t)
        e = K.exp(e - K.max(e, axis=-1, keepdims=True))
        a = e / K.sum(e, axis=-1, keepdims=True)

        # l_t = \sum_{t'} a_{t, t'} x_{t'}
        v = K.batch_dot(a, inputs)
        if self.attention_regularizer_weight > 0.0:
            self.add_loss(self._attention_regularizer(a))

        if self.return_attention:
            return [v, a]
        return v
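
When attention_width is set, the two K.arange calls above define a band of allowed positions around the diagonal (shifted entirely into the past when history_only is true), and everything outside the band gets a large negative bias before the softmax, which drives those weights to (almost) zero. A small NumPy illustration of that band mask, using my own toy sizes (5 timesteps, width 3, history_only=False):

import numpy as np

input_len, width = 5, 3
lower = (np.arange(input_len) - width // 2)[:, None]  # first position each query may attend to
upper = lower + width                                 # one past the last allowed position
indices = np.arange(input_len)[None, :]
allowed = (lower <= indices) & (indices < upper)      # True inside the band around the diagonal
print(allowed.astype(int))
# [[1 1 0 0 0]
#  [1 1 1 0 0]
#  [0 1 1 1 0]
#  [0 0 1 1 1]
#  [0 0 0 1 1]]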