Ejemplo n.º 1
0
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    Arguments:
        feats: Raw output tensor of one detection layer. Axes 1 and 2 are
            read as grid height and width, and the tensor is reshaped to
            (-1, grid_h, grid_w, num_anchors, num_classes + 5).
        anchors: Anchor sizes for this layer; reshaped to
            (1, 1, 1, num_anchors, 2), so it must hold one pair per anchor.
        num_classes: Number of object classes.
        input_shape: Indexable pair that is reversed before dividing the
            box sizes (callers in this file pass height, width).
        calc_loss: When truthy, return the intermediate tensors the loss
            needs instead of the decoded predictions.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
        ``feats`` in the first form is the reshaped tensor.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = tf.reshape(tf.constant(anchors),
                                [1, 1, 1, num_anchors, 2])

    # Reshaping below does not change the dtype, so look it up once.
    feats_dtype = tf.dtype(feats)

    grid_shape = tf.shape(feats)[1:3]  # height, width
    # Row indices repeated across every column ...
    grid_y = tf.tile(
        tf.reshape(tf.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    # ... and column indices repeated across every row.
    grid_x = tf.tile(
        tf.reshape(tf.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    # NOTE(review): relies on `concatenate` joining along the last axis by
    # default - confirm for whatever namespace is bound to `tf` here.
    grid = tf.concatenate([grid_x, grid_y])
    grid = tf.cast(grid, feats_dtype)

    feats = tf.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    # The shapes are reversed so that x is divided by width and y by height.
    box_xy = (tf.sigmoid(feats[..., :2]) + grid) / tf.cast(
        grid_shape[::-1], feats_dtype)
    box_wh = tf.exp(feats[..., 2:4]) * anchors_tensor / tf.cast(
        input_shape[::-1], feats_dtype)
    box_confidence = tf.sigmoid(feats[..., 4:5])
    box_class_probs = tf.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
Ejemplo n.º 2
0
def _preprocess_symbolic_input(x, data_format, mode):
    """Preprocesses a tensor encoding a batch of images.

    Arguments:
      x: Input tensor, 3D or 4D.
      data_format: Data format of the image tensor
        ('channels_first' or anything else, treated as channels last).
      mode: One of "caffe", "tf" or "torch".
        - caffe: will convert the images from RGB to BGR,
            then will zero-center each color channel with
            respect to the ImageNet dataset,
            without scaling.
        - tf: will scale pixels between -1 and 1,
            sample-wise.
        - torch: will scale pixels between 0 and 1 and then
            will normalize each channel with respect to the
            ImageNet dataset.
        Any value other than "tf" or "torch" takes the caffe path.

    Returns:
        Preprocessed tensor.
    """
    if mode == 'tf':
        x /= 127.5
        x -= 1.
        return x

    if mode == 'torch':
        x /= 255.
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
    else:
        # 'RGB'->'BGR': reverse the channel axis, wherever it sits.
        if data_format == 'channels_first':
            if backend.ndim(x) == 3:
                x = x[::-1, ...]
            else:
                x = x[:, ::-1, ...]
        else:
            x = x[..., ::-1]
        mean = [103.939, 116.779, 123.68]
        std = None

    x_dtype = tf.dtype(x)

    # Zero-center by mean pixel. The negated mean is added as a bias;
    # casting is a no-op when the dtypes already agree.
    mean_tensor = tf.cast(tf.constant(-np.array(mean)), x_dtype)
    x = tf.nn.bias_add(x, mean_tensor, data_format=data_format)

    if std is not None:
        std_tensor = tf.constant(np.array(std), dtype=x_dtype)
        if data_format == 'channels_first':
            # Dividing by a flat 3-vector would broadcast over the last
            # (width) axis; give it shape (3, 1, 1) so it lines up with the
            # channel axis of both (C, H, W) and (N, C, H, W) inputs.
            std_tensor = tf.reshape(std_tensor, (-1, 1, 1))
        x /= std_tensor
    return x
Ejemplo n.º 3
0
 def loop_body(b, ignore_mask):
     """Write the ignore flags for batch item ``b`` and advance the counter.

     Uses ``y_true``, ``l``, ``object_mask_bool``, ``pred_box``,
     ``ignore_thresh`` and ``box_iou`` from the enclosing scope.
     Returns ``(b + 1, updated ignore_mask)``.
     """
     # Keep only the ground-truth boxes at positions flagged by the mask.
     gt_boxes = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                object_mask_bool[b, ..., 0])
     overlaps = box_iou(pred_box[b], gt_boxes)
     # Flag is 1 where the best IoU over the last axis is below the threshold.
     below_thresh = tf.max(overlaps, axis=-1) < ignore_thresh
     flags = tf.cast(below_thresh, tf.dtype(gt_boxes))
     return b + 1, ignore_mask.write(b, flags)
Ejemplo n.º 4
0
    def get_updates(self, loss, params):
        """Build the update ops for one optimisation step.

        The rule appears to follow Rectified Adam: an Adam-style step scaled
        by the rectification term ``r_t`` once ``rho_t > 4``, and a plain
        bias-corrected momentum step before that.

        Arguments:
            loss: Loss passed to ``self.get_gradients``.
            params: Parameters to update; each gets a first-moment and a
                second-moment accumulator created here.

        Returns:
            ``self.updates``: the iteration-counter increment followed by
            the parameter and accumulator updates. ``self.weights`` is set
            to the counter plus all accumulators.
        """
        grads = self.get_gradients(loss, params)
        self.updates = [tf.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay:
            # Inverse-time decay of the learning rate.
            lr = lr * (1. / (1. + self.decay *
                             tf.cast(self.iterations, tf.dtype(self.decay))))

        t = tf.cast(self.iterations, tf.float32) + 1.
        beta_1 = self.beta_1
        beta_2 = self.beta_2
        beta_1_t = tf.pow(beta_1, t)
        beta_2_t = tf.pow(beta_2, t)
        rho_inf = 2. / (1. - beta_2) - 1.
        rho_t = rho_inf - 2. * t * beta_2_t / (1. - beta_2_t)
        # relu clamps (rho_t - 4) at zero, so r_t is 0 while rho_t <= 4.
        r_t = tf.math.sqrt(
            tf.relu(rho_t - 4.) * (rho_t - 2.) * rho_inf /
            (tf.relu(rho_inf - 4.) * (rho_inf - 2.) * rho_t))
        # 1.0 once the rectified step applies, 0.0 during warm-up.
        flag = tf.cast(rho_t > 4., tf.float32)

        ms = [tf.zeros(tf.int_shape(p)) for p in params]
        vs = [tf.zeros(tf.int_shape(p)) for p in params]

        self.weights = [self.iterations] + ms + vs
        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = beta_1 * m + (1. - beta_1) * g
            v_t = beta_2 * v + (1. - beta_2) * tf.square(g)

            m_hat_t = m_t / (1. - beta_1_t)
            v_hat_t = tf.math.sqrt(v_t / (1. - beta_2_t))

            # Warm-up (flag == 0, r_t == 0): factor is 1, i.e. p - lr * m_hat.
            # Afterwards (flag == 1): factor is r_t / (v_hat + eps).
            # The previous `+ flag - 1.` made the warm-up factor -1, which
            # moved the parameters in the ascent direction.
            step_factor = r_t / (v_hat_t + self.epsilon) + (1. - flag)
            new_p = p - lr * step_factor * m_hat_t

            if getattr(p, "constraint", None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(tf.update(p, new_p))
            self.updates.append(tf.update(m, m_t))
            self.updates.append(tf.update(v, v_t))
        return self.updates
Ejemplo n.º 5
0
def yolo4_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    """Return the YOLO loss tensor summed over all detection layers.

    Arguments:
        args: Flat sequence. The first ``len(anchors) // 3`` entries are
            the raw layer outputs (yolo_outputs); the remaining entries are
            the matching ground-truth tensors (y_true).
        anchors: array, shape=(N, 2), wh. It is indexed with a list of
            ints, so it must support that kind of indexing.
        num_classes: integer, forwarded to ``yolo_head``.
        ignore_thresh: float, IoU threshold. Predictions whose best IoU
            with the true boxes is not below it are left out of the
            no-object confidence loss.
        print_loss: when truthy, the running loss is wrapped in
            ``tf.Print`` after every layer.

    Returns:
        Sum over layers of the xy, wh, confidence and class losses, each
        divided by the batch size.
    """
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs, y_true = args[:num_layers], args[num_layers:]
    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]

    true_dtype = tf.dtype(y_true[0])
    first_shape = tf.shape(yolo_outputs[0])
    # The first output's grid is scaled by 32 to get the input size.
    input_shape = tf.cast(first_shape[1:3] * 32, true_dtype)
    grid_shapes = [
        tf.cast(tf.shape(output)[1:3], true_dtype) for output in yolo_outputs
    ]
    m = first_shape[0]  # batch size, tensor
    mf = tf.cast(m, tf.dtype(yolo_outputs[0]))
    loss = 0

    for l in range(num_layers):
        layer_true = y_true[l]
        layer_anchors = anchors[anchor_mask[l]]
        object_mask = layer_true[..., 4:5]
        true_class_probs = layer_true[..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[l], layer_anchors, num_classes, input_shape,
            calc_loss=True)
        pred_box = tf.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = layer_true[..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = tf.log(
            layer_true[..., 2:4] / layer_anchors * input_shape[::-1])
        # avoid log(0)=-inf where the object mask is zero
        raw_true_wh = tf.switch(
            object_mask, raw_true_wh, tf.zeros_like(raw_true_wh))
        box_loss_scale = 2 - layer_true[..., 2:3] * layer_true[..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(true_dtype, size=1, dynamic_size=True)
        object_mask_bool = tf.cast(object_mask, 'bool')

        def write_ignore_flags(b, ignore_mask):
            # One batch item per call: flag predictions whose best IoU with
            # the item's true boxes is below the threshold.
            gt_boxes = tf.boolean_mask(layer_true[b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            best_iou = tf.max(box_iou(pred_box[b], gt_boxes), axis=-1)
            flags = tf.cast(best_iou < ignore_thresh, tf.dtype(gt_boxes))
            return b + 1, ignore_mask.write(b, flags)

        _, ignore_mask = tf.control_flow_ops.while_loop(
            lambda b, *_: b < m, write_ignore_flags, [0, ignore_mask])
        ignore_mask = tf.expand_dims(ignore_mask.stack(), -1)

        # tf.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * tf.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * tf.square(
            raw_true_wh - raw_pred[..., 2:4])
        # The same objectness cross-entropy feeds both confidence terms.
        objectness_bce = tf.binary_crossentropy(
            object_mask, raw_pred[..., 4:5], from_logits=True)
        confidence_loss = (
            object_mask * objectness_bce +
            (1 - object_mask) * objectness_bce * ignore_mask)
        class_loss = object_mask * tf.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        # Normalise every component by the batch size.
        xy_loss = tf.sum(xy_loss) / mf
        wh_loss = tf.sum(wh_loss) / mf
        confidence_loss = tf.sum(confidence_loss) / mf
        class_loss = tf.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss

        if print_loss:
            printed = [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                tf.sum(ignore_mask)
            ]
            loss = tf.Print(loss, printed, message='loss: ')
    return loss