Example 1
def calculateGpu(self, gtPosition, predPosition):
    # Boxes are in corner format: (left, top, right, bottom).
    # K is the Keras backend; THT is theano.tensor (this snippet targets the
    # Theano backend, hence THT.reshape with an explicit ndim).
    pShape = K.shape(gtPosition)
    inputDim = K.ndim(gtPosition)
    # Flatten the leading dimensions so each row is one box.
    gtPosition = K.reshape(gtPosition, (-1, pShape[-1]))
    predPosition = K.reshape(predPosition, (-1, pShape[-1]))
    # Coordinates of the intersection rectangle.
    left = K.maximum(predPosition[:, 0], gtPosition[:, 0])
    top = K.maximum(predPosition[:, 1], gtPosition[:, 1])
    right = K.minimum(predPosition[:, 2], gtPosition[:, 2])
    bottom = K.minimum(predPosition[:, 3], gtPosition[:, 3])
    # Clamp negative extents to zero instead of multiplying by boolean masks,
    # which only works on backends that treat booleans as integers.
    intersect = K.maximum(right - left, 0.) * K.maximum(bottom - top, 0.)
    label_area = K.abs(gtPosition[:, 2] - gtPosition[:, 0]) * K.abs(gtPosition[:, 3] - gtPosition[:, 1])
    predict_area = K.abs(predPosition[:, 2] - predPosition[:, 0]) * K.abs(predPosition[:, 3] - predPosition[:, 1])
    union = label_area + predict_area - intersect
    iou = intersect / union
    #iouShape = K.concatenate([pShape[:-1], (1, )])
    iou = THT.reshape(iou, (pShape[0], pShape[1], 1), ndim=inputDim)

    return iou
Example 2
def box_iou(b1, b2):
    '''Return iou tensor

    Parameters
    ----------
    b1: tensor, shape=(i1,...,iN, 4), xywh
    b2: tensor, shape=(j, 4), xywh

    Returns
    -------
    iou: tensor, shape=(i1,...,iN, j)

    '''

    # Expand dim to apply broadcasting.
    b1 = K.expand_dims(b1, -2)
    b1_xy = b1[..., :2]
    b1_wh = b1[..., 2:4]
    b1_wh_half = b1_wh/2.
    b1_mins = b1_xy - b1_wh_half
    b1_maxes = b1_xy + b1_wh_half

    # Expand dim to apply broadcasting.
    b2 = K.expand_dims(b2, 0)
    b2_xy = b2[..., :2]
    b2_wh = b2[..., 2:4]
    b2_wh_half = b2_wh/2.
    b2_mins = b2_xy - b2_wh_half
    b2_maxes = b2_xy + b2_wh_half

    intersect_mins = K.maximum(b1_mins, b2_mins)
    intersect_maxes = K.minimum(b1_maxes, b2_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    b1_area = b1_wh[..., 0] * b1_wh[..., 1]
    b2_area = b2_wh[..., 0] * b2_wh[..., 1]
    iou = intersect_area / (b1_area + b2_area - intersect_area)

    return iou
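
A minimal usage sketch (mine, not from the source): two predicted boxes against three ground-truth boxes, all in (x_center, y_center, w, h) format, checked through the Keras backend. The box values are made up for illustration.

import numpy as np
from keras import backend as K

b1 = K.constant(np.array([[0.5, 0.5, 1.0, 1.0],
                          [2.0, 2.0, 1.0, 1.0]], dtype='float32'))  # (2, 4)
b2 = K.constant(np.array([[0.5, 0.5, 1.0, 1.0],
                          [1.0, 0.5, 1.0, 1.0],
                          [5.0, 5.0, 1.0, 1.0]], dtype='float32'))  # (3, 4)

iou = box_iou(b1, b2)  # broadcasting yields shape (2, 3)
print(K.eval(iou))     # row 0 ~ [1.0, 0.333, 0.0]: identical, half-overlap, disjoint

The expand_dims calls are what make the broadcasting work: b1 becomes (2, 1, 4) and b2 becomes (1, 3, 4), so every b1 box is compared with every b2 box.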
Example 3
def yolo_loss(args,
              anchors,
              num_classes,
              rescore_confidence=False,
              print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    yolo_output : tensor
        Final convolutional layer features.

    true_boxes : tensor
        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
        containing box x_center, y_center, width, height, and class.

    detectors_mask : array
        0/1 mask for detector positions where there is a matching ground truth.

    matching_true_boxes : array
        Corresponding ground truth boxes for positive detector positions.
        Already adjusted for conv height and width.

    anchors : tensor
        Anchor boxes for model.

    num_classes : int
        Number of object classes.

    rescore_confidence : bool, default=False
        If True then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.

    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    mean_loss : float
        mean localization loss across minibatch
    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate(
        (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (
        confidence_loss_sum + classification_loss_sum + coordinates_loss_sum)
    if print_loss:
        total_loss = tf.Print(
            total_loss, [
                total_loss, confidence_loss_sum, classification_loss_sum,
                coordinates_loss_sum
            ],
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss
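
Because the four tensors arrive packed in args, this loss is normally attached through a Lambda layer rather than passed to model.compile directly. A hedged wiring sketch, patterned after how YAD2K-style code does it (the tensor names here are assumptions, not from the snippet):

from keras.layers import Lambda
from keras.models import Model

# image_input, yolo_body_output, boxes_input, detectors_mask_input and
# matching_boxes_input are assumed to be pre-built tensors with the shapes
# documented in the docstring above.
loss_tensor = Lambda(
    yolo_loss,
    output_shape=(1,),
    name='yolo_loss',
    arguments={'anchors': anchors, 'num_classes': num_classes})(
        [yolo_body_output, boxes_input, detectors_mask_input,
         matching_boxes_input])

model = Model([image_input, boxes_input, detectors_mask_input,
               matching_boxes_input], loss_tensor)
# The model's "prediction" is the loss itself, so compile with an identity loss:
model.compile(optimizer='adam', loss=lambda y_true, y_pred: y_pred)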
Example 4
File: myutils.py Project: ghif/drcn
def clip_relu(x):
    # ReLU clipped to [0, 1]: max(x, 0) followed by min(., 1).
    y = K.maximum(x, 0)
    return K.minimum(y, 1)
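
A possible usage sketch (not from the drcn repo itself): clip_relu can be passed anywhere Keras accepts an activation callable.

from keras.layers import Dense

layer = Dense(64, activation=clip_relu)  # layer outputs clipped to [0, 1]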
Example 5
def clamp_minus_one_plus_one(x):
    # K.minimum/K.maximum are element-wise operations (unlike K.min/K.max,
    # which reduce), so this clamps every element of x to [-1, +1].
    return K.minimum(1., K.maximum(x, -1.))
Example 6
    def step(self, a, states):
        r_tm1 = states[:self.nb_layers]
        c_tm1 = states[self.nb_layers:2*self.nb_layers]
        e_tm1 = states[2*self.nb_layers:3*self.nb_layers]

        if self.extrap_start_time is not None:
            t = states[-1]
            a = K.switch(t >= self.t_extrap, states[-2], a)  # if past self.extrap_start_time, the previous prediction will be treated as the actual

        c = []
        r = []
        e = []

        for l in reversed(range(self.nb_layers)):
            inputs = [r_tm1[l], e_tm1[l]]
            if l < self.nb_layers - 1:
                inputs.append(r_up)

            inputs = K.concatenate(inputs, axis=self.channel_axis)
            i = self.conv_layers['i'][l].call(inputs)
            f = self.conv_layers['f'][l].call(inputs)
            o = self.conv_layers['o'][l].call(inputs)
            _c = f * c_tm1[l] + i * self.conv_layers['c'][l].call(inputs)
            _r = o * self.LSTM_activation(_c)
            c.insert(0, _c)
            r.insert(0, _r)

            if l > 0:
                r_up = self.upsample.call(_r)

        for l in range(self.nb_layers):
            ahat = self.conv_layers['ahat'][l].call(r[l])
            if l == 0:
                ahat = K.minimum(ahat, self.pixel_max)
                frame_prediction = ahat

            # compute errors
            e_up = self.error_activation(ahat - a)
            e_down = self.error_activation(a - ahat)

            e.append(K.concatenate((e_up, e_down), axis=self.channel_axis))

            if self.output_layer_num == l:
                if self.output_layer_type == 'A':
                    output = a
                elif self.output_layer_type == 'Ahat':
                    output = ahat
                elif self.output_layer_type == 'R':
                    output = r[l]
                elif self.output_layer_type == 'E':
                    output = e[l]

            if l < self.nb_layers - 1:
                a = self.conv_layers['a'][l].call(e[l])
                a = self.pool.call(a)  # target for next layer

        if self.output_layer_type is None:
            if self.output_mode == 'prediction':
                output = frame_prediction
            else:
                for l in range(self.nb_layers):
                    layer_error = K.mean(K.batch_flatten(e[l]), axis=-1, keepdims=True)
                    all_error = layer_error if l == 0 else K.concatenate((all_error, layer_error), axis=-1)
                if self.output_mode == 'error':
                    output = all_error
                else:
                    output = K.concatenate((K.batch_flatten(frame_prediction), all_error), axis=-1)

        states = r + c + e
        if self.extrap_start_time is not None:
            states += [frame_prediction, t + 1]
        return output, states
Example 7
def call(self, x, mask=None):
    # Clamp the indices into [0, model_dims[1] - 1] before the embedding lookup.
    x = K.maximum(K.minimum(x, self.model_dims[1] - 1), 0)
    return K.gather(self.W, x)
Example 8
def cross_entropy(self, y_true, y_pred):
    # Clip predictions away from 0 and 1 to keep the log finite.
    y_pred = K.maximum(K.minimum(y_pred, 1 - 1e-15), 1e-15)
    cross_entropy_loss = -K.sum(y_true * K.log(y_pred), axis=-1)
    return cross_entropy_loss
Example 9
def custom_activation(self, x):
    # "custom-<a>" applies a sigmoid with slope a; "rounded" binarizes a sigmoid.
    if self.activation.split('-')[0] == "custom":
        a = float(self.activation.split('-')[1])
        return 1.0 / (1 + K.exp(-a * x))
    elif self.activation.split('-')[0] == "rounded":
        # The original snippet was missing this return statement.
        return K.minimum(K.maximum(K.round(K.sigmoid(x)), 0), 1)
Example 10
def keras_metric_loss(y_true, y_pred):
    # huber_delta and huber_weight are free variables from the enclosing scope.
    x = y_true - y_pred
    return K.mean(huber_weight * K.minimum(
        K.maximum(2 * huber_delta * K.abs(x) - huber_delta ** 2,
                  huber_delta ** 2),
        x ** 2) + K.relu(x) ** 2)
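
Since huber_delta and huber_weight come from an enclosing scope, the metric is presumably produced by a closure. A hedged reconstruction (the factory name and default values are assumptions; K is the Keras backend as above):

def make_huber_metric(huber_delta=1.0, huber_weight=1.0):
    def keras_metric_loss(y_true, y_pred):
        x = y_true - y_pred
        return K.mean(huber_weight * K.minimum(
            K.maximum(2 * huber_delta * K.abs(x) - huber_delta ** 2,
                      huber_delta ** 2),
            x ** 2) + K.relu(x) ** 2)
    return keras_metric_loss

# e.g. model.compile(optimizer='adam', loss='mse',
#                    metrics=[make_huber_metric(huber_delta=0.5)])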
Example 11
def call(self, x):
    # Element-wise cap from above: every entry of x is limited to self.value.
    min_x = K.minimum(x, self.value * K.ones_like(x))
    return min_x
Example 12
    def classification_loss(self, y_true, y_pred):
        '''Classification loss metric'''
        (yolo_output, true_boxes, detectors_mask,
         matching_true_boxes) = self.args

        num_anchors = len(self.anchors)
        object_scale = LAMBDA_OBJ
        no_object_scale = LAMBDA_NOOBJ
        class_scale = LAMBDA_CLASS
        coordinates_scale = LAMBDA_COORD
        pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
            yolo_output, self.anchors, self.num_classes)

        # Unadjusted box predictions for loss.
        # TODO: Remove extra computation shared with yolo_head.
        yolo_output_shape = K.shape(yolo_output)
        feats = K.reshape(yolo_output, [
            -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
            self.num_classes + 5
        ])
        pred_boxes = K.concatenate(
            (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

        # TODO: Adjust predictions by image width/height for non-square images?
        # IOUs may be off due to different aspect ratio.

        # Expand pred x,y,w,h to allow comparison with ground truth.
        # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
        pred_xy = K.expand_dims(pred_xy, 4)
        pred_wh = K.expand_dims(pred_wh, 4)

        pred_wh_half = pred_wh / 2.
        pred_mins = pred_xy - pred_wh_half
        pred_maxes = pred_xy + pred_wh_half

        true_boxes_shape = K.shape(true_boxes)

        # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
        true_boxes = K.reshape(true_boxes, [
            true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1],
            true_boxes_shape[2]
        ])
        true_xy = true_boxes[..., 0:2]
        true_wh = true_boxes[..., 2:4]

        # Find IOU of each predicted box with each ground truth box.
        true_wh_half = true_wh / 2.
        true_mins = true_xy - true_wh_half
        true_maxes = true_xy + true_wh_half

        intersect_mins = K.maximum(pred_mins, true_mins)
        intersect_maxes = K.minimum(pred_maxes, true_maxes)
        intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
        true_areas = true_wh[..., 0] * true_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = intersect_areas / union_areas

        #iou_scores = tf.Print(iou_scores,[tf.shape(iou_scores)[:]],message='IOU SCORES')

        # Best IOUs for each location.
        best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
        best_ious = K.expand_dims(best_ious)

        #best_ious = tf.Print(best_ious,[tf.shape(best_ious)],message='BEST IOU SCORE')

        # A detector has found an object if IOU > thresh for some true box.
        object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))
        #object_detections = tf.Print(object_detections,[tf.shape(object_detections)],message = 'OBJECT DETECTION')

        # TODO: Darknet region training includes extra coordinate loss for early
        # training steps to encourage predictions to match anchor priors.

        # Determine confidence weights from object and no_object weights.
        # NOTE: YOLO does not use binary cross-entropy here.
        no_object_weights = (no_object_scale * (1 - object_detections) *
                             (1 - detectors_mask))
        no_objects_loss = no_object_weights * K.square(-pred_confidence)

        if self.rescore_confidence:
            objects_loss = (object_scale * detectors_mask *
                            K.square(best_ious - pred_confidence))
        else:
            objects_loss = (object_scale * detectors_mask *
                            K.square(1 - pred_confidence))
        confidence_loss = objects_loss + no_objects_loss

        # Classification loss for matching detections.
        # NOTE: YOLO does not use categorical cross-entropy loss here.
        matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
        matching_classes = K.one_hot(matching_classes, self.num_classes)
        #matching_classes = tf.Print(matching_classes,[tf.shape(matching_classes)[3:]],message = 'MATCHING CLASSES')

        classification_loss = (class_scale * detectors_mask *
                               K.square(matching_classes - pred_class_prob))

        classification_loss_sum = K.sum(classification_loss)

        return classification_loss_sum
Example 13
    def IoU(self, y_true, y_pred):
        '''IoU metric'''
        (yolo_output, true_boxes, detectors_mask,
         matching_true_boxes) = self.args

        num_anchors = len(self.anchors)

        # pred_*.shape = (n_images,13,13,n_boxes,1 or 2) 1 = conf/class 2 = xy or wh
        pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
            yolo_output, self.anchors, self.num_classes)

        #pred_xy = tf.Print(pred_xy,[tf.shape(pred_xy)[:]],message='PRED XY')

        # Unadjusted box predictions for loss.
        # TODO: Remove extra computation shared with yolo_head.
        yolo_output_shape = K.shape(yolo_output)
        feats = K.reshape(yolo_output, [
            -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
            self.num_classes + 5
        ])
        pred_boxes = K.concatenate(
            (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

        # TODO: Adjust predictions by image width/height for non-square images?
        # IOUs may be off due to different aspect ratio.

        # Expand pred x,y,w,h to allow comparison with ground truth.
        # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
        pred_xy = K.expand_dims(pred_xy, 4)
        pred_wh = K.expand_dims(pred_wh, 4)

        pred_wh_half = pred_wh / 2.
        pred_mins = pred_xy - pred_wh_half
        pred_maxes = pred_xy + pred_wh_half

        true_boxes_shape = K.shape(true_boxes)

        # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
        true_boxes = K.reshape(true_boxes, [
            true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1],
            true_boxes_shape[2]
        ])
        true_xy = true_boxes[..., 0:2]
        true_wh = true_boxes[..., 2:4]

        # Find IOU of each predicted box with each ground truth box.
        true_wh_half = true_wh / 2.
        true_mins = true_xy - true_wh_half
        true_maxes = true_xy + true_wh_half

        intersect_mins = K.maximum(pred_mins, true_mins)
        intersect_maxes = K.minimum(pred_maxes, true_maxes)
        intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
        true_areas = true_wh[..., 0] * true_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = intersect_areas / union_areas
        #iou_scores = tf.Print(iou_scores,[tf.shape(iou_scores)[:]],message='IOU SCORES')

        # Best IOUs for each location.
        best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
        best_ious = K.expand_dims(best_ious)

        #best_ious = tf.Print(best_ious,[tf.shape(best_ious)],message='BEST IOU SCORE')

        # A detector has found an object if IOU > thresh for some true box.
        object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))
        #object_detections = tf.Print(object_detections,[tf.shape(object_detections)],message = 'OBJECT DETECTION')

        # NOTE: despite the name, this sums the 0/1 detection indicators, i.e.
        # it counts locations whose best IOU exceeds 0.6, rather than summing IOUs.
        total_IoU = K.sum(object_detections)

        return total_IoU
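
A hedged wiring sketch (the object name and loss method are assumptions): classification_loss and IoU read self.args and self.anchors, so they are presumably methods on a loss-helper instance whose bound methods are handed to Keras as metrics.

# yolo_metrics is assumed to be an instance of the class these methods live on.
model.compile(optimizer='adam',
              loss=yolo_metrics.loss,  # hypothetical total-loss method
              metrics=[yolo_metrics.classification_loss, yolo_metrics.IoU])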
Example 14
def yoloss(y_true, y_pred):

    # calculate first the IOU tensors for the 2 boxes in each grid cell
    box1_pred = y_pred[..., 0:4]
    box2_pred = y_pred[..., 5:9]

    box_true = y_true[..., 0:4]

    # Box 1
    x1b1 = K.maximum(box1_pred[..., 0] - 0.5 * box1_pred[..., 2],
                     box_true[..., 0] - 0.5 * box_true[..., 2])
    y1b1 = K.maximum(box1_pred[..., 1] - 0.5 * box1_pred[..., 3],
                     box_true[..., 1] - 0.5 * box_true[..., 3])
    x2b1 = K.minimum(box1_pred[..., 0] + 0.5 * box1_pred[..., 2],
                     box_true[..., 0] + 0.5 * box_true[..., 2])
    y2b1 = K.minimum(box1_pred[..., 1] + 0.5 * box1_pred[..., 3],
                     box_true[..., 1] + 0.5 * box_true[..., 3])

    intersection1 = K.maximum(x2b1 - x1b1, 0) * K.maximum(y2b1 - y1b1, 0)
    union1 = (box1_pred[..., 2] * box1_pred[..., 3] +
              box_true[..., 2] * box_true[..., 3] - intersection1 +
              K.epsilon())
    iou1 = intersection1 / union1
    iou1 = K.expand_dims(iou1)

    # Box 2
    x1b2 = K.maximum(box2_pred[..., 0] - 0.5 * box2_pred[..., 2],
                     box_true[..., 0] - 0.5 * box_true[..., 2])
    y1b2 = K.maximum(box2_pred[..., 1] - 0.5 * box2_pred[..., 3],
                     box_true[..., 1] - 0.5 * box_true[..., 3])
    x2b2 = K.minimum(box2_pred[..., 0] + 0.5 * box2_pred[..., 2],
                     box_true[..., 0] + 0.5 * box_true[..., 2])
    y2b2 = K.minimum(box2_pred[..., 1] + 0.5 * box2_pred[..., 3],
                     box_true[..., 1] + 0.5 * box_true[..., 3])

    intersection2 = K.maximum(x2b2 - x1b2, 0) * K.maximum(y2b2 - y1b2, 0)
    union2 = (box2_pred[..., 2] * box2_pred[..., 3] +
              box_true[..., 2] * box_true[..., 3] - intersection2 +
              K.epsilon())
    iou2 = intersection2 / union2
    iou2 = K.expand_dims(iou2)

    # Get the maximum IOU --> which box is responsible for the prediction, plus the value of that IOU
    box_iou_max = K.expand_dims(
        K.cast(K.argmax(K.concatenate([iou1, iou2])), y_pred.dtype))
    # shape = (None,S,S,1), cast to a float so it can multiply float tensors below

    IOU_max = K.maximum(iou1, iou2)

    # Now build a revised version of y_true, y_pred, both containing only the box of maximum IOU,
    # and with c=max_iou for y_pred

    ytrue = K.concatenate([y_true[..., 0:4], IOU_max, y_true[..., 10:]])

    ypred = K.concatenate([
        y_pred[..., 0:5] * (1 - box_iou_max) + y_pred[..., 5:10] * box_iou_max,
        y_pred[..., 10:]
    ])

    # The last needed tensor is the 1_i tensor = 1 if an object is in the grid cell
    One = K.max(y_true[..., 10:], axis=-1)
    # shape = (None,S,S) as it is mainly multiplied by particular elements of shape ytrue[...,i].shape
    # will use K.expand_dims for the last term of the loss where tensors have shape ytrue[...,i:j].shape

    # Finally it is time to build the loss function:

    loss = (l_coord * K.sum(One * (K.square(ypred[..., 0] - ytrue[..., 0]) +
                                   K.square(ypred[..., 1] - ytrue[..., 1]))) +
            l_coord *
            K.sum(One *
                  (K.square(K.sqrt(ypred[..., 2]) - K.sqrt(ytrue[..., 2])) +
                   K.square(K.sqrt(ypred[..., 3]) - K.sqrt(ytrue[..., 3])))) +
            K.sum(One *
                  (K.square(ypred[..., 4] - ytrue[..., 4]))) + l_noobj * K.sum(
                      (1. - One) *
                      (K.square(ypred[..., 4] - ytrue[..., 4]))) + K.sum(
                          K.expand_dims(One) *
                          (K.square(ypred[..., 10:] - ytrue[..., 10:]))))

    return loss
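
l_coord and l_noobj are free variables here; in the YOLOv1 paper they are the loss weights lambda_coord = 5 and lambda_noobj = 0.5, so a plausible (assumed) module-level setup is:

l_coord = 5.0   # weight on coordinate errors (YOLOv1's lambda_coord)
l_noobj = 0.5   # weight on confidence errors in empty cells (lambda_noobj)

# model.compile(optimizer='adam', loss=yoloss)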
Example 15
def loss_yolo(y_pred, y_true):
    # NOTE: Keras invokes loss functions as loss(y_true, y_pred); the parameter
    # order here is reversed, so the function must be called accordingly.

    # pred_boxes = K.Reshape(y_pred[...,3:], (-1,7*7,B,5)) ** WE DROP B FOR NOW
    pred_boxes = K.reshape(y_pred[..., 3:], (-1, 7 * 7, 5))  # 245
    true_boxes = K.reshape(y_true[..., 3:], (-1, 7 * 7, 5))  # 245

    # probability that an object is present
    y_pred_conf = pred_boxes[..., 4]
    y_true_conf = true_boxes[..., 4]

    ### xy_loss--------------------------------------
    y_pred_xy = pred_boxes[..., 0:2]
    y_true_xy = true_boxes[..., 0:2]

    xy_loss = 5 * (K.sum(
        K.sum(K.square(y_true_xy - y_pred_xy), axis=-1) * y_true_conf,
        axis=-1))

    ### wh_loss---------------------------------------
    y_pred_wh = pred_boxes[..., 2:4]
    y_true_wh = true_boxes[..., 2:4]

    wh_loss = 5 * (K.sum(
        K.sum(K.square(tf.math.sqrt(y_true_wh) - tf.math.sqrt(y_pred_wh)),
              axis=-1) * y_true_conf,
        axis=-1))

    ### class_loss----------------------------------
    #y_pred_class = y_pred[...,0:3]
    #y_true_class = y_true[...,0:3]
    y_pred_class = K.reshape(y_pred[..., 0:3], (-1, 7 * 7, 3))
    y_true_class = K.reshape(y_true[..., 0:3], (-1, 7 * 7, 3))

    clss_loss = K.sum(K.sum(K.square(y_true_class - y_pred_class), axis=-1) *
                      y_true_conf,
                      axis=-1)

    ### Conf_loss--------------------------------------
    #(*** I think this only makes sense when we have more than one prediction per cell (B)!!!)

    # Intersection-over-union (IoU) computation.
    # Top-left and bottom-right (xy) coordinates of the predicted and true boxes.
    x1y1_pred = y_pred_xy - (y_pred_wh / 2)
    x2y2_pred = y_pred_xy + (y_pred_wh / 2)
    x1y1_true = y_true_xy - (y_true_wh / 2)
    x2y2_true = y_true_xy + (y_true_wh / 2)
    # Top-left and bottom-right corners of the intersection rectangle.
    xi1 = K.maximum(x1y1_pred[..., 0], x1y1_true[..., 0])
    yi1 = K.maximum(x1y1_pred[..., 1], x1y1_true[..., 1])
    xi2 = K.minimum(x2y2_pred[..., 0], x2y2_true[..., 0])
    yi2 = K.minimum(x2y2_pred[..., 1], x2y2_true[..., 1])
    # Area computation: clamp the intersection extents at zero so that
    # non-overlapping boxes do not produce a negative intersection area.
    inter_area = K.maximum(xi2 - xi1, 0.) * K.maximum(yi2 - yi1, 0.)
    true_area = y_true_wh[..., 0] * y_true_wh[..., 1]
    pred_area = y_pred_wh[..., 0] * y_pred_wh[..., 1]
    union_area = pred_area + true_area - inter_area
    iou = inter_area / union_area

    # -> First term of conf_loss (penalizes incorrect predictions)
    conf_loss1 = K.sum(K.square(y_true_conf * iou - y_pred_conf) * y_true_conf,
                       axis=-1)

    # -> Second term of conf_loss (penalizes predictions where no object is actually present)
    '''
        We build the tensor y_true_conf_op, which equals y_true_conf but with
        zeros and ones swapped. It picks out the cells that contain no object,
        so we can penalize y_pred_conf != 0 there (confidence should be zero
        in cells with no objects).
    '''
    # K.ones_like matches y_true_conf's dtype; the original hard-coded
    # tf.ones(..., dtype='float64') breaks under the default float32.
    ones_tensor = K.ones_like(y_true_conf)
    y_true_conf_op = ones_tensor - y_true_conf
    conf_loss2 = 0.5 * (K.sum(
        K.square(y_true_conf * iou - y_pred_conf) * y_true_conf_op, axis=-1))

    ### LOSS FUNCTION
    loss = clss_loss + xy_loss + wh_loss + conf_loss1 + conf_loss2

    return loss
Example 16
    def step(self, a, states):
        r_tm1 = states[:self.nb_layers]
        c_tm1 = states[self.nb_layers:2*self.nb_layers]
        e_tm1 = states[2*self.nb_layers:3*self.nb_layers]

        if self.extrap_start_time is not None:
            t = states[-1]
            a = K.switch(t >= self.t_extrap, states[-2], a)  # if past self.extrap_start_time, the previous prediction will be treated as the actual

        c = []
        r = []
        e = []

        for l in reversed(range(self.nb_layers)):
            inputs = [r_tm1[l], e_tm1[l]]
            if l < self.nb_layers - 1:
                inputs.append(r_up)

            inputs = K.concatenate(inputs, axis=self.channel_axis)
            i = self.conv_layers['i'][l].call(inputs)
            f = self.conv_layers['f'][l].call(inputs)
            o = self.conv_layers['o'][l].call(inputs)
            _c = f * c_tm1[l] + i * self.conv_layers['c'][l].call(inputs)
            _r = o * self.LSTM_activation(_c)
            c.insert(0, _c)
            r.insert(0, _r)

            if l > 0:
                r_up = self.upsample.call(_r)

        for l in range(self.nb_layers):
            ahat = self.conv_layers['ahat'][l].call(r[l])
            if l == 0:
                ahat = K.minimum(ahat, self.pixel_max)
                frame_prediction = ahat

            # compute errors
            e_up = self.error_activation(ahat - a)
            e_down = self.error_activation(a - ahat)

            e.append(K.concatenate((e_up, e_down), axis=self.channel_axis))

            if l < self.nb_layers - 1:
                a = self.conv_layers['a'][l].call(e[l])
                a = self.pool.call(a)  # target for next layer

        if self.output_mode == 'prediction':
            output = frame_prediction
        else:
            for l in range(self.nb_layers):
                layer_error = K.mean(K.batch_flatten(e[l]), axis=-1, keepdims=True)
                all_error = layer_error if l == 0 else K.concatenate((all_error, layer_error), axis=-1)
            if self.output_mode == 'error':
                output = all_error
            else:
                output = K.concatenate((K.batch_flatten(frame_prediction), all_error), axis=-1)

        states = r + c + e
        if self.extrap_start_time is not None:
            states += [frame_prediction, t + 1]
        return output, states