def calculateGpu(self, gtPosition, predPosition):
    """Compute the element-wise IoU between ground-truth and predicted boxes.

    Both inputs are flattened to rows of ``pShape[-1]`` values, and columns
    0..3 are used as (left, top, right, bottom) corners.  The result is
    reshaped to ``(pShape[0], pShape[1], 1)``, so the inputs are expected to
    be 3-D.

    NOTE(review): ``THT`` is presumably ``theano.tensor`` (the ``ndim``
    keyword is Theano-specific) -- confirm against the file's imports.
    """
    pShape = K.shape(gtPosition)
    inputDim = K.ndim(gtPosition)
    flat_shape = (-1, pShape[-1])
    gt_rows = K.reshape(gtPosition, flat_shape)
    pred_rows = K.reshape(predPosition, flat_shape)

    # Corners of the overlap rectangle.
    left = K.maximum(pred_rows[:, 0], gt_rows[:, 0])
    top = K.maximum(pred_rows[:, 1], gt_rows[:, 1])
    right = K.minimum(pred_rows[:, 2], gt_rows[:, 2])
    bottom = K.minimum(pred_rows[:, 3], gt_rows[:, 3])

    # A non-positive extent means the boxes do not overlap on that axis; the
    # comparison mask zeroes the product in that case.
    overlap_w = right - left
    overlap_h = bottom - top
    intersect = overlap_w * (overlap_w > 0) * overlap_h * (overlap_h > 0)

    label_area = (K.abs(gt_rows[:, 2] - gt_rows[:, 0])
                  * K.abs(gt_rows[:, 3] - gt_rows[:, 1]))
    predict_area = (K.abs(pred_rows[:, 2] - pred_rows[:, 0])
                    * K.abs(pred_rows[:, 3] - pred_rows[:, 1]))
    union = label_area + predict_area - intersect

    ratio = intersect / union
    out_shape = (pShape[0], pShape[1], 1)
    return THT.reshape(ratio, out_shape, ndim=inputDim)
def box_iou(b1, b2):
    '''Pairwise IoU between two sets of centre/size boxes.

    Parameters
    ----------
    b1: tensor, shape=(i1,...,iN, 4), xywh
    b2: tensor, shape=(j, 4), xywh

    Returns
    -------
    iou: tensor, shape=(i1,...,iN, j)

    '''
    def _extent(boxes):
        # Split xywh into (min corner, max corner, wh).
        centre = boxes[..., :2]
        size = boxes[..., 2:4]
        half = size / 2.
        return centre - half, centre + half, size

    # Insert singleton axes so every b1 box broadcasts against every b2 box.
    lhs_mins, lhs_maxes, lhs_wh = _extent(K.expand_dims(b1, -2))
    rhs_mins, rhs_maxes, rhs_wh = _extent(K.expand_dims(b2, 0))

    overlap_mins = K.maximum(lhs_mins, rhs_mins)
    overlap_maxes = K.minimum(lhs_maxes, rhs_maxes)
    # Clamp at zero so disjoint boxes contribute no area.
    overlap_wh = K.maximum(overlap_maxes - overlap_mins, 0.)
    overlap_area = overlap_wh[..., 0] * overlap_wh[..., 1]

    lhs_area = lhs_wh[..., 0] * lhs_wh[..., 1]
    rhs_area = rhs_wh[..., 0] * rhs_wh[..., 1]
    return overlap_area / (lhs_area + rhs_area - overlap_area)
def yolo_loss(args, anchors, num_classes, rescore_confidence=False, print_loss=False):
    """Summed YOLO training loss: confidence + classification + box terms.

    Parameters
    ----------
    args : sequence of four tensors
        ``(yolo_output, true_boxes, detectors_mask, matching_true_boxes)``:
        the final convolutional features, the ground-truth boxes (reshaped
        here as ``[batch, num_true_boxes, box_params]`` with x, y, w, h in
        the first four params), a 0/1 mask of detector positions matched to
        a ground-truth box, and the matched ground-truth box per detector
        (params 0:4 are the box, param 4 is the class index).
    anchors : sequence
        Anchor boxes; its length is the number of anchors per cell.
    num_classes : int
        Number of object classes.
    rescore_confidence : bool, default=False
        If true, the confidence target of matched detectors is their best
        IoU instead of 1.
    print_loss : bool, default=False
        If true, wrap the result in ``tf.Print`` to log the loss components.

    Returns
    -------
    total_loss : tensor
        ``0.5 * (confidence + classification + coordinates)`` where each
        term is summed (not averaged) over all elements.
    """
    yolo_output, true_boxes, detectors_mask, matching_true_boxes = args
    anchor_count = len(anchors)

    # Fixed weights of the individual loss terms.
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1

    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Raw (unadjusted) box predictions used by the coordinate loss.
    # TODO: Remove extra computation shared with yolo_head.
    out_shape = K.shape(yolo_output)
    raw = K.reshape(
        yolo_output,
        [-1, out_shape[1], out_shape[2], anchor_count, num_classes + 5])
    raw_xy = K.sigmoid(raw[..., 0:2])
    raw_wh = raw[..., 2:4]
    pred_boxes = K.concatenate((raw_xy, raw_wh), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Add a num_true_boxes axis to the predictions:
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)
    pred_half = pred_wh / 2.
    pred_lo = pred_xy - pred_half
    pred_hi = pred_xy + pred_half

    # Give the ground truth matching singleton axes so it broadcasts.
    gt_shape = K.shape(true_boxes)
    gt = K.reshape(true_boxes,
                   [gt_shape[0], 1, 1, 1, gt_shape[1], gt_shape[2]])
    gt_xy = gt[..., 0:2]
    gt_wh = gt[..., 2:4]
    gt_half = gt_wh / 2.
    gt_lo = gt_xy - gt_half
    gt_hi = gt_xy + gt_half

    # IoU of every predicted box with every ground-truth box.
    overlap_lo = K.maximum(pred_lo, gt_lo)
    overlap_hi = K.minimum(pred_hi, gt_hi)
    overlap_wh = K.maximum(overlap_hi - overlap_lo, 0.)
    overlap_area = overlap_wh[..., 0] * overlap_wh[..., 1]
    pred_area = pred_wh[..., 0] * pred_wh[..., 1]
    gt_area = gt_wh[..., 0] * gt_wh[..., 1]
    iou_scores = overlap_area / (pred_area + gt_area - overlap_area)

    # Best IoU over the ground-truth axis, with a trailing unit axis.
    best_ious = K.expand_dims(K.max(iou_scores, axis=4))

    # A detector has found an object if IoU > thresh for some true box.
    found_object = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Confidence loss. NOTE: YOLO does not use binary cross-entropy here.
    background_weight = (no_object_scale * (1 - found_object) *
                         (1 - detectors_mask))
    background_loss = background_weight * K.square(-pred_confidence)

    if rescore_confidence:
        confidence_error = K.square(best_ious - pred_confidence)
    else:
        confidence_error = K.square(1 - pred_confidence)
    foreground_loss = object_scale * detectors_mask * confidence_error
    confidence_loss = foreground_loss + background_loss

    # Classification loss for matched detectors.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    class_index = K.cast(matching_true_boxes[..., 4], 'int32')
    class_target = K.one_hot(class_index, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(class_target - pred_class_prob))

    # Coordinate loss for matched detectors.
    box_target = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(box_target - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (
        confidence_loss_sum + classification_loss_sum + coordinates_loss_sum)

    if print_loss:
        components = [
            total_loss, confidence_loss_sum, classification_loss_sum,
            coordinates_loss_sum
        ]
        total_loss = tf.Print(
            total_loss,
            components,
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss
def clip_relu(x):
    """Element-wise clamp of ``x`` into ``[0, 1]`` (floor at 0, then cap at 1)."""
    return K.minimum(K.maximum(x, 0), 1)
def clamp_minus_one_plus_one(x):
    """Element-wise clamp of ``x`` into ``[-1, +1]``."""
    # minimum/maximum operate element-wise, unlike the reducing min/max.
    floored = K.maximum(x, -1)
    return K.minimum(+1, floored)
def step(self, a, states):
    """Run one recurrent time step and return ``(output, new_states)``.

    ``states`` is read as three consecutive groups of ``self.nb_layers``
    entries -- representation (r), cell (c) and error (e) tensors from the
    previous step -- followed, when ``self.extrap_start_time`` is set, by
    the previous frame prediction and a time counter.  The returned state
    list has the same layout.

    ``a`` is the input for the lowest layer; it is replaced layer by layer
    with the target for the next layer up.

    The returned ``output`` depends on ``self.output_layer_type`` /
    ``self.output_layer_num`` when a layer type is set, otherwise on
    ``self.output_mode`` ('prediction', 'error', or anything else for the
    flattened prediction concatenated with the per-layer errors).
    """
    # Split the flat state list into its per-layer groups.
    r_tm1 = states[:self.nb_layers]
    c_tm1 = states[self.nb_layers:2*self.nb_layers]
    e_tm1 = states[2*self.nb_layers:3*self.nb_layers]

    if self.extrap_start_time is not None:
        t = states[-1]
        a = K.switch(t >= self.t_extrap, states[-2], a)  # if past self.extrap_start_time, the previous prediction will be treated as the actual

    c = []
    r = []
    e = []

    # Top-down pass: update the recurrent units from the highest layer to
    # the lowest; each layer below the top also receives the upsampled
    # representation (r_up) of the layer above it.
    for l in reversed(range(self.nb_layers)):
        inputs = [r_tm1[l], e_tm1[l]]
        if l < self.nb_layers - 1:
            inputs.append(r_up)

        inputs = K.concatenate(inputs, axis=self.channel_axis)
        # LSTM-style gates computed from the concatenated inputs.
        i = self.conv_layers['i'][l].call(inputs)
        f = self.conv_layers['f'][l].call(inputs)
        o = self.conv_layers['o'][l].call(inputs)
        _c = f * c_tm1[l] + i * self.conv_layers['c'][l].call(inputs)
        _r = o * self.LSTM_activation(_c)
        # Insert at the front so the lists end up ordered from layer 0 up.
        c.insert(0, _c)
        r.insert(0, _r)

        if l > 0:
            r_up = self.upsample.call(_r)

    # Bottom-up pass: predict each layer's target and compute its errors.
    for l in range(self.nb_layers):
        ahat = self.conv_layers['ahat'][l].call(r[l])
        if l == 0:
            # Cap the lowest-layer prediction at pixel_max; this is the
            # frame prediction.
            ahat = K.minimum(ahat, self.pixel_max)
            frame_prediction = ahat

        # compute errors
        e_up = self.error_activation(ahat - a)
        e_down = self.error_activation(a - ahat)

        e.append(K.concatenate((e_up, e_down), axis=self.channel_axis))

        # Capture the requested intermediate tensor before `a` is replaced
        # by the next layer's target below.
        if self.output_layer_num == l:
            if self.output_layer_type == 'A':
                output = a
            elif self.output_layer_type == 'Ahat':
                output = ahat
            elif self.output_layer_type == 'R':
                output = r[l]
            elif self.output_layer_type == 'E':
                output = e[l]

        if l < self.nb_layers - 1:
            a = self.conv_layers['a'][l].call(e[l])
            a = self.pool.call(a)  # target for next layer

    if self.output_layer_type is None:
        if self.output_mode == 'prediction':
            output = frame_prediction
        else:
            # One mean error per layer, concatenated along the last axis.
            for l in range(self.nb_layers):
                layer_error = K.mean(K.batch_flatten(e[l]), axis=-1, keepdims=True)
                all_error = layer_error if l == 0 else K.concatenate((all_error, layer_error), axis=-1)
            if self.output_mode == 'error':
                output = all_error
            else:
                output = K.concatenate((K.batch_flatten(frame_prediction), all_error), axis=-1)

    states = r + c + e
    if self.extrap_start_time is not None:
        # Carry the prediction and the advanced time counter to the next step.
        states += [frame_prediction, t + 1]
    return output, states
def call(self, x, mask=None):
    """Gather entries of ``self.W`` for indices clamped to a valid range.

    Indices in ``x`` are limited to ``[0, self.model_dims[1] - 1]`` before
    the lookup.  ``mask`` is accepted but not used.
    """
    highest_index = self.model_dims[1] - 1
    capped = K.minimum(x, highest_index)
    safe_index = K.maximum(capped, 0)
    return K.gather(self.W, safe_index)
def cross_entropy(self, y_true, y_pred):
    """Cross-entropy of ``y_pred`` against ``y_true``, summed over the last axis.

    Predictions are clamped into ``[1e-15, 1 - 1e-15]`` first so the
    logarithm never sees 0.
    """
    tiny = 1e-15
    capped = K.minimum(y_pred, 1 - tiny)
    bounded = K.maximum(capped, tiny)
    return -K.sum(y_true * K.log(bounded), axis=-1)
def custom_activation(self, x):
    """Apply the activation selected by ``self.activation`` to ``x``.

    ``self.activation`` is a dash-separated spec:

    * ``"custom-<a>"``: sigmoid with slope ``a``, i.e. ``1 / (1 + exp(-a*x))``.
    * ``"rounded..."``: sigmoid rounded to the nearest integer and clamped
      into ``[0, 1]``.

    Returns the activated tensor, or ``None`` when the spec matches neither
    form (unchanged from the previous behaviour).
    """
    spec = self.activation.split('-')
    kind = spec[0]
    if kind == "custom":
        a = float(spec[1])
        return 1.0 / (1 + K.exp(-a * x))
    if kind == "rounded":
        # The result used to be computed and then discarded (missing
        # `return`), so this branch always yielded None.
        return K.minimum(K.maximum(K.round(K.sigmoid(x)), 0), 1)
    return None
def keras_metric_loss(y_true, y_pred):
    """Mean of a weighted Huber-style term plus a one-sided squared penalty.

    With ``x = y_true - y_pred`` and a positive ``huber_delta``, the robust
    term ``min(max(2*delta*|x| - delta**2, delta**2), x**2)`` equals ``x**2``
    for ``|x| <= delta`` and ``2*delta*|x| - delta**2`` beyond it.  It is
    scaled by ``huber_weight``; ``relu(x)**2`` additionally penalises
    positive residuals (under-prediction).
    """
    residual = y_true - y_pred
    delta_sq = huber_delta ** 2
    linear_branch = 2 * huber_delta * K.abs(residual) - delta_sq
    robust_term = K.minimum(K.maximum(linear_branch, delta_sq), residual ** 2)
    one_sided = K.relu(residual) ** 2
    return K.mean(huber_weight * robust_term + one_sided)
def call(self, x):
    """Cap ``x`` element-wise at ``self.value``."""
    ceiling = self.value * K.ones_like(x)
    return K.minimum(x, ceiling)
def classification_loss(self, y_true, y_pred):
    '''Classification loss metric.

    Returns the summed squared error between the one-hot class of the
    matched ground-truth box and the predicted class probabilities, masked
    by ``detectors_mask`` and scaled by ``LAMBDA_CLASS``.

    ``y_true`` and ``y_pred`` are accepted but not used; all inputs come
    from ``self.args``, which must unpack to
    ``(yolo_output, true_boxes, detectors_mask, matching_true_boxes)``.

    The previous version also built the full IoU and confidence-loss
    computation, none of which fed into the returned value; that dead code
    has been dropped.
    '''
    (yolo_output, _true_boxes, detectors_mask,
     matching_true_boxes) = self.args

    _, _, _, pred_class_prob = yolo_head(
        yolo_output, self.anchors, self.num_classes)

    # Param 4 of each matched ground-truth box is read as its class index.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, self.num_classes)

    per_class_loss = (LAMBDA_CLASS * detectors_mask *
                      K.square(matching_classes - pred_class_prob))
    return K.sum(per_class_loss)
def IoU(self, y_true, y_pred):
    '''Count detector positions whose best IoU with a true box exceeds 0.6.

    Despite the name, the returned value is not an IoU: it is the number of
    (cell, anchor) predictions whose best IoU against any ground-truth box
    is above 0.6.

    ``y_true`` and ``y_pred`` are accepted but not used; all inputs come
    from ``self.args``, which must unpack to
    ``(yolo_output, true_boxes, detectors_mask, matching_true_boxes)``.

    Unused intermediate tensors from the previous version (the raw
    ``feats`` / ``pred_boxes`` reshape) have been dropped; the result is
    unchanged.
    '''
    (yolo_output, true_boxes, _detectors_mask,
     _matching_true_boxes) = self.args

    pred_xy, pred_wh, _pred_confidence, _pred_class_prob = yolo_head(
        yolo_output, self.anchors, self.num_classes)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Add a num_true_boxes axis to the predictions:
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)
    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    # Give the ground truth matching singleton axes so it broadcasts.
    true_boxes_shape = K.shape(true_boxes)
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    # IoU of each predicted box with each ground-truth box.
    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]
    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IoU over the ground-truth axis, with a trailing unit axis.
    best_ious = K.expand_dims(K.max(iou_scores, axis=4))

    # A detector has found an object if IoU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))
    return K.sum(object_detections)
def yoloss(y_true, y_pred):
    """YOLO-style loss for a grid that predicts two boxes per cell.

    Channel layout read by this function (last axis):

    * ``y_pred``: ``0:4`` box 1 (x, y, w, h as centre/size), ``4`` its
      confidence, ``5:9`` box 2, ``9`` its confidence, ``10:`` class scores.
    * ``y_true``: ``0:4`` the ground-truth box, ``10:`` class indicators.

    For each cell the predicted box with the higher IoU against the ground
    truth is kept.  The loss is the sum of coordinate, size (on square
    roots), confidence (target = best IoU), no-object confidence and class
    terms, weighted by the module-level ``l_coord`` and ``l_noobj``.

    NOTE(review): the object indicator is taken as the max over the true
    class channels, which presumably assumes one-hot class targets.  The
    square roots give NaN if a predicted width/height is negative.
    """
    box_true = y_true[..., 0:4]

    def _iou_with_truth(box_pred):
        # IoU of a centre/size box against box_true, with a trailing unit axis.
        x1 = K.maximum(box_pred[..., 0] - 0.5 * box_pred[..., 2],
                       box_true[..., 0] - 0.5 * box_true[..., 2])
        y1 = K.maximum(box_pred[..., 1] - 0.5 * box_pred[..., 3],
                       box_true[..., 1] - 0.5 * box_true[..., 3])
        x2 = K.minimum(box_pred[..., 0] + 0.5 * box_pred[..., 2],
                       box_true[..., 0] + 0.5 * box_true[..., 2])
        y2 = K.minimum(box_pred[..., 1] + 0.5 * box_pred[..., 3],
                       box_true[..., 1] + 0.5 * box_true[..., 3])
        intersection = K.maximum(x2 - x1, 0) * K.maximum(y2 - y1, 0)
        # epsilon keeps the ratio finite when both boxes are empty.
        union = (box_pred[..., 2] * box_pred[..., 3] +
                 box_true[..., 2] * box_true[..., 3] -
                 intersection + K.epsilon())
        return K.expand_dims(intersection / union)

    iou1 = _iou_with_truth(y_pred[..., 0:4])
    iou2 = _iou_with_truth(y_pred[..., 5:9])

    # Index (0 or 1) of the responsible box, cast to float so it can be
    # used as a blend weight; shape (..., 1).
    box_iou_max = K.expand_dims(
        K.cast(K.argmax(K.concatenate([iou1, iou2])), y_pred.dtype))
    IOU_max = K.maximum(iou1, iou2)

    # Rebuild both tensors with a single box per cell:
    # channels 0:4 box, 4 confidence, 5: classes.
    # The true confidence is the best IoU; the predicted box/confidence
    # come from the responsible box.
    ytrue = K.concatenate([y_true[..., 0:4], IOU_max, y_true[..., 10:]])
    ypred = K.concatenate([
        y_pred[..., 0:5] * (1 - box_iou_max) + y_pred[..., 5:10] * box_iou_max,
        y_pred[..., 10:]
    ])

    # Object indicator per cell, shape (...,) to match ytrue[..., i].
    One = K.max(y_true[..., 10:], axis=-1)

    xy_term = K.sum(One * (K.square(ypred[..., 0] - ytrue[..., 0]) +
                           K.square(ypred[..., 1] - ytrue[..., 1])))
    wh_term = K.sum(One * (K.square(K.sqrt(ypred[..., 2]) - K.sqrt(ytrue[..., 2])) +
                           K.square(K.sqrt(ypred[..., 3]) - K.sqrt(ytrue[..., 3]))))
    obj_term = K.sum(One * (K.square(ypred[..., 4] - ytrue[..., 4])))
    noobj_term = K.sum((1. - One) * (K.square(ypred[..., 4] - ytrue[..., 4])))
    # In the rebuilt tensors the classes start at channel 5.  Slicing at
    # 10 (their offset in the raw inputs) silently left the first five
    # classes out of the loss.
    class_term = K.sum(
        K.expand_dims(One) * (K.square(ypred[..., 5:] - ytrue[..., 5:])))

    loss = (l_coord * xy_term + l_coord * wh_term + obj_term +
            l_noobj * noobj_term + class_term)
    return loss
def loss_yolo(y_pred, y_true):
    """Single-box-per-cell YOLO loss on a 7x7 grid with 3 classes.

    Channel layout read from the last axis of both tensors: ``0:3`` class
    scores, then five box values reshaped to ``(-1, 49, 5)`` as
    (x, y, w, h, confidence) with x, y the box centre.

    Returns one loss value per sample: class + xy + wh + two confidence
    terms (object cells, and no-object cells weighted by 0.5).

    NOTE(review): the parameter order is ``(y_pred, y_true)``.  Keras calls
    loss functions positionally as ``fn(y_true, y_pred)``, so if this is
    passed to ``model.compile`` the two tensors arrive swapped -- confirm
    how callers invoke it.  The order is left unchanged here.

    Changes from the previous version:

    * the intersection extents are clamped at 0, so disjoint boxes no
      longer produce a spurious positive overlap (negative * negative);
    * the no-object mask is ``1 - y_true_conf`` instead of a hard-coded
      float64 ones tensor, so float32 inputs work;
    * a small epsilon in the union keeps the IoU finite for empty boxes;
    * no-op ``.shape`` expression statements were removed.
    """
    # One (x, y, w, h, conf) row per grid cell; the number of boxes per
    # cell (B) is not modelled for now.
    pred_boxes = K.reshape(y_pred[..., 3:], (-1, 7 * 7, 5))
    true_boxes = K.reshape(y_true[..., 3:], (-1, 7 * 7, 5))

    # Probability that a cell contains an object.
    y_pred_conf = pred_boxes[..., 4]
    y_true_conf = true_boxes[..., 4]

    # --- xy loss: squared centre error, object cells only ---
    y_pred_xy = pred_boxes[..., 0:2]
    y_true_xy = true_boxes[..., 0:2]
    xy_loss = 5 * (K.sum(
        K.sum(K.square(y_true_xy - y_pred_xy), axis=-1) * y_true_conf,
        axis=-1))

    # --- wh loss: squared error of square-rooted sizes, object cells only ---
    y_pred_wh = pred_boxes[..., 2:4]
    y_true_wh = true_boxes[..., 2:4]
    wh_loss = 5 * (K.sum(
        K.sum(K.square(tf.math.sqrt(y_true_wh) - tf.math.sqrt(y_pred_wh)),
              axis=-1) * y_true_conf,
        axis=-1))

    # --- class loss: squared class-score error, object cells only ---
    y_pred_class = K.reshape(y_pred[..., 0:3], (-1, 7 * 7, 3))
    y_true_class = K.reshape(y_true[..., 0:3], (-1, 7 * 7, 3))
    clss_loss = K.sum(
        K.sum(K.square(y_true_class - y_pred_class), axis=-1) * y_true_conf,
        axis=-1)

    # --- confidence loss ---
    # (This probably only makes sense with more than one prediction per
    # cell, i.e. B > 1.)
    # Intersection over union (IoU): top-left and bottom-right corners of
    # the predicted and true boxes.
    x1y1_pred = y_pred_xy - (y_pred_wh / 2)
    x2y2_pred = y_pred_xy + (y_pred_wh / 2)
    x1y1_true = y_true_xy - (y_true_wh / 2)
    x2y2_true = y_true_xy + (y_true_wh / 2)

    # Top-left and bottom-right corners of the intersection rectangle.
    xi1 = K.maximum(x1y1_pred[..., 0], x1y1_true[..., 0])
    yi1 = K.maximum(x1y1_pred[..., 1], x1y1_true[..., 1])
    xi2 = K.minimum(x2y2_pred[..., 0], x2y2_true[..., 0])
    yi2 = K.minimum(x2y2_pred[..., 1], x2y2_true[..., 1])

    # Areas.  Each extent is clamped at 0: for disjoint boxes both extents
    # can be negative and their product would be a positive "overlap".
    inter_area = K.maximum(xi2 - xi1, 0.) * K.maximum(yi2 - yi1, 0.)
    true_area = y_true_wh[..., 0] * y_true_wh[..., 1]
    pred_area = y_pred_wh[..., 0] * y_pred_wh[..., 1]
    union_area = pred_area + true_area - inter_area
    iou = inter_area / (union_area + K.epsilon())

    # First term: penalise wrong confidence in cells that hold an object.
    conf_error = K.square(y_true_conf * iou - y_pred_conf)
    conf_loss1 = K.sum(conf_error * y_true_conf, axis=-1)

    # Second term: penalise confidence in cells that hold no object.  The
    # complement mask is 1 where y_true_conf is 0, so it selects exactly
    # the cells where the predicted confidence should be zero.
    y_true_conf_op = 1.0 - y_true_conf
    conf_loss2 = 0.5 * (K.sum(conf_error * y_true_conf_op, axis=-1))

    # --- total loss ---
    loss = clss_loss + xy_loss + wh_loss + conf_loss1 + conf_loss2
    return loss
def step(self, a, states):
    """Run one recurrent time step and return ``(output, new_states)``.

    ``states`` holds three consecutive groups of ``self.nb_layers`` tensors
    (representation, cell, error) and, when ``self.extrap_start_time`` is
    set, the previous frame prediction and a time counter at the end.  The
    returned state list uses the same layout.

    ``output`` is the frame prediction when ``self.output_mode`` is
    'prediction', the per-layer mean errors when it is 'error', and
    otherwise the flattened prediction concatenated with those errors.
    """
    n_layers = self.nb_layers
    prev_r = states[:n_layers]
    prev_c = states[n_layers:2 * n_layers]
    prev_e = states[2 * n_layers:3 * n_layers]

    extrapolating = self.extrap_start_time is not None
    if extrapolating:
        t = states[-1]
        # Once past the extrapolation start, the previous prediction is
        # treated as the actual input.
        a = K.switch(t >= self.t_extrap, states[-2], a)

    # Top-down pass over the recurrent units, highest layer first.
    new_c = [None] * n_layers
    new_r = [None] * n_layers
    for layer in range(n_layers - 1, -1, -1):
        parts = [prev_r[layer], prev_e[layer]]
        if layer < n_layers - 1:
            # Upsampled representation from the layer above.
            parts.append(from_above)
        merged = K.concatenate(parts, axis=self.channel_axis)

        gate_i = self.conv_layers['i'][layer].call(merged)
        gate_f = self.conv_layers['f'][layer].call(merged)
        gate_o = self.conv_layers['o'][layer].call(merged)
        cell = gate_f * prev_c[layer] + gate_i * self.conv_layers['c'][layer].call(merged)
        hidden = gate_o * self.LSTM_activation(cell)
        new_c[layer] = cell
        new_r[layer] = hidden

        if layer > 0:
            from_above = self.upsample.call(hidden)

    # Bottom-up pass: predictions, errors and the next layer's target.
    new_e = []
    for layer in range(n_layers):
        ahat = self.conv_layers['ahat'][layer].call(new_r[layer])
        if layer == 0:
            ahat = K.minimum(ahat, self.pixel_max)
            frame_prediction = ahat

        over = self.error_activation(ahat - a)
        under = self.error_activation(a - ahat)
        new_e.append(K.concatenate((over, under), axis=self.channel_axis))

        if layer < n_layers - 1:
            # Target for the next layer.
            a = self.pool.call(self.conv_layers['a'][layer].call(new_e[layer]))

    if self.output_mode == 'prediction':
        output = frame_prediction
    else:
        # One mean error per layer, concatenated along the last axis.
        for layer in range(n_layers):
            mean_err = K.mean(K.batch_flatten(new_e[layer]), axis=-1, keepdims=True)
            if layer == 0:
                all_error = mean_err
            else:
                all_error = K.concatenate((all_error, mean_err), axis=-1)
        if self.output_mode == 'error':
            output = all_error
        else:
            output = K.concatenate((K.batch_flatten(frame_prediction), all_error), axis=-1)

    states = new_r + new_c + new_e
    if extrapolating:
        states += [frame_prediction, t + 1]
    return output, states