Example #1
    def policy_loss_with_metrics(self, Adv, A=None):
        """

        This method constructs the policy loss as a scalar-valued Tensor,
        together with a dictionary of metrics (also scalars).

        This method may be overridden to construct a custom policy loss and/or
        to change the accompanying metrics.

        Parameters
        ----------
        Adv : 1d Tensor, shape: [batch_size]

            A batch of advantages.

        A : nd Tensor, shape: [batch_size, ...]

            A batch of actions taken under the behavior policy. For some
            choices of policy loss, e.g. ``update_strategy='sac'`` this input
            is ignored.

        Returns
        -------
        loss, metrics : (Tensor, dict of Tensors)

            The policy loss along with some metrics, which is a dict of type
            ``{name <str>: metric <Tensor>}``. The loss and each of the metrics
            (dict values) are scalar Tensors, i.e. Tensors with ``ndim=0``.

            The ``loss`` is passed to a keras Model using
            ``train_model.add_loss(loss)``. Similarly, each metric in the
            metric dict is passed to the model using
            ``train_model.add_metric(metric, name=name, aggregation='mean')``.


        """
        if K.ndim(Adv) == 2:
            check_tensor(Adv, axis_size=1, axis=1)
            Adv = K.squeeze(Adv, axis=1)
        check_tensor(Adv, ndim=1)

        if self.update_strategy == 'vanilla':
            assert A is not None

            log_pi = self.dist.log_proba(A)
            check_tensor(log_pi, same_as=Adv)

            entropy = K.mean(self.dist.entropy())

            # flip sign to get loss from objective
            loss = -(K.mean(Adv * log_pi) + self.entropy_beta * entropy)

            # no metrics related to behavior_dist since it's not used in the loss
            metrics = {'policy/entropy': entropy}

        elif self.update_strategy == 'ppo':
            assert A is not None

            log_pi = self.dist.log_proba(A)
            log_pi_old = K.stop_gradient(self.target_dist.log_proba(A))
            check_tensor(log_pi, same_as=Adv)
            check_tensor(log_pi_old, same_as=Adv)

            eps = self.ppo_clip_eps
            ratio = K.exp(log_pi - log_pi_old)
            ratio_clip = K.clip(ratio, 1 - eps, 1 + eps)
            check_tensor(ratio, same_as=Adv)
            check_tensor(ratio_clip, same_as=Adv)

            clip_objective = K.mean(K.minimum(Adv * ratio, Adv * ratio_clip))
            entropy = K.mean(self.dist.entropy())
            kl_div = K.mean(self.target_dist.kl_divergence(self.dist))

            # flip sign to get loss from objective
            loss = -(clip_objective + self.entropy_beta * entropy)
            metrics = {'policy/entropy': entropy, 'policy/kl_div': kl_div}

        elif self.update_strategy == 'sac':
            self.logger.debug("using update_strategy 'sac'")
            loss = -K.mean(Adv)
            metrics = {'policy/entropy': K.mean(self.dist.entropy())}

        elif self.update_strategy == 'cross_entropy':
            raise NotImplementedError('cross_entropy')

        else:
            raise ValueError(
                "unknown update_strategy '{}'".format(self.update_strategy))

        # check shape and give the loss tensor a name
        check_tensor(loss, ndim=0)
        loss = tf.identity(loss, name='policy/loss')

        return loss, metrics
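# Below is a minimal, self-contained sketch of the ``add_loss``/``add_metric`` wiring
# described in the docstring above. The toy model and the stand-in loss/metric tensors
# are illustrative assumptions, not part of the original snippet.
from tensorflow import keras
from tensorflow.keras import backend as K

inp = keras.Input(shape=(4,))
out = keras.layers.Dense(2)(inp)
train_model = keras.Model(inp, out)

# stand-ins for the scalar loss and metrics returned by policy_loss_with_metrics
loss = K.mean(K.square(out))
metrics = {'policy/entropy': K.mean(K.abs(out))}

train_model.add_loss(loss)
for name, metric in metrics.items():
    train_model.add_metric(metric, name=name, aggregation='mean')
train_model.compile(optimizer='adam')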
Example #2
def yolo_loss(args,
              anchors,
              num_classes,
              rescore_confidence=False,
              print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    yolo_output : tensor
        Final convolutional layer features.

    true_boxes : tensor
        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
        containing box x_center, y_center, width, height, and class.

    detectors_mask : array
        0/1 mask for detector positions where there is a matching ground truth.

    matching_true_boxes : array
        Corresponding ground truth boxes for positive detector positions.
        Already adjusted for conv height and width.

    anchors : tensor
        Anchor boxes for model.

    num_classes : int
        Number of object classes.

    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.

    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    total_loss : tensor
        scalar YOLO loss (confidence + classification + coordinates) for the minibatch
    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate(
        (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (
        confidence_loss_sum + classification_loss_sum + coordinates_loss_sum)
    if print_loss:
        total_loss = tf.Print(
            total_loss, [
                total_loss, confidence_loss_sum, classification_loss_sum,
                coordinates_loss_sum
            ],
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss
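# One common way (e.g. in YAD2K-style training scripts) to use a multi-input loss like
# this is to wrap it in a Lambda layer whose output is the scalar loss, then compile the
# model with an identity loss. A hedged sketch; ``model_body``, ``anchors``,
# ``num_classes`` and the three extra Input tensors are assumed to exist with the shapes
# documented above:
from tensorflow.keras.layers import Lambda
from tensorflow.keras.models import Model

model_loss = Lambda(
    yolo_loss, output_shape=(1,), name='yolo_loss',
    arguments={'anchors': anchors, 'num_classes': num_classes})(
        [model_body.output, boxes_input, detectors_mask_input, matching_boxes_input])
train_model = Model(
    [model_body.input, boxes_input, detectors_mask_input, matching_boxes_input],
    model_loss)
train_model.compile(optimizer='adam', loss=lambda y_true, y_pred: y_pred)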
Example #3
def box_diou(b_true, b_pred, use_ciou=False):
    """
    Calculate DIoU/CIoU loss on anchor boxes
    Reference Paper:
        "Distance-IoU Loss: Faster and Better Learning for Bounding Box Regression"
        https://arxiv.org/abs/1911.08287

    Parameters
    ----------
    b_true: GT boxes tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
    b_pred: predict boxes tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
    use_ciou: bool flag to indicate whether to use CIoU loss type

    Returns
    -------
    diou: tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)
    """
    b_true_xy = b_true[..., :2]
    b_true_wh = b_true[..., 2:4]
    b_true_wh_half = b_true_wh / 2.
    b_true_mins = b_true_xy - b_true_wh_half
    b_true_maxes = b_true_xy + b_true_wh_half

    b_pred_xy = b_pred[..., :2]
    b_pred_wh = b_pred[..., 2:4]
    b_pred_wh_half = b_pred_wh / 2.
    b_pred_mins = b_pred_xy - b_pred_wh_half
    b_pred_maxes = b_pred_xy + b_pred_wh_half

    intersect_mins = K.maximum(b_true_mins, b_pred_mins)
    intersect_maxes = K.minimum(b_true_maxes, b_pred_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    b_true_area = b_true_wh[..., 0] * b_true_wh[..., 1]
    b_pred_area = b_pred_wh[..., 0] * b_pred_wh[..., 1]
    union_area = b_true_area + b_pred_area - intersect_area
    # calculate IoU, add epsilon in denominator to avoid dividing by 0
    iou = intersect_area / (union_area + K.epsilon())

    # box center distance
    center_distance = K.sum(K.square(b_true_xy - b_pred_xy), axis=-1)
    # get enclosed area
    enclose_mins = K.minimum(b_true_mins, b_pred_mins)
    enclose_maxes = K.maximum(b_true_maxes, b_pred_maxes)
    enclose_wh = K.maximum(enclose_maxes - enclose_mins, 0.0)
    # get enclosed diagonal distance
    enclose_diagonal = K.sum(K.square(enclose_wh), axis=-1)
    # calculate DIoU, add epsilon in denominator to avoid dividing by 0
    diou = iou - 1.0 * (center_distance) / (enclose_diagonal + K.epsilon())

    if use_ciou:
        # calculate param v and alpha to extend to CIoU
        v = 4 * K.square(
            tf.math.atan2(b_true_wh[..., 0], b_true_wh[..., 1]) -
            tf.math.atan2(b_pred_wh[..., 0], b_pred_wh[..., 1])) / (math.pi *
                                                                    math.pi)

        # a trick: we multiply v by a non-gradient coefficient w^2+h^2 to customize its
        #          backpropagation, matching the description of equation (12) in the
        #          original paper:
        #
        #          dv/dw = (8/pi^2) * (arctan(wgt/hgt) - arctan(w/h)) * (h/(w^2+h^2))    (12)
        #          dv/dh = -(8/pi^2) * (arctan(wgt/hgt) - arctan(w/h)) * (w/(w^2+h^2))
        #
        #          The denominator w^2+h^2 is usually a small value for h and w in
        #          [0, 1], which is likely to cause gradient explosion. So in this
        #          implementation the denominator w^2+h^2 is simply removed for stable
        #          convergence: the step size 1/(w^2+h^2) is replaced by 1, while the
        #          gradient direction is still consistent with Eqn. (12).
        v = v * tf.stop_gradient(b_pred_wh[..., 0] * b_pred_wh[..., 0] +
                                 b_pred_wh[..., 1] * b_pred_wh[..., 1])

        alpha = v / (1.0 - iou + v)
        diou = diou - alpha * v

    diou = K.expand_dims(diou, -1)
    return diou
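# Quick numeric sanity check with a single box pair (shapes are minimal stand-ins for
# the (batch, feat_w, feat_h, anchor_num, 4) tensors described above):
from tensorflow.keras import backend as K

b_true = K.constant([[[[[0.5, 0.5, 1.0, 1.0]]]]])  # xywh
b_pred = K.constant([[[[[0.7, 0.5, 1.0, 1.0]]]]])
print(K.eval(box_diou(b_true, b_pred)))  # identical boxes give 1.0; the shifted centre lowers it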
Example #4
def cyclical_mae_rad(y_true, y_pred):
    return K.mean(K.minimum(
        K.abs(y_pred - y_true),
        K.minimum(K.abs(y_pred - y_true + 2 * np.pi),
                  K.abs(y_pred - y_true - 2 * np.pi))),
                  axis=-1)
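# A small check of the wrap-around behaviour: a raw error of 2*pi - 0.1 rad counts as
# 0.1 rad, which is the point of the cyclical metric (values are illustrative):
import numpy as np
from tensorflow.keras import backend as K

y_true = K.constant([[0.05]])
y_pred = K.constant([[2 * np.pi - 0.05]])
print(K.eval(cyclical_mae_rad(y_true, y_pred)))  # ~0.1 instead of ~6.18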
Example #5
def sigmoid_iou_loss(y_true, y_pred):
    return 1 - K.sum(K.minimum(y_true, y_pred)) / K.sum(
        K.maximum(y_true, y_pred))
Example #6
def ignore_background_loss(y_true, y_pred):
    # assumes `from keras.backend import minimum, sum, abs` (backend ops, not Python builtins)
    # y_true = maximum(y_true, epsilon())
    dont_cares = minimum(1.0, y_true)
    return sum(abs(y_pred - y_true) * dont_cares) / sum(dont_cares)
Example #7
def yolov2_loss(detector_mask, matching_true_boxes, class_one_hot, true_boxes_grid, y_pred, info=False):
	"""
	Calculate YOLO V2 loss from prediction (y_pred) and ground truth tensors (detector_mask,
	matching_true_boxes, class_one_hot, true_boxes_grid,)

	Parameters
	----------
	- detector_mask : tensor, shape (batch_size, GRID_W, GRID_H, anchors_count, 1)
		1 if a bounding box is assigned to the grid cell, else 0
	- matching_true_boxes : tensor, shape (batch_size, GRID_W, GRID_H, anchors_count, 5)
		Contains adjusted coords of bounding box in YOLO format
	- class_one_hot : tensor, shape (batch_size, GRID_W, GRID_H, anchors_count, class_count)
		One hot representation of bounding box label
	- true_boxes_grid : annotations tensor, shape (batch_size, max_annot, 5)
		true_boxes_grid format : x, y, w, h, c (coords unit : grid cell)
	- y_pred : prediction from model. tensor, shape (batch_size, GRID_W, GRID_H, anchors_count, 5 + labels_count)
	- info : boolean. True to print some info about the loss values
	
	Returns
	-------
	- loss : scalar
	- sub_loss : list of sub-losses [conf_loss, class_loss, coord_loss], each a scalar

	"""

	# anchors tensor
	anchors = np.array(ANCHORS)
	anchors = anchors.reshape(len(anchors)//2, 2)

	# grid coords tensor ---> GRID_W * GRID_H grid
	# tf.tile(input, multiples, name=None)
	# left-up corner coords, total GRID_W * GRID_H * anchors_count
	coord_x = tf.cast(tf.reshape(tf.tile(tf.range(GRID_W), [GRID_H]), (1, GRID_H, GRID_W, 1, 1)), tf.float32)
	coord_y = tf.transpose(coord_x, (0,2,1,3,4))
	coords = tf.tile(tf.concat([coord_x, coord_y], -1), [y_pred.shape[0], 1, 1, 5, 1])

	# coordinate loss
	# box regression
	# bx = (sigmoid(tx) + cx ) /W
	# bw = pw * e^tw
	# pw is the anchor width, cx is the cell's left-up coord, tx and tw are the predicted offsets, W is the feature map width
	# in this case we don't apply the /W term, because the coords in matching_true_boxes are also in the 0~16 (grid cell) range
	pred_xy = K.sigmoid(y_pred[:,:,:,:,0:2]) # adjust center coords between 0 and 1
	pred_xy = (pred_xy + coords) # add cell coord for comparaison with ground truth. New coords in grid cell unit
	pred_wh = K.exp(y_pred[:,:,:,:,2:4]) * anchors # adjust width and height for comparaison with ground truth. New coords in grid cell unit
	# pred_wh = (pred_wh * anchors) # unit: grid cell
	nb_detector_mask = K.sum(tf.cast(detector_mask>0.0, tf.float32))
	xy_loss = LAMBDA_COORD*K.sum(detector_mask*K.square(matching_true_boxes[...,:2] - pred_xy))/(nb_detector_mask + 1e-6) # Non /2
	wh_loss = LAMBDA_COORD * K.sum(detector_mask * K.square(K.sqrt(matching_true_boxes[...,2:4])-
		K.sqrt(pred_wh))) / (nb_detector_mask + 1e-6)

	coord_loss = xy_loss + wh_loss

	# class loss
	pred_box_class = y_pred[...,5:]
	true_box_class = tf.argmax(class_one_hot, -1)
	# class_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class)
	class_loss = K.sparse_categorical_crossentropy(target=true_box_class, output=pred_box_class, from_logits=True)
	class_loss = K.expand_dims(class_loss, -1)*detector_mask
	class_loss = LAMBDA_CLASS * K.sum(class_loss) / (nb_detector_mask + 1e-6)

	# confidence loss
	pred_conf = K.sigmoid(y_pred[..., 4:5]) # only two class : object or background
	# for each detector : iou between prediction and ground truth
	x1 = matching_true_boxes[...,0]
	y1 = matching_true_boxes[...,1]
	w1 = matching_true_boxes[...,2]
	h1 = matching_true_boxes[...,3]
	x2 = pred_xy[...,0]
	y2 = pred_xy[...,1]
	w2 = pred_wh[...,0]
	h2 = pred_wh[...,1]
	ious = iou(x1, y1, w1, h1, x2, y2, w2, h2)
	ious = K.expand_dims(ious, -1)

	# for each detector: best ious between pred and true_boxes
	pred_xy = K.expand_dims(pred_xy, 4)
	pred_wh = K.expand_dims(pred_wh, 4)
	pred_wh_half = pred_wh / 2.
	pred_mins = pred_xy - pred_wh_half
	pred_maxes = pred_xy + pred_wh_half
	true_boxe_shape = K.int_shape(true_boxes_grid)
	true_boxes_grid = K.reshape(true_boxes_grid, [true_boxe_shape[0], 1, 1, 1, true_boxe_shape[1], true_boxe_shape[2]])
	true_xy = true_boxes_grid[...,0:2]
	true_wh = true_boxes_grid[...,2:4]
	true_wh_half = true_wh * 0.5
	true_mins = true_xy - true_wh_half
	true_maxes = true_xy + true_wh_half
	intersect_mins = K.maximum(pred_mins, true_mins) # shape : m, GRID_W, GRID_H, BOX, max_annot, 2 
	intersect_maxes = K.minimum(pred_maxes, true_maxes) # shape : m, GRID_W, GRID_H, BOX, max_annot, 2
	intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.) # shape : m, GRID_W, GRID_H, BOX, max_annot, 1
	intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] # shape : m, GRID_W, GRID_H, BOX, max_annot, 1
	pred_areas = pred_wh[..., 0] * pred_wh[..., 1] # shape : m, GRID_W, GRID_H, BOX, 1, 1
	true_areas = true_wh[..., 0] * true_wh[..., 1] # shape : m, GRID_W, GRID_H, BOX, max_annot, 1
	union_areas = pred_areas + true_areas - intersect_areas
	iou_scores = intersect_areas / union_areas # shape : m, GRID_W, GRID_H, BOX, max_annot, 1
	best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
	best_ious = K.expand_dims(best_ious) # shape : m, GRID_W, GRID_H, BOX, 1
	
	# no object confidence loss
	no_object_detection = K.cast(best_ious < 0.6, K.dtype(best_ious)) 
	noobj_mask = no_object_detection * (1 - detector_mask)
	nb_noobj_mask  = K.sum(tf.cast(noobj_mask  > 0.0, tf.float32))
	
	noobject_loss =  LAMBDA_NOOBJECT * K.sum(noobj_mask * K.square(-pred_conf)) / (nb_noobj_mask + 1e-6)
	# object confidence loss
	object_loss = LAMBDA_OBJECT * K.sum(detector_mask * K.square(ious - pred_conf)) / (nb_detector_mask + 1e-6)
	# total confidence loss
	conf_loss = noobject_loss + object_loss
	
	# total loss
	loss = conf_loss + class_loss + coord_loss
	sub_loss = [conf_loss, class_loss, coord_loss] 

	if info:
		print('conf_loss   : {:.4f}'.format(conf_loss))
		print('class_loss  : {:.4f}'.format(class_loss))
		print('coord_loss  : {:.4f}'.format(coord_loss))
		print('    xy_loss : {:.4f}'.format(xy_loss))
		print('    wh_loss : {:.4f}'.format(wh_loss))
		print('--------------------')
		print('total loss  : {:.4f}'.format(loss)) 

		# display masks for each anchors
		for i in range(len(anchors)):
			f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(10, 5))
			# https://blog.csdn.net/Strive_For_Future/article/details/115052014?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522161883865316780262527067%2522%252C%2522scm%2522%253A%252220140713.130102334.pc%255Fall.%2522%257D&request_id=161883865316780262527067&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2~all~first_rank_v2~rank_v29-2-115052014.first_rank_v2_pc_rank_v29&utm_term=f.tight_layout&spm=1018.2226.3001.4187
			f.tight_layout() 
			f.suptitle("MASKS FOR ANCHOR {} :".format(anchors[i,...]))

			ax1.matshow((K.sum(detector_mask[0,:,:,i], axis=2)), cmap='Greys', vmin=0, vmax=1)
			ax1.set_title('detector_mask, count : {}'.format(K.sum(tf.cast(detector_mask[0,:,:,i]  > 0., tf.int32))))
			ax1.xaxis.set_ticks_position('bottom')
			
			ax2.matshow((K.sum(no_object_detection[0,:,:,i], axis=2)), cmap='Greys', vmin=0, vmax=1)
			ax2.set_title('no_object_detection mask')
			ax2.xaxis.set_ticks_position('bottom')
			
			ax3.matshow((K.sum(noobj_mask[0,:,:,i], axis=2)), cmap='Greys', vmin=0, vmax=1)
			ax3.set_title('noobj_mask')
			ax3.xaxis.set_ticks_position('bottom')
			  
	return loss, sub_loss
Example #8
def exploss(y_true, y_pred):
    return K.maximum(K.minimum(K.exp(-y_true * y_pred), 1e3), 1e-6)
Example #9
def q_loss(y_true, y_pred):
    y_true = denormalize(y_true, 0, N)
    y_pred = denormalize(y_pred, 0, N)

    return K.maximum(y_true, y_pred) / K.minimum(y_true, y_pred)
Example #10
def mrelu(x):
    return K.minimum(K.maximum(1 - x, 0), K.maximum(1 + x, 0))
Example #11
    def call(self, inputs, mask=None, **kwargs):
        if isinstance(inputs, list):
            inputs, positions = inputs
            positions = K.cast(positions, 'int32')
            mask = mask[1]
        else:
            positions = None

        input_len = K.shape(inputs)[1]

        if self.attention_type == SeqSelfAttention.ATTENTION_TYPE_ADD:
            e = self._call_additive_emission(inputs)
        elif self.attention_type == SeqSelfAttention.ATTENTION_TYPE_MUL:
            e = self._call_multiplicative_emission(inputs)

        if self.attention_activation is not None:
            e = self.attention_activation(e)
        e = K.exp(e - K.max(e, axis=-1, keepdims=True))
        if self.attention_width is not None:
            ones = tf.ones((input_len, input_len))
            if self.history_only:
                local = tf.linalg.band_part(
                    ones,
                    K.minimum(input_len, self.attention_width - 1),
                    0,
                )
            else:
                local = tf.linalg.band_part(
                    ones,
                    K.minimum(input_len, self.attention_width // 2),
                    K.minimum(input_len, (self.attention_width - 1) // 2),
                )
            e = e * K.expand_dims(local, 0)
        if mask is not None:
            mask = K.cast(mask, K.floatx())
            mask = K.expand_dims(mask)
            e = K.permute_dimensions(
                K.permute_dimensions(e * mask, (0, 2, 1)) * mask, (0, 2, 1))

        # a_{t} = \text{softmax}(e_t)
        s = K.sum(e, axis=-1)
        s = K.tile(K.expand_dims(s, axis=-1), K.stack([1, 1, input_len]))
        a = e / (s + K.epsilon())

        # l_t = \sum_{t'} a_{t, t'} x_{t'}
        v = K.batch_dot(a, inputs)
        if self.attention_regularizer_weight > 0.0:
            self.add_loss(self._attention_regularizer(a))

        if positions is not None:
            pos_num = K.shape(positions)[1]
            batch_indices = K.tile(
                K.expand_dims(K.arange(K.shape(inputs)[0]), axis=-1),
                K.stack([1, pos_num]))
            pos_indices = K.stack([batch_indices, positions], axis=-1)
            v = tf.gather_nd(v, pos_indices)
            a = tf.gather_nd(a, pos_indices)

        if self.return_attention:
            return [v, a]
        return v
Example #12
def detection_loss(mask, boxes, one_hot, grid, y_pred):
    anchors = config.anchors
    size = config.image_size // config.scale
    anchors = anchors.reshape(len(anchors) // 2, 2)

    coord_x = tf.cast(
        tf.reshape(tf.tile(tf.range(size), [size]), (1, size, size, 1, 1)),
        tf.float32)
    coord_y = tf.transpose(coord_x, (0, 2, 1, 3, 4))
    coords = tf.tile(
        tf.concat([coord_x, coord_y], -1),
        [y_pred.shape[0], 1, 1, len(anchors), 1])

    pred_xy = backend.sigmoid(y_pred[:, :, :, :, 0:2])
    pred_xy = (pred_xy + coords)
    pred_wh = backend.exp(y_pred[:, :, :, :, 2:4]) * anchors
    nb_mask = backend.sum(tf.cast(mask > 0.0, tf.float32))
    xy_loss = backend.sum(
        mask * backend.square(boxes[..., :2] - pred_xy)) / (nb_mask + 1e-6)
    wh_loss = backend.sum(
        mask *
        backend.square(backend.sqrt(boxes[..., 2:4]) -
                       backend.sqrt(pred_wh))) / (nb_mask + 1e-6)
    coord_loss = xy_loss + wh_loss

    pred_box_class = y_pred[..., 5:]
    true_box_class = tf.argmax(one_hot, -1)
    class_loss = backend.sparse_categorical_crossentropy(
        true_box_class, pred_box_class, True)
    class_loss = backend.expand_dims(class_loss, -1) * mask
    class_loss = backend.sum(class_loss) / (nb_mask + 1e-6)

    pred_conf = backend.sigmoid(y_pred[..., 4:5])
    x1 = boxes[..., 0]
    y1 = boxes[..., 1]
    w1 = boxes[..., 2]
    h1 = boxes[..., 3]
    x2 = pred_xy[..., 0]
    y2 = pred_xy[..., 1]
    w2 = pred_wh[..., 0]
    h2 = pred_wh[..., 1]

    x_min_1 = x1 - 0.5 * w1
    x_max_1 = x1 + 0.5 * w1
    y_min_1 = y1 - 0.5 * h1
    y_max_1 = y1 + 0.5 * h1
    x_min_2 = x2 - 0.5 * w2
    x_max_2 = x2 + 0.5 * w2
    y_min_2 = y2 - 0.5 * h2
    y_max_2 = y2 + 0.5 * h2
    # clamp at zero so disjoint boxes do not produce a spurious positive overlap
    intersection_x = backend.maximum(
        backend.minimum(x_max_1, x_max_2) - backend.maximum(x_min_1, x_min_2), 0.)
    intersection_y = backend.maximum(
        backend.minimum(y_max_1, y_max_2) - backend.maximum(y_min_1, y_min_2), 0.)
    intersection = intersection_x * intersection_y
    union = w1 * h1 + w2 * h2 - intersection
    iou = intersection / (union + 1e-6)
    iou = backend.expand_dims(iou, -1)

    pred_xy = backend.expand_dims(pred_xy, 4)
    pred_wh = backend.expand_dims(pred_wh, 4)
    pred_wh_half = pred_wh / 2.
    pred_min = pred_xy - pred_wh_half
    pred_max = pred_xy + pred_wh_half
    true_boxes_shape = backend.int_shape(grid)
    grid = backend.reshape(grid, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = grid[..., 0:2]
    true_wh = grid[..., 2:4]
    true_wh_half = true_wh * 0.5
    true_min = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half
    intersection_min = backend.maximum(pred_min, true_min)
    intersection_max = backend.minimum(pred_max, true_maxes)
    intersect_wh = backend.maximum(intersection_max - intersection_min, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]
    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas
    best_iou = backend.max(iou_scores, axis=4)
    best_iou = backend.expand_dims(best_iou)

    no_object_detection = backend.cast(best_iou < 0.6, backend.dtype(best_iou))
    no_obj_mask = no_object_detection * (1 - mask)
    nb_no_obj_mask = backend.sum(backend.cast(no_obj_mask > 0.0, 'float32'))

    no_object_loss = backend.sum(
        no_obj_mask * backend.square(-pred_conf)) / (nb_no_obj_mask + 1e-6)
    object_loss = backend.sum(
        mask * backend.square(iou - pred_conf)) / (nb_mask + 1e-6)
    conf_loss = no_object_loss + object_loss
    loss = conf_loss + class_loss + coord_loss
    return loss
Example #13
def ramp(y_true, y_pred):
    beta = 1.0
    return K.mean(K.minimum(1., K.maximum(0., 1. - beta * y_true * y_pred)),
                  axis=-1)
Example #14
def surrogate_loss(r, adv, prob, clip, c2):
    return -K.mean(
        K.minimum(r * adv,
                  K.clip(r, min_value=1 - clip, max_value=1 + clip) * adv) +
        c2 * -(prob * K.log(prob + 1e-10)))
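# A toy evaluation of the clipped PPO surrogate above, showing the expected inputs
# (probability ratio r, advantages, current action probabilities); values are
# illustrative only:
from tensorflow.keras import backend as K

r = K.constant([0.8, 1.3, 1.0])      # new/old probability ratios
adv = K.constant([1.0, -0.5, 2.0])   # advantages
prob = K.constant([0.2, 0.5, 0.3])   # current action probabilities
print(K.eval(surrogate_loss(r, adv, prob, clip=0.2, c2=0.01)))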
Example #15
    def _cross_entropy(self, y_true, y_pred):
        y_pred = K.maximum(K.minimum(y_pred, 1 - 1e-15), 1e-15)
        cross_entropy_loss = -K.sum(y_true * K.log(y_pred), axis=-1)
        return cross_entropy_loss
Example #16
    def call(self, a, **kwargs):
        states = kwargs['initial_state']
        r_tm1 = states[:self.nb_layers]
        c_tm1 = states[self.nb_layers:2 * self.nb_layers]
        e_tm1 = states[2 * self.nb_layers:3 * self.nb_layers]

        if self.extrap_start_time is not None:
            t = states[-1]
            a = K.switch(
                t >= self.t_extrap, states[-2], a
            )  # if past self.extrap_start_time, the previous prediction will be treated as the actual
        c = []
        r = []
        e = []

        # Update R units starting from the top
        for l in reversed(range(self.nb_layers)):
            inputs = [r_tm1[l], e_tm1[l]]
            if l < self.nb_layers - 1:
                inputs.append(r_up)

            inputs = K.concatenate(inputs, axis=self.channel_axis)
            i = self.conv_layers['i'][l].call(inputs)
            f = self.conv_layers['f'][l].call(inputs)
            o = self.conv_layers['o'][l].call(inputs)
            _c = f * c_tm1[l] + i * self.conv_layers['c'][l].call(inputs)
            _r = o * self.LSTM_activation(_c)
            c.insert(0, _c)
            r.insert(0, _r)

            if l > 0:
                r_up = self.upsample.call(_r)

        # Update feedforward path starting from the bottom
        for l in range(self.nb_layers):
            ahat = self.conv_layers['ahat'][l].call(r[l])
            if l == 0:
                ahat = K.minimum(ahat, self.pixel_max)
                frame_prediction = ahat

            # compute errors
            e_up = self.error_activation(ahat - a)
            e_down = self.error_activation(a - ahat)

            e.append(K.concatenate((e_up, e_down), axis=self.channel_axis))

            if self.output_layer_num == l:
                if self.output_layer_type == 'A':
                    output = a
                elif self.output_layer_type == 'Ahat':
                    output = ahat
                elif self.output_layer_type == 'R':
                    output = r[l]
                elif self.output_layer_type == 'E':
                    output = e[l]

            if l < self.nb_layers - 1:
                a = self.conv_layers['a'][l].call(e[l])
                a = self.pool.call(a)  # target for next layer

        if self.output_layer_type is None:
            if self.output_mode == 'prediction':
                output = frame_prediction
            else:
                for l in range(self.nb_layers):
                    layer_error = K.mean(K.batch_flatten(e[l]),
                                         axis=-1,
                                         keepdims=True)
                    all_error = layer_error if l == 0 else K.concatenate(
                        (all_error, layer_error), axis=-1)
                if self.output_mode == 'error':
                    output = all_error
                else:
                    output = K.concatenate(
                        (K.batch_flatten(frame_prediction), all_error),
                        axis=-1)

        states = r + c + e
        if self.extrap_start_time is not None:
            states += [frame_prediction, t + 1]


        return output, states
Example #17
def yolo2_loss(args,
               anchors,
               num_classes,
               label_smoothing=0,
               use_crossentropy_loss=False,
               use_crossentropy_obj_loss=False,
               rescore_confidence=False):
    """YOLOv2 loss function.

    Parameters
    ----------
    yolo_output : tensor
        Final convolutional layer features.

    true_boxes : tensor
        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
        containing box x_center, y_center, width, height, and class.

    y_true : array
        output of preprocess_true_boxes, with shape [conv_height, conv_width, num_anchors, 6]

    anchors : tensor
        Anchor boxes for model.

    num_classes : int
        Number of object classes.

    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.


    Returns
    -------
    total_loss : tensor
        mean YOLOv2 loss across the minibatch, returned together with the coordinate,
        confidence and classification sub-losses
    """
    (yolo_output, true_boxes, y_true) = args
    num_anchors = len(anchors)
    yolo_output_shape = K.shape(yolo_output)
    input_shape = yolo_output_shape[1:3] * 32
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1
    object_mask = y_true[..., 4:5]
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo2_head(
        yolo_output, anchors, num_classes, input_shape)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo2_head.
    batch_size = yolo_output_shape[0]  # batch size, tensor
    batch_size_f = K.cast(batch_size, K.dtype(yolo_output))

    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate((K.sigmoid(feats[..., 0:2]), feats[..., 2:4]),
                               axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLOv2 does not use binary cross-entropy. Here we try it.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - object_mask))
    if use_crossentropy_obj_loss:
        no_objects_loss = no_object_weights * K.binary_crossentropy(
            K.zeros(K.shape(pred_confidence)),
            pred_confidence,
            from_logits=False)

        if rescore_confidence:
            objects_loss = (object_scale * object_mask * K.binary_crossentropy(
                best_ious, pred_confidence, from_logits=False))
        else:
            objects_loss = (
                object_scale * object_mask *
                K.binary_crossentropy(K.ones(K.shape(pred_confidence)),
                                      pred_confidence,
                                      from_logits=False))
    else:
        no_objects_loss = no_object_weights * K.square(-pred_confidence)

        if rescore_confidence:
            objects_loss = (object_scale * object_mask *
                            K.square(best_ious - pred_confidence))
        else:
            objects_loss = (object_scale * object_mask *
                            K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLOv2 does not use categorical cross-entropy loss.
    #       Here we try it.
    matching_classes = K.cast(y_true[..., 5], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)

    if label_smoothing:
        matching_classes = _smooth_labels(matching_classes, label_smoothing)

    if use_crossentropy_loss:
        classification_loss = (
            class_scale * object_mask *
            K.expand_dims(K.categorical_crossentropy(
                matching_classes, pred_class_prob, from_logits=False),
                          axis=-1))
    else:
        classification_loss = (class_scale * object_mask *
                               K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = y_true[..., 0:4]
    coordinates_loss = (coordinates_scale * object_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss) / batch_size_f
    classification_loss_sum = K.sum(classification_loss) / batch_size_f
    coordinates_loss_sum = K.sum(coordinates_loss) / batch_size_f
    total_loss = 0.5 * (confidence_loss_sum + classification_loss_sum +
                        coordinates_loss_sum)

    # Fit for tf 2.0.0 loss shape
    total_loss = K.expand_dims(total_loss, axis=-1)

    return total_loss, coordinates_loss_sum, confidence_loss_sum, classification_loss_sum
Example #18
def clipped_relu(inputs):
    return get(
        Lambda(lambda y: K.minimum(K.maximum(y, 0), 20),
               name='clipped_relu'))(inputs)
Example #19
def vertebrae_classification_rate(y_true, y_pred):
    # assumes `from keras.backend import minimum, sum, cast, equal, round` (backend ops, not Python builtins)
    # y_true = K.maximum(y_true, K.epsilon())
    dont_cares = minimum(1.0, y_true)
    return sum(cast(equal(round(y_pred), y_true), 'float32') *
               dont_cares) / sum(dont_cares)
Example #20
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)

        # first update the number of iterations
        self.updates = [K.update_add(self.iterations, 1)]

        if self.decay_epochs:
            ite_casted = K.cast(self.iterations, K.dtype(self.decay_epochs))
            hit_decay_epoch = K.any(K.equal(ite_casted, self.decay_epochs))

            #print(hit_decay_epoch)
            lr = K.switch(hit_decay_epoch, self.lr['all'] * self.decay['all'],
                          self.lr['all'])

            #K.print_tensor(self.lr['all'])
            #a = K.switch(hit_decay_epoch,
            #             K.print_tensor(self.lr['all'],message='Decays:'),
            #             K.print_tensor(self.lr['all'],message=' '))

            self.updates.append(K.update(self.lr['all'], lr))

        shapes = [K.int_shape(p) for p in params]
        moments = [K.zeros(s) for s in shapes]
        self.weights = [self.iterations] + moments
        #print(self.weights)

        for p, g, m in zip(params, grads, moments):
            #print("HEREEEE:", p.name, g, m)
            lrptrkey = set_pattern_find(p.name, self.lr.keys())
            if lrptrkey:
                if self.verbose > 0:
                    print("Setting different learning rate for ", p.name,
                          " : ", K.eval(self.lr[lrptrkey]))
                lr = self.lr[lrptrkey]
                dcptrkey = set_pattern_find(p.name, self.decay.keys())
                if self.decay_epochs and dcptrkey:
                    lr = K.switch(hit_decay_epoch,
                                  self.lr[lrptrkey] * self.decay[dcptrkey],
                                  self.lr[lrptrkey])
                    self.updates.append(K.update(self.lr[lrptrkey], lr))
                    if self.verbose > 0:
                        print("Added decay to ", p.name, ": ", K.eval(lr), ",",
                              self.decay[dcptrkey])
                elif self.decay_epochs:
                    lr = K.switch(hit_decay_epoch,
                                  self.lr[lrptrkey] * self.decay['all'],
                                  self.lr[lrptrkey])
                    self.updates.append(K.update(self.lr[lrptrkey], lr))
                    if self.verbose > 0:
                        print("Added decay to ", p.name, ": ", K.eval(lr), ",",
                              self.decay['all'])
                else:
                    lr = self.lr[lrptrkey]

            else:
                lr = self.lr['all']

            momptrkey = set_pattern_find(p.name, self.momentum.keys())
            if momptrkey:
                if self.verbose > 0:
                    print("Setting different momentum for ", p.name, " , ",
                          K.eval(self.momentum[momptrkey]))
                momentum = self.momentum[momptrkey]
            else:
                momentum = self.momentum['all']

            v = momentum * m - lr * g  # velocity
            self.updates.append(K.update(m, v))

            if self.nesterov:
                new_p = p + momentum * (momentum * m - lr * g) - lr * g
            else:
                new_p = p + momentum * m - lr * g

            # CHANGE CLIP
            _to_tensor = K.tensorflow_backend._to_tensor
            _clip_by_val = K.tf.clip_by_value
            margin = K.mean(K.abs(p * K.constant(self.UPCLIP)))
            min_value = _to_tensor(p - margin, p.dtype.base_dtype)
            max_value = _to_tensor(p + margin, p.dtype.base_dtype)

            max_v = K.maximum(min_value, max_value)
            min_v = K.minimum(min_value, max_value)

            new_p = _clip_by_val(new_p, min_v, max_v)

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)
            clptrkey = set_pattern_find(p.name, self.clips.keys())
            if self.clips_val and clptrkey:
                if self.verbose > 0:
                    print("Clipping variable", p.name, " to ",
                          self.clips[clptrkey])
                c = K.eval(self.clips[clptrkey])
                new_p = K.clip(new_p, c[0], c[1])
            #print("updates for ", p.name, " lr: ", K.eval(lr), " mom:", K.eval(momentum))
            self.updates.append(K.update(p, new_p))
        return self.updates
Example #21
    def loss(y_true, y_pred):
        prob = y_true * y_pred
        old_prob = y_true * old_prediction
        r = prob / (old_prob + 1e-10)
        return -K.mean(
            K.minimum(r * advantage,
                      K.clip(r, min_value=1 - self.loss_clipping,
                             max_value=1 + self.loss_clipping) * advantage) +
            self.entropy_loss * -(prob * K.log(prob + 1e-10)))
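# The free names above (old_prediction, advantage, self.loss_clipping, self.entropy_loss)
# are captured from an enclosing scope. A hedged sketch of the usual factory pattern that
# produces such a closure (names and default values are assumptions, not the original code):
from tensorflow.keras import backend as K

def make_ppo_loss(advantage, old_prediction, loss_clipping=0.2, entropy_beta=5e-3):
    def loss(y_true, y_pred):
        prob = y_true * y_pred
        old_prob = y_true * old_prediction
        r = prob / (old_prob + 1e-10)
        clipped = K.clip(r, min_value=1 - loss_clipping, max_value=1 + loss_clipping)
        entropy = -(prob * K.log(prob + 1e-10))
        return -K.mean(K.minimum(r * advantage, clipped * advantage) + entropy_beta * entropy)
    return loss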
Example #22
    def __call__(self, x):
        reg_0 = math_ops.reduce_sum(math_ops.square(x))
        reg_1 = math_ops.reduce_sum(math_ops.square(x - 1))
        return self.k * K.minimum(reg_0, reg_1)
Example #23
def clipped_relu(inp):
    relu = Lambda(lambda y: K.minimum(K.maximum(y, 0), 20))(inp)
    return relu
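# Dropping the clipped activation into a small model (layer sizes are arbitrary; assumes
# the Lambda/K imports used by clipped_relu above):
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

inp = Input(shape=(16,))
x = Dense(32)(inp)
x = clipped_relu(x)  # min(max(x, 0), 20), as defined above
model = Model(inp, Dense(1)(x))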
Example #24
def filter_detections(boxes,
                      classification,
                      other=[],
                      class_specific_filter=True,
                      nms=True,
                      score_threshold=0.05,
                      max_detections=300,
                      nms_threshold=0.5):
    """Filter detections using the boxes and classification values.

    Args:
        boxes (numpy.array): Tensor of shape ``(num_boxes, 4)`` containing the
            boxes in ``(x1, y1, x2, y2)`` format.
        classification (numpy.array): Tensor of shape
            ``(num_boxes, num_classes)`` containing the classification scores.
        other (list): List of tensors of shape ``(num_boxes, ...)`` to filter
            along with the boxes and classification scores.
        class_specific_filter (bool): Whether to perform filtering per class,
            or take the best scoring class and filter those.
        nms (bool): Whether to enable non maximum suppression.
        score_threshold (float): Threshold used to prefilter the boxes with.
        max_detections (int): Maximum number of detections to keep.
        nms_threshold (float): Threshold for the IoU value to determine when a
            box should be suppressed.

    Returns:
        list: A list of ``[boxes, scores, labels, other[0], other[1], ...]``.
        ``boxes`` is shaped ``(max_detections, 4)`` and contains the
        ``(x1, y1, x2, y2)`` of the non-suppressed boxes.
        ``scores`` is shaped ``(max_detections,)`` and contains the scores
        of the predicted class.
        ``labels`` is shaped ``(max_detections,)`` and contains the
        predicted label.
        ``other[i]`` is shaped ``(max_detections, ...)`` and contains the
        filtered ``other[i]`` data.
        If there are fewer than ``max_detections`` detections,
        the tensors are padded with -1's.
    """
    def _filter_detections(scores, labels):
        # threshold based on score
        indices = tf.where(K.greater(scores, score_threshold))

        if nms:
            filtered_boxes = tf.gather_nd(boxes, indices)
            filtered_scores = K.gather(scores, indices)[:, 0]

            # perform NMS
            nms_indices = tf.image.non_max_suppression(
                filtered_boxes,
                filtered_scores,
                max_output_size=max_detections,
                iou_threshold=nms_threshold)

            # filter indices based on NMS
            indices = K.gather(indices, nms_indices)

        # add indices to list of all indices
        labels = tf.gather_nd(labels, indices)
        indices = K.stack([indices[:, 0], labels], axis=1)

        return indices

    if class_specific_filter:
        all_indices = []
        # perform per class filtering
        for c in range(K.int_shape(classification)[1]):
            scores = classification[:, c]
            labels = c * tf.ones((K.shape(scores)[0],), dtype='int64')
            all_indices.append(_filter_detections(scores, labels))

        # concatenate indices to single tensor
        indices = K.concatenate(all_indices, axis=0)
    else:
        scores = K.max(classification, axis=1)
        labels = K.argmax(classification, axis=1)
        indices = _filter_detections(scores, labels)

    # select top k
    scores = tf.gather_nd(classification, indices)
    labels = indices[:, 1]
    scores, top_indices = tf.nn.top_k(
        scores, k=K.minimum(max_detections, K.shape(scores)[0]))

    # filter input using the final set of indices
    indices = K.gather(indices[:, 0], top_indices)
    boxes = K.gather(boxes, indices)
    labels = K.gather(labels, top_indices)
    other_ = [K.gather(o, indices) for o in other]

    # zero pad the outputs
    pad_size = K.maximum(0, max_detections - K.shape(scores)[0])
    boxes = tf.pad(boxes, [[0, pad_size], [0, 0]], constant_values=-1)
    scores = tf.pad(scores, [[0, pad_size]], constant_values=-1)
    labels = tf.pad(labels, [[0, pad_size]], constant_values=-1)
    labels = K.cast(labels, 'int32')
    pads = lambda x: [[0, pad_size]] + [[0, 0] for _ in range(1, K.ndim(x))]
    other_ = [tf.pad(o, pads(o), constant_values=-1) for o in other_]

    # set shapes, since we know what they are
    boxes.set_shape([max_detections, 4])
    scores.set_shape([max_detections])
    labels.set_shape([max_detections])
    for o, s in zip(other_, [list(K.int_shape(o)) for o in other]):
        o.set_shape([max_detections] + s[1:])

    return [boxes, scores, labels] + other_
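# A small smoke test with random inputs, showing the padded output shapes (values are
# random and purely illustrative):
import tensorflow as tf

boxes = tf.random.uniform((50, 4))
classification = tf.random.uniform((50, 3))
out_boxes, out_scores, out_labels = filter_detections(
    boxes, classification, score_threshold=0.05, max_detections=10)
print(out_boxes.shape, out_scores.shape, out_labels.shape)  # (10, 4) (10,) (10,)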
Example #25
def box_ciou(b1, b2):
    """
    输入为:
    ----------
    b1: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
    b2: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh

    返回为:
    -------
    ciou: tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)
    """
    #-----------------------------------------------------------#
    #   compute top-left and bottom-right corners of the predicted boxes
    #   b1_mins     (batch, feat_w, feat_h, anchor_num, 2)
    #   b1_maxes    (batch, feat_w, feat_h, anchor_num, 2)
    #-----------------------------------------------------------#
    b1_xy = b1[..., :2]
    b1_wh = b1[..., 2:4]
    b1_wh_half = b1_wh / 2.
    b1_mins = b1_xy - b1_wh_half
    b1_maxes = b1_xy + b1_wh_half
    #-----------------------------------------------------------#
    #   compute top-left and bottom-right corners of the ground-truth boxes
    #   b2_mins     (batch, feat_w, feat_h, anchor_num, 2)
    #   b2_maxes    (batch, feat_w, feat_h, anchor_num, 2)
    #-----------------------------------------------------------#
    b2_xy = b2[..., :2]
    b2_wh = b2[..., 2:4]
    b2_wh_half = b2_wh / 2.
    b2_mins = b2_xy - b2_wh_half
    b2_maxes = b2_xy + b2_wh_half

    #-----------------------------------------------------------#
    #   compute the IoU between ground-truth and predicted boxes
    #   iou         (batch, feat_w, feat_h, anchor_num)
    #-----------------------------------------------------------#
    intersect_mins = K.maximum(b1_mins, b2_mins)
    intersect_maxes = K.minimum(b1_maxes, b2_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    b1_area = b1_wh[..., 0] * b1_wh[..., 1]
    b2_area = b2_wh[..., 0] * b2_wh[..., 1]
    union_area = b1_area + b2_area - intersect_area
    iou = intersect_area / K.maximum(union_area, K.epsilon())

    #-----------------------------------------------------------#
    #   compute the distance between box centers
    #   center_distance (batch, feat_w, feat_h, anchor_num)
    #-----------------------------------------------------------#
    center_distance = K.sum(K.square(b1_xy - b2_xy), axis=-1)
    enclose_mins = K.minimum(b1_mins, b2_mins)
    enclose_maxes = K.maximum(b1_maxes, b2_maxes)
    enclose_wh = K.maximum(enclose_maxes - enclose_mins, 0.0)
    #-----------------------------------------------------------#
    #   compute the diagonal distance of the enclosing box
    #   enclose_diagonal (batch, feat_w, feat_h, anchor_num)
    #-----------------------------------------------------------#
    enclose_diagonal = K.sum(K.square(enclose_wh), axis=-1)
    ciou = iou - 1.0 * (center_distance) / K.maximum(enclose_diagonal,
                                                     K.epsilon())

    v = 4 * K.square(
        tf.math.atan2(b1_wh[..., 0], K.maximum(b1_wh[..., 1], K.epsilon())) -
        tf.math.atan2(b2_wh[..., 0], K.maximum(b2_wh[..., 1], K.epsilon()))
    ) / (math.pi * math.pi)
    alpha = v / K.maximum((1.0 - iou + v), K.epsilon())
    ciou = ciou - alpha * v

    ciou = K.expand_dims(ciou, -1)
    ciou = tf.where(tf.math.is_nan(ciou), tf.zeros_like(ciou), ciou)
    return ciou
Example #26
def mymask(y_true):
    return K.minimum(y_true+1., 1.)
Example #27
def bbox_ciou(bboxes1, bboxes2):
    """
    Complete IoU
    @param bboxes1: (a, b, ..., 4), boxes in xywh format
    @param bboxes2: (A, B, ..., 4), boxes in xywh format
        leading dimensions broadcast 1:n, n:n or n:1

    @return (max(a,A), max(b,B), ...)

    ex) (4,):(3,4) -> (3,)
        (2,1,4):(2,3,4) -> (2,3)
    """
    xy1 = bboxes1[..., :2]
    wh_h1 = bboxes1[..., 2:4] * 0.5
    xy2 = bboxes2[..., :2]
    wh_h2 = bboxes2[..., 2:4] * 0.5

    bboxes1_area = bboxes1[..., 2] * bboxes1[..., 3]
    bboxes2_area = bboxes2[..., 2] * bboxes2[..., 3]

    lu1 = xy1 - wh_h1
    rd1 = xy1 + wh_h1
    lu2 = xy2 - wh_h2
    rd2 = xy2 + wh_h2

    left_up = K.maximum(lu1, lu2)
    right_down = K.minimum(rd1, rd2)

    inter_section = K.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]

    union_area = bboxes1_area + bboxes2_area - inter_area

    iou = inter_area / (union_area + K.epsilon())

    enclose_left_up = K.minimum(lu1, lu2)
    enclose_right_down = K.maximum(rd1, rd2)

    enclose_section = enclose_right_down - enclose_left_up

    c_2 = K.pow(enclose_section[..., 0], 2) + K.pow(enclose_section[..., 1], 2)

    center_diagonal = xy2 - xy1

    rho_2 = K.pow(center_diagonal[..., 0], 2) + K.pow(center_diagonal[..., 1],
                                                      2)

    diou = iou - rho_2 / (c_2 + K.epsilon())

    v = K.pow(
        (tf.math.atan(bboxes1[..., 2] / (bboxes1[..., 3] + K.epsilon())) -
         tf.math.atan(bboxes2[..., 2] / (bboxes2[..., 3] + K.epsilon()))) *
        0.636619772,  # 2/pi
        2,
    )

    alpha = v / (1 - iou + v + K.epsilon())

    ciou = diou - alpha * v

    return ciou, iou
Example #28
    def on_batch_end(self, epoch, logs=None):
        logs = logs or {}
        self._step_num += 1
        lrate = self._model_dim ** -.5 * K.minimum(
            self._step_num ** -.5, self._step_num * self._warmup_steps ** -1.5)
        K.set_value(self.model.optimizer.lr, lrate)
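# The schedule above is the Transformer "Noam" warm-up:
#   lr = d_model**-0.5 * min(step**-0.5, step * warmup_steps**-1.5)
# A plain-Python illustration with assumed values d_model=512, warmup_steps=4000:
d_model, warmup_steps = 512, 4000
for step in (1, 4000, 40000):
    lr = d_model ** -0.5 * min(step ** -0.5, step * warmup_steps ** -1.5)
    print(step, lr)  # rises roughly linearly during warm-up, then decays as step**-0.5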
Example #29
    def clipped_relu(self, inputs):
        relu = Lambda(lambda y: K.minimum(K.maximum(y, 0), 20),
                      name=f'clipped_relu_{self.clipped_relu_count}')(inputs)
        self.clipped_relu_count += 1
        return relu
Example #30
    def deep_dream():
        """
        DeepDream is an artistic image-modification technique that uses the representations learned by convnets. First
        released by Google in the summer of 2015, this algorithm is very similar to the gradient ascent technique we
        viewed earlier to represent the patterns learned by individual filters during training (Chapter 5). There are a
        few differences to the algorithm:
            -> With DeepDream you try to maximise the activation of the entire layer rather than one specific filter,
               thus mixing together visualisations of a larger number of filters.
            -> You start not from a blank, slightly noisy input, but rather from an existing image - thus the resulting
               effects latch on to preexisting visual patterns, distorting elements of the image in a somewhat artistic
               fashion.
            -> The input images are processed at different scales (called octaves), which improves the quality of the
               visualisations.
               
               
        This function does not work due to version issues.

        :return: None
        """

        # You won't be training a model for this application, so let's disable all training functionality before
        # starting
        K.set_learning_phase(0)

        model = inception_v3.InceptionV3(weights='imagenet', include_top=False)

        # In Chapter 5 we use the loss value to maximise the output of a specific filter. This time we'll attempt to
        # maximise the weighted sum of the L2 norm of the activations of a set of high-level layers. The set of layers
        # chosen will have a massive impact on the resulting modifications to the image, so make these params very
        # easily configurable.
        layers_contributions = {
            'mixed2': 0.2,
            'mixed3': 3.0,
            'mixed4': 2.0,
            'mixed5': 1.5
        }
        layer_dict = dict([(layer.name, layer) for layer in model.layers])

        # You'll define the loss by adding layer contributions to this scalar value.
        loss = K.variable(0.0)
        for layer_name in layers_contributions:
            coeff = layers_contributions[layer_name]
            # Retrieve the layer's output.
            activation = layer_dict[layer_name].output

            # Define the scaling factor and add the L2 norm of the features of a layer to the loss. You avoid border
            # artifacts by involving only non-border pixels in the loss.
            scaling = K.prod(K.cast(K.shape(activation), 'float32'))
            loss = loss + coeff * K.sum(K.square(
                activation[:, 2:-2, 2:-2, :])) / scaling

        # Now we can set up the gradient ascent process.
        dream = model.input

        # Compute gradient of the dream w.r.t to the loss, then NORMALISE!!!
        grads = K.gradients(loss, dream)[0]
        grads /= K.maximum(K.mean(K.abs(grads)), 1e-7)

        # Now set up a Keras function to retrieve the value of the loss and gradients given an input image.
        outputs = [loss, grads]
        fetch_loss_and_grads = K.function([dream], outputs)

        def eval_loss_and_grads(x):
            """
            This function is used to call the fetch_loss_and_grads function and package the outputs in an easy to use
            fashion.

            :param x: Input dream
            :return: The loss and the gradient of the layer w.r.t. the dream.
            """
            outs = fetch_loss_and_grads([x])
            loss_value = outs[0]
            grads_value = outs[1]
            return loss_value, grads_value

        def gradient_ascent(x, iterations, step, max_loss=None):
            """
            This function runs gradient ascent for a number of iterations.

            :param x: Input dream
            :param iterations: Number of iterations to run gradient ascent for
            :param step: Step-size of the gradient ascent
            :param max_loss: Maximum loss we'll accept during the gradient ascent before stopping.
            :return: A modified version of the input dream
            """
            for i in range(iterations):
                loss_value, grads_value = eval_loss_and_grads(x)
                if max_loss is not None and loss_value > max_loss:
                    break
                print(f"...Loss value at {i}: {loss_value}")
                x += step * grads_value
            return x

        # Now we can begin programming the DeepDream algorithm itself. First we need to define a set of scales
        # (called octaves) at which to process the image. Each octave is 40% larger than the last. At each scale (from
        # smallest to largest) you run gradient ascent to maximise the loss you previously defined. To prevent artifacts
        # of up-scaling (blurriness and stretching) we'll re-inject the lost detail back into the image, which is possible
        # because you know what the original image should look like at a larger scale.
        step = 0.01
        num_octave = 3
        octave_scale = 1.4
        iterations = 20

        max_loss = 10.0
        base_image_path = 'C:\\Users\\owatkins\\OneDrive - Analog Devices, Inc\\Documents\\Project Folder\\Tutorials and Courses\\Deep Learning with Python\\European_Landscape.jpg'
        print("Loading Base Image...")

        # Load the base image into Numpy array.
        img = preprocess_image_inception(base_image_path)
        print(f"Image Preprocessed: {img.dtype} of size: {img.shape}")

        # Prepare a list of shape tuples defining the different scales at which to run gradient ascent.
        original_shape = img.shape[1:3]
        successive_shapes = [original_shape]
        for i in range(1, num_octave):
            shape = tuple(
                [int(dim / (octave_scale**i)) for dim in original_shape])
            successive_shapes.append(shape)

        # Reverse the list so that they run in ascending order.
        successive_shapes = successive_shapes[::-1]

        # Resize the Numpy array of the image to the smallest size.
        original_img = np.copy(img)
        shrunk_original_image = resize_img(original_img, successive_shapes[0])

        # Run deep dream over all octaves.
        for shape in successive_shapes:
            print(f"Processing Image shape: {shape}")

            # Scales up the deep dream image
            img = resize_img(img, shape)

            # Run gradient ascent, altering the dream.
            img = gradient_ascent(img,
                                  iterations=iterations,
                                  step=step,
                                  max_loss=max_loss)

            # Scales up the smaller version of the original image: it will be pixellated. Compute the high-quality
            # version of the original image at this size. The difference between the two is the detail lost in
            # up-scaling.
            upscaled_shrunk_original_img = resize_img(shrunk_original_image,
                                                      shape)
            same_size_original = resize_img(original_img, shape)
            lost_detail = same_size_original - upscaled_shrunk_original_img

            # Re-inject the lost detail back into the dream. Grab the shrunk_original_image and save the dream at this
            # octave
            img += lost_detail
            shrunk_original_image = resize_img(original_img, shape)
            save_img(
                img,
                fname=
                'C:\\Users\\owatkins\\OneDrive - Analog Devices, Inc\\Documents\\Project Folder\\Tutorials and Courses\\Deep Learning with Python\\dream_at_scale_'
                + str(shape) + '.png')

        # Save the final dream.
        save_img(
            img,
            fname=
            'C:\\Users\\owatkins\\OneDrive - Analog Devices, Inc\\Documents\\Project Folder\\Tutorials and Courses\\Deep Learning with Python\\Final_Dream.png'
        )