def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    Arguments:
        feats: Raw output tensor of one detection layer. Axes 1 and 2 are
            read as grid height and width, and the tensor is reshaped to
            (-1, grid_h, grid_w, num_anchors, num_classes + 5).
        anchors: Anchor sizes used by this layer, two values per anchor
            (reshaped to [1, 1, 1, num_anchors, 2]).
        num_classes: Number of classes predicted per anchor.
        input_shape: Two-element shape; it is reversed before the box sizes
            are divided by it, mirroring how the (height, width) grid shape
            is reversed for the box centres.
        calc_loss: If truthy, return the intermediate values a loss function
            needs instead of the decoded predictions.

    Returns:
        (grid, feats, box_xy, box_wh) when `calc_loss` is truthy, where
        `feats` is the reshaped input; otherwise
        (box_xy, box_wh, box_confidence, box_class_probs).
    """
    # NOTE(review): several calls below (tf.dtype, tf.arange, tf.concatenate)
    # carry Keras-backend names; confirm what `tf` is bound to in this module.
    num_anchors = len(anchors)
    feat_dtype = tf.dtype(feats)  # reshaping below does not change the dtype

    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = tf.reshape(tf.constant(anchors),
                                [1, 1, 1, num_anchors, 2])

    grid_shape = tf.shape(feats)[1:3]  # height, width
    grid_y = tf.tile(
        tf.reshape(tf.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = tf.tile(
        tf.reshape(tf.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = tf.concatenate([grid_x, grid_y])
    grid = tf.cast(grid, feat_dtype)

    feats = tf.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (tf.sigmoid(feats[..., :2]) + grid) / tf.cast(
        grid_shape[::-1], feat_dtype)
    box_wh = tf.exp(feats[..., 2:4]) * anchors_tensor / tf.cast(
        input_shape[::-1], feat_dtype)

    if calc_loss:
        # The loss path never uses confidence / class scores, so return
        # before building those ops.
        return grid, feats, box_xy, box_wh

    box_confidence = tf.sigmoid(feats[..., 4:5])
    box_class_probs = tf.sigmoid(feats[..., 5:])
    return box_xy, box_wh, box_confidence, box_class_probs
def _preprocess_symbolic_input(x, data_format, mode): """Preprocesses a tensor encoding a batch of images. Arguments: x: Input tensor, 3D or 4D. data_format: Data format of the image tensor. mode: One of "caffe", "tf" or "torch". - caffe: will convert the images from RGB to BGR, then will zero-center each color channel with respect to the ImageNet dataset, without scaling. - tf: will scale pixels between -1 and 1, sample-wise. - torch: will scale pixels between 0 and 1 and then will normalize each channel with respect to the ImageNet dataset. Returns: Preprocessed tensor. """ if mode == 'tf': x /= 127.5 x -= 1. return x if mode == 'torch': x /= 255. mean = [0.485, 0.456, 0.406] std = [0.229, 0.224, 0.225] else: if data_format == 'channels_first': # 'RGB'->'BGR' if backend.ndim(x) == 3: x = x[::-1, ...] else: x = x[:, ::-1, ...] else: # 'RGB'->'BGR' x = x[..., ::-1] mean = [103.939, 116.779, 123.68] std = None mean_tensor = tf.constant(-np.array(mean)) # Zero-center by mean pixel if tf.dtype(x) != tf.dtype(mean_tensor): x = tf.nn.bias_add(x, tf.cast(mean_tensor, tf.dtype(x)), data_format=data_format) else: x = tf.nn.bias_add(x, mean_tensor, data_format) if std is not None: x /= std return x
def loop_body(b, ignore_mask):
    """Record, for sample `b`, which predictions overlap no true box enough.

    Reads `y_true`, `l`, `object_mask_bool`, `pred_box`, `ignore_thresh` and
    `box_iou` from an enclosing scope that is not part of this block.

    Arguments:
        b: Index of the sample to process.
        ignore_mask: Array-like accumulator exposing `write(index, value)`.

    Returns:
        (b + 1, ignore_mask) where `ignore_mask` is the result of the write.
    """
    # Keep only the box coordinates of the cells flagged as holding an object.
    sample_truth = y_true[l][b, ..., 0:4]
    sample_has_object = object_mask_bool[b, ..., 0]
    true_box = tf.boolean_mask(sample_truth, sample_has_object)

    # Best overlap of each prediction against all remaining true boxes.
    overlaps = box_iou(pred_box[b], true_box)
    best_overlap = tf.max(overlaps, axis=-1)

    # 1 where the best overlap is below the threshold, 0 elsewhere.
    below_thresh = tf.cast(best_overlap < ignore_thresh, tf.dtype(true_box))
    ignore_mask = ignore_mask.write(b, below_thresh)
    return b + 1, ignore_mask
def get_updates(self, loss, params):
    """Build the update ops for one optimizer step.

    The `rho_inf` / `rho_t` / `r_t` terms follow the variance-rectification
    scheme of Rectified Adam (RAdam): while `rho_t <= 4` the step is a plain
    bias-corrected momentum step, afterwards it is an Adam-style step scaled
    by `r_t`.

    Arguments:
        loss: Loss passed on to `self.get_gradients`.
        params: Parameters to update; one first-moment and one second-moment
            accumulator is created per parameter.

    Returns:
        `self.updates`: the iteration-counter increment followed by, for
        every parameter, the updates of the parameter and its two moments.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [tf.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay:
        lr = lr * (1. / (1. + self.decay * tf.cast(self.iterations,
                                                   tf.dtype(self.decay))))

    t = tf.cast(self.iterations, tf.float32) + 1.
    beta_1 = self.beta_1
    beta_2 = self.beta_2
    beta_1_t = tf.pow(beta_1, t)
    beta_2_t = tf.pow(beta_2, t)

    rho_inf = 2. / (1. - beta_2) - 1.
    rho_t = rho_inf - 2. * t * beta_2_t / (1. - beta_2_t)
    # relu() zeroes the numerator whenever rho_t <= 4, so r_t is 0 there.
    r_t = tf.math.sqrt(
        tf.relu(rho_t - 4.) * (rho_t - 2.) * rho_inf /
        (tf.relu(rho_inf - 4.) * (rho_inf - 2.) * rho_t))
    flag = tf.cast(rho_t > 4., tf.float32)

    # NOTE(review): m and v are targets of tf.update below, so these must be
    # variables; confirm that tf.zeros creates variables in this module.
    ms = [tf.zeros(tf.int_shape(p)) for p in params]
    vs = [tf.zeros(tf.int_shape(p)) for p in params]
    self.weights = [self.iterations] + ms + vs

    for p, g, m, v in zip(params, grads, ms, vs):
        m_t = beta_1 * m + (1. - beta_1) * g
        v_t = beta_2 * v + (1. - beta_2) * tf.square(g)

        m_hat_t = m_t / (1. - beta_1_t)
        v_hat_t = tf.math.sqrt(v_t / (1. - beta_2_t))

        # Step factor:
        #   rho_t >  4 (flag = 1): r_t / (v_hat_t + eps)  -> rectified step
        #   rho_t <= 4 (flag = 0): 0 + 1                  -> momentum step
        # The earlier form `+ flag - 1.` gave -1 in the second case, which
        # moved the parameters *up* the gradient during the first steps.
        step_factor = r_t / (v_hat_t + self.epsilon) + 1. - flag
        new_p = p - lr * step_factor * m_hat_t

        if getattr(p, "constraint", None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(tf.update(p, new_p))
        self.updates.append(tf.update(m, m_t))
        self.updates.append(tf.update(v, v_t))
    return self.updates
def yolo4_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    args: list of tensor; the first ``len(anchors) // 3`` entries are the
        network outputs (one per detection layer), the remaining entries
        are the matching y_true tensors
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object
        confidence loss
    print_loss: bool, when set the running loss and its components are
        routed through tf.Print after every layer

    Returns
    -------
    loss: tensor, the xy, wh, confidence and class losses of every layer,
        each summed and divided by the batch size, added together

    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]

    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        # NOTE(review): index 3 appears in both groups and index 0 in none;
        # kept as-is, confirm it matches whatever produces y_true.
        anchor_mask = [[3, 4, 5], [1, 2, 3]]

    truth_dtype = tf.dtype(y_true[0])
    # Input size is taken to be 32x the grid of the first output.
    input_shape = tf.cast(tf.shape(yolo_outputs[0])[1:3] * 32, truth_dtype)
    grid_shapes = [
        tf.cast(tf.shape(output)[1:3], truth_dtype)
        for output in yolo_outputs[:num_layers]
    ]

    batch_size = tf.shape(yolo_outputs[0])[0]  # batch size, tensor
    batch_size_f = tf.cast(batch_size, tf.dtype(yolo_outputs[0]))

    loss = 0
    for layer_idx in range(num_layers):
        layer_truth = y_true[layer_idx]
        layer_anchors = anchors[anchor_mask[layer_idx]]

        object_mask = layer_truth[..., 4:5]
        true_class_probs = layer_truth[..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[layer_idx], layer_anchors, num_classes,
            input_shape, calc_loss=True)
        pred_box = tf.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = layer_truth[..., :2] * grid_shapes[layer_idx][::-1] - grid
        raw_true_wh = tf.log(
            layer_truth[..., 2:4] / layer_anchors * input_shape[::-1])
        # avoid log(0)=-inf
        raw_true_wh = tf.switch(
            object_mask, raw_true_wh, tf.zeros_like(raw_true_wh))
        box_loss_scale = 2 - layer_truth[..., 2:3] * layer_truth[..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(truth_dtype, size=1, dynamic_size=True)
        object_mask_bool = tf.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(layer_truth[b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            best_iou = tf.max(box_iou(pred_box[b], true_box), axis=-1)
            low_overlap = tf.cast(best_iou < ignore_thresh,
                                  tf.dtype(true_box))
            return b + 1, ignore_mask.write(b, low_overlap)

        _, ignore_mask = tf.control_flow_ops.while_loop(
            lambda b, *_: b < batch_size, loop_body, [0, ignore_mask])
        ignore_mask = tf.expand_dims(ignore_mask.stack(), -1)

        # tf.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * tf.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * tf.square(
            raw_true_wh - raw_pred[..., 2:4])
        # Same cross-entropy term feeds both the object and no-object parts.
        objectness_bce = tf.binary_crossentropy(
            object_mask, raw_pred[..., 4:5], from_logits=True)
        confidence_loss = (object_mask * objectness_bce +
                           (1 - object_mask) * objectness_bce * ignore_mask)
        class_loss = object_mask * tf.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = tf.sum(xy_loss) / batch_size_f
        wh_loss = tf.sum(wh_loss) / batch_size_f
        confidence_loss = tf.sum(confidence_loss) / batch_size_f
        class_loss = tf.sum(class_loss) / batch_size_f
        loss += xy_loss + wh_loss + confidence_loss + class_loss

        if print_loss:
            loss = tf.Print(
                loss,
                [loss, xy_loss, wh_loss, confidence_loss, class_loss,
                 tf.sum(ignore_mask)],
                message='loss: ')
    return loss