def yolo_head(graph, feats, anchors, num_classes): with graph.as_default(): num_anchors = len(anchors) anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2]) conv_dims = K.shape(feats)[1:3] conv_height_index = K.arange(0, stop=conv_dims[0]) conv_width_index = K.arange(0, stop=conv_dims[1]) conv_height_index = K.tile(conv_height_index, [conv_dims[1]]) conv_width_index = K.tile(K.expand_dims(conv_width_index, 0), [conv_dims[0], 1]) conv_width_index = K.flatten(K.transpose(conv_width_index)) conv_index = K.transpose(K.stack([conv_height_index, conv_width_index])) conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2]) conv_index = K.cast(conv_index, K.dtype(feats)) feats = K.reshape( feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5]) conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats)) box_xy = K.sigmoid(feats[..., :2]) box_wh = K.exp(feats[..., 2:4]) box_confidence = K.sigmoid(feats[..., 4:5]) box_class_probs = K.softmax(feats[..., 5:]) box_xy = (box_xy + conv_index) / conv_dims box_wh = box_wh * anchors_tensor / conv_dims return box_xy, box_wh, box_confidence, box_class_probs
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False): """Convert final layer features to bounding box parameters.""" num_anchors = len(anchors) # Reshape to batch, height, width, num_anchors, box_params. anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2]) grid_shape = K.shape(feats)[1:3] # height, width grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]), [1, grid_shape[1], 1, 1]) grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]), [grid_shape[0], 1, 1, 1]) grid = K.concatenate([grid_x, grid_y]) grid = K.cast(grid, K.dtype(feats)) feats = K.reshape( feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5]) # Adjust preditions to each spatial grid point and anchor size. box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast( grid_shape[::-1], K.dtype(feats)) box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast( input_shape[::-1], K.dtype(feats)) box_confidence = K.sigmoid(feats[..., 4:5]) box_class_probs = K.sigmoid(feats[..., 5:]) if calc_loss == True: return grid, feats, box_xy, box_wh return box_xy, box_wh, box_confidence, box_class_probs
def get_lr_decay(self): lr = self.model.optimizer.lr if self.model.optimizer.initial_decay > 0: lr = lr * (1. / (1. + self.model.optimizer.decay * backend.cast( self.model.optimizer.iterations, backend.dtype(self.model.optimizer.decay)))) return backend.eval(lr), backend.eval(self.model.optimizer.decay)
def loop_body(b, ignore_mask): true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0]) iou = box_iou(pred_box[b], true_box) best_iou = K.max(iou, axis=-1) ignore_mask = ignore_mask.write( b, K.cast(best_iou < ignore_thresh, K.dtype(true_box))) return b + 1, ignore_mask
def yolo_correct_boxes(self, box_xy, box_wh, input_shape, image_shape): '''Get corrected boxes''' box_yx = box_xy[..., ::-1] box_hw = box_wh[..., ::-1] input_shape = K.cast(input_shape, K.dtype(box_yx)) image_shape = K.cast(image_shape, K.dtype(box_yx)) new_shape = K.round(image_shape * K.min(input_shape / image_shape)) offset = (input_shape - new_shape) / 2. / input_shape scale = input_shape / new_shape box_yx = (box_yx - offset) * scale box_hw *= scale box_mins = box_yx - (box_hw / 2.) box_maxes = box_yx + (box_hw / 2.) boxes = K.concatenate([ box_mins[..., 0:1], # y_min box_mins[..., 1:2], # x_min box_maxes[..., 0:1], # y_max box_maxes[..., 1:2] # x_max ]) # Scale boxes back to original image shape. boxes *= K.concatenate([image_shape, image_shape]) return boxes
def test_arange(self): for test_value in (-20, 0, 1, 10): t_a = KTF.arange(test_value) a = KTF.eval(t_a) assert np.array_equal(a, np.arange(test_value)) t_b = KTH.arange(test_value) b = KTH.eval(t_b) assert np.array_equal(b, np.arange(test_value)) assert np.array_equal(a, b) assert KTF.dtype(t_a) == KTH.dtype(t_b) for start, stop, step in ((0, 5, 1), (-5, 5, 2), (0, 1, 2)): a = KTF.eval(KTF.arange(start, stop, step)) assert np.array_equal(a, np.arange(start, stop, step)) b = KTH.eval(KTH.arange(start, stop, step)) assert np.array_equal(b, np.arange(start, stop, step)) assert np.array_equal(a, b) for dtype in ('int32', 'int64', 'float32', 'float64'): for backend in (KTF, KTH): t = backend.arange(10, dtype=dtype) assert backend.dtype(t) == dtype
def test_arange(self): for test_value in (-20, 0, 1, 10): t_a = KTF.arange(test_value) a = KTF.eval(t_a) assert np.array_equal(a, np.arange(test_value)) t_b = KTH.arange(test_value) b = KTH.eval(t_b) assert np.array_equal(b, np.arange(test_value)) assert np.array_equal(a, b) assert KTF.dtype(t_a) == KTH.dtype(t_b) for start, stop, step in ((0, 5, 1), (-5, 5, 2), (0, 1, 2)): a = KTF.eval(KTF.arange(start, stop, step)) assert np.array_equal(a, np.arange(start, stop, step)) b = KTH.eval(KTH.arange(start, stop, step)) assert np.array_equal(b, np.arange(start, stop, step)) assert np.array_equal(a, b) for dtype in ('int32', 'int64', 'float32', 'float64'): for backend in (KTF, KTH): t = backend.arange(10, dtype=dtype) assert backend.dtype(t) == dtype
def _preprocess_conv3d_input(x, data_format, force_transpose=False): """Transpose and cast the input before the conv3d. # Arguments x: input tensor. data_format: string, `"channels_last"` or `"channels_first"`. # Returns A tensor. """ # tensorflow doesn't support float64 for conv layer before 1.8.0 if (dtype(x) == 'float64' and (StrictVersion(tf.__version__.split('-')[0]) < StrictVersion('1.8.0'))): x = tf.cast(x, 'float32') tf_data_format = 'NDHWC' if data_format == 'channels_first': # ADDED: option force_transpose in following line if not _has_nchw_support() or force_transpose: # ADDED till here x = tf.transpose(x, (0, 2, 3, 4, 1)) else: tf_data_format = 'NCDHW' return x, tf_data_format
def yolo_loss(self, args, anchors, num_classes, ignore_thresh=.5, print_loss=False): '''Return yolo_loss tensor Parameters ---------- yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body y_true: list of array, the output of preprocess_true_boxes anchors: array, shape=(N, 2), wh num_classes: integer ignore_thresh: float, the iou threshold whether to ignore object confidence loss Returns ------- loss: tensor, shape=(1,) ''' num_layers = len(anchors) // 3 # default setting yolo_outputs = args[:num_layers] y_true = args[num_layers:] anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2] ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]] input_shape = K.cast( K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0])) grid_shapes = [ K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers) ] loss = 0 m = K.shape(yolo_outputs[0])[0] # batch size, tensor mf = K.cast(m, K.dtype(yolo_outputs[0])) for l in range(num_layers): object_mask = y_true[l][..., 4:5] true_class_probs = y_true[l][..., 5:] grid, raw_pred, pred_xy, pred_wh = self.yolo_head( yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True) pred_box = K.concatenate([pred_xy, pred_wh]) # Darknet raw box to calculate loss. raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1]) raw_true_wh = K.switch( object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-inf box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4] # Find ignore mask, iterate over each of batch. ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True) object_mask_bool = K.cast(object_mask, 'bool') def loop_body(b, ignore_mask): true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0]) iou = box_iou(pred_box[b], true_box) best_iou = K.max(iou, axis=-1) ignore_mask = ignore_mask.write( b, K.cast(best_iou < ignore_thresh, K.dtype(true_box))) return b + 1, ignore_mask _, ignore_mask = K.control_flow_ops.while_loop( lambda b, *args: b < m, loop_body, [0, ignore_mask]) ignore_mask = ignore_mask.stack() ignore_mask = K.expand_dims(ignore_mask, -1) # K.binary_crossentropy is helpful to avoid exp overflow. xy_loss = object_mask * box_loss_scale * K.binary_crossentropy( raw_true_xy, raw_pred[..., 0:2], from_logits=True) wh_loss = object_mask * box_loss_scale * 0.5 * K.square( raw_true_wh - raw_pred[..., 2:4]) confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \ (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask class_loss = object_mask * K.binary_crossentropy( true_class_probs, raw_pred[..., 5:], from_logits=True) xy_loss = K.sum(xy_loss) / mf wh_loss = K.sum(wh_loss) / mf confidence_loss = K.sum(confidence_loss) / mf class_loss = K.sum(class_loss) / mf loss += xy_loss + wh_loss + confidence_loss + class_loss if print_loss: loss = tf.Print(loss, [ loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask) ], message='loss: ') return loss
def get_updates(self, params, loss): grads = self.get_gradients(loss, params) self.updates = [K.update_add(self.iterations, 1)] lr = self.lr if self.initial_decay > 0: lr *= (1. / (1. + self.decay * K.cast(self.iterations, K.dtype(self.decay)))) t = K.cast(self.iterations, K.floatx()) + 1 lr_t = lr * K.sqrt(1. - K.pow(self.beta_2, t)) / ( 1. - K.pow(self.beta_1, t)) shapes = [K.get_variable_shape(p) for p in params] ms = [K.zeros(shape) for shape in shapes] vs = [K.zeros(shape) for shape in shapes] self.weights = [self.iterations] + ms + vs for p, g, m, v in zip(params, grads, ms, vs): # if a weight tensor (len > 1) use weight normalized parameterization # this is the only part changed w.r.t. keras.optimizers.Adam ps = K.get_variable_shape(p) if len(ps) > 1: # get weight normalization parameters V, V_norm, V_scaler, g_param, grad_g, grad_V = get_weightnorm_params_and_grads( p, g) # Adam containers for the 'g' parameter V_scaler_shape = K.get_variable_shape(V_scaler) m_g = K.zeros(V_scaler_shape) v_g = K.zeros(V_scaler_shape) # update g parameters m_g_t = (self.beta_1 * m_g) + (1. - self.beta_1) * grad_g v_g_t = (self.beta_2 * v_g) + (1. - self.beta_2) * K.square(grad_g) new_g_param = g_param - lr_t * m_g_t / (K.sqrt(v_g_t) + self.epsilon) self.updates.append(K.update(m_g, m_g_t)) self.updates.append(K.update(v_g, v_g_t)) # update V parameters m_t = (self.beta_1 * m) + (1. - self.beta_1) * grad_V v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(grad_V) new_V_param = V - lr_t * m_t / (K.sqrt(v_t) + self.epsilon) self.updates.append(K.update(m, m_t)) self.updates.append(K.update(v, v_t)) # if there are constraints we apply them to V, not W # if p in constraints: # c = constraints[p] # new_V_param = c(new_V_param) # wn param updates --> W updates add_weightnorm_param_updates(self.updates, new_V_param, new_g_param, p, V_scaler) else: # do optimization normally m_t = (self.beta_1 * m) + (1. - self.beta_1) * g v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g) p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon) self.updates.append(K.update(m, m_t)) self.updates.append(K.update(v, v_t)) new_p = p_t # apply constraints # if p in constraints: # c = constraints[p] # new_p = c(new_p) self.updates.append(K.update(p, new_p)) return self.updates