def box_diou(b1, b2):
    """Compute the Distance-IoU (DIoU) between two box tensors.

    Only the first four entries of the last axis are read from each input
    and they are interpreted as ``(x_center, y_center, width, height)``.

    Args:
        b1: tensor of boxes.
        b2: tensor of boxes, broadcast against ``b1`` element-wise.

    Returns:
        ``iou - center_distance / enclose_diagonal`` with one trailing
        singleton axis appended.
    """
    centers_1, sizes_1 = b1[..., :2], b1[..., 2:4]
    centers_2, sizes_2 = b2[..., :2], b2[..., 2:4]

    # Corner coordinates of both boxes.
    half_1 = sizes_1 / 2.
    half_2 = sizes_2 / 2.
    lo_1, hi_1 = centers_1 - half_1, centers_1 + half_1
    lo_2, hi_2 = centers_2 - half_2, centers_2 + half_2

    # Overlap rectangle; negative extents (disjoint boxes) are clamped to 0.
    overlap_wh = K.maximum(K.minimum(hi_1, hi_2) - K.maximum(lo_1, lo_2), 0.)
    overlap_area = overlap_wh[..., 0] * overlap_wh[..., 1]

    area_1 = sizes_1[..., 0] * sizes_1[..., 1]
    area_2 = sizes_2[..., 0] * sizes_2[..., 1]
    union = area_1 + area_2 - overlap_area
    iou = overlap_area / (union + K.epsilon())

    # Squared distance between the two centers.
    center_gap = K.sum(K.square(centers_1 - centers_2), axis=-1)

    # Squared diagonal of the smallest box enclosing both inputs.
    hull_wh = K.maximum(K.maximum(hi_1, hi_2) - K.minimum(lo_1, lo_2), 0.0)
    hull_diag = K.sum(K.square(hull_wh), axis=-1)

    penalised = iou - 1.0 * (center_gap) / (hull_diag + K.epsilon())
    return K.expand_dims(penalised, -1)
def path_energy0(y, x, U, mask=None):
    """Energy of a tag path, ignoring boundary potentials.

    The result is the sum of the per-step scores selected by ``y`` plus the
    transition scores looked up in ``U`` for each consecutive tag pair.

    Args:
        y: integer tag indices; indexed as ``y[:, t]``.
        x: per-step tag scores; its third axis is the tag axis.
        U: transition matrix, flattened and indexed by
            ``tag_t * n_classes + tag_t_plus_1``.
        mask: optional mask; a transition counts only when both of its
            endpoints are unmasked.

    Returns:
        One energy value per row of ``y``.
    """
    num_tags = K.shape(x)[2]

    # Emission part: keep only the score of the chosen tag at every step.
    chosen_scores = K.sum(x * K.one_hot(y, num_tags), 2)
    total = K.sum(chosen_scores, 1)

    # Transition part: turn each (tag_t, tag_t+1) pair into one flat index.
    prev_tags, next_tags = y[:, :-1], y[:, 1:]
    pair_index = prev_tags * num_tags + next_tags
    pair_scores = K.gather(K.reshape(U, [-1]), pair_index)

    if mask is not None:
        float_mask = K.cast(mask, K.floatx())
        pair_scores = pair_scores * (float_mask[:, :-1] * float_mask[:, 1:])

    return total + K.sum(pair_scores, axis=1)
def build(self, input_shape=None):
    """Build the wrapped layer and attach the concrete-dropout regulariser.

    Creates the ``p_logit`` weight on the wrapped layer, derives the dropout
    probability ``self.p`` from it, and registers a loss on the wrapped layer
    made of a kernel term and a dropout (entropy-like) term.

    Args:
        input_shape: shape passed on to the wrapped layer's ``build``.
            NOTE(review): the default ``None`` cannot work here because
            ``input_shape[1:]`` is evaluated below -- callers must pass a
            real shape.
    """
    self.input_spec = InputSpec(shape=input_shape)
    # Build the wrapped layer ourselves so that its kernel exists below.
    if not self.layer.built:
        self.layer.build(input_shape)
        self.layer.built = True
    super(ConcreteDropout, self).build(
    )  # this is very weird.. we must call super before we add new losses
    # initialise p
    # The logit is a single scalar weight stored on the wrapped layer.
    self.p_logit = self.layer.add_weight(name='p_logit',
                                         shape=(1, ),
                                         initializer=RandomUniform(
                                             self.init_min, self.init_max),
                                         trainable=True)
    self.p = K.sigmoid(self.p_logit[0])
    # initialise regulariser / prior KL term
    # Product of every dimension after the first one of input_shape.
    input_dim = np.prod(input_shape[1:])
    weight = self.layer.kernel
    # Squared-kernel penalty, scaled up as the keep probability (1 - p) shrinks.
    kernel_regularizer = self.weight_regularizer * K.sum(
        K.square(weight)) / (1. - self.p)
    # p*log(p) + (1-p)*log(1-p), scaled by the dropout weight and input size.
    dropout_regularizer = self.p * K.log(self.p)
    dropout_regularizer += (1. - self.p) * K.log(1. - self.p)
    dropout_regularizer *= self.dropout_regularizer * input_dim
    regularizer = K.sum(kernel_regularizer + dropout_regularizer)
    self.layer.add_loss(regularizer)
def pearson_correlation_metric_fn(
    y_true: tf.Tensor,
    y_pred: tf.Tensor,
) -> tf.Tensor:
    """Pearson correlation between targets and predictions.

    Both inputs are centered with their mean along axis 0; the products are
    then summed over every element, so a single coefficient is produced.

    Adapted from https://github.com/WenYanger/Keras_Metrics

    Args:
        y_true (tf.Tensor): ground-truth values.
        y_pred (tf.Tensor): predicted values.

    Returns:
        tf.Tensor: the correlation coefficient (denominator offset by
        ``1e-12`` to avoid division by zero).
    """
    centered_true = y_true - K.mean(y_true, axis=0)
    centered_pred = y_pred - K.mean(y_pred, axis=0)

    covariance = K.sum(centered_true * centered_pred)
    true_energy = K.sum(centered_true * centered_true)
    pred_energy = K.sum(centered_pred * centered_pred)

    denominator = K.sqrt(true_energy * pred_energy) + 1e-12
    return K.mean(covariance / denominator)
def call(self, inputs, mask=None, **kwargs):
    """Apply soft attention and return the weighted sum of the inputs.

    Args:
        inputs (object): input tensor; it is reduced over axis 1.
        mask (object): optional mask multiplied into the exponentiated
            scores before normalisation.

    Returns:
        object: attention-weighted sum of ``inputs`` along axis 1.
    """
    hidden = K.tanh(K.dot(inputs, self.W) + self.b)
    scores = K.squeeze(K.dot(hidden, self.q), axis=2)

    unnormalised = K.exp(scores)
    if mask is not None:
        unnormalised = unnormalised * K.cast(mask, dtype="float32")

    # Epsilon keeps the division finite when every position is masked out.
    normaliser = K.sum(unnormalised, axis=-1, keepdims=True) + K.epsilon()
    weights = K.expand_dims(unnormalised / normaliser)

    return K.sum(inputs * weights, axis=1)
def loss(y_true, y_pred):
    """Class-weighted categorical cross-entropy.

    ``weights`` is taken from the enclosing scope and multiplied into the
    per-class log-likelihood terms.

    Args:
        y_true: target distribution over the last axis.
        y_pred: predicted class scores over the last axis.

    Returns:
        Negative weighted log-likelihood, summed over the last axis.
    """
    # Renormalise so the predicted class probabilities sum to 1 per sample.
    probs = y_pred / K.sum(y_pred, axis=-1, keepdims=True)
    # Keep log() away from 0 and 1 to prevent NaN/Inf.
    probs = K.clip(probs, K.epsilon(), 1 - K.epsilon())
    weighted_log_likelihood = y_true * K.log(probs) * weights
    return -K.sum(weighted_log_likelihood, -1)
def __center_loss(y_true, y_pred, centers):
    """Sum of squared distances between predictions and their class center.

    For every class index in ``range(CONFIG["num_classes"])`` the rows of
    ``y_pred`` whose arg-max label equals that class are gathered, the
    class center is subtracted, and the squared differences are summed.

    Args:
        y_true: label tensor; the class of a row is ``argmax`` over its
            last axis.
        y_pred: predicted embeddings, one row per sample.
        centers: indexable by class id, giving that class's center.

    Returns:
        Scalar tensor with the accumulated squared distance.
    """
    labels = K.argmax(y_true)
    # A constant (not K.variable) accumulator: the value is identical, but no
    # new graph variable is created each time the loss is built.
    total = K.constant(0.0)
    for label in range(CONFIG["num_classes"]):
        rows = tf.where(tf.equal(labels, label))
        class_preds = tf.gather_nd(y_pred, indices=rows)
        squared_diff = K.pow(tf.subtract(class_preds, centers[label]), 2)
        class_total = K.sum(K.sum(squared_diff, axis=-1), axis=-1)
        total = tf.add(total, class_total)
    return total
def euclidean_distance(vects):
    """Row-wise Euclidean distance between ``vects[0]`` and ``vects[1]``.

    The squared distance is floored at ``K.epsilon()`` before the square
    root is taken.

    Args:
        vects: pair ``(x, y)`` of tensors.

    Returns:
        Distances reduced over axis 1, with that axis kept as size 1.
    """
    left, right = vects
    squared_norm = K.sum(K.square(left - right), axis=1, keepdims=True)
    return K.sqrt(K.maximum(squared_norm, K.epsilon()))
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box and polygon parameters.

    Args:
        feats: raw output of the final layer; reshaped below to
            ``[-1, grid_h, grid_w, num_anchors, num_classes + 5 + NUM_ANGLES3]``.
        anchors: anchor sizes, reshaped to ``[1, 1, 1, num_anchors, 2]``.
        num_classes: number of class scores stored per anchor.
        input_shape: tensor indexed as ``input_shape[..., ::-1]`` to
            normalise widths/heights (presumably (height, width) of the
            network input -- TODO confirm against caller).
        calc_loss: when equal to ``True``, return the values needed by the
            loss instead of the decoded predictions.

    Returns:
        ``(grid, feats, box_xy, box_wh, polygons_confidence)`` when
        ``calc_loss == True``; otherwise ``(box_xy, box_wh, box_confidence,
        box_class_probs, polygons_x, polygons_y, polygons_confidence)``.
    """
    # NOTE(review): the anchor count comes from the module-level
    # `anchors_per_level`, not from len(anchors) -- the two must agree for
    # the reshape below to succeed.
    num_anchors = anchors_per_level
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    # Row indices tiled across the width, column indices tiled across the
    # height; concatenated so that the last axis is (x, y).
    grid_y = K.tile(
        tf.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1],
                   name='yolo_head/tile/reshape/grid_y'),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(
        tf.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1],
                   name='yolo_head/tile/reshape/grid_x'),
        [grid_shape[0], 1, 1, 1])
    grid = tf.concat([grid_x, grid_y], axis=-1, name='yolo_head/concatenate/grid')
    grid = K.cast(grid, K.dtype(feats))

    feats = tf.reshape(feats, [
        -1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5 + NUM_ANGLES3
    ], name='yolo_head/reshape/feats')

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[..., ::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[..., ::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:5 + num_classes])

    # The polygon block after the class scores is read with stride 3:
    # offset 0 -> x term, offset 1 -> y term, offset 2 -> confidence.
    polygons_confidence = K.sigmoid(feats[..., 5 + num_classes + 2:5 + num_classes + NUM_ANGLES3:3])
    polygons_x = K.exp(feats[..., 5 + num_classes:num_classes + 5 + NUM_ANGLES3:3])

    # d: length of the anchor half-diagonal, sqrt((w/2)^2 + (h/2)^2).
    dx = K.square(anchors_tensor[..., 0:1] / 2)
    dy = K.square(anchors_tensor[..., 1:2] / 2)
    d = K.cast(K.sqrt(dx + dy), K.dtype(polygons_x))
    # diagonal: sqrt of the summed squares of the input_shape entries.
    a = K.pow(input_shape[..., ::-1], 2)
    a = K.cast(a, K.dtype(feats))
    b = K.sum(a)
    diagonal = K.cast(K.sqrt(b), K.dtype(feats))
    polygons_x = polygons_x * d / diagonal

    polygons_y = feats[..., 5 + num_classes + 1:num_classes + 5 + NUM_ANGLES3:3]
    polygons_y = K.sigmoid(polygons_y)

    if calc_loss == True:
        return grid, feats, box_xy, box_wh, polygons_confidence
    return box_xy, box_wh, box_confidence, box_class_probs, polygons_x, polygons_y, polygons_confidence
def loss_function_jaccard(y_true, y_pred, smooth=100):
    """Smoothed Jaccard distance loss.

    Jaccard = |X & Y| / (|X| + |Y| - |X & Y|)
            = sum(|A*B|) / (sum(|A|) + sum(|B|) - sum(|A*B|))

    The Jaccard distance loss is useful for unbalanced datasets. It is
    shifted so that it converges on 0 and smoothed to avoid exploding or
    disappearing gradients.

    Ref: https://en.wikipedia.org/wiki/Jaccard_index
    @url: https://gist.github.com/wassname/f1452b748efcbeb4cb9b1d059dce6f96
    @author: wassname

    Args:
        y_true: ground-truth tensor.
        y_pred: prediction tensor.
        smooth: smoothing constant added to numerator and denominator and
            used to scale the result.

    Returns:
        ``(1 - jaccard) * smooth``, reduced over the last axis.
    """
    overlap = K.sum(K.abs(y_true * y_pred), axis=-1)
    magnitude = K.sum(K.abs(y_true) + K.abs(y_pred), axis=-1)
    jaccard = (overlap + smooth) / (magnitude - overlap + smooth)
    return (1 - jaccard) * smooth
def get_GRU_components(inputs, states, weight):
    """Recompute one GRU step by hand and return its intermediate values.

    The reset-gated recurrent term is computed twice -- once directly as
    ``dot(r * h_tm1, recurrent_kernel_h)`` and once from its un-summed
    per-unit contributions -- and the two results are asserted to agree.

    Args:
        inputs: input values for the step (wrapped in ``K.variable``).
        states: previous hidden state (wrapped in ``K.variable``).
        weight: sequence ``[kernel, recurrent_kernel, bias]`` with shapes
            ``(input_dim, 3 * units)``, ``(units, 3 * units)`` and
            ``(3 * units,)``; the column blocks are read in the order
            update, reset, new.

    Returns:
        Tuple of evaluated arrays
        ``(h_tm1, h, z, r, hh, x_h, split_recurrent_h)``.

    Raises:
        AssertionError: if the two computations of the recurrent term differ
            by ``1e-6`` or more on average.
    """
    # The hidden size is the first axis of the recurrent kernel. The input
    # kernel's first axis is input_dim, which only equals units when the
    # input and hidden sizes happen to match.
    units = weight[1].shape[0]
    kernel = K.variable(weight[0])  # shape = (input_dim, units * 3)
    recurrent_kernel = K.variable(weight[1])  # shape = (units, units * 3)
    bias = K.variable(weight[2])  # shape = (units * 3,)
    inputs = K.variable(inputs)
    h_tm1 = K.variable(states)  # Previous memory state.

    # Update gate.
    kernel_z = kernel[:, :units]
    recurrent_kernel_z = recurrent_kernel[:, :units]
    input_bias_z = bias[:units]
    # Reset gate.
    kernel_r = kernel[:, units:units * 2]
    recurrent_kernel_r = recurrent_kernel[:, units:units * 2]
    input_bias_r = bias[units:units * 2]
    # New gate.
    kernel_h = kernel[:, units * 2:]
    recurrent_kernel_h = recurrent_kernel[:, units * 2:]
    input_bias_h = bias[units * 2:]

    # Input projections for the three gates.
    x_z = K.bias_add(K.dot(inputs, kernel_z), input_bias_z)
    x_r = K.bias_add(K.dot(inputs, kernel_r), input_bias_r)
    x_h = K.bias_add(K.dot(inputs, kernel_h), input_bias_h)

    recurrent_z = K.dot(h_tm1, recurrent_kernel_z)
    recurrent_r = K.dot(h_tm1, recurrent_kernel_r)

    z = hard_sigmoid(x_z + recurrent_z)  # Recurrent activation = 'hard_sigmoid'.
    r = hard_sigmoid(x_r + recurrent_r)

    recurrent_h = K.dot(r * h_tm1, recurrent_kernel_h)

    # Same term, kept un-summed so the per-unit contributions are available.
    split_recurrent_h = K.expand_dims(h_tm1, axis=-1) * recurrent_kernel_h
    r_unsqueeze = K.expand_dims(r, axis=-1)
    recompute_recurrent_h = K.sum(r_unsqueeze * split_recurrent_h, axis=1)

    # Evaluate each tensor once and reuse the arrays for the check and print.
    recomputed_value = K.get_value(recompute_recurrent_h)
    direct_value = K.get_value(recurrent_h)
    delta = np.mean(np.abs(recomputed_value - direct_value))
    print("delta =", delta, np.mean(recomputed_value), np.mean(direct_value))
    assert delta < 1e-6, "r gate is wrong."

    hh = tanh(x_h + recurrent_h)  # Activation = 'tanh'.
    # Previous and candidate state mixed by update gate.
    h = z * h_tm1 + (1 - z) * hh

    return (K.get_value(h_tm1), K.get_value(h), K.get_value(z),
            K.get_value(r), K.get_value(hh), K.get_value(x_h),
            K.get_value(split_recurrent_h))
def __softmax_cosine_loss(y_true, y_pred, centers):
    """Softmax-style loss built on the ``__cosine`` distance to class centers.

    The loss is the sum of two parts:

    * the distance of every sample to the center of its own class, and
    * ``log(sum_c exp(-distance(sample, center_c)))`` summed over the batch.

    Args:
        y_true: label tensor; the class of a row is ``argmax`` over its
            last axis.
        y_pred: predicted embeddings, one row per sample.
        centers: indexable by class id, giving that class's center.

    Returns:
        Scalar tensor with the combined loss.
    """
    labels = K.argmax(y_true)
    num_classes = CONFIG["num_classes"]

    # A constant (not K.variable) accumulator: the value is identical, but no
    # new graph variable is created each time the loss is built.
    own_center_term = K.constant(0.0)
    for label in range(num_classes):
        rows = tf.where(tf.equal(labels, label))
        class_preds = tf.gather_nd(y_pred, indices=rows)
        class_distances = __cosine(class_preds, centers[label])
        own_center_term = tf.add(own_center_term,
                                 K.sum(class_distances, axis=-1))

    # Per-sample partition term accumulated over every class center.
    partition = K.zeros_like(labels, dtype='float32')
    for label in range(num_classes):
        distances_to_center = __cosine(y_pred, centers[label])
        partition = tf.add(partition, K.exp(-distances_to_center))

    return own_center_term + K.sum(K.log(partition))
def squared_distance(input_x, input_y=None, weight=None):
    """Pairwise squared Euclidean distances between rows of two matrices.

    Args:
        input_x: n x d matrix.
        input_y: m x d matrix; defaults to ``input_x``.
        weight: optional n x m affinity; when given, both inputs are divided
            by the square root of its row sums before differencing.

    Returns:
        n x m matrix of all pairwise squared Euclidean distances.
    """
    if input_y is None:
        input_y = input_x

    # The axes to reduce are derived from the rank *before* the extra
    # broadcasting axis is inserted.
    reduce_axes = list(range(2, K.ndim(input_x) + 1))
    expanded_x = K.expand_dims(input_x, axis=1)

    if weight is not None:
        degree_root = K.expand_dims(K.sqrt(K.sum(weight, axis=1)), axis=1)
        expanded_x = expanded_x / degree_root
        input_y = input_y / degree_root

    return K.sum(K.square(expanded_x - input_y), axis=reduce_axes)
def squared_distance(X, Y=None, W=None):
    """Compute all pairwise squared Euclidean distances between X and Y.

    X: n x d matrix
    Y: m x d matrix (defaults to X when omitted)
    W: affinity -- if provided, X and Y are first divided by the square
       root of its row sums

    returns: n x m matrix of all pairwise squared Euclidean distances
    """
    other = X if Y is None else Y

    # Reduction axes are fixed from the rank of X before it gains an axis.
    axes_to_sum = list(range(2, K.ndim(X) + 1))
    rows = K.expand_dims(X, axis=1)

    if W is not None:
        row_sums = K.sum(W, axis=1)
        scale = K.expand_dims(K.sqrt(row_sums), axis=1)
        rows = rows / scale
        other = other / scale

    gaps = rows - other
    return K.sum(K.square(gaps), axis=axes_to_sum)
def _build(self):
    """Assemble encoder, stacked LSTM and decoder into ``self.model``.

    The model input has shape
    ``(historical_len, num_nodes, num_nodes)`` per sample. The encoder is
    applied to every time step, the encodings are summed over the time
    axis, reshaped to one row per node and passed through the stacked LSTM
    and the decoder.
    """
    self.encoder = self._build_encoder()
    self.stacked_lstm = self._build_stack_lstm()
    self.decoder = self._build_decoder()

    snapshots = Input(
        shape=(self.historical_len, self.num_nodes, self.num_nodes))

    encoded = TimeDistributed(self.encoder)(snapshots)
    per_step = Reshape((self.historical_len, -1))(encoded)
    # Collapse the time axis by summation.
    pooled = Lambda(lambda t: K.sum(t, axis=1))(per_step)
    per_node = Reshape((self.num_nodes, -1))(pooled)
    prediction = self.decoder(self.stacked_lstm(per_node))

    self.model = Model(inputs=snapshots, outputs=prediction)
def call(self, x, mask=None):
    """Attention-pool ``x`` over its step axis.

    Each step is scored with ``tanh(x . W [+ b])``; the exponentiated
    scores are (optionally) masked, normalised over the steps and used as
    weights for a sum of ``x`` along axis 1.

    Args:
        x: input tensor, reshaped internally to ``(-1, features_dim)``.
        mask: optional mask applied after the exponential.

    Returns:
        Weighted sum of ``x`` along axis 1.
    """
    feat_dim = self.features_dim
    n_steps = self.step_dim

    flat_inputs = K.reshape(x, (-1, feat_dim))
    projection = K.reshape(self.W, (feat_dim, 1))
    scores = K.reshape(K.dot(flat_inputs, projection), (-1, n_steps))
    if self.bias:
        scores = scores + self.b

    weights = K.exp(K.tanh(scores))

    # Mask after the exp; the normalisation below re-balances the rest.
    if mask is not None:
        # Cast to floatX to avoid float64 upcasting in theano.
        weights = weights * K.cast(mask, K.floatx())

    # Early in training the sum can be almost zero, which would give NaNs;
    # a small epsilon keeps the division well defined.
    normaliser = K.cast(
        K.sum(weights, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    weights = K.expand_dims(weights / normaliser)

    return K.sum(weights * x, axis=1)
def gru_with_r_gate(x, weight):
    """Recompute the GRU reset gate from packed weights and return it.

    Args:
        x: indexable holding, in order, ``h_tm1``, ``inputs``, ``z``,
            ``x_h`` and ``split_recurrent_h``.
        weight: single packed matrix; rows ``[:units]`` are read as the
            input kernel, rows ``[units:2*units]`` as the recurrent kernel
            and row ``2*units`` as the bias, each restricted to the
            reset-gate columns ``[units:2*units]``.

    Returns:
        The reset gate ``r``.
    """
    h_prev = x[0]
    step_inputs = x[1]
    update_gate = x[2]
    candidate_input = x[3]
    split_rec = x[4]

    packed = K.variable(weight)
    units = h_prev.shape[-1]

    # Reset-gate slices of the packed matrix.
    kernel_r = packed[:units, units:units * 2]
    recurrent_kernel_r = packed[units:units * 2, units:units * 2]
    bias_r = packed[units * 2, units:units * 2]  # single row -> 1-D bias

    pre_activation = (K.bias_add(K.dot(step_inputs, kernel_r), bias_r) +
                      K.dot(h_prev, recurrent_kernel_r))
    reset_gate = hard_sigmoid(pre_activation)

    # Candidate and new state are still constructed (as in the original
    # flow) even though only the reset gate is returned.
    gated_rec = K.sum(K.expand_dims(reset_gate, axis=-1) * split_rec, axis=1)
    candidate = tanh(candidate_input + gated_rec)
    new_state = update_gate * h_prev + (1 - update_gate) * candidate

    return reset_gate
def call(self, input):
    """Combine each of the 21 feature maps with those of its neighbours.

    ``self.conv2`` is applied to ``input`` 21 times. For every index the
    map is concatenated with the maps listed for it by
    ``keypoint_connections()``, passed through ``conv2a[i]`` and
    ``conv2b[i]``, projected with ``W[i]``, joined with the original map
    and summed over the last axis.

    Args:
        input: tensor fed to ``self.conv2``.

    Returns:
        Concatenation over the last axis of the 21 summed maps.
    """
    connections = keypoint_connections()
    base_maps = [self.conv2(input) for _ in range(21)]

    merged = []
    for idx in range(21):
        neighbourhood = [base_maps[idx]]
        neighbourhood.extend(base_maps[j] for j in connections[idx])
        stacked = K.concatenate(tuple(neighbourhood), axis=-1)

        hidden = self.conv2a[idx](stacked)
        refined = self.conv2b[idx](hidden)
        projected = K.dot(refined, self.W[idx])
        print(projected.shape)

        joined = K.concatenate((base_maps[idx], projected), axis=-1)
        merged.append(K.sum(joined, axis=-1, keepdims=True))

    return K.concatenate(tuple(merged), axis=-1)
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    args: list; the first ``num_layers`` entries are ``yolo_outputs`` and the
        remaining entries are ``y_true``
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss
    print_loss: bool, wrap the running loss in tf.Print after each layer

    Returns
    -------
    loss: tensor, sum over layers of the batch-averaged loss terms

    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    # NOTE(review): in the two-layer case the second group [1, 2, 3] shares
    # index 3 with the first group -- confirm this overlap is intended.
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    # Input size is taken as 32x the spatial size of the first output.
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        # Larger weight for smaller boxes: 2 - w * h.
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # For sample b: a prediction gets mask value 1 when its best IoU
            # with any true box of that sample is below ignore_thresh.
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m,
                                                       loop_body,
                                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        # Object cells always contribute; non-object cells contribute only
        # where ignore_mask is 1.
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        # Average every term over the batch size.
        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ],
                            message='loss: ')
    return loss
def recall_m(y_true, y_pred):
    """Recall: true positives over actual positives.

    Values are clipped to ``[0, 1]`` and rounded before counting;
    ``K.epsilon()`` guards the division.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    actual = K.round(K.clip(y_true, 0, 1))
    return K.sum(hits) / (K.sum(actual) + K.epsilon())
def precision_m(y_true, y_pred):
    """Precision: true positives over predicted positives.

    Values are clipped to ``[0, 1]`` and rounded before counting;
    ``K.epsilon()`` guards the division.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hits) / (K.sum(predicted) + K.epsilon())
def dice_coef(y_true, y_pred):
    """Smoothed Dice coefficient over the flattened inputs.

    Uses the module-level ``smooth`` constant in both the numerator and
    the denominator.
    """
    flat_true = K.flatten(y_true)
    flat_pred = K.flatten(y_pred)
    overlap = K.sum(flat_true * flat_pred)

    numerator = 2. * overlap + smooth
    denominator = K.sum(flat_true) + K.sum(flat_pred) + smooth
    return numerator / denominator
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    '''Return yolo_loss tensor

    Parameters
    ----------
    args: list; the first three entries are ``yolo_outputs`` and the
        remaining entries are ``y_true``
    yolo_outputs: list of tensor, the output of yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(T, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, summed squared-error loss divided by the batch size

    '''
    yolo_outputs = args[:3]
    y_true = args[3:]
    # Anchor indices used by each of the three output layers.
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    # Input size is taken as 32x the spatial size of the first output.
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(3)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # first dimension of the output

    for l in range(3):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        pred_xy, pred_wh, pred_confidence, pred_class_probs = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes,
            input_shape)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet box loss.
        xy_delta = (y_true[l][..., :2] - pred_xy) * grid_shapes[l][::-1]
        wh_delta = K.log(y_true[l][..., 2:4]) - K.log(pred_wh)
        # Avoid log(0)=-inf.
        wh_delta = K.switch(object_mask, wh_delta, K.zeros_like(wh_delta))
        box_delta = K.concatenate([xy_delta, wh_delta], axis=-1)
        # Larger weight for smaller boxes: 2 - w * h.
        box_delta_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # For sample b: a prediction gets mask value 1 when its best IoU
            # with any true box of that sample is below ignore_thresh.
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m,
                                                       loop_body,
                                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        box_loss = object_mask * K.square(box_delta * box_delta_scale)
        # Object cells are pushed towards confidence 1; non-object cells
        # towards 0, but only where ignore_mask is 1.
        confidence_loss = object_mask * K.square(1-pred_confidence) + \
            (1-object_mask) * K.square(0-pred_confidence) * ignore_mask
        class_loss = object_mask * K.square(true_class_probs -
                                            pred_class_probs)
        loss += K.sum(box_loss) + K.sum(confidence_loss) + K.sum(class_loss)
    return loss / K.cast(m, K.dtype(loss))
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    """Return yolo_loss tensor

    Parameters
    ----------
    args: list; the first ``num_layers`` (= 1) entries are ``yolo_outputs``
        and the remaining entries are ``y_true``
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, combined box / class / polygon loss

    """
    num_layers = 1
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    g_y_true = y_true  # NOTE(review): not read anywhere in this function
    # `grid_size_multiplier` and `anchor_mask` are module-level names.
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * grid_size_multiplier,
        K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for layer in range(num_layers):
        object_mask = y_true[layer][..., 4:5]
        # Every third entry of the polygon block, starting at offset 2.
        vertices_mask = y_true[layer][..., 5 + num_classes + 2:5 +
                                      num_classes + NUM_ANGLES3:3]
        true_class_probs = y_true[layer][..., 5:5 + num_classes]

        # pol_cnf is returned by yolo_head but not used below.
        grid, raw_pred, pred_xy, pred_wh, pol_cnf = yolo_head(
            yolo_outputs[layer],
            anchors[anchor_mask[layer]],
            num_classes,
            input_shape,
            calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        raw_true_xy = y_true[layer][..., :2] * grid_shapes[layer][
            ..., ::-1] - grid
        raw_true_polygon0 = y_true[layer][..., 5 + num_classes:5 +
                                          num_classes + NUM_ANGLES3]

        raw_true_wh = K.log(y_true[layer][..., 2:4] /
                            anchors[anchor_mask[layer]] *
                            input_shape[..., ::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf

        # Polygon block is interleaved with stride 3: offset 0 -> x term,
        # offset 1 -> y term (offset 2 is the vertices mask above).
        raw_true_polygon_x = raw_true_polygon0[..., ::3]
        raw_true_polygon_y = raw_true_polygon0[..., 1::3]

        # d: anchor half-diagonal; diagonal: diagonal of input_shape.
        dx = K.square(anchors[anchor_mask[layer]][..., 0:1] / 2)
        dy = K.square(anchors[anchor_mask[layer]][..., 1:2] / 2)
        d = K.cast(K.sqrt(dx + dy), K.dtype(raw_true_polygon_x))

        diagonal = K.sqrt(
            K.pow(input_shape[..., ::-1][0], 2) +
            K.pow(input_shape[..., ::-1][1], 2))
        raw_true_polygon_x = K.log(raw_true_polygon_x / d * diagonal)
        # Zero the log term where no vertex is present.
        raw_true_polygon_x = K.switch(vertices_mask, raw_true_polygon_x,
                                      K.zeros_like(raw_true_polygon_x))
        # Larger weight for smaller boxes: 2 - w * h.
        box_loss_scale = 2 - y_true[layer][..., 2:3] * y_true[layer][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # For sample b: a prediction gets mask value 1 when its best IoU
            # with any true box of that sample is below ignore_thresh.
            true_box = tf.boolean_mask(y_true[layer][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body,
                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(
            object_mask, raw_pred[..., 4:5], from_logits=True
        ) + (1 - object_mask) * K.binary_crossentropy(
            object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs,
            raw_pred[..., 5:5 + num_classes],
            from_logits=True)
        polygon_loss_x = object_mask * vertices_mask * box_loss_scale * 0.5 * K.square(
            raw_true_polygon_x -
            raw_pred[..., 5 + num_classes:5 + num_classes + NUM_ANGLES3:3])
        polygon_loss_y = object_mask * vertices_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_polygon_y,
            raw_pred[..., 5 + num_classes + 1:5 + num_classes + NUM_ANGLES3:3],
            from_logits=True)
        vertices_confidence_loss = object_mask * K.binary_crossentropy(
            vertices_mask,
            raw_pred[..., 5 + num_classes + 2:5 + num_classes + NUM_ANGLES3:3],
            from_logits=True)

        # Average every term over the batch size.
        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        class_loss = K.sum(class_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        vertices_confidence_loss = K.sum(vertices_confidence_loss) / mf
        polygon_loss = K.sum(polygon_loss_x) / mf + K.sum(polygon_loss_y) / mf

        # NOTE(review): diou_loss is computed but never added to `loss`.
        diou_loss = K.sum(
            object_mask * box_loss_scale *
            (1 - box_diou(pred_box, y_true[layer][..., 0:4]))) / mf

        # Polygon terms are weighted by 0.2; the total is rescaled by
        # mf / (number of object cells + 1).
        loss += (xy_loss + wh_loss + confidence_loss + class_loss +
                 0.2 * polygon_loss + 0.2 * vertices_confidence_loss) / (
                     K.sum(object_mask) + 1) * mf
    return loss
def yolo_loss(args,
              anchors,
              num_classes,
              rescore_confidence=False,
              print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    args : sequence
        Unpacked as ``(yolo_output, true_boxes, detectors_mask,
        matching_true_boxes)``; the four entries are described below.

    yolo_output : tensor
        Final convolutional layer features.

    true_boxes : tensor
        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
        containing box x_center, y_center, width, height, and class.

    detectors_mask : array
        0/1 mask for detector positions where there is a matching ground truth.

    matching_true_boxes : array
        Corresponding ground truth boxes for positive detector positions.
        Already adjusted for conv height and width.

    anchors : tensor
        Anchor boxes for model.

    num_classes : int
        Number of object classes.

    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.

    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    total_loss : tensor
        0.5 * (confidence + classification + coordinates) summed losses.
    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    # Fixed weights of the individual loss terms.
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate(
        (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    # NOTE(review): no epsilon here -- a zero union area divides by zero.
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (
        confidence_loss_sum + classification_loss_sum + coordinates_loss_sum)
    if print_loss:
        total_loss = tf.Print(
            total_loss, [
                total_loss, confidence_loss_sum, classification_loss_sum,
                coordinates_loss_sum
            ],
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss
def __euclidean(x, y):
    """Euclidean distance between ``x`` and ``y`` along the last axis."""
    squared_gap = K.pow(tf.subtract(x, y), 2)
    return K.sqrt(K.sum(squared_gap, axis=-1))
def __init__(self,
             inputs,
             arch,
             spec_reg,
             y_true,
             y_train_labeled_onehot,
             n_clusters,
             affinity,
             scale_nbr,
             n_nbrs,
             batch_sizes,
             siamese_net=None,
             x_train=None,
             have_labeled=False):
    """Build the spectral network, its loss and its training step.

    Args:
        inputs: dict of input tensors; the keys 'Unlabeled' and 'Labeled'
            are read here.
        arch: list of layer specs; all but the last are passed to
            ``make_layer_list`` and a tanh layer of size ``n_clusters`` plus
            an 'Orthonorm' layer are appended.
        spec_reg: l2 regularisation value forwarded to the layers.
        y_true: label tensor, used only when ``have_labeled`` is true.
        y_train_labeled_onehot: stored on the instance, not used here.
        n_clusters: size of the final tanh layer.
        affinity: one of 'siamese', 'knn' or 'full'.
            NOTE(review): any other value leaves ``input_affinity``,
            ``x_affinity`` and ``W`` unassigned and fails at first use.
        scale_nbr: forwarded to ``get_scale`` and ``costs.knn_affinity``.
        n_nbrs: forwarded to ``costs.knn_affinity``.
        batch_sizes: dict; the 'Unlabeled' entry scales the loss.
        siamese_net: required when ``affinity == 'siamese'``; its outputs
            'A' and 'Labeled' and its ``predict`` method are used.
        x_train: data used to compute the affinity scale.
        have_labeled: when true, the lower-right block of the affinity
            matrix is replaced with label-derived affinities.
    """
    self.y_true = y_true
    self.y_train_labeled_onehot = y_train_labeled_onehot

    self.inputs = inputs
    self.batch_sizes = batch_sizes
    # generate layers
    self.layers = make_layer_list(arch[:-1], 'spectral', spec_reg)
    self.layers += [{
        'type': 'tanh',
        'size': n_clusters,
        'l2_reg': spec_reg,
        'name': 'spectral_{}'.format(len(arch) - 1)
    }, {
        'type': 'Orthonorm',
        'name': 'orthonorm'
    }]

    # create spectralnet
    self.outputs = stack_layers(self.inputs, self.layers)
    self.net = Model(inputs=self.inputs['Unlabeled'],
                     outputs=self.outputs['Unlabeled'])

    # DEFINE LOSS

    # generate affinity matrix W according to params
    if affinity == 'siamese':
        # Affinities are computed in the siamese embedding space.
        input_affinity = tf.concat(
            [siamese_net.outputs['A'], siamese_net.outputs['Labeled']],
            axis=0)
        x_affinity = siamese_net.predict(x_train, batch_sizes)
    elif affinity in ['knn', 'full']:
        # Affinities are computed directly on the raw inputs.
        input_affinity = tf.concat(
            [self.inputs['Unlabeled'], self.inputs['Labeled']], axis=0)
        x_affinity = x_train

    # calculate scale for affinity matrix
    scale = get_scale(x_affinity, self.batch_sizes['Unlabeled'], scale_nbr)

    # create affinity matrix
    if affinity == 'full':
        W = costs.full_affinity(input_affinity, scale=scale)
    elif affinity in ['knn', 'siamese']:
        W = costs.knn_affinity(input_affinity,
                               n_nbrs,
                               scale=scale,
                               scale_nbr=scale_nbr)

    # if we have labels, use them
    if have_labeled:
        # get true affinities (from labeled data)
        # 1.0 where two label rows are identical (zero distance), else 0.0.
        W_true = tf.cast(tf.equal(costs.squared_distance(y_true), 0),
                         dtype='float32')

        # replace lower right corner of W with W_true
        unlabeled_end = tf.shape(self.inputs['Unlabeled'])[0]
        W_u = W[:unlabeled_end, :]  # upper half
        W_ll = W[unlabeled_end:, :unlabeled_end]  # lower left
        W_l = tf.concat((W_ll, W_true), axis=1)  # lower half
        W = tf.concat((W_u, W_l), axis=0)

        # create pairwise batch distance matrix self.Dy
        self.Dy = costs.squared_distance(
            tf.concat([self.outputs['Unlabeled'], self.outputs['Labeled']],
                      axis=0))
    else:
        self.Dy = costs.squared_distance(self.outputs['Unlabeled'])

    # define loss
    # Affinity-weighted sum of output distances, scaled by the batch size.
    self.loss = K.sum(W * self.Dy) / (2 * batch_sizes['Unlabeled'])

    # create the train step update
    # Learning rate starts at 0; nothing in this method changes it.
    self.learning_rate = tf.Variable(0., name='spectral_net_learning_rate')
    self.train_step = tf.train.RMSPropOptimizer(
        learning_rate=self.learning_rate).minimize(
            self.loss, var_list=self.net.trainable_weights)

    # initialize spectralnet variables
    K.get_session().run(tf.global_variables_initializer())
    K.get_session().run(
        tf.variables_initializer(self.net.trainable_weights))