def _attention_regularizer(self, attention):
    """Penalize attention maps whose rows overlap, i.e. push A A^T toward
    the identity so each position attends mostly to itself."""
    batch_size = K.cast(K.shape(attention)[0], K.floatx())
    input_len = K.shape(attention)[-1]
    # Build an identity matrix of shape (input_len, input_len).
    indices = K.expand_dims(K.arange(0, input_len), axis=0)
    diagonal = K.expand_dims(K.arange(0, input_len), axis=-1)
    eye = K.cast(K.equal(indices, diagonal), K.floatx())
    # weight * ||A A^T - I||^2, averaged over the batch.
    return self.attention_regularizer_weight * K.sum(K.square(
        K.batch_dot(attention, K.permute_dimensions(attention, (0, 2, 1)))
        - eye)) / batch_size
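# A minimal NumPy sketch of the penalty above (the names here are
# illustrative, not part of the layer): ||A A^T - I||^2 is zero when each
# position attends only to itself and grows as attention spreads out.
import numpy as np

def attention_penalty(a, weight=1.0):
    """a: attention weights of shape (batch, length, length)."""
    batch, length = a.shape[0], a.shape[-1]
    eye = np.eye(length)
    return weight * np.sum(np.square(a @ a.transpose(0, 2, 1) - eye)) / batch

identity_attention = np.eye(4)[None]           # each step attends to itself
uniform_attention = np.full((1, 4, 4), 0.25)   # attention spread evenly
assert attention_penalty(identity_attention) == 0.0
assert attention_penalty(uniform_attention) > 0.0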
def yolo_head(graph, feats, anchors, num_classes):
    with graph.as_default():
        num_anchors = len(anchors)
        # Reshape to batch, height, width, num_anchors, box_params.
        anchors_tensor = K.reshape(K.variable(anchors),
                                   [1, 1, 1, num_anchors, 2])

        conv_dims = K.shape(feats)[1:3]
        conv_height_index = K.arange(0, stop=conv_dims[0])
        conv_width_index = K.arange(0, stop=conv_dims[1])
        conv_height_index = K.tile(conv_height_index, [conv_dims[1]])
        conv_width_index = K.tile(K.expand_dims(conv_width_index, 0),
                                  [conv_dims[0], 1])
        conv_width_index = K.flatten(K.transpose(conv_width_index))
        conv_index = K.transpose(K.stack([conv_height_index,
                                          conv_width_index]))
        conv_index = K.reshape(conv_index,
                               [1, conv_dims[0], conv_dims[1], 1, 2])
        conv_index = K.cast(conv_index, K.dtype(feats))

        feats = K.reshape(
            feats,
            [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
        conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]),
                           K.dtype(feats))

        box_xy = K.sigmoid(feats[..., :2])
        box_wh = K.exp(feats[..., 2:4])
        box_confidence = K.sigmoid(feats[..., 4:5])
        box_class_probs = K.softmax(feats[..., 5:])

        box_xy = (box_xy + conv_index) / conv_dims
        box_wh = box_wh * anchors_tensor / conv_dims

        return box_xy, box_wh, box_confidence, box_class_probs
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors),
                               [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
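# Illustrative NumPy sketch (not part of the model code) of how the grid
# offsets above turn per-cell predictions into grid-relative coordinates:
import numpy as np

grid_h, grid_w = 2, 3
grid_y, grid_x = np.meshgrid(np.arange(grid_h), np.arange(grid_w),
                             indexing='ij')
grid = np.stack([grid_x, grid_y], axis=-1)  # (h, w, 2), x before y

raw_xy = np.zeros((grid_h, grid_w, 2))      # raw network output
box_xy = (1 / (1 + np.exp(-raw_xy)) + grid) / np.array([grid_w, grid_h])
# sigmoid(0) = 0.5, so each box sits at the center of its own grid cell:
assert np.allclose(box_xy[1, 2], [(2 + 0.5) / grid_w, (1 + 0.5) / grid_h])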
def get_initial_state(self, inputs):
    if type(self.model.input) is not list:
        return []
    try:
        batch_size = K.int_shape(inputs)[0]
    except Exception:
        batch_size = None
    state_shapes = list(map(K.int_shape, self.model.input[1:]))
    states = []
    if self.readout:
        # The default value for initial_readout is handled in call().
        state_shapes.pop()
    for shape in state_shapes:
        if None in shape[1:]:
            raise Exception(
                'Only the batch dimension of a state can be left '
                'unspecified. Got state with shape ' + str(shape))
        if shape[0] is None:
            # Symbolic batch size: slice the input down to one value per
            # sample, then reshape and tile it up to the state shape so the
            # zero state inherits the unknown batch dimension.
            ndim = K.ndim(inputs)
            z = K.zeros_like(inputs)
            slices = [slice(None)] + [0] * (ndim - 1)
            z = z[slices]  # (batch_size,)
            state_ndim = len(shape)
            z = K.reshape(z, (-1,) + (1,) * (state_ndim - 1))
            z = K.tile(z, (1,) + tuple(shape[1:]))
            states.append(z)
        else:
            states.append(K.zeros(shape))
    state_initializer = self.state_initializer
    if state_initializer:
        # Some initializers don't accept symbolic shapes.
        for i in range(len(state_shapes)):
            if state_shapes[i][0] is None:
                if hasattr(self, 'batch_size'):
                    state_shapes[i] = (self.batch_size,) + state_shapes[i][1:]
            if None in state_shapes[i]:
                state_shapes[i] = K.shape(states[i])
        num_state_init = len(state_initializer)
        num_state = self.num_states
        assert num_state_init == num_state, (
            'RNN has ' + str(num_state) + ' states, but was provided ' +
            str(num_state_init) + ' state initializers.')
        for i in range(len(states)):
            init = state_initializer[i]
            shape = state_shapes[i]
            try:
                if not isinstance(init, initializers.Zeros):
                    states[i] = init(shape)
            except Exception:
                raise Exception(
                    'Seems the initializer ' + init.__class__.__name__ +
                    ' does not support symbolic shapes (' + str(shape) +
                    '). Try providing the full input shape (including the '
                    'batch dimension) for your RecurrentModel.')
    return states
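# Sketch of the symbolic-batch trick used above, in NumPy (shapes here are
# illustrative): slice the input down to one value per sample, then
# reshape/tile so the result inherits the batch dimension from the input.
import numpy as np

inputs = np.random.rand(5, 7, 3)   # batch of 5, arbitrary input shape
state_shape = (None, 4)            # batch dimension left unspecified
z = np.zeros_like(inputs)[(slice(None),) + (0,) * (inputs.ndim - 1)]  # (5,)
z = np.tile(z.reshape((-1,) + (1,) * (len(state_shape) - 1)),
            (1,) + tuple(state_shape[1:]))
assert z.shape == (5, 4) and not z.any()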
def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape,
              max_boxes=20,
              score_threshold=.6,
              iou_threshold=.5):
    """Evaluate YOLO model on given input and return filtered boxes."""
    num_layers = len(yolo_outputs)
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 \
        else [[3, 4, 5], [1, 2, 3]]  # default setting
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
    boxes = []
    box_scores = []
    for l in range(num_layers):
        _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
                                                    anchors[anchor_mask[l]],
                                                    num_classes, input_shape,
                                                    image_shape)
        boxes.append(_boxes)
        box_scores.append(_box_scores)
    boxes = K.concatenate(boxes, axis=0)
    box_scores = K.concatenate(box_scores, axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    boxes_ = []
    scores_ = []
    classes_ = []
    for c in range(num_classes):
        # TODO: use keras backend instead of tf.
        class_boxes = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
        nms_index = tf.image.non_max_suppression(class_boxes,
                                                 class_box_scores,
                                                 max_boxes_tensor,
                                                 iou_threshold=iou_threshold)
        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        classes = K.ones_like(class_box_scores, 'int32') * c
        boxes_.append(class_boxes)
        scores_.append(class_box_scores)
        classes_.append(classes)
    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)

    return boxes_, scores_, classes_
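# The per-class loop above keeps only boxes whose class score clears the
# threshold, then suppresses near-duplicates. A NumPy sketch of the greedy
# NMS step (illustrative only; the real code uses
# tf.image.non_max_suppression), with boxes as (y1, x1, y2, x2):
import numpy as np

def iou(box, boxes):
    """IoU of one box (4,) against many boxes (n, 4)."""
    tl = np.maximum(box[:2], boxes[:, :2])
    br = np.minimum(box[2:], boxes[:, 2:])
    inter = np.prod(np.clip(br - tl, 0, None), axis=1)
    area_box = (box[2] - box[0]) * (box[3] - box[1])
    area_boxes = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    return inter / (area_box + area_boxes - inter)

def nms(boxes, scores, iou_threshold=0.5):
    order = np.argsort(scores)[::-1]  # highest score first
    keep = []
    while order.size:
        keep.append(order[0])
        rest = order[1:]
        order = rest[iou(boxes[order[0]], boxes[rest]) <= iou_threshold]
    return keep

boxes = np.array([[0, 0, 10, 10], [1, 1, 10, 10], [20, 20, 30, 30]], float)
print(nms(boxes, np.array([0.9, 0.8, 0.7])))  # [0, 2]: duplicate suppressed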
def content_loss(base, combination):
    channel_dim = 0 if K.image_dim_ordering() == 'th' else -1
    channels = K.shape(base)[channel_dim]
    size = img_width * img_height
    # Optional normalization of the raw sum-of-squares content loss.
    if args.content_loss_type == 1:
        multiplier = 1 / (2.0 * (channels ** 0.5) * (size ** 0.5))
    elif args.content_loss_type == 2:
        multiplier = 1 / (channels * size)
    else:
        multiplier = 1.0
    return multiplier * K.sum(K.square(combination - base))
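# Worked example of the scaling choices above (assuming, say, a 64x64
# content image with 3 channels): type 1 divides by
# 2 * sqrt(channels) * sqrt(size), type 2 by channels * size, and any other
# value leaves the sum unscaled.
channels, size = 3, 64 * 64
print(1 / (2.0 * channels ** 0.5 * size ** 0.5))  # type 1: ~0.00451
print(1 / (channels * size))                      # type 2: ~0.0000814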
def _call_additive_emission(self, inputs):
    input_shape = K.shape(inputs)
    batch_size, input_len = input_shape[0], input_shape[1]
    # h_{t, t'} = \tanh(x_t^T W_t + x_{t'}^T W_x + b_h)
    q = K.expand_dims(K.dot(inputs, self.Wt), 2)
    k = K.expand_dims(K.dot(inputs, self.Wx), 1)
    if self.use_additive_bias:
        h = K.tanh(q + k + self.bh)
    else:
        h = K.tanh(q + k)
    # e_{t, t'} = W_a h_{t, t'} + b_a
    if self.use_attention_bias:
        e = K.reshape(K.dot(h, self.Wa) + self.ba,
                      (batch_size, input_len, input_len))
    else:
        e = K.reshape(K.dot(h, self.Wa),
                      (batch_size, input_len, input_len))
    return e
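# The two expand_dims calls above compute the pairwise sum by broadcasting.
# A NumPy sketch with illustrative shapes (batch=2, length=5, units=8):
import numpy as np

batch, length, units = 2, 5, 8
q = np.random.rand(batch, length, 1, units)  # x_t W_t, broadcast over t'
k = np.random.rand(batch, 1, length, units)  # x_{t'} W_x, broadcast over t
h = np.tanh(q + k)                           # one vector per (t, t') pair
assert h.shape == (batch, length, length, units)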
def call(self, inputs, mask=None, **kwargs):
    input_len = K.shape(inputs)[1]

    if self.attention_type == Attention.ATTENTION_TYPE_ADD:
        e = self._call_additive_emission(inputs)
    elif self.attention_type == Attention.ATTENTION_TYPE_MUL:
        e = self._call_multiplicative_emission(inputs)

    if self.attention_activation is not None:
        e = self.attention_activation(e)
    if self.attention_width is not None:
        # Restrict attention to a local window around each position (or to
        # the preceding positions only when history_only is set) by pushing
        # out-of-window logits toward -inf.
        if self.history_only:
            lower = K.arange(0, input_len) - (self.attention_width - 1)
        else:
            lower = K.arange(0, input_len) - self.attention_width // 2
        lower = K.expand_dims(lower, axis=-1)
        upper = lower + self.attention_width
        indices = K.expand_dims(K.arange(0, input_len), axis=0)
        e -= 10000.0 * (1.0 - K.cast(lower <= indices, K.floatx()) *
                        K.cast(indices < upper, K.floatx()))
    if mask is not None:
        mask = K.expand_dims(K.cast(mask, K.floatx()), axis=-1)
        e -= 10000.0 * ((1.0 - mask) *
                        (1.0 - K.permute_dimensions(mask, (0, 2, 1))))

    # a_{t} = \text{softmax}(e_t)
    e = K.exp(e - K.max(e, axis=-1, keepdims=True))
    a = e / K.sum(e, axis=-1, keepdims=True)

    # l_t = \sum_{t'} a_{t, t'} x_{t'}
    v = K.batch_dot(a, inputs)
    if self.attention_regularizer_weight > 0.0:
        self.add_loss(self._attention_regularizer(a))

    if self.return_attention:
        return [v, a]
    return v
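# Sketch of the attention-width band above (history_only=False, width=3):
# position t may attend to indices in [t - width // 2, t - width // 2 + width).
import numpy as np

input_len, width = 5, 3
lower = (np.arange(input_len) - width // 2)[:, None]
indices = np.arange(input_len)[None, :]
band = (lower <= indices) & (indices < lower + width)
# Each row t marks the positions t is allowed to attend to:
# [[1 1 0 0 0], [1 1 1 0 0], [0 1 1 1 0], [0 0 1 1 1], [0 0 0 1 1]]
print(band.astype(int))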
def yolo_loss(self, args, anchors, num_classes, ignore_thresh=.5,
              print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the IoU threshold below which a predicted box is
        ignored in the object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)
    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 \
        else [[3, 4, 5], [1, 2, 3]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32,
                         K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = self.yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes,
            input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find the ignore mask, iterating over each image in the batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(
            lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(
            object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(
                object_mask, raw_pred[..., 4:5],
                from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ], message='loss: ')
    return loss
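# The box_loss_scale term above weights small boxes more heavily:
# 2 - w * h approaches 2 for tiny boxes and 1 for boxes covering the image.
import numpy as np

w_h = np.array([[0.05, 0.05], [0.5, 0.5], [1.0, 1.0]])  # normalized w, h
print(2 - w_h[:, 0] * w_h[:, 1])  # [1.9975, 1.75, 1.0]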