def step(self, inputs, states):
    states = list(states)
    if self.teacher_force:
        readout = states.pop()
        ground_truth = states.pop()
        assert K.ndim(ground_truth) == 3, K.ndim(ground_truth)
        counter = states.pop()
        if K.backend() == 'tensorflow':
            with tf.control_dependencies(None):
                zero = K.cast(K.zeros((1,))[0], 'int32')
                one = K.cast(K.ones((1,))[0], 'int32')
        else:
            zero = K.cast(K.zeros((1,))[0], 'int32')
            one = K.cast(K.ones((1,))[0], 'int32')
        # At step t > 0, slice out the ground truth from step t - 1 and feed it
        # back in place of the model's own readout (teacher forcing, train phase only).
        slices = [slice(None), counter[0] - K.switch(counter[0], one, zero)] + \
                 [slice(None)] * (K.ndim(ground_truth) - 2)
        ground_truth_slice = ground_truth[slices]
        readout = K.in_train_phase(
            K.switch(counter[0], ground_truth_slice, readout), readout)
        states.append(readout)
    if self.decode:
        model_input = states
    else:
        model_input = [inputs] + states
    shapes = []
    for x in model_input:
        if hasattr(x, '_keras_shape'):
            shapes.append(x._keras_shape)
            del x._keras_shape  # Else keras internals will get messed up.
    model_output = _to_list(self.model.call(model_input))
    for x, s in zip(model_input, shapes):
        setattr(x, '_keras_shape', s)
    if self.decode:
        model_output.insert(1, model_input[0])
    for tensor in model_output:
        tensor._uses_learning_phase = self.uses_learning_phase
    states = model_output[1:]
    output = model_output[0]
    if self.readout:
        states += [output]
    if self.teacher_force:
        states.insert(-1, counter + 1)
        states.insert(-1, ground_truth)
    return output, states
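# Hedged sketch (not part of the snippet above): the teacher-forcing switch in
# isolation, assuming the tf.keras backend rather than the multi-backend Keras used
# by the original. `counter`, `ground_truth_slice` and `readout` are stand-in
# constants for the state tensors popped in step().
from tensorflow.keras import backend as K

counter = K.constant([3], dtype='int32')        # timestep counter carried in the RNN state
ground_truth_slice = K.constant([[0.9, 0.1]])   # target emitted at the previous timestep
readout = K.constant([[0.4, 0.6]])              # model's own previous output

# Past step 0, feed the ground truth back instead of the readout; K.in_train_phase
# falls back to the plain readout when not training.
fed_back = K.in_train_phase(
    K.switch(counter[0] > 0, ground_truth_slice, readout),
    readout,
    training=True)
print(K.eval(fed_back))  # [[0.9 0.1]]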
def test_switch(self):
    # Scalar condition: both backends should pick the same branch.
    val = np.random.random()
    xth = KTH.variable(val)
    xth = KTH.switch(xth >= 0.5, xth * 0.1, xth * 0.2)

    xtf = KTF.variable(val)
    xtf = KTF.switch(xtf >= 0.5, xtf * 0.1, xtf * 0.2)

    zth = KTH.eval(xth)
    ztf = KTF.eval(xtf)
    assert zth.shape == ztf.shape
    assert_allclose(zth, ztf, atol=1e-05)

    # Mismatched condition/expression shapes should raise.
    xth1 = KTH.variable(0.7)
    xth2 = KTH.variable([1, 0.2])
    with pytest.raises(ValueError):
        xth = KTH.switch(xth1 >= 0.5, xth2 * 0.1, xth2 * 0.2)

    # Element-wise condition: each entry selects its own branch.
    xth = KTH.switch(xth2 >= 0.5, xth2 * 0.1, xth2 * 0.2)
    assert_allclose(KTH.eval(xth), [0.1, 0.2 * 0.2], atol=1e-05)
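# Hedged sketch: the behaviour the test above checks, written against the single
# tf.keras backend instead of the Theano/TensorFlow pair (KTH/KTF) that the old
# multi-backend Keras test suite compares.
import numpy as np
from tensorflow.keras import backend as K

val = np.random.random()
x = K.variable(val)
y = K.switch(x >= 0.5, x * 0.1, x * 0.2)          # scalar condition picks one branch
expected = val * 0.1 if val >= 0.5 else val * 0.2
np.testing.assert_allclose(K.eval(y), expected, atol=1e-5)

cond = K.variable([1.0, 0.2])
z = K.switch(cond >= 0.5, cond * 0.1, cond * 0.2)  # element-wise selection
np.testing.assert_allclose(K.eval(z), [0.1, 0.04], atol=1e-5)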
def yolo_loss(self, args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)
    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 \
        else [[3, 4, 5], [1, 2, 3]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
                   for l in range(num_layers)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = self.yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes,
            input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] /
                            anchors[anchor_mask[l]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(
            lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(
            object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(
                object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss,
                                   class_loss, K.sum(ignore_mask)],
                            message='loss: ')
    return loss
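# Hedged sketch: the K.switch masking used above to keep log(0) = -inf out of the
# width/height loss, assuming the tf.keras backend. Toy shapes; `object_mask` marks
# grid cells that contain a ground-truth box.
from tensorflow.keras import backend as K

object_mask = K.constant([[1.0], [0.0]])         # 1 where a ground-truth box exists
true_wh = K.constant([[0.5, 0.25], [0.0, 0.0]])  # empty cells have w = h = 0

raw_true_wh = K.log(true_wh)                     # -inf in the empty cell
raw_true_wh = K.switch(object_mask > 0, raw_true_wh, K.zeros_like(raw_true_wh))
print(K.eval(raw_true_wh))  # approximately [[-0.693 -1.386] [ 0.  0.]]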
def step(self, a, states):
    r_tm1 = states[:self.nb_layers]
    c_tm1 = states[self.nb_layers:2 * self.nb_layers]
    e_tm1 = states[2 * self.nb_layers:3 * self.nb_layers]

    if self.extrap_start_time is not None:
        t = states[-1]
        # If past self.extrap_start_time, the previous prediction will be
        # treated as the actual input.
        a = K.switch(t >= self.t_extrap, states[-2], a)

    c = []
    r = []
    e = []

    # Update R units starting from the top
    for l in reversed(range(self.nb_layers)):
        inputs = [r_tm1[l], e_tm1[l]]
        if l < self.nb_layers - 1:
            inputs.append(r_up)

        inputs = K.concatenate(inputs, axis=self.channel_axis)
        i = self.conv_layers['i'][l].call(inputs)
        f = self.conv_layers['f'][l].call(inputs)
        o = self.conv_layers['o'][l].call(inputs)
        _c = f * c_tm1[l] + i * self.conv_layers['c'][l].call(inputs)
        _r = o * self.LSTM_activation(_c)
        c.insert(0, _c)
        r.insert(0, _r)

        if l > 0:
            r_up = self.upsample.call(_r)

    # Update feedforward path starting from the bottom
    for l in range(self.nb_layers):
        ahat = self.conv_layers['ahat'][l].call(r[l])
        if l == 0:
            ahat = K.minimum(ahat, self.pixel_max)
            frame_prediction = ahat

        # compute errors
        e_up = self.error_activation(ahat - a)
        e_down = self.error_activation(a - ahat)

        e.append(K.concatenate((e_up, e_down), axis=self.channel_axis))

        if self.output_layer_num == l:
            if self.output_layer_type == 'A':
                output = a
            elif self.output_layer_type == 'Ahat':
                output = ahat
            elif self.output_layer_type == 'R':
                output = r[l]
            elif self.output_layer_type == 'E':
                output = e[l]

        if l < self.nb_layers - 1:
            a = self.conv_layers['a'][l].call(e[l])
            a = self.pool.call(a)  # target for next layer

    if self.output_layer_type is None:
        if self.output_mode == 'prediction':
            output = frame_prediction
        else:
            for l in range(self.nb_layers):
                layer_error = K.mean(K.batch_flatten(e[l]), axis=-1, keepdims=True)
                all_error = layer_error if l == 0 else K.concatenate(
                    (all_error, layer_error), axis=-1)
            if self.output_mode == 'error':
                output = all_error
            else:
                output = K.concatenate(
                    (K.batch_flatten(frame_prediction), all_error), axis=-1)

    states = r + c + e
    if self.extrap_start_time is not None:
        states += [frame_prediction, t + 1]
    return output, states
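# Hedged sketch: the extrapolation switch above in isolation, assuming the tf.keras
# backend. Past `t_extrap`, the layer substitutes its previous frame prediction
# (states[-2]) for the actual input frame `a`.
from tensorflow.keras import backend as K

t = K.constant(6.0)                          # current timestep, carried in the state
t_extrap = K.constant(4.0)                   # timestep at which extrapolation starts
a = K.constant([[0.2, 0.8]])                 # actual input frame
prev_prediction = K.constant([[0.3, 0.7]])   # previous frame prediction

a = K.switch(t >= t_extrap, prev_prediction, a)
print(K.eval(a))  # [[0.3 0.7]] -- the prediction is used once t >= t_extrap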