def call(self, inputs, mask=None):
    """Add L2-normalized speech vectors into the life-long memory.

    Args:
        inputs: list of at least two tensors. ``inputs[0]`` carries the
            target speaker index per sample (reshaped to 1-D and cast to
            int32 here); ``inputs[1]`` carries the speech vectors, one row
            per sample.
        mask: unused.

    Returns:
        The memory after the increment, with every row divided by its
        L2 norm.

    Raises:
        TypeError: if ``inputs`` is not a list with at least two entries.
    """
    if not (isinstance(inputs, list) and len(inputs) > 1):
        raise TypeError('SpkLifeLongMemory must be called on a list of tensors '
                        '(at least 2). Got: ' + str(inputs))

    def _unit_rows(values):
        # Exact zeros are replaced by machine epsilon only while computing
        # the norm, so an all-zero row never produces a zero divisor.
        safe = K.switch(K.equal(values, 0.), np.spacing(1), values)
        norms = K.sqrt(K.sum(safe**2, axis=1))
        norms = norms.dimshuffle((0, 'x'))
        return T.true_div(values,
                          K.repeat_elements(norms, self.vec_dim, axis=1))

    # (None(batch), 1) -> (None(batch),), index of speaker
    speaker_idx = inputs[0]
    speaker_idx = K.reshape(speaker_idx, (speaker_idx.shape[0], ))
    if K.dtype(speaker_idx) != 'int32':
        speaker_idx = K.cast(speaker_idx, 'int32')

    # (None(batch), embed_dim), normalized before being stored
    unit_vectors = _unit_rows(inputs[1])

    # Accumulate each vector into the memory row of its speaker, then
    # renormalize the whole memory.
    updated_memory = T.inc_subtensor(self.life_long_mem[speaker_idx, :],
                                     unit_vectors)
    return _unit_rows(updated_memory)
def step(x):
    """Element-wise step function: 1 where ``x > 0``, otherwise 0.

    Uses TensorFlow ops when ``_BACKEND`` is ``'tensorflow'`` and
    ``K.switch`` for any other backend.
    """
    if _BACKEND != 'tensorflow':
        return K.switch(x > 0, 1, 0)

    import tensorflow as tf
    is_positive = tf.python.math_ops.greater(x, 0)
    return tf.select(is_positive, K.ones_like(x), K.zeros_like(x))
def time_distributed_masked_max(x, m, m_value=0.0):
    """Compute a masked max over axis 1 of ``x``.

    Positions where the mask is 0 are pushed to ``-inf`` before the max so
    they can never win; results that are still ``-inf`` afterwards (every
    position along axis 1 was masked) are replaced by ``m_value``.

    In:
        x - input; a 3D tensor
        m - mask; 0 marks positions to ignore. It is expanded by one
            trailing axis before being added to ``x``.
        m_value - value returned where everything was masked
            (default 0.0, the value that used to be hard-coded)

    Returns:
        ``x`` reduced with max over axis 1.
    """
    # place infinities where mask is off
    offset = K.switch(K.equal(m, 0.0), -numpy.inf, 0.0)
    x_with_inf = x + K.expand_dims(offset)
    x_max = K.max(x_with_inf, axis=1)
    # a max of -inf means no unmasked entry existed along axis 1
    return K.switch(K.equal(x_max, -numpy.inf), m_value, x_max)
def accumulate(attend_function, inputs, input_length, mask=None, return_probabilities=False):
    '''Get the running attention over a sequence.

    Given a tensor with at least 3 dims, the first two axes are swapped and
    ``attend_function`` is applied ``input_length`` times. At step ``i``
    only the entries at positions ``<= i`` along the (new) leading axis are
    kept; everything after is replaced by zeros. In other words: at the
    first step only the first item is visible, at the second step the first
    two items, and so on.

    This is basically a mod on keras' rnn implementation.
    author: bcm

    Args:
        attend_function: callable invoked as ``attend_function(inputs_i)``
            or, when a mask is given, ``attend_function(inputs_i, mask_i)``.
            It must return a 2-dim tensor (the time dimension marginalized
            out), or a ``(output, p_vectors)`` pair when
            ``return_probabilities`` is true.
        inputs: tensor with ``ndim >= 3``.
        input_length: Python int, number of steps to accumulate over.
        mask: optional tensor with ``ndim`` or ``ndim - 1`` dims.
        return_probabilities: also collect and return the probability
            vectors produced by ``attend_function``.

    Returns:
        The stacked per-step outputs with the first two axes swapped back,
        or ``[outputs, probability_vectors]`` when ``return_probabilities``
        is true.
    '''
    ndim = inputs.ndim
    assert ndim >= 3, 'inputs should be at least 3d'

    axes = [1, 0] + list(range(2, ndim))
    inputs = inputs.dimshuffle(axes)

    if mask is not None:
        if mask.ndim == ndim - 1:
            mask = K.expand_dims(mask)
        assert mask.ndim == ndim
        mask = mask.dimshuffle(axes)

    successive_outputs = []
    successive_pvecs = []

    # Position indices shaped so they broadcast against `inputs`.
    uncover_mask = K.zeros_like(inputs)
    uncover_indices = K.arange(input_length)
    for _ in range(ndim - 1):
        uncover_indices = K.expand_dims(uncover_indices)

    def make_subset(i, X):
        # keep positions <= i, zero out the rest
        return K.switch(uncover_indices <= i, X, uncover_mask)

    for i in range(input_length):
        inputs_i = make_subset(i, inputs)
        if mask is not None:
            # Only build the mask subset when a mask exists; the previous
            # code passed None into K.switch on the mask-less path.
            mask_i = make_subset(i, mask)
            output = attend_function(inputs_i, mask_i)
        else:
            output = attend_function(inputs_i)
        if return_probabilities:
            output, p_vectors = output
            successive_pvecs.append(p_vectors)
        assert output.ndim == 2, "Your attention function is malfunctioning; the attention accumulator should return 2 dimensional tensors"
        successive_outputs.append(output)

    # NOTE: the original also called K.squeeze(..., -1) on the packed
    # tensors but discarded the result, so it had no effect; those calls
    # are dropped here rather than silently changing the output shape.
    outputs = K.pack(successive_outputs)
    axes = [1, 0] + list(range(2, outputs.ndim))
    outputs = outputs.dimshuffle(axes)

    if return_probabilities:
        out_pvecs = K.pack(successive_pvecs)
        out_pvecs = out_pvecs.dimshuffle(axes)
        outputs = [outputs, out_pvecs]

    return outputs
def residual_drop(x, input_shape, output_shape, strides=(1, 1)):
    """Build a two-convolution residual block whose residual path can be gated off.

    Args:
        x: input tensor of the block.
        input_shape: shape tuple of ``x``; index 0 is compared against
            ``output_shape[0]`` to decide whether the skip path is widened.
        output_shape: shape tuple of the block output; index 0 is used as
            the number of filters.
        strides: convolution subsample for the first convolution, also used
            to average-pool the skip path when ``strides[0] >= 2``.

    Returns:
        A ``Lambda`` output that selects between the full block and the
        skip path based on a per-block gate variable.

    Side effects:
        Appends ``{"death_rate": ..., "gate": ...}`` to the module-level
        ``add_tables``; reads the module-level ``death_rate``.
    """
    global add_tables

    n_filters = output_shape[0]

    # Residual branch: conv -> BN -> relu -> conv -> BN.
    branch = Convolution2D(n_filters, 3, 3, subsample=strides, border_mode="same")(x)
    branch = BatchNormalization(axis=1)(branch)
    branch = Activation("relu")(branch)
    branch = Convolution2D(n_filters, 3, 3, border_mode="same")(branch)
    branch = BatchNormalization(axis=1)(branch)

    # Skip branch: match spatial size, then channel count.
    if strides[0] >= 2:
        x = AveragePooling2D(strides)(x)

    extra_channels = output_shape[0] - input_shape[0]
    if extra_channels > 0:
        pad_shape = (1, extra_channels, output_shape[1], output_shape[2])
        # NOTE(review): the pad is filled with ones, not zeros — confirm
        # this is intended.
        padding = K.ones(pad_shape)
        padding = K.repeat_elements(padding, K.shape(x)[0], axis=0)
        x = Lambda(lambda y: K.concatenate([y, padding], axis=1),
                   output_shape=output_shape)(x)

    # In the test phase the residual branch is scaled by (1 - death_rate).
    _death_rate = K.variable(death_rate)
    scale = K.ones_like(branch) - _death_rate
    branch = Lambda(lambda c: K.in_test_phase(scale * c, c),
                    output_shape=output_shape)(branch)

    merged = merge([branch, x], mode="sum")
    merged = Activation("relu")(merged)

    # gate == 1 keeps the block, otherwise only the skip path is used.
    gate = K.variable(1, dtype="uint8")
    add_tables += [{"death_rate": _death_rate, "gate": gate}]
    gated = Lambda(lambda tensors: K.switch(gate, tensors[0], tensors[1]),
                   output_shape=output_shape)
    return gated([merged, x])
def stock_loss(y_true, y_pred):
    """Absolute error with an extra penalty when the predicted sign is wrong.

    Where ``y_true * y_pred < 0`` the loss is
    ``alpha * y_pred**2 - sign(y_true) * y_pred + |y_true|`` with
    ``alpha = 100``; elsewhere it is ``|y_true - y_pred|``. The result is
    averaged over the last axis.
    """
    alpha = 100.
    wrong_direction = K.less(y_true * y_pred, 0)
    penalized = alpha*y_pred**2 - K.sign(y_true)*y_pred + K.abs(y_true)
    absolute_error = K.abs(y_true - y_pred)
    per_element = K.switch(wrong_direction, penalized, absolute_error)
    return K.mean(per_element, axis=-1)
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    '''Return yolo_loss tensor

    Parameters
    ----------
    args: list of tensors; the first three entries are used as
        yolo_outputs (presumably the output of yolo_body) and the remaining
        entries as y_true (presumably the output of preprocess_true_boxes)
    anchors: array indexed with the rows listed in anchor_mask
        (original note: shape=(T, 2), wh)
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: the summed box, confidence and class losses of the three output
        layers, divided by the size of the first axis of yolo_outputs[0]

    '''
    # Split the flat argument list into predictions and targets.
    yolo_outputs = args[:3]
    y_true = args[3:]
    # Which anchor rows belong to which of the three output layers.
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]]
    # Grid size of the first output times 32, cast to the dtype of y_true.
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(3)]
    loss = 0
    # Size of the first axis (presumably the batch size), as a tensor.
    m = K.shape(yolo_outputs[0])[0]

    for l in range(3):
        # Channel 4 is used as the object mask, channels 5+ as class targets.
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        pred_xy, pred_wh, pred_confidence, pred_class_probs = yolo_head(yolo_outputs[l],
             anchors[anchor_mask[l]], num_classes, input_shape)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet box loss.
        xy_delta = (y_true[l][..., :2]-pred_xy)*grid_shapes[l][::-1]
        wh_delta = K.log(y_true[l][..., 2:4]) - K.log(pred_wh)
        # Avoid log(0)=-inf.
        wh_delta = K.switch(object_mask, wh_delta, K.zeros_like(wh_delta))
        box_delta = K.concatenate([xy_delta, wh_delta], axis=-1)
        # Scale factor 2 - w*h, computed from the true box channels 2 and 3.
        box_delta_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')
        def loop_body(b, ignore_mask):
            # True boxes of sample b, selected by the object mask.
            true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            # 1 where the best IoU is below the threshold, else 0.
            ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
            return b+1, ignore_mask
        # loop_body closes over `l`; the loop is built right here, within
        # the same iteration, so it sees the current layer index.
        _, ignore_mask = K.control_flow_ops.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        box_loss = object_mask * K.square(box_delta*box_delta_scale)
        # Object cells are pushed toward confidence 1; non-object cells
        # toward 0, but only where ignore_mask is 1.
        confidence_loss = object_mask * K.square(1-pred_confidence) + \
            (1-object_mask) * K.square(0-pred_confidence) * ignore_mask
        class_loss = object_mask * K.square(true_class_probs-pred_class_probs)
        loss += K.sum(box_loss) + K.sum(confidence_loss) + K.sum(class_loss)
    return loss / K.cast(m, K.dtype(loss))
def get_output(self, train=False):
    """Return the layer output.

    When ``train`` is true, only the entries equal to the maximum over
    axes (2, 3) are kept and every other entry is set to 0. Otherwise the
    input is returned unchanged.
    """
    X = self.get_input(train)
    if not train:
        return X

    peak = K.max(X, axis=(2, 3), keepdims=True)
    is_peak = K.equal(X, peak)
    return K.switch(is_peak, X, 0.)
def call(self, x, mask=None):
    """Compose head, preposition and child encodings and return label probabilities.

    The preposition and child encodings are read from the encoded sentence
    at ``prep_indices`` and ``prep_indices + 1``. The head encoding is
    either selected through the attachment probabilities in ``x[2]`` (when
    ``self.with_attachment_probs`` is set) or taken from the position just
    before the preposition. ``mask`` is not used by the active code.
    """
    # x[0]: (batch_size, input_length, input_dim)
    # x[1]: (batch_size, 1) indices of prepositions
    # Optional: x[2]: (batch_size, input_length - 2)
    assert isinstance(x, list) or isinstance(x, tuple)
    encoded_sentence = x[0]
    prep_indices = K.squeeze(x[1], axis=-1)  #(batch_size,)
    batch_indices = K.arange(K.shape(encoded_sentence)[0])  # (batch_size,)
    if self.with_attachment_probs:
        # We're essentially doing K.argmax(x[2]) here, but argmax is not differentiable!
        head_probs = x[2]
        # Two zero columns are appended so the probabilities line up with
        # the sentence length.
        head_probs_padding = K.zeros_like(x[2])[:, :2]  # (batch_size, 2)
        # (batch_size, input_length)
        padded_head_probs = K.concatenate([head_probs, head_probs_padding])
        # (batch_size, 1)
        max_head_probs = K.expand_dims(K.max(padded_head_probs, axis=1))
        # (batch_size, input_length, 1)
        # True at every position that ties for the row maximum.
        max_head_prob_indices = K.expand_dims(K.equal(padded_head_probs, max_head_probs))
        # (batch_size, input_length, input_dim)
        masked_head_encoding = K.switch(max_head_prob_indices, encoded_sentence, K.zeros_like(encoded_sentence))
        # (batch_size, input_dim)
        # Summing collapses the masked sequence; tied maxima are added together.
        head_encoding = K.sum(masked_head_encoding, axis=1)
    else:
        head_indices = prep_indices - 1  # (batch_size,)
        head_encoding = encoded_sentence[batch_indices, head_indices, :]  # (batch_size, input_dim)
    prep_encoding = encoded_sentence[batch_indices, prep_indices, :]  # (batch_size, input_dim)
    child_encoding = encoded_sentence[batch_indices, prep_indices+1, :]  # (batch_size, input_dim)
    # The string literal below is disabled code kept by the original author.
    '''
    prep_indices = x[1]
    sentence_mask = mask[0]
    if sentence_mask is not None:
        if K.ndim(sentence_mask) > 2:
            # This means this layer came after a Bidirectional layer. Keras has this bug which
            # concatenates input masks instead of output masks.
            # TODO: Fix Bidirectional instead.
            sentence_mask = K.any(sentence_mask, axis=(-2, -1))
    head_encoding, prep_encoding, child_encoding = self.get_split_averages(encoded_sentence, sentence_mask,
                                                                           prep_indices)
    '''
    head_projection = K.dot(head_encoding, self.proj_head)  # (batch_size, proj_dim)
    prep_projection = K.dot(prep_encoding, self.proj_prep)  # (batch_size, proj_dim)
    child_projection = K.dot(child_encoding, self.proj_child)  # (batch_size, proj_dim)
    #(batch_size, proj_dim)
    if self.composition_type == 'HPCT':
        # All three projections combined under a single tanh.
        composed_projection = K.tanh(head_projection + prep_projection + child_projection)
    elif self.composition_type == 'HPC':
        # Preposition and child are composed first, then combined with the head.
        prep_child_projection = K.tanh(prep_projection + child_projection)  # (batch_size, proj_dim)
        composed_projection = K.tanh(head_projection + prep_child_projection)
    else:
        # Composition type in HC
        composed_projection = K.tanh(head_projection + child_projection)
    for hidden_layer in self.hidden_layers:
        composed_projection = K.tanh(K.dot(composed_projection, hidden_layer))  # (batch_size, proj_dim)
    # (batch_size, num_classes)
    class_scores = K.dot(composed_projection, self.scorer)
    label_probabilities = K.softmax(class_scores)
    return label_probabilities
def _gen_local_drops(self, count, p):
    """Create a local drop-path array that keeps at least one path.

    A random array is drawn with ``self._random_arr(count, p)``; if none of
    its entries is set, the result of ``self._arr_with_one(count)`` is
    used instead.
    """
    candidate = self._random_arr(count, p)
    has_active_path = K.any(candidate)
    fallback = self._arr_with_one(count)
    return K.switch(has_active_path, candidate, fallback)
def _drop_path(self, inputs):
    """Average the inputs that survive the drop-path selection.

    The drop weights come from the global path generator when
    ``self.is_global`` is set and from the local generator otherwise. Each
    input is multiplied by its weight, the products are summed, and the
    sum is divided by the total weight unless that total is 0.
    """
    count = len(inputs)
    drops = K.switch(
        self.is_global,
        self._gen_global_path(count),
        self._gen_local_drops(count, self.p),
    )

    ave = K.zeros(shape=self.average_shape)
    for idx, tensor in enumerate(inputs):
        ave += tensor * drops[idx]

    # Global drop-path can switch every path off, in which case the total
    # weight is 0; skip the division then to avoid divByZero.
    total_weight = K.sum(drops)
    return K.switch(K.not_equal(total_weight, 0.), ave/total_weight, ave)
def get_gradients(self, loss, params):
    """Return the gradients of ``loss`` w.r.t. ``params`` after optional post-processing.

    Applied in order, each only when the attribute exists on ``self``:
    scaling by ``self.scale`` (when it is not 1), rescaling by the global
    L2 norm when that norm reaches ``self.clipnorm`` (when it is > 0), and
    element-wise clipping to ``[-self.clipvalue, self.clipvalue]`` (when
    it is > 0).
    """
    grads = K.gradients(loss, params)

    if hasattr(self, 'scale') and self.scale != 1:
        scaled = []
        for grad in grads:
            scaled.append(grad*K.variable(self.scale))
        grads = scaled

    if hasattr(self, 'clipnorm') and self.clipnorm > 0:
        # Global norm over all gradients together.
        squared_sums = [K.sum(K.square(grad)) for grad in grads]
        norm = K.sqrt(sum(squared_sums))
        rescaled = []
        for grad in grads:
            rescaled.append(
                K.switch(norm >= self.clipnorm, grad * self.clipnorm / norm, grad))
        grads = rescaled

    if hasattr(self, 'clipvalue') and self.clipvalue > 0:
        clipped = []
        for grad in grads:
            clipped.append(K.clip(grad, -self.clipvalue, self.clipvalue))
        grads = clipped

    return grads
def jacc_coef_th(y_true, y_pred, smooth=smooth_default):
    """Smoothed Jaccard coefficient that is damped below a threshold.

    The coefficient is computed on the flattened tensors as
    ``(intersection + smooth) / (sum(y_true) + sum(y_pred) - intersection + smooth)``.
    Values above 0.65 are returned unchanged; all other values are
    multiplied by 0.1.
    """
    truth = K.flatten(y_true)
    prediction = K.flatten(y_pred)

    overlap = K.sum(truth * prediction)
    union = K.sum(truth) + K.sum(prediction) - overlap
    jacc = (overlap + smooth) / (union + smooth)

    return K.switch(jacc > 0.65, jacc, jacc * 0.1)
def __call__(self, loss):
    """Add a penalty for layer outputs whose sum falls below a minimum.

    For each sample the layer output is summed over axes (1, 2, 3). Sums
    at or below ``mask_size**2 * min_covered`` are penalized with
    ``factor * (sum - minimum)**2``; larger sums add nothing. The mean
    penalty is added to ``loss`` in the training phase only.

    Raises:
        Exception: if ``set_layer`` has not been called first.
    """
    if not hasattr(self, 'layer'):
        raise Exception('Need to call `set_layer` on '
                        'MaskRegularizer instance '
                        'before calling the instance.')

    min_tag_size = self.mask_size**2 * self.min_covered
    factor = min_tag_size / self.max_loss

    covered = self.layer.output.sum(axis=(1, 2, 3))
    shortfall_penalty = factor*(covered - min_tag_size)**2
    reg_loss = K.switch(covered <= min_tag_size, shortfall_penalty, 0)

    return K.in_train_phase(loss + reg_loss.mean(), loss)
def step(self, x, states):
    """One recurrent step whose pre-activation refreshes only every ``self.period`` steps.

    ``states`` is read as ``[h_tm1, t, p_tm1]``: the previous output, the
    step counter and the previous pre-activation. When ``t[0]`` is a
    multiple of ``self.period`` the new pre-activation is used, otherwise
    the previous one is carried over.

    Returns:
        ``(h, [h, t + 1, p_t])``.
    """
    # assert len(states) == 3
    h_tm1, t, p_tm1 = states[0], states[1], states[2]

    input_term = K.dot(x, self.xh) + self.b
    candidate = input_term + K.dot(h_tm1, self.hh * self.mask)

    on_period = K.equal(t[0] % self.period, 0)
    p_t = K.switch(on_period, candidate, p_tm1)

    h = self.activation(p_t)
    return h, [h, t+1, p_t]
def step(self, x, states):
    """One recurrent step that updates the output only on period boundaries.

    ``states`` is read as ``[prev_output, time_step, B_U, B_W, period]``.
    The candidate output is used where ``time_step % period == 0``; the
    previous output is kept elsewhere.

    Returns:
        ``(output, [output, time_step + 1])``.
    """
    prev_output, time_step = states[0], states[1]
    B_U, B_W = states[2], states[3]
    period = states[4]

    # With consume_less == 'cpu' the input is used as-is instead of being
    # projected here.
    h = x if self.consume_less == 'cpu' else K.dot(x * B_W, self.W) + self.b

    candidate = self.activation(h + K.dot(prev_output * B_U, self.U))
    on_period = K.equal(time_step % period, 0.)
    output = K.switch(on_period, candidate, prev_output)
    return output, [output, time_step+1]
def basic_block(self, z, nb_filter, column, reset_gates=True):
    """Recursively build a gated block with ``column`` levels of nesting.

    At level 0 the block is just ``self.fc_block``. At higher levels two
    blocks of the next lower level are chained, averaged with the direct
    ``fc_block`` path, passed through relu, and a gate variable (recorded
    in ``self.gates[column]``) selects between the direct path and the
    merged one.

    Args:
        z: input tensor.
        nb_filter: passed through to ``self.fc_block``.
        column: nesting depth of this block.
        reset_gates: when true, ``self.flush_gates(column)`` is called first.
    """
    if reset_gates:
        self.flush_gates(column)

    direct = self.fc_block(z, nb_filter)
    if column < 1:
        return direct

    inner = self.basic_block(z, nb_filter, column-1, False)
    inner = self.basic_block(inner, nb_filter, column-1, False)

    merged = merge([direct, inner], mode='ave')
    merged = Activation("relu")(merged)

    gate = K.variable(1, dtype="uint8")
    self.gates[column].append(gate)

    selector = Lambda(lambda outputs: K.switch(gate, outputs[0], outputs[1]),
                      output_shape=lambda x: x[0])
    return selector([direct, merged])
def _shortcut(input, residual):
    """Merge ``input`` with ``residual`` and gate between shortcut and merged output.

    If the two tensors differ in spatial size or channel count, ``input``
    is first passed through a 1x1 convolution that brings it to the shape
    of ``residual``. The shortcut is passed through relu, summed with
    ``residual`` and passed through relu again. A gate variable, registered
    in the module-level ``gates`` under ``'residual_<n>'`` together with a
    decay rate of 1, selects between the shortcut alone and the merged
    output.

    Args:
        input: tensor entering the block; its ``_keras_shape`` is read.
        residual: tensor produced by the block; its ``_keras_shape`` is read.

    Returns:
        The output of the gating ``Lambda`` layer, named like its gate entry.
    """
    # Integer division: the strides are handed to `subsample`, and plain
    # `/` would produce floats on Python 3.
    stride_width = input._keras_shape[2] // residual._keras_shape[2]
    stride_height = input._keras_shape[3] // residual._keras_shape[3]
    equal_channels = residual._keras_shape[1] == input._keras_shape[1]

    shortcut = input
    if stride_width > 1 or stride_height > 1 or not equal_channels:
        # 1x1 projection so the shapes match before the sum.
        shortcut = Convolution2D(nb_filter=residual._keras_shape[1], nb_row=1, nb_col=1,
                                 subsample=(stride_width, stride_height),
                                 init="he_normal", border_mode="valid",
                                 W_regularizer=l2(weight_decay))(input)
    shortcut = Activation("relu")(shortcut)

    M1 = merge([shortcut, residual], mode="sum")
    M1 = Activation("relu")(M1)

    gate = K.variable(0.0, dtype="uint8")
    decay_rate = 1
    name = 'residual_'+str(len(gates)+1)
    gates[name] = [decay_rate, gate]

    # A non-zero gate selects the bare shortcut, zero selects the merged output.
    return Lambda(lambda outputs: K.switch(gate, outputs[0], outputs[1]),
                  output_shape=lambda x: x[0], name=name)([shortcut, M1])
def _dream_step(x, states):
    """Run one step of the LSTM stack on the previous step's output.

    The incoming ``x`` is ignored; the last entry of ``states`` is used
    instead, converted to an indicator that is 1 at the positions holding
    the maximum over the last axis and 0 elsewhere. The remaining states
    are consumed pairwise by ``self.lstms``.

    Returns:
        ``(final, new_states)`` where ``new_states`` holds the new state
        pairs of every LSTM followed by one extra entry.
    """
    # input + states
    assert len(states) == 2*self.depth + 1

    previous = states[-1]
    peak = K.max(previous, axis=-1, keepdims=True)
    x = K.switch(K.equal(previous, peak), 1., 0.)

    recurrent = states[:-1]
    h = []
    pairs = zip(recurrent[:-1:2], recurrent[1::2])
    for i, (h_tm1, c_tm1) in enumerate(pairs):
        x, new_states = self.lstms[i].step(x, [h_tm1, c_tm1])
        h.extend(new_states)

    if self.readout:
        h.append(self.readout_layer(h[-2]))
        final = h[-1]
    else:
        h.append(h[-2])
        # NOTE(review): after the append, h[-2] is the entry that was last
        # before it, not the value just appended — confirm this is intended.
        final = h[-2]
    return final, h
def get_model():
    """Build and compile the convolutional classifier.

    The input of shape ``(nrows, ncols)`` is mapped to ``log2(x)`` where
    ``x > 0`` and to 0 elsewhere, reshaped to one channel, passed through
    three 3x3 convolutions with 32 filters and relu, flattened, and fed
    through a 500-unit relu layer into a 4-way softmax. The model is
    compiled with mse loss and the adam optimizer.
    """
    model = Sequential()

    # log2 of the positive entries; non-positive entries become 0
    model.add(Lambda(
        lambda x: K.switch(K.T.le(x, 0), 0, K.T.log2(x)),
        input_shape=(nrows, ncols)
    ))
    model.add(Reshape((1, nrows, ncols)))

    for _ in range(3):
        model.add(Convolution2D(32, 3, 3, border_mode='same'))
        model.add(Activation("relu"))

    model.add(Flatten())
    # model.add(Dense(500))
    # model.add(BatchNormalization())
    # model.add(Activation("relu"))
    # model.add(Dropout(0.5))
    model.add(Dense(500))
    # model.add(BatchNormalization())
    model.add(Activation("relu"))
    # model.add(Dropout(0.5))
    model.add(Dense(output_dim=4))
    model.add(Activation("softmax"))

    model.compile(loss='mse', optimizer="adam")
    return model
def clip_norm(g, c, n):
    """Rescale gradient ``g`` to norm ``c`` when the norm ``n`` reaches ``c``.

    Args:
        g: gradient tensor (on TensorFlow possibly an object exposing
            ``dense_shape`` instead of ``get_shape``).
        c: clipping threshold; nothing is done unless ``c > 0``.
        n: norm to compare against ``c``.

    Returns:
        ``g * c / n`` where ``n >= c``, otherwise ``g``.
    """
    if not c > 0:
        return g

    if K.backend() != 'tensorflow':
        return K.switch(n >= c, g * c / n, g)

    import tensorflow as tf
    import copy

    condition = n >= c
    then_expression = tf.scalar_mul(c / n, g)
    else_expression = g

    # Remember the shape so it can be restored after `cond`.
    has_static_shape = hasattr(then_expression, 'get_shape')
    has_dense_shape = hasattr(then_expression, 'dense_shape')
    if has_static_shape:
        g_shape = copy.copy(then_expression.get_shape())
    elif has_dense_shape:
        g_shape = copy.copy(then_expression.dense_shape)

    if condition.dtype != tf.bool:
        condition = tf.cast(condition, 'bool')

    g = K.tensorflow_backend.control_flow_ops.cond(
        condition,
        lambda: then_expression,
        lambda: else_expression)

    if has_static_shape:
        g.set_shape(g_shape)
    elif has_dense_shape:
        g._dense_shape = g_shape
    return g
def get_updates(self, params, constraints, loss):
    """Build the update ops for an Adam-like rule scaled by a loss-feedback term.

    A smoothed loss ``loss_hat`` is obtained by clamping the ratio of the
    current loss to the previous smoothed loss between bounds derived from
    ``self.thl`` and ``self.thu``. The coefficient ``d_t`` is a running
    average (rate ``self.beta_3``) of the relative change of the smoothed
    loss and multiplies the second-moment term in the denominator of the
    parameter step.

    Args:
        params: parameters to update.
        constraints: accepted for interface compatibility; not used here.
        loss: loss tensor.

    Returns:
        ``self.updates``, the list of update ops.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    t = self.iterations + 1
    past_first_step = K.greater(t, 1)

    loss_prev = K.variable(0)
    ms = [K.zeros(K.get_variable_shape(p)) for p in params]
    vs = [K.zeros(K.get_variable_shape(p)) for p in params]

    # Bounds for the loss ratio depend on whether the loss went up or down.
    loss_rose = K.greater(loss, loss_prev)
    ratio_floor = K.switch(loss_rose, 1+self.thl, 1/(1+self.thu))
    ratio_ceiling = K.switch(loss_rose, 1+self.thu, 1/(1+self.thl))

    ratio = loss / loss_prev
    ratio = K.switch(K.lesser(ratio, ratio_floor), ratio_floor, ratio)
    ratio = K.switch(K.greater(ratio, ratio_ceiling), ratio_ceiling, ratio)

    # On the very first step the raw loss is used.
    loss_hat = K.switch(past_first_step, loss_prev * ratio, loss)

    # Relative change, measured against the smaller of the two losses.
    d_den = K.switch(K.greater(loss_hat, loss_prev), loss_prev, loss_hat)
    relative_change = K.abs((loss_hat - loss_prev) / d_den)
    d_t = (self.beta_3 * self.d) + (1. - self.beta_3) * relative_change
    d_t = K.switch(past_first_step, d_t, 1.)
    self.updates.append(K.update(self.d, d_t))

    for p, g, m, v in zip(params, grads, ms, vs):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        mhat_t = m_t / (1. - K.pow(self.beta_1, t))
        self.updates.append(K.update(m, m_t))

        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
        vhat_t = v_t / (1. - K.pow(self.beta_2, t))
        self.updates.append(K.update(v, v_t))

        step_size = self.lr / (1. + (self.iterations * self.decay))
        p_t = p - step_size * mhat_t / ((K.sqrt(vhat_t) * d_t) + self.epsilon)
        self.updates.append(K.update(p, p_t))

    self.updates.append(K.update(loss_prev, loss_hat))
    return self.updates
def step(x, alpha=0):
    """Return 0 when ``alpha`` is truthy and 1 otherwise, via ``K.switch``.

    ``x`` is accepted but not used.
    """
    selected = K.switch(alpha, 0, 1)
    return selected
def _mask_loss(y_true, y_pred, y_mask, element_wise_loss):
    """Average an element-wise loss over the positions selected by ``y_mask``.

    Positions where the mask is off contribute 0. The summed loss is
    divided by the sum of the mask (cast to float32) plus ``K.epsilon()``.
    """
    raw_loss = element_wise_loss(y_true, y_pred)
    zeros = K.zeros_like(y_mask, dtype=K.floatx())
    masked_loss = K.switch(y_mask, raw_loss, zeros)

    active_count = K.cast(K.sum(y_mask), dtype='float32')
    return K.sum(masked_loss) / (active_count + K.epsilon())
def get_updates(self, loss, params):
    """Build the update ops for an Adam variant with a variance-rectified step.

    For every parameter the first and second moments are updated as in
    Adam. When ``N_sma > 5`` the step uses the rectified, variance-adapted
    form; otherwise it falls back to a bias-corrected momentum step.
    Weight decay (when ``self.weight_decay`` is non-zero) is applied to the
    parameter before the step, and a parameter's ``constraint`` (if any) is
    applied to the result.

    Returns:
        ``self.updates``, the list of update ops.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        decay_steps = K.cast(self.iterations, K.dtype(self.decay))
        lr = lr * (1. / (1. + self.decay * decay_steps))

    t = K.cast(self.iterations, K.floatx()) + 1

    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    self.weights = [self.iterations] + ms + vs

    for p, g, m, v in zip(params, grads, ms, vs):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

        beta2_t = self.beta_2 ** t
        N_sma_max = 2 / (1 - self.beta_2) - 1
        N_sma = N_sma_max - 2 * t * beta2_t / (1 - beta2_t)

        # apply weight decay
        if self.weight_decay != 0.:
            base = p - self.weight_decay * lr * p
        else:
            base = p

        def rectified_step():
            # Adaptive step with the rectification term.
            rectifier = K.sqrt(
                (1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2))
            step_size = lr * rectifier / (1 - self.beta_1 ** t)
            denom = K.sqrt(v_t) + self.epsilon
            return base - step_size * (m_t / denom)

        def momentum_step():
            # Plain bias-corrected momentum step.
            step_size = lr / (1 - self.beta_1 ** t)
            return base - step_size * m_t

        new_p = K.switch(N_sma > 5, rectified_step, momentum_step)

        self.updates.extend([K.update(m, m_t), K.update(v, v_t)])

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(K.update(p, new_p))
    return self.updates
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    args: list of tensors; the first num_layers entries are used as
        yolo_outputs (presumably the output of yolo_body or tiny_yolo_body)
        and the remaining entries as y_true (presumably the output of
        preprocess_true_boxes)
    anchors: array, shape=(N, 2), wh; num_layers is len(anchors) // 3
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss
    print_loss: when true, the loss is wrapped in tf.Print after each layer

    Returns
    -------
    loss: the sum over all layers of the xy, wh, confidence and class
        losses, each divided by the size of the first axis of
        yolo_outputs[0]

    '''
    num_layers = len(anchors) // 3  # default setting
    # Split the flat argument list into predictions and targets.
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    # Which anchor rows belong to which output layer.
    # NOTE(review): in the two-layer case index 3 appears in both masks
    # and index 0 in neither — confirm this is intended.
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    # Grid size of the first output times 32, cast to the dtype of y_true.
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        # Channel 4 is used as the object mask, channels 5+ as class targets.
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        # Scale factor 2 - w*h, computed from the true box channels 2 and 3.
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # True boxes of sample b, selected by the object mask.
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            # 1 where the best IoU is below the threshold, else 0.
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        # loop_body closes over `l`; the loop is built right here, within
        # the same iteration, so it sees the current layer index.
        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m,
                                                       loop_body,
                                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        # Object cells always contribute; non-object cells contribute only
        # where ignore_mask is 1.
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        # Normalize every term by mf.
        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ],
                            message='loss: ')
    return loss
def _wta(X):
    """Winner-take-all over the last axis.

    Entries equal to the maximum along the last axis are kept; all other
    entries are set to 0.
    """
    row_max = K.max(X, axis=-1, keepdims=True)
    is_winner = K.equal(X, row_max)
    return K.switch(is_winner, X, 0.)
def GRU_merge(self, self_act, a, b, act):
    """Compute interval bounds for a gated combination of ``a`` and ``b``.

    With ``fx = self_act(x)`` (``x`` being this object's own interval),
    ``fa = a`` and ``fb = act(b)``, the helpers below bound terms of the
    form ``fx * fa`` and ``(1 - fx) * fb`` at the two ends of the ``x``
    interval. Each helper returns four anchors in the order
    ``[left_lower, left_upper, right_lower, right_upper]``. Which helper is
    used is decided element-wise from the sign of the bounds of ``a`` and
    ``b`` respectively.

    Args:
        self_act: activation applied to the bounds of ``self``.
        a: object exposing ``get_lu()``; its bounds are used directly.
        b: object exposing ``get_lu()``; its bounds are passed through ``act``.
        act: activation applied to the bounds of ``b``.

    Returns:
        ``AI(center, radius, None, True)`` where center and radius are
        derived from the combined lower and upper bounds.
    """
    lower_a, upper_a = a.get_lu()
    lower_b, upper_b = b.get_lu()
    fa_lower, fa_upper = lower_a, upper_a
    fb_lower, fb_upper = act(lower_b), act(upper_b)
    lower_x, upper_x = self.get_lu()
    fx_lower, fx_upper = self_act(lower_x), self_act(upper_x)
    # Slopes of self_act at both ends of the x interval.
    partial_fx_lower = tf.gradients(fx_lower, lower_x)[0]
    partial_fx_upper = tf.gradients(fx_upper, upper_x)[0]

    # Anchors for fx * fa; used where lower_a > 0.
    def lower_a_greater_zero():
        uz_x_Phi = K.minimum(partial_fx_upper * fa_upper,
                             (fx_upper - fx_lower) * fa_upper / (upper_x - lower_x))
        ax_right_upper = fx_upper * fa_upper
        ax_left_upper = uz_x_Phi * (lower_x - upper_x) + ax_right_upper
        lz_x_Phi = K.minimum(partial_fx_lower * fa_lower,
                             (fx_lower - fx_upper) * fa_lower / (lower_x - upper_x))
        ax_left_lower = fx_lower * fa_lower
        ax_right_lower = lz_x_Phi * (upper_x - lower_x) + ax_left_lower
        return [
            ax_left_lower, ax_left_upper, ax_right_lower, ax_right_upper
        ]

    # Anchors for (1 - fx) * fb; used where lower_b > 0.
    def lower_b_greater_zero():
        uz_x_Phi = K.maximum(-partial_fx_lower * fb_upper,
                             (-fx_upper + fx_lower) * fb_upper / (upper_x - lower_x))
        bx_left_upper = (1 - fx_lower) * fb_upper
        bx_right_upper = uz_x_Phi * (upper_x - lower_x) + bx_left_upper
        lz_x_Phi = K.maximum(-partial_fx_upper * fb_lower,
                             (-fx_lower + fx_upper) * fb_lower / (lower_x - upper_x))
        bx_right_lower = (1 - fx_upper) * fb_lower
        bx_left_lower = lz_x_Phi * (lower_x - upper_x) + bx_right_lower
        return [
            bx_left_lower, bx_left_upper, bx_right_lower, bx_right_upper
        ]

    # Anchors for fx * fa; used where upper_a < 0.
    def upper_a_less_zero():
        uz_x_Phi = K.maximum(partial_fx_lower * fa_upper,
                             (fx_lower - fx_upper) * fa_upper / (lower_x - upper_x))
        ax_left_upper = fx_lower * fa_upper
        ax_right_upper = uz_x_Phi * (upper_x - lower_x) + ax_left_upper
        lz_x_Phi = K.maximum(partial_fx_upper * fa_lower,
                             (fx_upper - fx_lower) * fa_lower / (upper_x - lower_x))
        ax_right_lower = fx_upper * fa_lower
        ax_left_lower = lz_x_Phi * (lower_x - upper_x) + ax_right_lower
        return [
            ax_left_lower, ax_left_upper, ax_right_lower, ax_right_upper
        ]

    # Anchors for (1 - fx) * fb; used where upper_b < 0.
    def upper_b_less_zero():
        uz_x_Phi = K.minimum(-partial_fx_upper * fb_upper,
                             (-fx_upper + fx_lower) * fb_upper / (upper_x - lower_x))
        bx_right_upper = (1 - fx_upper) * fb_upper
        bx_left_upper = uz_x_Phi * (lower_x - upper_x) + bx_right_upper
        lz_x_Phi = K.minimum(-partial_fx_lower * fb_lower,
                             (-fx_lower + fx_upper) * fb_lower / (lower_x - upper_x))
        bx_left_lower = (1 - fx_lower) * fb_lower
        bx_right_lower = lz_x_Phi * (upper_x - lower_x) + bx_left_lower
        return [
            bx_left_lower, bx_left_upper, bx_right_lower, bx_right_upper
        ]

    # Anchors for fx * fa in the remaining case (neither condition on a holds).
    def otherwise_a():
        uz_x_Phi = K.minimum(partial_fx_upper * fa_upper,
                             (fx_upper - fx_lower) * fa_upper / (upper_x - lower_x))
        ax_right_upper = fx_upper * fa_upper
        ax_left_upper = uz_x_Phi * (lower_x - upper_x) + ax_right_upper
        lz_x_Phi = K.maximum(partial_fx_upper * fa_lower,
                             (fx_upper - fx_lower) * fa_lower / (upper_x - lower_x))
        ax_right_lower = fx_upper * fa_lower
        ax_left_lower = lz_x_Phi * (lower_x - upper_x) + ax_right_lower
        return [
            ax_left_lower, ax_left_upper, ax_right_lower, ax_right_upper
        ]

    # Anchors for (1 - fx) * fb in the remaining case (neither condition on b holds).
    def otherwise_b():
        uz_x_Phi = K.maximum(-partial_fx_lower * fb_upper,
                             (-fx_upper + fx_lower) * fb_upper / (upper_x - lower_x))
        bx_left_upper = (1 - fx_lower) * fb_upper
        bx_right_upper = uz_x_Phi * (upper_x - lower_x) + bx_left_upper
        lz_x_Phi = K.minimum(-partial_fx_lower * fb_lower,
                             (-fx_lower + fx_upper) * fb_lower / (lower_x - upper_x))
        bx_left_lower = (1 - fx_lower) * fb_lower
        bx_right_lower = lz_x_Phi * (upper_x - lower_x) + bx_left_lower
        return [
            bx_left_lower, bx_left_upper, bx_right_lower, bx_right_upper
        ]

    # Start from the fallback anchors and override them element-wise; the
    # `upper_a < 0` override is applied last and therefore wins where it holds.
    a_anchors = otherwise_a()
    anchors_lower_a_greater_zero = lower_a_greater_zero()
    anchors_upper_a_less_zero = upper_a_less_zero()
    for i in range(4):
        a_anchors[i] = K.switch(K.greater(lower_a, K.zeros_like(lower_a)),
                                anchors_lower_a_greater_zero[i], a_anchors[i])
        a_anchors[i] = K.switch(K.less(upper_a, K.zeros_like(upper_a)),
                                anchors_upper_a_less_zero[i], a_anchors[i])

    # Same selection for the b term.
    b_anchors = otherwise_b()
    anchors_lower_b_greater_zero = lower_b_greater_zero()
    anchors_upper_b_less_zero = upper_b_less_zero()
    for i in range(4):
        b_anchors[i] = K.switch(K.greater(lower_b, K.zeros_like(lower_b)),
                                anchors_lower_b_greater_zero[i], b_anchors[i])
        b_anchors[i] = K.switch(K.less(upper_b, K.zeros_like(upper_b)),
                                anchors_upper_b_less_zero[i], b_anchors[i])

    # Add the two terms anchor by anchor.
    for i in range(4):
        a_anchors[i] += b_anchors[i]
    # Overall lower bound: the smaller of the two lower anchors; overall
    # upper bound: the larger of the two upper anchors.
    lower_z = K.minimum(a_anchors[0], a_anchors[2])
    upper_z = K.maximum(a_anchors[1], a_anchors[3])
    # Convert [lower, upper] to center / radius form.
    return AI((lower_z + upper_z) / 2, (upper_z - lower_z) / 2, None, True)
def call(self, x, mask=None):
    """Keep only the maximum of every group of ``self.OneOnX`` consecutive features.

    The second axis of ``x`` is split into groups of ``self.OneOnX``
    entries. Inside each group the entries equal to the group maximum are
    kept and all others are set to 0. The result is reshaped back to the
    first two dimensions of ``x``.

    Args:
        x: tensor whose second dimension is split into groups; presumably
            2-D with a second dimension divisible by ``self.OneOnX`` —
            TODO confirm.
        mask: unused.
    """
    n_rows = T.shape(x)[0]
    n_cols = T.shape(x)[1]
    # Floor division keeps the group count an integer; plain `/` is true
    # division on Python 3 and yields a non-integer reshape dimension.
    grouped = T.reshape(x, (n_rows, n_cols // self.OneOnX, self.OneOnX))
    group_max = K.max(grouped, axis=(2), keepdims=True)
    winners = K.switch(K.equal(grouped, group_max), grouped, 0.)
    return T.reshape(winners, (n_rows, n_cols))
def call(self, inputs, states, training=None):
    """We need to reimplement `call` entirely rather than reusing that
    from `GRUCell` since there are lots of differences.

    Args:
        inputs: One tensor which is stacked by 3 inputs (x, m, s)
            x and m are of shape (n_batch * input_dim).
            s is of shape (n_batch, 1).
        states: states and other values from the previous step.
            (h_tm1, x_keep_tm1, s_prev_tm1)
        training: forwarded to the dropout-mask generator; when it is None
            and any dropout is active, the output is flagged as using the
            learning phase.

    Returns:
        `(h_t, [h_t, x_keep_t, s_prev_t])`: the new hidden state, followed
        by the new states (hidden state, kept input and kept timestamp).

    Raises:
        ValueError: if there is no input decay and `x_imputation` is not
            one of 'forward', 'zero' or 'raw'.
    """
    # Get inputs and states
    input_x = inputs[:, :self.true_input_dim]  # inputs x, m, s
    input_m = inputs[:, self.true_input_dim:-1]
    input_s = inputs[:, -1:]
    # Need to add broadcast for time_stamp if using theano backend.
    if K.backend() == 'theano':
        input_s = K.pattern_broadcast(input_s, [False, True])
    h_tm1, x_keep_tm1, s_prev_tm1 = states
    # previous memory ([n_batch * self.units])
    # previous input x ([n_batch * input_dim])
    # and the subtraction term (of delta_t^d in Equation (2))
    # ([n_batch * input_dim])
    input_1m = K.cast_to_floatx(1.) - input_m
    # Time since the value stored in s_prev_tm1.
    input_d = input_s - s_prev_tm1

    # Get dropout
    # Masks are created once and cached on the cell.
    if 0. < self.dropout < 1. and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(K.ones_like(input_x),
                                                    self.dropout,
                                                    training=training,
                                                    count=3)
    if (0. < self.recurrent_dropout < 1.
            and self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            K.ones_like(h_tm1),
            self.recurrent_dropout,
            training=training,
            count=3)
    dp_mask = self._dropout_mask
    rec_dp_mask = self._recurrent_dropout_mask

    if self.feed_masking:
        if 0. < self.dropout < 1. and self._masking_dropout_mask is None:
            self._masking_dropout_mask = _generate_dropout_mask(
                K.ones_like(input_m),
                self.dropout,
                training=training,
                count=3)
        m_dp_mask = self._masking_dropout_mask

    # Compute decay if any
    # Input and masking decay multiply element-wise by their kernels; the
    # hidden decay uses a dot product with its kernel.
    if self.input_decay is not None:
        gamma_di = input_d * self.input_decay_kernel
        if self.use_decay_bias:
            gamma_di = K.bias_add(gamma_di, self.input_decay_bias)
        gamma_di = self.input_decay(gamma_di)
    if self.hidden_decay is not None:
        gamma_dh = K.dot(input_d, self.hidden_decay_kernel)
        if self.use_decay_bias:
            gamma_dh = K.bias_add(gamma_dh, self.hidden_decay_bias)
        gamma_dh = self.hidden_decay(gamma_dh)
    if self.feed_masking and self.masking_decay is not None:
        gamma_dm = input_d * self.masking_decay_kernel
        if self.use_decay_bias:
            gamma_dm = K.bias_add(gamma_dm, self.masking_decay_bias)
        gamma_dm = self.masking_decay(gamma_dm)

    # Get the imputed or decayed input if needed
    # and `x_keep_t` for the next time step
    if self.input_decay is not None:
        # Where the mask is set use the input; elsewhere use the last kept
        # value scaled by the input decay.
        x_keep_t = K.switch(input_m, input_x, x_keep_tm1)
        x_t = K.switch(input_m, input_x, gamma_di * x_keep_t)
    elif self.x_imputation == 'forward':
        x_t = K.switch(input_m, input_x, x_keep_tm1)
        x_keep_t = x_t
    elif self.x_imputation == 'zero':
        x_t = K.switch(input_m, input_x, K.zeros_like(input_x))
        x_keep_t = x_t
    elif self.x_imputation == 'raw':
        x_t = input_x
        x_keep_t = x_t
    else:
        raise ValueError('No input decay or invalid x_imputation '
                         '{}.'.format(self.x_imputation))

    # Get decayed hidden if needed
    if self.hidden_decay is not None:
        h_tm1d = gamma_dh * h_tm1
    else:
        h_tm1d = h_tm1

    # Get decayed masking if needed
    if self.feed_masking:
        # The value fed to the gates is 1 - m.
        m_t = input_1m
        if self.masking_decay is not None:
            m_t = gamma_dm * m_t

    # Apply the dropout
    # One mask per gate: index 0 -> z, 1 -> r, 2 -> candidate h.
    if 0. < self.dropout < 1.:
        x_z, x_r, x_h = x_t * dp_mask[0], x_t * dp_mask[1], x_t * dp_mask[2]
        if self.feed_masking:
            m_z, m_r, m_h = (m_t * m_dp_mask[0],
                             m_t * m_dp_mask[1],
                             m_t * m_dp_mask[2])
    else:
        x_z, x_r, x_h = x_t, x_t, x_t
        if self.feed_masking:
            m_z, m_r, m_h = m_t, m_t, m_t
    if 0. < self.recurrent_dropout < 1.:
        h_tm1_z, h_tm1_r = (
            h_tm1d * rec_dp_mask[0],
            h_tm1d * rec_dp_mask[1],
        )
    else:
        h_tm1_z, h_tm1_r = h_tm1d, h_tm1d

    # Get z_t, r_t, hh_t
    z_t = K.dot(x_z, self.kernel_z) + K.dot(h_tm1_z, self.recurrent_kernel_z)
    r_t = K.dot(x_r, self.kernel_r) + K.dot(h_tm1_r, self.recurrent_kernel_r)
    hh_t = K.dot(x_h, self.kernel_h)
    if self.feed_masking:
        z_t += K.dot(m_z, self.masking_kernel_z)
        r_t += K.dot(m_r, self.masking_kernel_r)
        hh_t += K.dot(m_h, self.masking_kernel_h)
    if self.use_bias:
        z_t = K.bias_add(z_t, self.input_bias_z)
        r_t = K.bias_add(r_t, self.input_bias_r)
        hh_t = K.bias_add(hh_t, self.input_bias_h)
    z_t = self.recurrent_activation(z_t)
    r_t = self.recurrent_activation(r_t)

    # The candidate state uses the reset-gated (and decayed) hidden state.
    if 0. < self.recurrent_dropout < 1.:
        h_tm1_h = r_t * h_tm1d * rec_dp_mask[2]
    else:
        h_tm1_h = r_t * h_tm1d
    hh_t = self.activation(hh_t + K.dot(h_tm1_h, self.recurrent_kernel_h))

    # get h_t
    # Note: the undecayed h_tm1 is used in this interpolation.
    h_t = z_t * h_tm1 + (1 - z_t) * hh_t
    if 0. < self.dropout + self.recurrent_dropout:
        if training is None:
            h_t._uses_learning_phase = True

    # get s_prev_t
    # Where the mask is set, store the current timestamp (tiled to the
    # width given by the last state size); elsewhere keep the previous one.
    s_prev_t = K.switch(input_m,
                        K.tile(input_s, [1, self.state_size[-1]]),
                        s_prev_tm1)
    return h_t, [h_t, x_keep_t, s_prev_t]
def masked_loss(y_true, y_pred):
    """Sparse categorical cross-entropy averaged over unmasked positions.

    A position is treated as present when ``K.any(y_true, axis=-1)`` is true.
    The loss at every other position is replaced by zero, and the summed loss
    is divided by the number of present positions plus ``K.epsilon()`` so the
    division stays finite when nothing is present.
    """
    present = K.cast(K.any(y_true, axis=-1), "float32")
    per_position = K.sparse_categorical_crossentropy(y_true, y_pred)
    kept = K.switch(present,
                    per_position,
                    K.zeros_like(present, dtype=K.floatx()))
    denominator = K.cast(K.sum(present), dtype='float32') + K.epsilon()
    return K.sum(kept) / denominator
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False, normalize=True):
    """Return the YOLOv3 loss accumulated over every detection layer.

    Parameters
    ----------
    args : list of tensors
        ``[*yolo_outputs, *y_true]``.  The first ``num_layers`` entries are
        the network outputs, the remaining ones the encoded ground truth, one
        tensor per detection layer.
    anchors : array
        Anchor sizes.  ``len(anchors) // 3`` gives the number of layers and
        ``anchor_mask`` selects three anchors for each layer.
    num_classes : int
        Forwarded to ``yolo_head``.
    ignore_thresh : float
        A prediction whose best IoU with the ground-truth boxes of its image
        reaches this value is left out of the no-object confidence term.
    print_loss : bool
        When true, the running loss is wrapped in ``tf.Print``.
    normalize : bool
        When true the total is divided by the number of positive cells
        (clamped to at least 1 per layer); otherwise by the batch size.

    Returns
    -------
    tensor
        The normalised loss.
    """
    # Number of feature layers (three anchors per layer).
    num_layers = len(anchors) // 3

    # Split the combined argument list back into predictions and ground truth.
    # Per the original notes, for a 416x416 input with 80 classes the y_true
    # tensors are shaped (m,13,13,3,85), (m,26,26,3,85), (m,52,52,3,85).
    y_true = args[num_layers:]
    yolo_outputs = args[:num_layers]

    # Anchors assigned to each layer, e.g. per the original notes:
    #   13x13 grid -> [116,90], [156,198], [373,326]
    #   26x26 grid -> [30,61],  [62,45],   [59,119]
    #   52x52 grid -> [10,13],  [16,30],   [33,23]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

    # Network input size: 32 times the spatial size of the first output.
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))

    # Spatial size (rows, cols) of every output grid.
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
                   for l in range(num_layers)]

    loss = 0
    num_pos = 0

    # m is the batch size (as a tensor); mf is the same value in float.
    m = K.shape(yolo_outputs[0])[0]
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        # Channel 4 marks the cells/anchors that contain an object.
        object_mask = y_true[l][..., 4:5]
        # Remaining channels hold the class targets.
        true_class_probs = y_true[l][..., 5:]

        # Decode this layer's output.  yolo_head returns the grid
        # coordinates, the raw prediction, and the decoded centre / size.
        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)

        # Decoded predicted boxes as (x, y, w, h).
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Build the ignore mask one image at a time.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # Ground-truth boxes of image b, shape (n, 4).
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            # IoU of every predicted box with every ground-truth box.
            iou = box_iou(pred_box[b], true_box)
            # Best overlap of each predicted box with any ground-truth box.
            best_iou = K.max(iou, axis=-1)
            # 1 where the prediction matches no ground truth well enough and
            # may therefore serve as a negative sample; 0 where it overlaps a
            # ground-truth box too well to be penalised as background.
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        # Iterate over the images of the batch.
        _, ignore_mask = K.control_flow_ops.while_loop(
            lambda b, *args: b < m, loop_body, [0, ignore_mask])

        # Stack the per-image masks and add a trailing channel axis.
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # Encode the ground truth in the same form as the raw prediction.
        # grid_shapes holds (rows, cols) while the labels are (x, y), so the
        # grid size is reversed here, exactly as input_shape is for wh below.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])

        # Keep the encoded wh only where an object exists; elsewhere the log
        # of a zero size would be -inf, so use zeros instead.
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))

        # Larger ground-truth boxes get a smaller weight, smaller boxes more.
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Centre offset loss, computed with binary cross-entropy on logits.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)

        # Width/height loss.
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])

        # Confidence loss: cross-entropy against 1 where an object exists and
        # against 0 elsewhere; the background term is masked by ignore_mask.
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask

        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        # Reduce every term to a scalar.
        xy_loss = K.sum(xy_loss)
        wh_loss = K.sum(wh_loss)
        confidence_loss = K.sum(confidence_loss)
        class_loss = K.sum(class_loss)

        # Count the positive samples of this layer (at least 1).
        num_pos += tf.maximum(K.sum(K.cast(object_mask, tf.float32)), 1)
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                tf.shape(ignore_mask)
            ], summarize=100, message='loss: ')

    if normalize:
        loss = loss / num_pos
    else:
        loss = loss / mf
    return loss
def compute_loss(yolo_outputs, y_true, anchors, num_classes, ignore_thresh=ignore_thresh, print_loss=False):
    """Sum the YOLO loss over three detection layers and record summaries.

    ``yolo_outputs`` and ``y_true`` are indexed per layer (0..2).  ``y_true``
    is the output of ``preprocess_true_boxes``.  Each loss component is summed
    and divided by the batch size before being added to the total.
    ``print_loss`` is accepted but not used by this function.
    """
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    label_dtype = K.dtype(y_true[0])
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, label_dtype)
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[idx])[1:3], label_dtype)
        for idx in range(3)
    ]
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))
    loss = 0

    for layer in range(3):
        truth = y_true[layer]
        layer_anchors = anchors[anchor_mask[layer]]
        object_mask = truth[..., 4:5]
        true_class_probs = truth[..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[layer], layer_anchors, num_classes, input_shape,
            calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Encode the labels in the raw (Darknet) form used by the prediction.
        raw_true_xy = truth[..., :2] * grid_shapes[layer][::-1] - grid
        raw_true_wh = K.log(truth[..., 2:4] / layer_anchors * input_shape[::-1])
        # Cells without an object would give log(0) = -inf; zero them.
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))
        box_loss_scale = 2 - truth[..., 2:3] * truth[..., 3:4]

        # Per-image ignore mask: 1 where the best IoU is below the threshold.
        ignore_mask = tf.TensorArray(label_dtype, size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(truth[b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            best_iou = K.max(box_IoU(pred_box[b], true_box), axis=-1)
            below = K.cast(best_iou < ignore_thresh, K.dtype(true_box))
            return b + 1, ignore_mask.write(b, below)

        _, ignore_mask = K.control_flow_ops.while_loop(
            lambda b, *_: b < m, loop_body, [0, ignore_mask])
        ignore_mask = K.expand_dims(ignore_mask.stack(), -1)

        # Cross-entropy on logits avoids overflowing exp().
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        objectness_bce = K.binary_crossentropy(
            object_mask, raw_pred[..., 4:5], from_logits=True)
        confidence_loss = (object_mask * objectness_bce
                           + (1 - object_mask) * objectness_bce * ignore_mask)
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss

    with tf.name_scope("losses"):
        for tag, value in (("coordinate_loss", xy_loss),
                           ("dimensions_loss", wh_loss),
                           ("confidence_loss", confidence_loss),
                           ("class_loss", class_loss),
                           ("total_loss", loss)):
            tf.summary.scalar(tag, value)
    return loss
def build_object_untargeted_loss(self):
    """Return the negated YOLO loss of ``self.model`` on ``self.encoded_labels``.

    The loss is built over three detection layers from ``self.model.output``
    using ``self.anchors`` and ``self.num_classes``.  The ignore threshold is
    fixed at 0.45, and every component is divided by the batch size.  The
    result is returned with its sign flipped.
    """
    outputs = self.model.output
    labels = self.encoded_labels
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    label_dtype = K.dtype(labels[0])

    input_shape = K.cast(K.shape(outputs[0])[1:3] * 32, label_dtype)
    grid_shapes = [
        K.cast(K.shape(outputs[idx])[1:3], label_dtype)
        for idx in range(3)
    ]
    m = K.shape(outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(outputs[0]))

    total = 0
    for layer in range(3):
        truth = labels[layer]
        layer_anchors = self.anchors[anchor_mask[layer]]
        object_mask = truth[..., 4:5]
        true_class_probs = truth[..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            outputs[layer], layer_anchors, self.num_classes, input_shape,
            calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Encode the labels in the raw (Darknet) form used by the prediction.
        raw_true_xy = truth[..., :2] * grid_shapes[layer][::-1] - grid
        raw_true_wh = K.log(truth[..., 2:4] / layer_anchors * input_shape[::-1])
        # Cells without an object would give log(0) = -inf; zero them.
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))
        box_loss_scale = 2 - truth[..., 2:3] * truth[..., 3:4]

        # Per-image ignore mask: 1 where the best IoU is below 0.45.
        ignore_mask = tf.TensorArray(label_dtype, size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(truth[b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            best_iou = K.max(box_iou(pred_box[b], true_box), axis=-1)
            below = K.cast(best_iou < 0.45, K.dtype(true_box))
            return b + 1, ignore_mask.write(b, below)

        _, ignore_mask = tf.while_loop(lambda b, *_: b < m,
                                       loop_body, [0, ignore_mask])
        ignore_mask = K.expand_dims(ignore_mask.stack(), -1)

        # Cross-entropy on logits avoids overflowing exp().
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])

        objectness_bce = K.binary_crossentropy(
            object_mask, raw_pred[..., 4:5], from_logits=True)
        confidence_loss = 0
        confidence_loss += object_mask * objectness_bce
        confidence_loss += (1 - object_mask) * objectness_bce * ignore_mask

        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        total += xy_loss + wh_loss + confidence_loss + class_loss

    return -total
def call(self, x, mask=None):
    """Apply the layer's smoothed rate nonlinearity to ``x``.

    ``j`` is a softplus of ``x - 1`` with sharpness ``self.sigma``.  The
    output is ``amplitude / (tau_ref + tau_rc * log(1 + 1/j))`` where ``j`` is
    positive and 0 elsewhere.  ``mask`` is accepted but not used.
    """
    from keras import backend as K

    smoothed = K.softplus((x - 1) / self.sigma) * self.sigma
    period = self.tau_ref + self.tau_rc * K.log(1 + 1 / smoothed)
    rate = self.amplitude / period
    return K.switch(smoothed > 0, rate, 0)
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Build the YOLO loss tensor (meant to be wrapped as a Keras layer).

    Parameters
    ----------
    args: list of tensors; ``args[:num_layers]`` are the outputs of yolo_body
        or tiny_yolo_body, ``args[num_layers:]`` the matching outputs of
        preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, IoU threshold below which a prediction contributes
        to the no-object confidence loss
    print_loss: bool, wrap the running loss in ``tf.Print`` when true

    Returns
    -------
    loss: tensor, the summed box, objectness and class losses, each divided
        by the batch size

    '''
    num_layers = len(anchors) // 3  # default setting

    # Separate predictions from labels.
    yolo_outputs, y_true = args[:num_layers], args[num_layers:]
    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]

    # Shapes reused below.
    label_dtype = K.dtype(y_true[0])
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, label_dtype)
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[idx])[1:3], label_dtype)
        for idx in range(num_layers)
    ]
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    loss = 0
    for idx in range(num_layers):
        truth = y_true[idx]
        layer_anchors = anchors[anchor_mask[idx]]

        # Channel 4: whether an object is assigned to this cell/anchor.
        object_mask = truth[..., 4:5]
        # Channels 5+: class targets.
        true_class_probs = truth[..., 5:]

        # raw_pred is the prediction before any activation is applied.
        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[idx], layer_anchors, num_classes, input_shape,
            calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Encode the labels in the raw (Darknet) form used by the prediction.
        raw_true_xy = truth[..., :2] * grid_shapes[idx][::-1] - grid
        raw_true_wh = K.log(truth[..., 2:4] / layer_anchors * input_shape[::-1])
        # Cells without an object would give log(0) = -inf; zero them.
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))
        box_loss_scale = 2 - truth[..., 2:3] * truth[..., 3:4]

        # Per-image ignore mask: 1 where the best IoU with any ground-truth
        # box of that image is below ignore_thresh.
        ignore_mask = tf.TensorArray(label_dtype, size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(truth[b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            best_iou = K.max(box_iou(pred_box[b], true_box), axis=-1)
            below = K.cast(best_iou < ignore_thresh, K.dtype(true_box))
            return b + 1, ignore_mask.write(b, below)

        _, ignore_mask = K.control_flow_ops.while_loop(
            lambda b, *_: b < m, loop_body, [0, ignore_mask])
        ignore_mask = K.expand_dims(ignore_mask.stack(), -1)

        # The xy prediction is still a logit here, so the cross-entropy is
        # evaluated with from_logits=True (which also avoids exp overflow).
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        # Both sides are in log scale, so a squared difference is used.
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        # Objectness: full weight on object cells, ignore_mask weight on the
        # rest.
        objectness_bce = K.binary_crossentropy(
            object_mask, raw_pred[..., 4:5], from_logits=True)
        confidence_loss = (object_mask * objectness_bce
                           + (1 - object_mask) * objectness_bce * ignore_mask)
        # Class loss, counted on object cells only.
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss

        if print_loss:
            loss = tf.Print(
                loss,
                [loss, xy_loss, wh_loss, confidence_loss, class_loss,
                 K.sum(ignore_mask)],
                message='loss: ')
    return loss
def get_updates(self, loss, params):
    """Build the update ops for one rectified-Adam style optimisation step.

    Parameters
    ----------
    loss : tensor
        Loss to differentiate.
    params : list of variables
        Parameters to update.

    Returns
    -------
    list
        ``self.updates``: the iteration counter increment followed by the
        moment, (optionally) AMSGrad and parameter updates.

    Notes
    -----
    When ``self.initial_total_steps > 0`` the learning rate ramps up linearly
    over ``total_steps * warmup_proportion`` steps and then decays linearly
    to zero at ``total_steps``.  The decay is measured from the end of the
    warmup so the schedule is continuous at the boundary.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

    # 1-based step count as a float tensor.
    t = K.cast(self.iterations, K.floatx()) + 1

    if self.initial_total_steps > 0:
        warmup_steps = self.total_steps * self.warmup_proportion
        # Clamp to 1 so a warmup that covers every step cannot divide by 0.
        decay_steps = K.maximum(self.total_steps - warmup_steps, 1)
        lr = K.switch(
            t <= warmup_steps,
            lr * (t / warmup_steps),
            # Steps elapsed since warmup ended, capped at decay_steps.
            lr * (1.0 - K.minimum(t - warmup_steps, decay_steps) / decay_steps),
        )

    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p), name='m_' + str(i))
          for (i, p) in enumerate(params)]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p), name='v_' + str(i))
          for (i, p) in enumerate(params)]

    if self.amsgrad:
        vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p), name='vhat_' + str(i))
                 for (i, p) in enumerate(params)]
    else:
        # Placeholders so the weight list has the same layout either way.
        vhats = [K.zeros(1, name='vhat_' + str(i)) for i in range(len(params))]

    self.weights = [self.iterations] + ms + vs + vhats

    beta_1_t = K.pow(self.beta_1, t)
    beta_2_t = K.pow(self.beta_2, t)

    sma_inf = 2.0 / (1.0 - self.beta_2) - 1.0
    sma_t = sma_inf - 2.0 * t * beta_2_t / (1.0 - beta_2_t)

    # Rectification factor; depends only on the step, not on the parameter,
    # so it is computed once.  It is only selected when sma_t > 5.
    r_t = K.sqrt((sma_t - 4.0) / (sma_inf - 4.0) *
                 (sma_t - 2.0) / (sma_inf - 2.0) *
                 sma_inf / sma_t)

    for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

        m_corr_t = m_t / (1.0 - beta_1_t)
        if self.amsgrad:
            vhat_t = K.maximum(vhat, v_t)
            v_corr_t = K.sqrt(vhat_t / (1.0 - beta_2_t) + self.epsilon)
            self.updates.append(K.update(vhat, vhat_t))
        else:
            v_corr_t = K.sqrt(v_t / (1.0 - beta_2_t) + self.epsilon)

        # Adaptive step when sma_t > 5, otherwise a plain momentum step.
        p_t = K.switch(sma_t > 5, r_t * m_corr_t / v_corr_t, m_corr_t)

        if self.initial_weight_decay > 0:
            p_t += self.weight_decay * p

        p_t = p - lr * p_t

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        new_p = p_t

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(K.update(p, new_p))
    return self.updates
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    """Return the YOLO loss summed over all detection layers.

    Parameters
    ----------
    args : list of tensors
        ``[*yolo_outputs, *y_true]``; the first ``num_layers`` entries are the
        predicted feature layers, the rest the ground truth for the same
        layers.
    anchors : array
        Anchor sizes; ``len(anchors) // 3`` gives the number of layers.
    num_classes : int
        Forwarded to ``yolo_head``.
    ignore_thresh : float
        A prediction whose best IoU with the ground-truth boxes of its image
        reaches this value is left out of the no-object confidence term.

    Returns
    -------
    tensor
        Sum over the layers of the xy, wh, confidence and class losses, each
        divided by the batch size.
    """
    num_layers = len(anchors) // 3  # three anchors per layer

    # Split the combined argument list.
    y_true = args[num_layers:]          # ground truth
    yolo_outputs = args[:num_layers]    # predicted feature layers

    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    # NOTE(review): the input size is hard-coded; other sizes need this
    # changed together with the label encoding.
    input_shape = (416, 416)

    # Spatial size (rows, cols) of every output grid.
    grid_shape = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]            # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))   # batch size as float

    for l in range(num_layers):
        # Channel 4 is 1 where an object is present, 0 elsewhere.
        object_mask = y_true[l][..., 4:5]
        # Remaining channels are the class targets.
        true_class_probs = y_true[l][..., 5:]

        # Decode this layer; calc_loss=True asks yolo_head for the grid and
        # raw prediction that the loss terms below need.
        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes,
            input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # Ground-truth boxes of image b.
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            # IoU between every predicted and every ground-truth box.
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            # 1 where the prediction overlaps no ground truth well enough.
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(
            lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # Encode the labels like the raw prediction.  grid_shape is
        # (rows, cols) while the labels are (x, y), hence the reversal.
        raw_true_xy = y_true[l][..., :2] * grid_shape[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        # Cells without an object would give log(0) = -inf; zero them.
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(
            object_mask, raw_pred[..., 4:5],
            from_logits=True) + (1 - object_mask) * K.binary_crossentropy(
                object_mask, raw_pred[..., 4:5],
                from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        # Accumulate so every layer contributes, not only the last one.
        loss += xy_loss + wh_loss + confidence_loss + class_loss
    return loss
def call(self, inputs):
    """Return ``K.switch`` over the first two inputs with a constant 1 condition."""
    condition = tf.constant(1)
    return K.switch(condition, inputs[0], inputs[1])
def bbox_ciou(boxes1, boxes2):
    '''
    Compute ciou = iou - p2/c2 - a*v for boxes given as (x, y, w, h).

    :param boxes1: pred_xywh, e.g. shape (8, 13, 13, 3, 4)
    :param boxes2: label_xywh, same shape as boxes1
    :return: ciou with the last axis removed
    '''
    def _sorted_corners(xywh):
        # Centre/size -> corner coordinates.
        half = xywh[..., 2:] * 0.5
        corners = tf.concat([xywh[..., :2] - half, xywh[..., :2] + half],
                            axis=-1)
        # Compare the two corners element-wise and keep the smaller as
        # (x0, y0) and the larger as (x1, y1).  This guards against a
        # negative w or h having swapped the corners.
        return tf.concat([
            tf.minimum(corners[..., :2], corners[..., 2:]),
            tf.maximum(corners[..., :2], corners[..., 2:])
        ], axis=-1)

    def _area(corners):
        width = corners[..., 2] - corners[..., 0]
        height = corners[..., 3] - corners[..., 1]
        return width * height

    box_a = _sorted_corners(boxes1)
    box_b = _sorted_corners(boxes2)

    # Areas of the two rectangles.
    area_a = _area(box_a)
    area_b = _area(box_b)

    # Corners of the intersection rectangle.
    inter_min = tf.maximum(box_a[..., :2], box_b[..., :2])
    inter_max = tf.minimum(box_a[..., 2:], box_b[..., 2:])

    # Intersection area and iou.
    inter_wh = tf.maximum(inter_max - inter_min, 0.0)
    inter_area = inter_wh[..., 0] * inter_wh[..., 1]
    union_area = area_a + area_b - inter_area
    iou = inter_area / union_area

    # Corners of the smallest rectangle enclosing both boxes.
    outer_min = tf.minimum(box_a[..., :2], box_b[..., :2])
    outer_max = tf.maximum(box_a[..., 2:], box_b[..., 2:])

    # Squared diagonal of the enclosing rectangle.
    outer_wh = outer_max - outer_min
    enclose_c2 = K.pow(outer_wh[..., 0], 2) + K.pow(outer_wh[..., 1], 2)

    # Squared distance between the two box centres.
    p2 = K.pow(boxes1[..., 0] - boxes2[..., 0], 2) + K.pow(
        boxes1[..., 1] - boxes2[..., 1], 2)

    # Aspect-ratio term a*v.  boxes2[..., 3] may be 0, so non-positive
    # heights are shifted by 1 before dividing to avoid nan.
    angle_a = tf.atan(boxes1[..., 2] / boxes1[..., 3])
    safe_h = K.switch(boxes2[..., 3] > 0.0, boxes2[..., 3],
                      boxes2[..., 3] + 1.0)
    angle_b = tf.atan(boxes2[..., 2] / safe_h)
    v = 4.0 * K.pow(angle_a - angle_b, 2) / (math.pi**2)
    a = v / (1 - iou + v)

    centre_penalty = 1.0 * p2 / enclose_c2
    aspect_penalty = 1.0 * a * v
    return iou - centre_penalty - aspect_penalty
def step(self, a, states):
    """Advance the recurrent cell by one time step.

    ``states`` holds, in order, ``nb_layers`` representation tensors,
    ``nb_layers`` cell tensors and ``nb_layers`` error tensors.  When
    ``self.extrap_start_time`` is set it also holds the previous frame
    prediction and the time counter as its last two entries.

    Returns ``(output, states)`` where ``output`` depends on
    ``self.output_mode`` and ``states`` is the updated state list.
    """
    nb = self.nb_layers
    r_tm1 = states[:nb]
    c_tm1 = states[nb:2*nb]
    e_tm1 = states[2*nb:3*nb]

    if self.extrap_start_time is not None:
        t = states[-1]
        # Past the extrapolation start, the previous prediction is fed back
        # in place of the actual input.
        a = K.switch(t >= self.t_extrap, states[-2], a)

    # Top-down pass: update the cell and representation of every layer,
    # starting from the highest one.
    c = [None] * nb
    r = [None] * nb
    for l in reversed(range(nb)):
        parts = [r_tm1[l], e_tm1[l]]
        if l < nb - 1:
            parts.append(r_up)
        stacked = K.concatenate(parts, axis=self.channel_axis)

        i = self.conv_layers['i'][l].call(stacked)
        f = self.conv_layers['f'][l].call(stacked)
        o = self.conv_layers['o'][l].call(stacked)
        new_c = f * c_tm1[l] + i * self.conv_layers['c'][l].call(stacked)
        new_r = o * self.LSTM_activation(new_c)
        c[l] = new_c
        r[l] = new_r

        if l > 0:
            r_up = self.upsample.call(new_r)

    # Bottom-up pass: predict, compute errors, and build the next target.
    e = []
    for l in range(nb):
        ahat = self.conv_layers['ahat'][l].call(r[l])
        if l == 0:
            ahat = K.minimum(ahat, self.pixel_max)
            frame_prediction = ahat

        # Positive and negative parts of the prediction error.
        e_up = self.error_activation(ahat - a)
        e_down = self.error_activation(a - ahat)
        layer_e = K.concatenate((e_up, e_down), axis=self.channel_axis)
        e.append(layer_e)

        if l < nb - 1:
            # Target for the next layer.
            a = self.pool.call(self.conv_layers['a'][l].call(layer_e))

    mode = self.output_mode
    if mode == 'prediction':
        output = frame_prediction
    elif mode == 'all_R':
        # Flattened representations of layers 2 and 3, concatenated.
        output = K.batch_flatten(r[2])
        output = K.concatenate((output, K.batch_flatten(r[3])), axis=-1)
    else:
        for l in range(nb):
            layer_error = K.mean(K.batch_flatten(e[l]), axis=-1, keepdims=True)
            if l == 0:
                all_error = layer_error
            else:
                all_error = K.concatenate((all_error, layer_error), axis=-1)
        if mode == 'error':
            output = all_error
        else:
            output = K.concatenate(
                (K.batch_flatten(frame_prediction), all_error), axis=-1)

    states = r + c + e
    if self.extrap_start_time is not None:
        states += [frame_prediction, t + 1]
    return output, states
def bool_match(y_true, y_pred):
    """Return a variable holding 1 when rounded ``y_pred`` equals ``y_true`` everywhere, else 0."""
    any_mismatch = K.any(y_true - y_pred.round())
    on_mismatch = K.variable(0)
    on_match = K.variable(1)
    return K.switch(any_mismatch, on_mismatch, on_match)
def myMax(dist):
    """Threshold the self dot product of the element-wise max of two inputs.

    ``dist`` unpacks into ``(dist1, dist2, sth)``.  The element-wise maximum
    of ``dist1`` and ``dist2`` is dotted with itself, and the result is 1
    where it exceeds ``sth`` and 0 otherwise.
    """
    first, second, threshold = dist
    larger = K.max((first, second), axis=0)
    energy = K.dot(larger, larger)
    return K.switch(energy > threshold, 1, 0)
def compute_loss(self, input, output, input_mask=None, output_mask=None):
    """Penalise input values outside ``[self.low, self.high]`` during training.

    The penalty is the mean absolute distance by which ``input`` falls below
    ``self.low`` or above ``self.high``, scaled by ``self.weight``.  Outside
    the training phase the loss is 0.  ``output`` and both masks are accepted
    but not used.
    """
    below = K.switch(input < self.low, K.abs(input - self.low), 0)
    above = K.switch(input > self.high, K.abs(input - self.high), 0)
    penalty = self.weight*K.mean(above + below)
    return K.in_train_phase(penalty, 0)
def contrastive_accuracy(y_true, y_pred):
    """Mean agreement between ``y_true`` and ``y_pred`` thresholded at 1.

    Predictions greater than 1 are mapped to class 1, all others to class 0.
    """
    predicted_class = K.switch(y_pred > 1, 1, 0)
    agreement = K.equal(y_true, predicted_class)
    return K.mean(agreement, axis=-1)
def get_output(self, train=False):
    """Return a flag per position: 1 where the last-axis sum of the input is non-zero, else 0.

    The result gets an extra trailing dimension via ``K.expand_dims``.
    """
    last_axis_sum = K.sum(self.get_input(train), -1)
    flags = K.switch(last_axis_sum, 1, 0)
    return K.expand_dims(flags)
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    args: list of tensors; ``args[:num_layers]`` are the outputs of yolo_body
        or tiny_yolo_body (y_pred), ``args[num_layers:]`` the outputs of
        preprocess_true_boxes (y_true)
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object
        confidence loss
    print_loss: bool, wrap the running loss in ``tf.Print`` when true

    Returns
    -------
    loss: tensor, sum over layers of the box, confidence and class losses,
        each divided by the batch size

    '''
    num_layers = len(anchors) // 3  # default setting
    predictions = args[:num_layers]
    y_true = args[num_layers:]

    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]

    label_dtype = K.dtype(y_true[0])
    # The input size is 32 times the grid of the first detection layer.
    input_shape = K.cast(K.shape(predictions[0])[1:3] * 32, label_dtype)
    # Grid size of each layer, taken from axes 1 and 2 of the prediction.
    grid_shapes = [
        K.cast(K.shape(predictions[idx])[1:3], label_dtype)
        for idx in range(num_layers)
    ]
    # Batch size, as an int tensor and as a float.
    m = K.shape(predictions[0])[0]
    mf = K.cast(m, K.dtype(predictions[0]))

    loss = 0
    for idx in range(num_layers):
        truth = y_true[idx]
        layer_anchors = anchors[anchor_mask[idx]]

        # Channel 4 is 1 for cells/anchors with an object, 0 for the rest.
        object_mask = truth[..., 4:5]
        # Channels 5 onward are the class targets.
        true_class_probs = truth[..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            predictions[idx], layer_anchors, num_classes, input_shape,
            calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Encode the labels in the raw (Darknet) form of the prediction:
        # xy relative to the cell's grid offset, wh as a log ratio to the
        # anchor.
        raw_true_xy = truth[..., :2] * grid_shapes[idx][::-1] - grid
        raw_true_wh = K.log(truth[..., 2:4] / layer_anchors * input_shape[::-1])
        # Cells without an object would give log(0) = -inf; zero them.
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))
        box_loss_scale = 2 - truth[..., 2:3] * truth[..., 3:4]

        # Per-image ignore mask: 1 where the best IoU is below the threshold.
        ignore_mask = tf.TensorArray(label_dtype, size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(truth[b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            best_iou = K.max(box_iou(pred_box[b], true_box), axis=-1)
            below = K.cast(best_iou < ignore_thresh, K.dtype(true_box))
            return b + 1, ignore_mask.write(b, below)

        def keep_going(b, *_):
            return b < m

        if TF_VERSION2:
            _, ignore_mask = tf.while_loop(keep_going, loop_body,
                                           [0, ignore_mask])
        else:
            _, ignore_mask = K.control_flow_ops.while_loop(
                keep_going, loop_body, [0, ignore_mask])
        ignore_mask = K.expand_dims(ignore_mask.stack(), -1)

        # Cross-entropy on logits avoids overflowing exp().
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        objectness_bce = K.binary_crossentropy(
            object_mask, raw_pred[..., 4:5], from_logits=True)
        confidence_loss = (object_mask * objectness_bce
                           + (1 - object_mask) * objectness_bce * ignore_mask)
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss

        if print_loss:
            loss = tf.Print(
                loss,
                [loss, xy_loss, wh_loss, confidence_loss, class_loss,
                 K.sum(ignore_mask)],
                message='loss: ')
    return loss
def clip_norm(g, c, n):
    """Rescale ``g`` to norm ``c`` when its norm ``n`` is at least ``c``.

    Clipping is only applied when ``c > 0``; otherwise ``g`` is returned
    unchanged.
    """
    if not c > 0:
        return g
    return K.switch(n >= c, g * c / n, g)
def weighted_sum(first, second, sigma, first_threshold=-np.inf, second_threshold=np.inf):
    """Blend two tensors and replace gated-out entries with ``INFTY``.

    The blend is ``first * sigma + second * (1 - sigma)``.  An entry keeps
    its blended value only where ``first > first_threshold`` and
    ``second > second_threshold``; every other entry becomes ``INFTY``.
    """
    # NOTE(review): with the default second_threshold=np.inf the second gate
    # can never pass, so the defaults return INFTY everywhere - confirm
    # whether the default or the comparison direction is intended.
    blended = first * sigma + second * (1.0 - sigma)
    fill = kb.ones_like(blended) * INFTY
    for value, threshold in ((first, first_threshold),
                             (second, second_threshold)):
        blended = kb.switch(kb.greater(value, threshold), blended, fill)
    return blended
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    '''Return yolo_loss tensor

    Parameters
    ----------
    args: sequence; the first three entries are the network outputs
        (yolo_outputs) and the remaining entries the ground truth (y_true),
        as produced by yolo_body and preprocess_true_boxes respectively.
    anchors: array, shape=(T, 2), wh. Indexed with lists of ints, so it must
        support fancy indexing (e.g. a NumPy array).
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object
        confidence loss

    Returns
    -------
    loss: tensor; sum of the xy, wh, confidence and class losses accumulated
        over the three detection layers (not divided by the batch size).
    '''
    # YOLOv3 outputs are 3 arrays of shape
    # (batch_size, height, width, num_anchors * (5 + num_classes))
    yolo_outputs = args[:3]
    # Ground truth is a list of 3 arrays of shape
    # (batch_size, height, width, num_anchors, 5 + num_classes)
    y_true = args[3:]
    # Anchor sizes decrease along the top-down upsampling pathway
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    # Input shape is 32 times the first detection layer shape
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    # Shapes of each detection layer
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[layer])[1:3], K.dtype(y_true[0]))
        for layer in range(3)
    ]

    # Initialize loss accumulators
    xy_loss = 0
    wh_loss = 0
    confidence_loss = 0
    class_loss = 0
    loss = 0
    batch_size = K.shape(yolo_outputs[0])[0]

    for layer in range(3):
        # True objectness score/confidence (either 1 or 0) of each
        # anchor-offset at each grid cell
        object_mask = y_true[layer][..., 4:5]
        # True one-hot encoded class probabilities of each ground truth box
        true_class_probs = y_true[layer][..., 5:]

        grid, y_pred, pred_xy, pred_wh, anchors_tensor = yolo_head(
            feats=yolo_outputs[layer],
            anchors=anchors[anchor_mask[layer]],
            num_classes=num_classes,
            input_shape=input_shape,
            calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Encode the ground truth the same way as the raw predictions.
        # grid_shapes is reversed with [::-1] before scaling the xy targets.
        true_xy = y_true[layer][..., :2] * grid_shapes[layer][::-1] - grid
        true_wh = K.log(y_true[layer][..., 2:4] * input_shape[::-1] / anchors_tensor)
        # Avoid log(0) = -inf on cells without an object
        true_wh = K.switch(object_mask, true_wh, K.zeros_like(true_wh))
        # Weight small boxes more: 2 - w * h
        box_loss_scale = 2 - y_true[layer][..., 2:3] * y_true[layer][..., 3:4]

        # Build the ignore mask one batch element at a time.
        ignore_mask = tf.TensorArray(dtype=K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # Ground-truth boxes present in image b
            true_box = tf.boolean_mask(tensor=y_true[layer][b, ..., 0:4],
                                       mask=object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            # 1 where the best IoU is below the threshold, else 0
            ignore_mask = ignore_mask.write(index=b, value=K.cast(
                best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = tf.while_loop(cond=lambda b, *args: b < batch_size,
                                       body=loop_body,
                                       loop_vars=[0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, axis=-1)

        # y_pred holds raw (pre-sigmoid) network outputs: the confidence and
        # class terms below already pass from_logits=True and the wh term
        # compares against log-space targets. The xy term must therefore also
        # be evaluated on logits; without from_logits=True the raw values
        # would be treated (and clipped) as probabilities.
        xy_loss += K.sum(object_mask * box_loss_scale * K.binary_crossentropy(
            true_xy, y_pred[..., 0:2], from_logits=True))
        wh_loss += K.sum(object_mask * box_loss_scale * 0.5 *
                         K.square(true_wh - y_pred[..., 2:4]))
        # log_weight = ignore_mask + (ignore_mask - 1) * object_mask
        # confidence_loss = (1 - object_mask) * y_pred[..., 4] + \
        #   log_weight * K.log(1 + K.exp(0 - K.abs(y_pred[..., 4]))) + \
        #   K.relu(0 - y_pred[..., 4])
        # Object cells always contribute; non-object cells contribute only
        # where ignore_mask is 1.
        confidence_loss += K.sum(
            object_mask * K.binary_crossentropy(
                object_mask, y_pred[..., 4:5], from_logits=True) +
            (1 - object_mask) * K.binary_crossentropy(
                object_mask, y_pred[..., 4:5], from_logits=True) * ignore_mask)
        # confidence_loss = tf.nn.weighted_cross_entropy_with_logits(object_mask, y_pred[..., 4:5], ignore_mask)
        class_loss += K.sum(object_mask * K.binary_crossentropy(
            true_class_probs, y_pred[..., 5:], from_logits=True))

    # The component losses already accumulate over all layers, so they are
    # combined exactly once here.
    loss += xy_loss + wh_loss + confidence_loss + class_loss
    return loss
def clip_norm(g, c, n):
    """Clip gradients.

    Returns ``g`` untouched unless ``c > 0``. Otherwise the backend selects
    ``g * c / n`` where ``K.ge(n, c)`` holds and ``g`` elsewhere. ``n`` is
    presumably the norm of ``g`` -- confirm against the caller.
    """
    return K.switch(K.ge(n, c), g * c / n, g) if c > 0 else g
def yolo4_loss(args, anchors, num_classes, ignore_thresh=.5, label_smoothing=0, use_focal_loss=False, use_focal_obj_loss=False, use_softmax_loss=False, use_giou_loss=False, use_diou_loss=False):
    '''Return yolo4_loss tensor

    Parameters
    ----------
    args: sequence; the first ``len(anchors)//3`` entries are the network
        outputs (yolo_outputs), the remaining entries the ground truth
        (y_true, the output of preprocess_true_boxes).
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object
        confidence loss
    label_smoothing: when truthy, class targets go through _smooth_labels.
    use_focal_loss: use a focal loss for the class term.
    use_focal_obj_loss: use a sigmoid focal loss for the objectness term.
    use_softmax_loss: use softmax-style instead of sigmoid-style class loss.
    use_giou_loss: use (1 - GIoU) as the location loss.
    use_diou_loss: use (1 - DIoU) as the location loss (only consulted when
        use_giou_loss is falsy).

    Returns
    -------
    loss: tensor; per-layer location + confidence + class losses, each
        divided by the batch size, summed over layers and expanded with a
        trailing axis.
    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [0, 1, 2]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[idx])[1:3], K.dtype(y_true[0]))
        for idx in range(num_layers)
    ]

    loss = 0
    total_location_loss = 0
    total_confidence_loss = 0
    total_class_loss = 0
    batch_size = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    batch_size_f = K.cast(batch_size, K.dtype(yolo_outputs[0]))

    for layer in range(num_layers):
        layer_true = y_true[layer]
        object_mask = layer_true[..., 4:5]
        true_class_probs = layer_true[..., 5:]
        if label_smoothing:
            true_class_probs = _smooth_labels(true_class_probs, label_smoothing)

        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[layer], anchors[anchor_mask[layer]],
            num_classes, input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = layer_true[..., :2] * grid_shapes[layer][::-1] - grid
        raw_true_wh = K.log(layer_true[..., 2:4] / anchors[anchor_mask[layer]] * input_shape[::-1])
        # avoid log(0)=-inf on cells without an object
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))
        box_loss_scale = 2 - layer_true[..., 2:3] * layer_true[..., 3:4]

        # Find ignore mask, iterate over each image of the batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def mark_image(b, mask_array):
            true_box = tf.boolean_mask(y_true[layer][b, ..., 0:4], object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            below_thresh = K.cast(best_iou < ignore_thresh, K.dtype(true_box))
            return b + 1, mask_array.write(b, below_thresh)

        _, ignore_mask = tf.while_loop(
            lambda b, *_: b < batch_size, mark_image, [0, ignore_mask])
        ignore_mask = K.expand_dims(ignore_mask.stack(), -1)

        # ---- objectness / confidence term ----
        if use_focal_obj_loss:
            # Focal loss for objectness confidence
            confidence_loss = sigmoid_focal_loss(object_mask, raw_pred[..., 4:5])
        else:
            confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
                (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask

        # ---- classification term ----
        if use_focal_loss and use_softmax_loss:
            # Focal loss for classification score, softmax style
            class_loss = softmax_focal_loss(true_class_probs, raw_pred[..., 5:])
        elif use_focal_loss:
            # Focal loss for classification score, sigmoid style
            class_loss = sigmoid_focal_loss(true_class_probs, raw_pred[..., 5:])
        elif use_softmax_loss:
            # use softmax style classification output
            class_loss = object_mask * K.expand_dims(
                K.categorical_crossentropy(true_class_probs, raw_pred[..., 5:], from_logits=True),
                axis=-1)
        else:
            # use sigmoid style classification output
            class_loss = object_mask * K.binary_crossentropy(
                true_class_probs, raw_pred[..., 5:], from_logits=True)

        # ---- location term ----
        if use_giou_loss:
            # Calculate GIoU loss as location loss
            raw_true_box = layer_true[..., 0:4]
            giou = box_giou(pred_box, raw_true_box)
            giou_loss = object_mask * box_loss_scale * (1 - giou)
            location_loss = K.sum(giou_loss) / batch_size_f
        elif use_diou_loss:
            # Calculate DIoU loss as location loss
            raw_true_box = layer_true[..., 0:4]
            diou = box_diou(pred_box, raw_true_box)
            diou_loss = object_mask * box_loss_scale * (1 - diou)
            location_loss = K.sum(diou_loss) / batch_size_f
        else:
            # Standard YOLO location loss
            # K.binary_crossentropy is helpful to avoid exp overflow.
            xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
                raw_true_xy, raw_pred[..., 0:2], from_logits=True)
            wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
                raw_true_wh - raw_pred[..., 2:4])
            xy_loss = K.sum(xy_loss) / batch_size_f
            wh_loss = K.sum(wh_loss) / batch_size_f
            location_loss = xy_loss + wh_loss

        confidence_loss = K.sum(confidence_loss) / batch_size_f
        class_loss = K.sum(class_loss) / batch_size_f
        loss += location_loss + confidence_loss + class_loss
        # Per-component totals are tracked but currently not returned.
        total_location_loss += location_loss
        total_confidence_loss += confidence_loss
        total_class_loss += class_loss

    # Fit for tf 2.0.0 loss shape
    return K.expand_dims(loss, axis=-1)  # , total_location_loss, total_confidence_loss, total_class_loss
def call(self, x, mask=None):
    """Choose element-wise between two tensors according to a condition.

    ``x`` must unpack into ``(condition, then_expr, else_expr)``. The
    condition keeps its first two axes and is given one broadcastable axis
    (``'x'`` in ``dimshuffle``) for every further dimension of ``then_expr``
    before being handed to ``K.switch``. ``mask`` is not used.
    """
    condition, then_expr, else_expr = x
    extra_axes = K.ndim(then_expr) - 2
    pattern = (0, 1) + ('x',) * extra_axes
    broadcast_condition = condition.dimshuffle(*pattern)
    return K.switch(broadcast_condition, then_expr, else_expr)
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    """Return the YOLO training loss tensor.

    Parameters
    ----------
    args: sequence laid out as [*model_body.output, *y_true]; the first
        ``len(anchors) // 3`` entries are the network outputs and the rest
        the ground truth.
    anchors: array of (w, h) anchor sizes; indexed with lists of ints.
    num_classes: integer
    ignore_thresh: float; predictions whose best IoU with any true box is
        below this value count as negatives in the confidence loss.
    print_loss: when truthy, wrap the running loss in ``tf.Print`` after
        every layer.

    Returns
    -------
    loss: tensor; per-layer xy + wh + confidence + class losses, each
        divided by the batch size, summed over layers.
    """
    # Number of detection layers (three for the full model)
    num_layers = len(anchors) // 3

    # Split predictions from ground truth; args is [*model_body.output, *y_true].
    # Per the original notes, for a 416x416 input with 80 classes both lists
    # hold feature layers of shape (m,13,13,3,85), (m,26,26,3,85), (m,52,52,3,85).
    y_true = args[num_layers:]
    yolo_outputs = args[:num_layers]

    # Anchor indices used by each layer. Per the original notes:
    #   6,7,8 -> 116,90  156,198  373,326
    #   3,4,5 -> 30,61   62,45    59,119
    #   0,1,2 -> 10,13   16,30    33,23
    # NOTE(review): the 2-layer mask [1, 2, 3] overlaps index 3 and skips
    # index 0 -- kept as in the original; confirm it is intentional.
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]

    # Input shape is 32x the first feature layer (e.g. 416,416)
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))

    # Grid shape of every layer (e.g. 13,13; 26,26; 52,52)
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0

    # m is the batch size (a tensor); mf is the same value as a float
    m = K.shape(yolo_outputs[0])[0]
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        # Taking the first feature layer (m,13,13,3,85) as the example:
        # cells/anchors that contain an object, (m,13,13,3,1)
        object_mask = y_true[l][..., 4:5]
        # Their class targets, (m,13,13,3,80)
        true_class_probs = y_true[l][..., 5:]

        # Decode the network output for this layer.
        # Per the original notes: grid is (13,13,1,2), raw_pred is the
        # undecoded prediction (m,13,13,3,85), pred_xy / pred_wh are the
        # decoded box centre and size, (m,13,13,3,2) each.
        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]],
            num_classes, input_shape, calc_loss=True)

        # Decoded predicted boxes, (m,13,13,3,4)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Collect the negatives: start from an empty, growable array
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        # Compute the ignore mask for one image of the batch
        def loop_body(b, ignore_mask):
            # All boxes that really exist in image b, shape (n, 4)
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            # IoU of every predicted box of image b (13,13,3,4) with every
            # true box -> (13,13,3,n)
            iou = box_iou(pred_box[b], true_box)

            # Best IoU per predicted box; the max removes the last axis
            best_iou = K.max(iou, axis=-1)

            # A predicted box whose best IoU is below ignore_thresh has no
            # matching true box and is treated as a negative for this image
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        # Iterate over all images of the batch
        _, ignore_mask = K.control_flow_ops.while_loop(
            lambda b, *args: b < m, loop_body, [0, ignore_mask])

        # Stack the per-image results and add a trailing axis so the mask
        # lines up with object_mask, e.g. (m,13,13,3,1)
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # Encode the true boxes in the same format as the raw predictions.
        # grid_shapes[l] must be reversed here, exactly as input_shape is
        # for the wh targets on the next line; without the reversal the xy
        # targets are scaled by the wrong axis whenever the grid is not
        # square.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])

        # Keep the wh target only where an object exists; elsewhere use 0
        # (this also removes the -inf produced by log(0))
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))
        # Weight small boxes more: 2 - w * h
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])

        # Where a box exists: cross-entropy between 1 and the confidence.
        # Where no box exists and best_iou < ignore_thresh: counted as a
        # negative. The threshold limits the number of negatives.
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask

        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ], message='loss: ')
    return loss
def call(self, x, mask=None):
    """Return the maximum of ``x`` along axis 1, honouring an optional mask.

    When ``mask`` is given it is expanded with a trailing axis and positions
    where it is false are replaced by ``-np.inf`` before the maximum is
    taken.
    """
    if mask is not None:
        x = K.switch(mask[:, :, np.newaxis], x, -np.inf)
    return K.max(x, axis=1)
def smooth_L1(y_true, y_pred, clip_delta=0.5):
    """Mean of a piecewise quadratic/linear penalty on the absolute error.

    Only the slice ``[:, :1]`` of ``|y_true - y_pred|`` is used. Errors
    below ``clip_delta`` are penalised with ``0.5 * err ** 2``; the others
    with ``clip_delta * (err - 0.5 * clip_delta)``.
    """
    abs_err = K.abs(y_true - y_pred)[:, :1]
    quadratic = 0.5 * abs_err ** 2
    linear = clip_delta * (abs_err - 0.5 * clip_delta)
    penalty = K.switch(abs_err < clip_delta, quadratic, linear)
    return K.mean(penalty)
def call(self, inputs, states, training=None):
    """Run one step of the cell.

    Parameters
    ----------
    inputs : input tensor for this step.
    states : sequence; ``states[0]`` is the previous hidden state and
        ``states[1]`` the current time-step counter.
    training : forwarded to the dropout-mask generators.

    Returns
    -------
    ``(output, [hidden, time_step + 1])``.
    """
    # Unpacking requires states[0].shape to have exactly two entries;
    # the two names are not used afterwards.
    samples, inFeatures = states[0].shape
    h_tm1 = states[0]  # previous state
    time_step = states[1]
    # Dropout masks are created once and cached on the instance.
    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(_generate_dropout_ones(
            inputs, K.shape(inputs)[-1]), self.dropout, training=training)
    if (0 < self.recurrent_dropout < 1
            and self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            _generate_dropout_ones(inputs, self.units),
            self.recurrent_dropout,
            training=training)

    dp_mask = self._dropout_mask
    rec_dp_mask = self._recurrent_dropout_mask
    if dp_mask is not None:
        inputs *= dp_mask
    if rec_dp_mask is not None:
        h_tm1 *= rec_dp_mask

    if self.split_method:
        # Update State, module-by-module
        h_mod = []
        unitsPerMod = self.units // self.clock_numPeriods

        # if_true / if_false are closures: they read i, s and hModule from
        # the enclosing scope at the time they are called, i.e. the values
        # set by the current iteration of the loop below.
        def if_true():
            # Recompute the module from the state slice starting at column s
            # (open-ended, not s:e) plus the current inputs.
            hModule = K.dot(h_tm1[:, s:], self.rec_kernel_c_mod[i]) + K.dot(
                inputs, self.kernel_c_mod[i])
            if self.use_bias:
                hModule = K.bias_add(hModule, self.bias_mod[i])
            if self.recurrent_activation is not None:
                hModule = self.recurrent_activation(hModule)
            return hModule

        def if_false():
            # Keep the module's previous state slice unchanged.
            return hModule

        for i, period in enumerate(self.clock_periods):
            s = i * unitsPerMod
            e = (i + 1) * unitsPerMod
            hModule = h_tm1[:, s:e]
            # Module i is updated only when the time step is a multiple of
            # its period.
            h_mod.append(
                tf.cond(K.equal(K.tf.mod(time_step[0][0], period), 0),
                        if_true, if_false))
        hidden = K.concatenate(h_mod)
    else:
        # Update State, all at once, then only use certain updates
        h = K.dot(inputs, self.kernel) + K.dot(
            h_tm1, self.recurrent_kernel_c * self.cw_mask)
        if self.bias is not None:
            h = K.bias_add(h, self.bias)
        if self.recurrent_activation is not None:
            h = self.recurrent_activation(h)
        # Take the new value where time_step is a multiple of cw_periods,
        # otherwise keep the previous state.
        h = K.switch(K.equal(K.tf.mod(time_step, self.cw_periods), 0), h, h_tm1)
        hidden = h

    # Calculate Output
    output = K.dot(hidden, self.recurrent_kernel_o)
    if self.activation is not None:
        output = self.activation(output)

    # Properly set learning phase on output tensor.
    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            output._uses_learning_phase = True
    return output, [hidden, time_step + 1]
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    """Return yolo_loss tensor (squared-error formulation).

    Parameters
    ----------
    args: sequence; the first three entries are the network outputs
        (yolo_outputs, the output of yolo_body), the remaining entries the
        ground truth (y_true, the output of preprocess_true_boxes).
    anchors: array, shape=(T, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object
        confidence loss

    Returns
    -------
    loss: tensor; summed box, confidence and class squared errors over the
        three layers, divided by the batch size.
    """
    yolo_outputs, y_true = args[:3], args[3:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[idx])[1:3], K.dtype(y_true[0]))
        for idx in range(3)
    ]
    loss = 0
    batch_size = K.shape(yolo_outputs[0])[0]

    for scale in range(3):
        truth = y_true[scale]
        object_mask = truth[..., 4:5]
        true_class_probs = truth[..., 5:]

        pred_xy, pred_wh, pred_confidence, pred_class_probs = yolo_head(
            yolo_outputs[scale], anchors[anchor_mask[scale]],
            num_classes, input_shape)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet box loss.
        xy_delta = (truth[..., :2] - pred_xy) * grid_shapes[scale][::-1]
        wh_delta = K.log(truth[..., 2:4]) - K.log(pred_wh)
        # Avoid log(0)=-inf.
        wh_delta = K.switch(object_mask, wh_delta, K.zeros_like(wh_delta))
        box_delta = K.concatenate([xy_delta, wh_delta], axis=-1)
        box_delta_scale = 2 - truth[..., 2:3] * truth[..., 3:4]

        # Find ignore mask, iterate over each image of the batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def mark_image(b, mask_array):
            true_box = tf.boolean_mask(y_true[scale][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            below_thresh = K.cast(best_iou < ignore_thresh, K.dtype(true_box))
            return b + 1, mask_array.write(b, below_thresh)

        _, ignore_mask = K.control_flow_ops.while_loop(
            lambda b, *_: b < batch_size, mark_image, [0, ignore_mask])
        ignore_mask = K.expand_dims(ignore_mask.stack(), -1)

        box_loss = object_mask * K.square(box_delta * box_delta_scale)
        # Object cells are pulled towards confidence 1; non-object cells
        # towards 0, but only where ignore_mask is 1.
        confidence_loss = object_mask * K.square(1 - pred_confidence) + \
            (1 - object_mask) * K.square(0 - pred_confidence) * ignore_mask
        class_loss = object_mask * K.square(true_class_probs - pred_class_probs)
        loss += K.sum(box_loss) + K.sum(confidence_loss) + K.sum(class_loss)

    return loss / K.cast(batch_size, K.dtype(loss))
def fcn_norm_loss_graph(target_masks, pred_heatmap):
    '''
    Smooth-L1 loss between L2-normalized target masks and predicted heatmaps.

    Each input is reshaped to (dim0, -1, last_dim), L2-normalized along
    axis 1 and reshaped back to its original shape. Both normalized tensors
    are then reshaped to (-1, pred_shape[1], pred_shape[2]) and passed to
    smooth_l1_loss. Shapes and intermediate tensors are printed for
    debugging along the way.

    target_masks:   [batch, height, width, num_classes].
    pred_heatmap:   [batch, height, width, num_classes] float32 tensor

    Returns the mean loss reshaped to [1, 1]; when target_masks has no
    elements the loss is the constant 0.0.
    '''
    print('\n>>> fcn_norm_loss_graph ')
    print(' target_masks shape :', target_masks.shape)
    print(' pred_heatmap shape :', pred_heatmap.shape)
    print(
        '\n L2 normalization ------------------------------------------------------'
    )
    # --- normalize the prediction: flatten the middle axes, L2-normalize
    # along axis 1, restore the original shape
    pred_shape = KB.shape(pred_heatmap)
    print(' pred_shape: KB.shape:', pred_shape, ' tf.get_shape(): ',
          pred_heatmap.get_shape(), ' pred_maks.shape:', pred_heatmap.shape,
          'tf.shape :', tf.shape(pred_heatmap))
    output_flatten = KB.reshape(pred_heatmap, (pred_shape[0], -1, pred_shape[-1]))
    output_norm1 = KB.l2_normalize(output_flatten, axis=1)
    output_norm = KB.reshape(output_norm1, pred_shape)
    print(' output_flatten : ', KB.int_shape(output_flatten),
          output_flatten.get_shape(), ' Keras tensor ',
          KB.is_keras_tensor(output_flatten))
    print(' output_norm1 : ', KB.int_shape(output_norm1),
          output_norm1.get_shape(), ' Keras tensor ',
          KB.is_keras_tensor(output_norm1))
    print(' output_norm final : ', KB.int_shape(output_norm),
          output_norm.get_shape(), ' Keras tensor ',
          KB.is_keras_tensor(output_norm))
    print(
        '\n L2 normalization ------------------------------------------------------'
    )
    # --- normalize the target the same way
    target_shape = KB.shape(target_masks)
    print(' target shape is :', target_shape, ' ', target_masks.get_shape(),
          target_masks.shape, tf.shape(target_masks))
    gauss_flatten = KB.reshape(target_masks, (target_shape[0], -1, target_shape[-1]))
    gauss_norm1 = KB.l2_normalize(gauss_flatten, axis=1)
    gauss_norm = KB.reshape(gauss_norm1, target_shape)
    print(' guass_flatten : ', gauss_flatten.shape,
          gauss_flatten.get_shape(), 'Keras tensor ',
          KB.is_keras_tensor(gauss_flatten))
    print(' gauss_norm shape : ', gauss_norm1.shape,
          gauss_norm1.get_shape(), 'Keras tensor ',
          KB.is_keras_tensor(gauss_norm1))
    print(' gauss_norm final shape: ', gauss_norm.shape,
          gauss_norm.get_shape(), 'Keras tensor ',
          KB.is_keras_tensor(gauss_norm))

    pred_heatmap1 = output_norm
    target_masks1 = gauss_norm

    # pred_shape = KB.shape(target_masks1)
    # print(' pred_shape shape :', pred_shape.eval(), KB.int_shape(pred_shape))

    # Both tensors are reshaped using pred_shape (the prediction's shape),
    # including the target.
    target_masks1 = KB.reshape(target_masks1, (-1, pred_shape[1], pred_shape[2]))
    print(' target_masks1 shape :', target_masks1.get_shape(),
          KB.int_shape(target_masks1))
    pred_heatmap1 = KB.reshape(pred_heatmap1, (-1, pred_shape[1], pred_shape[2]))
    print(' pred_heatmap1 shape :', pred_heatmap1.get_shape())

    # Compute the Smooth-L1 loss. If the target has no elements, use 0.
    loss = KB.switch(
        tf.size(target_masks1) > 0,
        smooth_l1_loss(y_true=target_masks1, y_pred=pred_heatmap1),
        tf.constant(0.0))
    loss = KB.mean(loss)
    loss = KB.reshape(loss, [1, 1])
    print(' loss type is :', type(loss))
    return loss