def call(self, inputs, **kwargs):
    """Produce trainable position embeddings for `inputs`.

    Behavior depends on `self.mode`:
      * MODE_EXPAND: `inputs` are integer position offsets; rows of the
        embedding table are gathered directly.
      * MODE_ADD: the first `seq_len` embedding rows are tiled over the
        batch and added to `inputs` (shapes must match).
      * otherwise (concat mode): the tiled embeddings are concatenated to
        `inputs` along the last axis.
    """
    if self.mode == self.MODE_EXPAND:
        # K.gather needs integer indices.
        if K.dtype(inputs) != 'int32':
            inputs = K.cast(inputs, 'int32')
        # Clip offsets into [-input_dim, input_dim], then shift by
        # +input_dim so negative positions map to their own table rows
        # in [0, 2 * input_dim].
        return K.gather(
            self.embeddings,
            K.minimum(K.maximum(inputs, -self.input_dim), self.input_dim) + self.input_dim,
        )
    input_shape = K.shape(inputs)
    if self.mode == self.MODE_ADD:
        # Add mode: output width must equal the input's own last dim.
        batch_size, seq_len, output_dim = input_shape[0], input_shape[1], input_shape[2]
    else:
        # Concat mode: append `self.output_dim` embedding channels.
        batch_size, seq_len, output_dim = input_shape[0], input_shape[1], self.output_dim
    # Tile one copy of the (seq_len, output_dim) table slice per batch item.
    pos_embeddings = K.tile(
        K.expand_dims(self.embeddings[:seq_len, :self.output_dim], axis=0),
        [batch_size, 1, 1],
    )
    if self.mode == self.MODE_ADD:
        return inputs + pos_embeddings
    return K.concatenate([inputs, pos_embeddings], axis=-1)
def _preprocess_conv2d_input(input_tensor, data_format):
    """Transpose and cast the input before the conv2d.

    Parameters
    ----------
    input_tensor: tensor
        The input that requires transposing and casting
    data_format: str
        `"channels_last"` or `"channels_first"`

    Returns
    -------
    tensor
        The transposed and cast input tensor
    """
    # TensorFlow's conv kernels work in float32; down-cast float64 inputs.
    needs_cast = K.dtype(input_tensor) == "float64"
    result = tf.cast(input_tensor, "float32") if needs_cast else input_tensor
    if data_format == "channels_first":
        # TensorFlow keeps channels in the last dimension, unlike Theano:
        #   Theano:     (samples, input_depth, rows, cols)
        #   TensorFlow: (samples, rows, cols, input_depth)
        result = tf.transpose(result, (0, 2, 3, 1))
    return result
def call(self, inputs, mask=None):
    """Compute sinusoidal (non-trainable) position embeddings.

    `inputs` is a pair (features, positions). Even output channels get
    sin(pos / 10000^(2i/d)), odd channels get cos with the same frequency
    (note `odds - 1 == evens`, matching the Transformer formulation).
    """
    inputs, pos_input = inputs
    batch_size, seq_len, output_dim = self._get_shape(inputs)
    if self.mode == self.MODE_EXPAND:
        # Expand mode: the first tensor itself holds the positions.
        pos_input = inputs
    if K.dtype(pos_input) != K.floatx():
        pos_input = K.cast(pos_input, K.floatx())
    # Channel indices: evens = 0,2,4,...; odds = 1,3,5,...
    evens = K.arange(0, output_dim // 2) * 2
    odds = K.arange(0, output_dim // 2) * 2 + 1
    # sin component, frequency 1 / 10000^(2i/d).
    even_embd = K.sin(
        K.dot(
            K.expand_dims(pos_input, -1),
            K.expand_dims(1.0 / K.pow(
                10000.0,
                K.cast(evens, K.floatx()) / K.cast(output_dim, K.floatx())
            ), 0)
        )
    )
    # cos component; (odds - 1) reuses the even channel's frequency.
    odd_embd = K.cos(
        K.dot(
            K.expand_dims(pos_input, -1),
            K.expand_dims(1.0 / K.pow(
                10000.0,
                K.cast((odds - 1), K.floatx()) / K.cast(output_dim, K.floatx())
            ), 0)
        )
    )
    # Interleave sin/cos pairs, then flatten back to (batch, seq, dim).
    embd = K.stack([even_embd, odd_embd], axis=-1)
    output = K.reshape(embd, [-1, seq_len, output_dim])
    if self.mode == self.MODE_CONCAT:
        output = K.concatenate([inputs, output], axis=-1)
    if self.mode == self.MODE_ADD:
        output += inputs
    return output
def new_get_updates(self, loss, params):
    """Wrap the inner optimizer's updates with Lookahead slow-weight logic.

    Maintains a shadow copy (`slow_params`) of every parameter. Every
    `self.k` fast steps the slow weights are pulled toward the fast
    weights by `slow_ratio`/`fast_ratio` interpolation and copied back.
    """
    # Shadow variables, one per trainable parameter.
    self.slow_params = [
        K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params
    ]
    update_iter = [K.update_add(self.lookahead_iterations, 1)]

    def just_copy_func():
        # Initialize slow weights from the current fast weights.
        copy_slow_params = [
            K.update(p, q) for p, q in zip(self.slow_params, params)
        ]
        return tf.group(*copy_slow_params)

    def update_func():
        # Interpolate fast weights toward the slow weights ...
        update_params = [
            K.update(q, p * self.slow_ratio + q * self.fast_ratio)
            for p, q in zip(self.slow_params, params)
        ]
        # ... and only afterwards resync the slow copy (the dependency
        # ordering here is essential).
        with tf.control_dependencies(update_params):
            reset_slow_params = [
                K.update(p, q) for p, q in zip(self.slow_params, params)
            ]
        return tf.group(*(reset_slow_params + update_iter))

    def just_iter_func():
        # Off-cycle step: only advance the lookahead counter.
        return tf.group(*update_iter)

    # copy params to self.slow_params at iteration 0
    copy_switch = K.equal(self.lookahead_iterations, 0)
    copy_params = [K.switch(copy_switch, just_copy_func, tf.no_op())]
    with tf.control_dependencies(copy_params):
        # do the 'slow weights update' every 'k' iterations
        update_switch = K.equal(self.lookahead_iterations % self.k, 0)
        # Fast-optimizer updates must run before the slow-weight merge.
        with tf.control_dependencies(self.orig_get_updates(loss, params)):
            self.updates = [
                K.switch(update_switch, update_func, just_iter_func)
            ]
            return self.updates
def _compute_target_mask(self, inputs, mask=None):
    """Build a per-channel mask selecting the smallest-magnitude entries.

    For each channel (last axis), marks with 1.0 the fraction
    `self.target_rate` of entries with the lowest absolute value
    (candidates for dropping/pruning); all other entries get 0.0.
    Masked-out positions (via `mask`) are pushed to a huge sentinel so
    they are never selected.
    """
    input_shape = K.shape(inputs)
    input_type = K.dtype(inputs)
    # Sentinel larger than any real activation/weight magnitude.
    # NOTE(review): assumes |inputs| stays well below 1e8 — confirm.
    mask_threshold = K.constant(1e8, dtype=input_type)
    channel_num = int(inputs.shape[-1])
    channel_dim = K.prod(input_shape[:-1])
    masked_inputs = inputs
    if mask is not None:
        # Replace masked positions with the sentinel value.
        masked_inputs = K.switch(
            K.cast(mask, K.floatx()) > 0.5,
            masked_inputs,
            K.ones_like(masked_inputs, dtype=input_type) * mask_threshold
        )
    norm = K.abs(masked_inputs)
    # (channel_num, channel_dim): one row of magnitudes per channel.
    channeled_norm = K.transpose(K.reshape(norm, (channel_dim, channel_num)))
    # Count of valid (non-sentinel) entries per channel.
    weight_num = K.sum(
        K.reshape(K.cast(masked_inputs < mask_threshold, K.floatx()), (channel_dim, channel_num)),
        axis=0,
    )
    # For each channel: index of the target_rate-quantile smallest entry.
    indices = K.stack(
        [
            K.arange(channel_num, dtype='int32'),
            K.cast(self.target_rate * weight_num, dtype='int32') - 1,
        ],
        axis=-1,
    )
    # top_k of the negated norms = k smallest norms; gather the per-channel
    # cut-off magnitude and negate back.
    threshold = -tf.gather_nd(tf.nn.top_k(-channeled_norm, k=K.max(indices[:, 1]) + 1).values, indices)
    # Broadcast each channel's threshold back to the input's shape.
    threshold = K.reshape(tf.tile(threshold, [channel_dim]), input_shape)
    # 1.0 where the entry is at or below its channel threshold.
    target_mask = K.switch(
        norm <= threshold,
        K.ones_like(inputs, dtype=K.floatx()),
        K.zeros_like(inputs, dtype=K.floatx()),
    )
    return target_mask
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)
    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    # Network input resolution = first head's grid * 32 (stride of head 0).
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))
    for l in range(num_layers):
        # Objectness (..., 1) and per-class targets (..., num_classes).
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        # Small ground-truth boxes get a larger weight.
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            # Predictions overlapping some GT box above the threshold are
            # excluded from the no-object confidence penalty.
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        # FIX: use the public tf.while_loop; `K.control_flow_ops` is a
        # private Keras-backend alias removed in later Keras releases, and
        # the other loss variants in this file already call tf.while_loop.
        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body,
                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ], message='loss: ')
    return loss
def get_updates(self, loss, params):
    """Build RAdam-style parameter update ops (with optional warmup,
    AMSGrad, and decoupled weight decay).

    Uses rectified Adam: when the variance estimate is tractable
    (sma_t > 5) the step is rescaled by the rectification term r_t,
    otherwise it falls back to the un-rectified momentum step.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self._iterations, 1)]
    lr = self._lr
    # Optional time-based learning-rate decay.
    if self.initial_decay > 0:
        lr = lr * (1. / (1. + self.decay * K.cast(self._iterations,
                                                  K.dtype(self.decay))))
    t = K.cast(self._iterations, K.floatx()) + 1
    # Optional linear warmup followed by linear decay over total_steps.
    if self.initial_total_steps > 0:
        warmup_steps = self.total_steps * self.warmup_proportion
        decay_steps = self.total_steps - warmup_steps
        lr = K.switch(
            t <= warmup_steps,
            lr * (t / warmup_steps),
            lr * (1.0 - K.minimum(t, decay_steps) / decay_steps),
        )
    # First/second moment accumulators, one per parameter.
    ms = [
        K.zeros(K.int_shape(p), dtype=K.dtype(p), name='m_' + str(i))
        for (i, p) in enumerate(params)
    ]
    vs = [
        K.zeros(K.int_shape(p), dtype=K.dtype(p), name='v_' + str(i))
        for (i, p) in enumerate(params)
    ]
    if self.amsgrad:
        vhats = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p), name='vhat_' + str(i))
            for (i, p) in enumerate(params)
        ]
    else:
        # Placeholders so the weight list keeps a fixed layout.
        vhats = [
            K.zeros(1, name='vhat_' + str(i)) for i in range(len(params))
        ]
    self._weights = [self._iterations] + ms + vs + vhats
    beta_1_t = K.pow(self.beta_1, t)
    beta_2_t = K.pow(self.beta_2, t)
    # Length of the approximated SMA (simple moving average), per RAdam.
    sma_inf = 2.0 / (1.0 - self.beta_2) - 1.0
    sma_t = sma_inf - 2.0 * t * beta_2_t / (1.0 - beta_2_t)
    for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
        # Standard Adam moment updates.
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
        m_corr_t = m_t / (1.0 - beta_1_t)  # bias-corrected first moment
        if self.amsgrad:
            vhat_t = K.maximum(vhat, v_t)
            v_corr_t = K.sqrt(vhat_t / (1.0 - beta_2_t) + self.epsilon)
            self.updates.append(K.update(vhat, vhat_t))
        else:
            v_corr_t = K.sqrt(v_t / (1.0 - beta_2_t) + self.epsilon)
        # Variance rectification term.
        r_t = K.sqrt((sma_t - 4.0) / (sma_inf - 4.0) * (sma_t - 2.0) /
                     (sma_inf - 2.0) * sma_inf / sma_t)
        # Rectified step when variance is tractable, plain momentum otherwise.
        p_t = K.switch(sma_t > 5, r_t * m_corr_t / v_corr_t, m_corr_t)
        if self.initial_weight_decay > 0:
            # Decoupled weight decay (added to the step, not the gradient).
            p_t += self.weight_decay * p
        p_t = p - lr * p_t
        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        new_p = p_t
        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)
        self.updates.append(K.update(p, new_p))
    return self.updates
def call(self, inputs):
    """Two-layer feed-forward transform: relu(x @ W_in + b_in) @ W_out + b_out."""
    x = inputs
    # The weight matrices are float32; align the input dtype first.
    if K.dtype(x) != 'float32':
        x = K.cast(x, 'float32')
    hidden = K.relu(K.dot(x, self.weights_inner) + self.bais_inner)
    return K.dot(hidden, self.weights_out) + self.bais_out
def yolo_loss(args, input_shape, anchors, anchors_mask, num_classes, ignore_thresh=.5, label_smoothing=0.1, print_loss=False): num_layers = len(anchors_mask) #---------------------------------------------------------------------------------------------------# # 将预测结果和实际ground truth分开,args是[*model_body.output, *y_true] # y_true是一个列表,包含三个特征层,shape分别为: # (m,13,13,3,85) # (m,26,26,3,85) # (m,52,52,3,85) # yolo_outputs是一个列表,包含三个特征层,shape分别为: # (m,13,13,3,85) # (m,26,26,3,85) # (m,52,52,3,85) #---------------------------------------------------------------------------------------------------# y_true = args[num_layers:] yolo_outputs = args[:num_layers] #-----------------------------------------------------------# # 得到input_shpae为416,416 #-----------------------------------------------------------# input_shape = K.cast(input_shape, K.dtype(y_true[0])) #-----------------------------------------------------------# # 得到网格的shape为[13,13]; [26,26]; [52,52] #-----------------------------------------------------------# grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)] #-----------------------------------------------------------# # 取出每一张图片 # m的值就是batch_size #-----------------------------------------------------------# m = K.shape(yolo_outputs[0])[0] loss = 0 num_pos = 0 #---------------------------------------------------------------------------------------------------# # y_true是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。 # yolo_outputs是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。 #---------------------------------------------------------------------------------------------------# for l in range(num_layers): #-----------------------------------------------------------# # 以第一个特征层(m,13,13,3,85)为例子 # 取出该特征层中存在目标的点的位置。(m,13,13,3,1) #-----------------------------------------------------------# object_mask = y_true[l][..., 4:5] #-----------------------------------------------------------# # 
取出其对应的种类(m,13,13,3,80) #-----------------------------------------------------------# true_class_probs = y_true[l][..., 5:] if label_smoothing: true_class_probs = _smooth_labels(true_class_probs, label_smoothing) #-----------------------------------------------------------# # 将yolo_outputs的特征层输出进行处理、获得四个返回值 # 其中: # grid (13,13,1,2) 网格坐标 # raw_pred (m,13,13,3,85) 尚未处理的预测结果 # pred_xy (m,13,13,3,2) 解码后的中心坐标 # pred_wh (m,13,13,3,2) 解码后的宽高坐标 #-----------------------------------------------------------# grid, raw_pred, pred_xy, pred_wh = get_anchors_and_decode(yolo_outputs[l], anchors[anchors_mask[l]], num_classes, input_shape, calc_loss=True) #-----------------------------------------------------------# # pred_box是解码后的预测的box的位置 # (m,13,13,3,4) #-----------------------------------------------------------# pred_box = K.concatenate([pred_xy, pred_wh]) #-----------------------------------------------------------# # 找到负样本群组,第一步是创建一个数组,[] #-----------------------------------------------------------# ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True) object_mask_bool = K.cast(object_mask, 'bool') #-----------------------------------------------------------# # 对每一张图片计算ignore_mask #-----------------------------------------------------------# def loop_body(b, ignore_mask): #-----------------------------------------------------------# # 取出n个真实框:n,4 #-----------------------------------------------------------# true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0]) #-----------------------------------------------------------# # 计算预测框与真实框的iou # pred_box 13,13,3,4 预测框的坐标 # true_box n,4 真实框的坐标 # iou 13,13,3,n 预测框和真实框的iou #-----------------------------------------------------------# iou = box_iou(pred_box[b], true_box) #-----------------------------------------------------------# # best_iou 13,13,3 每个特征点与真实框的最大重合程度 #-----------------------------------------------------------# best_iou = K.max(iou, axis=-1) 
#-----------------------------------------------------------# # 判断预测框和真实框的最大iou小于ignore_thresh # 则认为该预测框没有与之对应的真实框 # 该操作的目的是: # 忽略预测结果与真实框非常对应特征点,因为这些框已经比较准了 # 不适合当作负样本,所以忽略掉。 #-----------------------------------------------------------# ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box))) return b+1, ignore_mask #-----------------------------------------------------------# # 在这个地方进行一个循环、循环是对每一张图片进行的 #-----------------------------------------------------------# _, ignore_mask = tf.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask]) #-----------------------------------------------------------# # ignore_mask用于提取出作为负样本的特征点 # (m,13,13,3) #-----------------------------------------------------------# ignore_mask = ignore_mask.stack() # (m,13,13,3,1) ignore_mask = K.expand_dims(ignore_mask, -1) #-----------------------------------------------------------# # 真实框越大,比重越小,小框的比重更大。 #-----------------------------------------------------------# box_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4] #-----------------------------------------------------------# # 计算Ciou loss #-----------------------------------------------------------# raw_true_box = y_true[l][...,0:4] ciou = box_ciou(pred_box, raw_true_box) ciou_loss = object_mask * box_loss_scale * (1 - ciou) #------------------------------------------------------------------------------# # 如果该位置本来有框,那么计算1与置信度的交叉熵 # 如果该位置本来没有框,那么计算0与置信度的交叉熵 # 在这其中会忽略一部分样本,这些被忽略的样本满足条件best_iou<ignore_thresh # 该操作的目的是: # 忽略预测结果与真实框非常对应特征点,因为这些框已经比较准了 # 不适合当作负样本,所以忽略掉。 #------------------------------------------------------------------------------# confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) + \ (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True) 
#-----------------------------------------------------------# # 将所有损失求和 #-----------------------------------------------------------# location_loss = K.sum(ciou_loss) confidence_loss = K.sum(confidence_loss) class_loss = K.sum(class_loss) #-----------------------------------------------------------# # 计算正样本数量 #-----------------------------------------------------------# num_pos += tf.maximum(K.sum(K.cast(object_mask, tf.float32)), 1) loss += location_loss + confidence_loss + class_loss loss = loss / num_pos return loss
def batched_yolo3_postprocess(args, anchors, num_classes, max_boxes=100, confidence=0.1, iou_threshold=0.4):
    """Postprocess for YOLOv3 model on given input and return filtered boxes.

    Decodes every head, concatenates box candidates across levels, then runs
    per-class score filtering + non-max suppression independently for each
    image in the batch via a tf.while_loop over TensorArrays.
    Returns (batch_boxes, batch_scores, batch_classes).
    """
    num_layers = len(anchors)//3 # default setting
    yolo_outputs = args[:num_layers]
    image_shape = args[num_layers]
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [0,1,2]] # default setting
    # Network input size = head-0 grid * 32 (its stride).
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
    batch_size = K.shape(image_shape)[0] # batch size, tensor
    # Collect decoded boxes and per-class scores from every head.
    boxes = []
    box_scores = []
    for l in range(num_layers):
        _boxes, _box_scores = batched_yolo3_boxes_and_scores(yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape, image_shape)
        boxes.append(_boxes)
        box_scores.append(_box_scores)
    boxes = K.concatenate(boxes, axis=1)
    box_scores = K.concatenate(box_scores, axis=1)
    # Keep only candidates whose class score reaches the confidence floor.
    mask = box_scores >= confidence
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    def single_image_nms(b, batch_boxes, batch_scores, batch_classes):
        # Run per-class NMS for image b and append results to the arrays.
        boxes_ = []
        scores_ = []
        classes_ = []
        for c in range(num_classes):
            # TODO: use keras backend instead of tf.
            class_boxes = tf.boolean_mask(boxes[b], mask[b, :, c])
            class_box_scores = tf.boolean_mask(box_scores[b, :, c], mask[b, :, c])
            nms_index = tf.image.non_max_suppression(
                class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
            class_boxes = K.gather(class_boxes, nms_index)
            class_box_scores = K.gather(class_box_scores, nms_index)
            # Label every surviving box of this class with class id c.
            classes = K.ones_like(class_box_scores, 'int32') * c
            boxes_.append(class_boxes)
            scores_.append(class_box_scores)
            classes_.append(classes)
        boxes_ = K.concatenate(boxes_, axis=0)
        scores_ = K.concatenate(scores_, axis=0)
        classes_ = K.concatenate(classes_, axis=0)
        batch_boxes = batch_boxes.write(b, boxes_)
        batch_scores = batch_scores.write(b, scores_)
        batch_classes = batch_classes.write(b, classes_)
        return b+1, batch_boxes, batch_scores, batch_classes
    # Dynamically-sized per-image result arrays.
    batch_boxes = tf.TensorArray(K.dtype(boxes), size=1, dynamic_size=True)
    batch_scores = tf.TensorArray(K.dtype(box_scores), size=1, dynamic_size=True)
    batch_classes = tf.TensorArray(dtype=tf.int32, size=1, dynamic_size=True)
    _, batch_boxes, batch_scores, batch_classes = tf.while_loop(lambda b,*args: b<batch_size, single_image_nms, [0, batch_boxes, batch_scores, batch_classes])
    # NOTE(review): stack() requires every image to yield equally-sized
    # results; presumably the caller pads/limits accordingly — verify.
    batch_boxes = batch_boxes.stack()
    batch_scores = batch_scores.stack()
    batch_classes = batch_classes.stack()
    return batch_boxes, batch_scores, batch_classes
def get_updates(self, loss, params):
    """SGD-with-momentum updates supporting per-parameter learning rate,
    momentum, decay, and clipping selected by name patterns.

    `self.lr`, `self.momentum`, `self.decay`, `self.clips` are dicts keyed
    by name patterns (plus an 'all' default); `set_pattern_find` matches a
    parameter's name against those keys.
    """
    grads = self.get_gradients(loss, params)
    # first update the number of iterations
    self.updates = [K.update_add(self.iterations, 1)]
    if self.decay_epochs:
        # Decay fires on the iterations listed in self.decay_epochs.
        ite_casted = K.cast(self.iterations, K.dtype(self.decay_epochs))
        hit_decay_epoch = K.any(K.equal(ite_casted, self.decay_epochs))
        lr = K.switch(hit_decay_epoch, self.lr['all']*self.decay['all'],
                      self.lr['all'])
        self.updates.append(K.update(self.lr['all'], lr))
    shapes = [K.int_shape(p) for p in params]
    moments = [K.zeros(s) for s in shapes]
    self.weights = [self.iterations] + moments
    for p, g, m in zip(params, grads, moments):
        # Per-parameter learning rate: pattern match on the variable name.
        lrptrkey = set_pattern_find(p.name, self.lr.keys())
        if lrptrkey:
            if self.verbose > 0:
                print("Setting different learning rate for ", p.name, " : ",
                      K.eval(self.lr[lrptrkey]))
            lr = self.lr[lrptrkey]
            dcptrkey = set_pattern_find(p.name, self.decay.keys())
            if self.decay_epochs and dcptrkey:
                # Pattern-specific decay factor.
                lr = K.switch(hit_decay_epoch,
                              self.lr[lrptrkey]*self.decay[dcptrkey],
                              self.lr[lrptrkey])
                self.updates.append(K.update(self.lr[lrptrkey], lr))
                if self.verbose > 0:
                    print("Added decay to ", p.name, ": ", K.eval(lr), ",",
                          self.decay[dcptrkey])
            elif self.decay_epochs:
                # Fall back to the global decay factor.
                lr = K.switch(hit_decay_epoch,
                              self.lr[lrptrkey]*self.decay['all'],
                              self.lr[lrptrkey])
                self.updates.append(K.update(self.lr[lrptrkey], lr))
                if self.verbose > 0:
                    print("Added decay to ", p.name, ": ", K.eval(lr), ",",
                          self.decay['all'])
            else:
                lr = self.lr[lrptrkey]
        else:
            lr = self.lr['all']
        # Per-parameter momentum, same pattern mechanism.
        momptrkey = set_pattern_find(p.name, self.momentum.keys())
        if momptrkey:
            if self.verbose > 0:
                print("Setting different momentum for ", p.name, " , ",
                      K.eval(self.momentum[momptrkey]))
            momentum = self.momentum[momptrkey]
        else:
            momentum = self.momentum['all']
        v = momentum * m - lr * g  # velocity
        self.updates.append(K.update(m, v))
        if self.nesterov:
            new_p = p + momentum * (momentum * m - lr * g) - lr * g
        else:
            new_p = p + momentum * m - lr * g
        # CHANGE CLIP: bound each update to within +/- margin of the old
        # value, where margin is UPCLIP times the mean |p|.
        _to_tensor = K.tensorflow_backend._to_tensor
        _clip_by_val = K.tf.clip_by_value
        margin = K.mean(K.abs(p*K.constant(self.UPCLIP)))
        min_value = _to_tensor(p-margin, p.dtype.base_dtype)
        max_value = _to_tensor(p+margin, p.dtype.base_dtype)
        max_v = K.maximum(min_value, max_value)
        min_v = K.minimum(min_value, max_value)
        new_p = _clip_by_val(new_p, min_v, max_v)
        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)
        # Optional absolute value clipping selected by name pattern.
        clptrkey = set_pattern_find(p.name, self.clips.keys())
        if self.clips_val and clptrkey:
            if self.verbose > 0:
                print("Clipping variable", p.name, " to ", self.clips[clptrkey])
            c = K.eval(self.clips[clptrkey])
            new_p = K.clip(new_p, c[0], c[1])
        self.updates.append(K.update(p, new_p))
    return self.updates
def __call__(self, labels, outputs, anchors, num_classes, ignore_thresh=.5,
             label_smoothing=0, elim_grid_sense=True, use_focal_loss=False,
             use_focal_obj_loss=False, use_softmax_loss=False,
             use_giou_loss=False, use_diou_loss=True):
    # pylint: disable=R0915
    """
    YOLOv3 loss function.

    :param yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    :param y_true: list of array, the output of preprocess_true_boxes
    :param anchors: array, shape=(N, 2), wh
    :param num_classes: integer
    :param ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    :return loss: tensor, shape=(1,)
    """
    anchors = np.array(anchors).astype(float).reshape(-1, 2)
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = list(outputs.values())  # args[:num_layers]
    y_true = list(labels.values())  # args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    # Grid-sensitivity elimination scales per head (YOLOv4-style).
    scale_x_y = [1.05, 1.1, 1.2] if elim_grid_sense else [None, None, None]
    # Network input size = head-0 grid * 32 (its stride).
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    loss = 0
    total_location_loss = 0
    total_confidence_loss = 0
    total_class_loss = 0
    batch_size = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    batch_size_f = K.cast(batch_size, K.dtype(yolo_outputs[0]))
    for i in range(num_layers):
        # Objectness mask and class targets for this head.
        object_mask = y_true[i][..., 4:5]
        true_class_probs = y_true[i][..., 5:]
        if label_smoothing:
            true_class_probs = self._smooth_labels(true_class_probs,
                                                   label_smoothing)
            true_objectness_probs = self._smooth_labels(
                object_mask, label_smoothing)
        else:
            true_objectness_probs = object_mask
        raw_pred, pred_xy, pred_wh = self.yolo3_decode(
            yolo_outputs[i], anchors[anchor_mask[i]], num_classes,
            input_shape, scale_x_y=scale_x_y[i])
        pred_box = K.concatenate([pred_xy, pred_wh])
        # Smaller GT boxes weigh more in the location loss.
        box_loss_scale = 2 - y_true[i][..., 2:3] * y_true[i][..., 3:4]
        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')
        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[i][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = self.box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            # Cells overlapping GT above the threshold are excluded from
            # the background confidence penalty.
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask
        _, ignore_mask = tf.while_loop(lambda b, *args: b < batch_size,
                                       loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)
        # NOTE(review): epsilon added to raw logits, presumably for
        # numerical safety downstream — confirm intent.
        raw_pred = raw_pred + K.epsilon()
        if use_focal_obj_loss:
            # Focal loss for objectness confidence
            confidence_loss = self.sigmoid_focal_loss(
                true_objectness_probs, raw_pred[..., 4:5])
        else:
            confidence_loss = (object_mask * K.binary_crossentropy(true_objectness_probs, raw_pred[...,4:5], from_logits=True)) \
                + ((1-object_mask) * ignore_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True))
        if use_focal_loss:
            # Focal loss for classification score
            if use_softmax_loss:
                class_loss = self.softmax_focal_loss(
                    true_class_probs, raw_pred[..., 5:])
            else:
                class_loss = self.sigmoid_focal_loss(
                    true_class_probs, raw_pred[..., 5:])
        else:
            if use_softmax_loss:
                # use softmax style classification output
                class_loss = object_mask \
                    * K.expand_dims(K.categorical_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True), axis=-1)
            else:
                # use sigmoid style classification output
                class_loss = object_mask \
                    * K.binary_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True)
        # DIoU location loss.
        raw_true_box = y_true[i][..., 0:4]
        diou = self.box_diou(raw_true_box, pred_box)
        diou_loss = object_mask * box_loss_scale * (1 - diou)
        diou_loss = K.sum(diou_loss) / batch_size_f
        location_loss = diou_loss
        confidence_loss = K.sum(confidence_loss) / batch_size_f
        class_loss = K.sum(class_loss) / batch_size_f
        loss += location_loss + confidence_loss + class_loss
        total_location_loss += location_loss
        total_confidence_loss += confidence_loss
        total_class_loss += class_loss
    loss = K.expand_dims(loss, axis=-1)
    return loss, total_location_loss, total_confidence_loss, total_class_loss
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, label_smoothing=0.1, print_loss=False):
    """YOLO loss with CIoU location term (tiny/full variant, batch-normalized)."""
    # Number of prediction heads (2 for tiny, 3 for full).
    num_layers = len(anchors) // 3
    # Split predictions from ground truth; args is [*model_body.output, *y_true].
    # y_true: list of feature levels, e.g. (m,13,13,3,85), (m,26,26,3,85).
    # yolo_outputs: list of raw head outputs, e.g. (m,13,13,255), (m,26,26,255).
    y_true = args[num_layers:]
    yolo_outputs = args[:num_layers]
    # Anchor indices per head.
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    # Network input size, e.g. (608, 608) = head-0 grid * 32.
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    loss = 0
    # m is the batch size (a tensor).
    m = K.shape(yolo_outputs[0])[0]
    mf = K.cast(m, K.dtype(yolo_outputs[0]))
    for l in range(num_layers):
        # Using the first level (m,13,13,3,85) as an example:
        # objectness mask of cells that contain a target, (m,13,13,3,1).
        object_mask = y_true[l][..., 4:5]
        # Corresponding class targets (m,13,13,3,80).
        true_class_probs = y_true[l][..., 5:]
        if label_smoothing:
            true_class_probs = _smooth_labels(true_class_probs,
                                              label_smoothing)
        # Decode the head output:
        #   grid (13,13,1,2), raw_pred (m,13,13,3,85),
        #   decoded xy/wh each (m,13,13,3,2).
        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)
        # Decoded predicted boxes (m,13,13,3,4).
        pred_box = K.concatenate([pred_xy, pred_wh])
        # Negative-sample collection starts from an empty TensorArray.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')
        # Compute ignore_mask for each image.
        def loop_body(b, ignore_mask):
            # All real boxes in image b: (n, 4).
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            # IoU of every prediction against all true boxes:
            # pred_box[b] is (13,13,3,4) -> iou (13,13,3,n).
            iou = box_iou(pred_box[b], true_box)
            # (13,13,3): best overlap per predicted box.
            best_iou = K.max(iou, axis=-1)
            # Predictions overlapping GT above the threshold are ignored
            # (not penalized as background).
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh,
                          K.dtype(true_box)))
            return b + 1, ignore_mask
        # Iterate over all images in the batch.
        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body,
                                       [0, ignore_mask])
        # Pack the per-image results.
        ignore_mask = ignore_mask.stack()
        # (m,13,13,3,1)
        ignore_mask = K.expand_dims(ignore_mask, -1)
        # Larger GT boxes get a smaller weight; small boxes weigh more.
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]
        # Calculate ciou loss as location loss
        raw_true_box = y_true[l][..., 0:4]
        ciou = box_ciou(pred_box, raw_true_box)
        ciou_loss = object_mask * box_loss_scale * (1 - ciou)
        ciou_loss = K.sum(ciou_loss) / mf
        location_loss = ciou_loss
        # Where a box exists: cross-entropy between 1 and the confidence.
        # Where none exists and best_iou < ignore_thresh: the cell counts
        # as a negative sample (the threshold limits negative count).
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += location_loss + confidence_loss + class_loss
        # if print_loss:
        #     loss = tf.Print(loss, [loss, confidence_loss, class_loss, location_loss], message='loss: ')
    loss = K.expand_dims(loss, axis=-1)
    return loss
def yolo_loss(args, anchors, num_classes, ignore_thresh=0.5, print_loss=False):
    """Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body_full or yolo_body_tiny
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)
    """
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] \
        if num_layers == 3 else [[3, 4, 5], [0, 1, 2]]
    # Network input size = head-0 grid * 32 (its stride).
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32,
                         K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[layer_idx])[1:3],
                          K.dtype(y_true[0]))
                   for layer_idx in range(num_layers)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))
    for layer_idx in range(num_layers):
        # Objectness mask and class targets for this head.
        object_mask = y_true[layer_idx][..., 4:5]
        true_class_probs = y_true[layer_idx][..., 5:]
        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[layer_idx],
                                                     anchors[anchor_mask[layer_idx]],
                                                     num_classes, input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])
        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[layer_idx][..., :2] * grid_shapes[layer_idx][::-1] - grid
        raw_true_wh = K.log(y_true[layer_idx][..., 2:4] /
                            anchors[anchor_mask[layer_idx]] * input_shape[::-1])
        # Keras switch allows scalr condition, bit here is expected to have elemnt-wise
        # also the `object_mask` has in last dimension 1 but the in/out puts has 2 (some replication)
        # raw_true_wh = tf.where(tf.greater(K.concatenate([object_mask] * 2), 0),
        #                        raw_true_wh, K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        # Smaller GT boxes weigh more.
        box_loss_scale = 2 - y_true[layer_idx][..., 2:3] * y_true[layer_idx][..., 3:4]
        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')
        def _loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[layer_idx][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou_xywh(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            # Predictions overlapping GT above the threshold are excluded
            # from the background confidence penalty.
            ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh,
                                                      K.dtype(true_box)))
            return b + 1, ignore_mask
        _, ignore_mask = while_loop(
            lambda b, *args: b < m, _loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)
        # K.binary_crossentropy is helpful to avoid exp overflow.
        ce = K.binary_crossentropy(raw_true_xy, raw_pred[..., 0:2],
                                   from_logits=True)
        xy_loss = object_mask * box_loss_scale * ce
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh - raw_pred[..., 2:4])
        ce_loss = K.binary_crossentropy(object_mask, raw_pred[..., 4:5],
                                        from_logits=True)
        confidence_loss = object_mask * ce_loss + (1 - object_mask) * ce_loss * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(true_class_probs,
                                                         raw_pred[..., 5:],
                                                         from_logits=True)
        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss,
                                   class_loss, K.sum(ignore_mask)],
                            message='loss: ')
    # see: https://github.com/qqwweee/keras-yolo3/issues/129#issuecomment-408855511
    return K.expand_dims(loss, axis=0)
def yolo5_loss(args, anchors, num_classes, ignore_thresh=.5, label_smoothing=0, elim_grid_sense=True, use_focal_loss=False, use_focal_obj_loss=False, use_softmax_loss=False, use_giou_loss=True, use_diou_loss=False):
    '''
    YOLOv5 loss function.

    Parameters
    ----------
    args: [*yolo_outputs, *y_true]
        yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
        y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss
        (kept for signature compatibility -- the ignore-mask computation below is
        commented out because the YOLOv5-style confidence loss does not use it)
    label_smoothing: float, smoothing factor applied to class targets when non-zero
    elim_grid_sense: bool, currently unused -- scale_x_y is hard-coded below
        (YOLOv5 always uses grid-sensitivity elimination)
    use_focal_loss / use_focal_obj_loss / use_softmax_loss: loss-variant switches
    use_giou_loss / use_diou_loss: select GIoU or DIoU as the location loss;
        if both are False a ValueError is raised

    Returns
    -------
    loss: tensor, shape=(1,)
    total_location_loss, total_confidence_loss, total_class_loss: scalar tensors
    '''
    num_layers = len(anchors) // 3  # default setting: 3 anchors per predict head
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]

    # gains for box, class and confidence loss
    # from https://github.com/ultralytics/yolov5/blob/master/data/hyp.scratch.yaml
    box_loss_gain = 0.05
    class_loss_gain = 0.5
    confidence_loss_gain = 1.0

    # balance weights for confidence (objectness) loss
    # on different predict heads (x/8, x/16, x/32)
    # from https://github.com/ultralytics/yolov5/blob/master/utils/loss.py#L109
    confidence_balance_weights = [4.0, 1.0, 0.4]

    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
        # YOLOv5 enable "elim_grid_sense" by default
        scale_x_y = [2.0, 2.0, 2.0] #if elim_grid_sense else [None, None, None]
    else:
        anchor_mask = [[3, 4, 5], [0, 1, 2]]
        scale_x_y = [1.05, 1.05] #if elim_grid_sense else [None, None]

    # network input size in pixels, derived from the first head's grid (stride 32)
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    # per-head grid sizes as float tensors, (height, width)
    grid_shapes = [K.cast(K.shape(yolo_outputs[i])[1:3], K.dtype(y_true[0])) for i in range(num_layers)]

    loss = 0
    total_location_loss = 0
    total_confidence_loss = 0
    total_class_loss = 0
    batch_size = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    batch_size_f = K.cast(batch_size, K.dtype(yolo_outputs[0]))

    for i in range(num_layers):
        # 1 where a ground-truth box is assigned to this cell/anchor, else 0
        object_mask = y_true[i][..., 4:5]
        true_class_probs = y_true[i][..., 5:]
        if label_smoothing:
            true_class_probs = _smooth_labels(true_class_probs, label_smoothing)
            #true_objectness_probs = _smooth_labels(object_mask, label_smoothing)
        #else:
            #true_objectness_probs = object_mask

        grid, raw_pred, pred_xy, pred_wh = yolo5_decode(yolo_outputs[i], anchors[anchor_mask[i]], num_classes, input_shape, scale_x_y=scale_x_y[i], calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        # xy target in grid units relative to the cell; grid_shapes is (h, w) so
        # reverse to (w, h) to match the (x, y) channel order.
        raw_true_xy = y_true[i][..., :2] * grid_shapes[i][::-1] - grid
        raw_true_wh = K.log(y_true[i][..., 2:4] / anchors[anchor_mask[i]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-inf
        #box_loss_scale = 2 - y_true[i][...,2:3]*y_true[i][...,3:4]

        # Find ignore mask, iterate over each of batch.
        # (disabled: the YOLOv5-style confidence loss below penalizes every cell,
        # so no ignore mask is applied)
        #ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        #object_mask_bool = K.cast(object_mask, 'bool')
        #def loop_body(b, ignore_mask):
            #true_box = tf.boolean_mask(y_true[i][b,...,0:4], object_mask_bool[b,...,0])
            #iou = box_iou(pred_box[b], true_box)
            #best_iou = K.max(iou, axis=-1)
            #ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
            #return b+1, ignore_mask
        #_, ignore_mask = tf.while_loop(lambda b,*args: b<batch_size, loop_body, [0, ignore_mask])
        #ignore_mask = ignore_mask.stack()
        #ignore_mask = K.expand_dims(ignore_mask, -1)

        if use_giou_loss:
            # Calculate GIoU loss as location loss
            raw_true_box = y_true[i][..., 0:4]
            giou = box_giou(raw_true_box, pred_box)
            giou_loss = object_mask * (1 - giou)
            location_loss = giou_loss
            iou = giou
        elif use_diou_loss:
            # Calculate DIoU loss as location loss
            raw_true_box = y_true[i][..., 0:4]
            diou = box_diou(raw_true_box, pred_box)
            diou_loss = object_mask * (1 - diou)
            location_loss = diou_loss
            iou = diou
        else:
            raise ValueError('Unsupported IOU loss type')

        # Standard YOLOv3 location loss (kept for reference, unused here)
        # K.binary_crossentropy is helpful to avoid exp overflow.
        #xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[...,0:2], from_logits=True)
        #wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh-raw_pred[...,2:4])
        #xy_loss = K.sum(xy_loss) / batch_size_f
        #wh_loss = K.sum(wh_loss) / batch_size_f
        #location_loss = xy_loss + wh_loss

        # use box iou for positive sample as objectness ground truth,
        # to calculate confidence loss
        # from https://github.com/ultralytics/yolov5/blob/master/utils/loss.py#L127
        # NOTE(review): giou/diou can be negative, so this BCE target can be
        # negative for badly overlapping positives -- confirm this is intended
        # (the reference implementation clamps/detaches the iou).
        true_objectness_probs = object_mask * iou

        if use_focal_obj_loss:
            # Focal loss for objectness confidence
            confidence_loss = sigmoid_focal_loss(true_objectness_probs, raw_pred[..., 4:5]) * confidence_balance_weights[i]
        else:
            confidence_loss = K.binary_crossentropy(true_objectness_probs, raw_pred[..., 4:5], from_logits=True) * confidence_balance_weights[i]
            #confidence_loss = object_mask * K.binary_crossentropy(true_objectness_probs, raw_pred[...,4:5], from_logits=True)+ \
                #(1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask

        if use_focal_loss:
            # Focal loss for classification score
            if use_softmax_loss:
                class_loss = softmax_focal_loss(true_class_probs, raw_pred[..., 5:])
            else:
                class_loss = sigmoid_focal_loss(true_class_probs, raw_pred[..., 5:])
        else:
            if use_softmax_loss:
                # use softmax style classification output
                class_loss = object_mask * K.expand_dims(K.categorical_crossentropy(true_class_probs, raw_pred[..., 5:], from_logits=True), axis=-1)
            else:
                # use sigmoid style classification output
                class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[..., 5:], from_logits=True)

        # reduce to scalars, apply per-component gains, normalize by batch size
        confidence_loss = confidence_loss_gain * K.sum(confidence_loss) / batch_size_f
        class_loss = class_loss_gain * K.sum(class_loss) / batch_size_f
        location_loss = box_loss_gain * K.sum(location_loss) / batch_size_f

        loss += location_loss + confidence_loss + class_loss
        total_location_loss += location_loss
        total_confidence_loss += confidence_loss
        total_class_loss += class_loss

    # Fit for tf 2.0.0 loss shape
    loss = K.expand_dims(loss, axis=-1)

    return loss, total_location_loss, total_confidence_loss, total_class_loss
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''YOLOv3 loss function.

    Parameters
    ----------
    args: list of tensors, [*model_body.output, *y_true]
        yolo_outputs: per-scale raw predictions, shapes
            (m,13,13,3,5+num_classes), (m,26,26,3,...), (m,52,52,3,...)
        y_true: per-scale ground truth with the same shapes
    anchors: array, shape=(N, 2), anchor wh in pixels
    num_classes: integer
    ignore_thresh: float, a no-object prediction whose best IoU with any ground
        truth box is below this threshold is counted as a negative sample
    print_loss: bool, if True wrap the loss in tf.Print to log its components

    Returns
    -------
    loss: tensor, shape=(1,)
    '''
    # three predict heads for full YOLOv3, two for tiny
    num_layers = len(anchors) // 3
    # split predictions from ground truth
    y_true = args[num_layers:]
    yolo_outputs = args[:num_layers]

    # anchor indices per scale:
    #   6,7,8 -> 116,90  156,198  373,326
    #   3,4,5 -> 30,61   62,45    59,119
    #   0,1,2 -> 10,13   16,30    33,23
    # FIX: the tiny-YOLO mask was [[3,4,5],[1,2,3]]; the standard assignment is
    # [[3,4,5],[0,1,2]] (anchors 1,2,3 would overlap between the two heads).
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [0, 1, 2]]

    # network input size in pixels (e.g. 416x416), from the stride-32 head
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    # per-head grid sizes: 13x13, 26x26, 52x52
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0

    # m is the batch size (tensor); mf its float counterpart for averaging
    m = K.shape(yolo_outputs[0])[0]
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        # objectness target: 1 where a gt box is assigned (m,13,13,3,1)
        object_mask = y_true[l][..., 4:5]
        # class targets (m,13,13,3,num_classes)
        true_class_probs = y_true[l][..., 5:]

        # decode raw head output: grid is (13,13,1,2) cell offsets, raw_pred the
        # undecoded (m,13,13,3,5+C) tensor, pred_xy/pred_wh the decoded boxes
        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)
        # decoded predicted boxes (m,13,13,3,4)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Build the negative-sample (ignore) mask, one batch item at a time.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # all real boxes of image b: (n, 4)
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            # IoU of every prediction against every real box: (13,13,3,n)
            iou = box_iou(pred_box[b], true_box)
            # best IoU per prediction: (13,13,3)
            best_iou = K.max(iou, axis=-1)
            # predictions with no matching gt box (IoU below threshold)
            # count as negatives for the confidence loss
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        # iterate over the whole batch
        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body,
                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        # (m,13,13,3,1)
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # Encode ground truth in the same raw form as the prediction.
        # FIX: grid_shapes[l] is (height, width) but the xy channels are (x, y),
        # so reverse to (width, height); previously this used grid_shapes[l][:],
        # which is wrong for non-square grids.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        # keep wh only where an object exists; avoids log(0) = -inf elsewhere
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))
        # weight small boxes more heavily
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # K.binary_crossentropy on raw logits avoids exp overflow
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])

        # Positives: BCE against target 1.
        # Negatives: BCE against target 0, but only where best_iou < ignore_thresh
        # (ignore_mask limits the number of negatives).
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        # sum over all cells/anchors, average over the batch
        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ],
                            message='loss: ')
    loss = K.expand_dims(loss, axis=-1)
    return loss
def call(self, inputs, training=None):
    """Quantization-aware batch-normalization forward pass.

    quant_mode selects how much of the computation is quantized:
      None        -> plain float batch normalization
      'extrinsic' -> float batch norm, then quantize only the output
      'hybrid'    -> quantize inputs and weights, float batch norm, quantize output
      'intrinsic' -> fully quantized core (QuantizedBatchNormalizationCore)

    self.quantizer may be a single quantizer or a list of three
    [input, weight, output] quantizers.

    Raises
    ------
    ValueError: if quant_mode is not one of the recognized values.
    """
    if self.quant_mode not in [None, 'extrinsic', 'hybrid', 'intrinsic']:
        raise ValueError(
            'Invalid quantization mode. The \'quant_mode\' argument must be one of \'extrinsic\' , \'intrinsic\' , \'hybrid\' or None.'
        )

    # Unpack per-role quantizers, or reuse one quantizer for all roles.
    if isinstance(self.quantizer, list) and len(self.quantizer) == 3:
        quantizer_input = self.quantizer[0]
        quantizer_weight = self.quantizer[1]
        quantizer_output = self.quantizer[2]
    else:
        quantizer_input = self.quantizer
        quantizer_weight = self.quantizer
        quantizer_output = self.quantizer

    input_shape = K.int_shape(inputs)
    # Prepare broadcasting shape: all 1s except the normalized axis.
    ndim = len(input_shape)
    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self.axis] = input_shape[self.axis]

    # Broadcasting is only needed when the normalized axis is not the last one.
    needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1])

    def normalize_inference():
        """Inference-path normalization using the moving statistics."""
        if needs_broadcasting:
            # In this case we must explicitly broadcast all parameters.
            broadcast_moving_mean = K.reshape(self.moving_mean,
                                              broadcast_shape)
            broadcast_moving_variance = K.reshape(self.moving_variance,
                                                  broadcast_shape)
            if self.center:
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
            else:
                broadcast_beta = None
            if self.scale:
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
            else:
                broadcast_gamma = None

            # Quantize the (broadcast) weights for the quantized-weight modes.
            if self.quant_mode in ['hybrid', 'intrinsic']:
                broadcast_moving_mean = quantizer_weight.quantize(
                    broadcast_moving_mean)
                broadcast_moving_variance = quantizer_weight.quantize(
                    broadcast_moving_variance)
                if self.center:
                    broadcast_beta = quantizer_weight.quantize(broadcast_beta)
                if self.scale:
                    broadcast_gamma = quantizer_weight.quantize(
                        broadcast_gamma)

            if self.quant_mode in ['hybrid', 'intrinsic']:
                quantized_inputs = quantizer_input.quantize(inputs)

            if self.quant_mode == 'intrinsic':
                return QuantizedBatchNormalizationCore(
                    quantized_inputs, broadcast_moving_mean,
                    broadcast_moving_variance, broadcast_beta,
                    broadcast_gamma, self.epsilon, quantizer_output)
            elif self.quant_mode == 'hybrid':
                output = K.batch_normalization(quantized_inputs,
                                               broadcast_moving_mean,
                                               broadcast_moving_variance,
                                               broadcast_beta,
                                               broadcast_gamma,
                                               axis=self.axis,
                                               epsilon=self.epsilon)
                return quantizer_output.quantize(output)
            elif self.quant_mode == 'extrinsic':
                output = K.batch_normalization(inputs,
                                               broadcast_moving_mean,
                                               broadcast_moving_variance,
                                               broadcast_beta,
                                               broadcast_gamma,
                                               axis=self.axis,
                                               epsilon=self.epsilon)
                return quantizer_output.quantize(output)
            elif self.quant_mode is None:
                return K.batch_normalization(inputs,
                                             broadcast_moving_mean,
                                             broadcast_moving_variance,
                                             broadcast_beta,
                                             broadcast_gamma,
                                             axis=self.axis,
                                             epsilon=self.epsilon)
        else:
            # No broadcasting needed; quantize the raw weights when required.
            if self.quant_mode in ['hybrid', 'intrinsic']:
                moving_mean = quantizer_weight.quantize(self.moving_mean)
                moving_variance = quantizer_weight.quantize(
                    self.moving_variance)
                if self.center:
                    beta = quantizer_weight.quantize(self.beta)
                else:
                    beta = self.beta
                if self.scale:
                    gamma = quantizer_weight.quantize(self.gamma)
                else:
                    gamma = self.gamma

            if self.quant_mode in ['hybrid', 'intrinsic']:
                quantized_inputs = quantizer_input.quantize(inputs)

            if self.quant_mode == 'intrinsic':
                return QuantizedBatchNormalizationCore(
                    quantized_inputs, moving_mean, moving_variance, beta,
                    gamma, self.epsilon, quantizer_output)
            elif self.quant_mode == 'hybrid':
                output = K.batch_normalization(quantized_inputs,
                                               moving_mean,
                                               moving_variance,
                                               beta,
                                               gamma,
                                               axis=self.axis,
                                               epsilon=self.epsilon)
                return quantizer_output.quantize(output)
            elif self.quant_mode == 'extrinsic':
                output = K.batch_normalization(inputs,
                                               self.moving_mean,
                                               self.moving_variance,
                                               self.beta,
                                               self.gamma,
                                               axis=self.axis,
                                               epsilon=self.epsilon)
                return quantizer_output.quantize(output)
            # FIX: was `== None`; use identity comparison (matches the
            # broadcast branch above).
            elif self.quant_mode is None:
                return K.batch_normalization(inputs,
                                             self.moving_mean,
                                             self.moving_variance,
                                             self.beta,
                                             self.gamma,
                                             axis=self.axis,
                                             epsilon=self.epsilon)

    # If the learning phase is *static* and set to inference:
    # FIX: was `if not training:`, which also matched training=None and thus
    # never honored the dynamic learning phase. Keras BatchNormalization uses
    # `training in {0, False}` so that training=None falls through to
    # K.in_train_phase below.
    if training in {0, False}:
        return normalize_inference()

    # If the learning phase is either dynamic, or set to training:
    normed_training, mean, variance = K.normalize_batch_in_training(
        inputs, self.gamma, self.beta, reduction_axes, epsilon=self.epsilon)

    if K.backend() != 'cntk':
        sample_size = K.prod(
            [K.shape(inputs)[axis] for axis in reduction_axes])
        sample_size = K.cast(sample_size, dtype=K.dtype(inputs))
        # sample variance - unbiased estimator of population variance
        variance *= sample_size / (sample_size - (1.0 + self.epsilon))

    self.add_update([
        K.moving_average_update(self.moving_mean, mean, self.momentum),
        K.moving_average_update(self.moving_variance, variance, self.momentum)
    ], inputs)

    # Pick the normalized form corresponding to the training phase.
    return K.in_train_phase(normed_training,
                            normalize_inference,
                            training=training)
def yolo_loss(y_true, y_pred):
    """YOLOv1-style loss on a 7x7 grid with 2 boxes per cell and 1 class channel.

    y_true layout (last axis, 6 channels): [class(1), box xywh(4), confidence(1)]
    y_pred layout (last axis, 11 channels): [class(1), trust B1/B2(2), boxes(8)]
    Image size is hard-coded to 224 (passed to yolo_head and used to normalize
    the coordinate loss).

    Returns a scalar tensor: confidence loss + class loss + box loss.
    """
    label_class = y_true[..., :1]  # ? * 7 * 7 * 1  # class label
    label_box = y_true[..., 1:5]  # ? * 7 * 7 * 4  # ground-truth box coords
    response_mask = y_true[..., 5]  # ? * 7 * 7  # cell responsibility flag
    response_mask = K.expand_dims(response_mask)  # ? * 7 * 7 * 1

    predict_class = y_pred[..., :1]  # ? * 7 * 7 * 1  # class prediction
    predict_trust = y_pred[..., 1:3]  # ? * 7 * 7 * 2  # confidence of BB1 and BB2
    predict_box = y_pred[..., 3:]  # ? * 7 * 7 * 8  # coords of BB1 and BB2

    _label_box = K.reshape(label_box, [-1, 7, 7, 1, 4])
    _predict_box = K.reshape(predict_box, [-1, 7, 7, 2, 4])

    label_xy, label_wh = yolo_head(
        _label_box, img_size=224)  # ? * 7 * 7 * 1 * 2, ? * 7 * 7 * 1 * 2
    label_xy = K.expand_dims(label_xy, 3)  # ? * 7 * 7 * 1 * 1 * 2
    label_wh = K.expand_dims(label_wh, 3)  # ? * 7 * 7 * 1 * 1 * 2
    label_xy_min, label_xy_max = X_Y_W_H_To_Min_Max(
        label_xy, label_wh)  # ? * 7 * 7 * 1 * 1 * 2, ? * 7 * 7 * 1 * 1 * 2

    predict_xy, predict_wh = yolo_head(
        _predict_box, img_size=224)  # ? * 7 * 7 * 2 * 2, ? * 7 * 7 * 2 * 2
    predict_xy = K.expand_dims(predict_xy, 4)  # ? * 7 * 7 * 2 * 1 * 2
    predict_wh = K.expand_dims(predict_wh, 4)  # ? * 7 * 7 * 2 * 1 * 2
    predict_xy_min, predict_xy_max = X_Y_W_H_To_Min_Max(
        predict_xy, predict_wh)  # ? * 7 * 7 * 2 * 1 * 2, ? * 7 * 7 * 2 * 1 * 2

    # IoU of each predicted box against the cell's label box
    iou_scores = iou(predict_xy_min, predict_xy_max, label_xy_min,
                     label_xy_max)  # ? * 7 * 7 * 2 * 1
    best_ious = K.max(iou_scores, axis=4)  # ? * 7 * 7 * 2
    best_box = K.max(best_ious, axis=3, keepdims=True)  # ? * 7 * 7 * 1

    # 1 for the responsible box in each cell (both boxes on an exact tie)
    box_mask = K.cast(best_ious >= best_box, K.dtype(best_ious))  # ? * 7 * 7 * 2

    # Confidence loss: 0.5 weight for non-responsible predictions toward 0,
    # full weight for the responsible box toward 1.
    no_object_loss = 0.5 * (
        1 - box_mask * response_mask) * K.square(0 - predict_trust)
    object_loss = box_mask * response_mask * K.square(1 - predict_trust)
    confidence_loss = no_object_loss + object_loss
    confidence_loss = K.sum(confidence_loss)

    # Class loss only on responsible cells.
    class_loss = response_mask * K.square(label_class - predict_class)
    class_loss = K.sum(class_loss)

    # Re-decode boxes without the extra broadcast dims for the coordinate loss.
    _label_box = K.reshape(label_box, [-1, 7, 7, 1, 4])
    _predict_box = K.reshape(predict_box, [-1, 7, 7, 2, 4])

    label_xy, label_wh = yolo_head(
        _label_box, img_size=224)  # ? * 7 * 7 * 1 * 2, ? * 7 * 7 * 1 * 2
    predict_xy, predict_wh = yolo_head(
        _predict_box, img_size=224)  # ? * 7 * 7 * 2 * 2, ? * 7 * 7 * 2 * 2

    box_mask = K.expand_dims(box_mask)
    response_mask = K.expand_dims(response_mask)

    # Coordinate loss (weight 5), wh compared via square roots as in YOLOv1.
    # NOTE(review): K.sqrt(predict_wh) yields NaN if the network predicts a
    # negative wh -- presumably wh is constrained non-negative upstream; verify.
    box_loss = 5 * box_mask * response_mask * K.square(
        (label_xy - predict_xy) / 224)
    box_loss += 5 * box_mask * response_mask * K.square(
        (K.sqrt(label_wh) - K.sqrt(predict_wh)) / 224)
    box_loss = K.sum(box_loss)

    loss = confidence_loss + class_loss + box_loss

    return loss
def yolo2_loss(args, anchors, num_classes, label_smoothing=0, use_crossentropy_loss=False, use_crossentropy_obj_loss=False, rescore_confidence=False, use_giou_loss=False, use_diou_loss=False):
    """YOLOv2 loss function.

    Parameters
    ----------
    args: (yolo_output, y_true)
        yolo_output : tensor
            Final convolutional layer features.
        y_true : array
            output of preprocess_true_boxes, with shape
            [conv_height, conv_width, num_anchors, 6]
    anchors : tensor
        Anchor boxes for model.
    num_classes : int
        Number of object classes.
    label_smoothing : float
        Smoothing factor applied to the one-hot class targets when non-zero.
    use_crossentropy_loss / use_crossentropy_obj_loss : bool
        Replace the original squared-error class/objectness terms with
        cross-entropy (YOLOv2 itself does not use cross-entropy).
    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.
    use_giou_loss / use_diou_loss : bool
        Use GIoU/DIoU as the location loss instead of the original
        Darknet-transformed squared error.

    Returns
    -------
    total_loss : float
        total mean YOLOv2 loss across minibatch
    location_loss_sum, confidence_loss_sum, classification_loss_sum : scalars
    """
    (yolo_output, y_true) = args
    num_anchors = len(anchors)
    yolo_output_shape = K.shape(yolo_output)
    # network input size in pixels (stride 32)
    input_shape = K.cast(yolo_output_shape[1:3] * 32, K.dtype(y_true))
    grid_shape = K.cast(yolo_output_shape[1:3], K.dtype(y_true))  # height, width
    batch_size_f = K.cast(yolo_output_shape[0], K.dtype(yolo_output))  # batch size, float tensor

    # loss component weights (Darknet defaults)
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    location_scale = 1

    grid, raw_pred, pred_xy, pred_wh = yolo2_head(yolo_output,
                                                  anchors,
                                                  num_classes,
                                                  input_shape,
                                                  calc_loss=True)
    pred_confidence = K.sigmoid(raw_pred[..., 4:5])
    pred_class_prob = K.softmax(raw_pred[..., 5:])

    object_mask = y_true[..., 4:5]

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_boxes = K.concatenate([pred_xy, pred_wh])
    pred_boxes = K.expand_dims(pred_boxes, 4)

    raw_true_boxes = y_true[..., 0:4]
    raw_true_boxes = K.expand_dims(raw_true_boxes, 4)

    iou_scores = box_iou(pred_boxes, raw_true_boxes)
    # NOTE(review): squeezing axis=0 here presumably removes a leading dim
    # produced by box_iou -- confirm against box_iou's return shape.
    iou_scores = K.squeeze(iou_scores, axis=0)

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLOv2 does not use binary cross-entropy. Here we try it.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - object_mask))
    if use_crossentropy_obj_loss:
        no_objects_loss = no_object_weights * K.binary_crossentropy(
            K.zeros(K.shape(pred_confidence)),
            pred_confidence,
            from_logits=False)

        if rescore_confidence:
            objects_loss = (object_scale * object_mask *
                            K.binary_crossentropy(
                                best_ious, pred_confidence, from_logits=False))
        else:
            objects_loss = (
                object_scale * object_mask *
                K.binary_crossentropy(K.ones(K.shape(pred_confidence)),
                                      pred_confidence,
                                      from_logits=False))
    else:
        # original YOLOv2 squared-error objectness loss
        no_objects_loss = no_object_weights * K.square(-pred_confidence)

        if rescore_confidence:
            objects_loss = (object_scale * object_mask *
                            K.square(best_ious - pred_confidence))
        else:
            objects_loss = (object_scale * object_mask *
                            K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLOv2 does not use categorical cross-entropy loss.
    #       Here we try it.
    matching_classes = K.cast(y_true[..., 5], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)

    if label_smoothing:
        matching_classes = _smooth_labels(matching_classes, label_smoothing)

    if use_crossentropy_loss:
        classification_loss = (
            class_scale * object_mask *
            K.expand_dims(K.categorical_crossentropy(
                matching_classes, pred_class_prob, from_logits=False),
                          axis=-1))
    else:
        classification_loss = (class_scale * object_mask *
                               K.square(matching_classes - pred_class_prob))

    if use_giou_loss:
        # Calculate GIoU loss as location loss
        # NOTE(review): pred_boxes/raw_true_boxes still carry the extra
        # expand_dims(…, 4) axis here -- confirm box_giou handles that rank.
        giou = box_giou(pred_boxes, raw_true_boxes)
        giou = K.squeeze(giou, axis=-1)
        giou_loss = location_scale * object_mask * (1 - giou)
        location_loss = giou_loss
    elif use_diou_loss:
        # Calculate DIoU loss as location loss
        diou = box_diou(pred_boxes, raw_true_boxes)
        diou = K.squeeze(diou, axis=-1)
        diou_loss = location_scale * object_mask * (1 - diou)
        location_loss = diou_loss
    else:
        # YOLOv2 location loss for matching detection boxes.
        # Darknet trans box to calculate loss.
        trans_true_xy = y_true[..., :2] * grid_shape[..., ::-1] - grid
        trans_true_wh = K.log(y_true[..., 2:4] / anchors *
                              input_shape[..., ::-1])
        trans_true_wh = K.switch(
            object_mask, trans_true_wh,
            K.zeros_like(trans_true_wh))  # avoid log(0)=-inf
        trans_true_boxes = K.concatenate([trans_true_xy, trans_true_wh])

        # Unadjusted box predictions for loss.
        trans_pred_boxes = K.concatenate(
            (K.sigmoid(raw_pred[..., 0:2]), raw_pred[..., 2:4]), axis=-1)

        location_loss = (location_scale * object_mask *
                         K.square(trans_true_boxes - trans_pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss) / batch_size_f
    classification_loss_sum = K.sum(classification_loss) / batch_size_f
    location_loss_sum = K.sum(location_loss) / batch_size_f
    total_loss = 0.5 * (confidence_loss_sum + classification_loss_sum +
                        location_loss_sum)

    # Fit for tf 2.0.0 loss shape
    total_loss = K.expand_dims(total_loss, axis=-1)

    return total_loss, location_loss_sum, confidence_loss_sum, classification_loss_sum
def get_updates(self, loss, params):
    """Build the optimizer's update ops.

    SGD-with-momentum variant featuring:
      * a Gaussian-cycling learning-rate schedule centered at self.peaklriter,
      * per-parameter lr / momentum / clip overrides matched by name pattern
        (via set_pattern_find),
      * update-magnitude clipping to a fraction (self.UPCLIP) of the
        parameter's mean absolute value,
      * optional per-parameter value clipping after the step.

    Returns the list of update ops (also stored on self.updates).
    """
    grads = self.get_gradients(loss, params)
    # first update the number of iterations
    self.updates = [K.update_add(self.iterations, 1)]

    # Cycling Gaussian LR: a bump centered at `center`,
    # lr(i) = min_lr + max_lr * exp(-(i - center)^2 / (center * lrsigma)^2)
    def gauss_lr(min_lr, max_lr, center, lrsigma, i):
        return (min_lr + max_lr * K.exp(-(i - center)**2 / (center * lrsigma)**2))

    ite_casted = K.cast(self.iterations, K.dtype(self.peaklriter))
    all_lr = gauss_lr(self.min_lr['all'], self.peak_lr['all'],
                      self.peaklriter, self.lrsigma, ite_casted)
    self.updates.append(K.update(self.lr['all'], all_lr))

    shapes = [K.int_shape(p) for p in params]
    moments = [K.zeros(s) for s in shapes]
    self.weights = [self.iterations] + moments

    # loop-invariant alias; hoisted out of the parameter loop
    _clip_by_val = K.tf.clip_by_value

    for p, g, m in zip(params, grads, moments):
        # per-parameter learning rate: use the pattern-matched override when
        # both a min and peak lr exist for it, else the global schedule
        lrptrkey = set_pattern_find(p.name, self.lr.keys())
        if lrptrkey:
            if self.verbose > 0:
                print("Setting different learning rate for ", p.name, " : ",
                      K.eval(self.lr[lrptrkey]))
            if set_pattern_find(p.name, self.min_lr.keys()) and set_pattern_find(p.name, self.peak_lr.keys()):
                p_lr = gauss_lr(self.min_lr[lrptrkey], self.peak_lr[lrptrkey],
                                self.peaklriter, self.lrsigma, ite_casted)
            else:
                p_lr = gauss_lr(self.min_lr['all'], self.peak_lr['all'],
                                self.peaklriter, self.lrsigma, ite_casted)
        else:
            p_lr = self.lr['all']

        # per-parameter momentum override
        momptrkey = set_pattern_find(p.name, self.momentum.keys())
        if momptrkey:
            if self.verbose > 0:
                print("Setting different momentum for ", p.name, " , ",
                      K.eval(self.momentum[momptrkey]))
            momentum = self.momentum[momptrkey]
        else:
            momentum = self.momentum['all']

        if self.nesterov:
            updt = momentum * (momentum * m - p_lr * g) - p_lr * g
        else:
            updt = momentum * m - p_lr * g

        # Clip the update magnitude (not the value): keep the sign, bound
        # |update| by UPCLIP * mean(|p|).
        margin = K.mean(K.abs(p)) * K.constant(self.UPCLIP)
        min_v = K.zeros_like(margin)
        updt_sign = K.sign(updt)
        updt_val = _clip_by_val(K.abs(updt), min_v, margin)

        v = updt_sign * updt_val  # velocity
        new_p = p + v
        self.updates.append(K.update(m, v))

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        # optional per-parameter value clipping after the step
        clptrkey = set_pattern_find(p.name, self.clips.keys())
        if self.clips_val and clptrkey:
            c = K.eval(self.clips[clptrkey])
            if self.verbose > 0:
                print("Clipping variable", p.name, " to ", c)
            new_p = K.clip(new_p, c[0], c[1])
        self.updates.append(K.update(p, new_p))
    return self.updates
def call(self, x):
    """Postprocess part for YOLOv3 model except NMS.

    x: [yolo_outputs (list of per-scale feature tensors), image_shape]
    Decodes each head's raw features into boxes in original-image pixel
    coordinates (letterbox padding removed) and per-class scores.

    Returns
    -------
    boxes: (batch, total_anchors, 4) as (x_min, y_min, x_max, y_max)
    box_scores: (batch, total_anchors, num_classes)
    """
    assert isinstance(x, list)
    #num_layers = len(anchors)//3 # default setting
    yolo_outputs, image_shape = x
    #anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [0,1,2]] # default setting
    #input_shape = K.shape(yolo_outputs[0])[1:3] * 32
    batch_size = K.shape(image_shape)[0]  # batch size, tensor (currently unused)

    boxes = []
    box_scores = []
    for l in range(self.num_layers):
        # get anchor set for each feature layer; strides are 32/16/8 for
        # YOLOv3 and 32/16 for Tiny YOLOv3
        if self.num_layers == 3:  #YOLOv3 arch
            if l == 0:
                anchorset = self.anchors[6:]
                grid_shape = [self.input_dim[0]//32, self.input_dim[1]//32]
            elif l == 1:
                anchorset = self.anchors[3:6]
                grid_shape = [self.input_dim[0]//16, self.input_dim[1]//16]
            elif l == 2:
                anchorset = self.anchors[:3]
                grid_shape = [self.input_dim[0]//8, self.input_dim[1]//8]
        elif self.num_layers == 2:  # Tiny YOLOv3 arch
            if l == 0:
                anchorset = self.anchors[3:]
                grid_shape = [self.input_dim[0]//32, self.input_dim[1]//32]
            elif l == 1:
                anchorset = self.anchors[:3]
                grid_shape = [self.input_dim[0]//16, self.input_dim[1]//16]
        else:
            raise ValueError('Invalid layer number')

        feats = yolo_outputs[l]
        # Convert final layer features to bounding box parameters
        num_anchors = len(anchorset)
        # Reshape to batch, height, width, num_anchors, box_params.
        anchors_tensor = K.reshape(K.constant(anchorset), [1, 1, 1, num_anchors, 2])
        #grid_shape = K.shape(feats)[1:3] # height, width
        # get total anchor number for each feature layer
        total_anchor_num = grid_shape[0] * grid_shape[1] * num_anchors

        # Build a (H, W, 1, 2) grid of (x, y) cell offsets.
        grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                        [1, grid_shape[1], 1, 1])
        grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                        [grid_shape[0], 1, 1, 1])
        grid = K.concatenate([grid_x, grid_y])
        grid = K.cast(grid, K.dtype(feats))

        reshape_feats = K.reshape(
            feats, [-1, grid_shape[0], grid_shape[1], num_anchors, self.num_classes + 5])

        # Adjust preditions to each spatial grid point and anchor size.
        # xy normalized by grid size, wh normalized by network input size.
        box_xy = (K.sigmoid(reshape_feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(reshape_feats))
        box_wh = K.exp(reshape_feats[..., 2:4]) * anchors_tensor / K.cast(self.input_dim[::-1], K.dtype(reshape_feats))
        box_confidence = K.sigmoid(reshape_feats[..., 4:5])
        box_class_probs = K.sigmoid(reshape_feats[..., 5:])

        # correct boxes to the original image shape (undo letterbox padding)
        input_shape = K.cast(self.input_dim, K.dtype(box_xy))
        image_shape = K.cast(image_shape, K.dtype(box_xy))
        #new_shape = K.round(image_shape * K.min(input_shape/image_shape))
        # NOTE(review): the int32 cast truncates where the commented-out
        # K.round rounded to nearest -- confirm this off-by-up-to-one-pixel
        # difference is intended.
        new_shape = K.cast(image_shape * K.min(input_shape/image_shape), dtype='int32')
        new_shape = K.cast(new_shape, dtype='float32')
        offset = (input_shape-new_shape)/2./input_shape
        scale = input_shape/new_shape
        box_xy = (box_xy - offset) * scale
        box_wh *= scale

        box_mins = box_xy - (box_wh / 2.)
        box_maxes = box_xy + (box_wh / 2.)
        _boxes =  K.concatenate([
            box_mins[..., 0:1],  # x_min
            box_mins[..., 1:2],  # y_min
            box_maxes[..., 0:1],  # x_max
            box_maxes[..., 1:2]  # y_max
        ])

        # Scale boxes back to original image shape.
        _boxes *= K.concatenate([image_shape, image_shape])

        # Reshape boxes to flatten the boxes
        _boxes = K.reshape(_boxes, [-1, total_anchor_num, 4])
        _box_scores = box_confidence * box_class_probs
        _box_scores = K.reshape(_box_scores, [-1, total_anchor_num, self.num_classes])

        boxes.append(_boxes)
        box_scores.append(_box_scores)

    # Merge boxes for all feature layers, for further NMS option
    boxes = K.concatenate(boxes, axis=1)
    box_scores = K.concatenate(box_scores, axis=1)

    return boxes, box_scores
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features (channels last).
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_confidence : tensor
        Probability estimate for whether each box contains any object.
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_class_probs : tensor
        Probability distribution estimate for each box over class labels.
    """
    n_anchors = len(anchors)
    # Shape (1, 1, 1, n_anchors, 2) so the anchor sizes broadcast over the grid.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, n_anchors, 2])

    # Dynamic grid dimensions (height, width) for a fully convolutional model.
    grid_dims = K.shape(feats)[1:3]

    # Build a (1, H, W, 1, 2) tensor of per-cell offsets. In YOLO the height
    # index is the innermost iteration.
    row_index = K.arange(0, stop=grid_dims[0])
    col_index = K.arange(0, stop=grid_dims[1])
    row_index = K.tile(row_index, [grid_dims[1]])
    col_index = K.tile(K.expand_dims(col_index, 0), [grid_dims[0], 1])
    col_index = K.flatten(K.transpose(col_index))
    cell_index = K.transpose(K.stack([row_index, col_index]))
    cell_index = K.reshape(cell_index, [1, grid_dims[0], grid_dims[1], 1, 2])
    # cast via feats.dtype (K.dtype(feats) raised an error here historically)
    cell_index = K.cast(cell_index, feats.dtype)

    feats = K.reshape(
        feats, [-1, grid_dims[0], grid_dims[1], n_anchors, num_classes + 5])
    grid_dims = K.cast(K.reshape(grid_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Shift each prediction to its grid cell and scale to [0, 1]; scale wh by
    # the anchor priors relative to the conv resolution.
    box_xy = (box_xy + cell_index) / grid_dims
    box_wh = box_wh * anchors_tensor / grid_dims

    return box_confidence, box_xy, box_wh, box_class_probs
def call(self, inputs):
    """Look up token embeddings for `inputs` and scale them by sqrt(model_dim)."""
    # gather expects integer indices
    ids = inputs if K.dtype(inputs) == 'int32' else K.cast(inputs, 'int32')
    scaled = K.gather(self.embeddings, ids) * (self._model_dim ** 0.5)
    return scaled
def yolo3_loss(args, anchors, num_classes, ignore_thresh=.5, label_smoothing=0, elim_grid_sense=False, use_focal_loss=False, use_focal_obj_loss=False, use_softmax_loss=False, use_giou_loss=False, use_diou_loss=True):
    '''
    YOLOv3 loss function.

    Parameters
    ----------
    args: list of tensors; the first num_layers entries are the raw model
        outputs (yolo_outputs), the remainder are the matching ground-truth
        tensors (y_true).
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss
    label_smoothing: float, smoothing factor applied to class and objectness
        targets when non-zero.
    elim_grid_sense: bool, enable the "eliminate grid sensitivity" x/y scaling.
    use_focal_loss / use_focal_obj_loss: bool, swap BCE for focal loss.
    use_softmax_loss: bool, use softmax (categorical) instead of per-class sigmoid.
    use_giou_loss / use_diou_loss: bool, use GIoU/DIoU as the location loss
        instead of the standard xy/wh regression (GIoU takes priority).

    Returns
    -------
    loss: tensor, shape=(1,), plus the three unaggregated components
        (location, confidence, class) for logging.
    '''
    num_layers = len(anchors)//3 # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]

    # Anchor assignment per output scale; scale_x_y implements the
    # "eliminate grid sensitivity" trick when requested.
    if num_layers == 3:
        anchor_mask = [[6,7,8], [3,4,5], [0,1,2]]
        scale_x_y = [1.05, 1.1, 1.2] if elim_grid_sense else [None, None, None]
    else:
        anchor_mask = [[3,4,5], [0,1,2]]
        scale_x_y = [1.05, 1.05] if elim_grid_sense else [None, None]

    # Network input size is 32x the coarsest feature map (stride 32).
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[i])[1:3], K.dtype(y_true[0])) for i in range(num_layers)]
    loss = 0
    total_location_loss = 0
    total_confidence_loss = 0
    total_class_loss = 0
    batch_size = K.shape(yolo_outputs[0])[0] # batch size, tensor
    batch_size_f = K.cast(batch_size, K.dtype(yolo_outputs[0]))

    for i in range(num_layers):
        # object_mask is 1 where a ground-truth box is assigned to the cell/anchor.
        object_mask = y_true[i][..., 4:5]
        true_class_probs = y_true[i][..., 5:]
        if label_smoothing:
            true_class_probs = _smooth_labels(true_class_probs, label_smoothing)
            true_objectness_probs = _smooth_labels(object_mask, label_smoothing)
        else:
            true_objectness_probs = object_mask

        grid, raw_pred, pred_xy, pred_wh = yolo3_decode(yolo_outputs[i],
             anchors[anchor_mask[i]], num_classes, input_shape, scale_x_y=scale_x_y[i], calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[i][..., :2]*grid_shapes[i][::-1] - grid
        raw_true_wh = K.log(y_true[i][..., 2:4] / anchors[anchor_mask[i]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-inf
        # Weight small boxes more heavily (2 - w*h, with w,h normalized).
        box_loss_scale = 2 - y_true[i][...,2:3]*y_true[i][...,3:4]

        # Find ignore mask, iterate over each of batch.
        # Predictions whose best IoU with any ground-truth box exceeds
        # ignore_thresh are excluded from the no-object confidence penalty.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')
        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[i][b,...,0:4], object_mask_bool[b,...,0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
            return b+1, ignore_mask
        _, ignore_mask = tf.while_loop(lambda b,*args: b<batch_size, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        if use_focal_obj_loss:
            # Focal loss for objectness confidence
            confidence_loss = sigmoid_focal_loss(true_objectness_probs, raw_pred[...,4:5])
        else:
            # Positive cells use the (possibly smoothed) objectness target;
            # negative cells are down-weighted by the ignore mask.
            confidence_loss = object_mask * K.binary_crossentropy(true_objectness_probs, raw_pred[...,4:5], from_logits=True)+ \
                (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask

        if use_focal_loss:
            # Focal loss for classification score
            if use_softmax_loss:
                class_loss = softmax_focal_loss(true_class_probs, raw_pred[...,5:])
            else:
                class_loss = sigmoid_focal_loss(true_class_probs, raw_pred[...,5:])
        else:
            if use_softmax_loss:
                # use softmax style classification output
                class_loss = object_mask * K.expand_dims(K.categorical_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True), axis=-1)
            else:
                # use sigmoid style classification output
                class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True)

        if use_giou_loss:
            # Calculate GIoU loss as location loss
            raw_true_box = y_true[i][...,0:4]
            giou = box_giou(raw_true_box, pred_box)
            giou_loss = object_mask * box_loss_scale * (1 - giou)
            giou_loss = K.sum(giou_loss) / batch_size_f
            location_loss = giou_loss
        elif use_diou_loss:
            # Calculate DIoU loss as location loss
            raw_true_box = y_true[i][...,0:4]
            diou = box_diou(raw_true_box, pred_box)
            diou_loss = object_mask * box_loss_scale * (1 - diou)
            diou_loss = K.sum(diou_loss) / batch_size_f
            location_loss = diou_loss
        else:
            # Standard YOLOv3 location loss
            # K.binary_crossentropy is helpful to avoid exp overflow.
            xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[...,0:2], from_logits=True)
            wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh-raw_pred[...,2:4])
            xy_loss = K.sum(xy_loss) / batch_size_f
            wh_loss = K.sum(wh_loss) / batch_size_f
            location_loss = xy_loss + wh_loss

        confidence_loss = K.sum(confidence_loss) / batch_size_f
        class_loss = K.sum(class_loss) / batch_size_f
        loss += location_loss + confidence_loss + class_loss
        total_location_loss += location_loss
        total_confidence_loss += confidence_loss
        total_class_loss += class_loss

    # Fit for tf 2.0.0 loss shape
    loss = K.expand_dims(loss, axis=-1)

    return loss, total_location_loss, total_confidence_loss, total_class_loss
def Yolov1Loss(y_true, y_pred):
    """YOLOv1 loss for a 7x7 grid, 2 boxes per cell, 20 classes.

    y_true layout (last axis, 25): 20 class one-hot values, 4 box coords
    (x, y, w, h), 1 responsibility flag. y_pred layout (last axis, 30):
    20 class scores, 2 box confidences, 8 box coords (2 boxes x 4).
    Returns the scalar sum of confidence, class, and box-coordinate losses.

    NOTE(review): `yolo_head` is called here with a single argument and two
    return values, unlike the 3-argument yolo_head defined earlier in this
    file — presumably a different YOLOv1-specific helper is in scope; verify.
    """
    label_class = y_true[..., :20]  # ? * 7 * 7 * 20
    label_box = y_true[..., 20:24]  # ? * 7 * 7 * 4
    responsible_mask = y_true[..., 24]  # ? * 7 * 7
    responsible_mask = K.expand_dims(responsible_mask)  # ? * 7 * 7 * 1

    predict_class = y_pred[..., :20]  # ? * 7 * 7 * 20
    predict_bbox_confidences = y_pred[..., 20:22]  # ? * 7 * 7 * 2
    predict_box = y_pred[..., 22:]  # ? * 7 * 7 * 8

    _label_box = K.reshape(label_box, [-1, 7, 7, 1, 4])  # ? * 7 * 7 * 1 * 4 (4 -> 1 * 4)
    _predict_box = K.reshape(
        predict_box, [-1, 7, 7, 2, 4])  # ? * 7 * 7 * 2 * 4 (8 -> 2 * 4)

    label_xy, label_wh = yolo_head(
        _label_box)  # ? * 7 * 7 * 1 * 2, ? * 7 * 7 * 1 * 2
    label_xy = K.expand_dims(label_xy, 3)  # ? * 7 * 7 * 1 * 1 * 2
    label_wh = K.expand_dims(label_wh, 3)  # ? * 7 * 7 * 1 * 1 * 2
    label_xy_min, label_xy_max = xywh2minmax(
        label_xy, label_wh)  # ? * 7 * 7 * 1 * 1 * 2, ? * 7 * 7 * 1 * 1 * 2

    predict_xy, predict_wh = yolo_head(
        _predict_box)  # ? * 7 * 7 * 2 * 2, ? * 7 * 7 * 2 * 2
    predict_xy = K.expand_dims(predict_xy, 4)  # ? * 7 * 7 * 2 * 1 * 2
    predict_wh = K.expand_dims(predict_wh, 4)  # ? * 7 * 7 * 2 * 1 * 2
    predict_xy_min, predict_xy_max = xywh2minmax(
        predict_xy, predict_wh)  # ? * 7 * 7 * 2 * 1 * 2, ? * 7 * 7 * 2 * 1 * 2

    # IoU of each predicted box with the (single) label box per cell; the
    # box with the best IoU becomes "responsible" for the prediction.
    iou_scores = iou(predict_xy_min, predict_xy_max, label_xy_min,
                     label_xy_max)  # ? * 7 * 7 * 2 * 1
    best_ious = K.max(iou_scores, axis=4)  # ? * 7 * 7 * 2
    best_box = K.max(best_ious, axis=3, keepdims=True)  # ? * 7 * 7 * 1

    box_mask = K.cast(best_ious >= best_box, K.dtype(best_ious))  # ? * 7 * 7 * 2

    # Loss term 4 (with lambda_noobj 0.5): penalize confidence of boxes not
    # responsible for any object.
    no_object_loss = 0.5 * (1 - box_mask * responsible_mask
                            ) * K.square(0 - predict_bbox_confidences)
    # Loss term 3 (without lambda_noobj): push responsible boxes toward
    # confidence 1.
    object_loss = box_mask * responsible_mask * K.square(
        1 - predict_bbox_confidences)
    # tf.print("\n- no_object_loss:", K.sum(no_object_loss), output_stream=sys.stdout)
    # tf.print("- object_loss:", K.sum(object_loss), output_stream=sys.stdout)

    confidence_loss = no_object_loss + object_loss
    confidence_loss = K.sum(confidence_loss)

    # Loss term 5: squared class-probability error in responsible cells.
    class_loss = responsible_mask * K.square(label_class - predict_class)

    # Loss term 5 total
    class_loss = K.sum(class_loss)

    _label_box = K.reshape(label_box, [-1, 7, 7, 1, 4])
    _predict_box = K.reshape(predict_box, [-1, 7, 7, 2, 4])

    label_xy, label_wh = yolo_head(
        _label_box)  # ? * 7 * 7 * 1 * 2, ? * 7 * 7 * 1 * 2
    predict_xy, predict_wh = yolo_head(
        _predict_box)  # ? * 7 * 7 * 2 * 2, ? * 7 * 7 * 2 * 2

    # Second expand_dims: masks become ? * 7 * 7 * 2 * 1 (and ? * 7 * 7 * 1 * 1)
    # so they broadcast against the per-box xy/wh tensors below.
    box_mask = K.expand_dims(box_mask)
    responsible_mask = K.expand_dims(responsible_mask)

    # Loss term 1 (lambda_coord = 5): center-coordinate error, normalized by
    # the 448-pixel input size.
    box_loss = 5 * box_mask * responsible_mask * K.square(
        (label_xy - predict_xy) / 448)
    # tf.print("- xy_loss:", K.sum(5 * box_mask * responsible_mask * K.square((label_xy - predict_xy) / 448)), output_stream=sys.stdout)
    # Loss term 2: width/height error. NOTE(review): the paper uses sqrt(w),
    # sqrt(h) (see the commented line below); this version uses plain w, h.
    #box_loss += 5 * box_mask * responsible_mask * K.square(K.sqrt(label_wh/ 448) - K.sqrt(predict_wh/ 448))
    wh_loss = 5 * box_mask * responsible_mask * K.square((label_wh / 448) -
                                                         (predict_wh / 448))
    box_loss += wh_loss
    # box_loss = 5 * box_mask * responsible_mask * K.square((label_xy - predict_xy) / 448)
    # box_loss += 5 * box_mask * responsible_mask * K.square((label_wh / 448) - (predict_wh / 448))
    # box_loss = K.sum(box_loss)
    # tf.print("- wh_loss:", K.sum(wh_loss),
    #          output_stream=sys.stdout)

    # Total of loss terms 1 and 2
    box_loss = K.sum(box_loss)

    loss = confidence_loss + class_loss + box_loss
    tf.print("\n- confidence_loss:", confidence_loss, output_stream=sys.stdout)
    tf.print("- class_loss:", class_loss, output_stream=sys.stdout)
    tf.print("- box_loss:", box_loss, output_stream=sys.stdout)

    return loss
def yolo_loss(args, anchors, ignore_thresh=.5, att_loss_weight=0.1, print_loss=False):
    '''Return yolo_loss tensor (single-scale variant with attention loss).

    Parameters
    ----------
    args: list of tensors laid out as
        [yolo_output, predicted_attention_map, y_true, gt_attention_map, ...]
        (the commented-out indices show earlier layouts that also carried
        segmentation masks and co-energy terms).
    anchors: array, shape=(N, 2), wh
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss
    att_loss_weight: float, weight of the attention-map BCE term.
    print_loss: bool, wrap the loss in a legacy tf.Print op.

    Returns
    -------
    loss: tensor, shape=(1,)

    NOTE(review): no class loss is computed here (the class-probs line is
    commented out) — presumably this model is single-class; verify.
    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:1]
    pred_att = args[1:2]
    # mask_prob=args[1]
    # co_enegy=args[2]
    # y_true = args[3:4]
    y_true = args[2:3]
    att_map = args[3::]
    # mask_gt=args[4]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [
        [0, 1, 2]
    ]  ##due to deleting 2 scales change [[6,7,8], [3,4,5], [0,1,2]] to [[0,1,2]]
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32,
        K.dtype(y_true[0]))  # x32 is original size
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]  #3 degree scales output
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        # 1 where a ground-truth box is assigned to this cell/anchor.
        object_mask = y_true[l][..., 4:5]
        # true_class_probs = y_true[l][..., 5:] #... ==????
        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        # Weight small boxes more heavily (2 - w*h with normalized w, h).
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch. Predictions whose
        # best IoU with any ground-truth box exceeds ignore_thresh are
        # excluded from the no-object confidence penalty.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body,
                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        def smooth_L1(y_true, y_pred, sigma=3.0):
            """ Create a smooth L1 loss functor.
            Args
                sigma: This argument defines the point where the loss changes from L2 to L1.
            Returns
                A functor for computing the smooth L1 loss given target data and predicted data.
            """
            sigma_squared = sigma**2

            # compute smooth L1 loss
            # f(x) = 0.5 * (sigma * x)^2          if |x| < 1 / sigma / sigma
            #        |x| - 0.5 / sigma / sigma    otherwise
            regression_diff = y_true - y_pred
            regression_diff = K.abs(regression_diff)
            regression_loss = tf.where(
                K.less(regression_diff, 1.0 / sigma_squared),
                0.5 * sigma_squared * K.pow(regression_diff, 2),
                regression_diff - 0.5 / sigma_squared)
            return regression_loss

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * smooth_L1(
            raw_true_wh, raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
        # seg_loss=K.binary_crossentropy(mask_gt, mask_prob, from_logits=True)
        # Attention supervision: BCE between ground-truth and predicted maps.
        att_loss = K.binary_crossentropy(att_map[l], pred_att[l], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        att_loss = K.sum(att_loss) / mf
        # seg_loss = K.sum(seg_loss) / mf
        # co_enegy_loss=cem_loss(co_enegy) / mf

        # remove RES and CEM loss
        seg_loss_weight = 0.
        co_weight = 0.
        # loss += xy_loss+ wh_loss+ confidence_loss+seg_loss*seg_loss_weight+co_enegy_loss*co_weight
        loss += xy_loss + wh_loss + confidence_loss + att_loss_weight * att_loss
        if print_loss:
            # loss = tf.Print(loss, ['\n''co_peak_loss: ',co_enegy_loss,'co_peak_energe: ', K.sum(co_enegy)/mf], message='loss: ')
            # NOTE(review): tf.Print is TF1-only and removed in TF2 — confirm
            # the target TensorFlow version.
            loss = tf.Print(loss, [], message='loss: ')
    return K.expand_dims(loss, axis=0)
def loop_body(b, ignore_mask):
    """Process sample `b`: mark predictions whose best IoU against any
    ground-truth box stays below `ignore_thresh` (these keep contributing
    to the no-object confidence loss).

    Relies on enclosing-scope names: y_true, i, object_mask_bool, pred_box,
    box_iou, ignore_thresh.
    """
    # Keep only the ground-truth boxes actually present in this sample.
    valid_boxes = tf.boolean_mask(y_true[i][b, ..., 0:4],
                                  object_mask_bool[b, ..., 0])
    overlaps = box_iou(pred_box[b], valid_boxes)
    max_overlap = K.max(overlaps, axis=-1)
    flags = K.cast(max_overlap < ignore_thresh, K.dtype(valid_boxes))
    return b + 1, ignore_mask.write(b, flags)
def call(self, inputs, training=None):
    """Batch-normalize `inputs`, switching between batch statistics
    (training) and moving averages (inference) based on `training`.

    Also registers moving-average updates for mean/variance as a side
    effect when the learning phase is not statically inference.
    """
    input_shape = K.int_shape(inputs)
    # Prepare broadcasting shape.
    ndim = len(input_shape)
    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self.axis] = input_shape[self.axis]

    # Determines whether broadcasting is needed: only when the normalized
    # axis is not the last one do the 1-D parameters need reshaping.
    needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1])

    def normalize_inference():
        # Inference path: normalize with the stored moving statistics.
        if needs_broadcasting:
            # In this case we must explicitly broadcast all parameters.
            broadcast_moving_mean = K.reshape(self.moving_mean,
                                              broadcast_shape)
            broadcast_moving_variance = K.reshape(self.moving_variance,
                                                  broadcast_shape)
            if self.center:
                broadcast_beta = K.reshape(self.beta, broadcast_shape)
            else:
                broadcast_beta = None
            if self.scale:
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
            else:
                broadcast_gamma = None
            return tf.nn.batch_normalization(  #K.batch_normalization(
                inputs,
                broadcast_moving_mean,
                broadcast_moving_variance,
                broadcast_beta,
                broadcast_gamma,
                #axis=self.axis,
                self.epsilon)
                #epsilon=self.epsilon)
        else:
            return tf.nn.batch_normalization(  #K.batch_normalization(
                inputs,
                self.moving_mean,
                self.moving_variance,
                self.beta,
                self.gamma,
                #axis=self.axis,
                self.epsilon)
                #epsilon=self.epsilon)

    # If the learning phase is *static* and set to inference:
    if training in {0, False}:
        return normalize_inference()

    # If the learning is either dynamic, or set to training:
    normed_training, mean, variance = _regular_normalize_batch_in_training(  #K.normalize_batch_in_training(
        inputs, self.gamma, self.beta, reduction_axes, epsilon=self.epsilon)

    if K.backend() != 'cntk':
        sample_size = K.prod(
            [K.shape(inputs)[axis] for axis in reduction_axes])
        sample_size = K.cast(sample_size, dtype=K.dtype(inputs))

        # sample variance - unbiased estimator of population variance
        variance *= sample_size / (sample_size - (1.0 + self.epsilon))

    # Track moving mean/variance with momentum for use at inference time.
    self.add_update([
        K.moving_average_update(self.moving_mean, mean, self.momentum),
        K.moving_average_update(self.moving_variance, variance, self.momentum)
    ], inputs)

    # Pick the normalized form corresponding to the training phase.
    return K.in_train_phase(normed_training,
                            normalize_inference,
                            training=training)
def yolo_loss(args, anchors, num_classes, ignore_threshold=0.5, normalize=True):
    """YOLOv3-style loss using CIoU for box regression.

    Parameters
    ----------
    args: list of tensors; first num_layers entries are raw model outputs,
        the rest are the matching ground-truth tensors.
    anchors: array, shape=(N, 2), anchor widths/heights.
    num_classes: int, number of object classes.
    ignore_threshold: float, IoU above which a prediction is excluded from
        the no-object confidence penalty.
    normalize: NOTE(review) — accepted but never used in this body; verify
        whether callers expect it to rescale the loss.

    Returns
    -------
    loss: scalar tensor (sum of location, confidence, and class losses
        over all scales, each averaged over the batch).
    """
    num_layers = len(anchors) // 3
    y_true = args[num_layers:]
    yolo_outputs = args[:num_layers]
    anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    # Network input size is 32x the coarsest feature map (stride 32).
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # Batch size
    mf = K.cast(m, K.dtype(yolo_outputs[0]))
    for l in range(num_layers):
        # feature maps location
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]
        # predict grid, class, xy, wh
        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[l],
            anchors=anchors[anchors_mask[l]],
            num_classes=num_classes,
            input_shape=input_shape,
            calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])
        # Predictions whose best IoU with any ground-truth box exceeds the
        # threshold are excluded from the no-object confidence penalty.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_threshold, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body,
                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)
        # Larger ground-truth boxes get a smaller loss weight (2 - w*h).
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # compute ciou loss
        # NOTE(review): the ciou value is used directly as a loss (no `1 -`),
        # so box_ciou presumably already returns a loss term — confirm its
        # sign convention against the helper's definition.
        raw_true_box = y_true[l][..., 0:4]
        ciou = box_ciou(pred_box, raw_true_box)
        ciou_loss = object_mask * box_loss_scale * ciou
        location_loss = K.sum(ciou_loss) / mf

        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        # Compute positive sample
        loss += location_loss + confidence_loss + class_loss
    return loss
def yolo_loss(args, anchors, num_classes, rescore_confidence=False, print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    yolo_output : tensor
        Final convolutional layer features.
    true_boxes : tensor
        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
        containing box x_center, y_center, width, height, and class.
    detectors_mask : array
        0/1 mask for detector positions where there is a matching ground truth.
    matching_true_boxes : array
        Corresponding ground truth boxes for positive detector positions.
        Already adjusted for conv height and width.
    anchors : tensor
        Anchor boxes for model.
    num_classes : int
        Number of object classes.
    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.
    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    mean_loss : float
        mean localization loss across minibatch
    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    # Fixed weighting of the loss components (Darknet defaults).
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate((K.sigmoid(feats[..., 0:2]), feats[..., 2:4]),
                               axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (
        confidence_loss_sum + classification_loss_sum + coordinates_loss_sum)
    if print_loss:
        # tf.Print is TF1-only; kept for backward compatibility.
        total_loss = tf.Print(
            total_loss, [
                total_loss, confidence_loss_sum, classification_loss_sum,
                coordinates_loss_sum
            ],
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss