def obj_mix(Y_true, Y_pred):
    """Binary cross-entropy on the aggregated outputs of each batch.

    Use the max when y_true == 1 but the mean when y_true == 0.
    """
    y_true = K.mean(Y_true, axis=0)
    if y_true == 1:
        y_pred = K.max(Y_pred, axis=0)
        return K.mean(K.binary_crossentropy(y_pred, y_true))
    elif y_true == 0:
        return K.mean(K.binary_crossentropy(Y_pred, Y_true))
    else:
        print('unexpected value of y_true', y_true)
        return K.mean(K.binary_crossentropy(Y_pred, Y_true))
def _build(self):
    fake, _, _, g_additional_losses = self.g.run_internal_graph(self.g.inputs)

    real = self.d.inputs[0]
    data = concat([fake, real], axis=0)
    realness, _, _, d_additional_losses = self.d.run_internal_graph(
        [data] + self.d.inputs[1:])

    nb_fakes = fake.shape[0]
    fake_realness = realness[:nb_fakes]
    real_realness = realness[nb_fakes:]
    split = 2 * nb_fakes // 3
    g_fake_realness = fake_realness[:split]
    d_fake_realness = fake_realness[split:]

    outputs = OrderedDict()
    g_loss = K.mean(K.binary_crossentropy(g_fake_realness, K.ones_like(real_realness)))
    outputs['g_loss'] = g_loss
    g_reg_loss = sum([v for v in g_additional_losses.values()])
    if g_reg_loss != 0:
        outputs['g_reg_loss'] = g_reg_loss
    g_total_loss = g_loss + g_reg_loss

    d_loss = K.mean(K.binary_crossentropy(real_realness, K.ones_like(real_realness)))
    d_loss += K.mean(K.binary_crossentropy(d_fake_realness, K.zeros_like(real_realness)))
    outputs['d_loss'] = d_loss
    d_reg_loss = sum([v for v in d_additional_losses.values()])
    if d_reg_loss != 0:
        outputs['d_reg_loss'] = d_reg_loss
    d_total_loss = d_loss + d_reg_loss

    inputs = {i.name: i for i in self.g.inputs + self.d.inputs}
    inputs_list = []
    for name in self.input_names:
        inputs_list.append(inputs[name])

    g_updates = self.g_optimizer.get_updates(
        collect_trainable_weights(self.g), self.g.constraints, g_total_loss)
    d_updates = self.d_optimizer.get_updates(
        collect_trainable_weights(self.d), self.d.constraints, d_total_loss)

    if self.uses_learning_phase:
        lr_phase = [K.learning_phase()]
    else:
        lr_phase = []
    self.metrics_names = list(outputs.keys())
    self._train_function = K.function(inputs_list + lr_phase,
                                      list(outputs.values()),
                                      updates=g_updates + d_updates)
def binary_crossentropy_with_ranking(y_true, y_pred):
    """Trying to combine a ranking loss with the usual numeric (log loss) precision."""
    # first get the log loss like normal
    logloss = K.mean(K.binary_crossentropy(y_pred, y_true), axis=-1)

    # next, build a rank loss

    # clip the probabilities to keep stability
    y_pred_clipped = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())

    # translate into the raw scores before the logit
    y_pred_score = K.log(y_pred_clipped / (1 - y_pred_clipped))

    # determine what the maximum score for a zero outcome is
    # (cast the boolean mask to float so it can multiply the scores)
    y_pred_score_zerooutcome_max = K.max(y_pred_score * K.cast(y_true < 1, K.floatx()))

    # determine how much each score is above or below it
    rankloss = y_pred_score - y_pred_score_zerooutcome_max

    # only keep losses for positive outcomes
    rankloss = rankloss * y_true

    # only keep losses where the score is below the max
    rankloss = K.square(K.clip(rankloss, -100, 0))

    # average the loss for just the positive outcomes
    rankloss = K.sum(rankloss, axis=-1) / (K.sum(K.cast(y_true > 0, K.floatx())) + 1)

    # return (rankloss + 1) * logloss - an alternative to try
    return rankloss + logloss
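# Hedged usage sketch (an assumption, not part of the original snippet): the loss above
# has the standard (y_true, y_pred) signature, so it can be passed to compile like any
# built-in loss. The model architecture and optimizer below are illustrative only.
from keras.models import Sequential
from keras.layers import Dense

def build_ranking_model(input_dim=20):
    model = Sequential([
        Dense(32, activation='relu', input_shape=(input_dim,)),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam', loss=binary_crossentropy_with_ranking)
    return model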
def letor_binary_crossentropy(y_true, y_pred):
    signed = 2 * y_pred * (y_true - 0.5)
    pos = signed[0::2]
    neg = signed[1::2]
    s = pos - neg
    es = K.exp(s)
    p = es / (1 + es)
    return K.mean(K.binary_crossentropy(p, y_true), axis=-1)
def vae_loss(y_true, y_pred):
    global mu, log_sigma
    """Calculate loss = reconstruction loss + KL loss for each sample in the minibatch."""
    # E[log P(X|z)]
    recon = K.sum(K.binary_crossentropy(y_pred, y_true), axis=1)
    # D_KL(Q(z|X) || P(z|X)); calculated in closed form as both distributions are Gaussian
    kl = 0.5 * K.sum(K.exp(log_sigma) + K.square(mu) - 1. - log_sigma, axis=1)
    return recon + kl
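# Hedged sketch (an assumption, not from the original source): vae_loss above closes over
# the global tensors `mu` and `log_sigma`, which are typically the encoder's outputs. A
# minimal encoder/decoder wiring that defines them might look like this; the layer sizes
# (784 -> 256 -> 2) and optimizer are illustrative only.
from keras.layers import Input, Dense, Lambda
from keras.models import Model
from keras import backend as K

inputs = Input(shape=(784,))
h = Dense(256, activation='relu')(inputs)
mu = Dense(2)(h)          # read as a global by vae_loss
log_sigma = Dense(2)(h)   # read as a global by vae_loss (log variance)

def sample_z(args):
    mu, log_sigma = args
    eps = K.random_normal(shape=(K.shape(mu)[0], 2), mean=0., stddev=1.)
    return mu + K.exp(log_sigma / 2) * eps

z = Lambda(sample_z)([mu, log_sigma])
decoded = Dense(784, activation='sigmoid')(Dense(256, activation='relu')(z))
vae = Model(inputs, decoded)
vae.compile(optimizer='adam', loss=vae_loss)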
def loss_function(y_true, y_pred):
    skill = y_true[:, :, 0:num_skills]
    obs = y_true[:, :, num_skills]
    rel_pred = Th.sum(y_pred * skill, axis=2)

    # keras implementation does a mean on the last dimension (axis=-1) which
    # it assumes is a singleton dimension. But in our context that would
    # be wrong.
    return K.binary_crossentropy(rel_pred, obs)
def __init__(self, replay_filename, group_name, model_filename=''):
    # Set learning phase to TEST
    self.learning_phase = TEST_MODE

    # If not informed, defaults to '_model' suffix
    if model_filename == '':
        model_filename = '{}_model.h5'.format(group_name)
    # Loads Keras model
    self.model = load_model(model_filename)

    # Loads ReplayData file
    self.replay_data = h5py.File('{}'.format(replay_filename), 'r')
    self.group_name = group_name
    self.group = self.replay_data[self.group_name]

    # Retrieves some basic information from the replay data
    self.inputs = self.group['inputs'][:]
    self.targets = self.group['targets'][:]
    self.n_epochs = self.group.attrs['n_epochs']
    self.n_layers = self.group.attrs['n_layers']

    # Retrieves weights as a list, each element being one epoch
    self.weights = self._retrieve_weights()

    # Gets Tensors for the weights in the same order as the layers
    # Keras' model.weights returns the Tensors in a different order!
    self._model_weights = [w for layer in self.model.layers for w in layer.weights]

    ### Functions
    # Keras function to get the outputs, given inputs and weights
    self._get_output = K.function(inputs=[K.learning_phase()] + self.model.inputs + self._model_weights,
                                  outputs=[self.model.layers[-1].output])
    # Keras function to get the loss and metrics, given inputs, targets, weights and sample weights
    self._get_metrics = K.function(inputs=[K.learning_phase()] + self.model.inputs + self.model.targets +
                                          self._model_weights + self.model.sample_weights,
                                   outputs=[self.model.total_loss] + self.model.metrics_tensors)
    # Keras function to compute the binary cross entropy, given inputs, targets, weights and sample weights
    self._get_binary_crossentropy = K.function(inputs=[K.learning_phase()] + self.model.inputs + self.model.targets +
                                                      self._model_weights + self.model.sample_weights,
                                               outputs=[K.binary_crossentropy(self.model.targets[0],
                                                                              self.model.outputs[0])])

    # Attributes for the visualizations - Data
    self._feature_space_data = None
    self._loss_hist_data = None
    self._loss_and_metric_data = None
    self._prob_hist_data = None
    self._decision_boundary_data = None
    # Attributes for the visualizations - Plot objects
    self._feature_space_plot = None
    self._loss_hist_plot = None
    self._loss_and_metric_plot = None
    self._prob_hist_plot = None
    self._decision_boundary_plot = None
def weighted_binary_crossentropy(y_true, y_pred):
    # Original binary crossentropy (see losses.py):
    # K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1)

    # Calculate the binary crossentropy
    b_ce = K.binary_crossentropy(y_true, y_pred)

    # Apply the weights
    weight_vector = y_true * one_weight + (1. - y_true) * zero_weight
    weighted_b_ce = weight_vector * b_ce

    # Return the mean error
    return K.mean(weighted_b_ce)
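# Hedged sketch (an assumption, not from the original source): `one_weight` and
# `zero_weight` above are free variables, so a closure/factory is one common way to
# bind them before handing the loss to compile. The weight values are illustrative.
def make_weighted_binary_crossentropy(one_weight, zero_weight):
    def weighted_bce(y_true, y_pred):
        b_ce = K.binary_crossentropy(y_true, y_pred)
        weight_vector = y_true * one_weight + (1. - y_true) * zero_weight
        return K.mean(weight_vector * b_ce)
    return weighted_bce

# e.g. penalize missed positives 5x more than false positives
# model.compile(optimizer='adam', loss=make_weighted_binary_crossentropy(5.0, 1.0))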
def f(y_true, y_pred):
    final_mask = y_true * weights[1] + (1.0 - y_true) * weights[0]
    return K.binary_crossentropy(y_true, y_pred) * final_mask
def rpn_loss_cls_fixed_num(y_true, y_pred):
    return lambda_rpn_class * K.sum(
        y_true[:, :, :, :num_anchors] *
        K.binary_crossentropy(y_pred[:, :, :, :], y_true[:, :, :, num_anchors:])
    ) / K.sum(epsilon + y_true[:, :, :, :num_anchors])
def weighted_binary_crossentropy(y_true, y_pred):
    weightsPerTaskRep = y_true * w1_weights[None, :] + (1 - y_true) * w0_weights[None, :]
    nonAmbig = K.cast(K.not_equal(y_true, ambig_val), 'float32')
    nonAmbigTimesWeightsPerTask = nonAmbig * weightsPerTaskRep
    return K.mean(K.binary_crossentropy(y_true, y_pred) * nonAmbigTimesWeightsPerTask, axis=-1)
def create_vae(dims, loss_metric="CrossEntropy", optimizer="Adam", learning_rate=0.001,
               epsilon_std=1., print_summary=False, distloss="ELBO", alpha=0, lambda_=1):
    '''
    Alpha and lambda are terms from the generalized VAE form offered in the InfoVAE
    paper - see Eq. 6 in https://arxiv.org/pdf/1706.02262.pdf. As noted there, when
    alpha is 0 and lambda is 1, the objective is the typical VAE. When lambda is >0
    and alpha is 1-lambda, the objective is the BetaVAE form, which simply weighs the
    KL divergence part of the objective more highly. Generally, this means alpha will
    be negative.

    One recommendation for setting lambda is to make the loss on the third term
    similar in magnitude to the reconstruction loss. One rough way of doing this is to
    run the training with distloss=None, then find an alpha=0, lambda=X value that
    roughly doubles it.
    '''
    assert alpha <= 1
    assert lambda_ >= 0
    vae = (distloss != 'None')

    # The dims arg goes from the input size to the hidden layer sizes and finally the
    # latent dimensions, e.g. [1000, 200, 100, 50] (parsed from a '1000-200-100-50' string)
    assert len(dims) > 2
    original_dim, hidden_dims, latent_dim = dims[0], dims[1:-1], dims[-1]

    activation = 'relu'
    activation_out = 'sigmoid'
    kernel_regularizer = regularizers.l2(0.00005)

    # Build Encoder
    inputs = Input(shape=(original_dim,))
    for i, hdim in enumerate(hidden_dims):
        layer = Dense(hdim, activation=activation, kernel_regularizer=kernel_regularizer,
                      name='HiddenLayer%d' % i)
        if i == 0:
            h = layer(inputs)
        else:
            h = layer(h)

    if vae:
        z_mean = Dense(latent_dim, name='z_mean')(h)
        z_log_var = Dense(latent_dim, name='z_log_var')(h)

        # To allow for a model that can be compiled, use a layer that adds the ELBO loss function.
        # If alpha is 1, this will be ignored.
        if distloss in ["ELBO", "MMD"] and (alpha < 1):
            z_mean, z_log_var = KLDivergenceLayer(alpha=alpha)([z_mean, z_log_var])

        # The latent codes, z, are sampled from the mean and standard deviation, through random normal noise.
        # Since the model uses log variance, the standard deviation (sigma) is derived in another layer.
        z_sigma = Lambda(lambda x: K.exp(x / 2.))(z_log_var)

        def sampling(inputs):
            z_mean, z_sigma = inputs
            epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim),
                                      mean=0., stddev=epsilon_std)
            return z_mean + z_sigma * epsilon

        z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_sigma])

        if distloss == "MMD":
            # Fail when the parameters would make this term ignored
            assert (alpha + lambda_) != 1
            z = MMDLayer(alpha, lambda_)(z)

        encoder = Model(inputs, outputs=[z], name='encoder')
    else:
        z = Dense(latent_dim, activation='relu', name='z')(h)
        encoder = Model(inputs, outputs=[z], name='encoder')

    if print_summary:
        encoder.summary()

    # Build Decoder
    latent_inputs = Input(shape=(latent_dim,), name='DecoderInput')
    for i, hdim in enumerate(hidden_dims[::-1]):
        j = len(hidden_dims) - i
        layer = Dense(hdim, activation=activation, kernel_regularizer=kernel_regularizer,
                      name="DecoderHLayer%d" % j)
        if i == 0:
            h_decoded = layer(latent_inputs)
        else:
            h_decoded = layer(h_decoded)
    decoder_outputs = Dense(original_dim, activation=activation_out,
                            name="ReconstructedOutput")(h_decoded)
    decoder = Model(latent_inputs, decoder_outputs, name='decoder')
    if print_summary:
        decoder.summary()

    # Instantiate the VAE model.
    # Use the reconstructed version ('z', index=0) of the input data as the output
    if vae:
        enc = encoder(inputs)
    else:
        enc = encoder(inputs)
    outputs = decoder(enc)
    vae = Model(inputs, outputs, name='vae')

    # Automatically use all available GPUs
    try:
        vae = multi_gpu_model(vae)
    except:
        pass

    # RMSProp Optimizer, see https://keras.io/optimizers/
    if optimizer == "RMSProp":
        optimizer = optimizers.RMSprop(lr=learning_rate, rho=0.9, epsilon=None, decay=0.0)
    elif optimizer == "Adam":
        # Adam should be better with the sparsity seen in text
        optimizer = optimizers.Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999,
                                    epsilon=None, decay=0.0, amsgrad=False)

    reconstruction_loss = K.sum(K.binary_crossentropy(inputs, outputs), axis=-1)
    # Take the mean reconstruction loss, because the full vector of per-sample losses
    # causes problems for callbacks
    vae.add_loss(K.mean(reconstruction_loss))
    vae.compile(optimizer=optimizer)
    # vae.compile(optimizer=optimizer, loss=calc_vae_loss)

    return vae
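# Hedged usage sketch (an assumption, not from the original source): calling create_vae
# with an explicit list of layer sizes and fitting it on data scaled to [0, 1]. The data
# shape and hyperparameters are illustrative; distloss="ELBO" relies on the custom
# KLDivergenceLayer defined elsewhere in the source.
import numpy as np

X = np.random.rand(512, 1000).astype('float32')  # dummy data in [0, 1]
vae = create_vae(dims=[1000, 200, 50], optimizer="Adam",
                 learning_rate=0.001, distloss="ELBO", alpha=0, lambda_=1)
# The reconstruction (and KL/MMD) losses are attached via add_loss, so no
# target array is passed to fit.
vae.fit(X, epochs=2, batch_size=64)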
def __init__(self, replay_filename, group_name, model_filename=''):
    # Set learning phase to TEST
    self.learning_phase = TEST_MODE

    # Loads ReplayData file
    self.replay_data = h5py.File('{}'.format(replay_filename), 'r')
    try:
        self.group = self.replay_data[group_name]
    except KeyError:
        self.group = self.replay_data[group_name + '_init']
        group_name += '_init'
    self.group_name = group_name

    # If not informed, defaults to '_model' suffix
    if model_filename == '':
        model_filename = '{}_model.h5'.format(group_name)
    # Loads Keras model
    self.model = load_model(model_filename)

    # Retrieves some basic information from the replay data
    self.inputs = self.group['inputs'][:]
    self.targets = self.group['targets'][:]
    self.n_epochs = self.group.attrs['n_epochs']
    self.n_layers = self.group.attrs['n_layers']

    # Generates ranges for the number of different weight arrays in each layer
    self.n_weights = [range(len(self.group['layer{}'.format(l)]))
                      for l in range(self.n_layers)]

    # Retrieves weights as a list, each element being one epoch
    self.weights = self._retrieve_weights()

    # Gets Tensors for the weights in the same order as the layers
    # Keras' model.weights returns the Tensors in a different order!
    self._model_weights = [w for layer in self.model.layers for w in layer.weights]

    ### Functions
    # Keras function to get the outputs, given inputs and weights
    self._get_output = K.function(inputs=[K.learning_phase()] + self.model.inputs + self._model_weights,
                                  outputs=[self.model.layers[-1].output])
    # Keras function to get the loss and metrics, given inputs, targets, weights and sample weights
    self._get_metrics = K.function(inputs=[K.learning_phase()] + self.model.inputs + self.model.targets +
                                          self._model_weights + self.model.sample_weights,
                                   outputs=[self.model.total_loss] + self.model.metrics_tensors)
    # Keras function to compute the binary cross entropy, given inputs, targets, weights and sample weights
    self._get_binary_crossentropy = K.function(inputs=[K.learning_phase()] + self.model.inputs + self.model.targets +
                                                      self._model_weights + self.model.sample_weights,
                                               outputs=[K.binary_crossentropy(self.model.targets[0],
                                                                              self.model.outputs[0])])
    # Keras function to compute the gradients for trainable weights, given inputs, targets, weights and
    # sample weights
    self.__trainable_weights = [w for layer in self.model.layers for w in layer.trainable_weights
                                if layer.trainable and ('bias' not in w.op.name)]
    self.__trainable_gradients = self.model.optimizer.get_gradients(self.model.total_loss,
                                                                    self.__trainable_weights)
    self._get_gradients = K.function(inputs=[K.learning_phase()] + self.model.inputs + self.model.targets +
                                            self._model_weights + self.model.sample_weights,
                                     outputs=self.__trainable_gradients)

    def get_z_op(layer):
        op = layer.output.op
        if op.type in Z_OPS:
            return layer.output
        else:
            op_layer_name = op.name.split('/')[0]
            for input in op.inputs:
                input_layer_name = input.name.split('/')[0]
                if (input.op.type in Z_OPS) and (op_layer_name == input_layer_name):
                    return input
            return None

    __z_layers = np.array([i for i, layer in enumerate(self.model.layers)
                           if get_z_op(layer) is not None])
    __act_layers = np.array([i for i, layer in enumerate(self.model.layers)
                             if layer.output.op.type.lower() in ACTIVATIONS])
    __z_layers = np.array([__z_layers[np.argmax(layer < __z_layers) - 1] for layer in __act_layers])

    self.z_act_layers = [self.model.layers[i].name for i in __z_layers]
    self._z_layers = ['inputs'] + [self.model.layers[i].name for i in __z_layers]
    self._z_tensors = [K.identity(self.model.inputs)] + list(
        filter(lambda t: t is not None,
               [get_z_op(self.model.layers[i]) for i in __z_layers]))

    self._activation_layers = ['inputs'] + [self.model.layers[i].name for i in __act_layers]
    self._activation_tensors = [K.identity(self.model.inputs)] + [self.model.layers[i].output
                                                                  for i in __act_layers]

    # Keras function to compute the Z values given inputs and weights
    self._get_zvalues = K.function(inputs=[K.learning_phase()] + self.model.inputs + self._model_weights,
                                   outputs=self._z_tensors)
    # Keras function to compute the activation values given inputs and weights
    self._get_activations = K.function(inputs=[K.learning_phase()] + self.model.inputs + self._model_weights,
                                       outputs=self._activation_tensors)

    # Gets names of all layers with arrays of weights of lengths 1 (no biases) or 2 (with biases)
    # Layers without weights (e.g. Activation, BatchNorm) are not included
    self.weights_layers = [layer.name for layer, weights in zip(self.model.layers, self.n_weights)
                           if len(weights) in (1, 2)]

    # Attributes for the visualizations - Data
    self._feature_space_data = None
    self._loss_hist_data = None
    self._loss_and_metric_data = None
    self._prob_hist_data = None
    self._decision_boundary_data = None
    self._weights_violins_data = None
    self._activations_violins_data = None
    self._zvalues_violins_data = None
    self._gradients_data = None

    # Attributes for the visualizations - Plot objects
    self._feature_space_plot = None
    self._loss_hist_plot = None
    self._loss_and_metric_plot = None
    self._prob_hist_plot = None
    self._decision_boundary_plot = None
    self._weights_violins_plot = None
    self._activations_violins_plot = None
    self._zvalues_violins_plot = None
    self._gradients_plot = None
def discriminator_on_generator_loss(y_true, y_pred):
    BATCH_SIZE = 10
    return K.mean(K.binary_crossentropy(K.flatten(y_pred),
                                        K.ones_like(K.flatten(y_pred))), axis=-1)
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    Notes
    -----
    (1) args has two parts: the first is *model_body.output, i.e. the three raw darknet
        outputs of shape (batch_size, grid, grid, 75); the second is *y_true, i.e. the
        three ground-truth tensors of shape (batch_size, grid, grid, 3, 25).
    (2) anchors are the nine anchor boxes [[10,13], [16,30], [33,23], [30,61], [62,45],
        [59,119], [116,90], [156,198], [373,326]].
    (3) num_classes = 20 (80 for the COCO dataset).
    (4) ignore_thresh is the minimum IoU an anchor must reach to be excluded from the
        background loss.
    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]  # the raw network outputs
    y_true = args[num_layers:]        # the ground truth
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]      # objectness / confidence
        true_class_probs = y_true[l][..., 5:]  # class probabilities

        # With calc_loss=True, yolo_head returns the grid, the raw feature-map output,
        # and the decoded xy (relative to the feature map) and wh (relative to the whole image)
        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]], num_classes,
                                                     input_shape, calc_loss=True)
        # Concatenate xy and wh into a single predicted box
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        # Convert y_true back into the raw network-output space; this is the inverse of the
        # xy/wh transform done in yolo_head. y_true initially stores xy as a fraction of the
        # whole image; after this it is the offset relative to the current cell.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]
        # Give large boxes a small weight and small boxes a large weight: large boxes do not
        # need xywh to be learned as precisely, while small boxes are very sensitive to xywh
        # errors. The smaller the true box, the larger box_loss_scale, so small boxes contribute
        # more to the loss - the same goal as sqrt(w) in v1/v2.

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')  # confidence (4:5) as a bool mask

        def loop_body(b, ignore_mask):
            # tf.boolean_mask keeps only the boxes whose confidence is 1 (i.e. contain an object)
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            # For image b of the mini-batch, compute the IoU between all predicted boxes
            # and all ground-truth boxes at this scale
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            # If even the best IoU is below the threshold, the prediction is treated as background
            ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
        # If an anchor is not responsible for a ground-truth box and its predicted box has IoU
        # below the threshold with every ground-truth box, it is trained to predict background;
        # if the IoU is above the threshold it is excluded from the loss.
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        # raw_true_xy and raw_pred are both in the raw feature-map space; with from_logits=True
        # the cross entropy is computed on the offsets relative to the current cell's corner.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[..., 0:2],
                                                                       from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[..., 5:], from_logits=True)
        # object_mask is the confidence, box_loss_scale is roughly 2 - w*h,
        # raw_true_xy are the true xy offsets and raw_pred[..., :2] the predicted ones.
        # (1 - object_mask) marks anchors that are not responsible for any ground truth; these
        # contribute to the confidence loss only when their best IoU is below the threshold.

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)],
                            message='loss: ')
    return loss
def vae_loss(y_true, y_pred):
    # E[log P(X|z)]
    recon = K.sum(K.binary_crossentropy(y_pred, y_true), axis=1)
    # D_KL(Q(z|X) || P(z|X))
    kl = 0.5 * K.sum(K.exp(log_sigma) + K.square(mu) - 1. - log_sigma, axis=1)
    return recon + kl
def recon_loss(y_true, y_pred):
    return K.sum(K.binary_crossentropy(y_pred, y_true), axis=1)
def custom_bce(self, y_true, y_pred):
    b = K.not_equal(y_true, -K.ones_like(y_true))
    b = K.cast(b, dtype='float32')
    ans = K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1) * K.mean(b, axis=-1)
    ans = K.cast(ans, dtype='float32')
    # use the backend sum (np.sum would not work on a symbolic tensor)
    return K.sum(ans)
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)
    '''
    num_layers = len(anchors) // 3  # default setting
    # Split predictions from ground truth; args is [*model_body.output, *y_true].
    # y_true is a list of three feature levels with shapes (m,13,13,3,85), (m,26,26,3,85), (m,52,52,3,85).
    # yolo_outputs is a list of three feature levels with the same shapes.
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]

    # Anchor boxes:
    # 6,7,8 -> 116,90  156,198  373,326
    # 3,4,5 -> 30,61   62,45    59,119
    # 0,1,2 -> 10,13   16,30    33,23
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]

    # input_shape is 416,416
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    # grid shapes: 13,13; 26,26; 52,52
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
    loss = 0

    # m is the batch size (one entry per image)
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        # Taking the first feature level (m,13,13,3,85) as an example:
        # the positions that contain an object, shape (m,13,13,3,1).
        # y_true[l] is (x_offset, y_offset, w, h, confidence, class); preprocess_true_boxes in
        # train.py already set the confidence of the best-IoU anchor to 1.
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        # Process the raw feature-level output:
        # grid is the grid structure (13,13,1,2), raw_pred the unprocessed predictions (m,13,13,3,85),
        # plus the decoded xy and wh, each (m,13,13,3,2).
        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]], num_classes,
                                                     input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        # _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[..., 0:2],
                                                                       from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)],
                            message='loss: ')
    return loss
# Reparameterization layer - equivalent to injecting noise into the input
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])

# Decoder layers - this is the generator part
decoder_h = Dense(intermediate_dim, activation='relu')
decoder_mean = Dense(xy_dims, activation='sigmoid')
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)

# Build the model
vae = Model([x, y], [x_decoded_mean, y_mean, y_var])

# xent_loss is the reconstruction loss, kl_loss is the KL loss
xent_loss = K.sum(K.binary_crossentropy(x, x_decoded_mean), axis=-1)
# Only K.square(z_mean) needs to be changed to K.square(z_mean - yh), i.e. the latent
# variable is pulled towards its within-class mean
kl_loss = -0.5 * K.sum(
    1 + z_log_var - y_var - K.square(z_mean - y_mean)
    - K.square(K.sqrt(K.exp(z_log_var)) - K.sqrt(K.exp(y_var))),
    axis=-1)
vae_loss = K.mean(xent_loss + kl_loss)

# add_loss is a newer method that allows losses to be added more flexibly
vae.add_loss(vae_loss)
vae.compile(optimizer='rmsprop')
vae.summary()

# generator, if needed
def mrcnn_mask_loss_graph(reshaped_input_gt_true_mask, pred_masks):
    return K.binary_crossentropy(target=reshaped_input_gt_true_mask, output=pred_masks)
def jaccard_coef_loss(y_true, y_pred):
    return -K.log(jaccard_coef(y_true, y_pred)) + binary_crossentropy(y_pred, y_true)
def MSE_BCE(y_true, y_pred, alpha=1000, beta=10):
    mse = K.mean(K.square(y_true - y_pred), axis=-1)
    bce = K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1)
    return alpha * mse + beta * bce
def binary_crossentrop2(y_true, y_pred):
    return K.mean(2 * K.abs(y_true - 0.5) * K.binary_crossentropy(y_pred, y_true), axis=-1)
def weighted_binary_crossentropy(y_true, y_pred):
    class_loglosses = K.mean(K.binary_crossentropy(y_true, y_pred), axis=[0, 1, 2])
    return K.sum(class_loglosses * K.constant(class_weights))
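# Hedged sketch (an assumption, not from the original source): `class_weights` above is a
# free variable holding one weight per output channel, e.g. for a segmentation model whose
# output has shape (batch, height, width, 3) with sigmoid activations. Values are illustrative.
import numpy as np

class_weights = np.array([0.2, 0.3, 0.5], dtype='float32')  # one weight per channel
# model.compile(optimizer='adam', loss=weighted_binary_crossentropy)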
def stegoGAN(in_w=(w_hei, w_wid, 1), in_c=(height, width, 3)):
    G_model = G()
    R_model = R()
    D1_model = D1()
    D2_model = D2()

    C = Input(shape=in_c, name='C')
    W = Input(shape=in_w, name='W')
    M = G_model([C, W])

    ## models for training
    # a. G connected to R
    W_prime = R_model(M)
    GR_model = Model(inputs=[C, W], outputs=[M, W_prime])
    # GR_model.compile(optimizer='adam',
    #                  loss=[SSIM_LOSS, 'binary_crossentropy'],
    #                  loss_weights=[1., 1.])
    ssim_loss = SSIM_LOSS(C, M)
    w_loss = K.mean(K.binary_crossentropy(W, W_prime))
    gr_loss = ssim_loss + w_loss
    GR_model.add_loss(gr_loss)
    GR_model.compile(optimizer='adam')
    print("===========================")
    print("Model GR:CW->M->W_prime")
    GR_model.summary()

    # b. G connected to D1
    score1_M = D1_model(M)
    score1_C = D1_model(C)
    # d1_loss = - K.mean(K.log(score1_C + 1e-6) + K.log(1 - score1_M + 1e-6))
    # d1_loss = - K.sum(K.log(score1_C + 1e-6) + K.log(1 - score1_M + 1e-6))
    d1_loss = perceptual_loss(C, M)
    GD1_model = Model(inputs=[C, W], outputs=[score1_M, score1_C])
    GD1_model.add_loss(d1_loss)
    GD1_model.compile(optimizer='adam')
    print("===========================")
    print("Model GD1:CW->M->D1")
    GD1_model.summary()

    # c. G connected to D2
    C_shuffle = Lambda(shuffling)(C)
    W_shuffle = Lambda(shuffling)(W)
    score2_t = D2_model([M, C, W])
    score2_f = D2_model([M, C_shuffle, W_shuffle])
    # d2_loss = - K.mean(K.log(score2_t + 1e-6) + K.log(1 - score2_f + 1e-6))
    d2_loss = -K.sum(K.log(score2_t + 1e-6) + K.log(1 - score2_f + 1e-6))
    GD2_model = Model(inputs=[C, W], outputs=[score2_t, score2_f])
    GD2_model.add_loss(d2_loss)
    GD2_model.compile(optimizer='adam')
    print("===========================")
    print("Model GD2:CW->M, MCW->D2")
    GD2_model.summary()

    return GR_model, GD1_model, GD2_model, G_model, R_model
def obj_max(Y_true, Y_pred):
    y_true = K.mean(Y_true, axis=0)
    y_pred = K.max(Y_pred, axis=0)
    return K.mean(K.binary_crossentropy(y_pred, y_true))
def weighted_binary_crossentropy(y_true, y_pred, weight_map):
    return tf.reduce_mean(K.binary_crossentropy(y_true, y_pred) * weight_map) / \
        (tf.reduce_sum(weight_map) + K.epsilon())
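# Hedged sketch (an assumption, not from the original source): this variant takes a third
# argument, so it cannot be handed to compile directly. One simple option is to bind a fixed
# weight map with functools.partial; per-sample weight maps would instead be fed as an extra
# model input. The shape and values below are illustrative.
import functools
import numpy as np
import tensorflow as tf

weight_map = tf.constant(np.ones((64, 64, 1), dtype='float32'))  # e.g. emphasize borders
loss_fn = functools.partial(weighted_binary_crossentropy, weight_map=weight_map)
loss_fn.__name__ = 'weighted_binary_crossentropy'  # partial objects have no __name__, Keras may read one
# model.compile(optimizer='adam', loss=loss_fn)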
def binary_crossentropy(y_true, y_pred):
    return K.mean(K.binary_crossentropy(y_pred, y_true), axis=-1)
def vae_gan(self):
    # encoder
    self.input_img = Input(shape=(64, 64, 3,))
    conv1 = Conv2D(64, kernel_size=5, activation='relu', strides=2)(self.input_img)
    batch_norm1 = BatchNormalization(momentum=0.8)(conv1)
    conv2 = Conv2D(128, kernel_size=5, activation='relu', strides=2)(batch_norm1)
    batch_norm2 = BatchNormalization(momentum=0.8)(conv2)
    conv3 = Conv2D(256, kernel_size=5, activation='relu', strides=2)(batch_norm2)
    batch_norm3 = BatchNormalization(momentum=0.8)(conv3)
    dense_layer = Flatten()(batch_norm3)
    z_mean = Dense(2048, activation='relu')(dense_layer)
    z_mean_bn = BatchNormalization(momentum=0.8)(z_mean)
    z_log_var = Dense(2048)(dense_layer)
    z_sigma = Lambda(self.convert)(z_log_var)
    z = Lambda(self.sample)([z_mean_bn, z_sigma])
    self.encoder = Model(self.input_img, [z, z_mean_bn, z_sigma])
    plot_model(self.encoder, to_file='demo.png', show_shapes=True, show_layer_names=True)
    display(Image(filename='demo.png'))

    # decoder
    self.latent_input = Input(shape=(2048,))
    dense_layer_dec = Dense(8 * 8 * 256)(self.latent_input)
    dense_layer_dec_bn = BatchNormalization(momentum=0.8)(dense_layer_dec)
    dec_img = Reshape((8, 8, 256))(dense_layer_dec_bn)
    deconv1 = Conv2DTranspose(256, kernel_size=3, strides=2, activation='relu')(dec_img)
    batch_norm1_dec = BatchNormalization(momentum=0.8)(deconv1)
    deconv2 = Conv2DTranspose(128, kernel_size=2, strides=2, activation='relu')(batch_norm1_dec)
    batch_norm2_dec = BatchNormalization(momentum=0.8)(deconv2)
    deconv3 = Conv2DTranspose(32, kernel_size=2, activation='relu', strides=2)(batch_norm2_dec)
    batch_norm3_dec = BatchNormalization(momentum=0.8)(deconv3)
    dec_output = Conv2D(3, kernel_size=5, activation='tanh')(batch_norm3_dec)
    self.decoder = Model(self.latent_input, dec_output)
    plot_model(self.decoder, to_file='demo2.png', show_shapes=True, show_layer_names=True)
    display(Image(filename='demo2.png'))

    # discriminator
    self.real_input = Input(shape=(64, 64, 3,))
    conv1_dis = Conv2D(32, kernel_size=5, activation='relu', strides=2)(self.real_input)
    conv2_dis = Conv2D(128, kernel_size=5, activation='relu', strides=2)(conv1_dis)
    batch_norm1_dis = BatchNormalization(momentum=0.8)(conv2_dis)
    conv3_dis = Conv2D(256, kernel_size=5, activation='relu', strides=2)(batch_norm1_dis)
    batch_norm2_dis = BatchNormalization(momentum=0.8)(conv3_dis)
    conv4_dis = Conv2D(256, kernel_size=5, activation='relu', strides=2)(batch_norm2_dis)
    batch_norm3_dis = BatchNormalization(momentum=0.8)(conv4_dis)
    dense_layer_dis = Flatten()(batch_norm3_dis)
    fc_dis = Dense(512, activation='relu')(dense_layer_dis)
    fc_dis_bn = BatchNormalization(momentum=0.8)(fc_dis)
    dis_output = Dense(1, activation='sigmoid')(fc_dis_bn)
    self.discriminator = Model(self.real_input, [dis_output, fc_dis_bn])
    plot_model(self.discriminator, to_file='demo3.png', show_shapes=True, show_layer_names=True)
    display(Image(filename='demo3.png'))

    # vae model for testing
    self.dec_output = self.decoder(self.encoder(self.input_img)[0])
    self.vae_model = Model(self.input_img, self.dec_output)
    print("")
    print("VAE")
    print("")
    plot_model(self.vae_model, to_file='demo4.png', show_shapes=True, show_layer_names=True)
    display(Image(filename='demo4.png'))

    # Connecting the graphs to construct the VAE-GAN as mentioned in the paper
    # "Autoencoding beyond pixels using a learned similarity metric"
    z, z_mean, z_sigma = self.encoder(self.input_img)
    x_enc = self.decoder(z)
    x_dec = self.decoder(self.latent_input)
    dis_real, disl_x = self.discriminator(self.input_img)
    dis_out_enc, disl_x_enc = self.discriminator(x_enc)
    dis_out_dec = self.discriminator(x_dec)[0]

    # initializing models for network-wise training
    encoder_training = Model(self.input_img, disl_x_enc)
    decoder_training = Model([self.input_img, self.latent_input], [dis_out_enc, dis_out_dec])
    discriminator_training = Model([self.input_img, self.latent_input],
                                   [dis_out_enc, dis_real, dis_out_dec])

    real_img_score = np.full((self.batch_size, 1), fill_value=1)
    fake_img_score = np.full((self.batch_size, 1), fill_value=0)
    real_img_score = tf.convert_to_tensor(real_img_score + 0.05 * np.random.random(real_img_score.shape),
                                          dtype='float32')
    fake_img_score = tf.convert_to_tensor(fake_img_score + 0.05 * np.random.random(fake_img_score.shape),
                                          dtype='float32')

    self.decoder.trainable = False
    self.discriminator.trainable = False
    self.encoder.trainable = True
    loss1 = K.mean(K.square(z_sigma) + K.square(z_mean_bn) - K.log(z_sigma) - 1)
    loss2 = 0.5 * (mean_squared_error(K.flatten(disl_x), K.flatten(disl_x_enc)))
    loss = loss1 + loss2
    encoder_training.add_loss(loss)
    encoder_training.compile(optimizer=self.optimizer)

    self.encoder.trainable = False
    self.decoder.trainable = True
    loss3 = K.binary_crossentropy(dis_out_enc, real_img_score, from_logits=True)
    loss4 = K.binary_crossentropy(dis_out_dec, real_img_score, from_logits=True)
    loss_dec = 64 * loss2 + loss3 + loss4
    decoder_training.add_loss(loss_dec)
    decoder_training.compile(optimizer=self.optimizer)

    self.decoder.trainable = False
    self.discriminator.trainable = True
    loss5 = K.binary_crossentropy(dis_out_enc, fake_img_score, from_logits=True)
    loss6 = K.binary_crossentropy(dis_real, real_img_score, from_logits=True)
    loss7 = K.binary_crossentropy(dis_out_dec, fake_img_score, from_logits=True)
    loss_dis = loss5 + loss6 + loss7
    discriminator_training.add_loss(loss_dis)
    discriminator_training.compile(optimizer=self.optimizer)

    num_of_batches = int(len(self.x_train) / self.batch_size)
    for e in range(self.epochs + 1):
        a = 0
        b = self.batch_size
        for i in range(num_of_batches):
            real_images = self.x_train[a:b, :, :, :]
            z = np.random.normal(0, 1, (self.batch_size, 2048))
            z1 = np.random.normal(0, 1, (11, 2048))

            self.encoder.trainable = False
            self.decoder.trainable = False
            self.discriminator.trainable = True
            dis_loss = discriminator_training.train_on_batch([real_images, z], None)

            self.discriminator.trainable = False
            self.decoder.trainable = True
            dec_loss = decoder_training.train_on_batch([real_images, z], None)

            self.decoder.trainable = False
            self.encoder.trainable = True
            enc_loss = encoder_training.train_on_batch(real_images, None)

            if i % 20 == 0:
                print('Epoch: ' + str(e + 1))
                print('Batch Number: ' + str(i + 1))
                print('Encoder Loss')
                print(enc_loss)
                print('Decoder Loss')
                print(dec_loss)
                print('Discriminator Loss')
                print(dis_loss)
                print("")
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)
    '''
    # default setting
    num_layers = len(anchors) // 3
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
                   for l in range(num_layers)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    # compute the loss over the three feature levels
    for l in range(num_layers):
        # objectness / confidence
        object_mask = y_true[l][..., 4:5]
        # class probabilities
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]], num_classes,
                                                     input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[..., 0:2],
                                                                       from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)
            ], message='loss: ')
    return loss
def log_loss(y_true, y_pred):
    return K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1)
def _objective(y_true, y_pred):
    '''The error function to be optimized.'''
    from keras import backend as K
    return K.mean(K.binary_crossentropy(y_pred, y_true), axis=-1)
def jaccard_coef_loss(y_true, y_pred):
    return -K.log(jaccard_coef(y_true, y_pred)) + binary_crossentropy(y_pred, y_true)
def ambig_binary_crossentropy(y_true, y_pred):
    nonAmbig = K.cast(K.not_equal(y_true, ambig_val), 'float32')
    return K.mean(K.binary_crossentropy(y_true, y_pred) * nonAmbig, axis=-1)
def mycost(y_true, y_pred):
    return K.mean(mymask(y_true) *
                  (10 * K.square(K.square(K.sqrt(y_pred) - K.sqrt(y_true))) +
                   K.square(K.sqrt(y_pred) - K.sqrt(y_true)) +
                   0.01 * K.binary_crossentropy(y_pred, y_true)),
                  axis=-1)
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)
    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]], num_classes,
                                                     input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[..., 0:2],
                                                                       from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)],
                            message='loss: ')
    return loss
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    # there are three feature levels in total
    num_layers = len(anchors) // 3

    # Split predictions from ground truth; args is [*model_body.output, *y_true].
    # y_true is a list of three feature levels with shapes (m,13,13,3,25), (m,26,26,3,25), (m,52,52,3,25).
    # yolo_outputs is a list of three feature levels with the same shapes.
    y_true = args[num_layers:]        # y
    yolo_outputs = args[:num_layers]  # y_hat, i.e. the feature maps

    # Anchor box widths and heights:
    # 6,7,8 -> 116,90  156,198  373,326
    # 3,4,5 -> 30,61   62,45    59,119
    # 0,1,2 -> 10,13   16,30    33,23
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]

    # input_shape is 416,416: the original image size recovered from the feature map and the stride
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    # grid shapes: 13,13; 26,26; 52,52
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
    loss = 0

    # m is the batch size (one entry per image)
    m = K.shape(yolo_outputs[0])[0]
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    # y_true is a list of three feature levels with shapes (m,13,13,3,85), (m,26,26,3,85), (m,52,52,3,85).
    # yolo_outputs is a list of three feature levels with the same shapes.
    for l in range(num_layers):
        # Taking the first feature level (m,13,13,3,85) as an example:
        # the positions that contain an object, shape (m,13,13,3,1)
        object_mask = y_true[l][..., 4:5]  # 4+1+20: the confidence
        # the corresponding class probabilities (m,13,13,3,80)
        true_class_probs = y_true[l][..., 5:]

        # Process the raw feature-level output:
        # grid is the grid structure (13,13,1,2), raw_pred the unprocessed predictions (m,13,13,3,85),
        # plus the decoded xy and wh, each (m,13,13,3,2).
        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape,
            calc_loss=True)  # grid coordinates, raw features, decoded box coordinates
        # the decoded predicted box positions, (m,13,13,3,4)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Find the group of negative samples; the first step is to create an array, []
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        # compute ignore_mask for every image
        def loop_body(b, ignore_mask):
            # for image b, gather the parameters of all boxes that really exist: (n, 4)
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            # compute the IoU between predictions and ground truth:
            # pred_box is 13,13,3,4; the result is the IoU of every pred_box with
            # every true box, i.e. 13,13,3,n
            iou = box_iou(pred_box[b], true_box)
            # 13,13,3,1
            best_iou = K.max(iou, axis=-1)
            # If a predicted box's best IoU is below ignore_thresh, it has no matching
            # ground-truth box and is treated as a negative sample for this image
            ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        # loop over all images
        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])

        # stack the per-image results for further processing
        ignore_mask = ignore_mask.stack()
        # (m,13,13,3,1,1)
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # Encode the ground-truth boxes into the same format as the predictions,
        # to be used in the loss below
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][:] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        # object_mask keeps the wh values only where an object really exists;
        # K.switch behaves like an if/else
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[..., 0:2],
                                                                       from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh - raw_pred[..., 2:4])
        # If a box exists at this position, compute the cross entropy between 1 and the confidence.
        # If no box exists and best_iou < ignore_thresh, the position counts as a negative sample;
        # best_iou < ignore_thresh limits the number of negative samples.
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)
            ], message='loss: ')
    return loss
def binary_crossnetropy_no_NaN(y_true, y_pred):
    return K.sum(K.binary_crossentropy(K.switch(K.is_nan(y_true), y_pred, y_true), y_pred), axis=-1)
def rpn_loss_cls_fixed_num(y_true, y_pred):
    if K.image_dim_ordering() == 'tf':
        return lambda_rpn_class * K.sum(
            y_true[:, :, :, :num_anchors] *
            K.binary_crossentropy(y_pred[:, :, :, :], y_true[:, :, :, num_anchors:])
        ) / K.sum(epsilon + y_true[:, :, :, :num_anchors])
    else:
        return lambda_rpn_class * K.sum(
            y_true[:, :num_anchors, :, :] *
            K.binary_crossentropy(y_pred[:, :, :, :], y_true[:, num_anchors:, :, :])
        ) / K.sum(epsilon + y_true[:, :num_anchors, :, :])
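# Hedged sketch (an assumption, not from the original source): in Faster R-CNN style code,
# lambda_rpn_class, epsilon and num_anchors are often bound with a factory so the returned
# closure has the (y_true, y_pred) signature Keras expects. The default values and the
# commented compile call are illustrative only.
def rpn_loss_cls(num_anchors, lambda_rpn_class=1.0, epsilon=1e-4):
    def rpn_loss_cls_fixed_num(y_true, y_pred):
        return lambda_rpn_class * K.sum(
            y_true[:, :, :, :num_anchors] *
            K.binary_crossentropy(y_pred[:, :, :, :], y_true[:, :, :, num_anchors:])
        ) / K.sum(epsilon + y_true[:, :, :, :num_anchors])
    return rpn_loss_cls_fixed_num

# model_rpn.compile(optimizer='adam', loss=[rpn_loss_cls(9), ...])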
def binary_crossentropy_with_nan(y_true, y_pred):
    not_nan = tf.logical_not(tf.is_nan(y_true))
    y_true = tf.boolean_mask(y_true, not_nan)
    y_pred = tf.boolean_mask(y_pred, not_nan)
    return K.mean(K.binary_crossentropy(y_pred, y_true), axis=-1)
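# Hedged usage sketch (an assumption, not from the original source): this loss is handy
# when some labels are unknown and encoded as NaN, e.g. in a multi-task binary setting.
# The label array below is illustrative.
import numpy as np

y_batch = np.array([[1.0, np.nan, 0.0],
                    [0.0, 1.0, np.nan]], dtype='float32')
# model.compile(optimizer='adam', loss=binary_crossentropy_with_nan)
# model.fit(x_batch, y_batch, ...)  # NaN entries are dropped before the BCE is averaged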
def bernoulli(y_true, y_pred):
    return K.sum(K.binary_crossentropy(y_pred, y_true), axis=-1)
k2v = Lambda(seq_gather)([t, k2])
kv = Average()([k1v, k2v])
t = Add()([t, kv])
po1 = Dense(num_classes, activation='sigmoid')(t)
po2 = Dense(num_classes, activation='sigmoid')(t)

# Takes the text and the subject as input, predicts the object and its relation
object_model = Model([t1_in, t2_in, k1_in, k2_in], [po1, po2])

train_model = Model([t1_in, t2_in, s1_in, s2_in, k1_in, k2_in, o1_in, o2_in],
                    [ps1, ps2, po1, po2])

s1 = K.expand_dims(s1, 2)
s2 = K.expand_dims(s2, 2)

s1_loss = K.binary_crossentropy(s1, ps1)
s1_loss = K.sum(s1_loss * mask) / K.sum(mask)
s2_loss = K.binary_crossentropy(s2, ps2)
s2_loss = K.sum(s2_loss * mask) / K.sum(mask)
o1_loss = K.sum(K.binary_crossentropy(o1, po1), 2, keepdims=True)
o1_loss = K.sum(o1_loss * mask) / K.sum(mask)
o2_loss = K.sum(K.binary_crossentropy(o2, po2), 2, keepdims=True)
o2_loss = K.sum(o2_loss * mask) / K.sum(mask)
loss = (s1_loss + s2_loss) + (o1_loss + o2_loss)

train_model.add_loss(loss)
train_model.compile(optimizer=Adam(learning_rate))
train_model.summary()
def f(y_true, y_pred):
    ce = K.binary_crossentropy(y_pred, y_true)
    ce = timeweights * ce
    return K.mean(ce, axis=-1)
def rpn_loss_cls_fixed_num(y_true, y_pred):
    return lambda_rpn_class * K.sum(
        y_true[:, :, :, :num_anchors] *
        K.binary_crossentropy(y_pred[:, :, :, :], y_true[:, :, :, num_anchors:])
    ) / K.sum(epsilon + y_true[:, :, :, :num_anchors])
def discriminator_loss(y_true, y_pred):
    BATCH_SIZE = 10
    return K.mean(K.binary_crossentropy(
        K.flatten(y_pred),
        K.concatenate([K.ones_like(K.flatten(y_pred[:BATCH_SIZE, :, :, :])),
                       K.zeros_like(K.flatten(y_pred[:BATCH_SIZE, :, :, :]))])
    ), axis=-1)
def binary_crossentropy(y_true, y_pred, weight=None):
    if weight is None:
        return K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1)
    return K.mean(weight * K.binary_crossentropy(y_true, y_pred), axis=-1)
def binary_crossentropy_with_logits(ground_truth, predictions):
    return K.mean(K.binary_crossentropy(ground_truth, predictions, from_logits=True), axis=-1)
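# Hedged usage sketch (an assumption, not from the original source): with from_logits=True
# the model's final layer should emit raw logits, i.e. no sigmoid activation. The layer
# sizes below are illustrative.
from keras.models import Sequential
from keras.layers import Dense

logit_model = Sequential([
    Dense(16, activation='relu', input_shape=(8,)),
    Dense(1)  # linear output: raw logits
])
logit_model.compile(optimizer='adam', loss=binary_crossentropy_with_logits)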
def loss_function(category_size, attribute_size, y_true, y_pred):
    category_loss = K.categorical_crossentropy(y_true[:, :category_size], y_pred[:, :category_size])
    attribute_loss = K.binary_crossentropy(y_true[:, category_size:], y_pred[:, category_size:])
    return category_loss + K.mean(attribute_loss, -1)
def binary_crossentrop2(y_true, y_pred):
    return K.mean(2 * K.abs(y_true - 0.5) * K.binary_crossentropy(y_true, y_pred), axis=-1)
def masked_loss_function(y_true, y_pred):
    mask = K.cast(K.not_equal(y_true, mask_value), K.floatx())
    return K.binary_crossentropy(y_true * mask, y_pred * mask)
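# Hedged usage sketch (an assumption, not from the original source): `mask_value` above is
# a free variable marking labels that should not contribute to the loss, e.g. -1 for
# "unknown". The value below is illustrative.
mask_value = -1.0

# Targets equal to mask_value are zeroed on both sides before the BCE is computed, so those
# positions contribute essentially nothing to the gradient.
# model.compile(optimizer='adam', loss=masked_loss_function)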
def log_bernoulli(x, p):
    return -K.sum(K.binary_crossentropy(p, x), axis=-1)
def nll(y_true, y_pred):
    """Negative log likelihood (Bernoulli)."""
    # keras.losses.binary_crossentropy gives the mean
    # over the last axis; we require the sum
    return K.sum(K.binary_crossentropy(y_true, y_pred), axis=-1)
def loss(y_true, y_pred):
    value = tf.cast(y_true > -1, dtype=tf.float32)
    return K.binary_crossentropy(y_true * value, y_pred * value)
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    """Reference: https://blog.csdn.net/weixin_42078618/article/details/85005428

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)
    """
    num_layers = len(anchors) // 3  # number of output layers, one third of the number of anchors

    # Separate args: the first three entries are the yolo_outputs predictions,
    # the last three are the y_true ground truth
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]

    # anchors 6,7,8 go with 13x13; 3,4,5 with 26x26; 0,1,2 with 52x52
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]

    # K.shape(yolo_outputs[0])[1:3] is dims 1-2 of the first prediction tensor, i.e. (13, 13)
    # in (?, 13, 13, 18). Multiplying by 32 recovers the YOLO input size, (416, 416), since
    # the network contains 5 stride-(2, 2) convolutions, a total downsampling of 32 = 2^5.
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    # grid_shapes: i.e. [(13, 13), (26, 26), (52, 52)]
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]

    m = K.shape(yolo_outputs[0])[0]  # number of images fed to the model, i.e. the batch size (tensor)
    mf = K.cast(m, K.dtype(yolo_outputs[0]))  # m as a float

    loss = 0
    for l in range(num_layers):
        # object confidence: position 4 of the last dimension (0-3 are the box, 4 is objectness)
        object_mask = y_true[l][..., 4:5]
        # class confidences: positions 5 onwards of the last dimension
        true_class_probs = y_true[l][..., 5:]

        # Next, call yolo_head to reconstruct the prediction map
        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]], num_classes,
                                                     input_shape, calc_loss=True)
        # Combine xy and wh into the predicted box pred_box, shape (?, 13, 13, 3, 4)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Build the ground-truth targets.
        # Centre-point xy offsets within the grid cell, in the range 0-1; positions 0 and 1
        # of y_true are the relative xy centre, also in the range 0-1
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        # wh relative to the anchors, converted to log form (can be positive or negative);
        # positions 2 and 3 of y_true are the relative wh, in the range 0-1
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        # box_loss_scale = 2 - w * h: a wh weight in the range (1, 2)
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Next, build ignore_mask from the IoU threshold: compute the IoU between the predicted
        # boxes pred_box and the true boxes true_box, and suppress the unwanted anchor boxes,
        # i.e. those whose IoU is below the threshold.
        # ignore_mask has shape (?, ?, ?, 3, 1): dim 0 is the batch, dims 1-2 the feature map size
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        # Centre-point loss. object_mask is position 4 of y_true, i.e. whether an object is
        # present (1) or not (0). box_loss_scale depends on the box size: 2 minus the relative
        # area, in the range (1, 2). binary_crossentropy is the binary cross entropy.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[..., 0:2],
                                                                       from_logits=True)
        # Width/height loss, additionally multiplied by 0.5 and squared with K.square()
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh - raw_pred[..., 2:4])
        # Confidence loss, made of two parts: the loss where an object exists, and the loss
        # where none exists; the latter is multiplied by ignore_mask so that predicted boxes
        # whose IoU is above the threshold are ignored
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        # class_loss: the classification loss
        class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[..., 5:], from_logits=True)

        # Sum each component, normalize by the batch size, and accumulate into the final loss
        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)
            ], message='loss: ')
    return loss