Example #1
	def basic_loss(self, y_true, y_pred, go_backwards=False):
		"""y_true must be integer class ids (not one-hot).
		"""
		# derive the padding mask from y_pred and convert its dtype
		mask = K.all(K.greater(y_pred, -1e6), axis=2)
		mask = K.cast(mask, K.floatx())
		# re-assert the shape and dtype of y_true
		y_true = K.reshape(y_true, K.shape(y_pred)[:-1])
		y_true = K.cast(y_true, 'int32')
		# handle sequence reversal
		if self.hidden_dim is None:
			if go_backwards:  # whether to reverse the sequence
				y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask)
				trans = K.transpose(self.trans)
			else:
				trans = self.trans
			history = K.gather(trans, y_true)
		else:
			if go_backwards:  # whether to reverse the sequence
				y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask)
				r_trans, l_trans = self.l_trans, self.r_trans
			else:
				l_trans, r_trans = self.l_trans, self.r_trans
			history = K.gather(l_trans, y_true)
			history = tf.einsum('bnd,kd->bnk', history, r_trans)
		# compute the loss
		history = K.concatenate([y_pred[:, :1], history[:, :-1]], 1)
		y_pred = (y_pred + history) / 2
		loss = K.sparse_categorical_crossentropy(
			y_true, y_pred, from_logits=True
		)
		return K.sum(loss * mask) / K.sum(mask)
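The closing sum(loss * mask) / sum(mask) is the standard masked-mean pattern for ignoring padded timesteps. A minimal, self-contained sketch of that pattern with toy data (the shapes and the padding-as-zero convention below are assumptions for illustration, not taken from the class above):

import numpy as np
from tensorflow.keras import backend as K

y_true = K.constant([[1, 2, 0], [3, 0, 0]], dtype='int32')      # [B, T] integer labels, 0 = padding (assumed)
y_pred = K.constant(np.random.rand(2, 3, 5), dtype='float32')   # [B, T, num_labels] raw scores
mask = K.cast(K.greater(y_true, 0), K.floatx())                 # 1 for real tokens, 0 for padding

loss = K.sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)  # [B, T]
masked_mean = K.sum(loss * mask) / K.sum(mask)                  # average over unmasked steps only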
Example #2
File: hw4_mnist.py Project: tj-kim/DLHW4
    def forward(self, X, Ytrue=None):
        # Define various tensors that make up the model and potentially its
        # loss (if Ytrue is given).

        _logits: tf.Tensor = None
        _probits: tf.Tensor = None
        _preds: tf.Tensor = None
        _loss: tf.Tensor = None

        c = X
        parts = []
        for l in self.layers:
            c = l(c)
            parts.append(c)

        _logits = parts[-2]
        _probits = parts[-1]

        _preds = tf.argmax(_probits, axis=1)

        if Ytrue is not None:
            # Same as the loss specified in train below.
            _loss = K.mean(
                K.sparse_categorical_crossentropy(Ytrue, _probits))

        return {
            'logits': _logits,
            'probits': _probits,
            'preds': _preds,
            'loss': _loss,
        }
Example #3
File: layers.py Project: adowu/bert4one
    def basic_loss(self, y_true, y_pred, go_backwards=False):
        """y_true需要是整数形式(非one hot)
            y_true: [B, T]
            y_pred: [B, T, Labels]
        """
        mask = self.output_mask
        y_true = K.cast(y_true, 'int32')
        y_true = K.reshape(y_true, [K.shape(y_true)[0], -1])

        if go_backwards:
            y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask)
            trans = K.transpose(self.trans)
        else:
            trans = self.trans

        # loss
        # [B, T, Labels]
        history = K.gather(trans, y_true)
        # y_pred[:, :1]  [B, 1, Labels]
        # history[:, :-1] [B, T-1, Labels]
        # history [B, T, Labels]
        # TODO: still not clear what this concatenate is doing
        history = K.concatenate([y_pred[:, :1], history[:, :-1]], 1)
        y_pred = (y_pred + history) / 2
        loss = K.sparse_categorical_crossentropy(
            y_true, y_pred, from_logits=True)
        if mask is None:
            return K.mean(loss)

        else:
            return K.sum(loss*mask) / K.sum(mask)
Example #4
def model_perplexity(model, test_X, test_y, mask_zeros=True):
    tot_cce = 0.0
    for i in range(len(test_y)):
        x = test_X[i]
        x = x[np.newaxis, :]
        y_true = test_y[i]
        y_pred = model.predict(x)

        timesteps = len(y_true)
        if mask_zeros:
            for t in range(len(y_true)):
                if y_true[t] == 0:
                    y_true = y_true[0:t]
                    y_pred = y_pred[:, 0:t, :]
                    timesteps = t
                    break

        print(i)
        cross_entropy = K.sparse_categorical_crossentropy(y_true, y_pred)
        avg_cce = K.mean(cross_entropy).numpy()
        tot_cce += avg_cce
        #if avg_cce > 1.0 or avg_cce < 0.0:
        #    print(cross_entropy.numpy())
    cross_entropy = tot_cce / len(test_y)
    perplexity = K.exp(cross_entropy).numpy()
    return perplexity, cross_entropy
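Usage is a single call once a trained model and index-encoded test arrays are available (the names below are placeholders, not from the original project):

# Hypothetical usage; `model`, `test_X`, `test_y` are assumed to exist already.
ppl, ce = model_perplexity(model, test_X, test_y, mask_zeros=True)
print('cross-entropy: {:.4f}, perplexity: {:.2f}'.format(ce, ppl))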
Example #5
    def forward(self, X, Ytrue=None):
        _features: tf.Tensor = None # new tensor to build
        _logits: tf.Tensor = None
        _probits: tf.Tensor = None
        _preds: tf.Tensor = None
        _loss: tf.Tensor = None

        # >>> Your code here <<<
        c = X
        parts = []
        for l in self.layers:
            c = l(c)
            parts.append(c)
        _features = parts[-4]
        _logits = parts[-2]
        _probits = parts[-1]

        _preds = tf.argmax(_probits, axis=1)

        if Ytrue is not None:
            # Same as the loss specified in train below.
            _loss = K.mean(K.sparse_categorical_crossentropy(
                Ytrue,
                _probits
            ))


        return {
            'features': _features,
            'logits': _logits,
            'probits': _probits,
            'preds': _preds,
            'loss': _loss,
        }
Example #6
 def call(self, label, y_pred):
     loss = K.sparse_categorical_crossentropy(
         label, y_pred) * tf.dtypes.cast(tf.less(label, 9),
                                         tf.dtypes.float32)
     loss *= tf.dtypes.cast(tf.size(loss), tf.dtypes.float32)
     loss /= K.sum(tf.dtypes.cast(tf.less(label, 9), tf.dtypes.float32))
     return loss
Example #7
 def loss(y_true, y_pred):
     
     local_weights = tf.gather(weights, tf.argmax(y_true, axis = -1))
     ce_3d = K.sparse_categorical_crossentropy(y_true, y_pred)
     ce_3d_weighted = ce_3d * local_weights
     final = K.mean(K.mean(ce_3d_weighted, axis = (1,2)))
     return final
Example #8
def perplexity(y_true, y_pred):
    """
    Popular metric for evaluating language modelling architectures.
    More info: http://cs224d.stanford.edu/lecture_notes/LectureNotes4.pdf
    """
    cross_entropy = K.sparse_categorical_crossentropy(y_true, y_pred)
    return K.mean(K.exp(K.mean(cross_entropy, axis=-1)))
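Because it takes (y_true, y_pred) tensors, this perplexity can be passed directly as a Keras metric; a hedged usage sketch, assuming `model` is a token-level classifier built elsewhere (the model itself is not part of the snippet):

# Hypothetical usage as a compile-time metric.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=[perplexity])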
Example #9
    def __call__(self, y_true, y_pred):
        cross_entropy = K.sparse_categorical_crossentropy(y_true,
                                                          y_pred,
                                                          from_logits=True)

        ppl = math.e**K.mean(cross_entropy)

        return ppl
Example #10
    def __call__(self, y_true, y_pred):
        cross_entropy = K.sparse_categorical_crossentropy(y_true,
                                                          y_pred,
                                                          from_logits=True)

        cross_entropy *= ((self.n_tokens - 1) / (self.n_original_tokens - 1))

        return cross_entropy
Example #11
 def loss(y_true, y_pred):
     y_true = K.cast(
         y_true, dtype='int32'
     )  # cast needed for some reason, even though the output of the dataset is int32
     t_vector = tf.gather_nd(emb_matrix, y_true)
     p_vector = K.dot(y_pred, emb_matrix)
     wav_term = l * tf.norm(p_vector - t_vector)
     return K.sparse_categorical_crossentropy(y_true, y_pred) + wav_term
Example #12
    def loss(y_true, y_pred):
        y_true = K.cast(y_true, K.floatx())
        mask = K.equal(y_true, mask_value)
        mask = 1 - K.cast(mask, K.floatx())
        y_true = y_true * mask

        loss = K.sparse_categorical_crossentropy(y_true, y_pred) * mask
        return K.sum(loss) / K.sum(mask)
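The mask_value referenced here comes from an enclosing scope that is not shown; a plausible wrapper (an assumption, not from the original project) closes over it and returns the inner function for model.compile():

from tensorflow.keras import backend as K

def masked_sparse_ce(mask_value=0):
    # assumed outer function: captures mask_value and returns the loss shown above
    def loss(y_true, y_pred):
        y_true = K.cast(y_true, K.floatx())
        mask = 1 - K.cast(K.equal(y_true, mask_value), K.floatx())
        ce = K.sparse_categorical_crossentropy(y_true * mask, y_pred) * mask
        return K.sum(ce) / K.sum(mask)
    return loss

# model.compile(loss=masked_sparse_ce(mask_value=0), optimizer='adam')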
Example #13
def masked_sparse_categorical_crossentropy(label, y_pred):
    # label1 = tf.dtypes.cast(label * tf.less(label, 9), tf.dtypes.int32)
    # mask2 = tf.dtypes.cast(mask1, tf.dtypes.float32)
    loss = K.sparse_categorical_crossentropy(label, y_pred) * tf.dtypes.cast(
        tf.less(label, 9), tf.dtypes.float32)
    loss *= tf.dtypes.cast(tf.size(loss), tf.dtypes.float32)
    loss /= K.sum(tf.dtypes.cast(tf.less(label, 9), tf.dtypes.float32))
    return loss
Example #14
 def compute_loss(self, inputs, mask=None):
     y_true, y_pred = inputs
     y_true = tf.cast(y_true, tf.float32)
     y_true = y_true[:, 1:]  # target token ids
     y_mask = tf.cast(y_true > 0, tf.float32)  # non-padding positions
     y_pred = y_pred[:, :-1]  # predictions, shifted back by one position
     loss = K.sparse_categorical_crossentropy(y_true, y_pred)
     loss = K.sum(loss * y_mask) / K.sum(y_mask)
     return loss
Example #15
def perplexity(y_true, y_pred):
    """
    The perplexity metric. Why isn't this part of Keras yet?!
    https://stackoverflow.com/questions/41881308/how-to-calculate-perplexity-of-rnn-in-tensorflow
    https://github.com/keras-team/keras/issues/8267
    """
    cross_entropy = K.sparse_categorical_crossentropy(y_true, y_pred)
    perplexity = K.exp(cross_entropy)
    return perplexity
Example #16
def sparse_categorical_crossentropy(y_true, y_pred):

    if tf.size(y_true) == 0:

        return tf.constant(0.0, dtype=tf.float32)

    loss = backend.sparse_categorical_crossentropy(y_true, y_pred)

    return backend.mean(loss)
Example #17
def loss_function(real, pred):

    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = K.sparse_categorical_crossentropy(real, pred, from_logits=False)

    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask

    return tf.reduce_mean(loss_)
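Note that tf.reduce_mean here averages over every position, padded ones included (their loss is zero but they still count in the denominator), whereas most snippets on this page divide by the mask sum. A minimal sketch of that alternative reduction, assuming the same padding-as-zero convention:

import tensorflow as tf
from tensorflow.keras import backend as K

def loss_function_masked_mean(real, pred):
    # same masking as above, but averaged only over the non-padded positions
    mask = tf.cast(tf.math.logical_not(tf.math.equal(real, 0)), dtype=pred.dtype)
    loss_ = K.sparse_categorical_crossentropy(real, pred, from_logits=False) * mask
    return tf.reduce_sum(loss_) / tf.reduce_sum(mask)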
Example #18
def masked_perplexity_loss(y_true, y_pred, PAD_token=0):
    """Construct customer masked perplexity loss."""
    mask = K.all(K.equal(y_true, PAD_token),
                 axis=-1)  # Label padding as zero in y_true
    mask = 1 - K.cast(mask, K.floatx())
    nomask = K.sum(mask)
    loss = K.sparse_categorical_crossentropy(
        y_true,
        y_pred) * mask  # Multiply categorical_crossentropy with the mask
    return tf.exp(K.sum(loss) / nomask)
Example #19
File: generate.py Project: xbqnl/NLP-model
 def compute_loss(self, inputs, mask=None):
     y_true, y_mask, y_pred = inputs
     y_true = tf.cast(y_true, tf.float32)
     y_mask = tf.cast(y_mask, tf.float32)
     y_true = y_true[:, 1:]  # target token_ids
     y_mask = y_mask[:, 1:]  # segment_ids, which conveniently indicate the part to be predicted
     y_pred = y_pred[:, :-1]  # predicted sequence, shifted back by one position
     loss = K.sparse_categorical_crossentropy(y_true, y_pred)
     loss = K.sum(loss * y_mask) / K.sum(y_mask)
     return loss
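The [:, 1:] / [:, :-1] slicing implements next-token alignment: the prediction made at position t is scored against the token at position t + 1. A toy illustration (shapes and values are made up):

import tensorflow as tf

y_true = tf.constant([[101, 7, 8, 9, 102]])   # [B, T] token ids; 101/102 stand in for [CLS]/[SEP]
y_pred = tf.random.uniform((1, 5, 1000))      # [B, T, vocab] per-position predictions
targets = y_true[:, 1:]                       # drop the first token: these are the labels
shifted = y_pred[:, :-1]                      # drop the last position: shifted[:, t] predicts targets[:, t]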
Example #20
 def compute_copy_loss(self, inputs, mask=None):
     _, y_mask, _, y_true, y_pred = inputs
     y_mask = tf.cast(y_mask, y_pred.dtype)
     y_true = tf.cast(y_true, y_pred.dtype)
     y_mask = K.cumsum(y_mask[:, ::-1], axis=1)[:, ::-1]
     y_mask = K.cast(K.greater(y_mask, 0.5), K.floatx())
     y_mask = y_mask[:, 1:]  # mask flags, shortened by one position
     y_pred = y_pred[:, :-1]  # predicted sequence, shifted back by one position
     y_true = y_true[:, :-1]  # target sequence, trimmed by one position to match
     loss = K.sparse_categorical_crossentropy(y_true, y_pred)
     loss = K.sum(loss * y_mask) / K.sum(y_mask)
     return loss
Example #21
    def __call__(self, y_true, y_pred):
        y_true_val = y_true[:, :, 0]
        mask = y_true[:, :, 1]

        # masked per-sample means of each loss
        num_items_masked = K.sum(mask, axis=-1) + 1e-6
        masked_cross_entropy = (
            K.sum(mask * K.sparse_categorical_crossentropy(y_true_val, y_pred),
                  axis=-1) / num_items_masked)
        masked_entropy = (
            K.sum(mask * -K.sum(y_pred * K.log(y_pred), axis=-1), axis=-1) /
            num_items_masked)
        return masked_cross_entropy - self.penalty_weight * masked_entropy
Example #22
    def call(self, inputs, **kwargs):
        y_true = inputs[0]
        y_pred = inputs[1]

        y_true = K.cast(y_true, tf.int32)
        blank_mask = tf.not_equal(y_true, K.cast(self.blank_value, tf.int32))

        y_true_초성, y_true_중성, y_true_종성 = JamoDeCompose()(y_true)
        y_pred_초성, y_pred_중성, y_pred_종성 = tf.split(
            y_pred,
            [len(초성) + 1, len(중성) + 1, len(종성) + 1], axis=-1)

        mask = tf.cast(blank_mask, dtype=K.floatx())
        loss_초성 = K.sparse_categorical_crossentropy(y_true_초성,
                                                    y_pred_초성) * mask
        loss_중성 = K.sparse_categorical_crossentropy(y_true_중성,
                                                    y_pred_중성) * mask
        loss_종성 = K.sparse_categorical_crossentropy(y_true_종성,
                                                    y_pred_종성) * mask

        mask = K.sum(mask, axis=1)
        loss_jamo = K.sum(loss_초성 + loss_중성 + loss_종성, axis=1)
        return loss_jamo / mask
Example #23
    def loss(y_true, y_pred):
        """
        Parameters
        ----------
        y_true : keras tensor
            True values to predict
        y_pred : keras tensor
            Prediction made by the model. It is assumed that this keras tensor includes extra columns to store the abstaining classes.
        """
        base_pred = (1 - mask) * y_pred + K.epsilon()
        base_true = y_true
        base_cost = K.sparse_categorical_crossentropy(base_true, base_pred)
        # abs_pred = K.mean(mask * y_pred, axis=-1)
        abs_pred = K.sum(mask * y_pred, axis=-1)
        # add some small value to prevent NaN when prediction is abstained
        abs_pred = K.clip(abs_pred, K.epsilon(), 1. - K.epsilon())

        # return ((1. - abs_pred) * base_cost - alpha * K.log(1. - abs_pred))
        return K.mean((1. - abs_pred) * base_cost - alpha * K.log(1. - abs_pred))
Example #24
def masked_perplexity(y_true, y_pred):
    """
    Masked version of popular metric for evaluating performance of
    language modelling architectures. It assumes that y_true has shape
    (batch_size, sequence_length, 2), containing both
      - the original token ids
      - and the mask (0s and 1s, indicating places where
        a word has been replaced),
    both stacked along the last dimension.
    Masked perplexity ignores all but the masked words.

    More info: http://cs224d.stanford.edu/lecture_notes/LectureNotes4.pdf
    """
    y_true_value = y_true[:, :, 0]
    mask = y_true[:, :, 1]
    cross_entropy = K.sparse_categorical_crossentropy(y_true_value, y_pred)
    batch_perplexities = K.exp(
        K.sum(mask * cross_entropy, axis=-1) / (K.sum(mask, axis=-1) + 1e-6))
    return K.mean(batch_perplexities)
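The stacked y_true described in the docstring can be built with a single np.stack; a small sketch (array names are placeholders):

import numpy as np

token_ids = np.array([[5, 12, 7, 0]])   # [batch, seq_len] original token ids
replaced  = np.array([[0, 1, 0, 0]])    # 1 where a token was masked/replaced
y_true_packed = np.stack([token_ids, replaced], axis=-1)   # [batch, seq_len, 2]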
Example #25
def rpn_class_loss_graph(rpn_match, rpn_class_logits):
    """RPN anchor classifier loss.
    rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive,
               -1=negative, 0=neutral anchor.
    rpn_class_logits: [batch, anchors, 2]. RPN classifier logits for BG/FG.
    """

    rpn_match = tf.squeeze(rpn_match, -1)

    anchor_class = K.cast(K.equal(rpn_match, 1), tf.int32)

    indices = tf.where(K.not_equal(rpn_match, 0))
    rpn_class_logits = tf.gather_nd(rpn_class_logits, indices)
    anchor_class = tf.gather_nd(anchor_class, indices)

    loss = K.sparse_categorical_crossentropy(target=anchor_class,
                                             output=rpn_class_logits,
                                             from_logits=True)
    loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0))

    return loss
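A quick sanity check with toy tensors, following the shapes given in the docstring (values are made up):

import tensorflow as tf

rpn_match = tf.constant([[[1], [-1], [0]]], dtype=tf.int32)              # [1, 3, 1]: positive, negative, neutral
rpn_class_logits = tf.constant([[[0.2, 1.5], [2.0, -1.0], [0.3, 0.3]]])  # [1, 3, 2] BG/FG logits
loss = rpn_class_loss_graph(rpn_match, rpn_class_logits)                 # the neutral anchor is excluded from the loss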
Example #26
def rpn_class_loss_func(rpn_match, rpn_class_logits):
    """
    rpn_match: [batch, anchors, 1]. 1 = positive, -1 = negative, 0 = neutral.
    rpn_class_logits: [batch, anchors, 2]. RPN classifier logits for BG/FG.
    """
    # Squeeze last dim to simplify
    rpn_match = tf.squeeze(rpn_match, -1)
    # Get anchor classes. Convert the -1/+1 match to 0/1 values.
    anchor_class = K.cast(K.equal(rpn_match, 1), tf.int32)
    # Positive and Negative anchors contribute to the loss,
    # but neutral anchors (match value = 0) don't.
    indices = tf.where(K.not_equal(rpn_match, 0))
    # Pick rows that contribute to the loss and filter out the rest.
    rpn_class_logits = tf.gather_nd(rpn_class_logits, indices)
    anchor_class = tf.gather_nd(anchor_class, indices)
    # Crossentropy loss
    loss = K.sparse_categorical_crossentropy(target=anchor_class,
                                             output=rpn_class_logits,
                                             from_logits=True)
    loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0))
    return loss
Example #27
    def loss(y_true, y_pred):
        # Get the ground truth for each word per input text
        # true_features = K.constant(generate_ground_truth(input_x, y_true, vocabulary, if_dict))
        true_features = K.ones_like(input_x)  # TODO: Debugging

        # Get the heatmap generated by interpretation method for each word per input text
        predicted_features = []
        for idx, window_size in enumerate(filter_sizes):
            # use the interpretation method function to generate the heatmap (ex. grad_cam)
            heatmap = grad_cam(input_x, y_true, logits, conv_output[idx])
            word_level_heatmap = word_level_interpretation(heatmap, window_size)
            word_level_heatmap = unify_dim(word_level_heatmap, window_size)
            predicted_features.append(word_level_heatmap)

        # Average the heatmap for the several convolutional filters
        predicted_features = K.sum(predicted_features, axis=0) / len(filter_sizes)

        j_loss, j_acc = jaccard_sim(true_features, predicted_features)

        final_loss = 1e-5 + K.sparse_categorical_crossentropy(y_true, y_pred) + j_loss

        return final_loss
Example #28
def rpn_class_loss_graph(rpn_match, rpn_class_logits):
    """RPN anchor classifier loss.

    rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive,
               -1=negative, 0=neutral anchor.
    rpn_class_logits: [batch, anchors, 2]. RPN classifier logits for BG/FG.
    """
    # Squeeze last dim to simplify
    rpn_match = tf.squeeze(rpn_match, -1)
    # Get anchor classes. Convert the -1/+1 match to 0/1 values.
    anchor_class = KB.cast(KB.equal(rpn_match, 1), tf.int32)
    # Positive and Negative anchors contribute to the loss,
    # but neutral anchors (match value = 0) don't.
    indices = tf.where(KB.not_equal(rpn_match, 0))
    # Pick rows that contribute to the loss and filter out the rest.
    rpn_class_logits = tf.gather_nd(rpn_class_logits, indices)
    anchor_class = tf.gather_nd(anchor_class, indices)
    # Cross entropy loss
    loss = KB.sparse_categorical_crossentropy(target=anchor_class,
                                             output=rpn_class_logits,
                                             from_logits=True)
    loss = KB.switch(tf.size(loss) > 0, tf.math.reduce_mean(loss), tf.constant(0.0))
    return loss
Example #29
def visualize_layer(model,
                    layer_idx,
                    loss_as_exclusive=False,
                    output_dim=(701, 58),
                    filter_range=(0, None),
                    step=1.,
                    epochs=200,
                    upsampling_steps=9,
                    upsampling_factor=1.2):
    """Visualizes the most relevant filters of one conv-layer in a certain model.
    https://github.com/keras-team/keras/blob/master/examples/conv_filter_visualization.py
    # Arguments
        model: The model containing layer_name.
        layer_idx: The index of the layer to be visualized.
        loss_as_exclusive: If True, loss also minimizes activations of non-test filters in the layer.
        step: step size for gradient ascent.
        epochs: Number of iterations for gradient ascent.
        upsampling_steps: Number of upsampling steps. Currently not working.
        upsampling_factor: Factor by which to gradually upsample the timeseries towards output_dim. Currently not working.
        output_dim: [n_timesteps, n_channels] The output image dimensions.
        filter_range: [lower, upper]
                      Determines the to be computed filter numbers.
                      If the second value is `None`, the last filter will be inferred as the upper boundary.
    """

    output_layer = model.layers[layer_idx]

    max_filts = len(output_layer.get_weights()[1])
    max_filts = max_filts if filter_range[1] is None else min(
        max_filts, filter_range[1])

    # iterate through each filter in this layer and generate its activation-maximization time series
    maximizing_activations = []
    for f_ix in range(filter_range[0], max_filts):
        s_time = time.time()
        if loss_as_exclusive:
            model_output = output_layer.output
        else:
            if isinstance(output_layer, tf.keras.layers.Conv1D):
                model_output = output_layer.output[:, :, f_ix]
            else:
                model_output = output_layer.output[:, f_ix]
        max_model = tf.keras.Model(model.input, model_output)

        # we start with some random noise that is smaller than the expected output.
        n_samples_out = output_dim[0]
        n_samples_intermediate = int(n_samples_out /
                                     (upsampling_factor**upsampling_steps))
        test_dat = tf.convert_to_tensor(
            np.random.random((1, n_samples_intermediate,
                              output_dim[-1])).astype(np.float32))

        for up in reversed(range(upsampling_steps)):
            # Run gradient ascent
            for _ in range(epochs):
                with tf.GradientTape() as tape:
                    tape.watch(test_dat)
                    layer_act = max_model(test_dat)
                    if not loss_as_exclusive:
                        loss_value = K.mean(layer_act)
                    else:
                        from_logits = output_layer.activation != tf.keras.activations.softmax
                        loss_value = K.sparse_categorical_crossentropy(
                            f_ix,
                            K.mean(layer_act, axis=-2),
                            from_logits=from_logits)[0]

                gradients = tape.gradient(loss_value, test_dat)
                # normalization trick: we normalize the gradient
                gradients = normalize(gradients)
                test_dat += gradients * step

                # some filters get stuck to 0, re-init with random data.
                # These will probably end up being low-loss activations.
                if loss_value <= K.epsilon():
                    test_dat = tf.convert_to_tensor(
                        np.random.random((1, n_samples_intermediate,
                                          output_dim[-1])).astype(np.float32))

            # Now upsample the timeseries
            n_samples_intermediate = int(n_samples_intermediate /
                                         (upsampling_factor**up))
            test_dat = upsample_timeseries(test_dat,
                                           n_samples_intermediate,
                                           axis=1)

        print('Costs of filter: {:5.0f} ( {:4.2f}s )'.format(
            loss_value.numpy(),
            time.time() - s_time))
        test_dat = upsample_timeseries(test_dat, n_samples_out, axis=1)
        maximizing_activations.append(
            (test_dat[0].numpy(), loss_value.numpy()))

    print('{} filters processed.'.format(len(maximizing_activations)))
    return maximizing_activations
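The normalize helper used in the gradient-ascent loop is not shown; a common choice, assumed here from the Keras conv_filter_visualization example the docstring links to, is RMS normalization of the gradient:

from tensorflow.keras import backend as K

def normalize(x):
    # scale the gradient tensor to unit RMS so the ascent step size stays comparable across filters
    return x / (K.sqrt(K.mean(K.square(x))) + K.epsilon())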
Example #30
def yolov2_loss(detector_mask, matching_true_boxes, class_one_hot, true_boxes_grid, y_pred, info=False):
	"""
	Calculate YOLO V2 loss from prediction (y_pred) and ground truth tensors (detector_mask,
	matching_true_boxes, class_one_hot, true_boxes_grid,)

	Parameters
	----------
	- detector_mask : tensor, shape (batch_size, GRID_W, GRID_H, anchors_count, 1)
		1 if bounding box detected by grid cell, else 0
	- matching_true_boxes : tensor, shape (batch_size, GRID_W, GRID_H, anchors_count, 5)
		Contains adjusted coords of bounding box in YOLO format
	- class_one_hot : tensor, shape (batch_size, GRID_W, GRID_H, anchors_count, class_count)
		One hot representation of bounding box label
	- true_boxes_grid : annotations : tensor (shape : batch_size, max annot, 5)
		true_boxes_grid format : x, y, w, h, c (coords unit : grid cell)
	- y_pred : prediction from model. tensor (shape : batch_size, GRID_W, GRID_H, anchors_count, 5 + labels_count)
	- info : boolean. True to print some info about the loss values
	
	Returns
	-------
	- loss : scalar
	- sub_loss : sub loss list : coords loss, class loss and conf loss : scalar

	"""

	# anchors tensor
	anchors = np.array(ANCHORS)
	anchors = anchors.reshape(len(anchors)//2, 2)

	# grid coords tensor ---> GRID_W * GRID_H grid
	# tf.tile(input, multiples, name=None)
	# top-left corner coords, GRID_W * GRID_H * anchor_count in total
	coord_x = tf.cast(tf.reshape(tf.tile(tf.range(GRID_W), [GRID_H]), (1, GRID_H, GRID_W, 1, 1)), tf.float32)
	coord_y = tf.transpose(coord_x, (0,2,1,3,4))
	coords = tf.tile(tf.concat([coord_x, coord_y], -1), [y_pred.shape[0], 1, 1, 5, 1])

	# coordinate loss
	# box regression
	# bx = (sigmoid(tx) + cx ) /W
	# bw = pw * e^tw
	# pw is the anchor width, cx is the top-left cell coord, tx and tw are predicted offsets, W is the feature map width
	# here we skip the division by W, because the coords in matching_true_boxes are also in grid-cell units (0~16)
	pred_xy = K.sigmoid(y_pred[:,:,:,:,0:2]) # adjust center coords to lie between 0 and 1
	pred_xy = (pred_xy + coords) # add cell coords for comparison with ground truth. New coords are in grid-cell units
	pred_wh = K.exp(y_pred[:,:,:,:,2:4]) * anchors # adjust width and height for comparison with ground truth. New values are in grid-cell units
	# pred_wh = (pred_wh * anchors) # unit: grid cell
	nb_detector_mask = K.sum(tf.cast(detector_mask>0.0, tf.float32))
	xy_loss = LAMBDA_COORD*K.sum(detector_mask*K.square(matching_true_boxes[...,:2] - pred_xy))/(nb_detector_mask + 1e-6) # no division by 2
	wh_loss = LAMBDA_COORD * K.sum(detector_mask * K.square(K.sqrt(matching_true_boxes[...,2:4])-
		K.sqrt(pred_wh))) / (nb_detector_mask + 1e-6)

	coord_loss = xy_loss + wh_loss

	# class loss
	pred_box_class = y_pred[...,5:]
	true_box_class = tf.argmax(class_one_hot, -1)
	# class_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class)
	class_loss = K.sparse_categorical_crossentropy(target=true_box_class, output=pred_box_class, from_logits=True)
	class_loss = K.expand_dims(class_loss, -1)*detector_mask
	class_loss = LAMBDA_CLASS * K.sum(class_loss) / (nb_detector_mask + 1e-6)

	# confidence loss
	pred_conf = K.sigmoid(y_pred[..., 4:5]) # only two class : object or background
	# for each detector : iou between prediction and ground truth
	x1 = matching_true_boxes[...,0]
	y1 = matching_true_boxes[...,1]
	w1 = matching_true_boxes[...,2]
	h1 = matching_true_boxes[...,3]
	x2 = pred_xy[...,0]
	y2 = pred_xy[...,1]
	w2 = pred_wh[...,0]
	h2 = pred_wh[...,1]
	ious = iou(x1, y1, w1, h1, x2, y2, w2, h2)
	ious = K.expand_dims(ious, -1)

	# for each detector: best ious between pred and true_boxes
	pred_xy = K.expand_dims(pred_xy, 4)
	pred_wh = K.expand_dims(pred_wh, 4)
	pred_wh_half = pred_wh / 2.
	pred_mins = pred_xy - pred_wh_half
	pred_maxes = pred_xy + pred_wh_half
	true_boxe_shape = K.int_shape(true_boxes_grid)
	true_boxes_grid = K.reshape(true_boxes_grid, [true_boxe_shape[0], 1, 1, 1, true_boxe_shape[1], true_boxe_shape[2]])
	true_xy = true_boxes_grid[...,0:2]
	true_wh = true_boxes_grid[...,2:4]
	true_wh_half = true_wh * 0.5
	true_mins = true_xy - true_wh_half
	true_maxes = true_xy + true_wh_half
	intersect_mins = K.maximum(pred_mins, true_mins) # shape : m, GRID_W, GRID_H, BOX, max_annot, 2 
	intersect_maxes = K.minimum(pred_maxes, true_maxes) # shape : m, GRID_W, GRID_H, BOX, max_annot, 2
	intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.) # shape : m, GRID_W, GRID_H, BOX, max_annot, 1
	intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] # shape : m, GRID_W, GRID_H, BOX, max_annot, 1
	pred_areas = pred_wh[..., 0] * pred_wh[..., 1] # shape : m, GRID_W, GRID_H, BOX, 1, 1
	true_areas = true_wh[..., 0] * true_wh[..., 1] # shape : m, GRID_W, GRID_H, BOX, max_annot, 1
	union_areas = pred_areas + true_areas - intersect_areas
	iou_scores = intersect_areas / union_areas # shape : m, GRID_W, GRID_H, BOX, max_annot, 1
	best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
	best_ious = K.expand_dims(best_ious) # shape : m, GRID_W, GRID_H, BOX, 1
	
	# no object confidence loss
	no_object_detection = K.cast(best_ious < 0.6, K.dtype(best_ious)) 
	noobj_mask = no_object_detection * (1 - detector_mask)
	nb_noobj_mask  = K.sum(tf.cast(noobj_mask  > 0.0, tf.float32))
	
	noobject_loss =  LAMBDA_NOOBJECT * K.sum(noobj_mask * K.square(-pred_conf)) / (nb_noobj_mask + 1e-6)
	# object confidence loss
	object_loss = LAMBDA_OBJECT * K.sum(detector_mask * K.square(ious - pred_conf)) / (nb_detector_mask + 1e-6)
	# total confidence loss
	conf_loss = noobject_loss + object_loss
	
	# total loss
	loss = conf_loss + class_loss + coord_loss
	sub_loss = [conf_loss, class_loss, coord_loss] 

	if info:
		print('conf_loss   : {:.4f}'.format(conf_loss))
		print('class_loss  : {:.4f}'.format(class_loss))
		print('coord_loss  : {:.4f}'.format(coord_loss))
		print('    xy_loss : {:.4f}'.format(xy_loss))
		print('    wh_loss : {:.4f}'.format(wh_loss))
		print('--------------------')
		print('total loss  : {:.4f}'.format(loss)) 

		# display masks for each anchors
		for i in range(len(anchors)):
			f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(10, 5))
			# https://blog.csdn.net/Strive_For_Future/article/details/115052014?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522161883865316780262527067%2522%252C%2522scm%2522%253A%252220140713.130102334.pc%255Fall.%2522%257D&request_id=161883865316780262527067&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2~all~first_rank_v2~rank_v29-2-115052014.first_rank_v2_pc_rank_v29&utm_term=f.tight_layout&spm=1018.2226.3001.4187
			f.tight_layout() 
			f.suptitle("MASKS FOR ANCHOR {} :".format(anchors[i,...]))

			ax1.matshow((K.sum(detector_mask[0,:,:,i], axis=2)), cmap='Greys', vmin=0, vmax=1)
			ax1.set_title('detector_mask, count : {}'.format(K.sum(tf.cast(detector_mask[0,:,:,i]  > 0., tf.int32))))
			ax1.xaxis.set_ticks_position('bottom')
			
			ax2.matshow((K.sum(no_object_detection[0,:,:,i], axis=2)), cmap='Greys', vmin=0, vmax=1)
			ax2.set_title('no_object_detection mask')
			ax2.xaxis.set_ticks_position('bottom')
			
			ax3.matshow((K.sum(noobj_mask[0,:,:,i], axis=2)), cmap='Greys', vmin=0, vmax=1)
			ax3.set_title('noobj_mask')
			ax3.xaxis.set_ticks_position('bottom')
			  
	return loss, sub_loss
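For the class term in this loss, tf.argmax converts the one-hot class tensor back to sparse integer labels so K.sparse_categorical_crossentropy can be fed raw logits; a minimal standalone check of that piece (toy values assumed):

import tensorflow as tf
from tensorflow.keras import backend as K

class_one_hot = tf.constant([[0., 1., 0.]])        # one box, class 1 of 3
pred_box_class = tf.constant([[0.2, 2.0, -1.0]])   # raw class logits from the model
true_box_class = tf.argmax(class_one_hot, -1)      # -> sparse label 1
class_ce = K.sparse_categorical_crossentropy(
    target=true_box_class, output=pred_box_class, from_logits=True)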