Example 1
 def call(self, x, reconstruction=False):
     self.reconstruction = reconstruction
     x_t = x[:, :self.d_dim, :]
     d = x[:, 2*self.d_dim:3*self.d_dim, :]
     if reconstruction:
         output_dim = self.time_stamp
         m = x[:, 3*self.d_dim:, :]
         ref_t = K.tile(d[:, :, None, :], (1, 1, output_dim, 1))
     else:
         m = x[:, self.d_dim: 2*self.d_dim, :]
         ref_t = np.linspace(0, self.hours_look_ahead, self.ref_points)
         output_dim = self.ref_points
         ref_t.shape = (1, ref_t.shape[0])
     #x_t = x_t*m
     d = K.tile(d[:, :, :, None], (1, 1, 1, output_dim))
     mask = K.tile(m[:, :, :, None], (1, 1, 1, output_dim))
     x_t = K.tile(x_t[:, :, :, None], (1, 1, 1, output_dim))
     norm = (d - ref_t)*(d - ref_t)
     a = K.ones((self.d_dim, self.time_stamp, output_dim))
     pos_kernel = K.log(1 + K.exp(self.kernel))
     alpha = a*pos_kernel[:, np.newaxis, np.newaxis]
     w = K.logsumexp(-alpha*norm + K.log(mask), axis=2)
     w1 = K.tile(w[:, :, None, :], (1, 1, self.time_stamp, 1))
     w1 = K.exp(-alpha*norm + K.log(mask) - w1)
     y = K.sum(w1*x_t, axis=2)
     if reconstruction:
         rep1 = tf.concat([y, w], 1)
     else:
         w_t = K.logsumexp(-10.0*alpha*norm + K.log(mask),
                           axis=2)  # kappa = 10
         w_t = K.tile(w_t[:, :, None, :], (1, 1, self.time_stamp, 1))
         w_t = K.exp(-10.0*alpha*norm + K.log(mask) - w_t)
         y_trans = K.sum(w_t*x_t, axis=2)
         rep1 = tf.concat([y, w, y_trans], 1)
     return rep1
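The core of this layer is a masked softmax over observed time stamps: adding log(mask) pushes unobserved points to zero weight, and the logsumexp along axis 2 normalizes the kernel weights. A minimal NumPy sketch of that pattern, with made-up shapes (illustrative only, not the layer itself):

import numpy as np

scores = np.random.randn(4, 6)                     # hypothetical (time_stamp, output_dim) kernel scores
mask = np.array([1., 1., 0., 1.])[:, None]         # 1 = observed time point, 0 = missing
log_mask = np.where(mask > 0, 0.0, -np.inf)        # log(mask) for a 0/1 mask
logits = scores + log_mask                         # -inf removes missing points
m = logits.max(axis=0, keepdims=True)
lse = m + np.log(np.exp(logits - m).sum(axis=0, keepdims=True))
w = np.exp(logits - lse)                           # masked softmax weights
assert np.allclose(w.sum(axis=0), 1.0)             # weights over observed points sum to 1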
Example 2
def sparse_multilabel_categorical_crossentropy(y_true,
                                               y_pred,
                                               mask_zero=False):
    """稀疏版多标签分类的交叉熵
    说明:
        1. y_true.shape=[..., num_positive],
           y_pred.shape=[..., num_classes];
        2. 请保证y_pred的值域是全体实数,换言之一般情况下
           y_pred不用加激活函数,尤其是不能加sigmoid或者
           softmax;
        3. 预测阶段则输出y_pred大于0的类;
        4. 详情请看:https://kexue.fm/archives/7359 。
    """
    zeros = K.zeros_like(y_pred[..., :1])
    y_pred = K.concatenate([y_pred, zeros], axis=-1)
    if mask_zero:
        infs = zeros + K.infinity()
        y_pred = K.concatenate([infs, y_pred[..., 1:]], axis=-1)
    y_pos_2 = batch_gather(y_pred, y_true)
    y_pos_1 = K.concatenate([y_pos_2, zeros], axis=-1)
    if mask_zero:
        y_pred = K.concatenate([-infs, y_pred[..., 1:]], axis=-1)
        y_pos_2 = batch_gather(y_pred, y_true)
    pos_loss = K.logsumexp(-y_pos_1, axis=-1)
    all_loss = K.logsumexp(y_pred, axis=-1)
    aux_loss = K.logsumexp(y_pos_2, axis=-1) - all_loss
    aux_loss = K.clip(1 - K.exp(aux_loss), K.epsilon(), 1)
    neg_loss = all_loss + K.log(aux_loss)
    return pos_loss + neg_loss
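As a quick illustration of the input convention described in the docstring (values are made up): the sparse variant takes the indices of the positive classes, which correspond to the dense 0/1 targets used by the plain multilabel loss further below.

import numpy as np

num_classes = 6
y_true_sparse = np.array([[2, 5], [3, 0]])     # 0 acts as padding when mask_zero=True
y_true_dense = np.zeros((2, num_classes))
for row, idxs in enumerate(y_true_sparse):
    for i in idxs:
        if i != 0:                              # skip the padding id
            y_true_dense[row, i] = 1.0
print(y_true_dense)                             # [[0 0 1 0 0 1], [0 0 0 1 0 0]]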
def weighted_sum(first, second, sigma, first_threshold=-np.inf, second_threshold=np.inf):
    first_normalized = first - kb.logsumexp(first, axis=-1)[...,None]
    second_normalized = second - kb.logsumexp(second, axis=-1)[...,None]
    # sigma.shape = (1,), first_normalized.shape = (T1, ..., Tm, d)
    # logit_probs.shape = (T1, ..., Tm, d)
    logit_probs = first_normalized * sigma + second_normalized * (1.0 - sigma)
    # logit_probs = kb.batch_dot(first_normalized, sigma) + kb.batch_dot(second_normalized, 1.0 - sigma)
    first_mask = (first_normalized < first_threshold).nonzero()
    logit_probs = kb.T.set_subtensor(logit_probs[first_mask], -np.inf)
    second_mask = (second_normalized < second_threshold).nonzero()
    logit_probs = kb.T.set_subtensor(logit_probs[second_mask], -np.inf)
    return logit_probs
def multilabel_categorical_crossentropy(y_true, y_pred):
    """Multilabel categorical cross-entropy.
    Notes: y_true and y_pred have the same shape; every element of y_true is
        either 0 or 1, with 1 marking a target class and 0 a non-target class.
    """
    y_pred = (1 - 2 * y_true) * y_pred
    y_pred_neg = y_pred - y_true * 1e12
    y_pred_pos = y_pred - (1 - y_true) * 1e12
    zeros = K.zeros_like(y_pred[..., :1])
    y_pred_neg = K.concatenate([y_pred_neg, zeros], axis=-1)
    y_pred_pos = K.concatenate([y_pred_pos, zeros], axis=-1)
    neg_loss = K.logsumexp(y_pred_neg, axis=-1)
    pos_loss = K.logsumexp(y_pred_pos, axis=-1)
    return neg_loss + pos_loss
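For reference, a small stand-alone NumPy version of the same computation (a sketch for inspecting the loss on a toy batch, not the library code):

import numpy as np

def logsumexp_np(a):
    m = a.max(axis=-1, keepdims=True)
    return (m + np.log(np.exp(a - m).sum(axis=-1, keepdims=True)))[..., 0]

def multilabel_cce_np(y_true, y_pred):
    y_pred = (1 - 2 * y_true) * y_pred
    y_pred_neg = y_pred - y_true * 1e12
    y_pred_pos = y_pred - (1 - y_true) * 1e12
    zeros = np.zeros_like(y_pred[..., :1])
    y_pred_neg = np.concatenate([y_pred_neg, zeros], axis=-1)
    y_pred_pos = np.concatenate([y_pred_pos, zeros], axis=-1)
    return logsumexp_np(y_pred_neg) + logsumexp_np(y_pred_pos)

y_true = np.array([[1., 0., 1., 0.]])           # classes 0 and 2 are targets
y_pred = np.array([[2.3, -1.1, 0.4, -0.7]])     # raw logits, no sigmoid/softmax
print(multilabel_cce_np(y_true, y_pred))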
Example 5
 def step(self, input_energy_t, states, return_logZ=True):
     prev_target_val, i, chain_energy = states[:3]
     t = K.cast(i[0, 0], dtype='int32')
     if len(states) > 3:
         if K.backend() == 'theano':
             m = states[3][:, t:(t + 2)]
         else:
             m = K.tf.slice(states[3], [0, t], [-1, 2])
         input_energy_t = input_energy_t * K.expand_dims(m[:, 0])
         chain_energy = chain_energy * K.expand_dims(
             K.expand_dims(
                 m[:, 0] * m[:, 1]))  # (1, F, F)*(B, 1, 1) -> (B, F, F)
     if return_logZ:
         energy = chain_energy + K.expand_dims(
             input_energy_t - prev_target_val,
             2)  # shapes: (1, B, F) + (B, F, 1) -> (B, F, F)
         new_target_val = K.logsumexp(-energy, 1)  # shapes: (B, F)
         return new_target_val, [new_target_val, i + 1]
     else:
         energy = chain_energy + K.expand_dims(
             input_energy_t + prev_target_val, 2)
         min_energy = K.min(energy, 1)
         argmin_table = K.cast(K.argmin(energy, 1),
                               K.floatx())  # cast for tf-version `K.rnn`
         return argmin_table, [min_energy, i + 1]
Example 6
 def loss(self, y_true, y_pred):
     """Negative log pdf. Used logsum trick for numerical stability"""
     mixture_weights, mu, sigma, = self.split_param_types(y_pred)
     norm = 1. / (np.sqrt(2. * np.pi) * sigma)
     exponent = -(K.square(y_true - mu) / (2. * K.square(sigma)) -
                  K.log(mixture_weights) - K.log(norm))
     return -K.logsumexp(exponent, axis=-1)
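To see that the exponent really is the log of a weighted Gaussian density, here is a tiny NumPy check with arbitrary values (illustrative only):

import numpy as np

w = np.array([0.3, 0.7])                 # mixture weights
mu = np.array([0.0, 1.0])
sigma = np.array([0.5, 1.5])
y = 0.2
norm = 1.0 / (np.sqrt(2.0 * np.pi) * sigma)
exponent = -((y - mu) ** 2 / (2.0 * sigma ** 2) - np.log(w) - np.log(norm))
pdf = np.sum(w * norm * np.exp(-(y - mu) ** 2 / (2.0 * sigma ** 2)))
assert np.allclose(-np.log(pdf), -np.log(np.exp(exponent).sum()))  # equals the per-sample loss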
    def _gmd_log_likelihood(y_true, y_pred):
        """Log-likelihood loss for Gaussian Mixture Densities.
        Currently only supports tensorflow backend.
        Args:
            y_true (tensor): A tensor of shape (samples, c) with the target values.
            y_pred (tensor): Tensor of shape (samples, m*(c + 2)), where m is the number of gaussians.
                The second dimension encodes the following parameters (in that order):
                1) m log-priors (outputs of a log-softmax activation layer)
                2) m variances (outputs of a ShiftedELU activation layer)
                3) m*c means (outputs of a linear activation layer)
        Returns:
            Average negative log-likelihood of each sample.
        """
        splits = [m, m, m * c]

        # Get GMD parameters
        # Parameters are concatenated along the second axis
        # tf.split expect sizes, not locations
        log_prior, sigma_sq, mu = K.tf.split(y_pred,
                                             num_or_size_splits=splits,
                                             axis=1)

        y_true = K.expand_dims(y_true, axis=2)
        mu = K.reshape(mu, [-1, c, m])  # -1 is for the sample dimension
        dist = K.sum(K.square(y_true - mu), axis=1)

        exponent = log_prior - c * HALF_LOG_TWOPI - (
            c / 2.0) * K.log(sigma_sq) - (1 / 2.0) * dist / sigma_sq

        return -K.logsumexp(exponent, axis=1)
Example 8
 def step(self, input_energy_t, states, return_logZ=True):
     # Note: in the following, `prev_target_val` has shape = (B, F)
     # where B = batch_size, F = output feature dim
     # Note: `i` is of float32, due to the behavior of `K.rnn`
     prev_target_val, i, chain_energy = states[:3]
     t = K.cast(i[0, 0], dtype='int32')
     if len(states) > 3:
         if K.backend() == 'theano':
             m = states[3][:, t:(t + 2)]
         else:
             m = K.tf.slice(states[3], [0, t], [-1, 2])
         input_energy_t = input_energy_t * K.expand_dims(m[:, 0])
         chain_energy = chain_energy * K.expand_dims(
             K.expand_dims(
                 m[:, 0] * m[:, 1]))  # (1, F, F)*(B, 1, 1) -> (B, F, F)
     if return_logZ:
         energy = chain_energy + K.expand_dims(
             input_energy_t - prev_target_val,
             2)  # shapes: (1, B, F) + (B, F, 1) -> (B, F, F)
         new_target_val = K.logsumexp(-energy, 1)  # shapes: (B, F)
         return new_target_val, [new_target_val, i + 1]
     else:
         energy = chain_energy + K.expand_dims(
             input_energy_t + prev_target_val, 2)
         min_energy = K.min(energy, 1)
         argmin_table = K.cast(K.argmin(energy, 1),
                               K.floatx())  # cast for tf-version `K.rnn`
         return argmin_table, [min_energy, i + 1]
Example 9
    def call(self, x):
        # Construct the pairwise distance matrix
        D = pairwise_dists(x, x, epsilon=self.epsilon)
        J = []
        # We need to loop through all positive pairs. Since we know
        # the structure of the batch, this is not too difficult.
        for c in range(self.p):  # Loop through classes
            for i in range(self.k):
                for j in range(i + 1, self.k):
                    row_i = c * self.k + i
                    row_j = c * self.k + j
                    rows = K.gather(
                        D, K.constant([row_i, row_j], dtype=K.tf.int32))
                    rows = K.concatenate([
                        K.tf.slice(rows, begin=[0, 0], size=[2, c * self.k]),
                        K.tf.slice(rows,
                                   begin=[0, (c + 1) * self.k],
                                   size=[2, (self.p - c - 1) * self.k])
                    ],
                                         axis=1)
                    rows = K.flatten(rows)
                    J.append(K.logsumexp(self.margin - rows) + D[row_i, row_j])

        J = K.stack(J)
        return K.mean(K.square(K.relu(J))) / 2.0
Example 10
def discriminate_real(y_output, batch_size=batch_size):
    # logD(x) = logZ(x) - log(Z(x) + 1)  where Z(x) = sum_{k=1}^K exp(l_k(x))
    log_zx = K.logsumexp(y_output, axis=1)
    log_dx = log_zx - K.softplus(log_zx)
    dx = K.sum(K.exp(log_dx)) / batch_size
    loss = -K.sum(log_dx) / batch_size
    return loss, dx
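The second line relies on the identity log D(x) = log Z - log(Z + 1) = log Z - softplus(log Z); a quick NumPy check with arbitrary logits (not part of the original code):

import numpy as np

logits = np.array([0.5, -1.0, 2.0])              # l_k(x)
log_zx = np.log(np.exp(logits).sum())            # log Z(x)
log_dx = log_zx - np.log1p(np.exp(log_zx))       # log Z - softplus(log Z)
assert np.allclose(np.exp(log_dx), np.exp(log_zx) / (np.exp(log_zx) + 1.0))  # = D(x)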
Example 11
 def test_logsumexp(self, x_np, axis, keepdims, K):
     '''
     Check if K.logsumexp works properly for values close to one.
     '''
     x = K.variable(x_np)
     assert_allclose(K.eval(K.logsumexp(x, axis=axis, keepdims=keepdims)),
                     np.log(np.sum(np.exp(x_np), axis=axis, keepdims=keepdims)),
                     rtol=1e-5)
Example 12
 def test_logsumexp_optim(self, K):
     '''
     Check if optimization works.
     '''
     x_np = np.array([1e+4, 1e-4])
     assert_allclose(K.eval(K.logsumexp(K.variable(x_np), axis=0)),
                     1e4,
                     rtol=1e-5)
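The test value is chosen so that a naive implementation overflows: exp(1e4) is inf in float64, while the max-subtraction trick a stable logsumexp relies on returns roughly 1e4. An illustrative NumPy comparison:

import numpy as np

x = np.array([1e4, 1e-4])
with np.errstate(over='ignore'):
    naive = np.log(np.sum(np.exp(x)))            # inf: exp(1e4) overflows
m = x.max()
stable = m + np.log(np.sum(np.exp(x - m)))       # ~10000.0, the value the test expects
print(naive, stable)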
Example 13
 def loss(self, y_true, y_pred):  # the targets must be in one-hot form
     mask = 1 - y_true[:, 1:, -1] if self.ignore_last_label else None
     y_true, y_pred = y_true[:, :, :self.num_labels], y_pred[:, :, :self.num_labels]
     init_states = [y_pred[:, 0]]  # initial state
     log_norm, _, _ = K.rnn(self.log_norm_step, y_pred[:, 1:], init_states, mask=mask)  # Z vector (in log space)
     log_norm = K.logsumexp(log_norm, 1, keepdims=True)  # Z (in log space)
     path_score = self.path_score(y_pred, y_true)  # numerator (in log space)
     return log_norm - path_score  # i.e. -log(numerator/denominator)
Example 14
 def test_logsumexp(self, x_np, axis, keepdims, K):
     '''
     Check if K.logsumexp works properly for values close to one.
     '''
     x = K.variable(x_np)
     assert_allclose(K.eval(K.logsumexp(x, axis=axis, keepdims=keepdims)),
                     np.log(np.sum(np.exp(x_np), axis=axis, keepdims=keepdims)),
                     rtol=1e-5)
Example 15
 def test_logsumexp_optim(self, K):
     '''
     Check if optimization works.
     '''
     x_np = np.array([1e+4, 1e-4])
     assert_allclose(K.eval(K.logsumexp(K.variable(x_np), axis=0)),
                     1e4,
                     rtol=1e-5)
Example 16
def free_energy0(x, U, mask=None):
    """
    Free energy without boundary potential handling.
    """
    initial_states = [x[:, 0, :]]
    last_alpha, _ = _forward(x, lambda B: [K.logsumexp(B, axis=1)],
                             initial_states, U, mask)
    return last_alpha[:, 0]
Example 17
 def get_loss(self, args):
     logits, action, weights = args
     action = tf.reshape(action, [-1])
     mask = tf.one_hot(action, depth=self.action_size, dtype=tf.float32)
     logpi = tf.reduce_sum(
         (logits - tf.transpose([K.logsumexp(logits, axis=-1)])) * mask,
         axis=-1)
     logpi_w = tf.transpose([logpi]) * weights
     return logpi_w
Example 18
 def log_norm_step(self, inputs, states):
     """递归计算归一化因子
     要点:1、递归计算;2、用logsumexp避免溢出。
     技巧:通过expand_dims来对齐张量。
     """
     states = K.expand_dims(states[0], 2)  # (batch_size, output_dim, 1)
     trans = K.expand_dims(self.trans, 0)  # (1, output_dim, output_dim)
     output = K.logsumexp(states + trans, 1)  # (batch_size, output_dim)
     return output + inputs, [output + inputs]
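A minimal NumPy sketch of a single step of this recursion, with hypothetical shapes, mirroring the expand_dims/logsumexp pattern above (illustrative only):

import numpy as np

batch_size, output_dim = 2, 3
prev = np.random.randn(batch_size, output_dim)       # previous log-normalizer per label
trans = np.random.randn(output_dim, output_dim)      # label-transition scores
emit = np.random.randn(batch_size, output_dim)       # emission scores at this step

scores = prev[:, :, None] + trans[None]              # (batch_size, output_dim, output_dim)
m = scores.max(axis=1, keepdims=True)
new = (m + np.log(np.exp(scores - m).sum(axis=1, keepdims=True)))[:, 0, :] + emit
print(new.shape)                                     # (batch_size, output_dim)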
def consensus_categorical_crossentropy(y_true, y_pred):
    # y_pred = tf.nn.softmax(y_pred, axis=-1)
    y_pred /= tf.reduce_sum(y_pred, len(y_pred.get_shape()) - 1, True)
    # print y_pred.shape
    y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
    # print y_true
    # print K.sum(y_true * (y_pred - K.logsumexp(y_pred)), axis=-1)
    return -tf.reduce_sum(y_true * (y_pred - K.logsumexp(y_pred)),
                          len(y_pred.get_shape()) - 1)
Example 20
 def log_norm_step(self, inputs, states):
     """递归计算归一化因子
     要点:1、递归计算;2、用logsumexp避免溢出。
     技巧:通过expand_dims来对齐张量。原本是先exp之后矩阵乘法运算的,为了防止溢出,可以先矩阵相加再做exp,
     再做的exp使用logsumexp完成有效防止溢出
     """
     states = K.expand_dims(states[0], 2)  # (batch_size, output_dim, 1)
     trans = K.expand_dims(self.trans, 0)  # (1, output_dim, output_dim)
     output = K.logsumexp(states + trans, 1)  # (batch_size, output_dim)
     return output + inputs, [output + inputs]
Example 21
 def call(self, logits):
     norm_logits = logits - K.tile(K.logsumexp(logits, axis=-1, keepdims=True),
                                   (1, K.shape(logits)[1]))
     categorical = K.softmax(logits)
     kl = -K.sum(categorical * (norm_logits - K.log(data['pis'])), axis=-1)
     ll = K.transpose(K.stack([mode.log_prob(self._input) for mode in modes]))
     ll = K.sum(categorical * ll, axis=-1)
     elbo = ll - kl
     self.add_loss(-elbo, inputs=logits)
     return logits
 def euclidean_distance(self, args):
     a, b = args
     N, D = K.shape(a)[0], K.shape(a)[1]
     M = K.shape(b)[0]
     a = K.expand_dims(a, axis=1)
     b = K.expand_dims(b, axis=0)
     a = K.tile(a, [1, M, 1])
     b = K.tile(b, [N, 1, 1])
     dist = K.mean(K.square(a - b), axis=2)
     return -dist - K.logsumexp(-dist)  #tf.nn.log_softmax(-dist)
Example 23
 def _get_weights(self):
     log_likelihood = -self.nll
     log_p = K.sum([q.prior.log_prob(q.samples) for q in self.latents],
                   axis=0)
     log_q = K.sum([q.log_prob(q.samples) for q in self.latents], axis=0)
     log_weights = log_likelihood + log_p - log_q
     log_weights -= K.logsumexp(log_weights, axis=-1, keepdims=True)
     weights_unnormalized = K.exp(log_weights)
     return weights_unnormalized / K.sum(
         weights_unnormalized, axis=-1, keepdims=True)
Example 24
def entropy_estimator_kl(x, var):
    # KL-based upper bound on entropy of mixture of Gaussians with covariance matrix var * I
    #  see Kolchinsky and Tracey, Estimating Mixture Entropy with Pairwise Distances, Entropy, 2017. Section 4.
    #  and Kolchinsky and Tracey, Nonlinear Information Bottleneck, 2017. Eq. 10
    dims, N = get_shape(x)
    dists = Kget_dists(x)
    dists2 = dists / (2 * var)
    normconst = (dims / 2.0) * K.log(2 * np.pi * var)
    lprobs = K.logsumexp(-dists2, axis=1) - K.log(N) - normconst
    h = -K.mean(lprobs)
    return dims / 2 + h
Example 25
def multilabel_categorical_crossentropy(y_true, y_pred):
    """多标签分类的交叉熵
    说明:
        1. y_true和y_pred的shape一致,y_true的元素非0即1,
           1表示对应的类为目标类,0表示对应的类为非目标类;
        2. 请保证y_pred的值域是全体实数,换言之一般情况下
           y_pred不用加激活函数,尤其是不能加sigmoid或者
           softmax;
        3. 预测阶段则输出y_pred大于0的类;
        4. 详情请看:https://kexue.fm/archives/7359 。
    """
    y_pred = (1 - 2 * y_true) * y_pred
    y_neg = y_pred - y_true * K.infinity()
    y_pos = y_pred - (1 - y_true) * K.infinity()
    zeros = K.zeros_like(y_pred[..., :1])
    y_neg = K.concatenate([y_neg, zeros], axis=-1)
    y_pos = K.concatenate([y_pos, zeros], axis=-1)
    neg_loss = K.logsumexp(y_neg, axis=-1)
    pos_loss = K.logsumexp(y_pos, axis=-1)
    return neg_loss + pos_loss
Example 26
def entropy_upper(data, noise_variance):
    pairwise_dists = get_dists_backend(data)
    pairwise_dists /= (2 * noise_variance)

    N = K.cast(K.shape(data)[0], K.floatx())
    dims = K.cast(K.shape(data)[1], K.floatx())
    normconst = (dims / 2.0) * K.log(2 * np.pi * noise_variance)

    term1 = K.logsumexp(-pairwise_dists, axis=1) - K.log(N) - normconst

    return -K.mean(term1) + dims / 2
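Read as a formula, this is the pairwise-distance entropy bound cited in Example 24. A NumPy mirror for inspection, assuming get_dists_backend returns squared Euclidean distances (an assumption; that helper is not shown here):

import numpy as np

def entropy_upper_np(data, noise_variance):
    # Squared pairwise distances ||x_i - x_j||^2 (assumed behaviour of get_dists_backend)
    sq = np.sum(data ** 2, axis=1)
    dists = sq[:, None] + sq[None, :] - 2.0 * data @ data.T
    dists = dists / (2.0 * noise_variance)
    N, dims = data.shape
    normconst = (dims / 2.0) * np.log(2.0 * np.pi * noise_variance)
    m = (-dists).max(axis=1, keepdims=True)
    lprobs = (m + np.log(np.exp(-dists - m).sum(axis=1, keepdims=True)))[:, 0]
    lprobs = lprobs - np.log(N) - normconst
    return -np.mean(lprobs) + dims / 2.0

print(entropy_upper_np(np.random.randn(5, 3), noise_variance=0.1))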
Example 27
    def get_mdn_coef(output):
        # first column is the batch dimension
        assert output.shape[1] % 3 == 0
        num_components = int(int(output.shape[1]) / 3)

        logmix = output[:, :num_components]
        mean = output[:, num_components:2 * num_components]
        logstd = output[:, 2 * num_components:]

        logmix = logmix - K.logsumexp(logmix, axis=1, keepdims=True)

        return logmix, mean, logstd
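The logmix line is just a log-softmax over mixture components; a quick NumPy check with arbitrary values (illustrative only) that the normalized weights exponentiate to a proper distribution:

import numpy as np

logmix = np.array([[1.0, 2.0, 0.5]])
m = logmix.max(axis=1, keepdims=True)
logmix = logmix - (m + np.log(np.exp(logmix - m).sum(axis=1, keepdims=True)))
assert np.allclose(np.exp(logmix).sum(axis=1), 1.0)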
        def for_each_batch(args):
            y_true, y_pred = args

            # y_pred_ = tf.boolean_mask(y_pred, tf.not_equal(y_true, -1))
            # y_true_ = tf.boolean_mask(y_true, tf.not_equal(y_true, -1))
            match = tf.cast(tf.equal(comb, y_true), tf.float32)
            num_matches_needed = len(
                tf.boolean_mask(y_true, tf.not_equal(y_true, -1)))
            y_true_combs = tf.boolean_mask(
                comb,
                K.sum(match, axis=1) == num_matches_needed)
            # yp = K.sum(K.log(y_true_*y_pred_ + (1-y_true_)*(1-y_pred_)))
            yp = K.logsumexp(
                K.sum(-K.binary_crossentropy(y_true_combs, y_pred), axis=1))

            # certain_combs = tf.numpy_function(lambda x: np.unique(x, axis=0), [tf.boolean_mask(self.comb, tf.not_equal(y_true, -1), axis=1)], tf.float32)
            # certain_combs = tf.Print(certain_combs, [certain_combs], 'Combs ')
            # yp -= K.logsumexp(K.sum(K.log(y_pred*self.comb + (1-y_pred)*(1-self.comb)), axis=1))
            yp -= K.logsumexp(
                K.sum(-K.binary_crossentropy(self.comb, y_pred), axis=1))
            return yp
Example 29
 def log_norm_step(self, inputs, states):
     """递归计算归一化因子
     要点:1、递归计算;2、用logsumexp避免溢出。
     技巧:通过expand_dims来对齐张量。
     """
     inputs, mask = inputs[:, :-1], inputs[:, -1:]
     states = K.expand_dims(states[0], 2)  # (batch_size, output_dim, 1)
     trans = K.expand_dims(self.trans, 0)  # (1, output_dim, output_dim)
     outputs = K.logsumexp(states + trans, 1)  # (batch_size, output_dim)
     outputs = outputs + inputs
     outputs = mask * outputs + (1 - mask) * states[:, :, 0]
     return outputs, [outputs]
Example 30
 def loss(self, y_true, y_pred):  # the targets must be in one-hot form
     if self.ignore_last_label:
         mask = 1 - y_true[:, :, -1:]
     else:
         mask = K.ones_like(y_pred[:, :, :1])
     y_true, y_pred = y_true[:, :, :self.num_labels], y_pred[:, :, :self.num_labels]
     path_score = self.path_score(y_pred, y_true)  # numerator (in log space)
     init_states = [y_pred[:, 0]]  # initial state
     y_pred = K.concatenate([y_pred, mask])
     log_norm, _, _ = K.rnn(self.log_norm_step, y_pred[:, 1:], init_states)  # Z vector (in log space)
     log_norm = K.logsumexp(log_norm, 1, keepdims=True)  # Z (in log space)
     return log_norm - path_score  # i.e. -log(numerator/denominator)
Example 31
 def log_norm_pre(self, inputs, states):
     '''
     expand previous states and inputs, sum with trans
     :param inputs: (batch_size, num_label), current word emission scores
     :param states: (batch_size, num_label), all paths  score of previous word
     :return:
     '''
     states = K.expand_dims(states[0], 2)
     inputs = K.expand_dims(inputs, 1)
     trans = K.expand_dims(self.trans, 0)
     scores = states + trans + inputs
     output = K.logsumexp(scores, 1)
     return output, [output]
Example 32
 def call(self, inputs):
     inputs, labels = inputs  # input is "predictions + targets (one-hot)"
     mask = 1 - labels[:, 1:, -1] if self.ignore_last_label else None
     inputs, labels = inputs[:, :, :self.num_labels], labels[:, :, :self.num_labels]
     init_states = [inputs[:, 0]]  # initial state
     log_norm, _, _ = K.rnn(self.log_norm_step,
                            inputs[:, 1:],
                            init_states,
                            mask=mask)  # Z vector (in log space)
     log_norm = K.logsumexp(log_norm, 1, keepdims=True)  # Z (in log space)
     path_score = self.path_score(inputs, labels)  # numerator (in log space)
     return log_norm - path_score  # i.e. -log(numerator/denominator)
def sparse_amsoftmax_loss(y_true, y_pred, scale=30, margin=0.35):
    y_true = K.expand_dims(y_true[:, 0], 1)  # ensure y_true has shape (None, 1)
    y_true = K.cast(y_true, 'int32')  # ensure y_true has dtype int32
    batch_idxs = K.arange(0, K.shape(y_true)[0])
    batch_idxs = K.expand_dims(batch_idxs, 1)
    idxs = K.concatenate([batch_idxs, y_true], 1)
    y_true_pred = K.tf.gather_nd(y_pred, idxs)  # target scores, extracted with tf.gather_nd
    y_true_pred = K.expand_dims(y_true_pred, 1)
    y_true_pred_margin = y_true_pred - margin  # subtract the margin
    _Z = K.concatenate([y_pred, y_true_pred_margin], 1)  # for computing the partition function
    _Z = _Z * scale  # rescale, mainly because the predictions are cosines in [-1, 1]
    logZ = K.logsumexp(_Z, 1, keepdims=True)  # use logsumexp so the gradient does not vanish
    logZ = logZ + K.log(1 - K.exp(scale * y_true_pred - logZ))  # remove exp(scale * y_true_pred) from Z
    return - y_true_pred_margin * scale + logZ
def sparse_simpler_asoftmax_loss(y_true, y_pred, scale=30):
    y_true = K.expand_dims(y_true[:, 0], 1)  # ensure y_true has shape (None, 1)
    y_true = K.cast(y_true, 'int32')  # ensure y_true has dtype int32
    batch_idxs = K.arange(0, K.shape(y_true)[0])
    batch_idxs = K.expand_dims(batch_idxs, 1)
    idxs = K.concatenate([batch_idxs, y_true], 1)
    y_true_pred = K.tf.gather_nd(y_pred, idxs)  # target scores, extracted with tf.gather_nd
    y_true_pred = K.expand_dims(y_true_pred, 1)
    # expanded using the quadruple-angle formula
    y_true_pred_margin = 1 - 8 * K.square(y_true_pred) + 8 * K.square(K.square(y_true_pred))
    # the next line is equivalent to min(y_true_pred, y_true_pred_margin)
    y_true_pred_margin = y_true_pred_margin - K.relu(y_true_pred_margin - y_true_pred)
    _Z = K.concatenate([y_pred, y_true_pred_margin], 1)  # for computing the partition function
    _Z = _Z * scale  # rescale, mainly because the predictions are cosines in [-1, 1]
    logZ = K.logsumexp(_Z, 1, keepdims=True)  # use logsumexp so the gradient does not vanish
    logZ = logZ + K.log(1 - K.exp(scale * y_true_pred - logZ))  # remove exp(scale * y_true_pred) from Z
    return - y_true_pred_margin * scale + logZ
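Both loss functions above remove exp(scale * y_true_pred) from the partition function using the identity log(sum_i exp(x_i) - exp(x_t)) = logZ + log(1 - exp(x_t - logZ)); a quick NumPy check with arbitrary numbers (illustrative only):

import numpy as np

x = np.array([1.2, -0.3, 0.8])
t = 0
logZ = np.log(np.exp(x).sum())
direct = np.log(np.exp(x).sum() - np.exp(x[t]))
via_logsumexp = logZ + np.log(1.0 - np.exp(x[t] - logZ))
assert np.allclose(direct, via_logsumexp)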
Example 35
def logsumexp(x):
    return K.logsumexp(x, axis=1, keepdims=False)