Example no. 1
    def _build_graph(self):
        self.graph = tf.Graph()
        with self.graph.as_default():
            K = self.K
            n_nodes = self.net.n_nodes
            sum_weight = self.X.sum()
            batch_size = self.batch_size

            max_iter = n_nodes * self.iter_per_node // batch_size
            capacity = max_iter * 10
            self.queue = queue = tf.RandomShuffleQueue(capacity, 0, ["int64", "float"],
                              shapes=[[batch_size,], [batch_size, batch_size]])

            self.enq_indices = enq_inp = tf.placeholder("int64", [batch_size])
            self.enq_X = enq_X = tf.placeholder("float32",
                                               [batch_size, batch_size])
            self.enqueue = queue.enqueue((enq_inp, enq_X))

            indices, X_s = queue.dequeue()

            scale = np.sqrt(sum_weight / (n_nodes * n_nodes * K))
            initializer = tf.random_uniform_initializer(maxval=2*scale)
            self.W_var = W_var = tf.get_variable("W", [n_nodes, K], "float32",
                                                 initializer)
            self.H_var = H_var = tf.get_variable("H", [n_nodes, K], "float32",
                                                 initializer)

            self.W = tf.abs(W_var)
            self.H = tf.abs(H_var)

            W_s = tf.gather(W_var, indices)
            H_s = tf.gather(H_var, indices)

            W_abs = tf.abs(W_s)
            H_abs = tf.abs(H_s)

            self.loss = loss = tf.nn.l2_loss(X_s - tf.matmul(W_abs, H_abs,
                                                             transpose_b=True))

            dW, dH = tf.gradients(loss, [W_s, H_s])

            update_W = tf.scatter_sub(W_var, indices, self.lr*dW)
            update_H = tf.scatter_sub(H_var, indices, self.lr*dH)

            self.opt = tf.group(update_W, update_H)

            self.sess = tf.Session()
            self.init_op = tf.initialize_all_variables()
def center_loss(features, label, alpha, nrof_classes):
    """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
       (http://ydwen.github.io/papers/WenECCV16.pdf)
    """
    # Get the feature vector length
    nrof_features = features.get_shape()[1]

    # Retrieve the shared variable `centers`
    with tf.variable_scope('center', reuse=True):
        centers = tf.get_variable('centers')
    label = tf.reshape(label, [-1])

    # Gather the center for each label; labels may repeat, since several samples in the batch can belong to the same class
    centers_batch = tf.gather(centers, label)

    # Difference between each feature and its center; alpha controls how fast the centers are updated
    diff = centers_batch - features

    # Count how many times each label appears in the batch (see the update formula in the paper)
    unique_label, unique_idx, unique_count = tf.unique_with_counts(label)
    appear_times = tf.gather(unique_count, unique_idx)
    appear_times = tf.reshape(appear_times, [-1, 1])

    diff = diff / tf.cast((1 + appear_times), tf.float32)
    diff = alpha * diff

    # Update the centers: subtract the corresponding diff from the rows indexed by label; a label that appears several times is subtracted several times
    centers = tf.scatter_sub(centers, label, diff)

    # Center loss: l2_loss sums the squared values and divides by 2 (no square root)
    loss = tf.nn.l2_loss(features - centers_batch)
    return loss, centers
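A minimal usage sketch for the variant above (TF 1.x assumed; the class count, feature size and the commented-out loss weight are made-up values, not from the original source). Because the function reuses the 'centers' variable, it has to be created in the 'center' scope beforehand, and the returned op has to be run for the centers to actually move.

import tensorflow as tf

num_classes, feat_dim = 10, 128
with tf.variable_scope('center'):
    tf.get_variable('centers', [num_classes, feat_dim], dtype=tf.float32,
                    initializer=tf.constant_initializer(0), trainable=False)

features = tf.placeholder(tf.float32, [None, feat_dim])
labels = tf.placeholder(tf.int64, [None])
center_loss_term, centers_update_op = center_loss(features, labels,
                                                  alpha=0.5, nrof_classes=num_classes)
# total_loss = softmax_loss + 0.01 * center_loss_term
# sess.run([train_op, centers_update_op], feed_dict=...) on each training step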
Example no. 3
 def _assign_sub(self, ref, updates, indices=None):
   if indices is not None:
     if isinstance(ref, tf.Variable):
       return tf.scatter_sub(ref, indices, updates, use_locking=self._use_locking)
     elif isinstance(ref, resource_variable_ops.ResourceVariable):
       with tf.control_dependencies([resource_variable_ops.resource_scatter_add(ref.handle, indices, -updates)]):
         return ref.value()
     else:
       raise TypeError("did not expect type %r" % type(ref))
   else:
     return tf.assign_sub(ref, updates, use_locking=self._use_locking)
Example no. 4
  def _apply_sparse(self, grad, var):
    max_learning_rate = tf.where(self._counter < self._burnin,
                                 self._burnin_max_learning_rate,
                                 self._max_learning_rate)

    learn_rate = tf.clip_by_value(
        self._get_coordinatewise_learning_rate(grad, var), 0.,
        tf.cast(max_learning_rate, var.dtype))
    delta = grad.values * learn_rate

    return tf.scatter_sub(var, grad.indices, delta,
                          use_locking=self._use_locking)
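Sparse gradients arrive as tf.IndexedSlices; below is a small self-contained sketch (TF 1.x assumed, a fixed 0.1 learning rate standing in for the clipped coordinatewise rate) of the pieces _apply_sparse works with: grad.values holds the gradient rows and grad.indices the rows of the variable they belong to.

import tensorflow as tf

var = tf.get_variable("var_demo", initializer=tf.zeros([4, 2]))
grad = tf.IndexedSlices(values=tf.constant([[1.0, 1.0], [2.0, 2.0]]),
                        indices=tf.constant([0, 3]),
                        dense_shape=tf.constant([4, 2]))
delta = grad.values * 0.1
step = tf.scatter_sub(var, grad.indices, delta)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(step))  # rows 0 and 3 decrease by [0.1, 0.1] and [0.2, 0.2]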
Example no. 5
def center_loss(features, label, alfa, nrof_classes):
    """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
       (http://ydwen.github.io/papers/WenECCV16.pdf)
    """
    nrof_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
        initializer=tf.constant_initializer(0), trainable=False)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers, label)
    diff = (1 - alfa) * (centers_batch - features)
    centers = tf.scatter_sub(centers, label, diff)
    loss = tf.reduce_mean(tf.square(features - centers_batch))
    return loss, centers
def center_loss(features, label, alfa, nrof_classes):
    """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
       (http://ydwen.github.io/papers/WenECCV16.pdf)
    """
    nrof_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [nrof_classes, nrof_features],
                              dtype=tf.float32,
                              initializer=tf.constant_initializer(0),
                              trainable=False)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers, label)
    diff = (1 - alfa) * (centers_batch - features)
    centers = tf.scatter_sub(centers, label, diff)
    loss = tf.nn.l2_loss(features - centers_batch)
    return loss, centers
Example no. 7
def center_inter_triplet_loss_tf(features, nrof_features, label, alfa, nrof_classes, beta): # tensorflow version
    """ center_inter_loss = center_loss/||Xi - centers(0,1,2,...i-1,i+1,i+2,...)||
        --mzh 22022017
    """
    dim_features = features.get_shape()[1].value
    centers = tf.get_variable('centers', [nrof_classes, dim_features], dtype=tf.float32,
                              initializer=tf.constant_initializer(0), trainable=False)
    nrof_elements_per_class_list = tf.get_variable('centers_cts', [nrof_classes], dtype=tf.float32,
                                  initializer=tf.constant_initializer(0), trainable=False)
    ## center_loss calculation
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers,label)  # get the corresponding center of each element in features, the list of the centers is in the same order as the features
    dist_centers = features - centers_batch
    dist_centers_sum = tf.reduce_sum(dist_centers**2,1)/2
    loss_center = tf.nn.l2_loss(dist_centers)

    ## count how often each label repeats in the batch
    ones = tf.ones_like(label, tf.float32)
    nrof_elements_per_class_list = tf.scatter_add(nrof_elements_per_class_list, label, ones)  # count the number of elements in each class; classes are ordered [0, 1, 2, 3, ...] as at initialization
    nrof_elements_per_class = tf.gather(nrof_elements_per_class_list, label) #nrof_elements_per_class is the number of the elements in each class


    ## inter_center_loss calculation
    centers_batch1 = tf.gather(centers,label)
    centers_1D = tf.reshape(centers_batch1, [1, nrof_features * dim_features])
    centers_2D = tf.tile(centers_1D, [nrof_features, 1])
    centers_3D = tf.reshape(centers_2D,[nrof_features, nrof_features, dim_features])
    features_3D = tf.reshape(features, [nrof_features, 1, dim_features])
    dist_inter_centers = features_3D - centers_3D
    dist_inter_centers_sum_dim = tf.reduce_sum(dist_inter_centers**2,2)/2
    centers_cts_batch_1D = tf.tile(nrof_elements_per_class,[nrof_features])
    centers_cts_batch_2D = tf.reshape(centers_cts_batch_1D, [nrof_features, nrof_features])
    dist_inter_centers_sum_unique = tf.div(dist_inter_centers_sum_dim, centers_cts_batch_2D)
    dist_inter_centers_sum_all = tf.reduce_sum(dist_inter_centers_sum_unique, 1)
    dist_inter_centers_sum = dist_inter_centers_sum_all - dist_centers_sum
    loss_inter_centers = tf.reduce_mean(dist_inter_centers_sum)

    ## total loss
    loss = loss_center + (loss_center + beta*nrof_features - loss_inter_centers)

    ## update centers
    diff = (1 - alfa) * (centers_batch - features)
    centers_cts_batch_reshape = tf.reshape(nrof_elements_per_class, [-1, 1])
    diff_mean = tf.div(diff, centers_cts_batch_reshape)
    centers = tf.scatter_sub(centers, label, diff_mean)
    zeros = tf.zeros_like(label, tf.float32)
    center_cts_clear = tf.scatter_update(nrof_elements_per_class_list, label, zeros)
    return loss, centers,  loss_center, loss_inter_centers, center_cts_clear
Example no. 8
    def update_centers(self, alpha):
        '''
        Update the centers following the center-loss update strategy.
        :param alpha:
        :return:
        '''
        pixel_recovery_features = self.pixel_recovery_features
        print('centers are ', self.centers)
        assign_label = tf.cast(self.pos_mask, tf.int32)

        assign_features = tf.gather(self.centers, assign_label)
        pred_features = pixel_recovery_features
        diff = assign_features - pred_features
        print('diff is ', diff)
        kernel_size = 11
        num_channels = diff.get_shape().as_list()[-1]
        kernel = tf.convert_to_tensor(np.zeros([kernel_size, kernel_size, 1]),
                                      tf.float32)
        erosion = tf.clip_by_value(
            tf.squeeze(tf.nn.dilation2d(tf.cast(
                tf.expand_dims(self.pos_mask, axis=3), tf.float32),
                                        filter=kernel,
                                        strides=[1, 1, 1, 1],
                                        rates=[1, 1, 1, 1],
                                        padding='SAME'),
                       axis=3), 0.0, 1.0)
        neg_masks = tf.logical_and(
            self.inputs_mask,
            tf.cast(
                tf.cast(erosion, tf.int32) - tf.cast(self.pos_mask, tf.int32),
                tf.bool))
        selected_masks = tf.logical_or(tf.cast(self.pos_mask, tf.bool),
                                       tf.cast(neg_masks, tf.bool))
        selected_assign_label = tf.gather(
            tf.reshape(assign_label, [-1]),
            tf.where(tf.reshape(selected_masks, [-1]))[:, 0])
        selected_diff = tf.gather(
            tf.reshape(diff, [-1, num_channels]),
            tf.where(tf.reshape(selected_masks, [-1]))[:, 0])
        unique_label, unique_idx, unique_count = tf.unique_with_counts(
            tf.reshape(selected_assign_label, [-1]))
        appear_times = tf.gather(unique_count, unique_idx)
        selected_diff = selected_diff / tf.expand_dims(
            tf.cast(1 + appear_times, tf.float32), axis=1)
        selected_diff = alpha * selected_diff
        centers_update_op = tf.scatter_sub(self.centers, selected_assign_label,
                                           selected_diff)
        return centers_update_op
Example no. 9
def get_center_loss(features, labels, alpha, num_classes):
    """获取center loss及center的更新op
    
    Arguments:
        features: Tensor,表征样本特征,一般使用某个fc层的输出,shape应该为[batch_size, feature_length].
        labels: Tensor,表征样本label,非one-hot编码,shape应为[batch_size].
        alpha: 0-1之间的数字,控制样本类别中心的学习率,细节参考原文.
        num_classes: 整数,表明总共有多少个类别,网络分类输出有多少个神经元这里就取多少.
    
    Return:
        loss: Tensor,可与softmax loss相加作为总的loss进行优化.
        centers: Tensor,存储样本中心值的Tensor,仅查看样本中心存储的具体数值时有用.
        centers_update_op: op,用于更新样本中心的op,在训练时需要同时运行该op,否则样本中心不会更新
    """
    # Feature dimensionality, e.g. 256
    len_features = features.get_shape()[1]
    # Create a Variable of shape [num_classes, len_features] that stores the class centers;
    # trainable=False because the centers are not updated by gradients
    centers = tf.get_variable('centers', [num_classes, len_features], dtype=tf.float32,
        initializer=tf.constant_initializer(0), trainable=False)
    # Flatten the labels to 1-D (unnecessary if they already are)
    labels = tf.reshape(labels, [-1])
    
    # Gather the center of each sample in the mini-batch according to its label
    centers_batch = tf.gather(centers, labels)
    
    # debug (leftover in the original): hard-codes a batch of 128 and keeps only the
    # first two feature dimensions, which breaks the general case, so it is left
    # commented out here
    # features = tf.reshape(features, [128, -1])
    # features = features[:, :2]
    # print(features.get_shape())
    
    # Compute the loss
    loss = tf.nn.l2_loss(features - centers_batch)
    
    # Difference between the mini-batch features and their corresponding centers
    diff = centers_batch - features
    
    # Count how many samples of each class appear in the mini-batch; see Eq. (4) in the paper
    unique_label, unique_idx, unique_count = tf.unique_with_counts(labels)
    appear_times = tf.gather(unique_count, unique_idx)
    appear_times = tf.reshape(appear_times, [-1, 1])
    
    diff = diff / tf.cast((1 + appear_times), tf.float32)
    diff = alpha * diff
    
    centers_update_op = tf.scatter_sub(centers, labels, diff)
    
    return loss, centers, centers_update_op
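A short sketch (TF 1.x assumed; the stand-in dense layer, shapes, optimizer and class count are illustrative choices, and the leftover debug reshape inside the function is assumed to be commented out as above) of the pattern the docstring asks for: attach centers_update_op as a control dependency of the train op so the centers are refreshed on every step.

import tensorflow as tf

inputs = tf.placeholder(tf.float32, [None, 784])
labels = tf.placeholder(tf.int64, [None])
features = tf.layers.dense(inputs, 256, activation=tf.nn.relu)  # stand-in feature extractor
center_loss_term, centers, centers_update_op = get_center_loss(features, labels,
                                                               alpha=0.5, num_classes=10)
total_loss = center_loss_term  # in practice: softmax loss + lambda * center loss
with tf.control_dependencies([centers_update_op]):
    train_op = tf.train.AdamOptimizer(1e-3).minimize(total_loss)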
Example no. 10
def center_loss(features, label, alfa, nrof_classes):
    """
    Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
    """
    nrof_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [nrof_classes, nrof_features],
                              dtype=tf.float32,
                              initializer=tf.constant_initializer(0),
                              trainable=False)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers, label)
    diff = (1 - alfa) * (centers_batch - features)
    centers = tf.scatter_sub(centers, label, diff)
    with tf.control_dependencies([centers]):
        loss = tf.reduce_mean(tf.square(features - centers_batch))
    return loss, centers
Example no. 11
def center_loss(features, label, alfa, nrof_classes):
    """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
          (http://ydwen.github.io/papers/WenECCV16.pdf)
          https://blog.csdn.net/u014380165/article/details/76946339
    """
    nrof_features = features.get_shape()[1]
    # During training the current class centers are stored in `centers`; each batch first reads the previously stored centers
    centers = tf.compat.v1.get_variable('centers', [ nrof_classes, nrof_features], dtype=tf.float32,
    initializer=tf.constant_initializer(0), trainable=False)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers, label)  # class centers for the current batch
    diff = (1 - alfa) * (centers_batch - features)  # difference between the class centers and the features, used to update Cj; note the (1 - alfa) factor the facenet author uses, which differs from the original paper
    centers = tf.scatter_sub(centers, label, diff)  # update Cj (step 6 in the paper's algorithm); TensorFlow keeps the variable `centers` for the next batch's center loss
    loss = tf.reduce_mean(tf.square(features - centers_batch))  # the current center loss Lc

    return loss, centers
Example no. 12
def center_loss(features, label, label_stats, centers, alfa):
    """The center loss.
       features: [batch_size, 512], the embedding of images. 
       label: [batch_size, class_num], class label, the label index is 1, others are 0.
       labels_stats: [batch_size, 1], the count of each label in the batch.
       centers: [class_num, 512], center points, each class have one.
       alfa: float, updating rate of centers.
    """
    label = tf.arg_max(label, 1)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers, label)
    diff = alfa * (centers_batch - features)
    diff = diff / label_stats
    centers = tf.scatter_sub(centers, label, diff)
    loss = tf.nn.l2_loss(features - centers_batch)
    return loss, centers
Example no. 13
    def get_center_loss(features, labels, alpha, num_labels):

        nrof_features = features.get_shape()[1]

        centers = tf.get_variable('centers', [num_labels, nrof_features], dtype=tf.float32,
            initializer=tf.constant_initializer(0.0), trainable=False)

        labels = tf.argmax(labels, 1)
        labels = tf.reshape(labels, [-1])

        centers_batch = tf.gather(centers, labels)
        diff = (1 - alpha) * (centers_batch - features)

        centers = tf.scatter_sub(centers, labels, diff)
        loss = tf.nn.l2_loss(features - centers_batch)
        return loss, centers
def center_loss(features, label, nrof_classes=8, alfa=0.95):
    """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
       (http://ydwen.github.io/papers/WenECCV16.pdf)
    """
    nrof_features = features.get_shape()[-1]
    features = tf.reshape(features, [-1, nrof_features.value])
    centers = tf.get_variable('centers', [nrof_classes, nrof_features],
                              dtype=tf.float32,
                              initializer=tf.constant_initializer(0),
                              trainable=False)
    label = tf.cast(tf.argmax(label, 1), tf.int32)
    centers_batch = tf.gather(centers, label)
    diff = (1 - alfa) * (centers_batch - features)
    centers = tf.scatter_sub(centers, label, diff)
    loss = tf.reduce_mean(tf.square(features - centers_batch))
    return loss, centers
Example no. 15
def center_loss(net, label_batch, alfa, nclass):
    norf_net = net.get_shape()[1]
    centers = tf.get_variable('centers', [nclass, norf_net],
                              dtype=tf.float32,
                              initializer=tf.constant_initializer(0),
                              trainable=False)

    label = tf.reshape(label_batch, [-1])
    centers_batch = tf.gather(
        centers, label)  # slice rows of 'centers' according to 'label' (gather along the first axis)

    diff = (1 - alfa) * (centers_batch - net)
    centers = tf.scatter_sub(centers, label, diff)  # subtract diff from the rows of centers indexed by label

    with tf.control_dependencies([centers]):
        loss = tf.reduce_mean(tf.square(net - centers_batch))
    return loss, centers
Example no. 16
def center_loss_v3(features, labels, alpha, name='center_loss'):
    # features [batch,hid]
    # label one_hot [batch,num_class]
    hidden_size = features.get_shape().as_list()[-1]
    num_classes = labels.get_shape().as_list()[-1]
    labels = tf.argmax(labels, axis=-1)  # [batch]
    with tf.variable_scope(name):
        centers = tf.get_variable('centers', [num_classes, hidden_size],
                                  dtype=tf.float32,
                                  initializer=tf.contrib.layers.xavier_initializer(),
                                  trainable=False)
        centers_batch = tf.gather(centers, labels)  # class centers for the current batch
        # tf.nn.l2_loss already returns a scalar, so average the per-sample half
        # squared distances explicitly instead of calling reduce_mean on a scalar
        c_loss = tf.reduce_mean(tf.reduce_sum(tf.square(features - centers_batch), axis=-1) / 2)

        centers_diff = alpha * (centers_batch - features)  # update term for the class centers
        centers = tf.scatter_sub(centers, labels, centers_diff)  # update the centers
    return c_loss, centers
def get_center_loss(features, labels, alpha, num_classes):
    len_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [num_classes, len_features],
                              dtype=tf.float32,
                              initializer=tf.constant_initializer(0),
                              trainable=False)
    labels = tf.reshape(labels, [-1])
    centers_batch = tf.gather(centers, labels)
    diff = centers_batch - features
    unique_label, unique_idx, unique_count = tf.unique_with_counts(labels)
    appear_times = tf.gather(unique_count, unique_idx)
    appear_times = tf.reshape(appear_times, [-1, 1])
    diff = diff / tf.cast((1 + appear_times), tf.float32)
    diff = alpha * diff
    loss = tf.nn.l2_loss(features - centers_batch)
    centers_update_op = tf.scatter_sub(centers, labels, diff)
    return loss, centers_update_op
Example no. 18
 def center_loss(self, features, label, alpha, num_classes):
     """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
    (http://ydwen.github.io/papers/WenECCV16.pdf)
    copy from facenet: https://github.com/davidsandberg/facenet
 """
     num_features = features.get_shape()[1]
     centers = tf.get_variable('centers', [num_classes, num_features],
                               dtype=tf.float32,
                               initializer=tf.constant_initializer(0),
                               trainable=False)
     label = tf.reshape(label, [-1])
     centers_batch = tf.gather(centers, label)
     diff = (1 - alpha) * (centers_batch - features)
     centers = tf.scatter_sub(centers, label, diff)
     with tf.control_dependencies([centers]):
         loss = tf.reduce_mean(tf.square(features - centers_batch))
     return loss, centers
def update_means(features, labels, alpha):
    with tf.variable_scope('means', reuse=True):
        means = tf.get_variable('means')

    labels = tf.reshape(labels, [-1])
    means_batch = tf.gather(means, labels)
    diff = means_batch - features

    unique_label, unique_idx, unique_count = tf.unique_with_counts(labels)
    appear_times = tf.gather(unique_count, unique_idx)
    appear_times = tf.reshape(appear_times, [-1, 1])

    diff = diff / tf.cast((1 + appear_times), tf.float32)
    diff = alpha * diff
    means = tf.scatter_sub(means, labels, diff)

    return means
Example no. 20
def center_loss(features, label, alfa, nrof_classes):
    # embedding dimensionality
    nrof_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [nrof_classes, nrof_features],
                              dtype=tf.float32,
                              initializer=tf.constant_initializer(0),
                              trainable=False)
    label = tf.reshape(label, [-1])
    # pick the centers for this batch, shape [batch, nrof_features]
    centers_batch = tf.gather(centers, label)
    diff = (1 - alfa) * (centers_batch - features)
    # updates for repeated labels accumulate (one subtraction per occurrence)
    centers = tf.scatter_sub(centers, label, diff)
    # update the centers before computing the loss
    with tf.control_dependencies([centers]):
        loss = tf.reduce_mean(tf.square(features - centers_batch))
    return loss, centers
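A small self-contained check (TF 1.x assumed, toy numbers) of the "repeated labels accumulate" behaviour noted in the comment above: tf.scatter_sub applies one subtraction per occurrence of an index, which is why several of the other variants divide the update by (1 + appear_times).

import tensorflow as tf

centers_demo = tf.get_variable("centers_demo",
                               initializer=tf.constant([[1.0, 1.0],
                                                        [2.0, 2.0]]))
label = tf.constant([0, 0, 1])  # class 0 appears twice in this "batch"
diff = tf.constant([[0.1, 0.1],
                    [0.1, 0.1],
                    [0.5, 0.5]])
updated = tf.scatter_sub(centers_demo, label, diff)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(updated))  # [[0.8, 0.8], [1.5, 1.5]] -- row 0 was subtracted twice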
Example no. 21
def _center_loss_func(features, labels, alpha, num_classes,
                      centers, feature_dim):
    assert feature_dim == features.get_shape()[1]
    label, t = tf.split(labels, 2, axis=1)
    label = K.reshape(label, [-1])
    label = tf.to_int32(label)
    #print(sess.run(labels))
    #l = tf.Variable([1, 64])

    centers_batch = tf.gather(centers, label)
    #print(sess.run(centers_batch))
    #print(sess.run(features))
    #assert tf.shape(centers_batch) == tf.shape(features)
    diff = (1 - alpha) * (centers_batch - features)
    centers = tf.scatter_sub(centers, label, diff)
    loss = tf.reduce_mean(K.square(features - centers_batch))
    return loss
Example no. 22
 def _assign_sub(self, ref, updates, indices=None):
     if indices is not None:
         if isinstance(ref, tf.Variable):
             return tf.scatter_sub(ref,
                                   indices,
                                   updates,
                                   use_locking=self._use_locking)
         elif isinstance(ref, resource_variable_ops.ResourceVariable):
             with tf.control_dependencies([
                     resource_variable_ops.resource_scatter_add(
                         ref.handle, indices, -updates)
             ]):
                 return ref.value()
         else:
             raise TypeError("did not expect type %r" % type(ref))
     else:
         return tf.assign_sub(ref, updates, use_locking=self._use_locking)
def batch_norm_with_mask(x, is_training, mask, num_channels, name="bn",
                         decay=0.9, epsilon=1e-3, data_format="NHWC"):

  shape = [num_channels]
  indices = tf.where(mask)
  indices = tf.cast(indices,tf.int32)
  indices = tf.reshape(indices, [-1])

  with tf.compat.v1.variable_scope(name, reuse=None if is_training else True):
    offset = tf.compat.v1.get_variable(
      "offset", shape,
      initializer=tf.constant_initializer(0.0))
    scale = tf.compat.v1.get_variable(
      "scale", shape,
      initializer=tf.constant_initializer(1.0))
    offset = tf.boolean_mask(offset, mask)
    scale = tf.boolean_mask(scale, mask)

    moving_mean = tf.compat.v1.get_variable(
      "moving_mean", shape, trainable=False,
      initializer=tf.constant_initializer(0.0))
    moving_variance = tf.compat.v1.get_variable(
      "moving_variance", shape, trainable=False,
      initializer=tf.constant_initializer(1.0))

    if is_training:
      x, mean, variance = tf.compat.v1.nn.fused_batch_norm(
        x, scale, offset, epsilon=epsilon, data_format=data_format,
        is_training=True)
      mean = (1.0 - decay) * (tf.boolean_mask(moving_mean, mask) - mean)
      variance = (1.0 - decay) * (tf.boolean_mask(moving_variance, mask) - variance)
      update_mean = tf.scatter_sub(moving_mean, indices, mean, use_locking=True)
      update_variance = tf.compat.v1.scatter_sub(
        moving_variance, indices, variance, use_locking=True)
      with tf.control_dependencies([update_mean, update_variance]):
        x = tf.identity(x)
    else:
      masked_moving_mean = tf.boolean_mask(moving_mean, mask)
      masked_moving_variance = tf.boolean_mask(moving_variance, mask)
      x, _, _ = tf.nn.fused_batch_norm(x, scale, offset,
                                       mean=masked_moving_mean,
                                       variance=masked_moving_variance,
                                       epsilon=epsilon, data_format=data_format,
                                       is_training=False)
  return x
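A hypothetical call site for batch_norm_with_mask (a sketch only, assuming a recent TF 1.x where tf.compat.v1 is available; the mask, shapes and scope name are assumptions). Note that the channel dimension of x has to equal the number of True entries in mask, because offset and scale are boolean-masked before being passed to the fused batch norm.

import tensorflow as tf

num_channels = 16
mask = tf.constant([True] * 8 + [False] * 8)     # keep the first 8 of the 16 channels
x = tf.placeholder(tf.float32, [None, 8, 8, 8])  # inputs already restricted to those 8 channels

y = batch_norm_with_mask(x, is_training=True, mask=mask,
                         num_channels=num_channels, name="bn_masked")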
Example no. 24
    def _apply_sparse_shared(self, grad_values, grad_indices, var):
        shape = np.array(var.get_shape())
        var_rank = len(shape)
        # For sparse case, we only update the accumulator representing the sparse
        # dimension. In this case SM3 is similar to isotropic adagrad but with
        # better bound (due to the max operator).
        #
        # We do not use the column accumulator because it will updated for
        # every gradient step and will significantly overestimate the gradient
        # square. While, the row accumulator can take advantage of the sparsity
        # in the gradients. Even if one implements the column accumulator - it
        # will result in a no-op because the row accumulators will have lower
        # values.
        #
        # Note that: We do not run this code paths for our experiments in our paper
        # as on TPU all the sparse gradients are densified.
        if var_rank > 1:
            accumulator = self.get_slot(var, "accumulator_" + str(0))
            current_accumulator = tf.gather(accumulator, grad_indices)
            expanded_shape = tf.concat(
                [[tf.shape(current_accumulator)[0]], [1] * (var_rank - 1)], 0)
            current_accumulator = tf.reshape(current_accumulator,
                                             expanded_shape)
            current_accumulator += grad_values * grad_values
        else:
            accumulator = self.get_slot(var, "accumulator")
            current_accumulator = tf.scatter_add(accumulator, grad_indices,
                                                 grad_values * grad_values)

        accumulator_inv_sqrt = tf.where(tf.greater(current_accumulator, 0),
                                        tf.rsqrt(current_accumulator),
                                        tf.zeros_like(current_accumulator))
        scaled_g = (grad_values * accumulator_inv_sqrt)
        updates = []
        with tf.control_dependencies([scaled_g]):
            if var_rank > 1:
                axes = list(range(1, var_rank))
                dim_accumulator = tf.reduce_max(current_accumulator, axis=axes)
                updates = [
                    tf.scatter_update(accumulator, grad_indices,
                                      dim_accumulator)
                ]
        with tf.control_dependencies(updates):
            return tf.scatter_sub(var, grad_indices,
                                  self._learning_rate_tensor * scaled_g)
Example no. 25
def center_loss_similarity(features, label, alfa, nrof_classes):
    ## center loss on the cosine distance (1 - similarity) instead of the L2 norm, i.e. the Euclidean distance

    ## normalise the embedding vectors so that dot products give the cosine similarity
    features = tf.nn.l2_normalize(features, 1, 1e-10, name='feat_emb')

    nrof_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [nrof_classes, nrof_features],
                              dtype=tf.float32,
                              initializer=tf.constant_initializer(0),
                              trainable=False)
    centers_cts = tf.get_variable('centers_cts', [nrof_classes],
                                  dtype=tf.float32,
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    #centers_cts_init = tf.zeros_like(nrof_classes, tf.float32)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(
        centers, label
    )  #get the corresponding center of each element in features, the list of the centers is in the same order as the features
    #loss = tf.nn.l2_loss(features - centers_batch) ## 0.5*(L2 norm)**2, L2 norm is the Euclidean distance
    similarity_all = tf.matmul(
        features, tf.transpose(tf.nn.l2_normalize(
            centers_batch, 1,
            1e-10)))  ## dot product, cosine distance, similarity of x and y
    similarity_self = tf.diag_part(similarity_all)
    loss_x = tf.subtract(1.0, similarity_self)
    loss = tf.reduce_sum(
        loss_x)  ## sum the cosine distance of each vector/tensor
    diff = (1 - alfa) * (centers_batch - features)
    ones = tf.ones_like(label, tf.float32)
    centers_cts = tf.scatter_add(
        centers_cts, label, ones
    )  # count the number of samples of each class; classes are ordered [0, 1, 2, 3, ...] as at initialization
    centers_cts_batch = tf.gather(centers_cts, label)
    #centers_cts_batch_ext = tf.tile(centers_cts_batch, nrof_features)
    #centers_cts_batch_reshape = tf.reshape(centers_cts_batch_ext,[-1, nrof_features])
    centers_cts_batch_reshape = tf.reshape(centers_cts_batch, [-1, 1])
    diff_mean = tf.div(diff, centers_cts_batch_reshape)
    centers = tf.scatter_sub(centers, label, diff_mean)
    zeros = tf.zeros_like(label, tf.float32)
    center_cts_clear = tf.scatter_update(centers_cts, label, zeros)
    #return loss, centers, label, centers_batch, diff, centers_cts, centers_cts_batch, diff_mean,center_cts_clear, centers_cts_batch_reshape
    #return loss, centers, loss_x, similarity_all, similarity_self
    return loss, centers
Example no. 26
def update_centers(features, labels, alpha):
    with tf.variable_scope('center', reuse=tf.AUTO_REUSE):
        centers = tf.get_variable('centers')

    labels = tf.reshape(labels, [-1]) # flatten
    centers_batch = tf.gather(centers, labels) # gather each sample's center in label order
    diff = centers_batch - features # difference between each feature and its center

    unique_label, unique_idx, unique_count = tf.unique_with_counts(labels)
    appear_times = tf.gather(unique_count, unique_idx)
    appear_times = tf.reshape(appear_times, [-1, 1])

    diff = diff / tf.cast((1 + appear_times), tf.float32)
    diff = alpha * diff

    centers = tf.scatter_sub(centers, labels, diff)

    return centers
Example no. 27
    def margin_center_loss(self, features, label, nrof_classes):

        nrof_features = features.get_shape()[1]
        centers = tf.get_variable('centers', [nrof_classes, nrof_features],
                                  dtype=tf.float32,
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
        label = tf.reshape(label, [-1])

        mloss = self.margin_loss(features, label, centers, 0.6)

        centers_batch = tf.gather(centers, label)
        diff = (1 - 0.6) * (centers_batch - features)
        centers = tf.scatter_sub(centers, label, diff)
        with tf.control_dependencies([centers]):
            loss = tf.reduce_mean(tf.square(features - centers_batch))

        return loss + mloss
Example no. 29
    def __init__(self, param, load_last_model = False):
        '''
        param holds all the hyper-parameters the model needs; it is a class instance whose attributes are accessed with the dot operator
        '''

        self.load_last_model = load_last_model
        self.sess = tf.Session()
        self.param = param
        self.keep_prob = tf.placeholder(tf.float32, name = 'keep_prob')
        self.inputs = tf.placeholder(dtype = tf.float32, shape = (None,28,28,1), name = 'inputs')
        self.labels = tf.placeholder(tf.int32, shape = (None,), name = 'labels')
        self.lr = tf.placeholder(tf.float32, (), 'param_learning_rate')
        self.alpha = tf.placeholder(tf.float32, (), 'center_learning_rate')
        self.lamb = tf.placeholder(tf.float32, (), 'lambda')
        self.labels_onehot = tf.one_hot(self.labels, 10,  name = 'label_onehot')
        self.features, self.logits = self.inference()
        self.center = tf.Variable(initial_value = tf.random_normal(shape = (10,2),
                                                                    name = 'center_initializer'),
                                  trainable = False,
                                  name = 'center',
                                  dtype = tf.float32)
        self.center_of_centers = tf.reduce_mean(self.center, axis = 0)
        self.center_loss, self.softmax_loss, self.delta_center = self.loss()
        self.total_loss = self.softmax_loss + self.lamb * self.center_loss
        self.update_center = tf.scatter_sub(self.center, self.labels, self.alpha*self.delta_center)
        self.accuracy = self.acc()
        self.optimizer = tf.train.AdamOptimizer(self.lr, name = 'optimizer')\
                            .minimize(loss = self.total_loss)
        self.train_x, self.train_y,\
        self.valid_x, self.valid_y,\
        self.test_x,  self.test_y  = self.load_data()
        self.train_size = len(self.train_x)
        self.batch_size = self.param.batch_size
        tf.summary.scalar('train_softmax_loss', self.softmax_loss)
        tf.summary.scalar('train_center_loss', self.center_loss)
        tf.summary.scalar('train_total_loss', self.total_loss)
#        tf.summary.scalar('valid_total_loss', self.total_loss, collections = ['valid'])
#        tf.summary.scalar('valid_softmax_loss', self.softmax_loss, collections = ['valid'])
#        tf.summary.scalar('valid_center_loss', self.center_loss, collections = ['valid'])        
        self.summaries = tf.summary.merge_all()
#        self.summaries_valid = tf.summary.merge_all(key = 'valid')
        self.writer = tf.summary.FileWriter(logdir = './/log//', graph = self.sess.graph)
        self.saver = tf.train.Saver()
Example no. 30
def _get_center_loss(features, labels, alpha, num_classes):
    """get center loss and op for center update

    Arguments:
        features: Tensor, shape should be [batch_size, feature_length].
        labels: Tensor, not one-hot encoded, shape should be [batch_size].
        alpha: between 0 and 1, controls the learning rate of the centers.
        num_classes: int, the number of classes.

    Return:
        loss: Tensor, can be added to the softmax loss as the total loss.
        centers: Tensor storing the class centers; only useful for inspecting their values.
        centers_update_op: op used to update the centers; it needs to be run during training, otherwise the centers are never updated.
    """
    # get the feature dimension, e.g. 2208
    len_features = features.get_shape()[1]
    # build a Variable of shape [num_classes, len_features] used to store the centers
    # trainable=False because the centers are not updated by gradients
    centers = tf.get_variable('centers', [num_classes, len_features],
                              dtype=tf.float32,
                              initializer=tf.constant_initializer(0),
                              trainable=False)
    # flatten the labels to one dimension (unnecessary if they already are one-dimensional)
    labels = tf.reshape(labels, [-1])

    # get the center of each sample in the mini-batch
    centers_batch = tf.gather(centers, labels)
    # compute the loss
    loss = tf.nn.l2_loss(features - centers_batch)

    diff = centers_batch - features
    '''
    # get the frequency of each class in the mini-batch; refer to Eq. (4) in the paper
    unique_label, unique_idx, unique_count = tf.unique_with_counts(labels)
    appear_times = tf.gather(unique_count, unique_idx)
    appear_times = tf.reshape(appear_times, [-1, 1])

    diff = diff / tf.cast((1 + appear_times), tf.float32)
    '''
    diff = alpha * diff

    centers_update_op = tf.scatter_sub(centers, labels, diff)

    return loss, centers, centers_update_op
Example no. 31
def center_loss(features, labels, num_classes, alpha=0.99, weight=1.0):
    """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
     (http://ydwen.github.io/papers/WenECCV16.pdf)
  """
    num_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [num_classes, num_features],
                              dtype=tf.float32,
                              initializer=tf.constant_initializer(0),
                              trainable=False)
    labels = tf.reshape(labels, [-1])
    centers_batch = tf.gather(centers, labels)
    diffs = (1 - alpha) * (centers_batch - features)
    centers = tf.scatter_sub(centers, labels, diffs)

    center_loss_mean = tf.reduce_mean(tf.square(features - centers_batch))

    tf.add_to_collection('losses', weight * center_loss_mean)

    return center_loss_mean, centers
Example no. 32
def center_loss_angel(features, label, alfa, nrof_classes):

    nrof_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
        initializer=tf.constant_initializer(1), trainable=False)

    centers_unit = tf.nn.l2_normalize(centers, dim=1)
    features_unit = tf.nn.l2_normalize(features, dim=1)  # normalize the features (the original snippet normalized the centers twice)

    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers_unit, label)

    cos_theta = tf.reduce_sum( tf.multiply(features_unit , centers_batch),axis=1)

    diff = (1 - alfa) * (centers_batch - features)
    centers = tf.scatter_sub(centers, label, diff)

    loss = tf.reduce_mean(tf.square( cos_theta))
    return loss, centers
Example no. 33
    def loss_center(self):
        print('hahaha')
        alpha = self.cfg.alpha
        centers_batch = tf.gather(self.centers, self.pl_labels)             #(Batch, n_features)
        numerator = centers_batch - self.deep_features                      #(Batch, n_features)
        
        _, idx, count = tf.unique_with_counts(self.pl_labels)
        denominator = tf.gather(count, idx)                                 #(Batch)
        denominator = tf.cast(denominator, tf.float32)                      #(Batch)
        denominator = tf.reshape(denominator, [-1, 1]) 

                    
        diff = tf.divide(numerator, denominator) * alpha                    #(Batch, n_features)
        self.centers = tf.scatter_sub(self.centers, self.pl_labels, diff)   #(n_classes, n_features)
        square = tf.square(self.deep_features - centers_batch)
        loss_batch = tf.reduce_sum(square, axis=1)
        loss = tf.reduce_mean(loss_batch)
        result = tf.scalar_mul(self.cfg.lmbda, loss)
        return result
Example no. 34
    def get_center_loss3(self, labels, centers, bottleneck, alpha=0.5):
        """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
        (http://ydwen.github.io/papers/WenECCV16.pdf)
        """
        labels = tf.where(tf.equal(labels, 1.0))[:, 1]  # onehot to dense
        labels = tf.cast(labels, tf.int64)
        labels = tf.reshape(labels, [-1])
        centers_batch = tf.gather(centers, labels)
        loss = tf.nn.l2_loss(bottleneck - centers_batch)
        diff = centers_batch - bottleneck
        unique_label, unique_idx, unique_count = tf.unique_with_counts(labels)
        appear_times = tf.gather(unique_count, unique_idx)
        appear_times = tf.reshape(appear_times, [-1, 1])
        diff = diff / tf.cast((1 + appear_times), tf.float32)
        diff = alpha * diff

        update_op = tf.scatter_sub(centers, labels, diff)

        return loss, update_op
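A tiny self-contained check (TF 1.x assumed, made-up values) of the one-hot-to-dense label conversion used at the top of get_center_loss3:

import tensorflow as tf

labels_onehot = tf.constant([[0., 1., 0.],
                             [1., 0., 0.],
                             [0., 0., 1.]])
dense = tf.where(tf.equal(labels_onehot, 1.0))[:, 1]  # column index of each 1

with tf.Session() as sess:
    print(sess.run(dense))  # [1 0 2]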
Example no. 35
def get_margin_loss(labels, features, num_classes, alpha=0.1, training=True):
    len_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [num_classes, len_features],
                              dtype=tf.float32,
                              initializer=tf.constant_initializer(0),
                              trainable=False)
    labels = tf.reshape(labels, [-1])

    centers_batch = tf.gather(centers, labels)

    center_dist = tf.reduce_sum(tf.abs(tf.subtract(features, centers_batch)),
                                axis=1)

    diff = centers_batch - features
    unique_label, unique_idx, unique_count = tf.unique_with_counts(labels)
    appear_time = tf.gather(unique_count, unique_idx)
    appear_time = tf.reshape(appear_time, [-1, 1])

    diff = diff / tf.cast((1 + appear_time), tf.float32)
    diff = alpha * diff

    centers_update_up = tf.scatter_sub(centers, labels, diff)

    feature_center_pair_dist = tf.reduce_sum(tf.abs(
        tf.subtract(tf.expand_dims(features, 1), tf.expand_dims(centers, 0))),
                                             axis=2)

    feature_center_dist = tf.subtract(tf.expand_dims(center_dist, 1),
                                      feature_center_pair_dist)

    feature_center_labels_equal = tf.equal(
        tf.expand_dims(labels, 1),
        tf.expand_dims(tf.constant(list(range(num_classes)), dtype=tf.int64),
                       0))
    mask_feature_center = tf.to_float(
        tf.logical_not(feature_center_labels_equal))

    margin_loss = tf.reduce_sum(
        tf.nn.softplus(feature_center_dist) *
        mask_feature_center) / tf.reduce_sum(mask_feature_center)

    return margin_loss, centers, centers_update_up
Example no. 36
def center_loss(embedding, label, num_classes, alpha=0.1, scope="center_loss"):
    r"""Center-Loss as described in the paper
    `A Discriminative Feature Learning Approach for Deep Face Recognition`
    <http://ydwen.github.io/papers/WenECCV16.pdf> by Wen et al.

    Args:
        embedding (tf.Tensor): features produced by the network
        label (tf.Tensor): ground-truth label for each feature
        num_classes (int): number of different classes
        alpha (float): learning rate for updating the centers

    Returns:
        tf.Tensor: center loss
    """
    nrof_features = embedding.get_shape()[1]
    centers = tf.get_variable('centers', [num_classes, nrof_features], dtype=tf.float32,
                              initializer=tf.constant_initializer(0), trainable=False)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers, label)
    diff = (1 - alpha) * (centers_batch - embedding)
    centers = tf.scatter_sub(centers, label, diff)
    loss = tf.reduce_mean(tf.square(embedding - centers_batch), name=scope)
    return loss
def island_loss(features, label, alpha, nrof_classes, nrof_features, lamda1=10):
    """Center loss based on the paper "Island Loss for Learning Discriminative Features in Facial Expression Recognition"
       (https://github.com/SeriaZheng/EmoNet/blob/master/loss_function/loss_paper/Island_loss.pdf)
    """
    # Retrieve the shared variable `centers`
    with tf.variable_scope('center', reuse=True):
        centers = tf.get_variable('centers')
    label = tf.reshape(label, [-1])

    # Gather the center for each label; labels may repeat, since several samples can belong to the same class
    centers_batch = tf.gather(centers, label)

    # Difference between each feature and its center; diff1 is the center-loss update term
    diff1 = centers_batch - features

    # Count how many times each label appears in the batch (see the update formula in the paper)
    unique_label, unique_idx, unique_count = tf.unique_with_counts(label)
    appear_times = tf.gather(unique_count, unique_idx)
    appear_times = tf.reshape(appear_times, [-1, 1])

    diff1 = diff1 / tf.cast((1 + appear_times), tf.float32)
    diff1 = alpha * diff1

    # diff2 is the island-loss update term for the centers
    diff2 = tf.get_variable('diff2', [nrof_classes, nrof_features], dtype=tf.float32,
                              initializer=tf.constant_initializer(0), trainable=False)
    for i in range(nrof_classes):
        for j in range(nrof_classes):
            if i!=j:
                diff2 = tf.scatter_add(diff2, i,
                                       (tf.gather(centers, i) / tf.sqrt(
                                           tf.reduce_sum(tf.square(tf.gather(centers, i)))) * tf.sqrt(
                                           tf.reduce_sum(tf.square(tf.gather(centers, j)))))
                                       - tf.multiply(
                                           (tf.reduce_sum(
                                               tf.multiply(tf.gather(centers, i), tf.gather(centers, j))) / tf.sqrt(
                                               tf.reduce_sum(tf.square(tf.gather(centers, i)))) *
                                            tf.pow(tf.sqrt(tf.reduce_sum(tf.square(tf.gather(centers, j)))), 3)),
                                           tf.gather(centers, j)))
    diff2 = diff2 * lamda1 / (nrof_classes - 1)
    diff2 = alpha * diff2

    # Center loss: l2_loss sums the squared values and divides by 2 (no square root)
    loss1 = tf.nn.l2_loss(features - centers_batch)

    # Island loss
    loss2 = tf.zeros(1)
    for i in range(nrof_classes):
        for j in range(nrof_classes):
            if i!=j:
                loss2 = tf.add(tf.add(tf.reduce_sum(tf.multiply(tf.gather(centers, i), tf.gather(centers, j))) / (
                        tf.sqrt(tf.reduce_sum(tf.square(tf.gather(centers, i)))) *
                        tf.sqrt(tf.reduce_sum(tf.square(tf.gather(centers, j))))), tf.ones(1)), loss2)
    loss2 = lamda1 * loss2

    loss = tf.add(loss1,loss2)

    # Update the centers: subtract diff1 from the rows indexed by label; a label that appears several times is subtracted several times (diff1 and centers have different shapes)
    centers = tf.scatter_sub(centers, label, diff1)
    # diff2 has the same shape as centers, so it can be subtracted directly
    centers = tf.subtract(centers, diff2)

    return loss, centers
 def scatter_subtract(variables1, variables2):
     # `k` (presumably a percentage) and `utils.get_tensor_size` come from the
     # snippet's original module and are not defined here
     shape = utils.get_tensor_size(variables2)
     values, indices = tf.nn.top_k(-1 * variables1, tf.cast(k * shape / 100, tf.int32))
     return tf.scatter_sub(variables2, indices, values)
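A self-contained toy illustration of the pattern above (TF 1.x assumed; a hard-coded k of 2 elements stands in for the percentage-based count, and `utils.get_tensor_size` from the original module is not used here): top_k of the negated tensor picks the most negative entries of variables1, and tf.scatter_sub subtracts those (negated) values from variables2 at the same indices.

import tensorflow as tf

v1 = tf.constant([0.3, -2.0, 0.1, -0.5])  # source values
v2 = tf.get_variable("v2_demo", initializer=tf.constant([10.0, 10.0, 10.0, 10.0]))

values, indices = tf.nn.top_k(-1 * v1, k=2)  # values = [2.0, 0.5], indices = [1, 3]
result = tf.scatter_sub(v2, indices, values)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(result))  # [10.  8.  10.  9.5]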