def _build_graph(self):
    """Build the TensorFlow graph used to fit the factors ``W`` and ``H``.

    Everything is created inside a fresh ``tf.Graph`` stored on
    ``self.graph``:

    * ``self.queue`` / ``self.enqueue`` -- a ``RandomShuffleQueue`` fed through
      the ``self.enq_indices`` and ``self.enq_X`` placeholders with a batch of
      row indices and the matching ``batch_size x batch_size`` block.
    * ``self.W_var`` / ``self.H_var`` -- ``[n_nodes, K]`` variables;
      ``self.W`` / ``self.H`` expose their absolute values.
    * ``self.loss`` -- ``l2_loss(X_s - |W_s| |H_s|^T)`` on the dequeued batch.
    * ``self.opt`` -- subtracts ``self.lr``-scaled gradients from only the
      gathered rows via ``tf.scatter_sub``.
    * ``self.sess`` / ``self.init_op`` -- the session and the variable
      initialiser for this graph.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        K = self.K
        n_nodes = self.net.n_nodes
        sum_weight = self.X.sum()
        batch_size = self.batch_size
        max_iter = n_nodes * self.iter_per_node // batch_size
        capacity = max_iter * 10

        # Input pipeline: batches of (row indices, sub-matrix) are enqueued
        # through the placeholders and dequeued in random order.
        self.queue = queue = tf.RandomShuffleQueue(
            capacity, 0, ["int64", "float"],
            shapes=[[batch_size, ], [batch_size, batch_size]])
        self.enq_indices = enq_inp = tf.placeholder("int64", [batch_size])
        self.enq_X = enq_X = tf.placeholder("float32", [batch_size, batch_size])
        self.enqueue = queue.enqueue((enq_inp, enq_X))
        indices, X_s = queue.dequeue()

        # scale is chosen so that K * scale**2 equals the mean entry of X
        # (sum_weight / n_nodes**2); the initializer's upper bound is 2*scale.
        scale = np.sqrt(sum_weight / (n_nodes * n_nodes * K))
        initializer = tf.random_uniform_initializer(maxval=2*scale)
        self.W_var = W_var = tf.get_variable("W", [n_nodes, K], "float32", initializer)
        self.H_var = H_var = tf.get_variable("H", [n_nodes, K], "float32", initializer)
        self.W = tf.abs(W_var)
        self.H = tf.abs(H_var)

        # Only the rows referenced by the current batch take part in the loss.
        W_s = tf.gather(W_var, indices)
        H_s = tf.gather(H_var, indices)
        W_abs = tf.abs(W_s)
        H_abs = tf.abs(H_s)
        self.loss = loss = tf.nn.l2_loss(X_s - tf.matmul(W_abs, H_abs, transpose_b=True))

        # Manual sparse SGD step: gradients w.r.t. the gathered rows are
        # scattered back into the full variables.
        dW, dH = tf.gradients(loss, [W_s, H_s])
        update_W = tf.scatter_sub(W_var, indices, self.lr*dW)
        update_H = tf.scatter_sub(H_var, indices, self.lr*dH)
        self.opt = tf.group(update_W, update_H)

        self.sess = tf.Session()
        # tf.initialize_all_variables() is deprecated in favour of
        # tf.global_variables_initializer().
        self.init_op = tf.global_variables_initializer()
def center_loss(features, label, alpha, nrof_classes):
    """Compute the center loss and the op that moves the class centers.

    Based on the paper "A Discriminative Feature Learning Approach for Deep
    Face Recognition" (http://ydwen.github.io/papers/WenECCV16.pdf).

    Args:
        features: embedding tensor; its second dimension is the feature length.
        label: class indices, flattened to 1-D before use.
        alpha: scale applied to the count-normalised center update.
        nrof_classes: not used here; the ``center/centers`` variable must
            already exist because it is fetched with ``reuse=True``.

    Returns:
        Tuple ``(loss, centers)``: ``tf.nn.l2_loss`` between the features and
        their gathered centers, and the ``tf.scatter_sub`` output that
        applies the center update.
    """
    # Feature vector length (not used further in this function).
    nrof_features = features.get_shape()[1]

    # Fetch the shared variable that stores the centers.
    with tf.variable_scope('center', reuse=True):
        centers_var = tf.get_variable('centers')

    flat_label = tf.reshape(label, [-1])
    # Center of each sample's class; a label may occur several times in a batch.
    batch_centers = tf.gather(centers_var, flat_label)
    offset = batch_centers - features

    # How often each sample's label occurs in this batch (see the update
    # formula in the paper).
    _, label_idx, label_count = tf.unique_with_counts(flat_label)
    occurrences = tf.gather(label_count, label_idx)
    occurrences = tf.reshape(occurrences, [-1, 1])
    offset = offset / tf.cast((1 + occurrences), tf.float32)
    offset = alpha * offset

    # Subtract the offsets from the matching center rows; a label that
    # appears several times is subtracted several times.
    updated_centers = tf.scatter_sub(centers_var, flat_label, offset)

    # l2_loss sums the squared entries and halves the sum (no square root).
    loss = tf.nn.l2_loss(features - batch_centers)
    return loss, updated_centers
def _assign_sub(self, ref, updates, indices=None):
    """Subtract ``updates`` from ``ref``, densely or only at ``indices``.

    Args:
        ref: variable to update.
        updates: values to subtract.
        indices: optional row indices; when ``None`` a dense
            ``tf.assign_sub`` is used.

    Returns:
        The op output of the subtraction (for the resource-variable branch,
        ``ref.value()`` read after the scatter has run).

    Raises:
        TypeError: if ``indices`` is given and ``ref`` is neither a
            ``tf.Variable`` nor a ``ResourceVariable``.
    """
    if indices is None:
        return tf.assign_sub(ref, updates, use_locking=self._use_locking)

    if isinstance(ref, tf.Variable):
        return tf.scatter_sub(ref, indices, updates, use_locking=self._use_locking)

    if isinstance(ref, resource_variable_ops.ResourceVariable):
        # Resource variables are updated by scatter-adding the negated values.
        scatter_op = resource_variable_ops.resource_scatter_add(
            ref.handle, indices, -updates)
        with tf.control_dependencies([scatter_op]):
            return ref.value()

    raise TypeError("did not expect type %r" % type(ref))
def _apply_sparse(self, grad, var):
    """Apply a sparse gradient step with a clipped coordinate-wise rate.

    Args:
        grad: sparse gradient exposing ``values`` and ``indices``.
        var: variable to update.

    Returns:
        The ``tf.scatter_sub`` output that subtracts
        ``grad.values * learning_rate`` from ``var`` at ``grad.indices``.
    """
    # While the counter is below the burn-in threshold a separate cap applies.
    in_burnin = self._counter < self._burnin
    rate_cap = tf.where(in_burnin,
                        self._burnin_max_learning_rate,
                        self._max_learning_rate)

    raw_rate = self._get_coordinatewise_learning_rate(grad, var)
    upper_bound = tf.cast(rate_cap, var.dtype)
    clipped_rate = tf.clip_by_value(raw_rate, 0., upper_bound)

    step = grad.values * clipped_rate
    return tf.scatter_sub(var, grad.indices, step,
                          use_locking=self._use_locking)
def center_loss(features, label, alfa, nrof_classes):
    """Return the mean-squared center loss and the center-update op.

    Based on the paper "A Discriminative Feature Learning Approach for Deep
    Face Recognition" (http://ydwen.github.io/papers/WenECCV16.pdf).

    Args:
        features: embedding tensor; its second dimension sizes the centers.
        label: class indices, flattened to 1-D before use.
        alfa: each center is moved by ``(1 - alfa)`` times its difference to
            every matching feature.
        nrof_classes: number of rows of the ``centers`` variable.

    Returns:
        Tuple ``(loss, centers)``: the mean squared difference between the
        features and their gathered centers, and the ``tf.scatter_sub``
        output that applies the center update.
    """
    feature_dim = features.get_shape()[1]
    centers_var = tf.get_variable(
        'centers',
        [nrof_classes, feature_dim],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0),
        trainable=False,
    )

    flat_label = tf.reshape(label, [-1])
    batch_centers = tf.gather(centers_var, flat_label)

    step = 1 - alfa
    offset = batch_centers - features
    updated_centers = tf.scatter_sub(centers_var, flat_label, step * offset)

    squared_error = tf.square(features - batch_centers)
    return tf.reduce_mean(squared_error), updated_centers
def center_loss(features, label, alfa, nrof_classes):
    """Center loss (``tf.nn.l2_loss`` form) plus the center-update op.

    Based on the paper "A Discriminative Feature Learning Approach for Deep
    Face Recognition" (http://ydwen.github.io/papers/WenECCV16.pdf).

    Args:
        features: embedding tensor; dimension 1 is the feature length.
        label: class indices; reshaped to a flat vector.
        alfa: the centers move by ``(1 - alfa) * (center - feature)``.
        nrof_classes: number of class centers to allocate.

    Returns:
        ``(loss, centers)`` where ``loss`` is ``tf.nn.l2_loss`` of the
        feature/center differences and ``centers`` is the output of the
        ``tf.scatter_sub`` update.
    """
    n_dims = features.get_shape()[1]
    zero_init = tf.constant_initializer(0)
    center_table = tf.get_variable('centers', [nrof_classes, n_dims],
                                   dtype=tf.float32, initializer=zero_init,
                                   trainable=False)

    class_ids = tf.reshape(label, [-1])
    own_centers = tf.gather(center_table, class_ids)

    # Move every referenced center towards its features.
    delta = (1 - alfa) * (own_centers - features)
    center_table = tf.scatter_sub(center_table, class_ids, delta)

    residual = features - own_centers
    return tf.nn.l2_loss(residual), center_table
def center_inter_triplet_loss_tf(features, nrof_features, label, alfa, nrof_classes, beta):  # tensorflow version
    """Center loss combined with an inter-center term.

    center_inter_loss = center_loss/||Xi - centers(0,1,2,...i-1,i+1,i+2,...)||
    --mzh 22022017

    Args:
        features: embedding tensor; dimension 1 is the embedding size.
        nrof_features: used as the leading dimension when reshaping
            ``features`` and the gathered centers -- presumably the batch
            size; TODO confirm against the caller.
        label: class indices, flattened to 1-D.
        alfa: centers move by ``(1 - alfa)`` times the count-normalised
            difference.
        nrof_classes: number of rows of the ``centers`` / ``centers_cts``
            variables.
        beta: weight of the ``beta * nrof_features`` term in the total loss.

    Returns:
        ``(loss, centers, loss_center, loss_inter_centers, center_cts_clear)``;
        ``centers`` is the ``tf.scatter_sub`` update output and
        ``center_cts_clear`` the ``tf.scatter_update`` output that writes
        zeros back into the per-class counters.
    """
    dim_features = features.get_shape()[1].value
    centers = tf.get_variable('centers', [nrof_classes, dim_features], dtype=tf.float32,
                              initializer=tf.constant_initializer(0), trainable=False)
    nrof_elements_per_class_list = tf.get_variable('centers_cts', [nrof_classes], dtype=tf.float32,
                                                   initializer=tf.constant_initializer(0), trainable=False)

    ## center_loss calculation
    label = tf.reshape(label, [-1])
    # Center of each element in features, in the same order as the features.
    centers_batch = tf.gather(centers,label)
    dist_centers = features - centers_batch
    dist_centers_sum = tf.reduce_sum(dist_centers**2,1)/2
    loss_center = tf.nn.l2_loss(dist_centers)

    ## count how often each label is repeated
    ones = tf.ones_like(label, tf.float32)
    # Count the number of elements in each class; classes are ordered
    # [0, 1, 2, 3, ...] as at initialization.
    nrof_elements_per_class_list = tf.scatter_add(nrof_elements_per_class_list, label, ones)
    # Number of elements in the class of each sample.
    nrof_elements_per_class = tf.gather(nrof_elements_per_class_list, label)

    ## inter_center_loss calculation
    # Tile the gathered centers so every feature is compared with every
    # gathered center of the batch.
    centers_batch1 = tf.gather(centers,label)
    centers_1D = tf.reshape(centers_batch1, [1, nrof_features * dim_features])
    centers_2D = tf.tile(centers_1D, [nrof_features, 1])
    centers_3D = tf.reshape(centers_2D,[nrof_features, nrof_features, dim_features])
    features_3D = tf.reshape(features, [nrof_features, 1, dim_features])
    dist_inter_centers = features_3D - centers_3D
    dist_inter_centers_sum_dim = tf.reduce_sum(dist_inter_centers**2,2)/2
    # Divide each pairwise distance by the class count of the compared center.
    centers_cts_batch_1D = tf.tile(nrof_elements_per_class,[nrof_features])
    centers_cts_batch_2D = tf.reshape(centers_cts_batch_1D, [nrof_features, nrof_features])
    dist_inter_centers_sum_unique = tf.div(dist_inter_centers_sum_dim, centers_cts_batch_2D)
    dist_inter_centers_sum_all = tf.reduce_sum(dist_inter_centers_sum_unique, 1)
    # Subtract each sample's distance to its own center.
    dist_inter_centers_sum = dist_inter_centers_sum_all - dist_centers_sum
    loss_inter_centers = tf.reduce_mean(dist_inter_centers_sum)

    ## total loss
    loss = loss_center + (loss_center + beta*nrof_features - loss_inter_centers)

    ## update centers
    diff = (1 - alfa) * (centers_batch - features)
    centers_cts_batch_reshape = tf.reshape(nrof_elements_per_class, [-1, 1])
    diff_mean = tf.div(diff, centers_cts_batch_reshape)
    centers = tf.scatter_sub(centers, label, diff_mean)
    # Op that writes zeros back into the counters touched by this batch.
    zeros = tf.zeros_like(label, tf.float32)
    center_cts_clear = tf.scatter_update(nrof_elements_per_class_list, label, zeros)
    return loss, centers, loss_center, loss_inter_centers, center_cts_clear
def update_centers(self, alpha):
    '''
    Build the op that updates ``self.centers`` using the center-loss
    update strategy.

    :param alpha: scale applied to the count-normalised differences before
        they are subtracted from the centers.
    :return: the ``tf.scatter_sub`` output that applies the update to
        ``self.centers``.
    '''
    pixel_recovery_features = self.pixel_recovery_features
    print('centers are ', self.centers)
    # The positive mask, cast to int, is used as the center index of each pixel.
    assign_label = tf.cast(self.pos_mask, tf.int32)
    assign_features = tf.gather(self.centers, assign_label)
    pred_features = pixel_recovery_features
    diff = assign_features - pred_features
    print('diff is ', diff)
    kernel_size = 11
    num_channels = diff.get_shape().as_list()[-1]
    # All-zero structuring element, so the dilation below acts as a
    # kernel_size x kernel_size max filter over the positive mask.
    kernel = tf.convert_to_tensor(np.zeros([kernel_size, kernel_size, 1]), tf.float32)
    # NOTE(review): the result is named ``erosion`` but is produced by
    # ``tf.nn.dilation2d`` -- confirm the intended morphology.
    erosion = tf.clip_by_value(
        tf.squeeze(tf.nn.dilation2d(tf.cast(
            tf.expand_dims(self.pos_mask, axis=3), tf.float32),
            filter=kernel, strides=[1, 1, 1, 1],
            rates=[1, 1, 1, 1], padding='SAME'), axis=3), 0.0, 1.0)
    # Pixels inside the input mask where the dilated mask differs from the
    # positive mask.
    neg_masks = tf.logical_and(
        self.inputs_mask,
        tf.cast(
            tf.cast(erosion, tf.int32) - tf.cast(self.pos_mask, tf.int32),
            tf.bool))
    # Only positive pixels and the selected negatives contribute to the update.
    selected_masks = tf.logical_or(tf.cast(self.pos_mask, tf.bool),
                                   tf.cast(neg_masks, tf.bool))
    selected_assign_label = tf.gather(
        tf.reshape(assign_label, [-1]),
        tf.where(tf.reshape(selected_masks, [-1]))[:, 0])
    selected_diff = tf.gather(
        tf.reshape(diff, [-1, num_channels]),
        tf.where(tf.reshape(selected_masks, [-1]))[:, 0])
    # Normalise each difference by 1 + the number of selected pixels that
    # share its label.
    unique_label, unique_idx, unique_count = tf.unique_with_counts(
        tf.reshape(selected_assign_label, [-1]))
    appear_times = tf.gather(unique_count, unique_idx)
    selected_diff = selected_diff / tf.expand_dims(
        tf.cast(1 + appear_times, tf.float32), axis=1)
    selected_diff = alpha * selected_diff
    centers_update_op = tf.scatter_sub(self.centers, selected_assign_label, selected_diff)
    return centers_update_op
def get_center_loss(features, labels, alpha, num_classes):
    """Build the center loss and the op that updates the class centers.

    Arguments:
        features: Tensor of sample features, usually the output of some fc
            layer; expected shape is [batch_size, feature_length].
        labels: Tensor of sample labels, not one-hot; expected shape is
            [batch_size].
        alpha: number between 0 and 1 controlling the learning rate of the
            class centers (see the original paper for details).
        num_classes: integer, total number of classes (the number of output
            neurons of the classifier).

    Return:
        loss: Tensor that can be added to the softmax loss to form the total
            loss.
        centers: Tensor storing the class centers; only useful for
            inspecting their values.
        centers_update_op: op that updates the class centers; it has to be
            run during training, otherwise the centers are never updated.
    """
    # Feature dimensionality, e.g. 256.
    feature_len = features.get_shape()[1]
    # Variable of shape [num_classes, feature_len] holding all class centers.
    # trainable=False because the centers are not updated by gradients.
    centers = tf.get_variable(
        'centers',
        [num_classes, feature_len],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0),
        trainable=False,
    )

    # Flatten the labels (a no-op when they are already 1-D).
    flat_labels = tf.reshape(labels, [-1])
    # Center of each sample in the mini-batch.
    batch_centers = tf.gather(centers, flat_labels)

    # NOTE(review): debug leftover -- the features are reshaped with a
    # hard-coded leading size of 128 and cut down to their first two columns
    # before the loss is computed. Confirm whether this is still intended.
    features = tf.reshape(features, [128, -1])
    features = features[:, :2]
    print(features.get_shape())

    loss = tf.nn.l2_loss(features - batch_centers)

    # Difference between the centers and the current mini-batch features.
    offset = batch_centers - features

    # Number of samples per class in the mini-batch (equation (4) of the paper).
    _, label_idx, label_count = tf.unique_with_counts(flat_labels)
    occurrences = tf.gather(label_count, label_idx)
    occurrences = tf.reshape(occurrences, [-1, 1])

    offset = offset / tf.cast((1 + occurrences), tf.float32)
    offset = alpha * offset

    centers_update_op = tf.scatter_sub(centers, flat_labels, offset)
    return loss, centers, centers_update_op
def center_loss(features, label, alfa, nrof_classes):
    """Center loss whose evaluation is gated on the center update.

    Based on the paper "A Discriminative Feature Learning Approach for Deep
    Face Recognition".

    Args:
        features: embedding tensor; dimension 1 is the feature length.
        label: class indices, flattened before use.
        alfa: centers are moved by ``(1 - alfa) * (center - feature)``.
        nrof_classes: number of class centers.

    Returns:
        ``(loss, centers)``: the mean squared feature/center difference,
        created under a control dependency on the ``tf.scatter_sub`` update,
        and that update's output.
    """
    width = features.get_shape()[1]
    center_store = tf.get_variable(
        'centers', [nrof_classes, width], dtype=tf.float32,
        initializer=tf.constant_initializer(0), trainable=False)

    ids = tf.reshape(label, [-1])
    gathered = tf.gather(center_store, ids)

    shift = (1 - alfa) * (gathered - features)
    center_store = tf.scatter_sub(center_store, ids, shift)

    # The loss op is only evaluated after the centers have been updated.
    with tf.control_dependencies([center_store]):
        squared = tf.square(features - gathered)
        loss = tf.reduce_mean(squared)
    return loss, center_store
def center_loss(features, label, alfa, nrof_classes):
    """Center loss based on the paper "A Discriminative Feature Learning
    Approach for Deep Face Recognition"
    (http://ydwen.github.io/papers/WenECCV16.pdf)
    https://blog.csdn.net/u014380165/article/details/76946339

    Args:
        features: embedding tensor; dimension 1 is the feature length.
        label: class indices, flattened before use.
        alfa: centers are moved by ``(1 - alfa) * (center - feature)``.
        nrof_classes: number of class centers.

    Returns:
        ``(loss, centers)``: the mean squared difference between the features
        and their class centers (``Lc``), and the scatter-sub output that
        applies the center update.
    """
    nrof_features = features.get_shape()[1]
    # The centers of all classes are kept in this variable during training;
    # every batch first reads the stored centers.
    centers = tf.compat.v1.get_variable(
        'centers', [nrof_classes, nrof_features], dtype=tf.float32,
        initializer=tf.constant_initializer(0), trainable=False)
    label = tf.reshape(label, [-1])
    # Class centers that correspond to the current batch.
    centers_batch = tf.gather(centers, label)
    # Difference between the class centers and the features, used to update
    # the centers Cj. The facenet author scales by (1 - alfa) here, which
    # differs from the original paper.
    diff = (1 - alfa) * (centers_batch - features)
    # Apply the update to Cj. The variable persists and is used for the next
    # batch. scatter_sub is a v1-only symbol, so it is reached through
    # tf.compat.v1, consistent with get_variable above.
    centers = tf.compat.v1.scatter_sub(centers, label, diff)
    # Current center loss, corresponding to Lc.
    loss = tf.reduce_mean(tf.square(features - centers_batch))
    return loss, centers
def center_loss(features, label, label_stats, centers, alfa):
    """The center loss.

    features: [batch_size, 512], the embedding of images.
    label: [batch_size, class_num], class label, the label index is 1,
        others are 0.
    label_stats: [batch_size, 1], the count of each label in the batch.
    centers: [class_num, 512], center points, each class has one.
    alfa: float, updating rate of centers.

    Returns ``(loss, centers)``: ``tf.nn.l2_loss`` between the features and
    their class centers, and the ``tf.scatter_sub`` output that applies the
    count-normalised center update.
    """
    # One-hot -> class index. tf.argmax replaces the deprecated tf.arg_max.
    label = tf.argmax(label, 1)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers, label)
    diff = alfa * (centers_batch - features)
    # Normalise by how often each label occurs in the batch.
    diff = diff / label_stats
    centers = tf.scatter_sub(centers, label, diff)
    loss = tf.nn.l2_loss(features - centers_batch)
    return loss, centers
def get_center_loss(features, labels, alpha, num_labels):
    """Center loss for one-hot style labels, plus the center-update op.

    Args:
        features: embedding tensor; dimension 1 is the feature length.
        labels: tensor reduced to class indices with ``tf.argmax`` over
            axis 1.
        alpha: centers are moved by ``(1 - alpha) * (center - feature)``.
        num_labels: number of class centers.

    Returns:
        ``(loss, centers)``: ``tf.nn.l2_loss`` of the feature/center
        differences and the ``tf.scatter_sub`` update output.
    """
    feature_len = features.get_shape()[1]
    center_var = tf.get_variable(
        'centers',
        [num_labels, feature_len],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.0),
        trainable=False,
    )

    class_ids = tf.argmax(labels, 1)
    class_ids = tf.reshape(class_ids, [-1])
    picked = tf.gather(center_var, class_ids)

    weight = 1 - alpha
    gap = picked - features
    center_var = tf.scatter_sub(center_var, class_ids, weight * gap)

    loss = tf.nn.l2_loss(features - picked)
    return loss, center_var
def center_loss(features, label, nrof_classes=8, alfa=0.95):
    """Mean-squared center loss for features of arbitrary leading shape.

    Based on the paper "A Discriminative Feature Learning Approach for Deep
    Face Recognition" (http://ydwen.github.io/papers/WenECCV16.pdf).

    Args:
        features: embedding tensor; it is flattened to
            ``[-1, last_dimension]`` before use.
        label: tensor reduced to int32 class indices with ``tf.argmax`` over
            axis 1.
        nrof_classes: number of class centers.
        alfa: centers are moved by ``(1 - alfa) * (center - feature)``.

    Returns:
        ``(loss, centers)``: the mean squared feature/center difference and
        the ``tf.scatter_sub`` update output.
    """
    last_dim = features.get_shape()[-1]
    flat_features = tf.reshape(features, [-1, last_dim.value])

    center_var = tf.get_variable(
        'centers', [nrof_classes, last_dim], dtype=tf.float32,
        initializer=tf.constant_initializer(0), trainable=False)

    class_ids = tf.cast(tf.argmax(label, 1), tf.int32)
    own_centers = tf.gather(center_var, class_ids)

    delta = (1 - alfa) * (own_centers - flat_features)
    center_var = tf.scatter_sub(center_var, class_ids, delta)

    loss = tf.reduce_mean(tf.square(flat_features - own_centers))
    return loss, center_var
def center_loss(net, label_batch, alfa, nclass):
    """Center loss on ``net`` with the loss gated on the center update.

    Args:
        net: embedding tensor; dimension 1 is the feature length.
        label_batch: class indices, flattened before use.
        alfa: centers are moved by ``(1 - alfa) * (center - feature)``.
        nclass: number of class centers.

    Returns:
        ``(loss, centers)``: the mean squared difference between ``net`` and
        the gathered centers, created under a control dependency on the
        update, and the ``tf.scatter_sub`` output.
    """
    embed_dim = net.get_shape()[1]
    center_var = tf.get_variable(
        'centers',
        [nclass, embed_dim],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0),
        trainable=False,
    )
    flat_ids = tf.reshape(label_batch, [-1])

    # Pick the rows of the center table that ``flat_ids`` refers to.
    row_centers = tf.gather(center_var, flat_ids)

    retain = 1 - alfa
    spread = row_centers - net
    # Subtract the scaled differences from the rows addressed by the labels.
    center_var = tf.scatter_sub(center_var, flat_ids, retain * spread)

    with tf.control_dependencies([center_var]):
        loss = tf.reduce_mean(tf.square(net - row_centers))
    return loss, center_var
def center_loss_v3(features, labels, alpha, name='center_loss'):
    """Center loss for one-hot labels with Xavier-initialised centers.

    Args:
        features: tensor of shape [batch, hidden].
        labels: one-hot tensor of shape [batch, num_class].
        alpha: scale of the center update ``alpha * (center - feature)``.
        name: variable scope in which the ``centers`` variable is created.

    Returns:
        ``(c_loss, centers)``: the loss and the ``tf.scatter_sub`` output
        that applies the center update.
    """
    feature_width = features.get_shape().as_list()[-1]
    class_count = labels.get_shape().as_list()[-1]
    class_ids = tf.argmax(labels, axis=-1)  # [batch]

    with tf.variable_scope(name):
        center_var = tf.get_variable(
            'centers',
            [class_count, feature_width],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer(),
            trainable=False,
        )
        # Class centers that belong to the current batch.
        batch_centers = tf.gather(center_var, class_ids)

        # l2_loss already reduces to a single value, so the reduce_mean is
        # presumably a no-op -- TODO confirm.
        half_sq_error = tf.nn.l2_loss(features - batch_centers)
        c_loss = tf.reduce_mean(half_sq_error, axis=-1)

        # Update direction of the class centers.
        center_step = alpha * (batch_centers - features)
        center_var = tf.scatter_sub(center_var, class_ids, center_step)
    return c_loss, center_var
def get_center_loss(features, labels, alpha, num_classes):
    """Build the center loss and the count-normalised center-update op.

    Args:
        features: embedding tensor; dimension 1 is the feature length.
        labels: class indices, flattened before use.
        alpha: scale of the center update.
        num_classes: number of class centers.

    Returns:
        ``(loss, centers_update_op)``: ``tf.nn.l2_loss`` of the
        feature/center differences and the ``tf.scatter_sub`` output that
        updates the centers.
    """
    feature_len = features.get_shape()[1]
    center_var = tf.get_variable(
        'centers', [num_classes, feature_len], dtype=tf.float32,
        initializer=tf.constant_initializer(0), trainable=False)

    flat_labels = tf.reshape(labels, [-1])
    batch_centers = tf.gather(center_var, flat_labels)
    step = batch_centers - features

    # Divide each row by 1 + the number of batch samples sharing its label.
    _, label_pos, label_freq = tf.unique_with_counts(flat_labels)
    freq_per_sample = tf.gather(label_freq, label_pos)
    freq_per_sample = tf.reshape(freq_per_sample, [-1, 1])
    step = step / tf.cast((1 + freq_per_sample), tf.float32)
    step = alpha * step

    loss = tf.nn.l2_loss(features - batch_centers)
    centers_update_op = tf.scatter_sub(center_var, flat_labels, step)
    return loss, centers_update_op
def center_loss(self, features, label, alpha, num_classes):
    """Center loss with the loss gated on the center update.

    Based on the paper "A Discriminative Feature Learning Approach for Deep
    Face Recognition" (http://ydwen.github.io/papers/WenECCV16.pdf);
    copied from facenet: https://github.com/davidsandberg/facenet

    Args:
        features: embedding tensor; dimension 1 is the feature length.
        label: class indices, flattened before use.
        alpha: centers are moved by ``(1 - alpha) * (center - feature)``.
        num_classes: number of class centers.

    Returns:
        ``(loss, centers)``: the mean squared feature/center difference,
        created under a control dependency on the update, and the
        ``tf.scatter_sub`` output.
    """
    dim = features.get_shape()[1]
    zero_init = tf.constant_initializer(0)
    center_var = tf.get_variable('centers', [num_classes, dim],
                                 dtype=tf.float32, initializer=zero_init,
                                 trainable=False)

    ids = tf.reshape(label, [-1])
    selected = tf.gather(center_var, ids)
    correction = (1 - alpha) * (selected - features)
    center_var = tf.scatter_sub(center_var, ids, correction)

    with tf.control_dependencies([center_var]):
        loss = tf.reduce_mean(tf.square(features - selected))
    return loss, center_var
def update_means(features, labels, alpha):
    """Build the op that moves the stored class means towards ``features``.

    The ``means/means`` variable must already exist; it is fetched with
    ``reuse=True``.

    Args:
        features: tensor of per-sample features.
        labels: class indices, flattened before use.
        alpha: scale of the count-normalised update.

    Returns:
        The ``tf.scatter_sub`` output that applies the update.
    """
    with tf.variable_scope('means', reuse=True):
        mean_var = tf.get_variable('means')

    flat_labels = tf.reshape(labels, [-1])
    batch_means = tf.gather(mean_var, flat_labels)
    step = batch_means - features

    # Each row is divided by 1 + the number of batch samples with its label.
    _, positions, counts = tf.unique_with_counts(flat_labels)
    per_sample_count = tf.gather(counts, positions)
    per_sample_count = tf.reshape(per_sample_count, [-1, 1])
    step = step / tf.cast((1 + per_sample_count), tf.float32)
    step = alpha * step

    return tf.scatter_sub(mean_var, flat_labels, step)
def center_loss(features, label, alfa, nrof_classes):
    """Center loss evaluated after the centers have been updated.

    Args:
        features: embedding tensor; dimension 1 is the embedding size.
        label: class indices, flattened before use.
        alfa: centers are moved by ``(1 - alfa) * (center - feature)``.
        nrof_classes: number of class centers.

    Returns:
        ``(loss, centers)``: the mean squared feature/center difference and
        the ``tf.scatter_sub`` update output.
    """
    # Embedding dimensionality.
    embedding_dim = features.get_shape()[1]
    center_var = tf.get_variable(
        'centers',
        [nrof_classes, embedding_dim],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0),
        trainable=False,
    )
    flat_label = tf.reshape(label, [-1])

    # Centers matching each batch entry: [batch, embedding_dim].
    matched = tf.gather(center_var, flat_label)
    pull = (1 - alfa) * (matched - features)

    # Rows whose class occurs several times are subtracted cumulatively.
    center_var = tf.scatter_sub(center_var, flat_label, pull)

    # The centers are updated first, then the loss is computed.
    with tf.control_dependencies([center_var]):
        loss = tf.reduce_mean(tf.square(features - matched))
    return loss, center_var
def _center_loss_func(features, labels, alpha, num_classes, centers, feature_dim):
    """Return the mean squared distance between features and class centers.

    Args:
        features: embedding tensor whose dimension 1 must equal
            ``feature_dim``.
        labels: tensor split into two halves along axis 1; the first half
            carries the class indices.
        alpha: centers would be moved by ``(1 - alpha) * (center - feature)``.
        num_classes: not used in this function.
        centers: variable holding one center per class.
        feature_dim: expected feature length.

    Returns:
        The scalar loss tensor.
    """
    assert feature_dim == features.get_shape()[1]

    class_part, _unused_part = tf.split(labels, 2, axis=1)
    class_ids = K.reshape(class_part, [-1])
    class_ids = tf.to_int32(class_ids)

    batch_centers = tf.gather(centers, class_ids)

    # NOTE(review): the scatter_sub output is neither returned nor used as a
    # control dependency, so nothing here triggers the center update --
    # confirm whether the caller runs it some other way.
    delta = (1 - alpha) * (batch_centers - features)
    centers = tf.scatter_sub(centers, class_ids, delta)

    squared = K.square(features - batch_centers)
    return tf.reduce_mean(squared)
def _assign_sub(self, ref, updates, indices=None):
    """Subtract ``updates`` from ``ref``; sparse when ``indices`` is given.

    Args:
        ref: variable to update.
        updates: values to subtract.
        indices: optional row indices for a sparse update.

    Returns:
        The update op output: ``tf.assign_sub`` for dense updates,
        ``tf.scatter_sub`` for ``tf.Variable`` refs, or ``ref.value()`` read
        after a ``resource_scatter_add`` of the negated updates.

    Raises:
        TypeError: for a sparse update on an unsupported ``ref`` type.
    """
    sparse = indices is not None
    if not sparse:
        result = tf.assign_sub(ref, updates, use_locking=self._use_locking)
    elif isinstance(ref, tf.Variable):
        result = tf.scatter_sub(ref, indices, updates,
                                use_locking=self._use_locking)
    elif isinstance(ref, resource_variable_ops.ResourceVariable):
        negated_add = resource_variable_ops.resource_scatter_add(
            ref.handle, indices, -updates)
        # Read the value only after the scatter has been applied.
        with tf.control_dependencies([negated_add]):
            result = ref.value()
    else:
        raise TypeError("did not expect type %r" % type(ref))
    return result
def batch_norm_with_mask(x, is_training, mask, num_channels, name="bn",
                         decay=0.9, epsilon=1e-3, data_format="NHWC"):
    """Batch-normalise ``x`` using only the channels selected by ``mask``.

    The full-size ``offset``, ``scale``, ``moving_mean`` and
    ``moving_variance`` variables (length ``num_channels``) live in the
    variable scope ``name``; ``mask`` picks the entries used for ``x``.

    Args:
        x: input tensor, laid out according to ``data_format``.
        is_training: Python bool. When true, batch statistics are used and
            the selected moving statistics are updated; otherwise the stored
            moving statistics are used and the variables are reused.
        mask: boolean mask over the ``num_channels`` entries.
        num_channels: length of the stored parameter vectors.
        name: variable scope name.
        decay: moving-average decay; the statistics move by
            ``(1 - decay) * (moving - batch)``.
        epsilon: small constant passed to ``fused_batch_norm``.
        data_format: data format passed to ``fused_batch_norm``.

    Returns:
        The normalised tensor. In training mode it carries control
        dependencies on the moving-statistic updates.
    """
    shape = [num_channels]
    # Flat int32 indices of the selected channels, used for scatter updates.
    indices = tf.where(mask)
    indices = tf.cast(indices, tf.int32)
    indices = tf.reshape(indices, [-1])

    with tf.compat.v1.variable_scope(name, reuse=None if is_training else True):
        offset = tf.compat.v1.get_variable(
            "offset", shape, initializer=tf.constant_initializer(0.0))
        scale = tf.compat.v1.get_variable(
            "scale", shape, initializer=tf.constant_initializer(1.0))
        offset = tf.boolean_mask(offset, mask)
        scale = tf.boolean_mask(scale, mask)

        moving_mean = tf.compat.v1.get_variable(
            "moving_mean", shape, trainable=False,
            initializer=tf.constant_initializer(0.0))
        moving_variance = tf.compat.v1.get_variable(
            "moving_variance", shape, trainable=False,
            initializer=tf.constant_initializer(1.0))

        if is_training:
            x, mean, variance = tf.compat.v1.nn.fused_batch_norm(
                x, scale, offset, epsilon=epsilon, data_format=data_format,
                is_training=True)
            mean = (1.0 - decay) * (tf.boolean_mask(moving_mean, mask) - mean)
            variance = (1.0 - decay) * (
                tf.boolean_mask(moving_variance, mask) - variance)
            # scatter_sub and fused_batch_norm are v1-only symbols; they are
            # reached through tf.compat.v1 throughout, consistently with the
            # other compat.v1 calls in this function.
            update_mean = tf.compat.v1.scatter_sub(
                moving_mean, indices, mean, use_locking=True)
            update_variance = tf.compat.v1.scatter_sub(
                moving_variance, indices, variance, use_locking=True)
            # Make the output depend on the moving-statistic updates.
            with tf.control_dependencies([update_mean, update_variance]):
                x = tf.identity(x)
        else:
            masked_moving_mean = tf.boolean_mask(moving_mean, mask)
            masked_moving_variance = tf.boolean_mask(moving_variance, mask)
            x, _, _ = tf.compat.v1.nn.fused_batch_norm(
                x, scale, offset,
                mean=masked_moving_mean,
                variance=masked_moving_variance,
                epsilon=epsilon, data_format=data_format,
                is_training=False)
    return x
def _apply_sparse_shared(self, grad_values, grad_indices, var):
    """Apply a sparse gradient to ``var`` using the row accumulator.

    Args:
        grad_values: gradient values for the rows listed in ``grad_indices``.
        grad_indices: indices of the rows of ``var`` being updated.
        var: variable to update.

    Returns:
        The ``tf.scatter_sub`` output that subtracts
        ``self._learning_rate_tensor * scaled_g`` from ``var`` at
        ``grad_indices``, created after any accumulator update.
    """
    shape = np.array(var.get_shape())
    var_rank = len(shape)
    # For sparse case, we only update the accumulator representing the sparse
    # dimension. In this case SM3 is similar to isotropic adagrad but with
    # better bound (due to the max operator).
    #
    # We do not use the column accumulator because it will updated for
    # every gradient step and will significantly overestimate the gradient
    # square. While, the row accumulator can take advantage of the sparsity
    # in the gradients. Even if one implements the column accumulator - it
    # will result in a no-op because the row accumulators will have lower
    # values.
    #
    # Note that: We do not run this code paths for our experiments in our paper
    # as on TPU all the sparse gradients are densified.
    if var_rank > 1:
        accumulator = self.get_slot(var, "accumulator_" + str(0))
        current_accumulator = tf.gather(accumulator, grad_indices)
        # Reshape to [n_indices, 1, ..., 1] so it broadcasts against
        # grad_values.
        expanded_shape = tf.concat(
            [[tf.shape(current_accumulator)[0]], [1] * (var_rank - 1)], 0)
        current_accumulator = tf.reshape(current_accumulator, expanded_shape)
        current_accumulator += grad_values * grad_values
    else:
        accumulator = self.get_slot(var, "accumulator")
        current_accumulator = tf.scatter_add(accumulator, grad_indices,
                                             grad_values * grad_values)

    # 1/sqrt(accumulator), with 0 where the accumulator is not positive.
    accumulator_inv_sqrt = tf.where(tf.greater(current_accumulator, 0),
                                    tf.rsqrt(current_accumulator),
                                    tf.zeros_like(current_accumulator))
    scaled_g = (grad_values * accumulator_inv_sqrt)
    updates = []
    # The accumulator write-back is created under a dependency on scaled_g.
    with tf.control_dependencies([scaled_g]):
        if var_rank > 1:
            # Collapse all non-row axes with a max to get the new row values.
            axes = list(range(1, var_rank))
            dim_accumulator = tf.reduce_max(current_accumulator, axis=axes)
            updates = [
                tf.scatter_update(accumulator, grad_indices, dim_accumulator)
            ]
    with tf.control_dependencies(updates):
        return tf.scatter_sub(var, grad_indices,
                              self._learning_rate_tensor * scaled_g)
def center_loss_similarity(features, label, alfa, nrof_classes):
    """Center loss on cosine distance (1 - similarity) instead of L2 distance.

    Args:
        features: embedding tensor; it is L2-normalised along axis 1 first.
        label: class indices, flattened before use.
        alfa: centers move by ``(1 - alfa)`` times the count-normalised
            difference.
        nrof_classes: number of rows of the ``centers`` / ``centers_cts``
            variables.

    Returns:
        ``(loss, centers)``: the summed cosine distance between each feature
        and its class center, and the ``tf.scatter_sub`` output that applies
        the center update.
    """
    ## center_loss on cosine distance =1 - similarity instead of the L2 norm, i.e. Euclidian distance
    ## normalise the embedding vectors so the dot product is a cosine similarity
    features = tf.nn.l2_normalize(features, 1, 1e-10, name='feat_emb')
    nrof_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
                              initializer=tf.constant_initializer(0), trainable=False)
    centers_cts = tf.get_variable('centers_cts', [nrof_classes], dtype=tf.float32,
                                  initializer=tf.constant_initializer(0), trainable=False)
    #centers_cts_init = tf.zeros_like(nrof_classes, tf.float32)
    label = tf.reshape(label, [-1])
    # Center of each element in features, in the same order as the features.
    centers_batch = tf.gather(
        centers, label
    )
    #loss = tf.nn.l2_loss(features - centers_batch) ## 0.5*(L2 norm)**2, L2 norm is the Euclidian distance
    # Dot product of normalised vectors: cosine similarity of every feature
    # with every gathered center.
    similarity_all = tf.matmul(
        features, tf.transpose(tf.nn.l2_normalize(
            centers_batch, 1, 1e-10)))
    # The diagonal pairs each feature with its own center.
    similarity_self = tf.diag_part(similarity_all)
    loss_x = tf.subtract(1.0, similarity_self)
    # Sum the cosine distance of each vector.
    loss = tf.reduce_sum(
        loss_x)
    diff = (1 - alfa) * (centers_batch - features)
    ones = tf.ones_like(label, tf.float32)
    # Count the number of samples of each class; classes are ordered
    # [0, 1, 2, 3, ...] as at initialization.
    centers_cts = tf.scatter_add(
        centers_cts, label, ones
    )
    centers_cts_batch = tf.gather(centers_cts, label)
    #centers_cts_batch_ext = tf.tile(centers_cts_batch, nrof_features)
    #centers_cts_batch_reshape = tf.reshape(centers_cts_batch_ext,[-1, nrof_features])
    centers_cts_batch_reshape = tf.reshape(centers_cts_batch, [-1, 1])
    diff_mean = tf.div(diff, centers_cts_batch_reshape)
    centers = tf.scatter_sub(centers, label, diff_mean)
    zeros = tf.zeros_like(label, tf.float32)
    # NOTE(review): this counter-reset op is built but not returned, so a
    # caller has no handle to run it -- confirm whether the counts are meant
    # to accumulate across batches.
    center_cts_clear = tf.scatter_update(centers_cts, label, zeros)
    #return loss, centers, label, centers_batch, diff, centers_cts, centers_cts_batch, diff_mean,center_cts_clear, centers_cts_batch_reshape
    #return loss, centers, loss_x, similarity_all, similarity_self
    return loss, centers
def update_centers(features, labels, alpha):
    """Build the op that moves the ``center/centers`` rows towards ``features``.

    The variable is looked up with ``reuse=tf.AUTO_REUSE``.

    Args:
        features: tensor of per-sample features.
        labels: class indices, flattened before use.
        alpha: scale of the count-normalised update.

    Returns:
        The ``tf.scatter_sub`` output that applies the update.
    """
    with tf.variable_scope('center', reuse=tf.AUTO_REUSE):
        center_var = tf.get_variable('centers')

    flat_labels = tf.reshape(labels, [-1])  # flatten
    # One center row per sample, in label order.
    batch_centers = tf.gather(center_var, flat_labels)
    # Element-wise difference between each center and its feature.
    gap = batch_centers - features

    _, where_in_unique, unique_freq = tf.unique_with_counts(flat_labels)
    freq = tf.gather(unique_freq, where_in_unique)
    freq = tf.reshape(freq, [-1, 1])

    gap = gap / tf.cast((1 + freq), tf.float32)
    gap = alpha * gap

    return tf.scatter_sub(center_var, flat_labels, gap)
def margin_center_loss(self, features, label, nrof_classes):
    """Return the center loss plus ``self.margin_loss``.

    Args:
        features: embedding tensor; dimension 1 is the feature length.
        label: class indices, flattened before use.
        nrof_classes: number of class centers.

    Returns:
        Sum of the mean squared feature/center difference (evaluated after
        the center update) and ``self.margin_loss(features, label, centers,
        0.6)``.
    """
    feature_dim = features.get_shape()[1]
    center_var = tf.get_variable(
        'centers',
        [nrof_classes, feature_dim],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0),
        trainable=False,
    )
    flat_label = tf.reshape(label, [-1])

    # The margin term is built on the variable before it is updated.
    mloss = self.margin_loss(features, flat_label, center_var, 0.6)

    batch_centers = tf.gather(center_var, flat_label)
    shift = (1 - 0.6) * (batch_centers - features)
    center_var = tf.scatter_sub(center_var, flat_label, shift)

    with tf.control_dependencies([center_var]):
        closs = tf.reduce_mean(tf.square(features - batch_centers))
    return closs + mloss
def update_centers(features, labels, alpha):
    """Create the center-update op for the current batch.

    Looks up ``center/centers`` (``reuse=tf.AUTO_REUSE``) and subtracts
    ``alpha * (center - feature) / (1 + count)`` from the rows addressed by
    ``labels``, where ``count`` is the number of batch samples sharing the
    label.

    Args:
        features: tensor of per-sample features.
        labels: class indices, flattened before use.
        alpha: scale of the update.

    Returns:
        The ``tf.scatter_sub`` output.
    """
    with tf.variable_scope('center', reuse=tf.AUTO_REUSE):
        stored_centers = tf.get_variable('centers')

    ids = tf.reshape(labels, [-1])
    own_centers = tf.gather(stored_centers, ids)
    delta = own_centers - features

    _, inverse_idx, per_label_count = tf.unique_with_counts(ids)
    sample_count = tf.reshape(tf.gather(per_label_count, inverse_idx), [-1, 1])

    denominator = tf.cast((1 + sample_count), tf.float32)
    delta = alpha * (delta / denominator)

    stored_centers = tf.scatter_sub(stored_centers, ids, delta)
    return stored_centers
def __init__(self, param, load_last_model = False):
    '''
    Build the model graph, load the data and set up summaries and the saver.

    param holds all hyperparameters the model needs; it is a class instance
    whose attributes are accessed with ".".
    load_last_model is stored on the instance; it is not otherwise used in
    this method.
    '''
    self.load_last_model = load_last_model
    self.sess = tf.Session()
    self.param = param

    # Placeholders for inputs, labels and the scalar hyperparameters.
    self.keep_prob = tf.placeholder(tf.float32, name = 'keep_prob')
    self.inputs = tf.placeholder(dtype = tf.float32, shape = (None,28,28,1), name = 'inputs')
    self.labels = tf.placeholder(tf.int32, shape = (None,), name = 'labels')
    self.lr = tf.placeholder(tf.float32, (), 'param_learning_rate')
    self.alpha = tf.placeholder(tf.float32, (), 'center_learning_rate')
    self.lamb = tf.placeholder(tf.float32, (), 'lambda')
    self.labels_onehot = tf.one_hot(self.labels, 10, name = 'label_onehot')

    self.features, self.logits = self.inference()

    # One randomly initialised 2-D center for each of the 10 classes; not
    # trainable, it is changed through ``self.update_center`` below.
    self.center = tf.Variable(initial_value = tf.random_normal(shape = (10,2), name = 'center_initializer'),
                              trainable = False, name = 'center', dtype = tf.float32)
    self.center_of_centers = tf.reduce_mean(self.center, axis = 0)

    self.center_loss, self.softmax_loss, self.delta_center = self.loss()
    self.total_loss = self.softmax_loss + self.lamb * self.center_loss
    # Subtract alpha * delta_center from the center rows addressed by the labels.
    self.update_center = tf.scatter_sub(self.center, self.labels, self.alpha*self.delta_center)
    self.accuracy = self.acc()
    self.optimizer = tf.train.AdamOptimizer(self.lr, name = 'optimizer')\
                             .minimize(loss = self.total_loss)

    self.train_x, self.train_y,\
    self.valid_x, self.valid_y,\
    self.test_x, self.test_y = self.load_data()
    self.train_size = len(self.train_x)
    self.batch_size = self.param.batch_size

    tf.summary.scalar('train_softmax_loss', self.softmax_loss)
    tf.summary.scalar('train_center_loss', self.center_loss)
    tf.summary.scalar('train_total_loss', self.total_loss)
    # tf.summary.scalar('valid_total_loss', self.total_loss, collections = ['valid'])
    # tf.summary.scalar('valid_softmax_loss', self.softmax_loss, collections = ['valid'])
    # tf.summary.scalar('valid_center_loss', self.center_loss, collections = ['valid'])
    self.summaries = tf.summary.merge_all()
    # self.summaries_valid = tf.summary.merge_all(key = 'valid')
    self.writer = tf.summary.FileWriter(logdir = './/log//', graph = self.sess.graph)
    self.saver = tf.train.Saver()
def _get_center_loss(features, labels, alpha, num_classes):
    """Build the center loss and the op that updates the centers.

    Arguments:
        features: Tensor, shape should be [batch_size, feature_length].
        labels: Tensor, not one-hot encoded, shape should be [batch_size].
        alpha: between 0 and 1, controls the learning rate of the centers.
        num_classes: int, the number of classes.

    Return:
        loss: Tensor, can be added to the softmax loss to form the total loss.
        centers: Tensor storing the class centers; only needed to inspect
            them.
        centers_update_op: op that updates the centers; it has to be run
            during training, otherwise the centers are not updated.
    """
    # Feature dimension, e.g. 2208.
    feature_len = features.get_shape()[1]

    # Variable of shape [num_classes, feature_len] that stores the centers.
    # trainable=False because the centers are not updated by gradients.
    centers = tf.get_variable(
        'centers',
        [num_classes, feature_len],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0),
        trainable=False,
    )

    # Flatten the labels (unnecessary when they are already 1-D).
    flat_labels = tf.reshape(labels, [-1])

    # Center of each sample in the mini-batch.
    batch_centers = tf.gather(centers, flat_labels)

    residual = features - batch_centers
    loss = tf.nn.l2_loss(residual)

    step = batch_centers - features
    # The per-class frequency normalisation of equation (4) in the paper is
    # disabled in this variant (the original kept it in an inert string):
    #   unique_label, unique_idx, unique_count = tf.unique_with_counts(labels)
    #   appear_times = tf.gather(unique_count, unique_idx)
    #   appear_times = tf.reshape(appear_times, [-1, 1])
    #   diff = diff / tf.cast((1 + appear_times), tf.float32)
    step = alpha * step

    centers_update_op = tf.scatter_sub(centers, flat_labels, step)
    return loss, centers, centers_update_op
def center_loss(features, labels, num_classes, alpha=0.99, weight=1.0):
    """Center loss that also registers itself in the ``'losses'`` collection.

    Based on the paper "A Discriminative Feature Learning Approach for Deep
    Face Recognition" (http://ydwen.github.io/papers/WenECCV16.pdf).

    Args:
        features: embedding tensor; dimension 1 is the feature length.
        labels: class indices, flattened before use.
        num_classes: number of class centers.
        alpha: centers are moved by ``(1 - alpha) * (center - feature)``.
        weight: factor applied to the loss added to the ``'losses'``
            collection (the returned loss is unweighted).

    Returns:
        ``(center_loss_mean, centers)``: the mean squared feature/center
        difference and the ``tf.scatter_sub`` update output.
    """
    feature_len = features.get_shape()[1]
    center_var = tf.get_variable(
        'centers',
        [num_classes, feature_len],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0),
        trainable=False,
    )

    flat_labels = tf.reshape(labels, [-1])
    batch_centers = tf.gather(center_var, flat_labels)

    keep = 1 - alpha
    gap = batch_centers - features
    center_var = tf.scatter_sub(center_var, flat_labels, keep * gap)

    mean_sq = tf.reduce_mean(tf.square(features - batch_centers))
    tf.add_to_collection('losses', weight * mean_sq)
    return mean_sq, center_var
def center_loss_angel(features, label, alfa, nrof_classes):
    """Angular center loss based on the cosine between features and centers.

    Args:
        features: embedding tensor; dimension 1 is the feature length.
        label: class indices, flattened before use.
        alfa: centers are moved by ``(1 - alfa) * (center - feature)``.
        nrof_classes: number of class centers.

    Returns:
        ``(loss, centers)``: the mean squared cosine between each normalised
        feature and its normalised class center, and the ``tf.scatter_sub``
        output that applies the center update.
    """
    nrof_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
                              initializer=tf.constant_initializer(1), trainable=False)
    centers_unit = tf.nn.l2_normalize(centers, dim=1)
    # Fix: normalise the features. The original normalised ``centers`` a
    # second time, so the cosine below compared centers with centers and did
    # not depend on the input at all.
    features_unit = tf.nn.l2_normalize(features, dim=1)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers_unit, label)
    # Row-wise dot product of unit vectors.
    cos_theta = tf.reduce_sum(tf.multiply(features_unit, centers_batch), axis=1)
    diff = (1 - alfa) * (centers_batch - features)
    centers = tf.scatter_sub(centers, label, diff)
    loss = tf.reduce_mean(tf.square(cos_theta))
    return loss, centers
def loss_center(self):
    """Build the weighted center loss and rebind ``self.centers`` to its update.

    Reads ``self.cfg.alpha``, ``self.cfg.lmbda``, ``self.centers``,
    ``self.pl_labels`` and ``self.deep_features``. ``self.centers`` is
    replaced by the ``tf.scatter_sub`` output of shape
    (n_classes, n_features).

    Returns:
        ``self.cfg.lmbda`` times the batch mean of the per-sample squared
        distance between the features and their class centers.
    """
    print('hahaha')
    update_rate = self.cfg.alpha

    batch_centers = tf.gather(self.centers, self.pl_labels)  # (Batch, n_features)
    gap = batch_centers - self.deep_features  # (Batch, n_features)

    # Number of batch samples sharing each sample's label, as a float column.
    _, label_pos, label_freq = tf.unique_with_counts(self.pl_labels)
    per_sample_freq = tf.gather(label_freq, label_pos)  # (Batch)
    per_sample_freq = tf.cast(per_sample_freq, tf.float32)
    per_sample_freq = tf.reshape(per_sample_freq, [-1, 1])

    step = tf.divide(gap, per_sample_freq) * update_rate  # (Batch, n_features)
    self.centers = tf.scatter_sub(self.centers, self.pl_labels, step)

    sq_error = tf.square(self.deep_features - batch_centers)
    per_sample_loss = tf.reduce_sum(sq_error, axis=1)
    mean_loss = tf.reduce_mean(per_sample_loss)
    return tf.scalar_mul(self.cfg.lmbda, mean_loss)
def get_center_loss3(self, labels, centers, bottleneck, alpha=0.5):
    """Build the center loss and its update op from one-hot labels.

    Based on the paper "A Discriminative Feature Learning Approach for Deep
    Face Recognition" (http://ydwen.github.io/papers/WenECCV16.pdf).

    Args:
        labels: one-hot labels; the column index of each entry equal to 1.0
            is used as the dense class index.
        centers: the class-center variable to gather from and update.
        bottleneck: the feature tensor compared against the centers.
        alpha: scale applied to the center update.

    Returns:
        ``(loss, update_op)``: ``tf.nn.l2_loss`` of the feature/center
        difference, and the ``scatter_sub`` op that moves the centers.
    """
    # One-hot -> dense: keep the column index of every entry equal to 1.0.
    hot_positions = tf.where(tf.equal(labels, 1.0))
    dense_labels = hot_positions[:, 1]
    dense_labels = tf.cast(dense_labels, tf.int64)
    dense_labels = tf.reshape(dense_labels, [-1])

    batch_centers = tf.gather(centers, dense_labels)
    loss = tf.nn.l2_loss(bottleneck - batch_centers)

    offsets = batch_centers - bottleneck

    # Divide each offset by (1 + number of occurrences of its label).
    _, label_slot, label_count = tf.unique_with_counts(dense_labels)
    occurrences = tf.gather(label_count, label_slot)
    occurrences = tf.reshape(occurrences, [-1, 1])
    divisor = tf.cast((1 + occurrences), tf.float32)
    offsets = offsets / divisor
    offsets = alpha * offsets

    update_op = tf.scatter_sub(centers, dense_labels, offsets)
    return loss, update_op
def get_margin_loss(labels, features, num_classes, alpha=0.1, training=True):
    """Build a softplus margin loss between features and class centers.

    A non-trainable, zero-initialised variable named ``centers`` of shape
    ``[num_classes, feature_length]`` is created. For every sample the L1
    distance to its own center is compared with the L1 distance to every
    center; ``softplus(own - other)`` is averaged over the pairs whose class
    differs from the sample's label.

    Args:
        labels: class indices; flattened to 1-D and compared against an
            int64 constant of class ids.
        features: tensor whose dimension 1 is the feature length.
        num_classes: number of rows of the ``centers`` variable.
        alpha: scale applied to the center update.
        training: not used by this function.

    Returns:
        ``(margin_loss, centers, centers_update_up)``: the scalar loss, the
        ``centers`` variable, and the ``scatter_sub`` op that updates it.
    """
    feature_len = features.get_shape()[1]
    centers = tf.get_variable(
        'centers',
        [num_classes, feature_len],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0),
        trainable=False,
    )
    flat_labels = tf.reshape(labels, [-1])
    own_centers = tf.gather(centers, flat_labels)

    # L1 distance from each sample to the center of its own class.
    own_delta = tf.subtract(features, own_centers)
    own_dist = tf.reduce_sum(tf.abs(own_delta), axis=1)

    # Center update: offset divided by (1 + label occurrences), times alpha.
    step = own_centers - features
    _, label_slot, label_count = tf.unique_with_counts(flat_labels)
    occurrences = tf.gather(label_count, label_slot)
    occurrences = tf.reshape(occurrences, [-1, 1])
    step = step / tf.cast((1 + occurrences), tf.float32)
    step = alpha * step
    centers_update_up = tf.scatter_sub(centers, flat_labels, step)

    # L1 distance from every sample to every center.
    features_col = tf.expand_dims(features, 1)
    centers_row = tf.expand_dims(centers, 0)
    pairwise_delta = tf.subtract(features_col, centers_row)
    all_dist = tf.reduce_sum(tf.abs(pairwise_delta), axis=2)

    margin_gap = tf.subtract(tf.expand_dims(own_dist, 1), all_dist)

    # Mask out the (sample, own class) pairs.
    labels_col = tf.expand_dims(flat_labels, 1)
    class_ids = tf.constant(list(range(num_classes)), dtype=tf.int64)
    is_own_class = tf.equal(labels_col, tf.expand_dims(class_ids, 0))
    other_class_mask = tf.to_float(tf.logical_not(is_own_class))

    masked = tf.nn.softplus(margin_gap) * other_class_mask
    margin_loss = tf.reduce_sum(masked) / tf.reduce_sum(other_class_mask)
    return margin_loss, centers, centers_update_up
def center_loss(embedding, label, num_classes, alpha=0.1, scope="center_loss"):
    r"""Center-Loss as described in the paper
    `A Discriminative Feature Learning Approach for Deep Face Recognition`
    <http://ydwen.github.io/papers/WenECCV16.pdf> by Wen et al.

    A non-trainable, zero-initialised variable named ``centers`` of shape
    ``[num_classes, feature_length]`` is created in the current variable
    scope. Evaluating the returned loss also applies the center update.

    Args:
        embedding (tf.Tensor): features produced by the network
        label (tf.Tensor): ground-truth label for each feature
        num_classes (int): number of different classes
        alpha (float): controls the center update; each gathered center is
            moved by ``(1 - alpha)`` times its offset from the embedding
        scope (str): name given to the returned loss op

    Returns:
        tf.Tensor: center loss
    """
    nrof_features = embedding.get_shape()[1]
    centers = tf.get_variable('centers', [num_classes, nrof_features],
                              dtype=tf.float32,
                              initializer=tf.constant_initializer(0),
                              trainable=False)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers, label)
    diff = (1 - alpha) * (centers_batch - embedding)
    update_centers = tf.scatter_sub(centers, label, diff)
    squared_dist = tf.square(embedding - centers_batch)
    # Only the loss is returned, so without this dependency nothing would
    # ever run the scatter_sub in graph mode and the centers would stay at
    # their initial value. The loss still uses the pre-update centers.
    with tf.control_dependencies([update_centers]):
        loss = tf.reduce_mean(squared_dist, name=scope)
    return loss
def island_loss(features, label, alpha, nrof_classes, nrof_features, lamda1=10):
    """Center loss plus island loss, based on the paper
       "Island Loss for Learning Discriminative Features in Facial Expression Recognition"
       (https://github.com/SeriaZheng/EmoNet/blob/master/loss_function/loss_paper/Island_loss.pdf)

    The ``centers`` variable is fetched from the ``'center'`` variable scope
    with ``reuse=True``, so it has to be created elsewhere beforehand. A
    non-trainable, zero-initialised variable ``diff2`` of shape
    ``[nrof_classes, nrof_features]`` is created here.

    Both the update term and the island loss are built with Python loops
    over every ordered pair of distinct classes, so the number of graph ops
    grows with ``nrof_classes ** 2``.

    Args:
        features: feature tensor compared against the gathered centers.
        label: class indices; flattened to 1-D before use.
        alpha: scale applied to both center update terms.
        nrof_classes: number of classes iterated over.
        nrof_features: feature length, used for the shape of ``diff2``.
        lamda1: weight of the island term.

    Returns:
        ``(loss, centers)``: center loss plus weighted island loss, and the
        tensor obtained after subtracting both update terms.
    """
    # Fetch the shared `centers` variable.
    with tf.variable_scope('center', reuse=True):
        centers = tf.get_variable('centers')
    label = tf.reshape(label, [-1])
    # Look up the center for each label; labels may repeat within a batch
    # because several samples can share one label.
    centers_batch = tf.gather(centers, label)
    # Offset from each feature to its center, scaled below; diff1 is the
    # center-loss update term.
    diff1 = centers_batch - features
    # Count how often each label occurs in the batch (see the update formula
    # in the paper).
    unique_label, unique_idx, unique_count = tf.unique_with_counts(label)
    appear_times = tf.gather(unique_count, unique_idx)
    appear_times = tf.reshape(appear_times, [-1, 1])
    diff1 = diff1 / tf.cast((1 + appear_times), tf.float32)
    diff1 = alpha * diff1
    # diff2 is the island-loss update term for the centers.
    diff2 = tf.get_variable('diff2', [nrof_classes, nrof_features], dtype=tf.float32,
                            initializer=tf.constant_initializer(0), trainable=False)
    # NOTE(review): `a / b * c` evaluates as `(a / b) * c`, so both terms
    # below multiply by the norm of center j instead of dividing by the
    # product of the two norms as the loss2 expression further down does —
    # confirm against the paper's gradient.
    # NOTE(review): the terms are added to row i but the second one is scaled
    # by center j — confirm the row index as well.
    for i in range(nrof_classes):
        for j in range(nrof_classes):
            if i!=j:
                diff2 = tf.scatter_add(diff2, i, (tf.gather(centers, i) / tf.sqrt(
                    tf.reduce_sum(tf.square(tf.gather(centers, i)))) * tf.sqrt(
                    tf.reduce_sum(tf.square(tf.gather(centers, j))))) - tf.multiply(
                    (tf.reduce_sum(
                        tf.multiply(tf.gather(centers, i), tf.gather(centers, j))) / tf.sqrt(
                        tf.reduce_sum(tf.square(tf.gather(centers, i)))) * tf.pow(tf.sqrt(tf.reduce_sum(tf.square(tf.gather(centers, j)))), 3)),
                    tf.gather(centers, j)))
    diff2 = diff2 * lamda1 / (nrof_classes - 1)
    diff2 = alpha * diff2
    # Center loss: l2_loss sums the squared values and halves the result,
    # without taking a square root.
    loss1 = tf.nn.l2_loss(features - centers_batch)
    # Island loss: sum over ordered pairs of (cosine similarity + 1).
    loss2 = tf.zeros(1)
    for i in range(nrof_classes):
        for j in range(nrof_classes):
            if i!=j:
                loss2 = tf.add(tf.add(tf.reduce_sum(tf.multiply(tf.gather(centers, i), tf.gather(centers, j))) / (
                    tf.sqrt(tf.reduce_sum(tf.square(tf.gather(centers, i)))) *
                    tf.sqrt(tf.reduce_sum(tf.square(tf.gather(centers, j))))), tf.ones(1)), loss2)
    loss2 = lamda1 * loss2
    loss = tf.add(loss1,loss2)
    # Update the centers: subtract diff1 from the rows selected by label; a
    # label that occurs several times is subtracted several times (diff1 and
    # centers have different shapes).
    centers = tf.scatter_sub(centers, label, diff1)
    # diff2 has the same shape as centers, so it can be subtracted directly.
    # NOTE(review): tf.subtract yields a new tensor and does not assign to
    # the variable — confirm the island term is only meant to affect the
    # returned value.
    centers = tf.subtract(centers, diff2)
    return loss, centers
def scatter_subtract(variables1, variables2):
    """Subtract the top entries of ``-variables1`` from ``variables2``.

    ``tf.nn.top_k`` is applied to the negated ``variables1``. The selected
    values are then subtracted from ``variables2`` at the selected indices
    via ``tf.scatter_sub``.

    ``k`` is not a parameter: it is read from the surrounding scope, and the
    number of selected entries is ``k * size / 100`` cast to int32, where
    ``size`` comes from ``utils.get_tensor_size(variables2)``.
    # presumably `k` is a percentage — confirm against the caller

    Args:
        variables1: tensor whose negated values are ranked.
        variables2: target passed to ``tf.scatter_sub``.

    Returns:
        The output of ``tf.scatter_sub``.
    """
    target_size = utils.get_tensor_size(variables2)
    negated = -1 * variables1
    top_count = tf.cast(k * target_size / 100, tf.int32)
    picked_values, picked_indices = tf.nn.top_k(negated, top_count)
    return tf.scatter_sub(variables2, picked_indices, picked_values)