def mycrossentropy(y_true, y_pred, e=0.1):
    # Standard crossentropy against the true labels.
    loss1 = K.categorical_crossentropy(y_true, y_pred)
    # Crossentropy against a uniform distribution over nb_classes
    # (a label-smoothing-style regularizer).
    loss2 = K.categorical_crossentropy(K.ones_like(y_pred) / nb_classes,
                                       y_pred)
    return (1 - e) * loss1 + e * loss2
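A minimal usage sketch for `mycrossentropy`, assuming `nb_classes` is defined in the enclosing scope and `K` is the Keras backend; the tensor values are hypothetical:

import numpy as np
from tensorflow.keras import backend as K

nb_classes = 3  # assumed global read by mycrossentropy
y_true = K.constant([[1.0, 0.0, 0.0]])
y_pred = K.constant([[0.8, 0.1, 0.1]])  # softmax probabilities
loss = mycrossentropy(y_true, y_pred, e=0.1)
# e=0 recovers plain categorical crossentropy; larger e pulls
# predictions toward the uniform distribution.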
def adj_loss(y_true, y_pred):
    Y_pred = k.argmax(y_pred)
    Y_true = k.argmax(y_true)
    adj = SimpleAdjMat(batch_size, pixel_distance)
    adj_pred = adj.adj_mat(Y_pred, Y_true)
    adj_pred = tf.norm(tensor=adj_pred, ord=1, axis=1)
    adj_true = adj.adj_mat(Y_true, Y_pred)
    adj_true = tf.norm(tensor=adj_true, ord=1, axis=1)
    # L2 distance between the adjacency norms.
    quad = (adj_pred - adj_true)
    quad = quad * quad
    sqrt = k.sqrt(quad)
    global adj_loss_value
    adj_loss_value = lambda_loss * k.mean(sqrt)
    global categ_loss
    categ_loss = k.categorical_crossentropy(y_true, y_pred)
    loss = adj_loss_value + categ_loss
    return loss
def categorical_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0):
    """Computes the categorical crossentropy loss.

    Args:
        y_true: tensor of true targets.
        y_pred: tensor of predicted targets.
        from_logits: Whether `y_pred` is expected to be a logits tensor. By
            default, we assume that `y_pred` encodes a probability distribution.
        label_smoothing: Float in [0, 1]. If > `0` then smooth the labels.

    Returns:
        Categorical crossentropy loss value.
    """
    y_pred = ops.convert_to_tensor(y_pred)
    y_true = math_ops.cast(y_true, y_pred.dtype)
    label_smoothing = ops.convert_to_tensor(label_smoothing, dtype=K.floatx())

    def _smooth_labels():
        num_classes = math_ops.cast(array_ops.shape(y_true)[1], y_pred.dtype)
        return y_true * (1.0 - label_smoothing) + (label_smoothing / num_classes)

    y_true = smart_cond.smart_cond(label_smoothing, _smooth_labels,
                                   lambda: y_true)
    return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
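A quick numeric check of the `_smooth_labels` branch above: with `label_smoothing=0.1` and four classes, a one-hot target is remapped as

import numpy as np

# y_smooth = y_true * (1 - 0.1) + 0.1 / 4
y_true = np.array([0.0, 1.0, 0.0, 0.0])
y_smooth = y_true * (1.0 - 0.1) + 0.1 / 4
# -> array([0.025, 0.925, 0.025, 0.025])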
def prepare_simple_model(input_tensor, loss_name, target):
    axis = 1 if K.image_data_format() == 'channels_first' else -1
    loss = None
    num_channels = None
    activation = None
    if loss_name == 'sparse_categorical_crossentropy':
        loss = lambda y_true, y_pred: K.sparse_categorical_crossentropy(  # pylint: disable=g-long-lambda
            y_true, y_pred, axis=axis)
        num_channels = np.amax(target) + 1
        activation = 'softmax'
    elif loss_name == 'categorical_crossentropy':
        loss = lambda y_true, y_pred: K.categorical_crossentropy(  # pylint: disable=g-long-lambda
            y_true, y_pred, axis=axis)
        num_channels = target.shape[axis]
        activation = 'softmax'
    elif loss_name == 'binary_crossentropy':
        loss = lambda y_true, y_pred: K.binary_crossentropy(y_true, y_pred)  # pylint: disable=unnecessary-lambda
        num_channels = target.shape[axis]
        activation = 'sigmoid'
    predictions = Conv2D(num_channels,
                         1,
                         activation=activation,
                         kernel_initializer='ones',
                         bias_initializer='ones')(input_tensor)
    simple_model = keras.models.Model(inputs=input_tensor, outputs=predictions)
    simple_model.compile(optimizer='rmsprop', loss=loss)
    return simple_model
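A sketch of calling `prepare_simple_model` with the sparse variant; the shapes here are hypothetical and assume channels-last data format:

import numpy as np
from tensorflow.keras.layers import Input

# Sparse integer labels for a 2-sample batch of 8x8 maps; the model
# infers np.amax(target) + 1 output channels.
target = np.random.randint(0, 3, size=(2, 8, 8))
input_tensor = Input(shape=(8, 8, 5))
model = prepare_simple_model(input_tensor, 'sparse_categorical_crossentropy',
                             target)
model.summary()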
def wcce(y_true, y_pred):
    # `weights` is a per-class weight vector captured from the enclosing scope.
    Kweights = tf.constant(weights)
    if not tf.is_tensor(y_pred):
        y_pred = tf.constant(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)
    # Scale each sample's crossentropy by the weight of its true class.
    return (K.categorical_crossentropy(y_true, y_pred, from_logits=True) *
            K.sum(y_true * Kweights, axis=-1))
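A minimal sketch of exercising `wcce` on a three-class problem; `weights` is a hypothetical per-class weight list, and `y_pred` must be raw logits (no softmax) since `wcce` passes `from_logits=True`:

import tensorflow as tf
from tensorflow.keras import backend as K

weights = [1.0, 2.0, 0.5]  # hypothetical: up-weight class 1, down-weight class 2

y_true = tf.constant([[0.0, 1.0, 0.0]])
logits = tf.constant([[0.2, 1.5, -0.3]])
per_sample_loss = wcce(y_true, logits)  # crossentropy scaled by weight 2.0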
def cat_loss(y_true, y_pred):
    # Read logits directly from `layer` (captured from the enclosing scope)
    # rather than from the post-activation y_pred.
    logits = layer.output
    loss = k.categorical_crossentropy(y_true, logits, from_logits=True)
    # Samples whose one-hot target sums to zero are treated as unlabeled
    # and masked out.
    mask = k.sum(y_true, -1)
    mask = mask > 0
    loss = tf.boolean_mask(loss, mask)
    loss = k.mean(loss, axis=None, keepdims=False)
    return loss
def masked_categorical_crossentropy(y_true, y_pred):
    """Categorical/softmax cross-entropy loss with masking."""
    # The last column of y_true carries a per-sample mask flag;
    # the remaining columns are the one-hot target.
    mask = y_true[:, -1]
    y_true = y_true[:, :-1]
    loss = K.categorical_crossentropy(target=y_true,
                                      output=y_pred,
                                      from_logits=True)
    mask = K.cast(mask, dtype=np.float32)
    loss *= mask
    return K.mean(loss, axis=-1)
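The targets fed to `masked_categorical_crossentropy` therefore pack the one-hot vector and the mask flag side by side; a hypothetical two-sample batch with three classes:

import numpy as np

# Columns 0-2: one-hot class; column 3: mask (1 = use sample, 0 = ignore).
y_true = np.array([[1.0, 0.0, 0.0, 1.0],   # real sample, class 0
                   [0.0, 0.0, 0.0, 0.0]])  # padding, masked out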
def build(self, features, label, input_shape=None):
    backend = self.backend
    weights = self.weights
    include_top = self.include_top
    pooling = self.pooling
    layers = self.layers
    classes = self.classes
    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_size=224,
                                      min_size=32,
                                      data_format=backend.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)
    img_input = layers.Input(tensor=features, shape=input_shape)
    x = self.block1_conv1(img_input)
    x = self.block1_conv2(x)
    x = self.block1_pool(x)
    x = self.block2_conv1(x)
    x = self.block2_conv2(x)
    x = self.block2_pool(x)
    x = self.block3_conv1(x)
    x = self.block3_conv2(x)
    x = self.block3_conv3(x)
    x = self.block3_conv4(x)
    x = self.block3_pool(x)
    x = self.block4_conv1(x)
    x = self.block4_conv2(x)
    x = self.block4_conv3(x)
    x = self.block4_conv4(x)
    x = self.block4_pool(x)
    x = self.block5_conv1(x)
    x = self.block5_conv2(x)
    x = self.block5_conv3(x)
    x = self.block5_conv4(x)
    x = self.block5_pool(x)
    if include_top:
        # Classification block
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.predict(x)
    else:
        if pooling == 'avg':
            x = self.pool(x)
        elif pooling == 'max':
            x = self.pool(x)
    # K.categorical_crossentropy takes the target first, then the output.
    score_array = K.categorical_crossentropy(label, x)
    return tf.reduce_mean(score_array)
def build_model(self):
    # x: [batch_size, self.max_seq_len]
    # y: [batch_size]
    x = tf.nn.embedding_lookup(self.word_embedding, self.x)
    # x: [batch_size, self.max_seq_len, word_embedding_dim]
    label_embeddings = tf.nn.embedding_lookup(self.label_embedding,
                                              self.label_embedding_id)
    y = self.y
    # x_emb
    x_emb = tf.reduce_max(x, axis=1)
    # ---------- attention --------------
    if self.use_attention:
        with tf.name_scope('attention'):
            x_lbl_fea = self.attention_layer(x, label_embeddings,
                                             self.word_embedding_dim,
                                             self.label_embedding_dim,
                                             self.seqlen)
    else:
        #x_lbl_fea = tf.reduce_mean(x, axis=1)
        x_lbl_fea = x_emb
    # ---------- supervised classification output ----------
    with tf.name_scope('output'):
        fea_dim = x_lbl_fea.get_shape().as_list()[-1]
        y_ = self.classification_layer(x_lbl_fea, label_embeddings, fea_dim,
                                       self.label_embedding_dim)
    # ---------- graph context loss ---------------
    gl1 = tf.nn.embedding_lookup(self.label_embedding, self.gl1)
    if self.neg_samp:
        gl2 = tf.nn.embedding_lookup(
            tf.get_variable('context_embedding',
                            [self.label_num, self.label_embedding_dim],
                            initializer=self.weight_initializer), self.gl2)
        l_gy = tf.multiply(gl1, gl2)
        g_loss = tf.reduce_mean(-tf.log(
            tf.sigmoid(tf.multiply(tf.reduce_sum(l_gy, axis=1), self.gy))))
    else:
        l_gy = tf.layers.dense(gl1,
                               self.label_embedding_dim,
                               activation=tf.nn.softmax,
                               use_bias=False)
        g_loss = tf.reduce_mean(
            categorical_crossentropy(
                tf.one_hot(self.gl2, self.label_embedding_dim), l_gy))
    # ---------- classification loss ---------------
    loss = tf.reduce_mean(
        tf.multiply(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=y_),
            self.label_prop))
    # if self.use_propensity:
    #     loss = tf.losses.sigmoid_cross_entropy(y, y_, weights=tf.expand_dims(self.label_prop, -1))
    # else:
    #     loss = tf.losses.sigmoid_cross_entropy(y, y_)
    return x_emb, tf.sigmoid(y_), loss, g_loss
def call(self, inputs):
    ''' CLASSIFIER LOSS '''
    label = inputs[3]
    reversed_label = inputs[4]
    GP_sm = inputs[0]
    GP_op = inputs[1]
    same_gender_loss = K.categorical_crossentropy(label, GP_sm)
    opposite_gender_loss = K.categorical_crossentropy(reversed_label, GP_op)
    classifier_loss = same_gender_loss + opposite_gender_loss

    ''' FACE MATCHER LOSS '''
    FM_img = inputs[5]
    FM_sm = inputs[2]
    face_matcher_loss = K.square(
        K.sqrt(K.sum(K.square(FM_sm - FM_img), axis=1, keepdims=True) +
               K.epsilon()))

    ''' TOTAL LOSS '''
    total_loss = classifier_loss + face_matcher_loss
    self.add_loss(total_loss, inputs=inputs)
    return total_loss  # define the loss as output
def sparse_competitive_crossentropy(y_true_label, y_pred, C, K):
    # y_pred holds C * K logits: K competing units per class.
    y_true_label = tf.reshape(y_true_label, [-1])
    y_pred = ops.convert_to_tensor_v2_with_dispatch(y_pred)
    y_true = tf.one_hot(y_true_label, C)
    y_true = math_ops.cast(y_true, y_pred.dtype)
    # Expand the one-hot target so all K units of the true class are active.
    y_true = tf.repeat(y_true, repeats=K, axis=1)
    # Softly distribute the target mass across the true class's units in
    # proportion to their (gradient-stopped) logits.
    y_pred2 = tf.stop_gradient(y_pred)
    y_pred2_max = tf.reduce_max(y_pred2, axis=1, keepdims=True)
    y_pred2 = y_pred2 - y_pred2_max
    y_true = tf.multiply(y_true, tf.exp(y_pred2))
    y_true = tf.linalg.normalize(y_true, axis=1, ord=1)[0]
    y_true = tf.stop_gradient(y_true)
    return backend.categorical_crossentropy(y_true, y_pred, from_logits=True)
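A usage sketch with hypothetical sizes: three classes and two competing units per class, so the network's output layer has C * K = 6 logit units:

import tensorflow as tf

C, units_per_class = 3, 2
labels = tf.constant([0, 2])                          # integer class ids
logits = tf.random.normal([2, C * units_per_class])   # raw network outputs
loss = sparse_competitive_crossentropy(labels, logits, C, units_per_class)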
def my_str_cross_entropy_loss(target, y_pred):
    '''
    Compare characters: one crossentropy loss per character of the plate
    string, averaged over all characters.
    '''
    # Assumes target and y_pred have shape
    # [batch, plate_str_length, charset_size], one softmax per character.
    str_loss_list = []
    for i in range(plate_str_length):
        # Loss for the i-th character.
        str_loss = K.categorical_crossentropy(target[:, i], y_pred[:, i],
                                              from_logits=False)
        str_loss_list.append(str_loss)
    # Average over all characters.
    mean_loss = K.mean(K.stack(str_loss_list))
    return mean_loss
def adj_loss(y_true, y_pred):
    Y_pred = k.argmax(y_pred)
    Y_true = k.argmax(y_true)
    adj0 = SingleAdjMat(batch_size, 0, pixel_distance)
    adj1 = SingleAdjMat(batch_size, 1, pixel_distance)
    adj_pred0 = adj0.adj_mat(Y_pred, Y_true)
    adj_pred0 = tf.norm(tensor=adj_pred0, ord=1, axis=1)
    adj_pred1 = adj1.adj_mat(Y_pred, Y_true)
    adj_pred1 = tf.norm(tensor=adj_pred1, ord=1, axis=1)
    adj_true0 = adj0.adj_mat(Y_true, Y_pred)
    adj_true0 = tf.norm(tensor=adj_true0, ord=1, axis=1)
    adj_true1 = adj1.adj_mat(Y_true, Y_pred)
    adj_true1 = tf.norm(tensor=adj_true1, ord=1, axis=1)
    # L2
    quad0 = (adj_pred0 - adj_true0)
    quad0 = quad0 * quad0
    quad1 = (adj_pred1 - adj_true1)
    quad1 = quad1 * quad1
    global adj_loss_value
    tmp0 = k.mean(quad0)
    tmp0 = k.sum(tmp0)
    # tmp0 = k.mean(quad0, keepdims=True)
    # tmp0 = k.sum(tmp0, axis=0)
    # tmp0 = tmp0 * vector_weights
    # tmp0 = k.sum(tmp0, axis=0)
    tmp1 = k.mean(quad1)
    tmp1 = k.sum(tmp1)
    tmp = tmp0 + tmp1
    adj_loss_value = lambda_loss * tmp
    global categ_loss
    categ_loss = k.categorical_crossentropy(y_true, y_pred)
    loss = adj_loss_value + categ_loss
    return loss
def adj_loss(y_true, y_pred):
    Y_pred = k.argmax(y_pred)
    Y_true = k.argmax(y_true)
    adj = WeightedAdjMat(batch_size, pixel_distance)
    adj_pred = adj.adj_mat(Y_pred, Y_true)
    adj_pred = tf.norm(tensor=adj_pred, ord=1, axis=1)
    adj_true = adj.adj_mat(Y_true, Y_pred)
    adj_true = tf.norm(tensor=adj_true, ord=1, axis=1)
    # L1
    mod = k.abs(adj_pred - adj_true)
    global adj_loss_value
    adj_loss_value = lambda_loss * k.mean(mod)
    global categ_loss
    categ_loss = k.categorical_crossentropy(y_true, y_pred)
    loss = adj_loss_value + categ_loss
    return loss
def adj_loss(y_true, y_pred):
    Y_pred = k.argmax(y_pred)
    Y_true = k.argmax(y_true)
    adj = adj_mat_func(batch_size)
    adj_pred = adj.adj_mat(Y_pred, Y_true)
    adj_pred = tf.norm(tensor=adj_pred, ord=1, axis=1)
    adj_true = adj.adj_mat(Y_true, Y_pred)
    adj_true = tf.norm(tensor=adj_true, ord=1, axis=1)
    # L2
    quad = (adj_pred - adj_true)
    quad = quad * quad
    global adj_loss_value
    adj_loss_value = lambda_loss * k.mean(quad)
    global categ_loss
    categ_loss = k.categorical_crossentropy(y_true, y_pred)
    loss = adj_loss_value + categ_loss
    return loss
def pre_build_model(self):
    # x_feature_id: [batch_size, max_seq_len]
    # x_feature_v: [batch_size, max_seq_len]
    # y: [batch_size]
    #
    # label embeddings
    label_embeddings = tf.nn.embedding_lookup(self.label_embedding,
                                              self.label_embedding_id)
    # y
    y = self.y
    #y = tf.multiply(self.y, self.label_prop)
    # ---------- x -------------
    word_embeddings_padding = tf.concat(
        (tf.constant(0, dtype=tf.float32, shape=[1, self.word_embedding_dim]),
         self.word_embedding),
        axis=0)
    x_feature_id = self.x_feature_id
    x = tf.nn.embedding_lookup(word_embeddings_padding, x_feature_id)
    # x: [batch_size, max_seq_len, word_embedding_dim]
    # normalize feature_v
    #feature_v = tf.divide(self.x_feature_v, tf.norm(self.x_feature_v, 2, axis=-1, keepdims=True))
    #feature_v = tf.divide(self.x_feature_v, tf.reduce_sum(self.x_feature_v, -1, keepdims=True))
    feature_v = self.x_feature_v
    feature_v = tf.layers.batch_normalization(feature_v)
    #feature_v = tf.contrib.layers.dropout(feature_v, keep_prob=0.5)
    if self.use_attention:
        with tf.name_scope('attention'):
            att_weight = self.attention_layer(x, label_embeddings,
                                              self.word_embedding_dim,
                                              self.label_embedding_dim,
                                              self.seqlen)
            # att_weight: [batch_size, max_seq_len]
            feature_v = tf.multiply(feature_v, att_weight)
    # ---------- feature embeddings -----------
    x_emb = tf.reduce_sum(tf.multiply(x, tf.expand_dims(feature_v, -1)),
                          axis=1)
    # x_emb: [batch_size, word_embedding_dim]
    # label_embeddings: [batch_size, label_embedding_dim]
    x_label_concat = tf.concat([x_emb, label_embeddings], axis=-1)
    # ---------- output layer ----------
    y_hidden = tf.layers.dense(x_label_concat,
                               self.num_classify_hidden,
                               activation=tf.nn.relu,
                               use_bias=True,
                               name='pre_dense_0')
    #y_hidden = tf.contrib.layers.dropout(y_hidden, keep_prob=0.5)
    y_hidden = tf.layers.batch_normalization(y_hidden)
    #y_hidden = tf.contrib.layers.dropout(y_hidden, keep_prob=0.5)
    y_out = tf.layers.dense(y_hidden, 1, activation=None, name='pre_dense_1')
    loss = tf.reduce_sum(
        tf.multiply(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=y,
                                                    logits=tf.squeeze(y_out)),
            self.label_prop))
    #loss = tf.nn.l2_loss(y - y_out, name='l2_loss')
    # ---------- graph context loss ---------------
    if self.use_graph:
        with tf.variable_scope('graph_embedding', reuse=tf.AUTO_REUSE):
            gl1 = tf.nn.embedding_lookup(self.label_embedding, self.gl1)
            if self.neg_samp:
                gl2 = tf.nn.embedding_lookup(
                    tf.get_variable(
                        'context_embedding',
                        [self.label_num, self.label_embedding_dim],
                        initializer=self.weight_initializer), self.gl2)
                l_gy = tf.multiply(gl1, gl2)
                g_loss = tf.reduce_mean(-tf.log(
                    tf.sigmoid(
                        tf.multiply(tf.reduce_sum(l_gy, axis=1), self.gy))))
            else:
                l_gy = tf.layers.dense(gl1,
                                       self.label_embedding_dim,
                                       activation=tf.nn.softmax,
                                       use_bias=False)
                g_loss = tf.reduce_mean(
                    categorical_crossentropy(
                        tf.one_hot(self.gl2, self.label_embedding_dim), l_gy))
    else:
        g_loss = 0
    # ---------- get feature_gradient -------------
    word_grads = tf.gradients(loss, [self.word_embedding])[0]
    word_abs_grads = tf.abs(word_grads)
    sum_word_grads = tf.reduce_sum(word_abs_grads, axis=-1)
    #print 'shape of sum_word_grads'
    #print sum_word_grads.get_shape().as_list()
    return x_emb, y_out, sum_word_grads, loss, g_loss
def categorical_crossentropy(y_true, y_pred, from_logits=False, **kwargs):
    return K.expand_dims(
        K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits))
def build_model(batch_size, items_num, hidden_size, steps):
    '''
    :param batch_size:
    :param items_num:
    :param hidden_size:
    :param steps: number of GNN propagation steps
    :return:
    '''
    # Fixed length (the number of unique items in a batch); the item ids of
    # the session click sequence are left untouched.
    items = keras.layers.Input(shape=(None, ), name='items', dtype="int32")
    # Indices into `items` (click-sequence length).
    seq_index = keras.layers.Input(shape=(None, ),
                                   name="seq_index",
                                   dtype="int32")
    # Relative to seq_index: the largest re-encoded index of each session.
    last_index = keras.layers.Input(shape=(1, ),
                                    name="last_index",
                                    dtype="int32")
    # In-degree adjacency matrix.
    adj_in = keras.layers.Input(shape=(None, None),
                                name="adj_in",
                                dtype="float32")
    # Out-degree adjacency matrix; one adjacency matrix per session sequence.
    adj_out = keras.layers.Input(shape=(None, None),
                                 name="adj_out",
                                 dtype="float32")
    # Mask sequence for the raw session click sequence.
    mask = keras.layers.Input(shape=(None, 1), name="mask", dtype="float32")
    # Item id of the session's next click.
    label = keras.layers.Input(shape=(1, ), name="label", dtype="int32")

    # Build the item embedding matrix.
    items_embedding = keras.layers.Embedding(
        input_dim=items_num,
        output_dim=hidden_size,
        embeddings_initializer=keras.initializers.RandomNormal(mean=0.0,
                                                               stddev=1e-4,
                                                               seed=seed))

    # Define a trainable tensor of shape (items_num - 1, hidden_size).
    class Bias(keras.layers.Layer):
        def build(self, input_shape):
            self.y_emb = self.add_weight(shape=(items_num - 1, hidden_size),
                                         initializer='zeros',
                                         dtype=tf.float32,
                                         name='vocab_emb')
            self.built = True

        def call(self, x):
            return tf.matmul(x, self.y_emb, transpose_b=True)

    # Look up the embeddings of the session's item ids.
    items_emb = items_embedding(items)  # (batch_size, uniq_max, hidden_size)
    # The input length is fixed.
    init_state = items_emb
    # Each Dense learns a (hidden_size, hidden_size) weight matrix, shared
    # across propagation steps, so a global weight matrix is learned.
    item_in = keras.layers.Dense(hidden_size)
    item_out = keras.layers.Dense(hidden_size)
    seq_dense = keras.layers.Dense(hidden_size)
    last_dense = keras.layers.Dense(hidden_size)

    # GNN propagation steps.
    for i in range(steps):
        init_state = keras.layers.Lambda(
            lambda x: tf.reshape(x, [batch_size, -1, hidden_size]))(
                init_state)  # (batch_size, uniq_max, hidden_size)
        # Linearly transform the item-sequence embeddings.
        state_in = item_in(init_state)    # (batch_size, uniq_max, hidden_size)
        state_out = item_out(init_state)  # (batch_size, uniq_max, hidden_size)
        # Multiply with the adjacency matrices:
        # (batch_size, uniq_max, uniq_max) x (batch_size, uniq_max, hidden_size)
        # => (batch_size, uniq_max, hidden_size) each.
        state_adj_in = keras.layers.Lambda(lambda x: tf.matmul(x[0], x[1]))(
            [adj_in, state_in])
        state_adj_out = keras.layers.Lambda(lambda x: tf.matmul(x[0], x[1]))(
            [adj_out, state_out])
        # Concatenate as GRU input: [batch_size, uniq_max, 2 * hidden_size].
        gru_input = keras.layers.Lambda(
            lambda x: tf.concat([x[0], x[1]], axis=2))(
                [state_adj_in, state_adj_out])
        # Flatten for one GRU step: [batch_size * uniq_max, 2 * hidden_size].
        gru_input = keras.layers.Lambda(
            lambda x: tf.reshape(x, [-1, 2 * hidden_size]))(gru_input)
        # Expand dims to add a time step:
        # (batch_size * uniq_max, 1, 2 * hidden_size).
        gru_input = keras.layers.Lambda(lambda x: tf.expand_dims(x, axis=1))(
            gru_input)
        # Run through the GRU to get the new node states.
        _, init_state = keras.layers.GRU(hidden_size,
                                         return_sequences=True,
                                         return_state=True)(gru_input)

    final_state = init_state  # (batch_size * uniq_max, hidden_size)
    # Gather the indices of each session's click sequence.
    seq_reshape = keras.layers.Lambda(lambda x: tf.reshape(x, [-1]))(
        seq_index)  # (batch_size * seq_len), seq_len is the click-sequence length
    seq = keras.layers.Lambda(lambda x: tf.gather(final_state, x))(
        seq_reshape)  # (batch_size * seq_len, hidden_size)
    seq = keras.layers.Lambda(
        lambda x: tf.reshape(x, [batch_size, -1, hidden_size]))(
            seq)  # (batch_size, seq_len, hidden_size)
    # last = keras.layers.Lambda(lambda x: tf.squeeze(tf.gather(final_state, x)))(last_index)  # (batch_size, hidden_size)
    last = keras.layers.Lambda(lambda x: tf.gather(final_state, x))(
        last_index)  # (batch_size, 1, hidden_size)
    # Note: using tf.squeeze to drop the size-1 dimension loses the tensor's
    # static rank ("its rank is undefined, but the layer requires a defined
    # rank"), so reshape instead.
    last = keras.layers.Lambda(lambda x: tf.reshape(x, [-1, hidden_size]))(
        last)

    seq_fc = seq_dense(seq)     # (batch_size, seq_len, hidden_size)
    last_fc = last_dense(last)  # (batch_size, hidden_size)
    add = keras.layers.Add()([seq_fc, last_fc])  # (batch_size, seq_len, hidden_size)
    add_sigmoid = keras.layers.Lambda(lambda x: tf.sigmoid(x))(add)
    weights = keras.layers.Dense(1)(add_sigmoid)  # (batch_size, seq_len, 1)
    # Element-wise multiplication with the mask.
    weights = keras.layers.Multiply()([weights, mask])  # (batch_size, seq_len, 1)
    weights_mask = keras.layers.Multiply()(
        [seq, weights])  # (batch_size, seq_len, hidden_size)
    global_attention = keras.layers.Lambda(lambda x: tf.reduce_sum(x, axis=1))(
        weights_mask)  # (batch_size, hidden_size)
    final_attention = keras.layers.Lambda(
        lambda x: tf.concat([x[0], x[1]], axis=1))(
            [global_attention, last])  # (batch_size, 2 * hidden_size)
    final_attention_fc = keras.layers.Dense(hidden_size)(
        final_attention)  # (batch_size, hidden_size)
    # Project onto the trainable item matrix of shape (items_num - 1, hidden_size).
    logits = Bias()(final_attention_fc)  # (batch_size, items_num - 1)

    # `label` must be a model input for add_loss to see it.
    model = keras.models.Model(
        inputs=[items, seq_index, last_index, adj_in, adj_out, mask, label],
        outputs=logits)
    # Compute the loss. `label` holds an integer item id, so the sparse
    # variant of the crossentropy is used against the raw logits.
    loss = K.sparse_categorical_crossentropy(target=label,
                                             output=logits,
                                             from_logits=True)
    model.add_loss(loss)
    model.compile(optimizer=keras.optimizers.SGD(1e-3))
    return model
def weighted_categorical_cross_entropy(y_true, y_pred, class_weights):
    # For one-hot y_true, reduce_max picks out the weight of the true class.
    return (tf.math.reduce_max(y_true * class_weights, axis=-1) *
            K.categorical_crossentropy(y_true, y_pred))
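Because Keras losses receive only `(y_true, y_pred)`, the extra `class_weights` argument has to be bound first; a minimal sketch using a closure, with hypothetical weights:

import tensorflow as tf

def make_weighted_cce(class_weights):
    class_weights = tf.constant(class_weights, dtype=tf.float32)
    def loss(y_true, y_pred):
        return weighted_categorical_cross_entropy(y_true, y_pred,
                                                  class_weights)
    return loss

# model.compile(optimizer='adam', loss=make_weighted_cce([1.0, 2.0, 0.5]))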
def am_softmax_loss(y_true, y_pred, scale=30, margin=0.35):
    # NOTE: the predicted features are L2-normalized and so is W, so
    # y_pred is cos(theta); subtract the margin from the target cosine only.
    y_pred = (y_true * (y_pred - margin) + (1 - y_true) * y_pred) * scale
    return K.categorical_crossentropy(y_true, y_pred, from_logits=True)
def custom_loss(y_true, y_pred):
    return K.categorical_crossentropy(y_true, y_pred, from_logits=True)
def categorical_crossentropy(y_true, y_pred, from_logits=False):
    return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
def masked_categorical_crossentropy(y_true, y_pred):
    #mask = 1. - K.cast(K.equal(K.argmax(y_true, axis=-1), 0), K.floatx())
    return categorical_crossentropy(y_true, y_pred)
def categorical_crossentropy_with_logits(y_true, y_pred):
    return K.categorical_crossentropy(y_true, y_pred, from_logits=True)
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.keras import backend as K

data_train, data_test = tf.keras.datasets.mnist.load_data()
X, Y = data_train
plt.imshow(np.concatenate([X[0, :, :, np.newaxis]] * 3, axis=-1))

tf.reset_default_graph()
tf.get_default_graph().get_operations()

X_in = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28, 1])
X = tf.layers.conv2d(X_in, 8, kernel_size=3, padding='SAME',
                     strides=(2, 2), use_bias=True, name='1')  # None x 14 x 14 x 8
X = tf.layers.conv2d(X, 16, kernel_size=3, padding='SAME',
                     strides=(2, 2), use_bias=True, name='2')  # None x 7 x 7 x 16
X = tf.nn.relu(X)
X = tf.layers.flatten(X)                   # None x 784
X = tf.layers.dense(X, 64, name='dense1')  # None x 64
X = tf.nn.relu(X)
X = tf.layers.dense(X, 64, name='dense2')  # None x 64
X = tf.nn.relu(X)
X = tf.layers.dense(X, 10, name='cls')     # None x 10 -- logits
X = tf.nn.softmax(X, axis=-1)              # None x 10 -- predictions

sess = tf.Session()
y = tf.placeholder(dtype=tf.int64, shape=[None])
# The target must be a distribution, so one-hot the integer labels first.
loss = K.categorical_crossentropy(target=tf.one_hot(y, 10), output=X,
                                  from_logits=False)
def summed_categorical_crossentropy(y_true, y_pred):
    """ Negative log likelihood of categorical distribution """
    return K.sum(K.categorical_crossentropy(y_true, y_pred), axis=-1)
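The sum over the last axis matters for sequence outputs: `K.categorical_crossentropy` already reduces the class axis, so for targets of shape `(batch, timesteps, classes)` it returns `(batch, timesteps)`, and the sum yields one loss per sequence. A small check with hypothetical shapes:

import numpy as np
from tensorflow.keras import backend as K

y_true = K.constant(np.eye(4)[None, :, :])       # (1, 4, 4): 4 timesteps
y_pred = K.constant(np.full((1, 4, 4), 0.25))    # uniform predictions
per_step = K.categorical_crossentropy(y_true, y_pred)  # shape (1, 4)
per_seq = K.sum(per_step, axis=-1)                     # shape (1,)
# per_seq == 4 * ln(4)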
def build_model(self):
    # x_feature_id: [batch_size, max_seq_len]
    # x_feature_v: [batch_size, max_seq_len]
    # y: [batch_size]
    #
    # label embeddings
    label_embeddings = tf.nn.embedding_lookup(self.label_embedding,
                                              self.label_embedding_id)
    # y
    y = self.y
    #y = tf.multiply(self.y, self.label_prop)
    # ---------- x -------------
    word_embeddings_padding = tf.concat(
        (tf.constant(0, dtype=tf.float32, shape=[1, self.word_embedding_dim]),
         self.word_embedding),
        axis=0)
    x = tf.nn.embedding_lookup(word_embeddings_padding, self.x_feature_id)
    # x: [batch_size, max_seq_len, word_embedding_dim]
    #feature_v = self.gaussian_noise_layer(tf.cast(feature_v, dtype=tf.float32))
    feature_v = self.x_feature_v
    feature_v = tf.layers.batch_normalization(feature_v)
    if self.use_attention:
        with tf.name_scope('attention'):
            att_weight = self.attention_layer(x, label_embeddings,
                                              self.word_embedding_dim,
                                              self.label_embedding_dim,
                                              self.seqlen)
            # att_weight: [batch_size, max_seq_len]
            feature_v = tf.multiply(feature_v, att_weight)
    # ---------- feature embeddings -----------
    x_emb = tf.reduce_sum(tf.multiply(x, tf.expand_dims(feature_v, -1)),
                          axis=1)
    # x_emb: [batch_size, word_embedding_dim]
    # label_embeddings: [batch_size, label_embedding_dim]
    feature_label_embeddings = tf.reduce_sum(tf.nn.embedding_lookup(
        word_embeddings_padding, self.label_active_feature),
                                             axis=1)
    x_label_concat = tf.concat([x_emb, label_embeddings], axis=-1)
    x_label_concat = tf.concat([x_label_concat, feature_label_embeddings],
                               axis=-1)
    # ---------- output layer ----------
    #y_hidden = tf.layers.dense(x_label_concat, self.num_classify_hidden, activation=tf.nn.relu, use_bias=True, kernel_regularizer=tf.contrib.layers.l2_regularizer, name='dense_0')
    weight_1 = tf.get_variable(
        'train_weight_1',
        [self.word_embedding_dim * 2 + self.label_embedding_dim,
         self.num_classify_hidden],
        initializer=self.weight_initializer)
    y_hidden = tf.nn.relu(tf.matmul(x_label_concat, weight_1))
    #y_hidden = tf.contrib.layers.dropout(y_hidden, keep_prob=0.5)
    y_hidden = tf.layers.batch_normalization(y_hidden)
    weight_2 = tf.get_variable('train_weight_2',
                               [self.num_classify_hidden, 2],
                               initializer=self.weight_initializer)
    y_out = tf.matmul(y_hidden, weight_2)
    loss = tf.losses.softmax_cross_entropy(
        tf.one_hot(tf.cast(y, dtype=tf.int32), 2),
        logits=y_out,
        weights=self.label_prop) + tf.nn.l2_loss(weight_1) + tf.nn.l2_loss(
            weight_2)
    #y_out = tf.layers.dense(y_hidden, 1, activation=None, kernel_regularizer=tf.contrib.layers.l2_regularizer, name='dense_1')
    #loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=tf.squeeze(y_out)) + tf.reduce_sum(tf.losses.get_regularization_losses())
    #loss = tf.reduce_sum(tf.multiply(tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=tf.squeeze(y_out)), self.label_prop)) + tf.nn.l2_loss(weight_1) + tf.nn.l2_loss(weight_2)
    #loss = tf.nn.l2_loss(y - y_out, name='l2_loss')
    # ---------- graph context loss ---------------
    if self.use_graph:
        gl1 = tf.nn.embedding_lookup(self.label_embedding, self.gl1)
        if self.neg_samp:
            gl2 = tf.nn.embedding_lookup(
                tf.get_variable('context_embedding',
                                [self.label_num, self.label_embedding_dim],
                                initializer=self.weight_initializer),
                self.gl2)
            l_gy = tf.multiply(gl1, gl2)
            g_loss = tf.reduce_mean(-tf.log(
                tf.sigmoid(tf.multiply(tf.reduce_sum(l_gy, axis=1),
                                       self.gy))))
        else:
            l_gy = tf.layers.dense(gl1,
                                   self.label_embedding_dim,
                                   activation=tf.nn.softmax,
                                   use_bias=False)
            g_loss = tf.reduce_mean(
                categorical_crossentropy(
                    tf.one_hot(self.gl2, self.label_embedding_dim), l_gy))
    else:
        g_loss = 0
    # ---------- get feature_gradient -------------
    # word_grads = tf.gradients(loss, [self.word_embedding])[0]
    # sum_word_grads = tf.sparse_reduce_sum(word_grads, axis=-1)
    # print 'shape of sum_word_grads'
    # print sum_word_grads.get_shape().as_list()
    return x_emb, y_out, loss, g_loss
def categorical_crossentropy(y_true, y_pred):
    return K.categorical_crossentropy(y_true, y_pred)
def amsoftmax_loss(y_true, y_pred, scale=30, margin=0.35):
    y_pred = y_true * (y_pred - margin) + (1 - y_true) * y_pred
    y_pred *= scale
    return k.categorical_crossentropy(y_true, y_pred, from_logits=True)
def amsoftmax_loss(y_true, y_pred, scale=30, margin=0.35):
    # For one-hot y_true this is algebraically the same as the variant above:
    # subtract the margin from the target logit only, then rescale.
    y_pred = y_pred - y_true * margin
    y_pred = y_pred * scale
    return K.categorical_crossentropy(y_true, y_pred, from_logits=True)
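A small usage sketch for the AM-softmax losses above. The inputs are assumed to be cosine similarities from an L2-normalized embedding layer and weight matrix; the values here are hypothetical:

import tensorflow as tf
from tensorflow.keras import backend as K

y_true = K.constant([[0.0, 1.0, 0.0]])
cos_theta = K.constant([[0.1, 0.7, -0.2]])  # cos(theta) per class
loss = amsoftmax_loss(y_true, cos_theta)
# Only the target cosine (0.7) is reduced by the margin before scaling,
# which forces the true class to win by at least `margin`.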