def _apply(self, scores_pos, scores_neg):
    """Apply the loss function.

    Self-adversarial negative sampling loss: negatives are weighted by a
    softmax over their own scores (temperature alpha).

    Parameters
    ----------
    scores_pos : tf.Tensor, shape [n, 1]
        A tensor of scores assigned to positive statements.
    scores_neg : tf.Tensor, shape [n*negative_count, 1]
        A tensor of scores assigned to negative statements.

    Returns
    -------
    loss : tf.Tensor
        The loss value that must be minimized.
    """
    margin = tf.constant(self._loss_parameters['margin'], dtype=tf.float32, name='margin')
    alpha = tf.constant(self._loss_parameters['alpha'], dtype=tf.float32, name='alpha')

    # Compute p(neg_samples) based on eq 4
    # Group the flat negatives as (eta, n): eta negatives per positive.
    # Softmax over axis 0 gives the self-adversarial weight of each negative.
    scores_neg_reshaped = tf.reshape(scores_neg, [self._loss_parameters['eta'], tf.shape(scores_pos)[0]])
    p_neg = tf.nn.softmax(alpha * scores_neg_reshaped, axis=0)

    # Compute Loss based on eg 5
    # NOTE(review): margin - tf.negative(scores_pos) == margin + scores_pos;
    # this matches the RotatE convention where scores are negated distances
    # (gamma - d = gamma + score) — confirm against the scoring function.
    loss = tf.reduce_sum(-tf.log_sigmoid(margin - tf.negative(scores_pos))) - tf.reduce_sum(
        tf.multiply(p_neg, tf.log_sigmoid(tf.negative(scores_neg_reshaped) - margin)))

    return loss
def build_graph(self):
    """Build the LINE training graph: embedding tables, first/second-order
    losses (selected by self.order), and an Adam train op.
    """
    # Edge endpoints; sign is +1 for observed edges and (presumably) -1 for
    # negative samples — confirm against the feeding code.
    self.h = tf.placeholder(tf.int32, [None])
    self.t = tf.placeholder(tf.int32, [None])
    self.sign = tf.placeholder(tf.float32, [None])

    cur_seed = random.getrandbits(32)
    self.embeddings = tf.get_variable(
        name="embeddings" + str(self.order),
        shape=[self.node_size, self.rep_size],
        initializer=tf.contrib.layers.xavier_initializer(uniform=False, seed=cur_seed))
    self.context_embeddings = tf.get_variable(
        name="context_embeddings" + str(self.order),
        shape=[self.node_size, self.rep_size],
        initializer=tf.contrib.layers.xavier_initializer(uniform=False, seed=cur_seed))
    # self.h_e = tf.nn.l2_normalize(tf.nn.embedding_lookup(self.embeddings, self.h), 1)
    # self.t_e = tf.nn.l2_normalize(tf.nn.embedding_lookup(self.embeddings, self.t), 1)
    # self.t_e_context = tf.nn.l2_normalize(tf.nn.embedding_lookup(self.context_embeddings, self.t), 1)
    self.h_e = tf.nn.embedding_lookup(self.embeddings, self.h)
    self.t_e = tf.nn.embedding_lookup(self.embeddings, self.t)
    self.t_e_context = tf.nn.embedding_lookup(self.context_embeddings, self.t)
    # Second-order proximity: the target node uses the context table.
    self.second_loss = -tf.reduce_mean(
        tf.log_sigmoid(self.sign * tf.reduce_sum(
            tf.multiply(self.h_e, self.t_e_context), axis=1)))
    # First-order proximity: both endpoints share the same table.
    self.first_loss = -tf.reduce_mean(
        tf.log_sigmoid(self.sign * tf.reduce_sum(
            tf.multiply(self.h_e, self.t_e), axis=1)))
    if self.order == 1:
        self.loss = self.first_loss
    else:
        self.loss = self.second_loss
    optimizer = tf.train.AdamOptimizer(0.001)
    self.train_op = optimizer.minimize(self.loss)
def coupling_layer(x, b, name, init=False, backward=False, eps=1e-6):
    """Masked affine coupling layer with an actnorm on its input.

    b is a binary mask: masked entries pass through unchanged; the rest are
    affinely transformed with scale/shift predicted from the masked part.
    Returns the transformed tensor and a per-example log-determinant.
    """
    def get_vars(b, x):
        # Predict scale/shift from the masked (identity) half only.
        bx = b * x
        logit_s = NN(bx, "logit_s", init=init) + 2.
        # +eps keeps the scale strictly positive so the inverse division is safe.
        s = tf.sigmoid(logit_s) + eps
        t = NN(bx, "t", init=init)
        return logit_s, s, t

    with tf.variable_scope(name, reuse=(not init)):
        if backward:
            # Inverse pass: undo the affine map, then invert the actnorm.
            logit_s, s, t = get_vars(b, x)
            x = b * x + (1. - b) * ((x / s) - t)
            x, logdet_an = actnorm(x, name + "_an_in", init, logdet=True, backward=True)
            # NOTE(review): log_sigmoid(logit_s) is log(s - eps), i.e. the
            # eps term is ignored in the log-determinant — confirm intended.
            logdet = tf.reduce_sum(tf.log_sigmoid(logit_s) * (1. - b), axis=[1, 2, 3])
            return x, -logdet + logdet_an
        else:
            # Forward pass: actnorm first, then the affine coupling.
            x, logdet_an = actnorm(x, name + "_an_in", init, logdet=True, backward=False)
            logit_s, s, t = get_vars(b, x)
            if not init:
                tf.summary.histogram("s/" + name, s)
                tf.summary.histogram("t/" + name, t)
            x = b * x + (1. - b) * ((x + t) * s)
            logdet = tf.reduce_sum(tf.log_sigmoid(logit_s) * (1. - b), axis=[1, 2, 3])
            return x, logdet + logdet_an
def z_mac_model(xy_var, w_list, z_list):
    """
    computes layerwise and joint losses for z-optimization
    :param xy_var: in and out variables
    :param w_list: weights per layer
    :param z_list: zs per layer
    :return: (opt_loss, losses) — the summed loss and the per-layer list
    """
    x_in = xy_var.x
    losses = []
    # Hidden layers: reconstruct each layer's target z from the previous z.
    # Each w stacks the weight matrix (w[:-1, :]) over a bias row (w[-1, :]).
    for layer, z_in, z_tgt, w in zip(range(len(z_list)), [x_in] + z_list[:-1], z_list, w_list[:-1]):
        with tf.variable_scope('layer_{}'.format(layer), reuse=tf.AUTO_REUSE):
            z_rec = tf.nn.relu(z_in @ w[:-1, :] + w[-1, :])
            # Squared reconstruction error, summed per sample, averaged over batch.
            loss = tf.reduce_mean(tf.reduce_sum((z_tgt - z_rec)**2, axis=-1))
            losses.append(loss)
    # Output layer: sigmoid cross-entropy when labels exist, else an
    # autoencoding MSE back to the input.
    with tf.variable_scope('layer_{}'.format(len(z_list)), reuse=tf.AUTO_REUSE):
        w, z_in = w_list[-1], z_list[-1]
        z_rec = z_in @ w[:-1, :] + w[-1, :]
        if xy_var.y is not None:
            # Numerically stable BCE from logits via log_sigmoid(+/-z).
            ce_term = -tf.reduce_sum(xy_var.y * tf.log_sigmoid(z_rec) + (1 - xy_var.y) * tf.log_sigmoid(-z_rec), axis=-1)
            out_loss = tf.reduce_mean(ce_term)
        else:
            out_loss = tf.reduce_mean(tf.reduce_sum((x_in - z_rec)**2, axis=-1))
        losses.append(out_loss)
    opt_loss = tf.reduce_sum(losses)
    return opt_loss, losses
def kl_divergence(self, param_batch_1, param_batch_2):
    """KL divergence between Bernoulli distributions given by logits.

    Computes KL(p || q) summed over the last axis, where
    p = sigmoid(param_batch_1) and q = sigmoid(param_batch_2).
    All log terms are taken from logits via log_sigmoid for stability.
    """
    p_on = tf.sigmoid(param_batch_1)
    p_off = tf.sigmoid(-param_batch_1)
    # p(on) * (log p(on) - log q(on))
    on_term = p_on * (tf.log_sigmoid(param_batch_1) - tf.log_sigmoid(param_batch_2))
    # p(off) * (log p(off) - log q(off))
    off_term = p_off * (tf.log_sigmoid(-param_batch_1) - tf.log_sigmoid(-param_batch_2))
    return tf.reduce_sum(on_term + off_term, axis=-1)
def __init__(self, params, w=None, c=None):
    """Skip-gram-style embedding model with weighted negative sampling.

    Parameters
    ----------
    params : dict
        Hyper-parameters (dim, learn_rate, num_sampled, optimizer, epoch_num,
        show_num, size_subgraph, num_nodes, num_edges, batch_size, log).
    w : array-like, optional
        Initial values for the node embedding table; random-uniform if None.
    c : array-like, optional
        Initial values for the context embedding table; random if None.
    """
    p = ct.obj_dic(params)
    self.dim = p.dim
    self.lr = p.learn_rate
    self.k = p.num_sampled
    self.optimizer = p.optimizer
    self.epoch_num = p.epoch_num
    self.show_num = p.show_num
    self.size_subgraph = p.size_subgraph
    self.num_nodes = p.num_nodes
    self.num_edges = p.num_edges
    self.batch_size = p.batch_size
    self.logger = p.log
    self.tensor_graph = tf.Graph()
    with self.tensor_graph.as_default():
        tf.set_random_seed(random.randint(0, 1e9))
        # Center node ids, positive context ids, and k negatives per example,
        # each with an importance weight.
        self.w_id = tf.placeholder(tf.int32, shape=[None])
        self.c_pos_id = tf.placeholder(tf.int32, shape=[None])
        self.c_neg_id = tf.placeholder(tf.int32, shape=[None, self.k])
        self.neg_weight = tf.placeholder(tf.float32, shape=[None, self.k])
        self.pos_weight = tf.placeholder(tf.float32, shape=[None])
        if w is None:
            self.w = tf.Variable(tf.random_uniform(
                [self.size_subgraph, self.dim],
                -1.0 / self.size_subgraph,
                1.0 / self.size_subgraph), dtype=tf.float32)
        else:
            self.w = tf.Variable(w, dtype=tf.float32)
        if c is None:
            # BUG FIX: this branch referenced `self.size_subgragh` (typo) and
            # `self.embedding_size` (never defined), so it always raised
            # AttributeError. Use the same table shape as `self.w`.
            # NOTE(review): the positional (-1.0, 1.0) are truncated_normal's
            # mean/stddev — confirm a uniform init was not intended instead.
            self.c = tf.Variable(tf.truncated_normal(
                [self.size_subgraph, self.dim],
                -1.0, 1.0), dtype=tf.float32)
        else:
            self.c = tf.Variable(c, dtype=tf.float32)
        self.embed = tf.nn.embedding_lookup(self.w, self.w_id)
        self.c_pos = tf.nn.embedding_lookup(self.c, self.c_pos_id)
        self.c_neg = tf.nn.embedding_lookup(self.c, self.c_neg_id)
        self.pos_dot = tf.reduce_sum(tf.multiply(self.embed, self.c_pos), axis=1)
        embed_3d = tf.reshape(self.embed, [-1, 1, self.dim])
        # dim: batch_size * 1 * k
        self.neg_dot_pre = tf.matmul(embed_3d, self.c_neg, transpose_b=True)
        # dim: batch_size * k
        self.neg_dot = tf.squeeze(self.neg_dot_pre)
        #self.loss = -tf.reduce_sum(tf.log_sigmoid(self.pos_dot)) - \
        #    tf.reduce_sum(tf.log_sigmoid(-self.neg_dot))
        # Weighted skip-gram negative-sampling objective, normalized by the
        # graph's edge / node counts.
        self.loss = -tf.reduce_mean(tf.multiply(tf.log_sigmoid(self.pos_dot), self.pos_weight)) / self.num_edges - \
            tf.reduce_mean(tf.multiply(tf.log_sigmoid(-self.neg_dot), self.neg_weight)) / self.num_nodes / self.num_nodes
        # Optimizer class is looked up by name from tf.train.
        self.train_step = getattr(tf.train, self.optimizer)(
            self.lr).minimize(self.loss)
def construct(self, num_nodes: int, embedding_size: int, order: int,
              learn_rate: float, neg_sample_size: int = None) -> None:
    """Build a LINE-style embedding graph inside the session's graph.

    Parameters
    ----------
    num_nodes : int
        Number of nodes (rows of the embedding tables).
    embedding_size : int
        Dimensionality of each embedding.
    order : int
        1 shares one table for source and target (first-order proximity);
        any other value builds a separate target table (second-order).
    learn_rate : float
        Adam learning rate.
    neg_sample_size : int
        Number of negative samples per edge.
        NOTE(review): the default None would crash in the range() below —
        callers presumably always pass a value; confirm.
    """
    with self._session.graph.as_default():
        self._source_embeddings = tf.Variable(
            tf.random_uniform([num_nodes, embedding_size], -1.0, 1.0))
        # Placeholders for inputs
        self._src_edges = tf.placeholder(tf.int32, shape=[None])
        self._tar_edges = tf.placeholder(tf.int32, shape=[None])
        self._nes_edges = tf.placeholder(tf.int32, shape=[None, neg_sample_size])
        # Source embeddings
        src_embed = tf.nn.embedding_lookup(self._source_embeddings, self._src_edges)
        # Target embeddings
        if order == 1:
            # Same embedding as source
            tar_embed = tf.nn.embedding_lookup(self._source_embeddings, self._tar_edges)
            nes_embed = tf.nn.embedding_lookup(self._source_embeddings, self._nes_edges)
        else:
            # Separate target embeddings for second-order
            self._target_embeddings = tf.Variable(
                tf.random_uniform([num_nodes, embedding_size], -1.0, 1.0))
            tar_embed = tf.nn.embedding_lookup(self._target_embeddings, self._tar_edges)
            nes_embed = tf.nn.embedding_lookup(self._target_embeddings, self._nes_edges)
        # Negative-sampling loss: log sigmoid(pos dot) + sum_i log sigmoid(-neg_i dot).
        negative = []
        for i in range(neg_sample_size):
            ns_dot = tf.multiply(nes_embed[:, i], src_embed)
            ns_dot = tf.reduce_sum(ns_dot, axis=1)
            ns_sigma = tf.log_sigmoid(-ns_dot)
            negative.append(ns_sigma)
        negative = tf.reduce_sum(negative, axis=0)
        positive = tf.multiply(tar_embed, src_embed)
        positive = tf.reduce_sum(positive, axis=1)
        self._loss = -tf.reduce_mean(tf.log_sigmoid(positive) + negative)
        # Adam optimizer with a tracked global step.
        global_step = tf.train.create_global_step()
        self._training = tf.train.AdamOptimizer(learn_rate).minimize(
            self._loss, global_step=global_step)
        # Initialize variables
        self._session.run(tf.global_variables_initializer())
def __init__(self, args):
    """Build the user-item score matrix and the loss for the chosen
    (user, item) mini-batch sampling strategy.

    Strategies: 'negative', 'stratified_sampling', 'negative_sharing',
    'SS_with_NS'. args.b is the batch size, args.k the number of negatives
    per positive, args.s the stratum size, args.Lambda the negative-term
    weight.
    """
    self.args = args
    # Effective number of user/item rows fed per batch depends on strategy.
    if args.sampling_strategy == 'negative':
        self.user_num = args.b
        self.item_num = args.b * (1 + args.k)
    elif args.sampling_strategy == 'stratified_sampling':
        self.user_num = args.b * (1 + args.k)
        self.item_num = args.b / args.s
    self.x_u_all = tf.placeholder(name='x_u', dtype=tf.float32,
                                  shape=[None, args.user_vector_dim])  # (user_num, user_vector_dim)
    self.x_v_all = tf.placeholder(name='x_v', dtype=tf.float32,
                                  shape=[None, args.seq_max_length, args.word_vector_dim])  # (item_num, seq_max_length, word_vector_dim)
    self.f_u_all = self.f(self.x_u_all)  # (user_num, embedding_dim)
    self.g_v_all = self.g(self.x_v_all, args.g_func)  # (item_num, embedding_dim)
    self.r_uv_all = tf.matmul(self.f_u_all, self.g_v_all, transpose_b=True)  # score function, (user_num, item_num)
    if args.sampling_strategy == 'negative':
        # Positive for user i sits at column i*(k+1); its k negatives follow it.
        L_positive_mean = tf.reduce_sum(tf.log_sigmoid(tf.stack(
            [self.r_uv_all[i, i * (args.k + 1)] for i in range(args.b)])))
        L_negative_mean = tf.reduce_sum(
            tf.reduce_mean(tf.log_sigmoid(-tf.stack(
                [self.r_uv_all[i, i * (args.k + 1) + 1: (i + 1) * (args.k + 1)] for i in range(args.b)])), axis=1)
        )
    elif args.sampling_strategy == 'stratified_sampling':
        # s users per item stratum; the first s rows of each block are positives.
        L_positive_mean = tf.reduce_sum(
            tf.log_sigmoid(tf.stack(
                [self.r_uv_all[i * args.s * (args.k + 1): i * args.s * (args.k + 1) + args.s, i]
                 for i in range(int(args.b / args.s))])))
        L_negative_mean = tf.reduce_sum(
            tf.reduce_mean(tf.log_sigmoid(
                -tf.stack([self.r_uv_all[i * args.s * (args.k + 1) + args.s: (i + 1) * args.s * (args.k + 1), i]
                           for i in range(int(args.b / args.s))])), axis=1) * args.s
        )
    elif args.sampling_strategy == 'negative_sharing':
        # Diagonal entries are the positives; every off-diagonal pair is a
        # shared negative.
        L_positive_mean = tf.reduce_sum(
            tf.log_sigmoid(tf.stack([self.r_uv_all[i, i] for i in range(args.b)]))
        )
        L_negative_mean = (tf.reduce_sum(tf.log_sigmoid(-self.r_uv_all)) - tf.reduce_sum(
            tf.log_sigmoid(-tf.stack([self.r_uv_all[i, i] for i in range(args.b)])))
        ) / args.b
    elif args.sampling_strategy == 'SS_with_NS':
        # Stratified positives (s users per item) plus shared negatives.
        L_positive_mean = tf.reduce_sum(
            tf.log_sigmoid(tf.stack(
                [self.r_uv_all[i * args.s: (i + 1) * args.s, i] for i in range(int(args.b / args.s))]))
        )
        L_negative_mean = (tf.reduce_sum(tf.log_sigmoid(-self.r_uv_all)) - tf.reduce_sum(
            tf.log_sigmoid(
                -tf.stack([self.r_uv_all[i * args.s: (i + 1) * args.s, i] for i in range(int(args.b / args.s))]))
        )) / (args.b / args.s)
    self.loss = -(L_positive_mean + args.Lambda * L_negative_mean)
    self.optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
    self.train_op = self.optimizer.minimize(self.loss)
def log_p_x_given_z(self, x):
    """
    :param x: An (n_samples, n_dims) tensor
    :return: An (n_samples, n_labels) tensor p_x_given_z where
        result[n, k] indicates p(X=x[n] | Z=z[k])
    """
    # Bernoulli logits per (dim, label): D x K.
    logits = tf.transpose(self.w + self.c)
    # Sum of per-dim log-likelihoods, split into "on" and "off" parts: N x K.
    on_loglik = tf.matmul(x, tf.log_sigmoid(logits))
    off_loglik = tf.matmul(1 - x, tf.log_sigmoid(-logits))
    return on_loglik + off_loglik
def _keypoints_loss(self, keypoints, gbbox_yx, gbbox_y, gbbox_x, gbbox_h,
                    gbbox_w, classid, meshgrid_y, meshgrid_x, pshape):
    """CenterNet-style keypoint heatmap focal loss.

    keypoints are predicted per-class heatmap logits; gbbox_* describe
    ground-truth box centers and sizes; classid holds per-box class ids;
    meshgrid_y/x are the heatmap pixel coordinate grids; pshape is the
    heatmap spatial shape.
    """
    # Per-box Gaussian radius, then one Gaussian bump per ground-truth box.
    sigma = self._gaussian_radius(gbbox_h, gbbox_w, 0.7)
    gbbox_y = tf.reshape(gbbox_y, [-1, 1, 1])
    gbbox_x = tf.reshape(gbbox_x, [-1, 1, 1])
    sigma = tf.reshape(sigma, [-1, 1, 1])

    num_g = tf.shape(gbbox_y)[0]
    meshgrid_y = tf.expand_dims(meshgrid_y, 0)
    meshgrid_y = tf.tile(meshgrid_y, [num_g, 1, 1])
    meshgrid_x = tf.expand_dims(meshgrid_x, 0)
    meshgrid_x = tf.tile(meshgrid_x, [num_g, 1, 1])
    # Penalty-reduction map: Gaussian centered on each GT box.
    keyp_penalty_reduce = tf.exp(-((gbbox_y - meshgrid_y)**2 + (gbbox_x - meshgrid_x)**2) / (2 * sigma**2))
    zero_like_keyp = tf.expand_dims(tf.zeros(pshape, dtype=tf.float32), axis=-1)
    reduction = []
    gt_keypoints = []
    for i in range(self.num_classes):
        exist_i = tf.equal(classid, i)
        reduce_i = tf.boolean_mask(keyp_penalty_reduce, exist_i, axis=0)
        # Pixelwise max over this class's Gaussians (zeros if no box of class i).
        reduce_i = tf.cond(
            tf.equal(tf.shape(reduce_i)[0], 0),
            lambda: zero_like_keyp,
            lambda: tf.expand_dims(tf.reduce_max(reduce_i, axis=0), axis=-1))
        reduction.append(reduce_i)
        gbbox_yx_i = tf.boolean_mask(gbbox_yx, exist_i)
        # Binary GT map with ones at this class's center pixels.
        gt_keypoints_i = tf.cond(
            tf.equal(tf.shape(gbbox_yx_i)[0], 0),
            lambda: zero_like_keyp,
            lambda: tf.expand_dims(tf.sparse.to_dense(
                tf.sparse.SparseTensor(gbbox_yx_i, tf.ones_like(
                    gbbox_yx_i[..., 0], tf.float32),
                    dense_shape=pshape), validate_indices=False),
                axis=-1))
        gt_keypoints.append(gt_keypoints_i)
    reduction = tf.concat(reduction, axis=-1)
    gt_keypoints = tf.concat(gt_keypoints, axis=-1)
    # Positive focal term at GT centers: (1 - p)^2 * log p.
    keypoints_pos_loss = -tf.pow(
        1. - tf.sigmoid(keypoints), 2.) * tf.log_sigmoid(keypoints) * gt_keypoints
    # Negative focal term with (1 - Gaussian)^4 penalty reduction;
    # log(1 - p) is computed stably from logits as -x + log_sigmoid(x).
    keypoints_neg_loss = -tf.pow(1. - reduction, 4) * tf.pow(
        tf.sigmoid(keypoints), 2.) * (
        -keypoints + tf.log_sigmoid(keypoints)) * (1. - gt_keypoints)
    # Normalize by the number of GT boxes (at least one).
    num_g = tf.maximum(num_g, tf.ones_like(num_g, dtype=tf.int32))
    keypoints_loss = tf.reduce_sum(keypoints_pos_loss) / tf.cast(
        num_g, tf.float32) + tf.reduce_sum(keypoints_neg_loss) / tf.cast(
        num_g, tf.float32)
    return keypoints_loss
def _log_prob(self, data, num_samples=1):
    """Assumes data is [batch_size] + data_dim.

    Returns a lower bound (ELBO) on the log-probability of `data` under a
    truncated rejection-sampling model: a proposal plus an acceptance
    function, with at most self.T proposal attempts.
    """
    batch_size = tf.shape(data)[0]
    log_Z = tf.log_sigmoid(self.logit_Z)  # pylint: disable=invalid-name
    # log(1 - sigmoid(x)) computed stably as -x + log_sigmoid(x).
    log_1mZ = -self.logit_Z + log_Z  # pylint: disable=invalid-name

    # [B]
    data_log_accept = tf.squeeze(tf.log_sigmoid(
        self.logit_accept_fn(data)), axis=-1)
    # Truncated geometric over the number of rejections before acceptance.
    truncated_geometric_log_probs = tf.range(self.T - 1,
                                             dtype=self.dtype) * log_1mZ
    # [B, T-1]
    truncated_geometric_log_probs = (
        truncated_geometric_log_probs[None, :] + data_log_accept[:, None])
    # [B, T]: last slot is the forced acceptance at attempt T.
    truncated_geometric_log_probs = tf.concat([
        truncated_geometric_log_probs,
        tf.tile((self.T - 1) * log_1mZ[None, None], [batch_size, 1])
    ], axis=-1)
    truncated_geometric_log_probs -= tf.reduce_logsumexp(
        truncated_geometric_log_probs, axis=-1, keepdims=True)

    # [B] entropy of the (normalized) truncated geometric.
    entropy = -tf.reduce_sum(tf.exp(truncated_geometric_log_probs) *
                             truncated_geometric_log_probs, axis=-1)

    proposal_samples = self.proposal.sample([self.T])  # [T] + data_dim
    proposal_logit_accept = self.logit_accept_fn(proposal_samples)
    # Expected log(1 - accept prob) under the proposal, again via the stable
    # -x + log_sigmoid(x) identity.
    proposal_log_reject = tf.reduce_mean(
        -proposal_logit_accept + tf.log_sigmoid(proposal_logit_accept))

    # [B] expected number of rejections times the per-rejection term.
    noise_term = tf.reduce_sum(
        tf.exp(truncated_geometric_log_probs) *
        tf.range(self.T, dtype=self.dtype)[None, :] *
        proposal_log_reject,
        axis=-1)

    try:
        # Try giving the proposal lower bound num_samples if it can use it.
        log_prob_proposal = self.proposal.log_prob(data, num_samples=num_samples)
    except TypeError:
        log_prob_proposal = self.proposal.log_prob(data)
    elbo = log_prob_proposal + data_log_accept + noise_term + entropy
    return elbo
def build_graph(self):
    """Build the discriminator network, the policy reward op (selected by
    self.reward_type) and the discriminator loss.
    """
    inputs = util.build_inputs(self._observation_space, self._action_space, scale=self._scale)
    self._obs_ph, self._act_ph, self._next_obs_ph = inputs[:3]
    self.obs_input, self.act_input, _ = inputs[3:]

    with tf.variable_scope("discrim_network"):
        self._disc_mlp, self._disc_logits_gen_is_high = self._build_discrim_net(
            self.obs_input, self.act_input, **self._build_discrim_net_kwargs)

    # Get test and train reward based on type
    if self.reward_type == 'positive':
        # -log sigmoid(logits): always >= 0.
        self._policy_test_reward = self._policy_train_reward \
            = -tf.log_sigmoid(self._disc_logits_gen_is_high)
    elif self.reward_type == 'negative':
        # log sigmoid(-logits) = log(1 - D): always <= 0.
        self._policy_test_reward = self._policy_train_reward \
            = tf.log_sigmoid(-self._disc_logits_gen_is_high)
    elif self.reward_type == 'wgan':
        self._policy_test_reward = self._policy_train_reward \
            = -self._disc_logits_gen_is_high / self.wgan_clip
    elif self.reward_type == 'neutral_b':
        # Bounded in (-1, 1).
        self._policy_test_reward = self._policy_train_reward \
            = 2*tf.sigmoid(-self._disc_logits_gen_is_high) - 1
    elif self.reward_type == 'neutral':
        self._policy_test_reward = self._policy_train_reward \
            = -self._disc_logits_gen_is_high
    elif self.reward_type == 'neutral_w':
        # weighted neutral reward = w * positive + (1 - w) * negative
        # NOTE(review): wgan_clip doubles as the mixing weight w here — confirm.
        self._policy_test_reward = self._policy_train_reward \
            = -self.wgan_clip * tf.log_sigmoid(self._disc_logits_gen_is_high) + \
            (1 - self.wgan_clip) * tf.log_sigmoid(-self._disc_logits_gen_is_high)
    else:
        raise NotImplementedError

    # Get loss function
    if self.reward_type != 'wgan':
        self._disc_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=self._disc_logits_gen_is_high,
            labels=tf.cast(self.labels_gen_is_one_ph, tf.float32),
        )
    else:
        # gen * 1 + expert * -1 -----> will minimize generator logits
        # reward will be logits_gen_is_high
        self._disc_loss = (
            1 - 2 * tf.cast(self.labels_gen_is_one_ph, tf.float32)
        ) * self._disc_logits_gen_is_high / self.wgan_clip
def add_generator_loss(self, fake_vals, outputs, labels, fake_rewards=None, classes=None, class_labels=None):
    """Generator loss: MSE plus optional classification / adversarial terms.

    Parameters
    ----------
    fake_vals : tf.Tensor
        Discriminator logits for the generated samples.
    outputs, labels : tf.Tensor
        Regression prediction and target (MSE term).
    fake_rewards : tf.Tensor, optional
        Per-sample rewards; when given, an RL-style advantage-weighted loss
        is used instead of the adversarial term.
    classes, class_labels : tf.Tensor, optional
        Classification logits and integer targets for the auxiliary
        classification loss.

    Returns
    -------
    tf.Tensor
        Scalar loss including L2 weight decay on non-bias variables.
    """
    loss = tf.losses.mean_squared_error(labels, outputs)
    # FIX: `not x is None` -> idiomatic `x is not None`.
    if classes is not None and class_labels is not None:
        class_labels = tf.cast(class_labels, tf.int32)
        class_labels = tf.one_hot(class_labels, self.num_class)
        print("class", class_labels.get_shape())
        classes = tf.reshape(classes, (self.batch_size, self.decoder_length, self.districts, self.num_class))
        class_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=class_labels, logits=classes)
        loss += class_loss
    if fake_rewards is not None:
        # FIX: typo in the log message ("reinsforcement").
        print("Using reinforcement learning")
        # Advantage magnitudes are treated as constants (no gradient).
        advantages = tf.abs(fake_rewards)
        loss = tf.reduce_mean(tf.multiply(loss, tf.stop_gradient(advantages)))
    else:
        print("Using combined loss function")
        if self.alpha:
            # Non-saturating GAN generator term: subtracting alpha*log D(G)
            # is added to the loss below (loss - sigmoid_loss).
            sigmoid_loss = self.alpha * tf.log_sigmoid(fake_vals)
            # sigmoid_loss = self.alpha * tf.losses.sigmoid_cross_entropy(fake_vals, tf.constant(1., shape=[self.batch_size, self.decoder_length]))
            # normal lossmse + (-log(D(G)))
            loss = loss - sigmoid_loss
            #loss_values = sigmoid_loss
            loss = tf.reduce_mean(loss)
        else:
            loss = tf.reduce_mean(loss)
    # L2 weight decay on all non-bias trainable variables.
    for v in tf.trainable_variables():
        if 'bias' not in v.name.lower():
            loss += 0.0001 * tf.nn.l2_loss(v)
    return loss
def __init__(self, num_user, num_item, num_factor, reg_rate, lr):
    """BPR matrix-factorization model.

    Builds user/item latent factor tables and an L2-regularized BPR pairwise
    ranking loss over (user u, positive item i, negative item j) triples,
    optimized with Adam.
    """
    print("num_factor:", num_factor, "regularization_rate:", reg_rate, "learning_rate:", lr)
    print("model preparing...")
    self.num_user = num_user
    self.num_item = num_item
    self.num_factor = num_factor
    self.reg_rate = reg_rate
    self.lr = lr

    # Ids of the user, the observed (positive) item and the sampled negative item.
    self.u = tf.placeholder(tf.int32, [None], name="uid")
    self.i = tf.placeholder(tf.int32, [None], name="iid")
    self.j = tf.placeholder(tf.int32, [None], name="jid")

    # Latent factor tables and their per-batch lookups.
    self.W_u = tf.Variable(tf.random_normal([self.num_user, self.num_factor], stddev=0.01), name="W_u")
    self.W_i = tf.Variable(tf.random_normal([self.num_item, self.num_factor], stddev=0.01), name="W_i")
    self.u_emb = tf.nn.embedding_lookup(self.W_u, self.u)
    self.i_emb = tf.nn.embedding_lookup(self.W_i, self.i)
    self.j_emb = tf.nn.embedding_lookup(self.W_i, self.j)

    # Predicted preference scores, kept as column vectors.
    self.r_hat_ui = tf.reduce_sum(tf.multiply(self.u_emb, self.i_emb), axis=1, keepdims=True)
    self.r_hat_uj = tf.reduce_sum(tf.multiply(self.u_emb, self.j_emb), axis=1, keepdims=True)

    # BPR: maximize log sigmoid of the i-vs-j score margin.
    self.bpr_loss = -tf.reduce_mean(tf.log_sigmoid(self.r_hat_ui - self.r_hat_uj))
    self.regularization = tf.nn.l2_loss(self.W_u) + tf.nn.l2_loss(self.W_i)
    self.loss = self.bpr_loss + self.reg_rate * self.regularization
    self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss)
    print("model prepared...")
def classifier(config, pooled_output, num_labels, labels, dropout_prob, ratio_weight=None, **kargs):
    """Linear classification head on top of a pooled encoder output.

    Supports single-label (softmax CE or focal loss, optionally
    ratio-weighted and center-loss augmented) and multi-label
    (per-class sigmoid CE) configurations, selected via `config`.

    Returns
    -------
    (loss, per_example_loss, logits)
    """
    output_layer = pooled_output
    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())

    output_layer = tf.nn.dropout(output_layer, keep_prob=1 - dropout_prob)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)

    if config.get("label_type", "single_label") == "single_label":
        if config.get("loss", "entropy") == "entropy":
            print("==standard cross entropy==")
            per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=tf.stop_gradient(labels))
        elif config.get("loss", "entropy") == "focal_loss":
            print("==multi_label focal loss==")
            per_example_loss, _ = loss_utils.focal_loss_multi_v1(config, logits=logits, labels=labels)
        # Weighted loss is best-effort: fall back to a plain mean on failure.
        # FIX: narrowed the bare `except:` to `except Exception:`.
        try:
            per_example_loss = loss_utils.weighted_loss_ratio(
                config, per_example_loss, labels, ratio_weight)
            loss = tf.reduce_sum(per_example_loss)
            print(" == applying weighted loss == ")
        except Exception:
            loss = tf.reduce_mean(per_example_loss)
        if config.get("with_center_loss", "no") == "center_loss":
            print("==apply with center loss==")
            center_loss, _ = loss_utils.center_loss_v2(config, features=pooled_output, labels=labels)
            loss += center_loss * config.get("center_loss_coef", 1e-3)
        return (loss, per_example_loss, logits)
    elif config.get("label_type", "single_label") == "multi_label":
        # BUG FIX: the original did `logits = tf.log_sigmoid(logits)` before
        # sigmoid_cross_entropy_with_logits, which expects *raw* logits — the
        # sigmoid was applied twice and the returned "logits" were actually
        # log-probabilities. Pass raw logits, matching the single-label branch.
        per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=logits, labels=tf.stop_gradient(labels))
        per_example_loss = tf.reduce_sum(per_example_loss, axis=-1)
        loss = tf.reduce_mean(per_example_loss)
        return (loss, per_example_loss, logits)
    else:
        raise NotImplementedError()
def logistic_logcdf(*, x, mean, logscale):
    """Log CDF of the logistic distribution, elementwise.

    cdf(x) = sigmoid((x - mean) / scale), so the log-CDF is
    log_sigmoid of the standardized value (numerically stable).
    """
    standardized = (x - mean) * tf.exp(-logscale)
    return tf.log_sigmoid(standardized)
def forward(self, tensors, mode: str = None):
    """Forward method of the layer

    Parameters
    ----------
    tensors : Tuple[tf.Tensor]
        - positives : shape = (batch, num_events)
        - negatives : shape = (batch, num_events, num_negatives)
        - mask : shape = (batch, num_events, num_negatives)
        - weights : shape = (batch, num_events)

    Returns
    -------
    tf.Tensor
        BPR loss
    """
    positives, negatives, mask, weights = tensors
    positives, negatives = make_same_shape([positives, negatives], broadcast=False)
    # One score per negative : (batch, num_events, num_negatives)
    # BPR pairwise loss: -log sigmoid(positive - negative).
    scores = -tf.log_sigmoid(positives - negatives)
    # One loss per event, average of scores : (batch, num_events)
    event_scores = WeightedAverage()((scores, tf.to_float(mask)))
    # Each event contributes according to its weight; events with no valid
    # negative (mask all False) are zeroed out.
    event_weights = weights * tf.to_float(tf.reduce_any(mask, axis=-1))
    event_losses = event_scores * event_weights
    # div_no_nan guards against a zero total weight.
    return tf.div_no_nan(tf.reduce_sum(event_losses), tf.reduce_sum(event_weights))
def focal_loss_alt(x, y, num_classes):
    """Focal loss alternative.

    Args:
      x: (tensor) predicted logits, sized [N, D].
      y: (tensor) integer class targets, sized [N,] (0 = background).
      num_classes: number of foreground classes (D).

    Return:
      (tensor) scalar focal loss, normalized by the number of positives.
    """
    alpha = 0.25
    y = tf.cast(y, tf.int32)
    # One-hot over all classes incl. background, then drop the background column.
    t = tf.one_hot(y, depth=num_classes + 1)  # [N, #total_cls]
    t = t[:, 1:]

    xt = x * (2 * t - 1)  # xt = x if t > 0 else -x
    pt = tf.log_sigmoid(2 * xt + 1)
    # Alpha-balanced weighting between positives and negatives.
    w = alpha * t + (1 - alpha) * (1 - t)
    loss = tf.reduce_sum(-w * pt / 2)

    # Normalize by the number of positive anchors (at least one).
    positive_index = tf.where(y > 0)
    num_case = tf.cast(
        tf.shape(positive_index, out_type=tf.int32)[0], tf.float32)
    num_case = tf.maximum(num_case, 1.0)
    # FIX: the original called tf.reduce_sum twice; the second call on an
    # already-scalar value was a redundant no-op and has been removed.
    return loss / num_case
def pairwise_binary_logsigmoid( labels, predictions, weights=1.0, scope=None, loss_collection=ops.GraphKeys.LOSSES, #reduction=Reduction.SUM_BY_NONZERO_WEIGHTS ): with ops.name_scope(scope, "absolute_difference", (predictions, labels, weights)) as scope: mask_pos = tf.equal(labels, 1) mask_neg = tf.equal(labels, 0) yhat_pos = tf.boolean_mask(predictions, mask_pos) yhat_neg = tf.boolean_mask(predictions, mask_neg) yhat_diff = (tf.reshape(yhat_pos, (-1, 1)) - tf.reshape(yhat_neg, (1, -1))) losses = tf.log_sigmoid(-yhat_diff) print("losses", losses.dtype) loss = tf.reduce_sum(losses) #util.add_loss(loss, loss_collection) return loss """
def _optimize_line(self):
    """
    Unsupervised training in the LINE manner.

    Builds placeholders for sampled node pairs with labels and a
    log-sigmoid objective over embedding inner products.
    """
    self.u_i = tf.placeholder(name='u_i', dtype=tf.int32, shape=[self.sample_num])
    self.u_j = tf.placeholder(name='u_j', dtype=tf.int32, shape=[self.sample_num])
    # Presumably +1 for observed edges and -1 for negative samples —
    # confirm against the sampling code.
    self.label = tf.placeholder(name='label', dtype=tf.float64, shape=[self.sample_num])
    # Embedding lookup via one-hot matmul (keeps everything in float64).
    self.u_i_embedding = tf.matmul(
        tf.one_hot(self.u_i, depth=self.node_num, dtype=tf.float64), self.embed)
    self.u_j_embedding = tf.matmul(
        tf.one_hot(self.u_j, depth=self.node_num, dtype=tf.float64), self.embed)
    self.inner_product = tf.reduce_sum(self.u_i_embedding * self.u_j_embedding, axis=1)
    self.loss = -tf.reduce_mean(
        tf.log_sigmoid(self.label * self.inner_product))
    self.line_optimizer = tf.train.AdamOptimizer(
        self.learning_rate).minimize(self.loss)
def _apply(self, y_true, y_pred):
    """ Apply the loss function.

    Parameters
    ----------
    y_true : tf.Tensor
        A tensor of true values.
    y_pred : tf.Tensor
        A tensor of predicted values (logits).

    Returns
    -------
    loss : float
        The loss value that must be minimized.
    """
    if self._loss_parameters['label_smoothing'] is not None:
        # Smooth the targets toward the uniform distribution over entities.
        y_true = tf.add((1 - self._loss_parameters['label_smoothing']) * y_true,
                        (self._loss_parameters['label_smoothing']) / self._loss_parameters['num_entities'])

    if self._loss_parameters['label_weighting']:
        # Re-weight positive/negative terms by the label prevalence.
        wt = tf.reduce_mean(y_true)
        # STABILITY FIX: log(1 - sigmoid(x) + eps) saturates to log(eps) for
        # large x and needed an epsilon guard; 1 - sigmoid(x) == sigmoid(-x),
        # so tf.log_sigmoid(-y_pred) computes the same quantity stably
        # without any epsilon.
        loss = -tf.reduce_sum((1 - wt) * y_true * tf.log_sigmoid(y_pred)
                              + wt * (1 - y_true) * tf.log_sigmoid(-y_pred))
    else:
        loss = tf.reduce_sum(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=y_true, logits=y_pred))
    return loss
def __init__(self, args):
    """Build the embedding model: sparse features, sampled node-pair
    placeholders, an energy-based LINE-style loss and an Adam train op.
    """
    # `seed` is a module-level global (not visible in this block).
    tf.set_random_seed(seed)
    np.random.seed(seed)
    self.X = tf.SparseTensor(*sparse_feeder(args.X))
    self.N, self.D = args.X.shape
    self.L = args.embedding_dim
    self.n_hidden = [512]
    # Each batch holds batch_size positives plus K negatives per positive.
    self.u_i = tf.placeholder(name='u_i', dtype=tf.int32, shape=[args.batch_size * (args.K + 1)])
    self.u_j = tf.placeholder(name='u_j', dtype=tf.int32, shape=[args.batch_size * (args.K + 1)])
    self.label = tf.placeholder(name='label', dtype=tf.float32, shape=[args.batch_size * (args.K + 1)])
    self.__create_model(args.proximity)
    if not args.is_all:
        # Held-out edges for validation scoring.
        self.val_edges = args.val_edges
        self.val_ground_truth = args.val_ground_truth
        self.neg_val_energy = -self.energy_kl(
            self.val_edges[:, 0], self.val_edges[:, 1], args.proximity)
        self.val_set = True
    else:
        self.val_set = False
    # softmax loss
    # Energy acts as a similarity score: lower KL energy = more similar.
    self.energy = -self.energy_kl(self.u_i, self.u_j, args.proximity)
    self.loss = -tf.reduce_mean(tf.log_sigmoid(self.label * self.energy))
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=args.learning_rate)
    self.train_op = self.optimizer.minimize(self.loss)
def structural_loss_inner(self, u_i, u_j, label):
    """Structural proximity loss: -mean(log sigmoid(label * <e_i, e_j>)).

    Gathers both endpoints from the concatenated embedding table and scores
    each pair by the inner product of its embeddings.
    """
    all_embeddings = tf.concat(self.embedding, axis=0)
    emb_i = tf.gather(all_embeddings, u_i)
    emb_j = tf.gather(all_embeddings, u_j)
    pair_score = tf.reduce_sum(emb_i * emb_j, axis=1)
    return -tf.reduce_mean(tf.log_sigmoid(label * pair_score))
def batch_all_triplet_loss(sparse_input, input_label, encode, pos_triplets_only=False):
    """Build the triplet loss over a batch of embeddings.

    We generate all the valid triplets and average the loss over the positive ones.

    Args:
        input_label: labels of the batch, of size (batch_size,)
        encode: tensor of shape (batch_size, embed_dim)

    Returns:
        triplet_loss: scalar tensor containing the triplet loss
        (plus per-sample weights and the positive-triplet fraction/count)
    """
    # Get the dot product
    dotproduct = tf.matmul(encode, tf.transpose(encode))

    # shape (batch_size, batch_size, 1)
    anchor_positive_dotproduct = tf.expand_dims(dotproduct, 2)
    assert anchor_positive_dotproduct.shape[2] == 1
    # shape (batch_size, 1, batch_size)
    anchor_negative_dotproduct = tf.expand_dims(dotproduct, 1)
    assert anchor_negative_dotproduct.shape[1] == 1

    # Compute a 3D tensor of size (batch_size, batch_size, batch_size)
    # triplet_loss[i, j, k] will contain the triplet loss of anchor=i, positive=j, negative=k
    # Uses broadcasting where the 1st argument has shape (batch_size, batch_size, 1)
    # and the 2nd (batch_size, 1, batch_size)
    triplet_distance = -anchor_positive_dotproduct + anchor_negative_dotproduct

    # Put to zero the invalid triplets
    # (where label(a) != label(p) or label(n) == label(a) or a == p)
    valid_triplet_mask = tf.to_float(_get_triplet_mask(input_label))
    num_valid_triplets = tf.reduce_sum(valid_triplet_mask)

    # Count number of positive triplets (where triplet_distance > 0)
    pos_valid_triplet_mask = tf.to_float(
        tf.greater(tf.multiply(valid_triplet_mask, triplet_distance), 1e-16))
    num_pos_valid_triplets = tf.reduce_sum(pos_valid_triplet_mask)

    # Set final mask
    if pos_triplets_only:
        mask = pos_valid_triplet_mask
        num_triplet = num_pos_valid_triplets
    else:
        mask = valid_triplet_mask
        num_triplet = num_valid_triplets

    # Get final mean triplet loss over the (positive) valid triplets
    # Soft-margin formulation: -log sigmoid(-d) == softplus(d).
    triplet_loss = -tf.log_sigmoid(-triplet_distance) * mask
    triplet_loss = tf.reduce_sum(triplet_loss) / (num_triplet + 1e-16)

    # Per-sample weight: how often each sample appears across the three
    # triplet roles (anchor / positive / negative).
    data_weight = tf.reduce_sum(mask, [1, 2]) + tf.reduce_sum(
        mask, [0, 1]) + tf.reduce_sum(mask, [0, 2])

    return triplet_loss, data_weight, num_pos_valid_triplets / (
        num_valid_triplets + 1e-16), num_pos_valid_triplets
def get_vars(self, feats, reuse, eps=1e-6):
    """Compute the affine-coupling scale, shift and scale log-determinant.

    The raw scale logits are shifted by +2 so the sigmoid starts near 1;
    +eps keeps the scale strictly positive.
    """
    raw_scale = self.NN(feats, "logit_s", reuse) + 2.
    s = tf.sigmoid(raw_scale) + eps
    t = self.NN(feats, "t", reuse)
    # NOTE(review): the log-determinant is sum(log sigmoid(raw)) plus a
    # constant log(eps) — not log(sigmoid + eps); kept exactly as written.
    logdet = tf.reduce_sum(tf.log_sigmoid(raw_scale), axis=[1, 2, 3]) + tf.cast(
        tf.log(eps), feats.dtype)
    return s, t, logdet
def KerasFocalLoss(target, input):
    """Binary focal loss (gamma = 2) on raw logits.

    Uses the numerically stable BCE-with-logits formulation, then scales
    each term by (1 - p_t)^gamma computed in log space.
    """
    gamma = 2.
    logits = tf.cast(input, tf.float32)
    # Stable BCE with logits via the max-val trick.
    max_val = K.clip(-logits, 0, 1)
    bce = logits - logits * target + max_val + K.log(
        K.exp(-max_val) + K.exp(-logits - max_val))
    # log(1 - p_t): sigmoid of the logit flipped by the +/-1 target.
    invprobs = tf.log_sigmoid(-logits * (target * 2.0 - 1.0))
    focal = K.exp(invprobs * gamma) * bce
    return K.mean(K.sum(focal, axis=1))
def gan_loss(x, gz, discriminator):
    """Original GAN loss.

    Args:
      x: Batch of real samples.
      gz: Batch of generated samples.
      discriminator: Discriminator function returning logits.

    Returns:
      d_loss: Discriminator loss.
      g_loss: Generator loss (non-saturating form).
    """
    dx = discriminator(x)
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        dgz = discriminator(gz)
    # Discriminator: maximize log D(x) + log(1 - D(G(z))), stated as a minimization.
    real_term = tf.log_sigmoid(dx)
    fake_term = tf.log_sigmoid(-dgz)
    d_loss = -tf.reduce_mean(real_term + fake_term)
    # Generator: maximize log D(G(z)).
    g_loss = -tf.reduce_mean(tf.log_sigmoid(dgz))
    return d_loss, g_loss
def sigmoid_focal_loss(labels, logits, gamma=2.):
    """Per-element sigmoid focal loss on logits.

    The focal modulator (1 - p_t)^gamma is computed in log space and
    excluded from gradient flow, so it only re-weights the standard
    sigmoid cross-entropy.
    """
    ce = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits)
    # log(1 - p_t), stable: flip the logit sign by the +/-1 target.
    log_one_minus_pt = tf.log_sigmoid(-logits * (labels * 2 - 1))
    modulator = tf.stop_gradient(tf.exp(log_one_minus_pt * gamma))
    return modulator * ce
def compute_loss(pred, prob, weighted):
    """Importance-weighted negative log-sigmoid loss.

    When `weighted` is set, each element's weight is a softmax over
    -pred - log(prob) (importance-sampling correction); otherwise all
    elements share a uniform softmax weight.
    """
    if weighted:
        importance = tf.nn.softmax(tf.negative(pred) - tf.log(prob))
    else:
        importance = tf.nn.softmax(tf.ones_like(pred))
    # Weighted -log sigmoid(pred), summed over the last axis.
    weighted_nll = tf.multiply(importance, tf.negative(tf.log_sigmoid(pred)))
    return tf.reduce_sum(weighted_nll, -1, keepdims=True)
def log_loss(yij, epsilon=1e-7, name="log_loss"):
    """
    bpr loss
    :param yij: score margin (positive minus negative)
    :param epsilon: A small increment to add to avoid taking a log of zero.
        NOTE(review): the epsilon is added to the *logit*, where it cannot
        guard a log — tf.log_sigmoid is already stable. Kept for
        backward-compatible numerics.
    :param name: name scope for the op
    :return: elementwise -log sigmoid(yij + epsilon)
    """
    with tf.name_scope(name):
        shifted = yij + epsilon
        return tf.negative(tf.log_sigmoid(shifted))
def __init__(self, args):
    """LINE embedding model graph (first- or second-order proximity)."""
    # Each batch holds batch_size positive pairs plus K negatives per pair.
    self.u_i = tf.placeholder(name='u_i', dtype=tf.int32, shape=[args.batch_size * (args.K + 1)])
    self.u_j = tf.placeholder(name='u_j', dtype=tf.int32, shape=[args.batch_size * (args.K + 1)])
    # Presumably +1 for positives, -1 for negatives — confirm in the sampler.
    self.label = tf.placeholder(name='label', dtype=tf.float32, shape=[args.batch_size * (args.K + 1)])
    self.embedding = tf.get_variable('target_embedding', [args.num_of_nodes, args.embedding_dim],
                                     initializer=tf.random_uniform_initializer(minval=-1., maxval=1.))
    # Embedding lookup via one-hot matmul.
    self.u_i_embedding = tf.matmul(tf.one_hot(self.u_i, depth=args.num_of_nodes), self.embedding)
    if args.proximity == 'first-order':
        self.u_j_embedding = tf.matmul(tf.one_hot(self.u_j, depth=args.num_of_nodes), self.embedding)
    elif args.proximity == 'second-order':
        # Second-order proximity scores the neighbor via a separate context table.
        self.context_embedding = tf.get_variable('context_embedding', [args.num_of_nodes, args.embedding_dim],
                                                 initializer=tf.random_uniform_initializer(minval=-1., maxval=1.))
        self.u_j_embedding = tf.matmul(tf.one_hot(self.u_j, depth=args.num_of_nodes), self.context_embedding)
    self.inner_product = tf.reduce_sum(self.u_i_embedding * self.u_j_embedding, axis=1)
    self.loss = -tf.reduce_mean(tf.log_sigmoid(self.label * self.inner_product))
    self.learning_rate = tf.placeholder(name='learning_rate', dtype=tf.float32)
    # self.optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate)
    self.optimizer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate)
    self.train_op = self.optimizer.minimize(self.loss)
def discriminator(encodings, sequence_lengths, lang_ids, num_layers=3, hidden_size=1024, dropout=0.3):
    """Discriminates the encoder outputs against lang_ids.

    Args:
      encodings: The encoder outputs of shape [batch_size, max_time, hidden_size].
      sequence_lengths: The length of each sequence of shape [batch_size].
      lang_ids: The true lang id of each sequence of shape [batch_size].
      num_layers: The number of layers of the discriminator.
      hidden_size: The hidden size of the discriminator.
      dropout: The dropout to apply on each discriminator layer output.

    Returns:
      A tuple with: the discriminator loss (L_d) and the adversarial loss (L_adv).
    """
    x = encodings
    for _ in range(num_layers):
        x = tf.nn.dropout(x, 1.0 - dropout)
        x = tf.layers.dense(x, hidden_size, activation=tf.nn.leaky_relu)
    x = tf.nn.dropout(x, 1.0 - dropout)
    # Per-timestep logit.
    y = tf.layers.dense(x, 1)
    # Mask out padded timesteps before pooling over time.
    mask = tf.sequence_mask(
        sequence_lengths, maxlen=tf.shape(encodings)[1], dtype=tf.float32)
    mask = tf.expand_dims(mask, -1)
    # Sequence-level probability: product over valid timesteps of the
    # per-step sigmoids, accumulated in log space then exponentiated.
    y = tf.log_sigmoid(y) * mask
    y = tf.reduce_sum(y, axis=1)
    y = tf.exp(y)
    # Discriminator loss is label-smoothed; adversarial loss flips the labels.
    l_d = binary_cross_entropy(y, lang_ids, smoothing=0.1)
    l_adv = binary_cross_entropy(y, 1 - lang_ids)
    return l_d, l_adv
def _log_unnormalized_prob(self, x):
    """Unnormalized log-prob under the logits parameterization.

    With p = sigmoid(logits): total_count * log(1 - p) + x * log(p),
    both logs taken stably from the logits.
    """
    if self.validate_args:
        x = distribution_util.embed_check_nonnegative_integer_form(x)
    log_one_minus_p = tf.log_sigmoid(-self.logits)
    log_p = tf.log_sigmoid(self.logits)
    return self.total_count * log_one_minus_p + x * log_p