def loss_kldiv(y_in, x):
    """
    mass sculpting penalty term using kullback_leibler_divergence
    y_in: truth [h, y]
    x: predicted NN output for y
    h: the truth mass histogram vector "one-hot encoded" (length NBINS=40)
    y: the truth categorical labels "one-hot encoded" (length NClasses=2)
    """
    h = y_in[:, 0:NBINS]
    y = y_in[:, NBINS:NBINS + 2]
    h_all = K.dot(K.transpose(h), y)
    h_all_q = h_all[:, 0]
    h_all_h = h_all[:, 1]
    h_all_q = h_all_q / K.sum(h_all_q, axis=0)
    h_all_h = h_all_h / K.sum(h_all_h, axis=0)
    h_btag_anti_q = K.dot(K.transpose(h), K.dot(tf.diag(y[:, 0]), x))
    h_btag_anti_h = K.dot(K.transpose(h), K.dot(tf.diag(y[:, 1]), x))
    h_btag_q = h_btag_anti_q[:, 1]
    h_btag_q = h_btag_q / K.sum(h_btag_q, axis=0)
    h_anti_q = h_btag_anti_q[:, 0]
    h_anti_q = h_anti_q / K.sum(h_anti_q, axis=0)
    h_btag_h = h_btag_anti_h[:, 1]
    h_btag_h = h_btag_h / K.sum(h_btag_h, axis=0)
    h_anti_h = h_btag_anti_h[:, 0]  # fixed: original indexed h_btag_anti_q here
    h_anti_h = h_anti_h / K.sum(h_anti_h, axis=0)
    return categorical_crossentropy(y, x) + \
        LAMBDA * kullback_leibler_divergence(h_btag_q, h_anti_q) + \
        LAMBDA * kullback_leibler_divergence(h_btag_h, h_anti_h)

def JSD(p, q):
    """
    Jensen-Shannon divergence: a smoothed and symmetric version of the KL divergence.
    """
    m = 0.5 * (p + q)
    return 0.5 * losses.kullback_leibler_divergence(p, m) + \
        0.5 * losses.kullback_leibler_divergence(q, m)

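# For reference, a minimal NumPy sketch (independent of the Keras backend) that checks
# two properties of the Jensen-Shannon divergence used above: it is symmetric in its
# arguments and bounded by ln 2. The _np_kl helper is an illustrative stand-in for
# losses.kullback_leibler_divergence, mirroring its clip-then-sum(p * log(p / q)) behaviour.
import numpy as np

def _np_kl(p, q, eps=1e-7):
    p = np.clip(p, eps, 1.0)
    q = np.clip(q, eps, 1.0)
    return np.sum(p * np.log(p / q))

def _np_jsd(p, q):
    m = 0.5 * (p + q)
    return 0.5 * _np_kl(p, m) + 0.5 * _np_kl(q, m)

p = np.array([0.1, 0.4, 0.5])
q = np.array([0.3, 0.3, 0.4])
assert np.isclose(_np_jsd(p, q), _np_jsd(q, p))   # symmetric
assert 0.0 <= _np_jsd(p, q) <= np.log(2)          # bounded by ln 2
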
def loss_kldiv(y_in, x):
    """
    mass sculpting penalty term using kullback_leibler_divergence
    y_in: truth [h, y]
    x: predicted NN output for y
    h: the truth mass histogram vector "one-hot encoded" (length NBINS=40)
    y: the truth categorical labels "one-hot encoded" (length NClasses=2)
    """
    h = y_in[:, 0:NBINS]
    y = y_in[:, NBINS:NBINS + 2]
    # build mass histogram for true q events weighted by q, b prob
    h_alltag_q = K.dot(K.transpose(h), K.dot(tf.diag(y[:, 0]), x))
    # build mass histogram for true b events weighted by q, b prob
    h_alltag_b = K.dot(K.transpose(h), K.dot(tf.diag(y[:, 1]), x))
    # select mass histogram for true q events weighted by q prob; normalize
    h_qtag_q = h_alltag_q[:, 0]
    h_qtag_q = h_qtag_q / K.sum(h_qtag_q, axis=0)
    # select mass histogram for true q events weighted by b prob; normalize
    h_btag_q = h_alltag_q[:, 1]
    h_btag_q = h_btag_q / K.sum(h_btag_q, axis=0)
    # select mass histogram for true b events weighted by q prob; normalize
    h_qtag_b = h_alltag_b[:, 0]
    h_qtag_b = h_qtag_b / K.sum(h_qtag_b, axis=0)
    # select mass histogram for true b events weighted by b prob; normalize
    h_btag_b = h_alltag_b[:, 1]
    h_btag_b = h_btag_b / K.sum(h_btag_b, axis=0)
    # KL divergence between true q events weighted by b vs q prob (symmetrize?)
    # KL divergence between true b events weighted by b vs q prob (symmetrize?)
    return categorical_crossentropy(y, x) + \
        LAMBDA_ADV * kullback_leibler_divergence(h_btag_q, h_qtag_q) + \
        LAMBDA_ADV * kullback_leibler_divergence(h_btag_b, h_qtag_b)

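# The loss above expects each training target to be the concatenation [h, y] of a
# one-hot mass histogram (NBINS wide) and the one-hot class labels. A hedged NumPy
# sketch of how such targets could be packed; the bin range (mass_min, mass_max) and
# the helper name are illustrative assumptions, not part of the original code.
import numpy as np

NBINS = 40

def pack_targets(mass, labels, mass_min=40.0, mass_max=200.0):
    # digitize each event's mass into NBINS bins and one-hot encode the bin index
    edges = np.linspace(mass_min, mass_max, NBINS + 1)
    idx = np.clip(np.digitize(mass, edges) - 1, 0, NBINS - 1)
    h = np.eye(NBINS)[idx]
    # concatenate with the one-hot class labels -> shape (n_events, NBINS + 2)
    return np.concatenate([h, labels], axis=1)
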
def fit_sinc(sampler, stepsize, data_seed, num_training_datapoints=20):
    x_train = init_random_uniform(np.zeros(1), np.ones(1),
                                  num_points=num_training_datapoints,
                                  rng=np.random.RandomState(seed=data_seed))
    y_train = sinc(x_train)
    x_test = np.linspace(0, 1, 100)[:, None]
    y_test = sinc(x_test)

    if sampler == "SGHMC":
        model = Robo_BNN(sampling_method=SAMPLERS[sampler], l_rate=stepsize)
    else:
        from keras.losses import cosine_proximity, kullback_leibler_divergence, binary_crossentropy
        model = BayesianNeuralNetwork(
            optimizer=SAMPLERS[sampler],
            learning_rate=stepsize,
            hyperloss=lambda y_true, y_pred: kullback_leibler_divergence(
                y_true=y_true, y_pred=y_pred[:, 0]))

    model.train(x_train, y_train)
    prediction_mean, prediction_variance = model.predict(x_test)
    prediction_std = np.sqrt(prediction_variance)

    return {
        "prediction_mean": prediction_mean.tolist(),
        "prediction_std": prediction_std.tolist(),
        "x_train": x_train.tolist(),
        "y_train": y_train.tolist(),
        "x_test": x_test.tolist(),
        "y_test": y_test.tolist()
    }

def augmented_loss(self, y_true, y_pred):
    _y_pred = Activation("softmax")(y_pred)
    loss = K.categorical_crossentropy(_y_pred, y_true)

    # y is (batch x seq x vocab)
    y_indexes = K.argmax(y_true, axis=2)  # turn one-hot to index (batch x seq)
    y_vectors = self.embedding(y_indexes)  # look up the vector (batch x seq x vector_length)

    #v_length = self.setting.vector_length
    #y_vectors = K.reshape(y_vectors, (-1, v_length))
    #y_t = K.map_fn(lambda v: K.dot(self.embedding.embeddings, K.reshape(v, (-1, 1))), y_vectors)
    #y_t = K.squeeze(y_t, axis=2)  # unknown but necessary operation
    #y_t = K.reshape(y_t, (-1, self.sequence_size, self.vocab_size))

    # vector x embedding dot products (batch x seq x vocab)
    y_t = tf.tensordot(y_vectors, K.transpose(self.embedding.embeddings), 1)
    y_t = K.reshape(y_t, (-1, self.sequence_size, self.vocab_size))  # explicitly set shape
    y_t = K.softmax(y_t / self.temperature)
    _y_pred_t = Activation("softmax")(y_pred / self.temperature)
    aug_loss = kullback_leibler_divergence(y_t, _y_pred_t)
    loss += (self.gamma * self.temperature) * aug_loss
    return loss

def loss(y_true, y_pred):
    KLD = kullback_leibler_divergence(q_c, y_pred)
    cross_entropy = categorical_crossentropy(y_true, y_pred)
    F_diff_squared = K.sum(K.square(F_i - F_c))
    loss_value = (1 - alpha) * cross_entropy + alpha * KLD + beta * F_diff_squared
    return loss_value

def loss(yT, yP):
    '''
    yT, yP: (None, pix_num, 3)
    '''
    yP_flat = tf.reshape(yP, (-1, n_clusters))
    p = tf_target_distribution(yP_flat)
    return tf.reduce_mean(kullback_leibler_divergence(yP_flat, p))

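# tf_target_distribution is not shown above. In DEC-style clustering it is usually the
# sharpened auxiliary distribution p_ij = (q_ij^2 / f_j) / sum_j' (q_ij'^2 / f_j'), with
# f_j the soft cluster frequencies. A NumPy sketch under that assumption only:
import numpy as np

def target_distribution(q):
    # q: (n_samples, n_clusters) soft assignments; rows sum to 1
    weight = q ** 2 / q.sum(axis=0)           # sharpen and normalise by cluster frequency
    return (weight.T / weight.sum(axis=1)).T  # renormalise each row to a distribution
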
def customized_loss(self, y_true, y_pred, alpha=0.0001, beta=3):
    """
    Linear combination of MAE and KL divergence.
    """
    loss1 = losses.mean_absolute_error(y_true, y_pred)
    loss2 = losses.kullback_leibler_divergence(y_true, y_pred)
    #(alpha/2) *
    return loss1 + beta * loss2

def customLoss(yTrue, yPred):
    img_loss = kullback_leibler_divergence(K.reshape(yTrue, [-1]) / K.sum(yTrue),
                                           K.reshape(yPred, [-1]) / K.sum(yPred))
    sobel_loss, mask = sobelLoss(yTrue, yPred)
    BCE = binary_crossentropy(yTrue, yPred)
    masked_loss = K.mean(K.exp(K.sum(mask, axis=3)) * K.square(yTrue - yPred))  # [16,62,62,2] vs. [16,64,64,1]
    reg_loss = sobelNorm(model.layers[1].locnet.output)  # why does this term give zeros? Do not use it alone...
    return img_loss + sobel_loss + 0.3 * BCE

def loss(y_true, y_pred):
    #y_true = K.clip(y_true, K.epsilon(), 1)
    #y_pred = K.clip(y_pred, K.epsilon(), 1)
    #rt = K.mean((K.softmax(old_q) / K.softmax(y_pred)))
    #c = K.clip(rt, 0.8, 1.2)
    q_true = y_true
    q_pred = y_pred
    return mean_squared_error(q_true, q_pred) + K.exp(
        kullback_leibler_divergence(old_q, q_pred))

def loss_kldiv5(y_in, x_in):
    h = y_in[:, 0:NBINS]
    y = y_in[:, NBINS:NBINS + 2]
    x = x_in[:, NBINS:NBINS + 2]
    h_blike_slike_s = K.dot(K.transpose(h), K.dot(tf.diag(y[:, 0]), x))
    h_blike_slike_b = K.dot(K.transpose(h), K.dot(tf.diag(y[:, 1]), x))
    h_blike_s = h_blike_slike_s[:, 1]
    h_blike_s = h_blike_s / K.sum(h_blike_s, axis=0)
    h_slike_s = h_blike_slike_s[:, 0]
    h_slike_s = h_slike_s / K.sum(h_slike_s, axis=0)
    h_blike_b = h_blike_slike_b[:, 1]
    h_blike_b = h_blike_b / K.sum(h_blike_b, axis=0)
    h_slike_b = h_blike_slike_b[:, 0]  # fixed: original indexed h_blike_slike_s here
    h_slike_b = h_slike_b / K.sum(h_slike_b, axis=0)
    return categorical_crossentropy(y, x) + \
        0.5 * kullback_leibler_divergence(h_blike_s, h_slike_s) + \
        0.5 * kullback_leibler_divergence(h_blike_b, h_slike_b)

def mass_jsdiv_q(y_in, x):
    """
    Jensen-Shannon divergence term for anti-tag events (QCD),
    to be used with custom loss_kldiv
    """
    h = y_in[:, 0:NBINS]
    y = y_in[:, NBINS:NBINS + 2]
    # build mass histogram for true q events weighted by q, b prob
    h_alltag_q = K.dot(K.transpose(h), K.dot(tf.diag(y[:, 0]), x))
    # select mass histogram for true q events weighted by q prob; normalize
    h_qtag_q = h_alltag_q[:, 0]
    h_qtag_q = h_qtag_q / K.sum(h_qtag_q, axis=0)
    # select mass histogram for true q events weighted by b prob; normalize
    h_btag_q = h_alltag_q[:, 1]
    h_btag_q = h_btag_q / K.sum(h_btag_q, axis=0)
    h_aver_q = 0.5 * h_btag_q + 0.5 * h_qtag_q
    return 0.5 * kullback_leibler_divergence(h_btag_q, h_aver_q) + \
        0.5 * kullback_leibler_divergence(h_qtag_q, h_aver_q)

def customized_loss(y_true, y_pred, alpha=0.0001, beta=3):
    """
    Create a customized loss for the stacked AE.
    Linear combination of MAE and KL divergence.
    """
    # customize your own loss components
    loss1 = losses.mean_absolute_error(y_true, y_pred)
    loss2 = losses.kullback_leibler_divergence(y_true, y_pred)
    # adjust the weight between loss components
    return (alpha / 2) * loss1 + beta * loss2

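# A minimal usage sketch: passing the custom loss above to model.compile on a toy
# autoencoder. The layer sizes and input_dim are placeholders, not taken from the
# original project.
from keras.models import Sequential
from keras.layers import Dense

input_dim = 784  # placeholder feature size
autoencoder = Sequential([
    Dense(64, activation="relu", input_shape=(input_dim,)),
    Dense(input_dim, activation="sigmoid"),
])
# alpha and beta keep their defaults here; to change them, wrap the loss in a
# closure or functools.partial before passing it to compile.
autoencoder.compile(optimizer="adam", loss=customized_loss)
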
def customLoss(yTrue, yPred):
    # norm_T = K.pow(K.sum(K.square(yTrue)), 0.5)
    # norm_P = K.pow(K.sum(K.square(yPred)), 0.5)
    # img_loss = kullback_leibler_divergence(K.reshape(yTrue, [-1])/K.sum(yTrue),
    #                                        K.reshape(yPred, [-1])/K.sum(yPred))
    img_loss = kullback_leibler_divergence(K.softmax(K.reshape(yTrue, [-1])),
                                           K.softmax(K.reshape(yPred, [-1])))
    # sobel_loss = sobelLoss(yTrue, yPred)
    BCE = binary_crossentropy(yTrue, yPred)
    # return img_loss
    return img_loss + 0.3 * BCE

def __call__(self, x):
    loss = 0.
    marginalized_vars = []
    for i, size in enumerate(self.sizes):
        marginalized = K.sum(tf.gather(x, self.idx[i], axis=-1), axis=-1)
        # marginalized = marginalized / K.sum(marginalized)  # this should not be needed
        marginalized_vars.append(marginalized)
    products = K.stack([
        a for a in combine(marginalized_vars, self.sizes,
                           lambda x1, x2: x1 * x2)
    ])
    loss += self.weight * kullback_leibler_divergence(x, products)
    return loss

def fastbert(teacher, classifier, speed=speed):
    inputs = teacher.inputs
    # freeze the teacher layers and the final classifier
    for layer in teacher.model.layers:
        layer.trainable = False
    classifier.trainable = False

    x_pre = teacher.apply_embeddings(inputs)
    emb_name = 'FastBert-embedding'
    clf_pre = teacher.apply(x_pre,
                            FastbertClassifierLayer,
                            name=emb_name,
                            labels_num=num_classes)
    student_outputs = [clf_pre]
    outputs = [clf_pre, x_pre]

    for idx in range(teacher.num_hidden_layers):
        clf_pre, x_pre = outputs
        name = 'FastBert-%d' % idx
        x_next = teacher.apply_attention_layers(x_pre, idx)
        clf_next = teacher.apply(x_pre,
                                 FastbertClassifierLayer,
                                 name=name,
                                 labels_num=num_classes)
        student_outputs.append(clf_next)
        x = SwitchTwo(speed)([clf_pre, x_pre, x_next])
        clf = SwitchTwo(speed)([clf_pre, clf_pre, clf_next])
        outputs = [clf, x]

    clf_prob, x = outputs
    x = classifier(x)
    output = SwitchTwo(speed)([clf_prob, clf_prob, x])
    model_infer = Model(inputs, output)

    label_inputs = Input(shape=(None, ))
    model_train = Model(inputs + [label_inputs], student_outputs)
    for i, prob in enumerate(student_outputs):
        # cross entropy against the labels plus KL distillation against the final output
        ce_loss = K.sparse_categorical_crossentropy(label_inputs, prob)
        kl_loss = kullback_leibler_divergence(x, prob)
        model_train.add_loss(ce_loss)
        model_train.add_metric(ce_loss, name='ce_loss-%d' % i)
        model_train.add_loss(kl_loss)
        model_train.add_metric(kl_loss, name='loss-%d' % i)

    model_1 = Model(inputs, student_outputs[1])
    model_2 = Model(inputs, student_outputs[2])
    return model_train, model_infer, model_1, model_2

def loss_kldiv(y_in, x):
    # h is the histogram vector "one-hot encoded" (40 bins in this case), technically part of the "truth" y
    h = y_in[:, 0:NBINS]
    y = y_in[:, NBINS:]
    h_all = K.dot(K.transpose(h), y)
    h_all_q = h_all[:, 0]
    h_all_h = h_all[:, 1]
    h_all_q = h_all_q / K.sum(h_all_q, axis=0)
    h_all_h = h_all_h / K.sum(h_all_h, axis=0)
    h_btag_anti_q = K.dot(K.transpose(h), K.dot(tf.diag(y[:, 0]), x))
    h_btag_anti_h = K.dot(K.transpose(h), K.dot(tf.diag(y[:, 1]), x))
    h_btag_q = h_btag_anti_q[:, 1]
    h_btag_q = h_btag_q / K.sum(h_btag_q, axis=0)
    h_anti_q = h_btag_anti_q[:, 0]
    h_anti_q = h_anti_q / K.sum(h_anti_q, axis=0)
    h_btag_h = h_btag_anti_h[:, 1]
    h_btag_h = h_btag_h / K.sum(h_btag_h, axis=0)
    h_anti_h = h_btag_anti_h[:, 0]  # fixed: original indexed h_btag_anti_q here
    h_anti_h = h_anti_h / K.sum(h_anti_h, axis=0)
    return categorical_crossentropy(y, x) + \
        kullback_leibler_divergence(h_btag_q, h_anti_q) + \
        kullback_leibler_divergence(h_btag_h, h_anti_h)

def __init__(self, model, epsilon, k, a, random_start, loss_func):
    """Attack parameter initialization. The attack performs k steps of
    size a, while always staying within epsilon from the initial point."""
    self.model = model
    self.epsilon = epsilon
    self.k = k
    self.a = a
    self.rand = random_start

    self.x_nat_prob = tf.placeholder(tf.float32, shape=[None, 10])
    loss = tf.reduce_sum(
        kullback_leibler_divergence(self.x_nat_prob, model.prob))
    self.grad = tf.gradients(loss, model.x_input)[0]

def mass_kldiv_h(y_in, x):
    """
    KL divergence term for tag events (H) to be used with custom loss_kldiv
    """
    h = y_in[:, 0:NBINS]
    y = y_in[:, NBINS:NBINS + 2]
    # build mass histogram for true b events weighted by q, b prob
    h_alltag_b = K.dot(K.transpose(h), K.dot(tf.diag(y[:, 1]), x))
    # select mass histogram for true b events weighted by q prob; normalize
    h_qtag_b = h_alltag_b[:, 0]
    h_qtag_b = h_qtag_b / K.sum(h_qtag_b, axis=0)
    # select mass histogram for true b events weighted by b prob; normalize
    h_btag_b = h_alltag_b[:, 1]
    h_btag_b = h_btag_b / K.sum(h_btag_b, axis=0)
    return kullback_leibler_divergence(h_btag_b, h_qtag_b)

def fit_uci(sampler, stepsize, data_seed, burn_in_steps=5000, num_steps=15000,
            num_nets=100, batch_size=32, test_split=0.1):
    datasets = (BostonHousing, YachtHydrodynamics, Concrete, WineQualityRed)
    results = {}

    for dataset in datasets:
        train_data, (x_test, y_test) = dataset.load_data(
            test_split=test_split, seed=data_seed)

        had_nans = True
        while had_nans:
            if sampler == "sghmc":
                model = Robo_BNN(l_rate=stepsize,
                                 sampling_method="sghmc",
                                 n_nets=num_nets,
                                 burn_in=burn_in_steps,
                                 n_iters=num_steps,
                                 bsize=batch_size)
            elif sampler.startswith("SGHMCHD"):
                # SGHMCHD approaches with different kwargs
                model = KerasBayesianNeuralNetwork(
                    optimizer=SAMPLERS[sampler],
                    learning_rate=stepsize,
                    train_callbacks=(TensorBoard(histogram_freq=1, batch_size=20),),
                    hyperloss=lambda y_true, y_pred: kullback_leibler_divergence(
                        y_true=y_true, y_pred=y_pred[:, 0]))
            else:
                raise NotImplementedError()

            model.train(*train_data)
            prediction_mean, prediction_variance = model.predict(x_test)
            had_nans = (np.isnan(prediction_mean).any()
                        or np.isnan(prediction_variance).any())

        results[dataset.__name__] = {
            "x_test": x_test.tolist(),
            "y_test": y_test.tolist(),
            "prediction_mean": prediction_mean.tolist(),
            "prediction_variance": prediction_variance.tolist()
        }

    return results

def loss_function(y_true, y_pred):
    return losses.kullback_leibler_divergence(y_true, y_pred) + add_loss

def cross_network_similarity_loss(y_true, y_pred):
    y_pred = tf.transpose(y_pred, [1, 0, 2])
    p1 = y_pred[0]
    p2 = y_pred[1]
    kl = KLoss.kullback_leibler_divergence(p1, p2)
    return tf.maximum(0.0, kl - 0.15)

datagen = ImageDataGenerator()
epoch_num = 500
batch_size = 16

labels = tf.placeholder(tf.float32, shape=(None, num_classes))
model = image_entry_model_46(time_steps, data_dim)
train_var = tf.trainable_variables()
predicts = model.output
inputs = model.input

if ifRegularizer == True:
    regularizer = tf.contrib.layers.l2_regularizer(0.001)
    loss = tf.reduce_mean(
        losses.kullback_leibler_divergence(labels, predicts)) + \
        tf.contrib.layers.apply_regularization(
            regularizer, weights_list=train_var[:12] + train_var[18:30])
else:
    loss = tf.reduce_mean(
        losses.kullback_leibler_divergence(labels, predicts))

UAR_value = tf.constant(0.0)
tf.summary.scalar("loss", loss)
tf.summary.scalar("UAR", UAR_value)
train_step = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(
    loss=loss, var_list=train_var)
init = tf.global_variables_initializer()

# the main training procedure
############################################################################

def vae_loss(encoder_inputs, decoder_outputs):
    xent_loss = K.categorical_crossentropy(encoder_inputs, decoder_outputs)
    # note: the KL term here is taken between inputs and reconstructions,
    # not between the latent posterior and the prior as in a standard VAE
    kl_loss = beta * kullback_leibler_divergence(encoder_inputs, decoder_outputs)
    loss = xent_loss + kl_loss
    return loss

def KLD_loss(y_true, y_pred):
    return kullback_leibler_divergence(y_true, y_pred)

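# For comparison, tf.keras 2.x ships a class-based version of the same loss, so a thin
# wrapper like KLD_loss is only needed when extra terms or weights are added.
import tensorflow as tf

kld = tf.keras.losses.KLDivergence()
# model.compile(optimizer="adam", loss=kld)   # equivalent to loss=KLD_loss above
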
# Setting up the data and the model
raw_data = data_input.Data(one_hot=True)
global_step = tf.contrib.framework.get_or_create_global_step()

x_input = tf.placeholder(tf.float32, shape=[None, 96, 96, 3])
adv_x_input = tf.placeholder(tf.float32, shape=[None, 96, 96, 3])
y_input = tf.placeholder(tf.int64, shape=[None, 10])

model = Model(x_input, y_input, mode='train')
model_adv = Model(adv_x_input, y_input, mode='train', reuse=True)

# Setting up the optimizer: natural cross entropy plus a KL term that pulls the
# predictive distributions on clean and adversarial inputs together
loss = model.mean_xent + 6.0 * tf.reduce_mean(
    kullback_leibler_divergence(model.prob, model_adv.prob))
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_step = tf.train.AdamOptimizer(1e-3).minimize(loss, global_step=global_step)

# Set up adversary
attack = LinfTradeAttack(model, config['epsilon'], config['k'], config['a'],
                         config['random_start'], config['loss_func'])

# Setting up the Tensorboard and checkpoint outputs
model_dir = config['model_dir']
if not os.path.exists(model_dir):
    os.makedirs(model_dir)

def call(self, x):
    return -1. * kullback_leibler_divergence(x[0], x[1])

############################################################################
datagen = ImageDataGenerator()
epoch_num = 500
batch_size = 16

labels = tf.placeholder(tf.float32, shape=(None, num_classes))
model = image_entry_model_46(time_steps, data_dim)
train_var = tf.trainable_variables()
predicts = model.output
inputs = model.input

if ifRegularizer == True:
    regularizer = tf.contrib.layers.l2_regularizer(0.001)
    loss = tf.reduce_mean(losses.kullback_leibler_divergence(labels, predicts)) + \
        tf.contrib.layers.apply_regularization(
            regularizer, weights_list=train_var[:12] + train_var[18:30])
else:
    loss = tf.reduce_mean(losses.kullback_leibler_divergence(labels, predicts))

UAR_value = tf.constant(0.0)
tf.summary.scalar("loss", loss)
tf.summary.scalar("UAR", UAR_value)
train_step = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(
    loss=loss, var_list=train_var)
init = tf.global_variables_initializer()

# the main training procedure
############################################################################
with tf.Session() as sess:
    sess.run(init)

def loss(y_true, y_pred):
    return loss_weight * kullback_leibler_divergence(y_true, y_pred)

def lossfunction(y_true, y_pred):
    return kullback_leibler_divergence(y_true, y_pred)