Example #1
def loss_kldiv(y_in,x):
    """
    mass sculpting penalty term using kullback_leibler_divergence
    y_in: truth [h, y]
    x: predicted NN output for y
    h: the truth mass histogram vector "one-hot encoded" (length NBINS=40)
    y: the truth categorical labels  "one-hot encoded" (length NClasses=2)
    """
    h = y_in[:,0:NBINS]
    y = y_in[:,NBINS:NBINS+2]
    h_all = K.dot(K.transpose(h), y)
    h_all_q = h_all[:,0]
    h_all_h = h_all[:,1]
    h_all_q = h_all_q / K.sum(h_all_q,axis=0)
    h_all_h = h_all_h / K.sum(h_all_h,axis=0)
    h_btag_anti_q = K.dot(K.transpose(h), K.dot(tf.diag(y[:,0]),x))
    h_btag_anti_h = K.dot(K.transpose(h), K.dot(tf.diag(y[:,1]),x))
    h_btag_q = h_btag_anti_q[:,1]
    h_btag_q = h_btag_q / K.sum(h_btag_q,axis=0)
    h_anti_q = h_btag_anti_q[:,0]
    h_anti_q = h_anti_q / K.sum(h_anti_q,axis=0)
    h_btag_h = h_btag_anti_h[:,1]
    h_btag_h = h_btag_h / K.sum(h_btag_h,axis=0)
    h_anti_h = h_btag_anti_h[:,0]
    h_anti_h = h_anti_h / K.sum(h_anti_h,axis=0)

    return categorical_crossentropy(y, x) + \
        LAMBDA*kullback_leibler_divergence(h_btag_q, h_anti_q) + \
        LAMBDA*kullback_leibler_divergence(h_btag_h, h_anti_h)         
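The loss above assumes the truth tensor packs the per-event mass histogram next to the class labels, y_in = [h, y]. A minimal sketch of how such targets could be assembled before compiling with loss_kldiv, assuming Keras with the TensorFlow backend; pack_targets, mass_bin_idx and class_idx are hypothetical names introduced here:

import numpy as np
from keras.utils import to_categorical

NBINS = 40   # number of mass histogram bins, matching the loss above

def pack_targets(mass_bin_idx, class_idx):
    """Concatenate the one-hot mass bin and the one-hot class label into y_in = [h, y]."""
    h = to_categorical(mass_bin_idx, num_classes=NBINS)   # (n_events, NBINS)
    y = to_categorical(class_idx, num_classes=2)          # (n_events, 2)
    return np.concatenate([h, y], axis=1)                 # (n_events, NBINS + 2)

# model.compile(optimizer="adam", loss=loss_kldiv)
# model.fit(x_train, pack_targets(mass_bin_idx, class_idx), ...)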
Example #2
def JSD(p, q):
    """
    Jensen-Shannon divergence: A smoothed and symmetric version of the KL divergence.
    """
    m = 0.5 * (p + q)
    return 0.5 * losses.kullback_leibler_divergence(
        p, m) + 0.5 * losses.kullback_leibler_divergence(q, m)
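A quick sanity check of the symmetry JSD adds on top of plain KL, assuming the imports the snippet above relies on (from keras import losses and from keras import backend as K):

import numpy as np
from keras import backend as K
from keras import losses

p = K.constant(np.array([[0.1, 0.4, 0.5]]))
q = K.constant(np.array([[0.3, 0.3, 0.4]]))

# KL itself is asymmetric; JSD gives the same value in both directions.
print(K.eval(losses.kullback_leibler_divergence(p, q)),
      K.eval(losses.kullback_leibler_divergence(q, p)))
print(K.eval(JSD(p, q)), K.eval(JSD(q, p)))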
Example #3
def loss_kldiv(y_in, x):
    """
    mass sculpting penalty term using kullback_leibler_divergence
    y_in: truth [h, y]
    x: predicted NN output for y
    h: the truth mass histogram vector "one-hot encoded" (length NBINS=40)
    y: the truth categorical labels  "one-hot encoded" (length NClasses=2)
    """
    h = y_in[:, 0:NBINS]
    y = y_in[:, NBINS:NBINS + 2]

    # build mass histogram for true q events weighted by q, b prob
    h_alltag_q = K.dot(K.transpose(h), K.dot(tf.diag(y[:, 0]), x))
    # build mass histogram for true b events weighted by q, b prob
    h_alltag_b = K.dot(K.transpose(h), K.dot(tf.diag(y[:, 1]), x))

    # select mass histogram for true q events weighted by q prob; normalize
    h_qtag_q = h_alltag_q[:, 0]
    h_qtag_q = h_qtag_q / K.sum(h_qtag_q, axis=0)
    # select mass histogram for true q events weighted by b prob; normalize
    h_btag_q = h_alltag_q[:, 1]
    h_btag_q = h_btag_q / K.sum(h_btag_q, axis=0)
    # select mass histogram for true b events weighted by q prob; normalize
    h_qtag_b = h_alltag_b[:, 0]
    h_qtag_b = h_qtag_b / K.sum(h_qtag_b, axis=0)
    # select mass histogram for true b events weighted by b prob; normalize
    h_btag_b = h_alltag_b[:, 1]
    h_btag_b = h_btag_b / K.sum(h_btag_b, axis=0)

    # compute KL divergence between true q events weighted by b vs q prob (symmetrize?)
    # compute KL divergence between true b events weighted by b vs q prob (symmetrize?)
    return categorical_crossentropy(y, x) + \
        LAMBDA_ADV*kullback_leibler_divergence(h_btag_q, h_qtag_q) + \
        LAMBDA_ADV*kullback_leibler_divergence(h_btag_b, h_qtag_b)
Example #4
def fit_sinc(sampler, stepsize, data_seed, num_training_datapoints=20):
    x_train = init_random_uniform(np.zeros(1),
                                  np.ones(1),
                                  num_points=num_training_datapoints,
                                  rng=np.random.RandomState(seed=data_seed))
    y_train = sinc(x_train)

    x_test = np.linspace(0, 1, 100)[:, None]
    y_test = sinc(x_test)

    if sampler == "SGHMC":
        model = Robo_BNN(sampling_method=SAMPLERS[sampler], l_rate=stepsize)
    else:
        from keras.losses import cosine_proximity, kullback_leibler_divergence, binary_crossentropy
        model = BayesianNeuralNetwork(
            optimizer=SAMPLERS[sampler],
            learning_rate=stepsize,
            hyperloss=lambda y_true, y_pred: kullback_leibler_divergence(
                y_true=y_true, y_pred=y_pred[:, 0]))

    model.train(x_train, y_train)
    prediction_mean, prediction_variance = model.predict(x_test)

    prediction_std = np.sqrt(prediction_variance)

    return {
        "prediction_mean": prediction_mean.tolist(),
        "prediction_std": prediction_std.tolist(),
        "x_train": x_train.tolist(),
        "y_train": y_train.tolist(),
        "x_test": x_test.tolist(),
        "y_test": y_test.tolist()
    }
Example #5
    def augmented_loss(self, y_true, y_pred):
        _y_pred = Activation("softmax")(y_pred)
        loss = K.categorical_crossentropy(_y_pred, y_true)

        # y is (batch x seq x vocab)
        y_indexes = K.argmax(y_true,
                             axis=2)  # turn one hot to index. (batch x seq)
        y_vectors = self.embedding(
            y_indexes)  # lookup the vector (batch x seq x vector_length)

        #v_length = self.setting.vector_length
        #y_vectors = K.reshape(y_vectors, (-1, v_length))
        #y_t = K.map_fn(lambda v: K.dot(self.embedding.embeddings, K.reshape(v, (-1, 1))), y_vectors)
        #y_t = K.squeeze(y_t, axis=2)  # unknown but necessary operation
        #y_t = K.reshape(y_t, (-1, self.sequence_size, self.vocab_size))

        # vector x embedding dot products (batch x seq x vocab)
        y_t = tf.tensordot(y_vectors, K.transpose(self.embedding.embeddings),
                           1)
        y_t = K.reshape(
            y_t,
            (-1, self.sequence_size, self.vocab_size))  # explicitly set shape
        y_t = K.softmax(y_t / self.temperature)
        _y_pred_t = Activation("softmax")(y_pred / self.temperature)
        aug_loss = kullback_leibler_divergence(y_t, _y_pred_t)
        loss += (self.gamma * self.temperature) * aug_loss
        return loss
Example #6
 def loss(y_true, y_pred):
     KLD = kullback_leibler_divergence(q_c, y_pred)
     cross_entropy = categorical_crossentropy(y_true, y_pred)
     F_diff_squared = K.sum(K.square(F_i - F_c))
     loss_value = (
         1 - alpha) * cross_entropy + alpha * KLD + beta * F_diff_squared
     return loss_value
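q_c, F_i, F_c, alpha and beta are free variables captured from the enclosing scope, so a loss like this is normally built by a small factory and the returned closure handed to compile. A sketch of that pattern under those assumptions (make_distill_loss is a hypothetical name):

from keras import backend as K
from keras.losses import categorical_crossentropy, kullback_leibler_divergence

def make_distill_loss(q_c, F_i, F_c, alpha=0.5, beta=1e-3):
    """Mix cross-entropy with KL against a reference distribution q_c and a
    squared feature-difference penalty between F_i and F_c."""
    def loss(y_true, y_pred):
        KLD = kullback_leibler_divergence(q_c, y_pred)
        cross_entropy = categorical_crossentropy(y_true, y_pred)
        F_diff_squared = K.sum(K.square(F_i - F_c))
        return (1 - alpha) * cross_entropy + alpha * KLD + beta * F_diff_squared
    return loss

# model.compile(optimizer="adam", loss=make_distill_loss(q_c, F_i, F_c))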
Example #7
    def loss(yT, yP):
        '''
        yT, yP: (None, pix_num, 3)
        '''
        yP_flat = tf.reshape(yP, (-1, n_clusters))
        p = tf_target_distribution(yP_flat)

        return tf.reduce_mean(kullback_leibler_divergence(yP_flat, p))
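tf_target_distribution is not shown in this snippet; in DEC-style clustering the target is usually the sharpened, frequency-normalized version of the soft assignments. One plausible definition, offered as an assumption rather than the author's actual helper:

import tensorflow as tf

def tf_target_distribution(q):
    """Sharpen soft cluster assignments q of shape (batch, n_clusters) into targets p."""
    weight = tf.square(q) / tf.reduce_sum(q, axis=0)               # q^2 / cluster frequency
    return weight / tf.reduce_sum(weight, axis=1, keepdims=True)   # renormalize per sample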
Example #8
    def customized_loss(self, y_true, y_pred, alpha=0.0001, beta=3):
        """
		linear combination of MSE and KL divergence.
		"""
        loss1 = losses.mean_absolute_error(y_true, y_pred)
        loss2 = losses.kullback_leibler_divergence(y_true, y_pred)
        #(alpha/2) *
        return loss1 + beta * loss2
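Because this loss is a method that takes extra weights, it has to be bound into the two-argument signature Keras expects at compile time; a brief sketch, where ae is a hypothetical instance of the class defining customized_loss and ae.model its underlying Keras model:

# Bind the extra weights with a lambda so Keras sees a (y_true, y_pred) callable.
ae.model.compile(
    optimizer="adam",
    loss=lambda y_true, y_pred: ae.customized_loss(y_true, y_pred, alpha=0.0001, beta=3),
)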
Example #9
def customLoss(yTrue, yPred):
     img_loss = kullback_leibler_divergence(K.reshape(yTrue, [-1])/K.sum(yTrue), K.reshape(yPred, [-1])/K.sum(yPred))
     sobel_loss, mask = sobelLoss(yTrue, yPred)
     BCE = binary_crossentropy(yTrue, yPred)
     masked_loss = K.mean((K.exp(K.sum(mask, axis = 3))*K.square(yTrue-yPred)))  #[16,62,62,2] vs. [16,64,64,1]

     reg_loss = sobelNorm(model.layers[1].locnet.output) # why does this term give zeros? Do not use it alone...
     
     return img_loss + sobel_loss + 0.3*BCE
Example #10
 def loss(y_true, y_pred):
     #y_true = K.clip(y_true,K.epsilon(),1)
     #y_pred = K.clip(y_pred,K.epsilon(),1)
     #rt = K.mean((K.softmax(old_q)/K.softmax(y_pred)))
     #c = K.clip(rt,0.8,1.2)
     q_true = y_true
     q_pred = y_pred
     return mean_squared_error(q_true, q_pred) + K.exp(
         kullback_leibler_divergence(old_q, q_pred))
Example #11
    def loss_kldiv5(y_in, x_in):
        h = y_in[:, 0:NBINS]
        y = y_in[:, NBINS:NBINS + 2]
        x = x_in[:, NBINS:NBINS + 2]
        h_blike_slike_s = K.dot(K.transpose(h), K.dot(tf.diag(y[:, 0]), x))
        h_blike_slike_b = K.dot(K.transpose(h), K.dot(tf.diag(y[:, 1]), x))
        h_blike_s = h_blike_slike_s[:, 1]
        h_blike_s = h_blike_s / K.sum(h_blike_s, axis=0)
        h_slike_s = h_blike_slike_s[:, 0]
        h_slike_s = h_slike_s / K.sum(h_slike_s, axis=0)
        h_blike_b = h_blike_slike_b[:, 1]
        h_blike_b = h_blike_b / K.sum(h_blike_b, axis=0)
        h_slike_b = h_blike_slike_b[:, 0]
        h_slike_b = h_slike_b / K.sum(h_slike_b, axis=0)

        return categorical_crossentropy(y, x) + \
            0.5*kullback_leibler_divergence(h_blike_s, h_slike_s) + \
            0.5*kullback_leibler_divergence(h_blike_b, h_slike_b)
Example #12
def mass_jsdiv_q(y_in,x):
    """
    Jensen-Shannon divergence term for anti-tag events (QCD) to be used with custom loss_kldiv
    """
    h = y_in[:,0:NBINS]
    y = y_in[:,NBINS:NBINS+2]
    # build mass histogram for true q events weighted by q, b prob
    h_alltag_q = K.dot(K.transpose(h), K.dot(tf.diag(y[:,0]),x))
    
    # select mass histogram for true q events weighted by q prob; normalize
    h_qtag_q = h_alltag_q[:,0]
    h_qtag_q = h_qtag_q / K.sum(h_qtag_q,axis=0)
    # select mass histogram for true q events weighted by b prob; normalize
    h_btag_q = h_alltag_q[:,1]
    h_btag_q = h_btag_q / K.sum(h_btag_q,axis=0)

    h_aver_q = 0.5*h_btag_q+0.5*h_qtag_q
    return 0.5*kullback_leibler_divergence(h_btag_q, h_aver_q) + 0.5*kullback_leibler_divergence(h_qtag_q, h_aver_q) 
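Since this term keeps the standard (y_true, y_pred) signature, it can also be tracked as a metric while training on the combined loss; a brief sketch, assuming the same packed targets, model and loss_kldiv as above:

# Track the residual mass sculpting of the QCD (anti-tag) sample during training.
model.compile(optimizer="adam",
              loss=loss_kldiv,
              metrics=[mass_jsdiv_q])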
Example #13
def customized_loss(y_true, y_pred, alpha=0.0001, beta=3):
    """
    Create a customized loss for the stacked AE.
    Linear combination of MAE and KL divergence.
    """
    #customize your own loss components
    loss1 = losses.mean_absolute_error(y_true, y_pred)
    loss2 = losses.kullback_leibler_divergence(y_true, y_pred)
    #adjust the weight between loss components
    return (alpha / 2) * loss1 + beta * loss2
Example #14
def customLoss(yTrue, yPred):
    #     norm_T = K.pow(K.sum(K.square(yTrue)), 0.5)
    #     norm_P = K.pow(K.sum(K.square(yPred)), 0.5)
    #     img_loss = kullback_leibler_divergence(K.reshape(yTrue, [-1])/K.sum(yTrue),
    #                                            K.reshape(yPred, [-1])/K.sum(yPred))
    img_loss = kullback_leibler_divergence(K.softmax(K.reshape(yTrue, [-1])),
                                           K.softmax(K.reshape(yPred, [-1])))
    #     sobel_loss = sobelLoss(yTrue, yPred)
    BCE = binary_crossentropy(yTrue, yPred)
    #     return img_loss
    return img_loss + 0.3 * BCE
Example #15
 def __call__(self, x):
     loss = 0.
     marginalized_vars = []
     for i, size in enumerate(self.sizes):
         marginalized = K.sum(tf.gather(x, self.idx[i], axis=-1), axis=-1)
         # marginalized = marginalized / K.sum(marginalized) # this should not be needed
         marginalized_vars.append(marginalized)
     products = K.stack([
         a for a in combine(marginalized_vars, self.sizes,
                            lambda x1, x2: x1 * x2)
     ])
     loss += self.weight * kullback_leibler_divergence(x, products)
     return loss
Example #16
def fastbert(teacher, classifier, speed=speed):
    inputs = teacher.inputs
    # frozen layers
    for layer in teacher.model.layers:
        layer.trainable = False
    classifier.trainable = False

    x_pre = teacher.apply_embeddings(inputs)
    emb_name = 'FastBert-embedding'
    clf_pre = teacher.apply(x_pre,
                            FastbertClassifierLayer,
                            name=emb_name,
                            labels_num=num_classes)
    student_outputs = [clf_pre]
    outputs = [clf_pre, x_pre]

    for idx in range(teacher.num_hidden_layers):
        clf_pre, x_pre = outputs
        name = 'FastBert-%d' % idx
        x_next = teacher.apply_attention_layers(x_pre, idx)
        clf_next = teacher.apply(x_pre,
                                 FastbertClassifierLayer,
                                 name=name,
                                 labels_num=num_classes)
        student_outputs.append(clf_next)

        x = SwitchTwo(speed)([clf_pre, x_pre, x_next])
        clf = SwitchTwo(speed)([clf_pre, clf_pre, clf_next])
        outputs = [clf, x]

    clf_prob, x = outputs
    x = classifier(x)

    output = SwitchTwo(speed)([clf_prob, clf_prob, x])
    model_infer = Model(inputs, output)

    label_inputs = Input(shape=(None, ))
    model_train = Model(inputs + [label_inputs], student_outputs)

    for i, prob in enumerate(student_outputs):
        ce_loss = K.sparse_categorical_crossentropy(label_inputs, prob)
        kl_loss = kullback_leibler_divergence(x, prob)
        model_train.add_loss(ce_loss)
        model_train.add_metric(ce_loss, name='ce_loss-%d' % i)
        model_train.add_loss(kl_loss)
        model_train.add_metric(kl_loss, name='loss-%d' % i)

    model_1 = Model(inputs, student_outputs[1])
    model_2 = Model(inputs, student_outputs[2])

    return model_train, model_infer, model_1, model_2
Example #17
def loss_kldiv(y_in,x):
    # h is the histogram vector "one hot encoded" (40 bins in this case), technically part of the "truth" y
    h = y_in[:,0:NBINS]
    y = y_in[:,NBINS:]
    h_all = K.dot(K.transpose(h), y)
    h_all_q = h_all[:,0]
    h_all_h = h_all[:,1]
    h_all_q = h_all_q / K.sum(h_all_q,axis=0)
    h_all_h = h_all_h / K.sum(h_all_h,axis=0)
    h_btag_anti_q = K.dot(K.transpose(h), K.dot(tf.diag(y[:,0]),x))
    h_btag_anti_h = K.dot(K.transpose(h), K.dot(tf.diag(y[:,1]),x))
    h_btag_q = h_btag_anti_q[:,1]
    h_btag_q = h_btag_q / K.sum(h_btag_q,axis=0)
    h_anti_q = h_btag_anti_q[:,0]
    h_anti_q = h_anti_q / K.sum(h_anti_q,axis=0)
    h_btag_h = h_btag_anti_h[:,1]
    h_btag_h = h_btag_h / K.sum(h_btag_h,axis=0)
    h_anti_h = h_btag_anti_h[:,0]
    h_anti_h = h_anti_h / K.sum(h_anti_h,axis=0)

    return categorical_crossentropy(y, x) + \
        kullback_leibler_divergence(h_btag_q, h_anti_q) + \
        kullback_leibler_divergence(h_btag_h, h_anti_h)         
Example #18
    def __init__(self, model, epsilon, k, a, random_start, loss_func):
        """Attack parameter initialization. The attack performs k steps of
       size a, while always staying within epsilon from the initial
       point."""
        self.model = model
        self.epsilon = epsilon
        self.k = k
        self.a = a
        self.rand = random_start
        self.x_nat_prob = tf.placeholder(tf.float32, shape=[None, 10])

        loss = tf.reduce_sum(
            kullback_leibler_divergence(self.x_nat_prob, model.prob))

        self.grad = tf.gradients(loss, model.x_input)[0]
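The stored gradient drives the iterative attack described in the docstring. A sketch of how the matching perturb step typically looks in this TF1-style setup; perturb is a hypothetical method name and the 0-255 pixel range is an assumption:

import numpy as np

def perturb(self, x_nat, x_nat_prob, sess):
    """Run k signed-gradient steps of size a, projected onto the
    L-infinity ball of radius epsilon around x_nat."""
    if self.rand:
        x = x_nat + np.random.uniform(-self.epsilon, self.epsilon, x_nat.shape)
    else:
        x = np.copy(x_nat)

    for _ in range(self.k):
        grad = sess.run(self.grad, feed_dict={self.model.x_input: x,
                                              self.x_nat_prob: x_nat_prob})
        x += self.a * np.sign(grad)
        x = np.clip(x, x_nat - self.epsilon, x_nat + self.epsilon)
        x = np.clip(x, 0, 255)   # keep pixels in the assumed valid range
    return x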
Example #19
def mass_kldiv_h(y_in,x):
    """
    KL divergence term for tag events (H) to be used with custom loss_kldiv 
    """
    h = y_in[:,0:NBINS]
    y = y_in[:,NBINS:NBINS+2]

    # build mass histogram for true b events weighted by q, b prob
    h_alltag_b = K.dot(K.transpose(h), K.dot(tf.diag(y[:,1]),x))
    
    # select mass histogram for true b events weighted by q prob; normalize        
    h_qtag_b = h_alltag_b[:,0]
    h_qtag_b = h_qtag_b / K.sum(h_qtag_b,axis=0)
    # select mass histogram for true b events weighted by b prob; normalize        
    h_btag_b = h_alltag_b[:,1]
    h_btag_b = h_btag_b / K.sum(h_btag_b,axis=0)
    
    return kullback_leibler_divergence(h_btag_b, h_qtag_b)
Example #20
def fit_uci(sampler, stepsize, data_seed, burn_in_steps=5000,
            num_steps=15000, num_nets=100, batch_size=32, test_split=0.1):
    datasets = (BostonHousing, YachtHydrodynamics, Concrete, WineQualityRed)

    results = {}

    for dataset in datasets:
        train_data, (x_test, y_test) = dataset.load_data(
            test_split=test_split, seed=data_seed
        )
        had_nans = True

        while had_nans:
            if sampler == "sghmc":
                model = Robo_BNN(
                    l_rate=stepsize,
                    sampling_method="sghmc", n_nets=num_nets, burn_in=burn_in_steps,
                    n_iters=num_steps, bsize=batch_size
                )
            elif sampler.startswith("SGHMCHD"):
                # SGHMCHD approaches with different kwargs

                model = KerasBayesianNeuralNetwork(
                    optimizer=SAMPLERS[sampler], learning_rate=stepsize,
                    train_callbacks=(TensorBoard(histogram_freq=1, batch_size=20, ),),
                    hyperloss=lambda y_true, y_pred: kullback_leibler_divergence(y_true=y_true, y_pred=y_pred[:, 0])
                )
            else:
                raise NotImplementedError()

            model.train(*train_data)
            prediction_mean, prediction_variance = model.predict(x_test)

            had_nans = np.isnan(prediction_mean).any() or np.isnan(prediction_variance).any()

        results[dataset.__name__] = {
            "x_test": x_test.tolist(),
            "y_test": y_test.tolist(),
            "prediction_mean": prediction_mean.tolist(),
            "prediction_variance": prediction_variance.tolist()
        }

    return results
Example #21
 def loss_function(y_true, y_pred):
     return losses.kullback_leibler_divergence(y_true, y_pred) + add_loss
Example #22
def cross_network_similarity_loss(y_true, y_pred):
    y_pred = tf.transpose(y_pred, [1, 0, 2])
    p1 = y_pred[0]
    p2 = y_pred[1]
    kl = KLoss.kullback_leibler_divergence(p1, p2)
    return tf.maximum(0.0, kl - 0.15)
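This loss expects y_pred to stack the two networks' outputs along the second axis, shape (batch, 2, dim), which the transpose then splits apart. A minimal sketch of producing such a stacked output with a Lambda layer, under that assumption (net_a and net_b are hypothetical sub-networks with softmax outputs):

from keras.layers import Input, Lambda
from keras.models import Model
import keras.backend as K

inp = Input(shape=(32,))
p1 = net_a(inp)
p2 = net_b(inp)

# Stack to (batch, 2, dim) so the loss can transpose and compare the two heads.
stacked = Lambda(lambda t: K.stack(t, axis=1))([p1, p2])
siamese = Model(inp, stacked)
siamese.compile(optimizer="adam", loss=cross_network_similarity_loss)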
Example #23
    datagen = ImageDataGenerator()

    epoch_num = 500
    batch_size = 16

    labels = tf.placeholder(tf.float32, shape=(None, num_classes))
    model = image_entry_model_46(time_steps, data_dim)

    train_var = tf.trainable_variables()
    predicts = model.output
    inputs = model.input

    if ifRegularizer == True:
        regularizer = tf.contrib.layers.l2_regularizer(0.001)
        loss = tf.reduce_mean(
            losses.kullback_leibler_divergence(
                labels, predicts)) + tf.contrib.layers.apply_regularization(
                    regularizer,
                    weights_list=train_var[:12] + train_var[18:30])
    else:
        loss = tf.reduce_mean(
            losses.kullback_leibler_divergence(labels, predicts))

    UAR_value = tf.constant(0.0)
    tf.summary.scalar("loss", loss)
    tf.summary.scalar("UAR", UAR_value)
    train_step = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(
        loss=loss, var_list=train_var)
    init = tf.global_variables_initializer()

    # the main training procedure
    ############################################################################
Example #24
 def vae_loss(encoder_inputs, decoder_outputs):
     xent_loss = K.categorical_crossentropy(encoder_inputs, decoder_outputs)
     kl_loss = beta * kullback_leibler_divergence(encoder_inputs,
                                                  decoder_outputs)
     loss = xent_loss + kl_loss
     return loss
Example #25
def KLD_loss(y_true, y_pred):
    return kullback_leibler_divergence(y_true, y_pred)
Example #26
# Setting up the data and the model
raw_data = data_input.Data(one_hot=True)
global_step = tf.contrib.framework.get_or_create_global_step()

x_input = tf.placeholder(tf.float32, shape=[None, 96, 96, 3])
adv_x_input = tf.placeholder(tf.float32, shape=[None, 96, 96, 3])
y_input = tf.placeholder(tf.int64, shape=[None, 10])

model = Model(x_input, y_input, mode='train')

model_adv = Model(adv_x_input, y_input, mode='train', reuse=True)

# Setting up the optimizer
loss = model.mean_xent + 6.0 * tf.reduce_mean(
    kullback_leibler_divergence(model.prob, model_adv.prob))

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

with tf.control_dependencies(update_ops):
    train_step = tf.train.AdamOptimizer(1e-3).minimize(loss,
                                                       global_step=global_step)

# Set up adversary
attack = LinfTradeAttack(model, config['epsilon'], config['k'], config['a'],
                         config['random_start'], config['loss_func'])

# Setting up the Tensorboard and checkpoint outputs
model_dir = config['model_dir']
if not os.path.exists(model_dir):
    os.makedirs(model_dir)
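With separate placeholders for clean and adversarial inputs, each training step first asks the attack for perturbed images and then feeds both batches through the shared-weight models. A sketch of that step; the attack's perturb(x, x_nat_prob, sess) interface and the raw_data batch API are assumptions, not code from this example:

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for step in range(100000):   # placeholder training length
        x_batch, y_batch = raw_data.train_data.get_next_batch(64)    # assumed data API

        # Clean-model probabilities feed the KL term inside the attack's loss.
        x_nat_prob = sess.run(model.prob, feed_dict={x_input: x_batch})
        x_adv = attack.perturb(x_batch, x_nat_prob, sess)            # assumed attack interface

        sess.run(train_step, feed_dict={x_input: x_batch,
                                        adv_x_input: x_adv,
                                        y_input: y_batch})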
Example #27
 def call(self, x):
     return -1. * kullback_leibler_divergence(x[0], x[1])
Example #28
    ############################################################################
    datagen = ImageDataGenerator()

    epoch_num = 500
    batch_size = 16

    labels = tf.placeholder(tf.float32, shape=(None, num_classes))
    model = image_entry_model_46(time_steps, data_dim)

    train_var = tf.trainable_variables()
    predicts = model.output
    inputs = model.input

    if ifRegularizer == True:
        regularizer = tf.contrib.layers.l2_regularizer(0.001)
        loss = tf.reduce_mean(losses.kullback_leibler_divergence(labels, predicts)) + tf.contrib.layers.apply_regularization(regularizer, weights_list=train_var[:12]+train_var[18:30])
    else:
        loss = tf.reduce_mean(losses.kullback_leibler_divergence(labels, predicts))

    UAR_value = tf.constant(0.0)
    tf.summary.scalar("loss", loss)
    tf.summary.scalar("UAR", UAR_value)
    train_step = tf.train.RMSPropOptimizer(learning_rate=0.001).minimize(loss=loss, var_list=train_var)
    init = tf.global_variables_initializer()


    # the main training procedure
    ############################################################################
    with tf.Session() as sess:

        sess.run(init)
Example #29
 def loss(y_true, y_pred):
     return loss_weight * kullback_leibler_divergence(y_true, y_pred)
Example #30
 def lossfunction(y_true, y_pred):
     return kullback_leibler_divergence(y_true, y_pred)