Example #1
def logit(x,
          is_training=True,
          update_batch_stats=True,
          stochastic=True,
          seed=1234):
    # cnn.logit returns extra outputs alongside the logits; keep only the logits.
    return cnn.logit(x,
                     is_training=is_training,
                     update_batch_stats=update_batch_stats,
                     stochastic=stochastic,
                     seed=seed)[0]
def generate_virtual_adversarial_dropout_mask(x, logit, is_training=True):
    # Forward pass with a freshly sampled dropout mask; CNN.logit also
    # returns the mask it drew.
    logit_m, init_mask = CNN.logit(x,
                                   None,
                                   is_training=True,
                                   update_batch_stats=False,
                                   stochastic=True,
                                   seed=1234)
    # Divergence between the masked prediction and the reference logits.
    dist = L.kl_divergence_with_logit(logit_m, logit)
    # Gradient of the divergence w.r.t. the mask, treated as a constant.
    mask_grad = tf.stop_gradient(
        tf.gradients(dist, [init_mask], aggregation_method=2)[0])
    # Flip mask entries in the direction that increases the divergence.
    return flipping_algorithm(init_mask, mask_grad)
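
# `flipping_algorithm` above is a repo-local helper that is not shown in
# this snippet. The function below is a hypothetical stand-in, not the
# original implementation: it flips the fraction of mask entries whose
# gradient most favours changing their current on/off state.
def flipping_algorithm_sketch(mask, mask_grad, flip_ratio=0.05):
    # Gain from flipping each unit: (flipped value - current value) * gradient.
    gain = (1. - 2. * mask) * mask_grad
    # Number of units allowed to flip per example (at least one).
    k = tf.maximum(
        tf.cast(tf.cast(tf.shape(mask)[-1], tf.float32) * flip_ratio, tf.int32), 1)
    # Per-example threshold selecting the k largest gains.
    threshold = tf.reduce_min(tf.nn.top_k(gain, k=k).values,
                              axis=-1, keepdims=True)
    flip = tf.cast(tf.logical_and(gain >= threshold, gain > 0.), tf.float32)
    # Flip the selected units, keep the rest unchanged.
    return mask * (1. - flip) + (1. - mask) * flip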
def logit(x,
          masks=None,
          is_training=True,
          update_batch_stats=True,
          stochastic=True,
          seed=1234):
    logits, _ = CNN.logit(x,
                          masks,
                          is_training=is_training,
                          update_batch_stats=update_batch_stats,
                          stochastic=stochastic,
                          seed=seed)
    return logits
def virtual_adversarial_dropout_loss(x,
                                     logit,
                                     is_training=True,
                                     name="vadt_loss"):
    # Find the dropout mask that most increases the divergence, then
    # penalise the KL divergence between the clean prediction and the
    # prediction under that adversarial mask.
    adv_mask = generate_virtual_adversarial_dropout_mask(
        x, logit, is_training=is_training)
    logit_p = logit
    logit_m, _ = CNN.logit(x,
                           adv_mask,
                           is_training=True,
                           update_batch_stats=True,
                           stochastic=True,
                           seed=1234)

    loss = L.kl_divergence_with_logit(logit_p, logit_m)
    return tf.identity(loss, name=name)
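
A minimal usage sketch for the loss above, assuming hypothetical labelled tensors x_l / y_l, an unlabelled batch x_ul, and a weighting coefficient vadt_weight (none of these names come from the original file):

import tensorflow as tf

# Supervised cross-entropy on the labelled batch.
logit_l = logit(x_l, is_training=True, update_batch_stats=True)
ce_loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_l, logits=logit_l))

# Clean predictions on the unlabelled batch serve as the reference
# distribution for the virtual adversarial dropout term.
logit_ul = logit(x_ul, is_training=True, update_batch_stats=False)
vadt = virtual_adversarial_dropout_loss(x_ul, logit_ul, is_training=True)

total_loss = ce_loss + vadt_weight * vadt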
Example #5
def logit(x,
          is_training=True,
          update_batch_stats=True,
          stochastic=True,
          seed=1234,
          dropout_mask=None,
          return_mask=False,
          h_before_dropout=None):
    # Thin wrapper that forwards all dropout-related options to cnn.logit.
    return cnn.logit(x,
                     is_training=is_training,
                     update_batch_stats=update_batch_stats,
                     stochastic=stochastic,
                     seed=seed,
                     dropout_mask=dropout_mask,
                     return_mask=return_mask,
                     h_before_dropout=h_before_dropout)
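
A short usage sketch for this wrapper. It assumes (based on the signature only, not confirmed here) that cnn.logit returns a (logits, mask) pair when return_mask=True and accepts a previously sampled mask via dropout_mask:

# Sample a stochastic forward pass and keep the dropout mask it used.
logits_clean, mask = logit(x, stochastic=True, return_mask=True)

# Replay the forward pass with the same mask fixed, e.g. to compare
# against a run that perturbs the mask.
logits_replayed = logit(x, stochastic=True, dropout_mask=mask)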
Example #6
def build_training_graph(x1, y1, x2, lr, mom):
    global_step = tf.get_variable(
        name="global_step",
        shape=[],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.0),
        trainable=False,
    )
    k = 1. * global_step / (FLAGS.num_iter_per_epoch * FLAGS.num_epochs)
    # lp schedule from GRL
    lp = (2. / (1. + tf.exp(-10. * k)) - 1)

    # Mixup interpolation between the labelled batch and the pseudo-labelled batch
    y2_logit, _ = cnn.logit(x2,
                            is_training=False,
                            update_batch_stats=False,
                            stochastic=False)
    if FLAGS.one_hot:
        y2 = tf.stop_gradient(
            tf.cast(tf.one_hot(tf.argmax(y2_logit, -1), 10), tf.float32))
    else:
        y2 = tf.stop_gradient(tf.nn.softmax(y2_logit))

    dist_beta = tf.distributions.Beta(0.1, 0.1)
    lmb = dist_beta.sample(tf.shape(x1)[0])
    lmb_x = tf.reshape(lmb, [-1, 1, 1, 1])
    lmb_y = tf.reshape(lmb, [-1, 1])
    x = x1 * lmb_x + x2 * (1. - lmb_x)
    y = y1 * lmb_y + y2 * (1. - lmb_y)

    # Discriminator targets: the mixing coefficients [lmb, 1 - lmb] per sample.
    label_dm = tf.concat(
        [tf.reshape(lmb, [-1, 1]),
         tf.reshape(1. - lmb, [-1, 1])], axis=1)

    # Calculate the feats and logits on interpolated samples
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):
        logit, net = cnn.logit(x, is_training=True, update_batch_stats=True)

    # Alignment Loss
    net_ = flip_gradient(net, lp)
    logitsdm = tf.layers.dense(net_,
                               1024,
                               activation=tf.nn.relu,
                               name='linear_dm1')
    logitsdm = tf.layers.dense(logitsdm,
                               1024,
                               activation=tf.nn.relu,
                               name='linear_dm2')
    logits_dm = tf.layers.dense(logitsdm, 2, name="logits_dm")
    dm_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=label_dm,
                                                logits=logits_dm))
    additional_loss = dm_loss

    nll_loss = tf.reduce_mean(
        lmb * tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logit))

    loss = nll_loss + additional_loss

    opt = tf.train.AdamOptimizer(learning_rate=lr, beta1=mom)
    tvars = tf.trainable_variables()
    grads_and_vars = opt.compute_gradients(loss, tvars)
    train_op = opt.apply_gradients(grads_and_vars, global_step=global_step)
    return loss, train_op, global_step
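
A minimal sketch of driving this training graph. The placeholder shapes, feed values, and next_batch() iterator below are illustrative assumptions, not part of the original file:

import tensorflow as tf

x1 = tf.placeholder(tf.float32, [None, 32, 32, 3])   # labelled images
y1 = tf.placeholder(tf.float32, [None, 10])          # one-hot labels
x2 = tf.placeholder(tf.float32, [None, 32, 32, 3])   # unlabelled images
lr = tf.placeholder(tf.float32, [])
mom = tf.placeholder(tf.float32, [])

loss, train_op, global_step = build_training_graph(x1, y1, x2, lr, mom)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(FLAGS.num_iter_per_epoch * FLAGS.num_epochs):
        batch_x, batch_y, batch_ul = next_batch()  # hypothetical data iterator
        loss_val, _ = sess.run(
            [loss, train_op],
            feed_dict={x1: batch_x, y1: batch_y, x2: batch_ul,
                       lr: 1e-3, mom: 0.9})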