def grad_fn(inputs, variables, unused_outputs, unused_grad_outputs):
    grad_inputs = [tf.ones_like(t) * (i + 1.) for i, t in enumerate(inputs)]
    grad_vars = [
        tf.ones_like(t) * (i + len(inputs) + 1.)
        for i, t in enumerate(variables)
    ]
    return grad_inputs, grad_vars
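
Since the last two arguments are ignored, a quick TF1 session check (an illustrative sketch, not part of the original snippet) shows the constant-filled gradients this grad_fn returns:

import tensorflow as tf

inputs = [tf.zeros([2, 3]), tf.zeros([4])]
variables = [tf.ones([5])]
grad_inputs, grad_vars = grad_fn(inputs, variables, None, None)
with tf.Session() as sess:
    print(sess.run(grad_inputs[1]))  # -> [2. 2. 2. 2.]   (i + 1 with i == 1)
    print(sess.run(grad_vars[0]))    # -> [3. 3. 3. 3. 3.] (i + len(inputs) + 1)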
Example #2
def generalised_dice_loss(prediction,
                          ground_truth,
                          weight_map=None,
                          type_weight='Square'):
    """
    Function to calculate the Generalised Dice Loss defined in
        Sudre, C. et. al. (2017) Generalised Dice overlap as a deep learning
        loss function for highly unbalanced segmentations. DLMIA 2017

    :param prediction: the logits
    :param ground_truth: the segmentation ground truth
    :param weight_map: optional per-voxel weights, applied to the reference,
        intersection and segmentation volumes
    :param type_weight: type of weighting allowed between labels (choice
        between Square (square of inverse of volume),
        Simple (inverse of volume) and Uniform (no weighting))
    :return: the loss
    """
    ground_truth = tf.to_int64(ground_truth)
    n_voxels = ground_truth.shape[0].value
    n_classes = prediction.shape[1].value
    ids = tf.constant(np.arange(n_voxels), dtype=tf.int64)
    ids = tf.stack([ids, ground_truth], axis=1)
    one_hot = tf.SparseTensor(indices=ids,
                              values=tf.ones([n_voxels], dtype=tf.float32),
                              dense_shape=[n_voxels, n_classes])

    if weight_map is not None:
        weight_map_nclasses = tf.reshape(
            tf.tile(weight_map, [n_classes]), prediction.get_shape())
        ref_vol = tf.sparse_reduce_sum(
            weight_map_nclasses * one_hot, reduction_axes=[0])

        intersect = tf.sparse_reduce_sum(
            weight_map_nclasses * one_hot * prediction, reduction_axes=[0])
        seg_vol = tf.reduce_sum(
            tf.multiply(weight_map_nclasses, prediction), 0)
    else:
        ref_vol = tf.sparse_reduce_sum(one_hot, reduction_axes=[0])
        intersect = tf.sparse_reduce_sum(one_hot * prediction,
                                         reduction_axes=[0])
        seg_vol = tf.reduce_sum(prediction, 0)
    if type_weight == 'Square':
        weights = tf.reciprocal(tf.square(ref_vol))
    elif type_weight == 'Simple':
        weights = tf.reciprocal(ref_vol)
    elif type_weight == 'Uniform':
        weights = tf.ones_like(ref_vol)
    else:
        raise ValueError("The variable type_weight \"{}\" "
                         "is not defined.".format(type_weight))
    new_weights = tf.where(tf.is_inf(weights), tf.zeros_like(weights), weights)
    weights = tf.where(tf.is_inf(weights), tf.ones_like(weights) *
                       tf.reduce_max(new_weights), weights)
    generalised_dice_numerator = \
        2 * tf.reduce_sum(tf.multiply(weights, intersect))
    generalised_dice_denominator = \
        tf.reduce_sum(tf.multiply(weights, seg_vol + ref_vol))
    generalised_dice_score = \
        generalised_dice_numerator / generalised_dice_denominator
    return 1 - generalised_dice_score
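
A minimal usage sketch (illustrative, not from the original source), assuming `prediction` holds per-voxel class probabilities of shape [n_voxels, n_classes] with static shapes and `ground_truth` is a flat vector of integer labels:

import numpy as np
import tensorflow as tf

prediction = tf.nn.softmax(tf.random_normal([100, 4]))
ground_truth = tf.random_uniform([100], maxval=4, dtype=tf.int32)
loss = generalised_dice_loss(prediction, ground_truth, type_weight='Uniform')
with tf.Session() as sess:
    print(sess.run(loss))  # scalar loss value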
    def __init__(self,
                 sess,
                 dataset_name='facades',
                 checkpoint_dir=None):
        self.sess = sess
        self.dataset_name = dataset_name
        self.checkpoint_dir = checkpoint_dir

        self.real_data = tf.placeholder(tf.float32,
                                        [BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, 3 + 3],
                                        name='input_images')
        self.real_A = self.real_data[:, :, :, :3]
        self.real_B = self.real_data[:, :, :, 3:6]

        self.fake_B = generator(self.real_A, name="generatorA2B")
        self.fake_A = generator(self.real_B, name="generatorB2A")
        self.fake_B_fake_A = generator(self.fake_B, reuse=True, name="generatorB2A")
        self.fake_A_fake_B = generator(self.fake_A, reuse=True, name="generatorA2B")

        self.DA_real = discriminator(self.real_A, reuse=False, name="discriminatorA")
        self.DB_real = discriminator(self.real_B, reuse=False, name="discriminatorB")
        self.DA_fake = discriminator(self.fake_A, reuse=True, name="discriminatorA")
        self.DB_fake = discriminator(self.fake_B, reuse=True, name="discriminatorB")

        self.g_loss_a2b = (
            tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.DB_fake, labels=tf.ones_like(self.DB_fake))) +
            100 * tf.reduce_mean(tf.abs(self.real_A - self.fake_B_fake_A)) +
            100 * tf.reduce_mean(tf.abs(self.real_B - self.fake_B)))
        self.g_loss_b2a = (
            tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.DA_fake, labels=tf.ones_like(self.DA_fake))) +
            100 * tf.reduce_mean(tf.abs(self.real_B - self.fake_A_fake_B)) +
            100 * tf.reduce_mean(tf.abs(self.real_A - self.fake_A)))
        self.g_loss = self.g_loss_a2b + self.g_loss_b2a

        self.d_loss = (
            tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.DB_fake, labels=tf.zeros_like(self.DB_fake))) +
            tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.DB_real, labels=tf.ones_like(self.DB_real))) +
            tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.DA_fake, labels=tf.zeros_like(self.DA_fake))) +
            tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.DA_real, labels=tf.ones_like(self.DA_real))))

        self.d_loss_sum = tf.summary.scalar("d_loss", self.d_loss)
        self.g_loss_sum = tf.summary.scalar("g_loss", self.g_loss)
        self.g_loss_a2b_sum = tf.summary.scalar("g_loss_a2b", self.g_loss_a2b)
        self.g_loss_b2a_sum = tf.summary.scalar("g_loss_b2a", self.g_loss_b2a)
        self.real_A_sum = tf.summary.image("real_A", self.real_A)
        self.real_B_sum = tf.summary.image("real_B", self.real_B)
        self.fake_A_sum = tf.summary.image("fake_A", self.fake_A)
        self.fake_B_sum = tf.summary.image("fake_B", self.fake_B)
        self.fake_AB_sum = tf.summary.image("fake_AB", self.fake_A_fake_B)
        self.fake_BA_sum = tf.summary.image("fake_BA", self.fake_B_fake_A)

        self.d_sum = tf.summary.merge([self.d_loss_sum])
        self.g_sum = tf.summary.merge([self.g_loss_sum, self.g_loss_a2b_sum, self.g_loss_b2a_sum,
                                       self.real_A_sum, self.real_B_sum, self.fake_A_sum,
                                       self.fake_B_sum, self.fake_AB_sum, self.fake_BA_sum])

        training_vars = tf.trainable_variables()
        self.d_vars = [var for var in training_vars if 'd_' in var.name]
        self.g_vars = [var for var in training_vars if 'g_' in var.name]
        self.saver = tf.train.Saver(max_to_keep=5)
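
A hedged training-step sketch (assumes `model` is an instance of this class and `batch` is a float array of shape [BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, 6] holding concatenated A/B image pairs; the optimizer settings are illustrative):

d_optim = tf.train.AdamOptimizer(2e-4, beta1=0.5).minimize(model.d_loss, var_list=model.d_vars)
g_optim = tf.train.AdamOptimizer(2e-4, beta1=0.5).minimize(model.g_loss, var_list=model.g_vars)
model.sess.run(tf.global_variables_initializer())
# `batch`: hypothetical numpy array of paired A/B images.
_, d_loss = model.sess.run([d_optim, model.d_loss], {model.real_data: batch})
_, g_loss = model.sess.run([g_optim, model.g_loss], {model.real_data: batch})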
    def __init__(self, q_values, observations, num_actions, stochastic, eps,
                 softmax, softmax_temp):
        if softmax:
            action_dist = Categorical(q_values / softmax_temp)
            self.action = action_dist.sample()
            self.action_prob = action_dist.sampled_action_prob()
            return

        deterministic_actions = tf.argmax(q_values, axis=1)
        batch_size = tf.shape(observations)[0]

        # Special case masked out actions (q_value ~= -inf) so that we don't
        # even consider them for exploration.
        random_valid_action_logits = tf.where(
            tf.equal(q_values, tf.float32.min),
            tf.ones_like(q_values) * tf.float32.min, tf.ones_like(q_values))
        random_actions = tf.squeeze(
            tf.multinomial(random_valid_action_logits, 1), axis=1)

        chose_random = tf.random_uniform(
            tf.stack([batch_size]), minval=0, maxval=1, dtype=tf.float32) < eps
        stochastic_actions = tf.where(chose_random, random_actions,
                                      deterministic_actions)
        self.action = tf.cond(stochastic, lambda: stochastic_actions,
                              lambda: deterministic_actions)
        self.action_prob = None
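
The tf.float32.min trick above gives masked-out actions an effectively zero sampling probability. A standalone sketch (illustrative) of just that masking step:

import tensorflow as tf

q_values = tf.constant([[1.0, tf.float32.min, 3.0]])
logits = tf.where(tf.equal(q_values, tf.float32.min),
                  tf.ones_like(q_values) * tf.float32.min,
                  tf.ones_like(q_values))
samples = tf.multinomial(logits, num_samples=20)
with tf.Session() as sess:
    print(sess.run(samples))  # action index 1 is essentially never drawn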
    def create_model(self):
        """Create tensorflow variables and graph."""
        self.enc_inp = [tf.placeholder(tf.int32, shape=(None,),
                                       name="inp%i" % t)
                        for t in range(self.bucket[0])]

        self.labels = [tf.placeholder(tf.int32, shape=(None,),
                                      name="labels%i" % t)
                       for t in range(self.bucket[1])]

        self.weights = [tf.ones_like(labels_t, dtype=tf.float32)
                        for labels_t in self.labels]

        # Decoder input: prepend some "GO" token and drop the final
        # token of the encoder input
        self.dec_inp = ([GO_ID * tf.ones_like(self.labels[0], dtype=tf.int32,
                                              name="GO")] + self.labels[:-1])

        single_cell = tf.nn.rnn_cell.LSTMCell(self.memory_dim)
        if self.num_layers > 1:
            # Build a separate cell per layer; newer TF 1.x versions require
            # this, and reusing one LSTMCell instance would tie the layers'
            # weights together.
            self.cell = tf.nn.rnn_cell.MultiRNNCell(
                [tf.nn.rnn_cell.LSTMCell(self.memory_dim)
                 for _ in range(self.num_layers)])
        else:
            self.cell = single_cell

        # Sequence to sequence model
        self.dec_outputs, self.dec_memory = tf.nn.seq2seq.embedding_rnn_seq2seq(
            self.enc_inp, self.dec_inp, self.cell, len(self.en_chars),
            len(self.hi_chars), self.embedding_dim)
Example #6
def add_optimization(learning_rate, beta1, beta2, disc_gen, disc_true,
                     gen_label, disc_label):
    gen_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=disc_gen, labels=tf.ones_like(disc_gen)), name='gen_loss')

    disc_g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=disc_gen, labels=tf.zeros_like(disc_gen)))
    disc_x_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
        logits=disc_true, labels=tf.ones_like(disc_true)))
    disc_loss = tf.add(disc_g_loss, disc_x_loss, name='disc_loss')

    gen_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 scope=gen_label)
    disc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                  scope=disc_label)
    # print 'gen vars---------------------'
    # for v in gen_vars:
    #     print v.name
    # print 'disc vars----------------'
    # for v in disc_vars:
    #     print v.name

    gen_opt = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                     beta1=beta1,
                                     beta2=beta2).minimize(gen_loss,
                                                           var_list=gen_vars)
    disc_opt = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                      beta1=beta1,
                                      beta2=beta2).minimize(disc_loss,
                                                            var_list=disc_vars)
    return gen_loss, disc_loss, gen_opt, disc_opt
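
A hedged wiring sketch; `d_logits_real`, `d_logits_fake` and the scope names are illustrative placeholders for tensors and variable scopes built elsewhere:

# d_logits_fake / d_logits_real: discriminator outputs built elsewhere (illustrative).
gen_loss, disc_loss, gen_opt, disc_opt = add_optimization(
    learning_rate=2e-4, beta1=0.5, beta2=0.999,
    disc_gen=d_logits_fake, disc_true=d_logits_real,
    gen_label='generator', disc_label='discriminator')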
Example #7
    def compute_losses(self, images, wrong_images, fake_images, embeddings):
        real_logit = self.model.get_discriminator(images, embeddings)
        wrong_logit = self.model.get_discriminator(wrong_images, embeddings)
        fake_logit = self.model.get_discriminator(fake_images, embeddings)

        real_d_loss = \
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=real_logit, labels=tf.ones_like(real_logit))
        real_d_loss = tf.reduce_mean(real_d_loss)
        wrong_d_loss = \
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=wrong_logit, labels=tf.zeros_like(wrong_logit))
        wrong_d_loss = tf.reduce_mean(wrong_d_loss)
        fake_d_loss = \
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=fake_logit, labels=tf.zeros_like(fake_logit))
        fake_d_loss = tf.reduce_mean(fake_d_loss)
        if cfg.TRAIN.B_WRONG:
            discriminator_loss =\
                real_d_loss + (wrong_d_loss + fake_d_loss) / 2.
            self.log_vars.append(("d_loss_wrong", wrong_d_loss))
        else:
            discriminator_loss = real_d_loss + fake_d_loss
        self.log_vars.append(("d_loss_real", real_d_loss))
        self.log_vars.append(("d_loss_fake", fake_d_loss))

        generator_loss = \
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=fake_logit, labels=tf.ones_like(fake_logit))
        generator_loss = tf.reduce_mean(generator_loss)

        return discriminator_loss, generator_loss
Example #8
def add_dyprune(weights):
    crate = config.crate[weights.name[:-2]]  # hyperparameter: the "c rate"
    prune_mask = tf.Variable(tf.ones_like(weights),
                             name=weights.name[:-2] + 'mask', trainable=False)

    # Calculate the mask from the mean and std of the surviving weights.
    mean = tf.divide(tf.reduce_sum(tf.multiply(tf.abs(weights), prune_mask)),
                     tf.reduce_sum(prune_mask))
    var = tf.multiply(weights, prune_mask)
    var = tf.square(var)
    mean_q = tf.square(mean) * tf.reduce_sum(prune_mask)
    var = tf.reduce_sum(var) - mean_q
    var = tf.divide(var, tf.reduce_sum(prune_mask))
    var = tf.sqrt(var)
    t1_lower = (mean + var * crate) * 0.25  # hyperparameter a
    t1_upper = (mean + var * crate) * 0.45  # hyperparameter b

    indicator_lower1 = tf.greater_equal(tf.abs(weights), tf.ones_like(weights) * t1_lower)
    indicator_upper1 = tf.greater_equal(tf.abs(weights), tf.ones_like(weights) * t1_upper)
    indicator_matrix1 = tf.greater_equal(prune_mask, tf.zeros_like(weights))
    indicator_matrix1 = tf.logical_and(indicator_matrix1, indicator_lower1)
    indicator_matrix1 = tf.logical_or(indicator_matrix1, indicator_upper1)
    indicator_matrix1 = tf.to_float(indicator_matrix1)
    # Note: this assign op must be run explicitly for the mask to update.
    update = prune_mask.assign(indicator_matrix1)

    prune_fc = tf.multiply(weights, prune_mask)
    return prune_fc
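
A hedged usage sketch; it assumes `config.crate` has an entry keyed by the variable name without its ':0' suffix:

# 'fc1' is an illustrative variable name; config.crate['fc1'] must exist.
weights = tf.get_variable('fc1', shape=[256, 128])
pruned = add_dyprune(weights)  # use `pruned` in place of `weights` downstream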
Example #9
 def p_zt(self, prev_state, t):
   """Computes the model p(z_t| z_{t-1})."""
   batch_size = tf.shape(prev_state)[0]
   if t > 0:
     z_mu_p = prev_state + self.bs[t - 1]
     p_zt = tf.contrib.distributions.Normal(
         loc=z_mu_p, scale=tf.sqrt(tf.ones_like(z_mu_p) * self.variance))
     return p_zt
   else:  # p(z_0) is mixture of two Normals
     mu_pos = tf.ones([batch_size, self.state_size], dtype=self.dtype) * self.prior_mode_mean
     mu_neg = tf.ones([batch_size, self.state_size], dtype=self.dtype) * -self.prior_mode_mean
     z0_pos = tf.contrib.distributions.Normal(
         loc=mu_pos,
         scale=tf.sqrt(tf.ones_like(mu_pos) * self.variance))
     z0_neg = tf.contrib.distributions.Normal(
         loc=mu_neg,
         scale=tf.sqrt(tf.ones_like(mu_neg) * self.variance))
     mode_probs = tf.convert_to_tensor([self.mixing_coeff, 1-self.mixing_coeff], dtype=tf.float64)
     mode_probs = tf.tile(mode_probs[tf.newaxis, tf.newaxis, :], [batch_size, 1, 1])
     mode_selection_dist = tf.contrib.distributions.Categorical(probs=mode_probs)
     z0_dist = tf.contrib.distributions.Mixture(
         cat=mode_selection_dist,
         components=[z0_pos, z0_neg],
         validate_args=False)
     return z0_dist
Example #10
    def build_model(self):
        if self.y_dim:
            self.y = tf.placeholder(tf.float32, [None, self.y_dim], name='y')

        self.images = tf.placeholder(tf.float32, [self.batch_size] + self.image_shape,
                                     name='real_images')
        self.sample_images = tf.placeholder(tf.float32, [self.sample_size] + self.image_shape,
                                            name='sample_images')
        self.z = tf.placeholder(tf.float32, [None, self.z_dim],
                                name='z')

        self.G = self.generator(self.z)
        self.D = self.discriminator(self.images)

        self.sampler = self.sampler(self.z)
        self.D_ = self.discriminator(self.G, reuse=True)

        self.d_loss_real = binary_cross_entropy_with_logits(tf.ones_like(self.D), self.D)
        self.d_loss_fake = binary_cross_entropy_with_logits(tf.zeros_like(self.D_), self.D_)
        self.g_loss = binary_cross_entropy_with_logits(tf.ones_like(self.D_), self.D_)

        self.d_loss = self.d_loss_real + self.d_loss_fake

        t_vars = tf.trainable_variables()

        self.d_vars = [var for var in t_vars if 'd_' in var.name]
        self.g_vars = [var for var in t_vars if 'g_' in var.name]

        self.saver = tf.train.Saver()
Example #11
def prune_outside_window(keypoints, window, scope=None):
  """Prunes keypoints that fall outside a given window.

  This function replaces keypoints that fall outside the given window with nan.
  See also clip_to_window which clips any keypoints that fall outside the given
  window.

  Args:
    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
      window outside of which the op should prune the keypoints.
    scope: name scope.

  Returns:
    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
  """
  with tf.name_scope(scope, 'PruneOutsideWindow'):
    y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)

    valid_indices = tf.logical_and(
        tf.logical_and(y >= win_y_min, y <= win_y_max),
        tf.logical_and(x >= win_x_min, x <= win_x_max))

    new_y = tf.where(valid_indices, y, np.nan * tf.ones_like(y))
    new_x = tf.where(valid_indices, x, np.nan * tf.ones_like(x))
    new_keypoints = tf.concat([new_y, new_x], 2)

    return new_keypoints
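
A small illustrative check (not from the original source): one keypoint inside the unit window, one outside:

import numpy as np
import tensorflow as tf

keypoints = tf.constant([[[0.25, 0.25], [1.50, 0.30]]])  # [1, 2, 2]
window = tf.constant([0.0, 0.0, 1.0, 1.0])  # [y_min, x_min, y_max, x_max]
pruned = prune_outside_window(keypoints, window)
with tf.Session() as sess:
    print(sess.run(pruned))  # the second keypoint becomes [nan, nan]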
Example #12
    def __init__(self,
                 sess,
                 batch_size=32,
                 image_size=256,
                 lam=0.8,
                 checkpoint_dir=None):
        self.sess = sess
        self.batch_size = batch_size
        self.image_size = image_size
        self.image_shape = [image_size, image_size, 3]
        self.lam = lam
        self.checkpoint_dir = checkpoint_dir
        self.global_step = tf.Variable(0, trainable=False)

        self.images = tf.placeholder(tf.float32, [batch_size] + self.image_shape, name='images')
        self.images_summary = tf.summary.image("image", self.images)

        self.d_bns = [batch_norm(name='d_bn{}'.format(i)) for i in range(5)]
        self.local_d_bns = [batch_norm(name='d_local_bn{}'.format(i)) for i in range(4)]
        self.g_bns = [batch_norm(name='g_bn{}'.format(i)) for i in range(15)]

        self.D, self.D_logits = self.discriminator(self.images, self.image_size)
        self.d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
            logits=self.D_logits, labels=tf.ones_like(self.D)))
        self.D_summary = tf.summary.histogram("d", self.D)
        self.d_loss_real_summary = tf.summary.scalar("d_loss_real", self.d_loss_real)

        self.masks = tf.placeholder(tf.float32, [batch_size] + self.image_shape, name='masks')
        self.MG = tf.multiply(self.images, self.masks)
        self.G = self.generator(self.MG)
        self.MG_summary = tf.summary.image("mg", self.MG)
        self.G_summary = tf.summary.image("g", self.G)

        self.D_fake, self.D_fake_logits = self.discriminator(self.G, self.image_size, reuse=True)
        self.d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
            logits=self.D_fake_logits, labels=tf.zeros_like(self.D_fake)))
        self.D_fake_summary = tf.summary.histogram("d_fake", self.D_fake)
        self.d_loss_fake_summary = tf.summary.scalar("d_loss_fake", self.d_loss_fake)
        self.d_loss = self.d_loss_real + self.d_loss_fake

        self.g_loss_d = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
            logits=self.D_fake_logits, labels=tf.ones_like(self.D_fake)))
        self.g_loss_l = tf.reduce_mean(tf.contrib.layers.flatten(
            tf.multiply(self.G - self.images, self.G - self.images)))
        self.g_loss = (1 - self.lam) * self.g_loss_d + self.lam * self.g_loss_l
        self.g_loss_d_summary = tf.summary.scalar("g_loss_d", self.g_loss_d)
        self.g_loss_l_summary = tf.summary.scalar("g_loss_l", self.g_loss_l)
        self.g_loss_summary = tf.summary.scalar("g_loss", self.g_loss)

        t_vars = tf.trainable_variables()
        self.d_vars = [var for var in t_vars if 'd_' in var.name]
        self.g_vars = [var for var in t_vars if 'g_' in var.name]
        self.saver = tf.train.Saver(max_to_keep=10)

        self.g_summary = tf.summary.merge([
            self.G_summary, self.MG_summary, self.D_fake_summary, self.d_loss_fake_summary,
            self.g_loss_summary, self.g_loss_d_summary, self.g_loss_l_summary])
        self.d_summary = tf.summary.merge([
            self.images_summary, self.D_summary, self.d_loss_real_summary])
        self.writer = tf.summary.FileWriter(os.path.join(self.checkpoint_dir, "logs"), self.sess.graph)
Example #13
  def build_loss_and_gradients(self, var_list):
    x_true = list(six.itervalues(self.data))[0]
    x_fake = list(six.iterkeys(self.data))[0]
    with tf.variable_scope("Disc"):
      d_true = self.discriminator(x_true)

    with tf.variable_scope("Disc", reuse=True):
      d_fake = self.discriminator(x_fake)

    if self.logging:
      tf.summary.histogram("discriminator_outputs",
                           tf.concat([d_true, d_fake], axis=0),
                           collections=[self._summary_key])

    loss_d = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.ones_like(d_true), logits=d_true) + \
        tf.nn.sigmoid_cross_entropy_with_logits(
            labels=tf.zeros_like(d_fake), logits=d_fake)
    loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.ones_like(d_fake), logits=d_fake)
    loss_d = tf.reduce_mean(loss_d)
    loss = tf.reduce_mean(loss)

    var_list_d = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope="Disc")
    if var_list is None:
      var_list = [v for v in tf.trainable_variables() if v not in var_list_d]

    grads_d = tf.gradients(loss_d, var_list_d)
    grads = tf.gradients(loss, var_list)
    grads_and_vars_d = list(zip(grads_d, var_list_d))
    grads_and_vars = list(zip(grads, var_list))
    return loss, grads_and_vars, loss_d, grads_and_vars_d
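
A hedged sketch of a discriminator compatible with this loss: any callable mapping data to unnormalized logits works (the layer sizes here are illustrative):

def discriminator(x):
    # Two dense layers producing a single unnormalized logit per example.
    h = tf.layers.dense(x, 64, activation=tf.nn.relu)
    return tf.layers.dense(h, 1)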
Example #14
  def _sample_n(self, n, seed=None):
    n_draws = tf.cast(self.total_count, dtype=tf.int32)
    k = self.event_shape_tensor()[0]

    # broadcast the total_count and logits to same shape
    n_draws = tf.ones_like(
        self.logits[..., 0], dtype=n_draws.dtype) * n_draws
    logits = tf.ones_like(
        n_draws[..., tf.newaxis], dtype=self.logits.dtype) * self.logits

    # flatten the total_count and logits
    flat_logits = tf.reshape(logits, [-1, k])  # [B1B2...Bm, k]
    flat_ndraws = n * tf.reshape(n_draws, [-1])  # [B1B2...Bm]

    # computes each total_count and logits situation by map_fn
    def _sample_single(args):
      logits, n_draw = args[0], args[1]  # [K], []
      x = tf.multinomial(logits[tf.newaxis, ...], n_draw,
                         seed)  # [1, n*n_draw]
      x = tf.reshape(x, shape=[n, -1])  # [n, n_draw]
      x = tf.reduce_sum(tf.one_hot(x, depth=k), axis=-2)  # [n, k]
      return x

    x = tf.map_fn(
        _sample_single, [flat_logits, flat_ndraws],
        dtype=self.dtype)  # [B1B2...Bm, n, k]

    # reshape the results to proper shape
    x = tf.transpose(x, perm=[1, 0, 2])
    final_shape = tf.concat([[n], self.batch_shape_tensor(), [k]], 0)
    x = tf.reshape(x, final_shape)  # [n, B1, B2,..., Bm, k]
    return x
Example #15
    def build_losses(self, logits_real, logits_fake):
        """D and G play two-player minimax game with value function V(G,D)

          min_G max_D V(D, G) = E_{x ~ p_data}[log D(x)] + E_{z ~ p_fake}[log(1 - D(G(z)))]

        Args:
            logits_real (tf.Tensor): discrim logits from real samples
            logits_fake (tf.Tensor): discrim logits from fake samples produced by generator
        """
        with tf.name_scope("GAN_loss"):
            score_real = tf.sigmoid(logits_real)
            score_fake = tf.sigmoid(logits_fake)
            tf.summary.histogram('score-real', score_real)
            tf.summary.histogram('score-fake', score_fake)

            with tf.name_scope("discrim"):
                d_loss_pos = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=logits_real, labels=tf.ones_like(logits_real)), name='loss_real')
                d_loss_neg = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=logits_fake, labels=tf.zeros_like(logits_fake)), name='loss_fake')

                d_pos_acc = tf.reduce_mean(tf.cast(score_real > 0.5, tf.float32), name='accuracy_real')
                d_neg_acc = tf.reduce_mean(tf.cast(score_fake < 0.5, tf.float32), name='accuracy_fake')

                d_accuracy = tf.add(.5 * d_pos_acc, .5 * d_neg_acc, name='accuracy')
                self.d_loss = tf.add(.5 * d_loss_pos, .5 * d_loss_neg, name='loss')

            with tf.name_scope("gen"):
                self.g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=logits_fake, labels=tf.ones_like(logits_fake)), name='loss')
                g_accuracy = tf.reduce_mean(tf.cast(score_fake > 0.5, tf.float32), name='accuracy')

            add_moving_summary(self.g_loss, self.d_loss, d_accuracy, g_accuracy)
    def build_model(self):
        self.is_training = tf.placeholder(tf.bool, name='is_training')
        self.images = tf.placeholder(
            tf.float32, [None] + self.image_shape, name='real_images')
        self.lowres_images = tf.reduce_mean(tf.reshape(self.images,
            [self.batch_size, self.lowres_size, self.lowres,
             self.lowres_size, self.lowres, self.c_dim]), [2, 4])
        self.z = tf.placeholder(tf.float32, [None, self.z_dim], name='z')
        self.z_sum = tf.summary.histogram("z", self.z)

        self.G = self.generator(self.z)
        self.lowres_G = tf.reduce_mean(tf.reshape(self.G,
            [self.batch_size, self.lowres_size, self.lowres,
             self.lowres_size, self.lowres, self.c_dim]), [2, 4])
        self.D, self.D_logits = self.discriminator(self.images)

        self.D_, self.D_logits_ = self.discriminator(self.G, reuse=True)

        self.d_sum = tf.summary.histogram("d", self.D)
        self.d__sum = tf.summary.histogram("d_", self.D_)
        self.G_sum = tf.summary.image("G", self.G)

        self.d_loss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_logits,
                                                    labels=tf.ones_like(self.D)))
        self.d_loss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_logits_,
                                                    labels=tf.zeros_like(self.D_)))
        self.g_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_logits_,
                                                    labels=tf.ones_like(self.D_)))

        self.d_loss_real_sum = tf.summary.scalar("d_loss_real", self.d_loss_real)
        self.d_loss_fake_sum = tf.summary.scalar("d_loss_fake", self.d_loss_fake)

        self.d_loss = self.d_loss_real + self.d_loss_fake

        self.g_loss_sum = tf.summary.scalar("g_loss", self.g_loss)
        self.d_loss_sum = tf.summary.scalar("d_loss", self.d_loss)

        t_vars = tf.trainable_variables()

        self.d_vars = [var for var in t_vars if 'd_' in var.name]
        self.g_vars = [var for var in t_vars if 'g_' in var.name]

        self.saver = tf.train.Saver(max_to_keep=1)

        # Completion.
        self.mask = tf.placeholder(tf.float32, self.image_shape, name='mask')
        self.lowres_mask = tf.placeholder(tf.float32, self.lowres_shape, name='lowres_mask')
        self.contextual_loss = tf.reduce_sum(
            tf.contrib.layers.flatten(
                tf.abs(tf.multiply(self.mask, self.G) - tf.multiply(self.mask, self.images))), 1)
        self.contextual_loss += tf.reduce_sum(
            tf.contrib.layers.flatten(
                tf.abs(tf.multiply(self.lowres_mask, self.lowres_G) - tf.multiply(self.lowres_mask, self.lowres_images))), 1)
        self.perceptual_loss = self.g_loss
        self.complete_loss = self.contextual_loss + self.lam*self.perceptual_loss
        self.grad_complete_loss = tf.gradients(self.complete_loss, self.z)
    def build_model(self):
        # some parameters
        image_dims = [self.input_height, self.input_width, self.c_dim]
        bs = self.batch_size

        """ Graph Input """
        # images
        self.inputs = tf.placeholder(tf.float32, [bs] + image_dims, name='real_images')

        # noises
        self.z = tf.placeholder(tf.float32, [bs, self.z_dim], name='z')

        """ Loss Function """

        # output of D for real images
        D_real, D_real_logits, _ = self.discriminator(self.inputs, is_training=True, reuse=False)

        # output of D for fake images
        G = self.generator(self.z, is_training=True, reuse=False)
        D_fake, D_fake_logits, _ = self.discriminator(G, is_training=True, reuse=True)

        # get loss for discriminator
        d_loss_real = tf.reduce_mean(self.mse_loss(D_real_logits, tf.ones_like(D_real_logits)))
        d_loss_fake = tf.reduce_mean(self.mse_loss(D_fake_logits, tf.zeros_like(D_fake_logits)))

        self.d_loss = 0.5*(d_loss_real + d_loss_fake)

        # get loss for generator
        self.g_loss = tf.reduce_mean(self.mse_loss(D_fake_logits, tf.ones_like(D_fake_logits)))

        """ Training """
        # divide trainable variables into a group for D and a group for G
        t_vars = tf.trainable_variables()
        d_vars = [var for var in t_vars if 'd_' in var.name]
        g_vars = [var for var in t_vars if 'g_' in var.name]

        # optimizers
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            self.d_optim = tf.train.AdamOptimizer(self.learning_rate, beta1=self.beta1) \
                      .minimize(self.d_loss, var_list=d_vars)
            self.g_optim = tf.train.AdamOptimizer(self.learning_rate*5, beta1=self.beta1) \
                      .minimize(self.g_loss, var_list=g_vars)

        # weight clipping
        self.clip_D = [p.assign(tf.clip_by_value(p, -0.01, 0.01)) for p in d_vars]

        """" Testing """
        # for test
        self.fake_images = self.generator(self.z, is_training=False, reuse=True)

        """ Summary """
        d_loss_real_sum = tf.summary.scalar("d_loss_real", d_loss_real)
        d_loss_fake_sum = tf.summary.scalar("d_loss_fake", d_loss_fake)
        d_loss_sum = tf.summary.scalar("d_loss", self.d_loss)
        g_loss_sum = tf.summary.scalar("g_loss", self.g_loss)

        # final summary operations
        self.g_sum = tf.summary.merge([d_loss_fake_sum, g_loss_sum])
        self.d_sum = tf.summary.merge([d_loss_real_sum, d_loss_sum])
Example #18
def din_fcn_attention(query, facts, attention_size, mask, stag='null', mode='SUM', softmax_stag=1, time_major=False, return_alphas=False, forCnn=False):
    if isinstance(facts, tuple):
        # In case of Bi-RNN, concatenate the forward and the backward RNN
        # outputs.
        facts = tf.concat(facts, 2)
    if len(facts.get_shape().as_list()) == 2:
        facts = tf.expand_dims(facts, 1)

    if time_major:
        # (T,B,D) => (B,T,D)
        facts = tf.transpose(facts, [1, 0, 2])
    # Trainable parameters
    mask = tf.equal(mask, tf.ones_like(mask))
    # D value - hidden size of the RNN layer
    facts_size = facts.get_shape().as_list()[-1]
    print("facts_size %s" % facts_size)
    query_size = query.get_shape().as_list()[-1]
    print("query_size %s" % query_size)
    #tf.truncated_normal_initializer(dtype=tf.float32, stddev=0.36, seed=3)
    query = tf.layers.dense(
        query, facts_size, activation=None, kernel_initializer=get_tf_initializer(), name='f1' + stag)
    query = prelu(query, scope=stag)
    queries = tf.tile(query, [1, tf.shape(facts)[1]])
    queries = tf.reshape(queries, tf.shape(facts))
    din_all = tf.concat(
        [queries, facts, queries - facts, queries * facts], axis=-1)
    d_layer_1_all = tf.layers.dense(
        din_all, 80, activation=tf.nn.sigmoid, kernel_initializer=get_tf_initializer(), name='f1_att' + stag)
    d_layer_2_all = tf.layers.dense(
        d_layer_1_all, 40, activation=tf.nn.sigmoid, kernel_initializer=get_tf_initializer(), name='f2_att' + stag)
    d_layer_3_all = tf.layers.dense(
        d_layer_2_all, 1, activation=None, kernel_initializer=get_tf_initializer(), name='f3_att' + stag)
    d_layer_3_all = tf.reshape(d_layer_3_all, [-1, 1, tf.shape(facts)[1]])
    scores = d_layer_3_all
    # Mask
    # key_masks = tf.sequence_mask(facts_length, tf.shape(facts)[1])   # [B, T]
    key_masks = tf.expand_dims(mask, 1)  # [B, 1, T]
    paddings = tf.ones_like(scores) * (-2 ** 32 + 1)
    if not forCnn:
        scores = tf.where(key_masks, scores, paddings)  # [B, 1, T]

    # Scale
    # scores = scores / (facts.get_shape().as_list()[-1] ** 0.5)

    # Activation
    if softmax_stag:
        scores = tf.nn.softmax(scores)  # [B, 1, T]

    # Weighted sum
    if mode == 'SUM':
        output = tf.matmul(scores, facts)  # [B, 1, H]
        # output = tf.reshape(output, [-1, tf.shape(facts)[-1]])
    else:
        scores = tf.reshape(scores, [-1, tf.shape(facts)[1]])
        output = facts * tf.expand_dims(scores, -1)
        output = tf.reshape(output, tf.shape(facts))
    if return_alphas:
        return output, scores
    return output
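
A hedged shape sketch (assumes the `prelu` and `get_tf_initializer` helpers used above are in scope; the sizes are illustrative):

query = tf.placeholder(tf.float32, [None, 32])      # [B, H]
facts = tf.placeholder(tf.float32, [None, 20, 32])  # [B, T, H]
mask = tf.placeholder(tf.float32, [None, 20])       # 1.0 marks valid steps
output = din_fcn_attention(query, facts, 32, mask)  # -> [B, 1, H]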
    def __loss__(self):
        """
        Calculate loss
        :return:
        """
        # regularization ?

        # self.d_loss_real = tf.reduce_mean(
        #     tf.nn.sigmoid_cross_entropy_with_logits(logits=self.predict_d_logits,
        #                                             labels=tf.ones_like(self.predict_d)))
        self.d_loss_real = tf.reduce_mean(
            ops.binary_cross_entropy(preds=self.predict_d, targets=tf.ones_like(self.predict_d)))

        tf.summary.scalar('d_loss_real', self.d_loss_real, collections='D')

        # self.d_loss_fake = tf.reduce_mean(
        #     tf.nn.sigmoid_cross_entropy_with_logits(logits=self.predict_d_logits_for_g,
        #                                             labels=tf.zeros_like(self.predict_d_for_g)))

        self.d_loss_fake = tf.reduce_mean(
            ops.binary_cross_entropy(preds=self.predict_d_for_g, targets=tf.zeros_like(self.predict_d_for_g)))

        tf.summary.scalar('d_loss_fake', self.d_loss_fake, collections='D')

        self.d_loss = self.d_loss_real + self.d_loss_fake
        tf.summary.scalar('d_loss', self.d_loss, collections='D')

        if len(self.regularization_values_d) > 0:
            reg_loss_d = self.reg_w * tf.reduce_sum(self.regularization_values_d)
            self.d_loss += reg_loss_d
            tf.summary.scalar('d_loss_plus_reg', self.d_loss, collections='D')
            tf.summary.scalar('d_loss_reg_only', reg_loss_d, collections='D')

        # Generative loss
        # g_loss = tf.reduce_mean(
        #     tf.nn.sigmoid_cross_entropy_with_logits(logits=self.predict_d_logits_for_g,
        #                                             labels=tf.ones_like(self.predict_d_for_g)))
        g_loss = tf.reduce_mean(
            ops.binary_cross_entropy(preds=self.predict_d_for_g, targets=tf.ones_like(self.predict_d_for_g)))

        tf.summary.scalar('g_loss', g_loss, collections='G')

        context_loss = tf.reduce_mean(tf.square(tf.squeeze(self.predict_g) - self.labels), name='L2-Loss')
        tf.summary.scalar('g_loss_context_only', context_loss, collections='G')

        # print("from inside %f" % self.FLAGS.gen_loss_adversarial)
        # self.g_loss = self.FLAGS.gen_loss_adversarial * g_loss + self.FLAGS.gen_loss_context * context_loss

        self.g_loss = self.adb_loss_w * g_loss + self.FLAGS.gen_loss_context * context_loss

        tf.summary.scalar('g_loss_plus_context', self.g_loss, collections='G')

        if len(self.regularization_values) > 0:
            reg_loss_g = self.reg_w * tf.reduce_sum(self.regularization_values)
            self.g_loss += reg_loss_g
            tf.summary.scalar('g_loss_plus_context_plus_reg', self.g_loss, collections='G')
            tf.summary.scalar('g_loss_reg_only', reg_loss_g, collections='D')

        tf.summary.scalar('diff-loss', tf.abs(self.d_loss - self.g_loss), collections='G')
Example #20
 def cross_entropy_op(pred, **kwargs):
     if is_real:
         target = (1. - softness) * tf.ones_like(pred)
     else:
         target = softness * tf.ones_like(pred)
     entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=pred,
                                                       labels=target)
     return tf.reduce_mean(entropy)
Example #21
 def scale(self, x):
   """Scale x from -0.5 - 0.5 to 0 - 255."""
   x = tf.where(tf.is_nan(x), tf.ones_like(x), x)
   x = tf.where(tf.is_inf(x), tf.ones_like(x), x)
   x = tf.clip_by_value(x, -0.5, 0.5)
   x += 0.5
   x = x * 2**self.hparams.n_bits_x
   return tf.cast(tf.clip_by_value(x, 0, 255), dtype=tf.uint8)
Example #22
 def loss_function(self, batch_logits, neg_logits):
     # Positive (batch) examples are labelled 1, negative samples 0.
     batch_xent = tf.nn.sigmoid_cross_entropy_with_logits(
         logits=batch_logits, labels=tf.ones_like(batch_logits))
     neg_xent = tf.nn.sigmoid_cross_entropy_with_logits(
         logits=neg_logits, labels=tf.zeros_like(neg_logits))
     nce_loss_tensor = (tf.reduce_sum(batch_xent) +
                        tf.reduce_sum(neg_xent)) / self.batch_size
     return nce_loss_tensor
 def _logistic_label(X, Y, rPos, rNeg):
   # dist_to_center = tf.sqrt(tf.square(X) + tf.square(Y))  # L2 metric
   dist_to_center = tf.abs(X) + tf.abs(Y)  # Block metric
   Z = tf.where(dist_to_center <= rPos,
                tf.ones_like(X),
                tf.where(dist_to_center < rNeg,
                         0.5 * tf.ones_like(X),
                         tf.zeros_like(X)))
   return Z
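
A small illustrative use: build a 5x5 label grid centred on the target (values are 1 inside rPos, 0.5 between rPos and rNeg, and 0 outside):

X, Y = tf.meshgrid(tf.range(-2., 3.), tf.range(-2., 3.))
Z = _logistic_label(X, Y, rPos=1.0, rNeg=2.0)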
Example #24
    def __init__(self,
                 z_dim, img_h, img_w, c_dim,
                 g_learning_rate, d_learning_rate,
                 g_beta1, d_beta1,
                 gf_dim=64, df_dim=64):

        # initialize batch normalization
        self.g_bn0 = batch_norm(name='g_bn0')
        self.g_bn1 = batch_norm(name='g_bn1')
        self.g_bn2 = batch_norm(name='g_bn2')
        self.g_bn3 = batch_norm(name='g_bn3')

        self.d_bn1 = batch_norm(name='d_bn1')
        self.d_bn2 = batch_norm(name='d_bn2')
        self.d_bn3 = batch_norm(name='d_bn3')

        self.z_dim = z_dim
        self.img_h = img_h
        self.img_w = img_w
        self.c_dim = c_dim
        self.g_learning_rate = g_learning_rate
        self.d_learning_rate = d_learning_rate
        self.g_beta1 = g_beta1
        self.d_beta1 = d_beta1
        self.gf_dim = gf_dim
        self.df_dim = df_dim

        # set placeholder
        self.z = tf.placeholder(tf.float32, shape=[None, self.z_dim], name='noise')
        self.x = tf.placeholder(tf.float32, shape=[None, self.img_h, self.img_w, self.c_dim], name='real_data')

        self.G = self.generator(self.z, reuse=False)
        self.D_real, self.D_real_logits = self.discriminator(self.x, reuse=False)
        self.D_fake, self.D_fake_logits = self.discriminator(self.G, reuse=True)

        # calculate loss
        self.d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
            logits=self.D_real_logits, labels=tf.ones_like(self.D_real)))
        self.d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.D_fake_logits, labels=tf.zeros_like(self.D_fake)))
        self.d_loss = self.d_loss_real + self.d_loss_fake
        self.g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                logits=self.D_fake_logits, labels=tf.ones_like(self.D_fake)))

        # get trainable variables
        var_list = tf.trainable_variables()
        self.d_vars = [v for v in var_list if v.name.startswith('D/')]
        self.g_vars = [v for v in var_list if v.name.startswith('G/')]

        # set optimizer
        self.d_opt = optimizer(self.d_loss, self.d_vars, self.d_learning_rate, self.d_beta1)
        self.g_opt = optimizer(self.g_loss, self.g_vars, self.g_learning_rate, self.g_beta1)

        # other tensors
        self.saver = tf.train.Saver()
        self.sampler = self.sampler(self.z)
        self.d_features = self.features_discriminator(self.x)
Example #25
  def build_model(self):
    if self.y_dim:
      self.y = tf.placeholder(tf.float32, [self.batch_size, self.y_dim], name='y')
    else:
      self.y = None

    if self.crop:
      image_dims = [self.output_height, self.output_width, self.c_dim]
    else:
      image_dims = [self.input_height, self.input_width, self.c_dim]

    self.inputs = tf.placeholder(
      tf.float32, [self.batch_size] + image_dims, name='real_images')

    inputs = self.inputs

    self.z = tf.placeholder(
      tf.float32, [None, self.z_dim], name='z')
    self.z_sum = histogram_summary("z", self.z)

    self.G                  = self.generator(self.z, self.y)
    self.D, self.D_logits   = self.discriminator(inputs, self.y, reuse=False)
    self.sampler            = self.sampler(self.z, self.y)
    self.D_, self.D_logits_ = self.discriminator(self.G, self.y, reuse=True)
    
    self.d_sum = histogram_summary("d", self.D)
    self.d__sum = histogram_summary("d_", self.D_)
    self.G_sum = image_summary("G", self.G)

    def sigmoid_cross_entropy_with_logits(x, y):
      # Handle the keyword rename (targets -> labels) across TF 1.x versions.
      try:
        return tf.nn.sigmoid_cross_entropy_with_logits(logits=x, labels=y)
      except TypeError:
        return tf.nn.sigmoid_cross_entropy_with_logits(logits=x, targets=y)

    self.d_loss_real = tf.reduce_mean(
      sigmoid_cross_entropy_with_logits(self.D_logits, tf.ones_like(self.D)))
    self.d_loss_fake = tf.reduce_mean(
      sigmoid_cross_entropy_with_logits(self.D_logits_, tf.zeros_like(self.D_)))
    self.g_loss = tf.reduce_mean(
      sigmoid_cross_entropy_with_logits(self.D_logits_, tf.ones_like(self.D_)))

    self.d_loss_real_sum = scalar_summary("d_loss_real", self.d_loss_real)
    self.d_loss_fake_sum = scalar_summary("d_loss_fake", self.d_loss_fake)
                          
    self.d_loss = self.d_loss_real + self.d_loss_fake

    self.g_loss_sum = scalar_summary("g_loss", self.g_loss)
    self.d_loss_sum = scalar_summary("d_loss", self.d_loss)

    t_vars = tf.trainable_variables()

    self.d_vars = [var for var in t_vars if 'd_' in var.name]
    self.g_vars = [var for var in t_vars if 'g_' in var.name]

    self.saver = tf.train.Saver()
Example #26
def tf_F1_score(actuals, predictions):
    actuals = tf.reshape(actuals, [-1, 1])
    predictions = tf.reshape(predictions, [-1, 1])

    ones_like_actuals = tf.ones_like(actuals)
    zeros_like_actuals = tf.zeros_like(actuals)
    ones_like_predictions = tf.ones_like(predictions)
    zeros_like_predictions = tf.zeros_like(predictions)

    # true positive
    tp_op = tf.reduce_sum(
        tf.cast(
            tf.logical_and(
                tf.equal(actuals, ones_like_actuals),
                tf.equal(predictions, ones_like_predictions)
            ),
            dtype=tf.float32
        )
    )
    # true negative
    tn_op = tf.reduce_sum(
        tf.cast(
            tf.logical_and(
                tf.equal(actuals, zeros_like_actuals),
                tf.equal(predictions, zeros_like_predictions)
            ),
            dtype=tf.float32
        )
    )
    # false positive
    fp_op = tf.reduce_sum(
        tf.cast(
            tf.logical_and(
                tf.equal(actuals, zeros_like_actuals),
                tf.equal(predictions, ones_like_predictions)
            ),
            dtype=tf.float32
        )
    )
    # false negative
    fn_op = tf.reduce_sum(
        tf.cast(
            tf.logical_and(
                tf.equal(actuals, ones_like_actuals),
                tf.equal(predictions, zeros_like_predictions)
            ),
            dtype=tf.float32
        )
    )

    accuracy = (tp_op + tn_op) / (tp_op + tn_op + fp_op + fn_op)
    precision = tp_op / (tp_op + fp_op)
    recall = tp_op / (tp_op + fn_op)
    f1_score = (2 * (precision * recall)) / (precision + recall)

    return accuracy, [tp_op, tn_op, fp_op, fn_op, f1_score]
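
A quick illustrative check with hand-made binary vectors (tp = tn = fp = fn = 1, so accuracy and F1 are both 0.5):

actuals = tf.constant([1., 1., 0., 0.])
predictions = tf.constant([1., 0., 1., 0.])
accuracy, (tp, tn, fp, fn, f1) = tf_F1_score(actuals, predictions)
with tf.Session() as sess:
    print(sess.run([accuracy, f1]))  # -> [0.5, 0.5]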
Example #27
 def _log_prob(self, x):
   if self.validate_args:
     x = distribution_util.embed_check_nonnegative_integer_form(x)
   else:
     # For consistency with cdf, we take the floor.
     x = tf.floor(x)
   x *= tf.ones_like(self.probs)
   probs = self.probs * tf.ones_like(x)
   safe_domain = tf.where(tf.equal(x, 0.), tf.zeros_like(probs), probs)
   return x * tf.log1p(-safe_domain) + tf.log(probs)
Example #28
  def build_losses(self):
    """Builds the training losses.

    Inputs:
      self.predicted_distributions

    Outputs:
      self.batch_losses
      self.total_loss
    """
    autoregressive_target = self.autoregressive_input

    # Quantize the target if the output distribution is categorical.
    if self.hparams.output_distribution.type == "categorical":
      min_val = self.hparams.output_distribution.min_quantization_value
      max_val = self.hparams.output_distribution.max_quantization_value
      num_classes = self.hparams.output_distribution.num_classes
      clipped_target = tf.keras.backend.clip(autoregressive_target, min_val,
                                             max_val)
      quantized_target = tf.floor(
          (clipped_target - min_val) / (max_val - min_val) * num_classes)
      # Deal with the corner case where clipped_target equals max_val by mapping
      # the label num_classes to num_classes - 1. Essentially, this makes the
      # final quantized bucket a closed interval while all the other quantized
      # buckets are half-open intervals.
      quantized_target = tf.where(
          quantized_target >= num_classes,
          tf.ones_like(quantized_target) * (num_classes - 1), quantized_target)
      autoregressive_target = quantized_target

    log_prob = self.predicted_distributions.log_prob(autoregressive_target)

    weights = self.weights
    if weights is None:
      weights = tf.ones_like(log_prob)
    weights_dim = len(weights.shape)
    per_example_weight = tf.reduce_sum(
        weights, axis=list(range(1, weights_dim)))
    per_example_indicator = tf.to_float(tf.greater(per_example_weight, 0))
    num_examples = tf.reduce_sum(per_example_indicator)

    batch_losses = -log_prob * weights
    losses_ndims = batch_losses.shape.ndims
    per_example_loss_sum = tf.reduce_sum(
        batch_losses, axis=list(range(1, losses_ndims)))
    per_example_loss = tf.where(per_example_weight > 0,
                                per_example_loss_sum / per_example_weight,
                                tf.zeros_like(per_example_weight))
    total_loss = tf.reduce_sum(per_example_loss) / num_examples

    self.autoregressive_target = autoregressive_target
    self.batch_losses = batch_losses
    self.per_example_loss = per_example_loss
    self.num_nonzero_weight_examples = num_examples
    self.total_loss = total_loss
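
A hedged numeric check of the quantization step above (using tf.clip_by_value in place of tf.keras.backend.clip), with min_val=0.0, max_val=1.0, num_classes=4:

target = tf.constant([0.0, 0.2, 0.5, 1.0])
quantized = tf.floor(tf.clip_by_value(target, 0.0, 1.0) / (1.0 - 0.0) * 4)
# Map the corner case quantized == num_classes down to num_classes - 1.
quantized = tf.where(quantized >= 4, tf.ones_like(quantized) * 3, quantized)
with tf.Session() as sess:
    print(sess.run(quantized))  # -> [0. 0. 2. 3.]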
Example #29
    def __loss__(self):
        """
        Calculate loss
        :return:
        """
        with tf.variable_scope("discriminator") as scope:
            self.d_loss_real = tf.reduce_mean(
                ops.binary_cross_entropy(preds=self.predict_d, targets=tf.ones_like(self.predict_d)))
            tf.summary.scalar('d_loss_real', self.d_loss_real, collections='D')
            scope.reuse_variables()
            self.d_loss_fake = tf.reduce_mean(
                ops.binary_cross_entropy(preds=self.predict_d_for_g, targets=tf.zeros_like(self.predict_d_for_g)))
            tf.summary.scalar('d_loss_fake', self.d_loss_fake, collections='D')

            self.d_loss = self.d_loss_fake + self.d_loss_real
            tf.summary.scalar('d_loss', self.d_loss, collections='D')

        # if len(self.regularization_values_d) > 0:
        # reg_loss_d = self.reg_w * tf.reduce_sum(self.regularization_values_d)
        self.reg_loss_d = self.get_weights_regularization(dump=self.FLAGS.dump_debug, collection='D')
        self.d_loss_no_reg = self.d_loss
        self.d_loss += self.reg_loss_d
        if self.FLAGS.dump_debug:
            tf.summary.scalar('d_loss_plus_reg', self.d_loss, collections='D')
            tf.summary.scalar('d_loss_reg_only', self.reg_loss_d, collections='D')

        # Generative loss
        g_loss = tf.reduce_mean(
            ops.binary_cross_entropy(preds=self.predict_d_for_g, targets=tf.ones_like(self.predict_d_for_g)))

        tf.summary.scalar('g_loss', g_loss, collections='G')

        # Context loss L2
        mask_not = tf.cast(tf.logical_not(tf.cast(self.labels['mask'], tf.bool)), tf.float32)
        real_diff = tf.contrib.layers.flatten(tf.multiply(self.predict_g['real'] - self.labels['real'], mask_not))
        imag_diff = tf.contrib.layers.flatten(tf.multiply(self.predict_g['imag'] - self.labels['imag'], mask_not))
        self.context_loss = tf.reduce_mean(tf.square(real_diff) + tf.square(imag_diff), name='Context_loss_mean')
        print("You are using L2 loss")

        tf.summary.scalar('g_loss_context_only', self.context_loss, collections='G')

        self.g_loss = self.adv_loss_w * g_loss + self.FLAGS.gen_loss_context * self.context_loss
        # self.g_loss = self.FLAGS.gen_loss_adversarial * g_loss + self.FLAGS.gen_loss_context * context_loss
        tf.summary.scalar('g_loss_plus_context', self.g_loss, collections='G')

        # if len(self.regularization_values) > 0:
        # reg_loss_g = self.reg_w * tf.reduce_sum(self.regularization_values)
        self.reg_loss_g = self.get_weights_regularization(dump=self.FLAGS.dump_debug, collection='G')
        self.g_loss_no_reg = self.g_loss
        self.g_loss += self.reg_loss_g
        if self.FLAGS.dump_debug:
            tf.summary.scalar('g_loss_plus_context_plus_reg', self.g_loss, collections='G')
            tf.summary.scalar('g_loss_reg_only', self.reg_loss_g, collections='D')

        tf.summary.scalar('diff-loss', tf.abs(self.d_loss - self.g_loss), collections='G')
Example #30
def sensitivity(logits, labels):
    predictions = tf.argmax(logits, axis=-1)
    actuals = tf.argmax(labels, axis=-1)

    nodule_actuals = tf.ones_like(actuals)
    non_nodule_actuals = tf.zeros_like(actuals)
    nodule_predictions = tf.ones_like(predictions)
    non_nodule_predictions = tf.zeros_like(predictions)

    tp_op = tf.reduce_sum(
        tf.cast(
            tf.logical_and(
                tf.equal(actuals, nodule_actuals),
                tf.equal(predictions, nodule_predictions)
            ),
            tf.float32
        )
    )

    tn_op = tf.reduce_sum(
        tf.cast(
            tf.logical_and(
                tf.equal(actuals, non_nodule_actuals),
                tf.equal(predictions, non_nodule_predictions)
            ),
            tf.float32
        )
    )

    fp_op = tf.reduce_sum(
        tf.cast(
            tf.logical_and(
                tf.equal(actuals, non_nodule_actuals),
                tf.equal(predictions, nodule_predictions)
            ),
            tf.float32
        )
    )

    fn_op = tf.reduce_sum(
        tf.cast(
            tf.logical_and(
                tf.equal(actuals, nodule_actuals),
                tf.equal(predictions, non_nodule_predictions)
            ),
            tf.float32
        )
    )

    false_positive_rate = fp_op / (fp_op + tn_op)

    recall = tp_op / (tp_op + fn_op)

    return recall, false_positive_rate
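
An illustrative check (tp = tn = fp = fn = 1, so both recall and the false positive rate come out to 0.5):

logits = tf.constant([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7], [0.6, 0.4]])
labels = tf.constant([[0., 1.], [0., 1.], [1., 0.], [1., 0.]])
recall, fpr = sensitivity(logits, labels)
with tf.Session() as sess:
    print(sess.run([recall, fpr]))  # -> [0.5, 0.5]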
Example #31
 def pad_mask(seqs):
     # Use tf.equal: in TF1, `seqs == 0` does not build an elementwise op.
     mask = tf.where(tf.equal(seqs, 0), tf.zeros_like(seqs),
                     tf.ones_like(seqs))  # 0 idx is padding
     return tf.cast(tf.expand_dims(mask, axis=1) *
                    tf.expand_dims(mask, axis=2),
                    dtype=tf.bool)  # [n, step, step]
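
A tiny usage example: a batch of two zero-padded id sequences yields a [2, 3, 3] boolean attention mask:

seqs = tf.constant([[5, 3, 0], [7, 0, 0]])
mask = pad_mask(seqs)  # -> shape [2, 3, 3], dtype bool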
Example #32
def decoder(target, state, params):
    mask = dtype.tf_to_float(tf.cast(target, tf.bool))
    hidden_size = params.hidden_size

    is_training = ('decoder' not in state)

    if is_training:
        target, mask = util.remove_invalid_seq(target, mask)

    embed_name = "embedding" if params.shared_source_target_embedding \
        else "tgt_embedding"
    tgt_emb = tf.get_variable(embed_name,
                              [params.tgt_vocab.size(), params.embed_size])
    tgt_bias = tf.get_variable("bias", [params.embed_size])

    inputs = tf.gather(tgt_emb, target)
    inputs = tf.nn.bias_add(inputs, tgt_bias)

    # shift
    if is_training:
        inputs = tf.pad(inputs, [[0, 0], [1, 0], [0, 0]])
        inputs = inputs[:, :-1, :]
    else:
        inputs = tf.cond(
            tf.reduce_all(tf.equal(target, params.tgt_vocab.pad())),
            lambda: tf.zeros_like(inputs), lambda: inputs)
        mask = tf.ones_like(mask)

    inputs = util.valid_apply_dropout(inputs, params.dropout)

    with tf.variable_scope("decoder"):
        init_state = state["decoder_initializer"]
        if not is_training:
            init_state = state["decoder"]["state"]
        returns = rnn.cond_rnn(params.cell,
                               inputs,
                               state["encodes"],
                               hidden_size,
                               init_state=init_state,
                               mask=mask,
                               mem_mask=state["mask"],
                               ln=params.layer_norm,
                               sm=params.swap_memory,
                               one2one=False)
        (_, hidden_state), (outputs, _), contexts, attentions = returns

    feature = linear([outputs, contexts, inputs],
                     params.embed_size,
                     ln=params.layer_norm,
                     scope="pre_logits")
    if 'dev_decode' in state:
        feature = feature[:, -1, :]

    feature = tf.tanh(feature)
    feature = util.valid_apply_dropout(feature, params.dropout)

    embed_name = "tgt_embedding" if params.shared_target_softmax_embedding \
        else "softmax_embedding"
    embed_name = "embedding" if params.shared_source_target_embedding \
        else embed_name
    softmax_emb = tf.get_variable(embed_name,
                                  [params.tgt_vocab.size(), params.embed_size])
    feature = tf.reshape(feature, [-1, params.embed_size])
    logits = tf.matmul(feature, softmax_emb, False, True)

    logits = tf.cast(logits, tf.float32)

    soft_label, normalizer = util.label_smooth(target,
                                               util.shape_list(logits)[-1],
                                               factor=params.label_smooth)
    centropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                          labels=soft_label)
    centropy -= normalizer
    centropy = tf.reshape(centropy, tf.shape(target))

    mask = tf.cast(mask, tf.float32)
    per_sample_loss = tf.reduce_sum(centropy * mask, -1) / tf.reduce_sum(
        mask, -1)
    loss = tf.reduce_mean(per_sample_loss)

    # these mask tricks mainly used to deal with zero shapes, such as [0, 1]
    loss = tf.cond(tf.equal(tf.shape(target)[0], 0),
                   lambda: tf.constant(0, dtype=tf.float32), lambda: loss)

    if not is_training:
        state['decoder']['state'] = hidden_state

    return loss, logits, state, per_sample_loss
Example #33
def masked_conv_v2(u,
                   filter_1,
                   filter_2,
                   mask_1,
                   mask_2,
                   conductivity_1,
                   conductivity_2,
                   eps=0.01,
                   filter_banks=None):
    '''Still not accurate around the corners.'''
    # center
    mask_filter = np.asarray(
        [[1 / 4., 0., 1 / 4.], [0., 0., 0.], [1 / 4., 0., 1 / 4.]], 'float32')
    mask_filter = tf.constant(mask_filter.reshape((3, 3, 1, 1)))
    padded_mask_1 = tf.pad(mask_1, [[0, 0], [1, 1], [1, 1], [0, 0]],
                           "SYMMETRIC")
    boundary_weight = tf.nn.conv2d(input=padded_mask_1,
                                   filter=mask_filter,
                                   strides=[1, 1, 1, 1],
                                   padding='VALID')
    w = 1000.
    boundary_weight = 1 / 4. + 1 / 4. * tf.sigmoid(
        w * (boundary_weight - 1.25 / 4.)) + 1 / 4. * tf.sigmoid(
            w * (boundary_weight - 2.75 / 4.))
    boundary_mask = tf.round(mask_1 + 0.1) + tf.round(mask_2 + 0.1) - 1
    mat1_mask = (boundary_weight) * boundary_mask * (-8 / 3. * conductivity_1)
    mat2_mask = (1 - boundary_weight) * boundary_mask * (-8 / 3. *
                                                         conductivity_2)
    boundary_d_matrix = mat1_mask + mat2_mask
    mat1_d_matrix = tf.round(mask_1 - 0.1) * (-8 / 3. * conductivity_1)
    mat2_d_matrix = tf.round(mask_2 - 0.1) * (-8 / 3. * conductivity_2)
    d_matrix = boundary_d_matrix + mat1_d_matrix + mat2_d_matrix
    d_u = d_matrix * u

    # surrounding
    # not accurate on the boundary: combines two parts from the two different materials
    padded_u_1 = tf.pad(u * tf.round(mask_1 + eps),
                        [[0, 0], [1, 1], [1, 1], [0, 0]], "SYMMETRIC")
    output_1 = tf.nn.conv2d(input=padded_u_1,
                            filter=filter_1,
                            strides=[1, 1, 1, 1],
                            padding='VALID')
    padded_u_2 = tf.pad(u * tf.round(mask_2 + eps),
                        [[0, 0], [1, 1], [1, 1], [0, 0]], "SYMMETRIC")
    output_2 = tf.nn.conv2d(input=padded_u_2,
                            filter=filter_2,
                            strides=[1, 1, 1, 1],
                            padding='VALID')
    res1 = output_1 * mask_1 + output_2 * mask_2
    # accurate only on the boundary: combines two parts from the two different materials
    padded_u_1 = tf.pad(u * mask_1, [[0, 0], [1, 1], [1, 1], [0, 0]],
                        "SYMMETRIC")
    output_1 = tf.nn.conv2d(input=padded_u_1,
                            filter=filter_1,
                            strides=[1, 1, 1, 1],
                            padding='VALID')
    padded_u_2 = tf.pad(u * mask_2, [[0, 0], [1, 1], [1, 1], [0, 0]],
                        "SYMMETRIC")
    output_2 = tf.nn.conv2d(input=padded_u_2,
                            filter=filter_2,
                            strides=[1, 1, 1, 1],
                            padding='VALID')
    res2 = output_1 + output_2
    LU_u = res1 * (tf.round(mask_1 - 0.1) + tf.round(mask_2 - 0.1)) + \
           res2 * (tf.ones_like(mask_1) - tf.round(mask_1 - 0.1) - tf.round(mask_2 - 0.1))

    result = d_u + LU_u
    tmp = {
        'result': result,
        'res1': res1,
        'res2': res2,
        'LU_u': LU_u,
        'd_matrix': d_matrix,
        'd_u': d_u,
        'boundary_d_matrix': boundary_d_matrix,
        'boundary_weight': boundary_weight,
        'mat1_d_matrix': mat1_d_matrix,
        'mat2_d_matrix': mat2_d_matrix,
    }
    return tmp
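The steep double sigmoid above (w = 1000.) acts as a differentiable step function: it quantizes the corner-averaged mask response into three plateaus, so a boundary pixel gets a material-1 weight of roughly 1/4, 1/2 or 3/4 depending on how many of its corner neighbours lie in material 1. A minimal NumPy illustration:

import numpy as np

def smooth_step(x, w=1000.):
    sig = lambda z: 1. / (1. + np.exp(-z))
    return 0.25 + 0.25 * sig(w * (x - 1.25 / 4.)) + 0.25 * sig(w * (x - 2.75 / 4.))

# x is the corner average of mask_1, i.e. 0, 1/4, 2/4, 3/4 or 1:
for x in [0.0, 0.25, 0.5, 0.75, 1.0]:
    print(x, smooth_step(x))
# -> approximately 0.25, 0.25, 0.5, 0.75, 0.75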
Beispiel #34
0
def masked_conv_v3(u, filter_1, filter_2, mask_1, mask_2, conductivity_1,
                   conductivity_2, filter_banks):
    '''
    problematic in the d_matrix corner terms and the corner convolution (conv_corner)
    :param u:
    :param filter_1:
    :param filter_2:
    :param mask_1:
    :param mask_2:
    :param conductivity_1:
    :param conductivity_2:
    :param filter_banks:
    :return:
    '''
    boundary_mask = tf.round(mask_1 + 0.1) + tf.round(mask_2 + 0.1) - 1
    boundary_d_matrix = boundary_mask * (-8 / 3. *
                                         (conductivity_1 + conductivity_2) /
                                         2.)
    mat1_d_matrix = tf.round(mask_1 - 0.1) * (-8 / 3. * conductivity_1)
    mat2_d_matrix = tf.round(mask_2 - 0.1) * (-8 / 3. * conductivity_2)
    d_matrix = boundary_d_matrix + mat1_d_matrix + mat2_d_matrix
    d_u = d_matrix * u

    # padded_input = tf.pad(u * mask_1, [[0, 0], [1, 1], [1, 1], [0, 0]], "SYMMETRIC")  # convolution with symmetric padding at boundary
    # output_1 = tf.nn.conv2d(input=padded_input, filter=filter_banks['filter_1_side'], strides=[1, 1, 1, 1], padding='VALID')
    # padded_input = tf.pad(u * mask_2, [[0, 0], [1, 1], [1, 1], [0, 0]], "SYMMETRIC")  # convolution with symmetric padding at boundary
    # output_2 = tf.nn.conv2d(input=padded_input, filter=filter_banks['filter_2_side'], strides=[1, 1, 1, 1], padding='VALID')
    # side_conv_res = output_1 + output_2
    padded_input = tf.pad(
        u * tf.round(mask_1 + 0.1), [[0, 0], [1, 1], [1, 1], [0, 0]],
        "SYMMETRIC")  # convolution with symmetric padding at boundary
    output_1_side = tf.nn.conv2d(input=padded_input,
                                 filter=filter_banks['filter_1_side'],
                                 strides=[1, 1, 1, 1],
                                 padding='VALID')
    padded_input = tf.pad(
        u * tf.round(mask_2 + 0.1), [[0, 0], [1, 1], [1, 1], [0, 0]],
        "SYMMETRIC")  # convolution with symmetric padding at boundary
    output_2_side = tf.nn.conv2d(input=padded_input,
                                 filter=filter_banks['filter_2_side'],
                                 strides=[1, 1, 1, 1],
                                 padding='VALID')
    res1 = output_1_side * mask_1 + output_2_side * mask_2
    padded_input = tf.pad(
        u * mask_1, [[0, 0], [1, 1], [1, 1], [0, 0]],
        "SYMMETRIC")  # convolution with symmetric padding at boundary
    output_1_side_refine = tf.nn.conv2d(input=padded_input,
                                        filter=filter_banks['filter_1_side'],
                                        strides=[1, 1, 1, 1],
                                        padding='VALID')
    padded_input = tf.pad(
        u * mask_2, [[0, 0], [1, 1], [1, 1], [0, 0]],
        "SYMMETRIC")  # convolution with symmetric padding at boundary
    output_2_side_refine = tf.nn.conv2d(input=padded_input,
                                        filter=filter_banks['filter_2_side'],
                                        strides=[1, 1, 1, 1],
                                        padding='VALID')
    res2 = output_1_side_refine * mask_1 + output_2_side_refine * mask_2
    side_conv_res = res1 * (tf.round(mask_1 - 0.1) + tf.round(mask_2 - 0.1)) + \
                    res2 * (tf.ones_like(mask_1) - tf.round(mask_1 - 0.1) - tf.round(mask_2 - 0.1))

    padded_input = tf.pad(
        u * tf.round(mask_1 + 0.1), [[0, 0], [1, 1], [1, 1], [0, 0]],
        "SYMMETRIC")  # convolution with symmetric padding at boundary
    output_1_corner = tf.nn.conv2d(input=padded_input,
                                   filter=filter_banks['filter_1_corner'],
                                   strides=[1, 1, 1, 1],
                                   padding='VALID')
    padded_input = tf.pad(
        u * tf.round(mask_2 + 0.1), [[0, 0], [1, 1], [1, 1], [0, 0]],
        "SYMMETRIC")  # convolution with symmetric padding at boundary
    output_2_corner = tf.nn.conv2d(input=padded_input,
                                   filter=filter_banks['filter_2_corner'],
                                   strides=[1, 1, 1, 1],
                                   padding='VALID')
    corner_conv_res = output_1_corner + output_2_corner

    LU_u = corner_conv_res + side_conv_res
    result = d_u + LU_u
    tmp = {
        'result': result,
        'LU_u': LU_u,
        'd_matrix': d_matrix,
        'res1': res1,
        'res2': res2,
        'side_conv_res': side_conv_res,
        'output_1_side': output_1_side,
        'output_2_side': output_2_side,
        'output_1_side_refine': output_1_side_refine,
        'output_2_side_refine': output_2_side_refine,
        'corner_conv_res': corner_conv_res,
        'output_1_corner': output_1_corner,
        'output_2_corner': output_2_corner,
    }
    return tmp
Beispiel #35
0
def do_dual_max_match(overlap_matrix,
                      low_thres,
                      high_thres,
                      ignore_between=True,
                      gt_max_first=True):
    '''
    overlap_matrix: num_gt * num_anchors
    '''
    with tf.name_scope('dual_max_match', values=[overlap_matrix]):

        # first match from anchors' side
        anchors_to_gt = tf.argmax(overlap_matrix, axis=0)
        # the matching degree
        match_values = tf.reduce_max(overlap_matrix, axis=0)

        #positive_mask = tf.greater(match_values, high_thres)
        less_mask = tf.less(match_values, low_thres)
        between_mask = tf.logical_and(
            tf.less(match_values, high_thres),
            tf.greater_equal(match_values, low_thres))
        negative_mask = less_mask if ignore_between else between_mask
        ignore_mask = between_mask if ignore_between else less_mask
        # fill all negative positions with -1 and all ignored positions with -2
        match_indices = tf.where(negative_mask,
                                 -1 * tf.ones_like(anchors_to_gt),
                                 anchors_to_gt)
        match_indices = tf.where(ignore_mask, -2 * tf.ones_like(match_indices),
                                 match_indices)

        # negative values have no effect in tf.one_hot: they produce all zeros along that axis,
        # so all positive match positions in anchors_to_gt_mask are 1 and all others are 0
        anchors_to_gt_mask = tf.one_hot(tf.clip_by_value(
            match_indices, -1, tf.cast(tf.shape(overlap_matrix)[0], tf.int64)),
                                        tf.shape(overlap_matrix)[0],
                                        on_value=1,
                                        off_value=0,
                                        axis=0,
                                        dtype=tf.int32)
        # match from ground truth's side
        gt_to_anchors = tf.argmax(overlap_matrix, axis=1)

        if gt_max_first:
            # the max match from ground truth's side has higher priority
            left_gt_to_anchors_mask = tf.one_hot(gt_to_anchors,
                                                 tf.shape(overlap_matrix)[1],
                                                 on_value=1,
                                                 off_value=0,
                                                 axis=1,
                                                 dtype=tf.int32)
        else:
            # the max match from anchors' side has higher priority
            # use the match result from the ground truth's side only when the matching degree from the anchors' side is lower than the positive threshold
            left_gt_to_anchors_mask = tf.cast(
                tf.logical_and(
                    tf.reduce_max(anchors_to_gt_mask, axis=1, keep_dims=True) <
                    1,
                    tf.one_hot(gt_to_anchors,
                               tf.shape(overlap_matrix)[1],
                               on_value=True,
                               off_value=False,
                               axis=1,
                               dtype=tf.bool)), tf.int64)
        # cannot use left_gt_to_anchors_mask directly here, because many ground truths may match the same anchor; we should pick the highest-scoring one even when merging matches from the ground truth side
        left_gt_to_anchors_scores = overlap_matrix * tf.to_float(
            left_gt_to_anchors_mask)
        # merge matching results from ground truth's side with the original matching results from anchors' side
        # then select all the overlap score of those matching pairs
        selected_scores = tf.gather_nd(
            overlap_matrix,
            tf.stack([
                tf.where(
                    tf.reduce_max(left_gt_to_anchors_mask, axis=0) > 0,
                    tf.argmax(left_gt_to_anchors_scores, axis=0),
                    anchors_to_gt),
                tf.range(tf.cast(tf.shape(overlap_matrix)[1], tf.int64))
            ],
                     axis=1))
        # return the matching results for both foreground anchors and background anchors, also with overlap scores
        return tf.where(
            tf.reduce_max(left_gt_to_anchors_mask, axis=0) > 0,
            tf.argmax(left_gt_to_anchors_scores, axis=0),
            match_indices), selected_scores
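A toy usage sketch, assuming TF 1.x: two ground-truth boxes against four anchors, with thresholds chosen so every branch is exercised.

import tensorflow as tf

# rows: ground-truth boxes, cols: anchors
overlaps = tf.constant([[0.9, 0.3, 0.1, 0.55],
                        [0.2, 0.6, 0.05, 0.4]])
matches, scores = do_dual_max_match(overlaps, low_thres=0.3, high_thres=0.7)
with tf.Session() as sess:
    print(sess.run([matches, scores]))
    # matches = [0, 1, -1, -2]: anchor 0 takes gt 0 (0.9 > high_thres),
    # anchor 1 is forced to gt 1 as gt 1's best anchor, anchor 2 is
    # negative (< low_thres) and anchor 3 is ignored (between thresholds)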
Beispiel #36
0
 def _trainable_score_fn(context_features, example_features, mode):
     del context_features, mode
     input_layer = tf.ones_like(example_features["f1"])
     return tf.compat.v1.layers.dense(input_layer, units=1)
Beispiel #37
0
    def masking(self, inputs, q, k, mask_type='key'):
        '''
            inputs:
                the product from tf.matmul(Q_mh, tf.transpose(K_mh, [0, 2, 1]))
                inputs.shape = (N, Tq, Tk)

            q:
                q.shape = (h*N, Tq, d_model/h)
            k:
                k.shape = (h*N, Tk, d_model/h)
            mask_type:
                k, key, keys
                    mask based on k
                q, query, queries
                    mask based on q
                f, future
                    mask based on a lower-triangular matrix (causal masking)

            To prevent leftward information flow in the decoder and preserve the
            auto-regressive property, scaled dot-product attention masks out all
            values in the input of the softmax (setting them to minus infinity)
            that correspond to illegal connections.
        '''

        padding_num = -2**32 + 1
        if mask_type in ("k", "key", "keys"):
            # Generate masks
            masks = tf.sign(tf.reduce_sum(tf.abs(k),
                                          axis=-1))  # shape = (h*N, Tk)
            masks = tf.expand_dims(masks, 1)  # shape = (h*N, 1, Tk)
            masks = tf.tile(masks,
                            [1, tf.shape(q)[1], 1])  # shape = (h*N, Tq, Tk)

            # Apply masks to inputs
            paddings = tf.ones_like(inputs) * padding_num
            outputs = tf.where(tf.equal(masks, 0), paddings,
                               inputs)  # shape = (h*N, Tq, Tk)

        elif mask_type in ("q", "query", "queries"):
            # Generate masks
            masks = tf.sign(tf.reduce_sum(tf.abs(q),
                                          axis=-1))  # shape = (h*N, Tq)
            masks = tf.expand_dims(masks, -1)  # shape = (h*N, Tq, 1)
            masks = tf.tile(masks,
                            [1, 1, tf.shape(k)[1]])  # shape = (h*N, Tq, Tk)

            # Apply masks to inputs
            outputs = inputs * masks

        elif mask_type in ("f", "future"):
            diag_vals = tf.ones_like(inputs[0, :, :])  # shape = (Tq, Tk)
            tril = tf.linalg.LinearOperatorLowerTriangular(
                diag_vals).to_dense()  # shape = (Tq, Tk)
            masks = tf.tile(tf.expand_dims(
                tril, 0), [tf.shape(inputs)[0], 1, 1])  # shape = (h*N, Tq, Tk)

            paddings = tf.ones_like(masks) * padding_num
            outputs = tf.where(tf.equal(masks, 0), paddings, inputs)

        else:
            raise ValueError(
                "Unknown mask_type '{}'; expected one of "
                "k/key/keys, q/query/queries or f/future.".format(mask_type))

        return outputs
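A minimal standalone sketch of the 'f'/'future' branch, assuming TF 1.x: build the lower-triangular mask and push masked positions toward minus infinity before the softmax.

import tensorflow as tf

scores = tf.random_normal([2, 4, 4])               # (N, Tq, Tk)
tril = tf.linalg.LinearOperatorLowerTriangular(
    tf.ones_like(scores[0])).to_dense()            # (Tq, Tk)
masks = tf.tile(tf.expand_dims(tril, 0),
                [tf.shape(scores)[0], 1, 1])       # (N, Tq, Tk)
# future positions (mask == 0) get a huge negative value, so
# softmax assigns them ~0 attention weight
masked = tf.where(tf.equal(masks, 0),
                  tf.ones_like(scores) * (-2 ** 32 + 1.), scores)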
Beispiel #38
0
def create_model(
    bert_config,
    is_training,
    input_ids,
    input_mask,
    input_type_ids,
    labels,
    num_labels,
    use_one_hot_embeddings,
    tsa,
    unsup_ratio,
    global_step,
    num_train_steps,
    ):

  num_sample = input_ids.shape[0].value
  if is_training:
    assert num_sample % (1 + 2 * unsup_ratio) == 0
    sup_batch_size = num_sample // (1 + 2 * unsup_ratio)
    unsup_batch_size = sup_batch_size * unsup_ratio
  else:
    sup_batch_size = num_sample
    unsup_batch_size = 0

  model = modeling.AlbertModel(
      config=bert_config,
      is_training=True,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=input_type_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)


  pooled = model.get_pooled_output()


  clas_logits = hidden_to_logits(
      hidden=pooled,
      is_training=is_training,
      num_classes=num_labels,
      scope="classifier")

  log_probs = tf.nn.log_softmax(clas_logits, axis=-1)
  correct_label_probs = None

  with tf.variable_scope("sup_loss"):
    sup_log_probs = log_probs[:sup_batch_size]
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
    tgt_label_prob = one_hot_labels

    per_example_loss = -tf.reduce_sum(tgt_label_prob * sup_log_probs, axis=-1)
    loss_mask = tf.ones_like(per_example_loss, dtype=per_example_loss.dtype)
    correct_label_probs = tf.reduce_sum(
        one_hot_labels * tf.exp(sup_log_probs), axis=-1)

    if tsa:
      tsa_start = 1. / num_labels
      tsa_threshold = get_tsa_threshold(
          tsa, global_step, num_train_steps,
          tsa_start, end=1)

      larger_than_threshold = tf.greater(
          correct_label_probs, tsa_threshold)
      loss_mask = loss_mask * (1 - tf.cast(larger_than_threshold, tf.float32))
    else:
      tsa_threshold = 1

    loss_mask = tf.stop_gradient(loss_mask)
    per_example_loss = per_example_loss * loss_mask
    sup_loss = (tf.reduce_sum(per_example_loss) /
                tf.maximum(tf.reduce_sum(loss_mask), 1))

  unsup_loss_mask = None
  if is_training and unsup_ratio > 0:
    print("pooled:{} ".format(pooled))
    print("clas_logits:{} ".format(clas_logits))
    print("log_probs:{} ".format(log_probs))
    with tf.variable_scope("unsup_loss"):
      ori_start = sup_batch_size
      ori_end = ori_start + unsup_batch_size
      aug_start = sup_batch_size + unsup_batch_size
      aug_end = aug_start + unsup_batch_size

      ori_log_probs = log_probs[ori_start : ori_end]
      aug_log_probs = log_probs[aug_start : aug_end]
      unsup_loss_mask = 1
      if FLAGS.uda_softmax_temp != -1:
        tgt_ori_log_probs = tf.nn.log_softmax(
            clas_logits[ori_start : ori_end] / FLAGS.uda_softmax_temp,
            axis=-1)
        tgt_ori_log_probs = tf.stop_gradient(tgt_ori_log_probs)
      else:
        tgt_ori_log_probs = tf.stop_gradient(ori_log_probs)

      if FLAGS.uda_confidence_thresh != -1:
        largest_prob = tf.reduce_max(tf.exp(ori_log_probs), axis=-1)
        unsup_loss_mask = tf.cast(tf.greater(
            largest_prob, FLAGS.uda_confidence_thresh), tf.float32)
        unsup_loss_mask = tf.stop_gradient(unsup_loss_mask)

      per_example_kl_loss = kl_for_log_probs(
          tgt_ori_log_probs, aug_log_probs) * unsup_loss_mask
      unsup_loss = tf.reduce_mean(per_example_kl_loss)

  else:
    unsup_loss = 0.

  return (sup_loss, unsup_loss, clas_logits[:sup_batch_size],
          per_example_loss, loss_mask,
          tsa_threshold, unsup_loss_mask, correct_label_probs,pooled)
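get_tsa_threshold implements training-signal annealing: the probability threshold grows from 1/num_labels to 1, so supervised examples the model already predicts confidently are masked out early in training. A sketch consistent with the reference UDA code (treat the exact schedule constants as an assumption):

import tensorflow as tf

def get_tsa_threshold(schedule, global_step, num_train_steps, start, end):
    t = tf.to_float(global_step) / tf.to_float(num_train_steps)
    if schedule == "linear_schedule":
        coeff = t
    elif schedule == "exp_schedule":
        coeff = tf.exp((t - 1.) * 5.)     # release the signal late
    else:  # "log_schedule"
        coeff = 1. - tf.exp(-t * 5.)      # release the signal early
    return coeff * (end - start) + start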
Beispiel #39
0
    def recurseKdiag(self, Kdiag):
        # angle is zero, hence the diagonal stays the same (if scaled relu is used)
        return self.variance * Kdiag + self.bias_variance * tf.ones_like(Kdiag)
Beispiel #40
0
    def _multiheadAttention(self,
                            rawKeys,
                            queries,
                            keys,
                            numUnits=None,
                            causality=False,
                            scope="multiheadAttention"):
        # rawKeys is used to compute the mask: keys already has position embeddings added, so it no longer contains zero-valued padding
        numHeads = self.config.model.numHeads  # number of heads, currently set to 8
        keepProp = self.config.model.keepProp  # dropout keep probability

        if numUnits is None:  # if not given, use the last dimension of the input, i.e. the embedding size (embeddingSize = 200)
            numUnits = queries.get_shape().as_list()[-1]

        # tf.layers.dense can apply a nonlinear mapping to multi-dimensional tensors. When computing self-attention, Q, K and V
        # must each go through such a mapping; this is the per-head weight projection from the paper's Multi-Head Attention.
        # We project first and split afterwards, which is equivalent in principle.
        # Q, K, V all have shape [batch_size, sequence_length, embedding_size]
        Q = tf.layers.dense(queries, numUnits, activation=tf.nn.relu)
        K = tf.layers.dense(keys, numUnits, activation=tf.nn.relu)
        V = tf.layers.dense(keys, numUnits, activation=tf.nn.relu)

        # split the last dimension into numHeads pieces and concatenate along the first dimension
        # Q_, K_, V_ all have shape [batch_size * numHeads, sequence_length, embedding_size/numHeads]
        Q_ = tf.concat(tf.split(Q, numHeads, axis=-1), axis=0)
        K_ = tf.concat(tf.split(K, numHeads, axis=-1), axis=0)
        V_ = tf.concat(tf.split(V, numHeads, axis=-1), axis=0)

        # dot product between queries and keys, shape [batch_size * numHeads, queries_len, keys_len]
        similary = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1]))

        # scale the dot products by the square root of the key dimension
        scaledSimilary = similary / (K_.get_shape().as_list()[-1]**0.5)

        # The input sequence contains padding tokens which should not contribute to the result. In principle, when the
        # padding inputs are all zeros the computed weights are zero too, but the position embeddings added in the
        # transformer make those values nonzero, so the padding must be masked out before that. Padding also exists in
        # the queries, but the model output depends only on its input, and in self-attention queries = keys, so masking
        # one side already forces the corresponding weights to zero.
        # For details on key masking see: https://github.com/Kyubyong/transformer/issues/3

        # expand the mask with tf.tile, shape [batch_size * numHeads, keys_len], where keys_len = sequence length of keys
        keyMasks = tf.tile(rawKeys, [numHeads, 1])

        # add a dimension and tile to shape [batch_size * numHeads, queries_len, keys_len]
        keyMasks = tf.tile(tf.expand_dims(keyMasks, 1),
                           [1, tf.shape(queries)[1], 1])

        # tf.ones_like builds an all-ones tensor shaped like scaledSimilary; scale it to a very large negative value
        paddings = tf.ones_like(scaledSimilary) * (-2 ** 32 + 1)

        # tf.where(condition, x, y): where condition is True take the element from x, otherwise from y;
        # condition, x and y must have the same shape. Positions where keyMasks is 0 are replaced by paddings
        maskedSimilary = tf.where(
            tf.equal(keyMasks, 0), paddings,
            scaledSimilary)  # shape [batch_size * numHeads, queries_len, keys_len]

        # When predicting the current token, attend only to the preceding context; this appears in the Transformer
        # decoder. For text classification the Transformer encoder alone suffices; the decoder is used for generation
        if causality:
            diagVals = tf.ones_like(
                maskedSimilary[0, :, :])  # [queries_len, keys_len]
            tril = tf.contrib.linalg.LinearOperatorTriL(
                diagVals).to_dense()  # [queries_len, keys_len]
            masks = tf.tile(tf.expand_dims(
                tril, 0), [tf.shape(maskedSimilary)[0], 1, 1
                           ])  # [batch_size * numHeads, queries_len, keys_len]

            paddings = tf.ones_like(masks) * (-2 ** 32 + 1)
            maskedSimilary = tf.where(
                tf.equal(masks, 0), paddings, maskedSimilary
            )  # [batch_size * numHeads, queries_len, keys_len]

        # softmax to obtain the attention weights, shape [batch_size * numHeads, queries_len, keys_len]
        weights = tf.nn.softmax(maskedSimilary)

        # weighted sum of the values, shape [batch_size * numHeads, sequence_length, embedding_size/numHeads]
        outputs = tf.matmul(weights, V_)

        # recombine the multi-head outputs back to the original shape [batch_size, sequence_length, embedding_size]
        outputs = tf.concat(tf.split(outputs, numHeads, axis=0), axis=2)

        outputs = tf.nn.dropout(outputs, keep_prob=keepProp)

        # residual connection for each sub-layer: H(x) = F(x) + x
        outputs += queries
        # layer normalization
        outputs = self._layerNormalization(outputs)
        return outputs
Beispiel #41
0
    def build_model(self):
        self.is_training = tf.placeholder(tf.bool, name='is_training')
        self.images = tf.placeholder(tf.float32, [None] + self.image_shape,
                                     name='real_images')
        self.lowres_images = tf.reduce_mean(
            tf.reshape(self.images, [
                self.batch_size, self.lowres_size, self.lowres,
                self.lowres_size, self.lowres, self.c_dim
            ]), [2, 4])
        self.z = tf.placeholder(tf.float32, [None, self.z_dim], name='z')
        self.z_sum = tf.summary.histogram("z", self.z)

        self.G = self.generator(self.z)
        self.lowres_G = tf.reduce_mean(
            tf.reshape(self.G, [
                self.batch_size, self.lowres_size, self.lowres,
                self.lowres_size, self.lowres, self.c_dim
            ]), [2, 4])
        self.D, self.D_logits = self.discriminator(self.images)

        self.D_, self.D_logits_ = self.discriminator(self.G, reuse=True)

        self.d_sum = tf.summary.histogram("d", self.D)
        self.d__sum = tf.summary.histogram("d_", self.D_)
        self.G_sum = tf.summary.image("G", self.G)

        self.d_loss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_logits,
                                                    labels=tf.ones_like(
                                                        self.D)))
        self.d_loss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_logits_,
                                                    labels=tf.zeros_like(
                                                        self.D_)))
        self.g_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=self.D_logits_,
                                                    labels=tf.ones_like(
                                                        self.D_)))

        self.d_loss_real_sum = tf.summary.scalar("d_loss_real",
                                                 self.d_loss_real)
        self.d_loss_fake_sum = tf.summary.scalar("d_loss_fake",
                                                 self.d_loss_fake)

        self.d_loss = self.d_loss_real + self.d_loss_fake

        self.g_loss_sum = tf.summary.scalar("g_loss", self.g_loss)
        self.d_loss_sum = tf.summary.scalar("d_loss", self.d_loss)

        t_vars = tf.trainable_variables()

        self.d_vars = [var for var in t_vars if 'd_' in var.name]
        self.g_vars = [var for var in t_vars if 'g_' in var.name]

        self.saver = tf.train.Saver(max_to_keep=1)

        # Completion.
        self.mask = tf.placeholder(tf.float32, self.image_shape, name='mask')
        self.lowres_mask = tf.placeholder(tf.float32,
                                          self.lowres_shape,
                                          name='lowres_mask')
        self.contextual_loss = tf.reduce_sum(
            tf.contrib.layers.flatten(
                tf.abs(
                    tf.multiply(self.mask, self.G) -
                    tf.multiply(self.mask, self.images))), 1)
        self.contextual_loss += tf.reduce_sum(
            tf.contrib.layers.flatten(
                tf.abs(
                    tf.multiply(self.lowres_mask, self.lowres_G) -
                    tf.multiply(self.lowres_mask, self.lowres_images))), 1)
        self.perceptual_loss = self.g_loss
        self.complete_loss = self.contextual_loss + self.lam * self.perceptual_loss
        self.grad_complete_loss = tf.gradients(self.complete_loss, self.z)
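grad_complete_loss is typically consumed by projected gradient descent on z to inpaint the masked image, as in semantic inpainting with GANs. The loop below is an illustrative sketch, not this class's actual completion method; `model` is an instance of this class, and `sess`, `batch_images` and `mask_value` are assumed to exist:

import numpy as np

zhats = np.random.uniform(-1, 1, size=(model.batch_size, model.z_dim))
for _ in range(1000):
    loss, g = sess.run(
        [model.complete_loss, model.grad_complete_loss],
        feed_dict={model.z: zhats, model.images: batch_images,
                   model.mask: mask_value,
                   model.lowres_mask: np.zeros(model.lowres_shape),
                   model.is_training: False})
    zhats -= 0.01 * g[0]           # gradient step on z only
    zhats = np.clip(zhats, -1, 1)  # keep z inside the prior's support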
Beispiel #42
0
def build_model(x, labels=None, reuse=False):
    if reuse:
        prefix = 'test_'
    else:
        prefix = 'train_'
    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse):
        conv_pointers = [InputLayer(x, name='c_disc_inputs')]
        for i, v in enumerate(CONVOLUTIONS):
            if v < 0:
                strides = (2, 2)
                v *= -1
            else:
                strides = (1, 1)
            curr_layer = BatchNormLayer(Conv2d(conv_pointers[-1],
                                               CONVOLUTIONS[i], (5, 5),
                                               strides=strides,
                                               name='c_conv1_%s' % (i)),
                                        act=tf.nn.leaky_relu,
                                        is_train=True,
                                        name='c_batch_norm%s' % (i))

            if i < len(CONVOLUTIONS) - 1:
                # conv_pointers.append(ConcatLayer([curr_layer, conv_pointers[-1]],
                #  3, name = 'concat_layer%s'%(i)))
                conv_pointers.append(curr_layer)
            else:
                conv_pointers.append(curr_layer)
        # y_conv = DenseLayer(flat, m.fully_connected_size, act=tf.nn.relu,name =  'hidden_encode')
        pre_max_pool = Conv2d(conv_pointers[-1],
                              1, (1, 1),
                              strides=(1, 1),
                              name='c_Final_Conv')
        _, pm_width, pm_height, _ = pre_max_pool.outputs.get_shape()
        max_pool_width, max_pool_height = int(pm_width) // DIVIDEND, int(pm_height) // DIVIDEND  # integer pool sizes
        max_pool = MaxPool2d(pre_max_pool,
                             filter_size=(max_pool_width, max_pool_height),
                             strides=(max_pool_width, max_pool_height),
                             name='c_Final_Pool')
        if labels is None:
            return tf.round(tf.sigmoid(max_pool.outputs))
        logits = FlattenLayer(max_pool).outputs
        final_guess = tf.round(tf.sigmoid(logits))

        flat_labels = tf.contrib.layers.flatten(labels)
        cross_entropy = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=flat_labels,
                                                    logits=logits))
        train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(
            cross_entropy)
        accuracy_summary = tf.summary.scalar(
            'Accuracy',
            tf.reduce_mean(
                tf.cast(tf.equal(flat_labels, final_guess), tf.float32)))
        percent_found_summary_round = tf.summary.scalar(
            'Percent Found Rounded', tf.reduce_mean(final_guess))
        percent_found_summary = tf.summary.scalar(
            'Percent Found Nonrounded', tf.reduce_mean(tf.sigmoid(logits)))

        flat_labels = tf.cast(flat_labels, tf.float64)
        final_guess = tf.cast(final_guess, tf.float64)
        TP = tf.count_nonzero(final_guess * flat_labels, dtype=tf.float32)
        TN = tf.count_nonzero((final_guess - 1) * (flat_labels - 1),
                              dtype=tf.float32)
        FP = tf.count_nonzero(final_guess * (flat_labels - 1),
                              dtype=tf.float32)
        FN = tf.count_nonzero((final_guess - 1) * flat_labels,
                              dtype=tf.float32)
        true_positive = tf.divide(TP, TP + FP)  # precision: TP / (TP + FP)
        true_negative = tf.divide(TN, TN + FN)  # negative predictive value: TN / (TN + FN)
        # accuracy = tf.divide(TP + TN, TN + FN + TP + FP)
        # accuracy_summary = tf.summary.scalar('Accuracy', accuracy)
        true_positive_summary = tf.summary.scalar('True Positive',
                                                  true_positive)
        true_negative_summary = tf.summary.scalar('True Negative',
                                                  true_negative)
        # resized_label = tf.image.resize_images(labels,size = (BASE_INPUT_SHAPE[0], BASE_INPUT_SHAPE[1]))
        # resized_output = tf.image.resize_images(tf.sigmoid(max_pool.outputs),size = (BASE_INPUT_SHAPE[0], BASE_INPUT_SHAPE[1]))
        # tiled_labels = tf.tile(tf.expand_dims(resized_label, axis = -1), [1,1,1,3])
        # tiled_outputs = tf.tile(resized_output, [1,1,1,3])

        image_summary = tf.summary.image(
            "Example",
            tf.concat([
                tf.sigmoid(max_pool.outputs),
                tf.expand_dims(labels, axis=-1),
                tf.ones_like(max_pool.outputs)
            ],
                      axis=2),
            max_outputs=MAX_OUTPUTS)  #show fake image
        # image_summary_2 = tf.summary.image("Example_2", x,max_outputs = MAX_OUTPUTS)#show fake image
        # image_summary_merge = tf.summary.merge([image_summary,image_summary_2])

        cross_entropy_summary = tf.summary.scalar('Loss', cross_entropy)
        real_summary = tf.summary.merge([
            cross_entropy_summary, accuracy_summary,
            percent_found_summary_round, percent_found_summary,
            true_negative_summary, true_positive_summary
        ])
        return real_summary, image_summary, train_step
Beispiel #43
0
    def _init_graph(self):
        # initialize the TensorFlow graph: input data, variables, model, loss and optimizer
        
        self.graph = tf.Graph()
        with self.graph.as_default():  # uses CPU by default
            
            tf.set_random_seed(self.random_seed)
            # input data
            self.train_features = tf.placeholder(tf.int32, shape=[None, None], name="train_features")  # None * features_M
            self.train_labels = tf.placeholder(tf.float32, shape=[None, 1], name="train_labels")  # None * 1
            self.dropout_keep = tf.placeholder(tf.float32, shape=[None], name="dropout_keep")
            self.train_phase = tf.placeholder(tf.bool, name="train_phase")

            # variables
            self.weights = self._initialize_weights()        
        
        
            # model definition
            self.nonzero_embeddings = tf.nn.embedding_lookup(self.weights['feature_embeddings'], self.train_features) # None * M' * K; M' = fields, K = em_factor
            # Pair-wise Interaction Layer
            element_wise_product_list = []
            for i in range(0, self.fields):
                for j in range(i+1, self.fields):
                    element_wise_product_list.append(tf.multiply(self.nonzero_embeddings[:,i,:], self.nonzero_embeddings[:,j,:]))
            # turn the list into a single tensor; the list above holds M'*(M'-1)/2 tensors of shape None * K
            self.element_wise_product = tf.stack(element_wise_product_list) # (M'*(M'-1)/2) * None * K
            self.element_wise_product = tf.transpose(self.element_wise_product, perm=[1,0,2], name="element_wise_product") # None * (M'*(M'-1)/2) * K
            self.interactions = tf.reduce_sum(self.element_wise_product, 2, name="interactions")  # None * (M'*(M'-1)/2)
            
            # _________ attention part _____________
            num_interactions = int(self.fields*(self.fields-1)/2)
            if self.attention:
                self.attention_mul = tf.reshape(tf.matmul(tf.reshape(self.element_wise_product, shape=[-1, self.em_factor]), \
                    self.weights['attention_W']), shape=[-1, num_interactions, self.attention_factor])
                # the first reshape turns None * (M'*(M'-1)/2) * K into (None*(M'*(M'-1)/2)) * K, since the weight is a 2-D tensor;
                # the second reshape turns the result back into None * (M'*(M'-1)/2) * attention_factor
                self.attention_exp = tf.exp(tf.reduce_sum(tf.multiply(self.weights['attention_p'], tf.nn.relu(self.attention_mul + \
                    self.weights['attention_b'])), 2, keep_dims=True)) # None * (M'*(M'-1)/2) * 1
                self.attention_sum = tf.reduce_sum(self.attention_exp, 1, keep_dims=True) # None * 1 * 1
                self.attention_out = tf.div(self.attention_exp, self.attention_sum, name="attention_out") # None * (M'*(M'-1)/2) * 1
            # attention uses no dropout or BN; its weights get L2 regularization instead
            
            # _________ attention-based pooling layer _____________
            if self.attention:
                self.AFM = tf.reduce_sum(tf.multiply(self.attention_out, self.element_wise_product), 1, name="afm") # None * K
            else:
                self.AFM = tf.reduce_sum(self.element_wise_product, 1, name="afm") # None * K
            
            # apply batch normalization to the attention output
            if self.bn:
                self.AFM = self.batch_norm_layer(self.AFM, train_phase=self.train_phase, scope_bn='bn_fm')                
            # apply dropout to the attention output
            self.AFM = tf.nn.dropout(self.AFM, self.dropout_keep[0]) # dropout
            
            # ___________ output layer ___________________
            self.Bilinear = tf.matmul(self.AFM, self.weights['prediction']) # None * 1
            #Bilinear = tf.reduce_sum(self.Bilinear, 1, keep_dims=True)  # None * 1
            self.Feature_bias = tf.reduce_sum(tf.nn.embedding_lookup(self.weights['feature_bias'], self.train_features) , 1)  # None * 1
            Bias = self.weights['bias'] * tf.ones_like(self.train_labels)  # None * 1
            self.out = tf.add_n([self.Bilinear, self.Feature_bias, Bias], name="out_afm")  # None * 1   
        
            # loss
            if self.attention and self.lamda_attention > 0:
                self.loss = tf.nn.l2_loss(tf.subtract(self.train_labels, self.out)) + tf.contrib.layers.l2_regularizer(self.lamda_attention)(self.weights['attention_W'])  # regularizer
            else:
                self.loss = tf.nn.l2_loss(tf.subtract(self.train_labels, self.out))

            if self.lamda_em > 0:
                self.loss = self.loss + tf.contrib.layers.l2_regularizer(self.lamda_em)(self.weights['feature_embeddings'])  # regularizer
                
            # optimizer
            if self.optimizer_type == 'AdamOptimizer':
                self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8).minimize(self.loss)
            elif self.optimizer_type == 'AdagradOptimizer':
                self.optimizer = tf.train.AdagradOptimizer(learning_rate=self.learning_rate, initial_accumulator_value=1e-8).minimize(self.loss)
            elif self.optimizer_type == 'GradientDescentOptimizer':
                self.optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate).minimize(self.loss)
            elif self.optimizer_type == 'MomentumOptimizer':
                self.optimizer = tf.train.MomentumOptimizer(learning_rate=self.learning_rate, momentum=0.95).minimize(self.loss)

            # initialization
            self.saver = tf.train.Saver()
            init = tf.global_variables_initializer()
            self.sess = tf.Session()
            self.sess.run(init)

            # number of parameters
            total_parameters = 0
            for variable in self.weights.values():
                shape = variable.get_shape() # shape is an array of tf.Dimension
                variable_parameters = 1
                for dim in shape:
                    variable_parameters *= dim.value
                total_parameters += variable_parameters
            if self.verbose > 0:
                print("#params: %d" % total_parameters)
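A toy check of the pair-wise interaction layer above, assuming TF 1.x: with M' = 3 fields and K = 2, the layer produces the M'(M'-1)/2 = 3 element-wise products of field-embedding pairs.

import tensorflow as tf

emb = tf.reshape(tf.range(6, dtype=tf.float32), [1, 3, 2])   # None * M' * K
pairs = [tf.multiply(emb[:, i, :], emb[:, j, :])
         for i in range(3) for j in range(i + 1, 3)]
interactions = tf.transpose(tf.stack(pairs), [1, 0, 2])      # None * 3 * K
with tf.Session() as sess:
    print(sess.run(interactions))   # [[[0. 3.] [0. 5.] [8. 15.]]]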
Beispiel #44
0
    def __init__(
        self, sequence_length, num_classes,
        embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0):

        # Placeholders for input, output, dropout
        self.input_x = tf.placeholder(tf.float32, [None, sequence_length, embedding_size], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        
        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)

        # Embedding layer
        # self.embedded_chars: [None(batch_size), sequence_length, embedding_size]
        # self.embedded_chars_expended: [None(batch_size), sequence_length, embedding_size, 1(num_channels)]
        self.embedded_chars = self.input_x
        self.embedded_chars_expended = tf.expand_dims(self.embedded_chars, -1)

        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Convolution layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                conv = tf.nn.conv2d(
                    self.embedded_chars_expended,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                # Maxpooling over the outputs
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="pool")
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = num_filters * len(filter_sizes)
        self.h_pool = tf.concat(pooled_outputs, 3)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)
        
        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            W = tf.get_variable(
                "W",
                shape=[num_filters_total, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        predictions = self.predictions
        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss
        
        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
        actuals = tf.argmax(self.input_y, 1)

        ones_like_actuals = tf.ones_like(actuals)
        zeros_like_actuals = tf.zeros_like(actuals)
        ones_like_predictions = tf.ones_like(self.predictions)
        zeros_like_predictions = tf.zeros_like(self.predictions)

        tp_op = tf.reduce_sum(
                tf.cast(
                    tf.logical_and(
                        tf.equal(actuals, ones_like_actuals),
                        tf.equal(predictions, ones_like_predictions)
                    ),
                    "float"
                )
            )
         
        tn_op = tf.reduce_sum(
                tf.cast(
                  tf.logical_and(
                    tf.equal(actuals, zeros_like_actuals),
                    tf.equal(predictions, zeros_like_predictions)
                  ),
                  "float"
                )
            )
         
        fp_op = tf.reduce_sum(
                tf.cast(
                  tf.logical_and(
                    tf.equal(actuals, zeros_like_actuals),
                    tf.equal(predictions, ones_like_predictions)
                  ),
                  "float"
                )
            )
         
        fn_op = tf.reduce_sum(
                tf.cast(
                  tf.logical_and(
                    tf.equal(actuals, ones_like_actuals),
                    tf.equal(predictions, zeros_like_predictions)
                  ),
                  "float"
                )
            )
        tpr = tp_op / (tp_op + fn_op)
        precision = tp_op / (tp_op + fp_op)
        print(self.predictions)
        print(self.predictions.shape)
        print(tf.argmax(self.input_y, 1))
        self.recall = tpr
        # F1 is the harmonic mean of precision and recall
        self.f1_score = (2 * (precision * self.recall)) / (precision + self.recall)
Beispiel #45
0
def multihead_attention(queries,
						keys,
						num_units = None,
						num_heads = 8,
						dropout_rate = 0,
						is_training = True,
						causality = False,
						scope = "multihead_attention",
						reuse = None):
	'''
	Implement multihead attention

	Args:
		queries: [Tensor], a 3-dimensional tensor with shape [N, T_q, S_q]
		keys: [Tensor], a 3-dimensional tensor with shape [N, T_k, S_k]
		num_units: [Int], attention size
		num_heads: [Int], number of heads
		dropout_rate: [Float], the dropout rate
		is_training: [Boolean], if true, dropout is applied in training mode
		causality: [Boolean], if true, units that reference the future are masked
		scope: [String], optional scope for "variable_scope"
		reuse: [Boolean], whether to reuse the weights of a previous layer with the same name
	
	Returns:
		a 3-dimensional tensor with shape [N, T_q, S]
	'''
	with tf.variable_scope(scope, reuse = reuse):
		if num_units is None:
			# default to the embedding size of the queries
			num_units = queries.get_shape().as_list()[-1]

		# Linear layers in Figure 2(right)
		# shape = [N, T_q, S]
		Q = tf.layers.dense(queries, num_units, activation = tf.nn.relu)
		# shape = [N, T_k, S]
		K = tf.layers.dense(keys, num_units, activation = tf.nn.relu)
		# shape = [N, T_k, S]
		V = tf.layers.dense(keys, num_units, activation = tf.nn.relu)

		# Split and concat
		# shape = [N*h, T_q, S/h]
		Q_ = tf.concat(tf.split(Q, num_heads, axis = 2), axis = 0)
		# shape = [N*h, T_k, S/h]
		K_ = tf.concat(tf.split(K, num_heads, axis = 2), axis = 0)
		# shape = [N*h, T_k, S/h]
		V_ = tf.concat(tf.split(V, num_heads, axis = 2), axis = 0)

		# shape = [N*h, T_q, T_k]
		outputs = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1]))

		# Scale
		outputs = outputs / (K_.get_shape().as_list()[-1] ** 0.5)

		# Masking
		# shape = [N, T_k]
		key_masks = tf.sign(tf.abs(tf.reduce_sum(keys, axis = -1)))
		# shape = [N*h, T_k]
		key_masks = tf.tile(key_masks, [num_heads, 1])
		# shape = [N*h, T_q, T_k]
		key_masks = tf.tile(tf.expand_dims(key_masks, 1), [1, tf.shape(queries)[1], 1])

		# where key_masks == 0, replace the scores with a large negative value
		paddings = tf.ones_like(outputs) * (-math.pow(2, 32) + 1)
		# shape = [N*h, T_q, T_k]
		outputs = tf.where(tf.equal(key_masks, 0), paddings, outputs)

		if causality:
			# reduce dims : shape = [T_q, T_k]
			diag_vals = tf.ones_like(outputs[0, :, :])
			# shape = [T_q, T_k]
			# use a lower-triangular matrix to ignore the effect of future words,
			# e.g. [[1,0,0],
			#       [1,1,0],
			#       [1,1,1]]
			tril = tf.contrib.linalg.LinearOperatorTriL(diag_vals).to_dense()
			# shape = [N*h, T_q, T_k]
			masks = tf.tile(tf.expand_dims(tril, 0), [tf.shape(outputs)[0], 1, 1])

			paddings = tf.ones_like(masks) * (-math.pow(2, 32) + 1)
			# shape = [N*h, T_q, T_k]
			outputs = tf.where(tf.equal(masks, 0), paddings, outputs)

		# Output Activation
		outputs = tf.nn.softmax(outputs)

		# Query Masking
		# shape = [N, T_q]
		query_masks = tf.sign(tf.abs(tf.reduce_sum(queries, axis = -1)))
		# shape = [N*h, T_q]
		query_masks = tf.tile(query_masks, [num_heads, 1])
		# shape = [N*h, T_q, T_k]
		query_masks = tf.tile(tf.expand_dims(query_masks, -1), [1, 1, tf.shape(keys)[1]])
		outputs *= query_masks 

		# Dropouts
		outputs = tf.layers.dropout(outputs, rate = dropout_rate, training = tf.convert_to_tensor(is_training))

		# Weighted sum
		# shape = [N*h, T_q, S/h]
		outputs = tf.matmul(outputs, V_)

		# Restore shape
		# shape = [N, T_q, S]
		outputs = tf.concat(tf.split(outputs, num_heads, axis = 0), axis = 2)

		# Residual connection
		outputs += queries

		# Normalize
		# shape = [N, T_q, S]
		outputs = normalize(outputs)

	return outputs
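A usage sketch, assuming TF 1.x and that normalize() is defined in the same module: self-attention with a causal mask over a toy batch.

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 5, 16])   # (N, T, S)
out = multihead_attention(queries=x, keys=x,
                          num_units=16, num_heads=4,
                          dropout_rate=0.1, is_training=True,
                          causality=True, scope="self_attn")
# out keeps shape (N, 5, 16); with causality=True, position t only
# attends to positions <= t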
Beispiel #46
0
    def __call__(self, inputVector, bhPhase=True, trainable=True):
        print "priornet - " + self._nameScope
        with tf.variable_scope(self._nameScope, reuse=self._reuse):
            ratio = np.power(
                float(self._outputDim) / float(self._inputDim),
                1.0 / float(self._hiddenLayerNum))
            layerDim = self._inputDim
            print(inputVector.shape)
            hidden = 2.0 * inputVector - 1.0
            print("mean prior")
            for i in range(self._hiddenLayerNum - 1):
                layerDim = layerDim * ratio
                hidden = tf.layers.dense(inputs=hidden,
                                         units=int(layerDim),
                                         activation=None,
                                         use_bias=True,
                                         trainable=self._trainable)
                hidden = tf.layers.batch_normalization(
                    hidden, training=self._training, trainable=self._trainable)
                hidden = tf.layers.dropout(hidden,
                                           rate=0.5,
                                           training=self._training)
                if self._coreAct is not None:
                    hidden = self._coreAct(hidden)
                print(hidden.shape)
            meanPrior = tf.layers.dense(inputs=hidden,
                                        units=self._outputDim,
                                        activation=None,
                                        use_bias=True,
                                        trainable=self._trainable)
            if self._lastAct is not None:
                meanPrior = self._lastAct(meanPrior)
            print(meanPrior.shape)

            if self._constLogVar is None:
                print("logVar prior")
                layerDim = self._inputDim
                print(inputVector.shape)
                hidden = 2.0 * inputVector - 1.0
                for i in range(self._hiddenLayerNum - 1):
                    layerDim = layerDim * ratio
                    hidden = tf.layers.dense(inputs=hidden,
                                             units=int(layerDim),
                                             activation=None,
                                             use_bias=True,
                                             trainable=self._trainable)
                    hidden = tf.layers.batch_normalization(
                        hidden,
                        training=self._training,
                        trainable=self._trainable)
                    hidden = tf.layers.dropout(hidden,
                                               rate=0.5,
                                               training=self._training)
                    if self._coreAct is not None:
                        hidden = self._coreAct(hidden)
                    print(hidden.shape)
                logVarPrior = tf.layers.dense(inputs=hidden,
                                              units=self._outputDim,
                                              activation=None,
                                              use_bias=True,
                                              trainable=self._trainable)
                if self._lastAct is not None:
                    logVarPrior = self._lastAct(logVarPrior)
                print(logVarPrior.shape)
            elif self._constLogVar == self._constLogVar:  # always true unless _constLogVar is NaN
                print("logVar prior : constant " + str(self._constLogVar))
                logVarPrior = self._constLogVar * tf.ones_like(meanPrior)
            else:
                logVarPrior = None
        self._reuse = True
        self.variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                           scope=self._nameScope)
        self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                            scope=self._nameScope)
        self.saver = tf.train.Saver(var_list=self.variables)
        return meanPrior, logVarPrior
Beispiel #47
0
    def build_model(self):
        if self.y_dim:
            self.y = tf.placeholder(tf.float32, [self.batch_size, self.y_dim],
                                    name='y')

        if self.is_crop:
            image_dims = [self.output_height, self.output_width, self.c_dim]
        else:
            image_dims = [self.input_height, self.input_width, self.c_dim]

        self.inputs = tf.placeholder(tf.float32,
                                     [self.batch_size] + image_dims,
                                     name='real_images')
        self.sample_inputs = tf.placeholder(tf.float32,
                                            [self.sample_num] + image_dims,
                                            name='sample_inputs')

        inputs = self.inputs
        sample_inputs = self.sample_inputs
        '''
        Possible place to change z's dimension
        '''
        self.z = tf.placeholder(tf.float32, [None, self.z_dim], name='z')
        self.z_sum = histogram_summary("z", self.z)

        if self.y_dim:
            self.G = self.generator(self.z, self.y)
            self.D, self.D_logits = self.discriminator(inputs, self.y)

            self.sampler = self.sampler(self.z, self.y)
            self.D_, self.D_logits_ = self.discriminator(self.G,
                                                         self.y,
                                                         reuse=True)
        else:
            self.G = self.generator(self.z)
            self.D, self.D_logits = self.discriminator(inputs)

            self.sampler = self.sampler(self.z)
            self.D_, self.D_logits_ = self.discriminator(self.G, reuse=True)

        self.d_sum = histogram_summary("d", self.D)
        self.d__sum = histogram_summary("d_", self.D_)
        self.G_sum = image_summary("G", self.G)

        def sigmoid_cross_entropy_with_logits(x, y):
            try:
                return tf.nn.sigmoid_cross_entropy_with_logits(logits=x,
                                                               labels=y)
            except:
                return tf.nn.sigmoid_cross_entropy_with_logits(logits=x,
                                                               targets=y)

        self.d_loss_real = tf.reduce_mean(
            sigmoid_cross_entropy_with_logits(self.D_logits,
                                              tf.ones_like(self.D)))
        self.d_loss_fake = tf.reduce_mean(
            sigmoid_cross_entropy_with_logits(self.D_logits_,
                                              tf.zeros_like(self.D_)))
        self.g_loss = tf.reduce_mean(
            sigmoid_cross_entropy_with_logits(self.D_logits_,
                                              tf.ones_like(self.D_)))

        self.d_loss_real_sum = scalar_summary("d_loss_real", self.d_loss_real)
        self.d_loss_fake_sum = scalar_summary("d_loss_fake", self.d_loss_fake)

        self.d_loss = self.d_loss_real + self.d_loss_fake

        self.g_loss_sum = scalar_summary("g_loss", self.g_loss)
        self.d_loss_sum = scalar_summary("d_loss", self.d_loss)

        t_vars = tf.trainable_variables()

        self.d_vars = [var for var in t_vars if 'd_' in var.name]
        self.g_vars = [var for var in t_vars if 'g_' in var.name]

        # Wasserstein-GAN
        # self.d_loss_real = tf.reduce_mean(self.D_logits)
        # self.d_loss_fake = tf.reduce_mean(self.D_logits_)
        # self.g_loss = -tf.reduce_mean(self.D_logits_)
        # self.d_loss = self.d_loss_real - self.d_loss_fake

        self.saver = tf.train.Saver()
Beispiel #48
0
	def __init__(self, z_dim, h_dim, learning_rate, scale, generator_output_layer):

		self.z_dim = z_dim
		self.h_dim = h_dim

		self.g_net = Generator(z_dim, h_dim, generator_output_layer)
		self.d_net = Discriminator(h_dim)

		self.training = tf.placeholder(tf.bool, [])

		self.with_text = tf.placeholder(tf.float32, [None])

		self.x = tf.placeholder(tf.float32, [None, 64, 64, 3])
		self.x_w_ = tf.placeholder(tf.float32, [None, 64, 64, 3])

		self.z = tf.placeholder(tf.float32, [None, self.z_dim])
		# true h
		self.h = tf.placeholder(tf.float32, [None, h_dim])
		# false h
		self.h_ = tf.placeholder(tf.float32, [None, h_dim])

		# false image
		self.x_ = self.g_net(self.z, self.h, self.training)

		# true image, true h
		self.d = self.d_net(self.x, self.h, self.training, reuse=False)

		# fake image, true h
		self.d_ = self.d_net(self.x_, self.h, self.training)

		# wrong image, true h
		self.d_w_ = self.d_net(self.x_w_, self.h, self.training)

		# true image, false h
		self.d_h_ = self.d_net(self.x, self.h_, self.training)

		# self.g_loss = - tf.reduce_mean(self.d_) #+ tf.reduce_mean(tf.square(self.x - self.x_))
		# self.d_loss = tf.reduce_mean(self.d) \
		# 			- ( 1 * tf.reduce_mean(self.d_) + 1 * tf.reduce_mean(self.d_h_) + 1 * tf.reduce_mean(self.d_w_)) / (1 + 1 + 1)

		self.g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.d_, labels=tf.ones_like(self.d_))) 

		self.d_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.d, labels=tf.ones_like(self.d))) \
					+ (tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.d_, labels=tf.zeros_like(self.d_))) + \
					   tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.d_w_, labels=tf.zeros_like(self.d_w_))) +\
					   tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.d_h_, labels=tf.zeros_like(self.d_h_))) ) / 3 
		

		self.d_opt, self.g_opt = None, None
		with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
			self.d_opt = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.5, beta2=0.9)\
				.minimize(self.d_loss, var_list=self.d_net.vars)
			self.g_opt = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.5, beta2=0.9)\
				.minimize(self.g_loss, var_list=self.g_net.vars)
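A hedged sketch of one training step for this matching-aware GAN, assuming a live session and pre-sampled batches (the helper name train_step is illustrative, not part of the class):

import numpy as np

def train_step(sess, gan, x, x_wrong, h, h_wrong, z):
    feed = {gan.x: x, gan.x_w_: x_wrong, gan.h: h, gan.h_: h_wrong,
            gan.z: z, gan.training: True,
            gan.with_text: np.ones(len(x), dtype=np.float32)}
    # one discriminator update, then one generator update
    _, d_loss = sess.run([gan.d_opt, gan.d_loss], feed_dict=feed)
    _, g_loss = sess.run([gan.g_opt, gan.g_loss], feed_dict=feed)
    return d_loss, g_loss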
Beispiel #49
0
    def loss_layer(self, feature_map_i, y_true, anchors):
        '''
        calc loss function from a certain scale
        input:
            feature_map_i: feature maps of a certain scale. shape: [N, 13, 13, 3*(5 + num_class)] etc.
            y_true: y_ture from a certain scale. shape: [N, 13, 13, 3, 5 + num_class + 1] etc.
            anchors: shape [9, 2]
        '''

        # size in [h, w] format! don't get messed up!
        grid_size = tf.shape(feature_map_i)[1:3]
        # the downscale ratio in height and width
        ratio = tf.cast(self.img_size / grid_size, tf.float32)
        # N: batch_size
        N = tf.cast(tf.shape(feature_map_i)[0], tf.float32)

        x_y_offset, pred_boxes, pred_conf_logits, pred_prob_logits = self.reorg_layer(
            feature_map_i, anchors)

        ###########
        # get mask
        ###########

        # shape: take 416x416 input image and 13*13 feature_map for example:
        # [N, 13, 13, 3, 1]
        object_mask = y_true[..., 4:5]

        # the calculation of the ignore mask is referred from
        # https://github.com/pjreddie/darknet/blob/master/src/yolo_layer.c#L179
        ignore_mask = tf.TensorArray(tf.float32, size=0, dynamic_size=True)

        def loop_cond(idx, ignore_mask):
            return tf.less(idx, tf.cast(N, tf.int32))

        def loop_body(idx, ignore_mask):
            # shape: [13, 13, 3, 4] & [13, 13, 3]  ==>  [V, 4]
            # V: num of true gt box of each image in a batch
            valid_true_boxes = tf.boolean_mask(
                y_true[idx, ..., 0:4], tf.cast(object_mask[idx, ..., 0],
                                               'bool'))
            # shape: [13, 13, 3, 4] & [V, 4] ==> [13, 13, 3, V]
            iou = self.box_iou(pred_boxes[idx], valid_true_boxes)
            # shape: [13, 13, 3]
            best_iou = tf.reduce_max(iou, axis=-1)
            # shape: [13, 13, 3]
            ignore_mask_tmp = tf.cast(best_iou < 0.5, tf.float32)
            # finally will be shape: [N, 13, 13, 3]
            ignore_mask = ignore_mask.write(idx, ignore_mask_tmp)
            return idx + 1, ignore_mask

        _, ignore_mask = tf.while_loop(cond=loop_cond,
                                       body=loop_body,
                                       loop_vars=[0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        # shape: [N, 13, 13, 3, 1]
        ignore_mask = tf.expand_dims(ignore_mask, -1)

        # shape: [N, 13, 13, 3, 2]
        pred_box_xy = pred_boxes[..., 0:2]
        pred_box_wh = pred_boxes[..., 2:4]

        # get xy coordinates in one cell from the feature_map
        # numerical range: 0 ~ 1
        # shape: [N, 13, 13, 3, 2]
        true_xy = y_true[..., 0:2] / ratio[::-1] - x_y_offset
        pred_xy = pred_box_xy / ratio[::-1] - x_y_offset

        # get_tw_th
        # numerical range: 0 ~ 1
        # shape: [N, 13, 13, 3, 2]
        true_tw_th = y_true[..., 2:4] / anchors
        pred_tw_th = pred_box_wh / anchors
        # for numerical stability
        true_tw_th = tf.where(condition=tf.equal(true_tw_th, 0),
                              x=tf.ones_like(true_tw_th),
                              y=true_tw_th)
        pred_tw_th = tf.where(condition=tf.equal(pred_tw_th, 0),
                              x=tf.ones_like(pred_tw_th),
                              y=pred_tw_th)
        true_tw_th = tf.log(tf.clip_by_value(true_tw_th, 1e-9, 1e9))
        pred_tw_th = tf.log(tf.clip_by_value(pred_tw_th, 1e-9, 1e9))

        # box size punishment:
        # box with smaller area has bigger weight. This is taken from the yolo darknet C source code.
        # shape: [N, 13, 13, 3, 1]
        box_loss_scale = 2. - (
            y_true[..., 2:3] / tf.cast(self.img_size[1], tf.float32)) * (
                y_true[..., 3:4] / tf.cast(self.img_size[0], tf.float32))

        ############
        # loss_part
        ############
        # mix_up weight
        # [N, 13, 13, 3, 1]
        mix_w = y_true[..., -1:]
        # shape: [N, 13, 13, 3, 1]
        xy_loss = tf.reduce_sum(
            tf.square(true_xy - pred_xy) * object_mask * box_loss_scale *
            mix_w) / N
        wh_loss = tf.reduce_sum(
            tf.square(true_tw_th - pred_tw_th) * object_mask * box_loss_scale *
            mix_w) / N

        # shape: [N, 13, 13, 3, 1]
        conf_pos_mask = object_mask
        conf_neg_mask = (1 - object_mask) * ignore_mask
        conf_loss_pos = conf_pos_mask * tf.nn.sigmoid_cross_entropy_with_logits(
            labels=object_mask, logits=pred_conf_logits)
        conf_loss_neg = conf_neg_mask * tf.nn.sigmoid_cross_entropy_with_logits(
            labels=object_mask, logits=pred_conf_logits)
        # TODO: may need to balance the pos-neg by multiplying some weights
        conf_loss = conf_loss_pos + conf_loss_neg
        if self.use_focal_loss:
            alpha = 1.0
            gamma = 2.0
            # TODO: alpha should be a mask array if needed
            focal_mask = alpha * tf.pow(
                tf.abs(object_mask - tf.sigmoid(pred_conf_logits)), gamma)
            conf_loss *= focal_mask
        conf_loss = tf.reduce_sum(conf_loss * mix_w) / N

        # shape: [N, 13, 13, 3, 1]
        # whether to use label smooth
        if self.use_label_smooth:
            delta = 0.01
            label_target = (
                1 - delta) * y_true[..., 5:-1] + delta * 1. / self.class_num
        else:
            label_target = y_true[..., 5:-1]
        class_loss = object_mask * tf.nn.sigmoid_cross_entropy_with_logits(
            labels=label_target, logits=pred_prob_logits) * mix_w
        class_loss = tf.reduce_sum(class_loss) / N

        return xy_loss, wh_loss, conf_loss, class_loss
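The ignore-mask loop above relies on a broadcast IoU helper, `self.box_iou`, defined elsewhere in the class. A minimal sketch of what it computes, assuming boxes in (center-x, center-y, w, h) format and the shapes noted in the comments ([13, 13, 3, 4] predictions against [V, 4] ground truths):

import tensorflow as tf

def box_iou(pred_boxes, valid_true_boxes):
    # pred_boxes: [13, 13, 3, 4]; valid_true_boxes: [V, 4]
    pred_xy = tf.expand_dims(pred_boxes[..., 0:2], -2)  # [13, 13, 3, 1, 2]
    pred_wh = tf.expand_dims(pred_boxes[..., 2:4], -2)
    true_xy = valid_true_boxes[:, 0:2]                  # [V, 2]
    true_wh = valid_true_boxes[:, 2:4]

    # broadcast to [13, 13, 3, V, 2] and intersect
    intersect_mins = tf.maximum(pred_xy - pred_wh / 2., true_xy - true_wh / 2.)
    intersect_maxs = tf.minimum(pred_xy + pred_wh / 2., true_xy + true_wh / 2.)
    intersect_wh = tf.maximum(intersect_maxs - intersect_mins, 0.)
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]  # [13, 13, 3, V]

    pred_area = pred_wh[..., 0] * pred_wh[..., 1]  # [13, 13, 3, 1]
    true_area = true_wh[..., 0] * true_wh[..., 1]  # [V]
    return intersect_area / (pred_area + true_area - intersect_area + 1e-10)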
Example #50
def mask_attn_score(score, memory_sequence_length, score_mask_value=-1e8):
    # Replace attention scores at padded positions with a large negative
    # value so they receive ~zero weight after a subsequent softmax.
    score_mask = tf.sequence_mask(memory_sequence_length,
                                  maxlen=score.shape[1])
    score_mask_values = score_mask_value * tf.ones_like(score)
    return tf.where(score_mask, score, score_mask_values)
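For example, masking a [batch, time] score matrix before a softmax (shapes illustrative):

import tensorflow as tf

scores = tf.random_normal([2, 5])
lengths = tf.constant([3, 5])              # valid length of each row
masked = mask_attn_score(scores, lengths)  # padded slots become -1e8
attn_weights = tf.nn.softmax(masked)       # ~0 probability on padding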
Example #51
def masked_conv_v1(u,
                   filter_1,
                   filter_2,
                   mask_1,
                   mask_2,
                   conductivity_1,
                   conductivity_2,
                   eps=0.01,
                   filter_banks=None):
    '''
    Convolution of one image with two different kernels on two masked regions.
    :param u: input image
    :param filter_1: kernel applied on the region given by mask_1
    :param filter_2: kernel applied on the region given by mask_2
    :param mask_1: region where filter_1 is applied, with values in (0.5-eps, 0.5+eps) on the boundary
    :param mask_2: region where filter_2 is applied, with values in (0.5-eps, 0.5+eps) on the boundary
    :param eps: tolerance used when rounding the masks near the region boundary
    :param filter_banks: unused in this implementation
    :return: dict with the diagonal term, the off-diagonal product LU_u and the assembled result
    '''
    boundary_mask = tf.round(mask_1 + 0.1) + tf.round(mask_2 + 0.1) - 1
    boundary_d_matrix = boundary_mask * (-8 / 3. *
                                         (conductivity_1 + conductivity_2) /
                                         2.)
    mat1_d_matrix = tf.round(mask_1 - 0.1) * (-8 / 3. * conductivity_1)
    mat2_d_matrix = tf.round(mask_2 - 0.1) * (-8 / 3. * conductivity_2)
    d_matrix = boundary_d_matrix + mat1_d_matrix + mat2_d_matrix

    padded_input = tf.pad(
        u * tf.round(mask_1 + eps), [[0, 0], [1, 1], [1, 1], [0, 0]],
        "SYMMETRIC")  # convolution with symmetric padding at boundary
    output_1 = tf.nn.conv2d(input=padded_input,
                            filter=filter_1,
                            strides=[1, 1, 1, 1],
                            padding='VALID')
    padded_input = tf.pad(
        u * tf.round(mask_2 + eps), [[0, 0], [1, 1], [1, 1], [0, 0]],
        "SYMMETRIC")  # convolution with symmetric padding at boundary
    output_2 = tf.nn.conv2d(input=padded_input,
                            filter=filter_2,
                            strides=[1, 1, 1, 1],
                            padding='VALID')
    res1 = output_1 * mask_1 + output_2 * mask_2
    padded_input = tf.pad(
        u * mask_1, [[0, 0], [1, 1], [1, 1], [0, 0]],
        "SYMMETRIC")  # convolution with symmetric padding at boundary
    output_1 = tf.nn.conv2d(input=padded_input,
                            filter=filter_1,
                            strides=[1, 1, 1, 1],
                            padding='VALID')
    padded_input = tf.pad(
        u * mask_2, [[0, 0], [1, 1], [1, 1], [0, 0]],
        "SYMMETRIC")  # convolution with symmetric padding at boundary
    output_2 = tf.nn.conv2d(input=padded_input,
                            filter=filter_2,
                            strides=[1, 1, 1, 1],
                            padding='VALID')
    res2 = output_1 + output_2
    LU_u = res1 * (tf.round(mask_1) + tf.round(mask_2))  + \
          res2 * (tf.ones_like(mask_1) - tf.round(mask_1) - tf.round(mask_2))
    result = LU_u + (d_matrix * u)

    tmp = {
        'd_matrix': d_matrix,
        'LU_u': LU_u,
        'result': result,
    }
    return tmp
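A quick usage sketch under assumed inputs: two 3x3 stencils and complementary hard masks on an 8x8 single-channel image (a real use would give the masks soft values near 0.5 on the shared boundary, as the docstring describes). The kernel values here are illustrative, not the stencil of any particular PDE.

import numpy as np
import tensorflow as tf

k = np.array([[1., 1., 1.],
              [1., 0., 1.],
              [1., 1., 1.]], np.float32) / 3.
filter_1 = tf.constant(k.reshape(3, 3, 1, 1))
filter_2 = tf.constant(2. * k.reshape(3, 3, 1, 1))

u = tf.ones([1, 8, 8, 1])
# left half belongs to material 1, right half to material 2
mask_1 = tf.concat([tf.ones([1, 8, 4, 1]), tf.zeros([1, 8, 4, 1])], axis=2)
mask_2 = 1. - mask_1

out = masked_conv_v1(u, filter_1, filter_2, mask_1, mask_2,
                     conductivity_1=1., conductivity_2=2.)
with tf.Session() as sess:
    print(sess.run(out['result']).shape)  # (1, 8, 8, 1)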
Example #52
    def _build_model(self):

        # define some placeholders
        self.real_data = tf.compat.v1.placeholder(tf.float32, [
            self.batch_size, self.time_step, self.pitch_range,
            self.input_c_dim + self.output_c_dim
        ],
                                                  name='real_A_and_B')
        if self.model != 'base':
            self.real_mixed = tf.compat.v1.placeholder(
                tf.float32, [
                    self.batch_size, self.time_step, self.pitch_range,
                    self.input_c_dim
                ],
                name='real_A_and_B_mixed')

        self.real_A = self.real_data[:, :, :, :self.input_c_dim]
        self.real_B = self.real_data[:, :, :,
                                     self.input_c_dim:self.input_c_dim +
                                     self.output_c_dim]

        self.gaussian_noise = tf.compat.v1.placeholder(tf.float32, [
            self.batch_size, self.time_step, self.pitch_range, self.input_c_dim
        ],
                                                       name='gaussian_noise')
        # Generator: A - B - A
        self.fake_B = self.generator(self.real_A,
                                     self.options,
                                     False,
                                     name="generatorA2B")
        self.fake_A_ = self.generator(self.fake_B,
                                      self.options,
                                      False,
                                      name="generatorB2A")
        # Generator: B - A - B
        self.fake_A = self.generator(self.real_B,
                                     self.options,
                                     True,
                                     name="generatorB2A")
        self.fake_B_ = self.generator(self.fake_A,
                                      self.options,
                                      True,
                                      name="generatorA2B")
        # to binary
        self.real_A_binary = to_binary(self.real_A, 0.5)
        self.real_B_binary = to_binary(self.real_B, 0.5)
        self.fake_A_binary = to_binary(self.fake_A, 0.5)
        self.fake_B_binary = to_binary(self.fake_B, 0.5)
        self.fake_A__binary = to_binary(self.fake_A_, 0.5)
        self.fake_B__binary = to_binary(self.fake_B_, 0.5)

        # Discriminator: Fake
        self.DB_fake = self.discriminator(self.fake_B + self.gaussian_noise,
                                          self.options,
                                          reuse=False,
                                          name="discriminatorB")
        self.DA_fake = self.discriminator(self.fake_A + self.gaussian_noise,
                                          self.options,
                                          reuse=False,
                                          name="discriminatorA")
        # Discriminator: Real
        self.DA_real = self.discriminator(self.real_A + self.gaussian_noise,
                                          self.options,
                                          reuse=True,
                                          name="discriminatorA")
        self.DB_real = self.discriminator(self.real_B + self.gaussian_noise,
                                          self.options,
                                          reuse=True,
                                          name="discriminatorB")

        self.fake_A_sample = tf.compat.v1.placeholder(tf.float32, [
            self.batch_size, self.time_step, self.pitch_range, self.input_c_dim
        ],
                                                      name='fake_A_sample')
        self.fake_B_sample = tf.compat.v1.placeholder(tf.float32, [
            self.batch_size, self.time_step, self.pitch_range, self.input_c_dim
        ],
                                                      name='fake_B_sample')
        self.DA_fake_sample = self.discriminator(self.fake_A_sample +
                                                 self.gaussian_noise,
                                                 self.options,
                                                 reuse=True,
                                                 name="discriminatorA")
        self.DB_fake_sample = self.discriminator(self.fake_B_sample +
                                                 self.gaussian_noise,
                                                 self.options,
                                                 reuse=True,
                                                 name="discriminatorB")
        if self.model != 'base':
            # Discriminator: All
            self.DA_real_all = self.discriminator(self.real_mixed +
                                                  self.gaussian_noise,
                                                  self.options,
                                                  reuse=False,
                                                  name="discriminatorA_all")
            self.DA_fake_sample_all = self.discriminator(
                self.fake_A_sample + self.gaussian_noise,
                self.options,
                reuse=True,
                name="discriminatorA_all")
            self.DB_real_all = self.discriminator(self.real_mixed +
                                                  self.gaussian_noise,
                                                  self.options,
                                                  reuse=False,
                                                  name="discriminatorB_all")
            self.DB_fake_sample_all = self.discriminator(
                self.fake_B_sample + self.gaussian_noise,
                self.options,
                reuse=True,
                name="discriminatorB_all")
        # Generator loss
        self.cycle_loss = self.L1_lambda * abs_criterion(self.real_A, self.fake_A_) \
            + self.L1_lambda * abs_criterion(self.real_B, self.fake_B_)
        self.g_loss_a2b = self.criterionGAN(
            self.DB_fake, tf.ones_like(self.DB_fake)) + self.cycle_loss
        self.g_loss_b2a = self.criterionGAN(
            self.DA_fake, tf.ones_like(self.DA_fake)) + self.cycle_loss
        # cycle_loss is already included in both terms above, so subtract one
        # copy to count it only once in the total generator loss
        self.g_loss = self.g_loss_a2b + self.g_loss_b2a - self.cycle_loss
        # Discriminator loss
        self.db_loss_real = self.criterionGAN(self.DB_real,
                                              tf.ones_like(self.DB_real))
        self.db_loss_fake = self.criterionGAN(
            self.DB_fake_sample, tf.zeros_like(self.DB_fake_sample))
        self.db_loss = (self.db_loss_real + self.db_loss_fake) / 2
        self.da_loss_real = self.criterionGAN(self.DA_real,
                                              tf.ones_like(self.DA_real))
        self.da_loss_fake = self.criterionGAN(
            self.DA_fake_sample, tf.zeros_like(self.DA_fake_sample))
        self.da_loss = (self.da_loss_real + self.da_loss_fake) / 2
        self.d_loss = self.da_loss + self.db_loss

        if self.model != 'base':
            self.db_all_loss_real = self.criterionGAN(
                self.DB_real_all, tf.ones_like(self.DB_real_all))
            self.db_all_loss_fake = self.criterionGAN(
                self.DB_fake_sample_all,
                tf.zeros_like(self.DB_fake_sample_all))
            self.db_all_loss = (self.db_all_loss_real +
                                self.db_all_loss_fake) / 2
            self.da_all_loss_real = self.criterionGAN(
                self.DA_real_all, tf.ones_like(self.DA_real_all))
            self.da_all_loss_fake = self.criterionGAN(
                self.DA_fake_sample_all,
                tf.zeros_like(self.DA_fake_sample_all))
            self.da_all_loss = (self.da_all_loss_real +
                                self.da_all_loss_fake) / 2
            self.d_all_loss = self.da_all_loss + self.db_all_loss
            self.D_loss = self.d_loss + self.gamma * self.d_all_loss

        # Define all summaries
        self.g_loss_a2b_sum = tf.compat.v1.summary.scalar(
            "g_loss_a2b", self.g_loss_a2b)
        self.g_loss_b2a_sum = tf.compat.v1.summary.scalar(
            "g_loss_b2a", self.g_loss_b2a)
        self.g_loss_sum = tf.compat.v1.summary.scalar("g_loss", self.g_loss)
        self.cycle_loss_sum = tf.compat.v1.summary.scalar(
            "cycle_loss", self.cycle_loss)
        self.g_sum = tf.compat.v1.summary.merge([
            self.g_loss_a2b_sum, self.g_loss_b2a_sum, self.g_loss_sum,
            self.cycle_loss_sum
        ])
        self.db_loss_sum = tf.compat.v1.summary.scalar("db_loss", self.db_loss)
        self.da_loss_sum = tf.compat.v1.summary.scalar("da_loss", self.da_loss)
        self.d_loss_sum = tf.compat.v1.summary.scalar("d_loss", self.d_loss)
        self.db_loss_real_sum = tf.compat.v1.summary.scalar(
            "db_loss_real", self.db_loss_real)
        self.db_loss_fake_sum = tf.compat.v1.summary.scalar(
            "db_loss_fake", self.db_loss_fake)
        self.da_loss_real_sum = tf.compat.v1.summary.scalar(
            "da_loss_real", self.da_loss_real)
        self.da_loss_fake_sum = tf.compat.v1.summary.scalar(
            "da_loss_fake", self.da_loss_fake)
        if self.model != 'base':
            self.d_all_loss_sum = tf.compat.v1.summary.scalar(
                "d_all_loss", self.d_all_loss)
            self.D_loss_sum = tf.compat.v1.summary.scalar(
                "D_loss", self.D_loss)
            self.d_sum = tf.compat.v1.summary.merge([
                self.da_loss_sum, self.da_loss_real_sum, self.da_loss_fake_sum,
                self.db_loss_sum, self.db_loss_real_sum, self.db_loss_fake_sum,
                self.d_loss_sum, self.d_all_loss_sum, self.D_loss_sum
            ])
        else:
            self.d_sum = tf.compat.v1.summary.merge([
                self.da_loss_sum, self.da_loss_real_sum, self.da_loss_fake_sum,
                self.db_loss_sum, self.db_loss_real_sum, self.db_loss_fake_sum,
                self.d_loss_sum
            ])

        # Test
        self.test_A = tf.compat.v1.placeholder(
            tf.float32,
            [None, self.time_step, self.pitch_range, self.input_c_dim],
            name='test_A')
        self.test_B = tf.compat.v1.placeholder(
            tf.float32,
            [None, self.time_step, self.pitch_range, self.output_c_dim],
            name='test_B')
        # A - B - A
        self.testB = self.generator(self.test_A,
                                    self.options,
                                    True,
                                    name="generatorA2B")
        self.testA_ = self.generator(self.testB,
                                     self.options,
                                     True,
                                     name='generatorB2A')
        # B - A - B
        self.testA = self.generator(self.test_B,
                                    self.options,
                                    True,
                                    name="generatorB2A")
        self.testB_ = self.generator(self.testA,
                                     self.options,
                                     True,
                                     name='generatorA2B')
        # to binary
        self.test_A_binary = to_binary(self.test_A, 0.5)
        self.test_B_binary = to_binary(self.test_B, 0.5)
        self.testA_binary = to_binary(self.testA, 0.5)
        self.testB_binary = to_binary(self.testB, 0.5)
        self.testA__binary = to_binary(self.testA_, 0.5)
        self.testB__binary = to_binary(self.testB_, 0.5)

        t_vars = tf.compat.v1.trainable_variables()
        self.d_vars = [var for var in t_vars if 'discriminator' in var.name]
        self.g_vars = [var for var in t_vars if 'generator' in var.name]
        for var in t_vars:
            print(var.name)
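`to_binary` and `abs_criterion` are used above but defined elsewhere in this codebase. Minimal sketches of what they plausibly are, inferred from usage (thresholding a piano-roll to 0/1 and a mean absolute error for the cycle-consistency term); treat these as assumptions, not the project's exact helpers.

import tensorflow as tf

def to_binary(tensor, threshold):
    # binarize a piano-roll-like tensor at the given threshold
    return tf.cast(tensor > threshold, tf.float32)

def abs_criterion(in_, target):
    # mean absolute error, used for the cycle-consistency loss
    return tf.reduce_mean(tf.abs(in_ - target))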
Example #53
    def _apply_transposed(self, is_train, x):
        w_init = get_keras_initialization(self.w_init)
        r_init = None if self.recurrent_init is None else get_keras_initialization(
            self.recurrent_init)
        x_size = x.shape.as_list()[-1]
        if x_size is None:
            raise ValueError("Last dimension must be defined (have shape %s)" %
                             str(x.shape))

        if self._kind == "GRU":
            cell = cudnn_rnn_ops.CudnnGRU(self.n_layers,
                                          self.n_units,
                                          x_size,
                                          input_mode="linear_input")
        elif self._kind == "LSTM":
            cell = cudnn_rnn_ops.CudnnLSTM(self.n_layers,
                                           self.n_units,
                                           x_size,
                                           input_mode="linear_input")
        else:
            raise ValueError()

        n_params = cell.params_size().eval()
        weights, biases = cell.params_to_canonical(tf.zeros([n_params]))

        def init(shape, dtype=None, partition_info=None):
            # This is a bit hacky, since the API for these models is awkward. We have to compute
            # the shape of the weights / biases by calling `cell.params_to_canonical` with an
            # unused tensor, and then use .eval() to actually get the shape. Then we can apply
            # the user-requested initializers
            if self._kind == "LSTM":
                is_recurrent = [
                    False, False, False, False, True, True, True, True
                ]
                is_forget_bias = [
                    False, True, False, False, False, True, False, False
                ]
            else:
                is_recurrent = [False, False, False, True, True, True]
                is_forget_bias = [False] * 6

            init_biases = [
                tf.constant(self.lstm_bias / 2.0, tf.float32,
                            (self.n_units, )) if z else tf.zeros(self.n_units)
                for z in is_forget_bias
            ]
            init_weights = []

            for w, r in zip(weights, is_recurrent):
                if r and r_init is not None:
                    init_weights.append(
                        tf.reshape(
                            r_init((self.n_units, self.n_units), w.dtype),
                            tf.shape(w)))
                else:
                    init_weights.append(w_init(tf.shape(w).eval(), w.dtype))
            out = cell.canonical_to_params(init_weights, init_biases)
            out.set_shape((n_params, ))

            return out

        parameters = tf.get_variable("gru_parameters",
                                     n_params,
                                     tf.float32,
                                     initializer=init)

        if self.keep_recurrent < 1:
            # Not thoroughly tested: figure out which indices in `parameters` are recurrent
            # weights and drop them; this implements drop-connect for the recurrent weights
            is_recurrent = weights[:len(weights) // 2] + [
                tf.ones_like(w) for w in weights[len(weights) // 2:]
            ]
            recurrent_mask = cell.canonical_to_params(
                is_recurrent, biases)  # ones at recurrent weights
            recurrent_mask = 1 - recurrent_mask * (
                1 - self.keep_recurrent
            )  # ones are non-recurrent param, keep_prob elsewhere
            parameters = tf.cond(
                is_train, lambda: tf.floor(
                    tf.random_uniform(
                        (n_params, )) + recurrent_mask) * parameters,
                lambda: parameters)

        if self._kind == "LSTM":
            if self.learn_initial_states:
                raise NotImplementedError()
            else:
                initial_state_h = tf.zeros(
                    (self.n_layers, tf.shape(x)[1], self.n_units), tf.float32)
                initial_state_c = tf.zeros(
                    (self.n_layers, tf.shape(x)[1], self.n_units), tf.float32)
            out = cell(x, initial_state_h, initial_state_c, parameters, True)
        else:
            if self.learn_initial_states:
                initial_state = tf.get_variable("initial_state", self.n_units,
                                                tf.float32,
                                                tf.zeros_initializer())
                initial_state = tf.tile(
                    tf.expand_dims(tf.expand_dims(initial_state, 0), 0),
                    [self.n_layers, tf.shape(x)[1], 1])
            else:
                initial_state = tf.zeros(
                    (self.n_layers, tf.shape(x)[1], self.n_units), tf.float32)
            out = cell(x, initial_state, parameters, True)
        return out
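The drop-connect branch above leans on a standard trick: for u ~ U[0, 1), tf.floor(u + keep_prob) is 1 with probability keep_prob and 0 otherwise, giving a Bernoulli 0/1 mask without tf.nn.dropout's rescaling. A standalone illustration:

import tensorflow as tf

keep_prob = 0.8
u = tf.random_uniform((10,))       # uniform samples in [0, 1)
mask = tf.floor(u + keep_prob)     # 1 with probability 0.8, else 0
weights = tf.ones((10,))
dropped = mask * weights           # zeroes out ~20% of the entries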
Example #54
def main(layers):

    np.random.seed(seed)
    tf.set_random_seed(seed)

    # We load the original dataset
    data = np.load(original_file)

    # =========================================================================
    #  Parameters of the complete system
    # =========================================================================

    # We obtain the features and the targets

    X = data[ :, range(data.shape[ 1 ] - 1) ]
    y = data[ :, data.shape[ 1 ] - 1 ]

    data_size = X.shape[ 0 ]
    size_train = data_size - n_size_test
    total_training_data = size_train

    X_train = X[0 : size_train, : ]
    y_train = np.vstack(y[ 0 : size_train ])
    X_test = X[size_train : data_size, : ]
    y_test = np.vstack(y[ size_train : data_size ])

    # Normalize the values

    meanXTrain = np.mean(X_train, axis = 0)
    stdXTrain = np.std(X_train, axis = 0)

    meanyTrain = np.mean(y_train)
    stdyTrain = np.std(y_train)

    X_train = (X_train - meanXTrain) / stdXTrain
    y_train = (y_train - meanyTrain) / stdyTrain
    X_test = (X_test - meanXTrain) / stdXTrain

    std_targets = stdyTrain
    mean_targets = meanyTrain

    # Create the model

    dim_data = X_train.shape[ 1 ]

    # Placeholders for data and number of samples

    x = tf.placeholder(tf.float32, [ None, dim_data ])
    y_ = tf.placeholder(tf.float32, [ None, 1 ])
    n_samples = tf.placeholder(tf.int32, [ 1 ])[ 0 ]

    n_layers_nn = n_layers_gen = n_layers_disc = layers

    if n_layers_nn == 2:
        total_weights = n_units_nn * (dim_data + n_units_nn) + n_units_nn # Number of weights for the 2 hidden layers case
    else:
        total_weights = (dim_data + 1) * n_units_nn  # Total number of weights used

    generator = create_generator(n_units_gen, noise_comps_gen, total_weights, n_layers_gen)
    discriminator = create_discriminator(n_units_disc, total_weights, n_layers_disc)
    main_NN = create_main_NN(n_units_nn, n_layers_nn)

    weights = compute_output_generator(generator, tf.shape(x)[ 0 ], n_samples, noise_comps_gen)

    # Obtain the moments of the weights and pass the values through the disc

    mean_w , var_w = tf.nn.moments(weights, axes = [0, 1])

    mean_w = tf.stop_gradient(mean_w)
    var_w = tf.stop_gradient(var_w)

    # Normalize real weights

    norm_weights = (weights - mean_w) / tf.sqrt(var_w)

    # Generate samples of a normal distribution with the moments of the weights

    w_gaussian = tf.random_normal(shape = tf.shape(weights), mean = 0, stddev = 1, seed = seed)

    # Obtain the T(z,x) for the real and the sampled weights

    T_real = compute_output_discriminator(discriminator, norm_weights, layers)
    T_sampled = compute_output_discriminator(discriminator, w_gaussian, layers)

    # Calculate the cross entropy loss for the discriminator

    d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=T_real, labels=tf.ones_like(T_real)))
    d_loss_sampled = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=T_sampled, labels=tf.zeros_like(T_sampled)))

    cross_entropy_per_point = (d_loss_real + d_loss_sampled) / 2.0

    # Obtain the KL and ELBO

    logr = -0.5 * tf.reduce_sum(norm_weights**2 + tf.log(var_w) + np.log(2.0 * np.pi), [ 2 ])
    logz = -0.5 * tf.reduce_sum((weights)**2 / tf.exp(main_NN['log_vars_prior']) + main_NN['log_vars_prior'] + np.log(2.0 * np.pi), [ 2 ])
    KL = T_real + logr - logz

    res_train, error, log_prob_data, results_mean, results_std = compute_outputs_main_NN(main_NN, x, y_, mean_targets, std_targets, weights, \
        n_samples, dim_data)

    # Make the estimates of the ELBO for the primary classifier

    ELBO = (tf.reduce_sum(res_train) - tf.reduce_mean(KL) * tf.cast(tf.shape(x)[ 0 ], tf.float32) / \
        tf.cast(total_training_data, tf.float32)) * tf.cast(total_training_data, tf.float32) / tf.cast(tf.shape(x)[ 0 ], tf.float32)

    neg_ELBO = -ELBO
    main_loss = neg_ELBO
    mean_ELBO = ELBO

    # KL and res_train have shape batch_size x n_samples

    mean_KL = tf.reduce_mean(KL)

    # Create the variable lists to be updated

    vars_primal = get_variables_generator(generator) + get_variables_main_NN(main_NN)
    vars_dual = get_variables_discriminator(discriminator)

    train_step_primal = tf.train.AdamOptimizer(primal_rate).minimize(main_loss, var_list = vars_primal)
    train_step_dual = tf.train.AdamOptimizer(dual_rate).minimize(cross_entropy_per_point, var_list = vars_dual)

    config = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1, \
        allow_soft_placement=True, device_count = {'CPU': 1})

    with tf.Session(config = config) as sess:

        sess.run(tf.global_variables_initializer())

        total_ini = time.time()

        # Change the value of alpha to begin exploring using the second value given

        for epoch in range(n_epochs):

            L = 0.0
            ce_estimate = 0.0
            kl = 0.0

            for i_batch in range(int(np.ceil(size_train / n_batch))):

                ini = time.perf_counter()  # time.clock() was removed in Python 3.8
                ini_ref = time.time()
                ini_train = time.perf_counter()

                last_point = np.minimum(n_batch * (i_batch + 1), size_train)

                batch = [ X_train[ i_batch * n_batch : last_point, : ] , y_train[ i_batch * n_batch : last_point, ] ]

                sess.run(train_step_dual, feed_dict={x: batch[ 0 ], y_: batch[ 1 ], n_samples: n_samples_train})
                sess.run(train_step_primal, feed_dict={x: batch[ 0 ], y_: batch[ 1 ], n_samples: n_samples_train})

                L_tmp, kl_tmp, ce_estimate_tmp = sess.run([ mean_ELBO, mean_KL, cross_entropy_per_point] , feed_dict={x: batch[ 0 ], y_: batch[ 1 ], n_samples: n_samples_train})
                L += L_tmp
                kl += kl_tmp
                ce_estimate += ce_estimate_tmp

                fini_train = time.perf_counter()

                if i_batch % n_batches_to_report == 0 and not (i_batch == 0 and epoch == 0):

                    sys.stdout.write('\n')
                    ini_test = time.time()

                    ###################################################
                    # CRPS by the ensemble method for each test value #
                    ###################################################

                    # crps_raw = np.empty(len(labels))
                    # for i in range(len(labels)): crps_raw[i] = crps_ensemble(labels[i,0], results[i,:])
                    # mean_crps_ensemble = np.mean(crps_raw)

                    # np.savetxt('results_AADM/' + str(alpha) + 'raw_CRPS_' + str(split) + ".txt", crps_raw)
                    # np.savetxt('results_AADM/' + str(alpha) + 'mean_CRPS_' + str(split) + ".txt", [ mean_crps ])

                    ###########################################
                    # Exact CRPS for the mixture of gaussians #
                    ###########################################


                    # np.savetxt('results_AADM/' + str(alpha) + 'raw_exact_CRPS_' + str(split) + ".txt", crps_exact)
                    # np.savetxt('results_AADM/' + str(alpha) + 'mean_exact_CRPS_' + str(split) + ".txt", [ mean_crps_exact ])

                    # We do the test evaluation for the error metrics
                    SE = 0.0
                    LL  = 0.0
                    mean_crps_exact = 0.0
                    n_batches_to_process = int(np.ceil(X_test.shape[ 0 ] / n_batch))
                    for i in range(n_batches_to_process):

                        last_point = np.minimum(n_batch * (i + 1), X_test.shape[ 0 ])

                        batch = [ X_test[ i * n_batch : last_point, : ] , y_test[ i * n_batch : last_point, ] ]

                        SE_tmp, LL_tmp, labels, res_mean, res_std = sess.run([ error, log_prob_data, y_, results_mean, results_std ], \
                            feed_dict={x: batch[0], y_: batch[1], n_samples: n_samples_test})

                        SE += SE_tmp
                        LL += LL_tmp

                        # Exact CRPS
                        shape_quad = res_mean.shape

                        res_var = res_std ** 2
                        crps_exact = np.empty([ shape_quad[0] ])

                        for j in range(shape_quad[0]):  # renamed from i to avoid shadowing the batch index
                            means_vec = res_mean[j, :]
                            vars_vec = res_var[j, :]

                            means_diff = np.empty([shape_quad[1], shape_quad[1]])
                            vars_sum = np.empty([shape_quad[1], shape_quad[1]])
                            ru, cu = np.triu_indices(means_vec.size, 1)
                            # k=1 makes the lower set include the main diagonal,
                            # which np.empty left uninitialized
                            rl, cl = np.tril_indices(means_vec.size, 1)

                            means_diff[ru, cu] = means_vec[ru] - means_vec[cu]
                            means_diff[rl, cl] = means_vec[rl] - means_vec[cl]
                            vars_sum[ru, cu] = vars_vec[ru] + vars_vec[cu]
                            vars_sum[rl, cl] = vars_vec[rl] + vars_vec[cl]

                            # Term only depending on the means and vars
                            fixed_term = 1 / 2 * np.mean(aux_crps(means_diff, vars_sum))

                            # Term that depends on the real value of the data
                            dev_mean = labels[j, 0] - means_vec
                            data_term = np.mean(aux_crps(dev_mean, vars_vec))

                            crps_exact[j] = data_term - fixed_term

                        mean_crps_exact += np.mean(crps_exact)

                    RMSE = np.sqrt(SE / float(X_test.shape[ 0 ]))
                    TestLL = (LL / float(X_test.shape[ 0 ]))
                    mean_CRPS = (mean_crps_exact / float(X_test.shape[ 0 ]) )

                    fini_test = time.time()
                    fini = time.perf_counter()
                    fini_ref = time.time()
                    total_fini = time.time()

                    with open("results_AADM_airlines/res_avb_airlines.txt", "a") as res_file:
                        string = ('AVB batch %g datetime %s epoch %d ELBO %g CROSS-ENT %g KL %g real_time %g cpu_time %g ' + \
                            'train_time %g test_time %g total_time %g LL %g RMSE %g CRPS_exact %g ') % (i_batch, str(datetime.now()), epoch, \
                            L / n_batches_to_report, ce_estimate / n_batches_to_report, kl / n_batches_to_report, (fini_ref - \
                            ini_ref), (fini - ini), (fini_train - ini_train), (fini_test - ini_test), (total_fini - total_ini), TestLL, \
                            RMSE, mean_CRPS)
                        res_file.write(string + "\n")
                        print(string)
                        sys.stdout.flush()

                    L = 0.0
                    ce_estimate = 0.0
                    kl = 0.0
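The exact-CRPS block above calls `aux_crps`, which is defined elsewhere; from how it is combined (the data term minus half the mean pairwise term) it must return E|X| for X ~ N(m, v), the closed form that makes the CRPS of a Gaussian mixture exact. A sketch under that assumption:

import numpy as np
from scipy.stats import norm

def aux_crps(m, v):
    # E|X| for X ~ N(m, v): m * (2 * Phi(m / s) - 1) + 2 * s * phi(m / s),
    # where Phi and phi are the standard normal CDF and PDF; works elementwise
    s = np.sqrt(v)
    return m * (2. * norm.cdf(m / s) - 1.) + 2. * s * norm.pdf(m / s)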
def custom_loss(y_true, y_pred):
    mask_shape = tf.shape(y_true)[:4]

    cell_x = tf.to_float(
        tf.reshape(tf.tile(tf.range(GRID_W), [GRID_H]),
                   (1, GRID_H, GRID_W, 1, 1)))
    cell_y = tf.transpose(cell_x, (0, 2, 1, 3, 4))

    cell_grid = tf.tile(tf.concat([cell_x, cell_y], -1),
                        [BATCH_SIZE, 1, 1, 5, 1])

    coord_mask = tf.zeros(mask_shape)
    conf_mask = tf.zeros(mask_shape)
    class_mask = tf.zeros(mask_shape)

    seen = tf.Variable(0.)

    total_AP = tf.Variable(0.)
    """
    Adjust prediction
    """
    ### adjust x and y
    pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid

    ### adjust w and h
    pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(
        ANCHORS, [1, 1, 1, BOX, 2])

    ### adjust confidence
    pred_box_conf = tf.sigmoid(y_pred[..., 4])

    ### adjust class probabilities
    pred_box_class = y_pred[..., 5:]
    """
    Adjust ground truth
    """
    ### adjust x and y
    true_box_xy = y_true[..., 0:2]  # relative position to the containing cell

    ### adjust w and h
    true_box_wh = y_true[
        ..., 2:4]  # number of cells across, horizontally and vertically

    ### adjust confidence
    true_wh_half = true_box_wh / 2.
    true_mins = true_box_xy - true_wh_half
    true_maxes = true_box_xy + true_wh_half

    pred_wh_half = pred_box_wh / 2.
    pred_mins = pred_box_xy - pred_wh_half
    pred_maxes = pred_box_xy + pred_wh_half

    intersect_mins = tf.maximum(pred_mins, true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)
    intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    true_areas = true_box_wh[..., 0] * true_box_wh[..., 1]
    pred_areas = pred_box_wh[..., 0] * pred_box_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = tf.truediv(intersect_areas, union_areas)

    true_box_conf = iou_scores * y_true[..., 4]

    ### adjust class probabilities
    true_box_class = tf.to_int32(y_true[..., 5])
    """
    Determine the masks
    """
    ### coordinate mask: simply the position of the ground truth boxes (the predictors)
    coord_mask = tf.expand_dims(y_true[..., 4], axis=-1) * COORD_SCALE

    ### confidence mask: penalize predictors + penalize boxes with low IOU
    # penalize the confidence of boxes whose IOU with every ground truth box is < 0.6
    # NOTE: true_boxes is not an argument here; it must exist in the enclosing
    # scope (typically an extra Input tensor in the YOLOv2 Keras setup)
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    pred_xy = tf.expand_dims(pred_box_xy, 4)
    pred_wh = tf.expand_dims(pred_box_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    intersect_mins = tf.maximum(pred_mins, true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)
    intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    true_areas = true_wh[..., 0] * true_wh[..., 1]
    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = tf.truediv(intersect_areas, union_areas)

    best_ious = tf.reduce_max(iou_scores, axis=4)
    conf_mask = conf_mask + tf.to_float(
        best_ious < 0.6) * (1 - y_true[..., 4]) * NO_OBJECT_SCALE

    # penalize the confidence of the boxes which are responsible for corresponding ground truth boxes
    conf_mask = conf_mask + y_true[..., 4] * OBJECT_SCALE

    ### class mask: simply the position of the ground truth boxes (the predictors)
    class_mask = y_true[..., 4] * tf.gather(CLASS_WEIGHTS,
                                            true_box_class) * CLASS_SCALE
    """
    Warm-up training
    """
    no_boxes_mask = tf.to_float(coord_mask < COORD_SCALE / 2.)
    seen = tf.assign_add(seen, 1.)

    true_box_xy, true_box_wh, coord_mask = tf.cond(
        tf.less(seen, WARM_UP_BATCHES), lambda: [
            true_box_xy + (0.5 + cell_grid) * no_boxes_mask,
            true_box_wh + tf.ones_like(true_box_wh) * np.reshape(
                ANCHORS, [1, 1, 1, BOX, 2]) * no_boxes_mask,
            tf.ones_like(coord_mask)
        ], lambda: [true_box_xy, true_box_wh, coord_mask])
    """
    Finalize the loss
    """
    nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0))
    nb_conf_box = tf.reduce_sum(tf.to_float(conf_mask > 0.0))
    nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0))

    loss_xy = tf.reduce_sum(tf.square(true_box_xy - pred_box_xy) *
                            coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_wh = tf.reduce_sum(tf.square(true_box_wh - pred_box_wh) *
                            coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_conf = tf.reduce_sum(
        tf.square(true_box_conf - pred_box_conf) * conf_mask) / (nb_conf_box +
                                                                 1e-6) / 2.
    loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=true_box_class, logits=pred_box_class)
    loss_class = tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6)

    loss = loss_xy + loss_wh + loss_conf + loss_class

    # nb_true_box = tf.reduce_sum(y_true[..., 4])
    # nb_pred_box = tf.reduce_sum(tf.to_float(true_box_conf > 0.5))

    # total_AP = tf.assign_add(total_AP, nb_pred_box/nb_true_box)

    # loss = tf.Print(loss, [loss_xy, loss_wh, loss_conf, loss_class, loss, total_AP/seen], message='DEBUG', summarize=1000)

    return loss
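A minimal sketch of plugging this loss into a Keras model. `model`, `x_train`, `y_train`, `true_boxes` and the grid/anchor constants are assumptions here; custom_loss closes over `true_boxes` and those constants, so they must be defined before compile() is called.

from keras.optimizers import Adam

# Keras passes (y_true, y_pred) to the loss, matching custom_loss's signature
model.compile(loss=custom_loss, optimizer=Adam(lr=1e-4))
model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=10)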
        def symbols_to_logits_fn(ids, i, cache):
            """Generate logits for next potential IDs.

      Args:
        ids: Current decoded sequences. int tensor with shape [batch_size *
          beam_size, i + 1].
        i: Loop index.
        cache: dictionary of values storing the encoder output, encoder-decoder
          attention bias, and previous decoder attention values.

      Returns:
        Tuple of
          (logits with shape [batch_size * beam_size, vocab_size],
           updated cache values)
      """
            # Set decoder input to the last generated IDs
            decoder_input = ids[:, -1:]

            # Preprocess decoder input by getting embeddings and adding timing signal.
            # decoder_input = self.embedding_softmax_layer(decoder_input)
            source_decoder_input = decoder_input
            decoder_input = self.embedding_lookup(decoder_input)
            embedding_mask = tf.cast(tf.not_equal(source_decoder_input, 0),
                                     self.embedding_lookup.embeddings.dtype)
            decoder_input *= tf.expand_dims(embedding_mask, -1)

            if self._padded_decode:
                timing_signal_shape = timing_signal.shape.as_list()
                decoder_input += tf.slice(timing_signal, [i, 0],
                                          [1, timing_signal_shape[1]])

                bias_shape = decoder_self_attention_bias.shape.as_list()
                self_attention_bias = tf.slice(
                    decoder_self_attention_bias, [0, 0, i, 0],
                    [bias_shape[0], bias_shape[1], 1, bias_shape[3]])
            else:
                decoder_input += timing_signal[i:i + 1]

                self_attention_bias = decoder_self_attention_bias[:, :, i:i +
                                                                  1, :i + 1]

            decoder_shape = tf_utils.get_shape_list(decoder_input,
                                                    expected_rank=3)
            batch_size = decoder_shape[0]
            decoder_length = decoder_shape[1]

            attention_bias = cache.get("encoder_decoder_attention_bias")
            attention_bias = tf.where(attention_bias < 0,
                                      tf.zeros_like(attention_bias),
                                      tf.ones_like(attention_bias))
            attention_bias = tf.squeeze(attention_bias, axis=[1])
            attention_mask = tf.tile(attention_bias, [1, decoder_length, 1])

            self_attention_bias = tf.where(self_attention_bias < 0,
                                           tf.zeros_like(self_attention_bias),
                                           tf.ones_like(self_attention_bias))
            self_attention_bias = tf.squeeze(self_attention_bias, axis=[1])
            self_attention_mask = tf.tile(self_attention_bias,
                                          [batch_size, 1, 1])

            decoder_outputs = self.decoder_layer(
                decoder_input,
                cache.get("encoder_outputs"),
                memory_mask=self_attention_mask,
                target_mask=attention_mask,
                cache=cache,
                decode_loop_step=i if self._padded_decode else None)

            logits = self._embedding_linear(self.embedding_lookup.embeddings,
                                            decoder_outputs)
            logits = tf.squeeze(logits, axis=[1])
            return logits, cache
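Closures with this signature are typically handed to a beam-search driver. In the TF official-models Transformer the call looks roughly like the sketch below; the argument names follow that codebase and should be read as assumptions here rather than as this file's exact API.

decoded_ids, scores = beam_search.sequence_beam_search(
    symbols_to_logits_fn=symbols_to_logits_fn,
    initial_ids=initial_ids,            # [batch_size] start-token ids
    initial_cache=cache,                # the dict threaded through the closure
    vocab_size=self.vocab_size,
    beam_size=self._beam_size,
    alpha=self._alpha,                  # length-normalization strength
    max_decode_length=max_decode_length,
    eos_id=EOS_ID,
    padded_decode=self._padded_decode)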
Example #57
print("---------00000000000000000-----")
# use keras 2.0.8; otherwise get_session() fails because the graph is empty (reason unknown)
session = keras.backend.get_session()

print("---------1111111111111111111111111111--------")
print(device_A + ",,,,,,," + device_B)
with tf.device(device_A):

    ipa = tf.placeholder(dtype=tf.float32, shape=(None, 1))
    ip1 = tf.placeholder(dtype=tf.float32, shape=(None, None, None, 1))
    ip3 = tf.placeholder(dtype=tf.float32, shape=(None, None, None, 3))
    ip4 = tf.placeholder(dtype=tf.float32, shape=(None, None, None, 4))
    ip3x = tf.placeholder(dtype=tf.float32, shape=(None, None, None, 3))

    baby = load_model('baby.net')
    baby_place = tf.concat([- 512 * tf.ones_like(ip4[:, :, :, 3:4]), 128 * tf.ones_like(ip4[:, :, :, 3:4]), 128 * tf.ones_like(ip4[:, :, :, 3:4])], axis=3)
    baby_yuv = RGB2YUV(ip4[:, :, :, 0:3])
    # 0 where alpha < 128 (use the placeholder colour), 1 elsewhere (use the YUV hint)
    baby_alpha = tf.where(condition=tf.less(ip4[:, :, :, 3:4], 128), x=tf.zeros_like(ip4[:, :, :, 3:4]), y=tf.ones_like(ip4[:, :, :, 3:4]))
    baby_hint = baby_alpha * baby_yuv + (1 - baby_alpha) * baby_place
    baby_op = YUV2RGB(baby(tf.concat([ip1, baby_hint], axis=3)))
    baby_finder = tf.add(baby_op, baby_op, name="baby_finder")

    girder = load_model('girder.net')
    gird_op = (1 - girder([1 - ip1 / 255.0, ip4, 1 - ip3 / 255.0])) * 255.0
    gird_finder = tf.add(gird_op, gird_op, name="gird_finder")

    reader = load_model('reader.net')
    features = reader(ip3 / 255.0)
    featuresx = reader(ip3x / 255.0)

    head = load_model('head.net')
def main(argv=None):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        if e.errno != 17:
            raise
    """
    if FLAGS.use_vacab and os.path.exists("./vocab.txt"):
        bk_tree = BKTree(levenshtein, list_words('./vocab.txt'))
        # bk_tree = bktree.Tree()
    """
    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        input_feature_map = tf.placeholder(tf.float32,
                                           shape=[None, None, None, 32],
                                           name='input_feature_map')
        input_transform_matrix = tf.placeholder(tf.float32,
                                                shape=[None, 6],
                                                name='input_transform_matrix')
        input_box_mask = []
        input_box_mask.append(tf.placeholder(tf.int32,
                                             shape=[None],
                                             name='input_box_masks_0'))
        input_box_widths = tf.placeholder(tf.int32,
                                          shape=[None],
                                          name='input_box_widths')

        input_seq_len = input_box_widths[tf.argmax(input_box_widths, 0)] * tf.ones_like(input_box_widths)
        global_step = tf.get_variable('global_step',
                                      [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        shared_feature, f_score, f_geometry = detect_part.model(input_images)
        pad_rois = roi_rotate_part.roi_rotate_tensor_pad(input_feature_map,
                                                         input_transform_matrix,
                                                         input_box_mask,
                                                         input_box_widths)
        recognition_logits = recognize_part.build_graph(pad_rois,
                                                        input_box_widths)
        _, dense_decode = recognize_part.decode(recognition_logits,
                                                input_box_widths)

        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(FLAGS.checkpoint_path,
                                      os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()

            df = pd.DataFrame()
            for iteration, im_fn in enumerate(im_fn_list):
                if iteration % 50 == 0:
                    print(iteration, os.path.basename(im_fn).split('.')[0])
                else:
                    print(iteration, os.path.basename(im_fn).split('.')[0][-6:], end=" ")
                
                im = cv2.imread(im_fn)[:, :, ::-1]
                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)
                # im_resized_d, (ratio_h_d, ratio_w_d) = resize_image_detection(im)

                timer = {'detect': 0, 'restore': 0, 'nms': 0, 'recog': 0}
                start = time.time()
                shared_feature_map, score, geometry = sess.run([shared_feature,
                                                                f_score,
                                                                f_geometry],
                                                               feed_dict={input_images: [im_resized]})

                boxes, timer = detect(score_map=score,
                                      geo_map=geometry,
                                      timer=timer)
                timer['detect'] = time.time() - start
                start = time.time() # reset for recognition
                if boxes is not None and boxes.shape[0] != 0:
                    res_file_path = os.path.join(FLAGS.output_dir,
                                                 'res_' + '{}.txt'.format(os.path.basename(im_fn).split('.')[0]))

                    input_roi_boxes = boxes[:, :8].reshape(-1, 8)
                    recog_decode_list = []
                    # Here avoid too many text area leading to OOM
                    for batch_index in range(input_roi_boxes.shape[0] // 32 + 1): # test roi batch size is 32
                        start_slice_index = batch_index * 32
                        end_slice_index = (batch_index + 1) * 32 if input_roi_boxes.shape[0] >= (batch_index + 1) * 32 else input_roi_boxes.shape[0]
                        tmp_roi_boxes = input_roi_boxes[start_slice_index:end_slice_index]

                        boxes_masks = [0] * tmp_roi_boxes.shape[0]
                        transform_matrixes, box_widths = get_project_matrix_and_width(tmp_roi_boxes)
                        # max_box_widths = max_width * np.ones(boxes_masks.shape[0]) # seq_len

                        # Run end to end
                        recog_decode = sess.run(dense_decode,
                                                feed_dict={input_feature_map: shared_feature_map,
                                                           input_transform_matrix: transform_matrixes,
                                                           input_box_mask[0]: boxes_masks,
                                                           input_box_widths: box_widths})
                        recog_decode_list.extend([r for r in recog_decode])

                    timer['recog'] = time.time() - start
                    # Preparing for draw boxes
                    boxes = boxes[:, :8].reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h

                    if len(recog_decode_list) != boxes.shape[0]:
                        print("detection and recognition result are not equal!")
                        exit(-1)

                    with open(res_file_path, 'w') as f:
                        for i, box in enumerate(boxes):
                            # to avoid submitting errors
                            box = sort_poly(box.astype(np.int32))
                            if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3]-box[0]) < 5:
                                continue
                            recognition_result = ground_truth_to_word(recog_decode_list[i])
                            """
                            if FLAGS.use_vacab:
                                fix_result = bktree_search(bk_tree, recognition_result.upper())
                                if len(fix_result) != 0:
                                    recognition_result = fix_result[0][1]
			    """
                            f.write('{},{},{},{},{},{},{},{},{},{}\r\n'.format(
                                #im_fn,
                                os.path.basename(im_fn).split('.')[0][-6:],
                                box[0, 0],
                                box[0, 1],
                                box[1, 0],
                                box[1, 1],
                                box[2, 0],
                                box[2, 1],
                                box[3, 0],
                                box[3, 1],
                                recognition_result
                            ))
                            df = df.append({'f_name': os.path.basename(im_fn).split('.')[0][-6:],
                                            'box_0_0': box[0, 0],
                                            'box_0_1': box[0, 1],
                                            'box_1_0': box[1, 0],
                                            'box_1_1': box[1, 1],
                                            'box_2_0': box[2, 0],
                                            'box_2_1': box[2, 1],
                                            'box_3_0': box[3, 0],
                                            'box_3_1': box[3, 1],
                                            'rec_txt': recognition_result},
                                             ignore_index=True)
                            # NOTE: this rewrites the whole CSV once per box; moving it
                            # outside the loop would avoid the quadratic I/O cost
                            df.to_csv('/content/file1.csv')
                            #print(box.astype(np.int32).reshape((-1, 1, 2)))

                            # Draw bounding box
                            cv2.polylines(im[:, :, ::-1],
                                          [box.astype(np.int32).reshape((-1, 1, 2))],
                                          True,
                                          color=(255, 255, 0),
                                          thickness=1)
                            # Draw recognition results area
                            text_area = box.copy()
                            text_area[2, 1] = text_area[1, 1]
                            text_area[3, 1] = text_area[0, 1]
                            text_area[0, 1] = text_area[0, 1] - 15
                            text_area[1, 1] = text_area[1, 1] - 15
                            cv2.fillPoly(im[:, :, ::-1],
                                         [text_area.astype(np.int32).reshape((-1, 1, 2))],
                                         color=(0, 255, 0))
                            im_txt = cv2.putText(im[:, :, ::-1],
                                                 recognition_result,
                                                 (box[0, 0], box[0, 1]),
                                                 font,
                                                 0.5,
                                                 (0, 0, 255),
                                                 1)
                else:
                    res_file = os.path.join(FLAGS.output_dir,
                                            'res_' + '{}.txt'.format(
                                                os.path.basename(im_fn).split('.')[0]))
                    f = open(res_file, "w")
                    im_txt = None
                    f.close()

                #print('{} : detect {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms, recog {:.0f}ms'.format(
                #    im_fn,
                #    timer['detect']*1000,
                #    timer['restore']*1000,
                #    timer['nms']*1000,
                #    timer['recog']*1000))

                duration = time.time() - start_time
                #print('[timing] {}'.format(duration))

                if FLAGS.write_images:
                    img_path = os.path.join(FLAGS.output_dir,
                                            os.path.basename(im_fn))
Example #59
def multihead_attention(queries,
                        keys,
                        num_units=None,
                        num_heads=8,
                        dropout=0,
                        causality=False,
                        scope="multihead_attention",
                        reuse=None):
    '''Applies multihead attention.

    Args:
      queries: A 3d tensor with shape of [N, T_q, C_q].
      keys: A 3d tensor with shape of [N, T_k, C_k].
      num_units: A scalar. Attention size.
      num_heads: An int. Number of heads.
      dropout: A floating point number. Dropout rate applied to the
        attention weights.
      causality: Boolean. If true, units that reference the future are masked.
      scope: Optional scope for `variable_scope`.
      reuse: Boolean, whether to reuse the weights of a previous layer
        by the same name.

    Returns:
      A 3d tensor with shape of (N, T_q, C)
    '''
    with tf.variable_scope(scope, reuse=reuse):
        print("use multihead_attention")
        # Set the fall back option for num_units
        if num_units is None:
            num_units = queries.get_shape().as_list()[-1]

        # Linear projections
        Q = tf.layers.dense(queries, num_units, activation=tf.nn.relu)  # (N, T_q, C)
        K = tf.layers.dense(keys, num_units, activation=tf.nn.relu)  # (N, T_k, C)
        V = tf.layers.dense(keys, num_units, activation=tf.nn.relu)  # (N, T_k, C)

        # Split and concat
        Q_ = tf.concat(tf.split(Q, num_heads, axis=2), axis=0)  # (h*N, T_q, C/h)
        K_ = tf.concat(tf.split(K, num_heads, axis=2), axis=0)  # (h*N, T_k, C/h)
        V_ = tf.concat(tf.split(V, num_heads, axis=2), axis=0)  # (h*N, T_k, C/h)

        # Multiplication
        outputs = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1]))  # (h*N, T_q, T_k)

        # Scale
        outputs = outputs / (K_.get_shape().as_list()[-1] ** 0.5)

        # Key Masking
        key_masks = tf.sign(tf.abs(tf.reduce_sum(keys, axis=-1)))  # (N, T_k)
        key_masks = tf.tile(key_masks, [num_heads, 1])  # (h*N, T_k)
        key_masks = tf.tile(tf.expand_dims(key_masks, 1), [1, tf.shape(queries)[1], 1])  # (h*N, T_q, T_k)

        # Replace masked (all-zero) key positions with a large negative value
        # so they receive ~0 weight after the softmax.
        paddings = tf.ones_like(outputs) * (-2 ** 32 + 1)
        outputs = tf.where(tf.equal(key_masks, 0), paddings, outputs)  # (h*N, T_q, T_k)

        # Causality = Future blinding
        if causality:
            diag_vals = tf.ones_like(outputs[0, :, :])  # (T_q, T_k)
            # tf.contrib.linalg.LinearOperatorTriL in older TF 1.x releases
            tril = tf.linalg.LinearOperatorLowerTriangular(diag_vals).to_dense()  # (T_q, T_k)
            masks = tf.tile(tf.expand_dims(tril, 0), [tf.shape(outputs)[0], 1, 1])  # (h*N, T_q, T_k)

            paddings = tf.ones_like(masks) * (-2 ** 32 + 1)
            outputs = tf.where(tf.equal(masks, 0), paddings, outputs)  # (h*N, T_q, T_k)

        # Activation
        outputs = tf.nn.softmax(outputs)  # (h*N, T_q, T_k)

        # Query Masking
        query_masks = tf.sign(tf.abs(tf.reduce_sum(queries, axis=-1)))  # (N, T_q)
        query_masks = tf.tile(query_masks, [num_heads, 1])  # (h*N, T_q)
        query_masks = tf.tile(tf.expand_dims(query_masks, -1), [1, 1, tf.shape(keys)[1]])  # (h*N, T_q, T_k)
        outputs *= query_masks  # broadcasting. (h*N, T_q, T_k)

        # Dropout on the attention weights. tf.nn.dropout expects a *keep*
        # probability, so convert from the drop rate argument.
        outputs = tf.nn.dropout(outputs, keep_prob=1.0 - dropout)

        # Weighted sum
        outputs = tf.matmul(outputs, V_)  # ( h*N, T_q, C/h)

        # Restore shape
        outputs = tf.concat(tf.split(outputs, num_heads, axis=0), axis=2)  # (N, T_q, C)

        # Residual connection (assumes num_units == C_q so the shapes match)
        outputs += queries

        # Layer normalization (see the `normalize` sketch below)
        outputs = normalize(outputs)  # (N, T_q, C)

    return outputs
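
# A minimal self-attention usage sketch (shapes assumed for illustration):
#   x = tf.placeholder(tf.float32, [32, 10, 512])   # (N, T, C)
#   y = multihead_attention(queries=x, keys=x, num_units=512,
#                           num_heads=8, dropout=0.1, causality=True)
#
# `normalize` is called above but never defined in this snippet. In
# transformer implementations it is usually layer normalization
# (Ba et al., 2016); a minimal sketch under that assumption:
def normalize(inputs, epsilon=1e-8, scope="ln", reuse=None):
    '''Applies layer normalization over the last dimension of `inputs`.'''
    with tf.variable_scope(scope, reuse=reuse):
        params_shape = inputs.get_shape()[-1:]
        # Per-position statistics over the channel dimension.
        mean, variance = tf.nn.moments(inputs, [-1], keep_dims=True)
        beta = tf.get_variable("beta", params_shape,
                               initializer=tf.zeros_initializer())
        gamma = tf.get_variable("gamma", params_shape,
                                initializer=tf.ones_initializer())
        normalized = (inputs - mean) / ((variance + epsilon) ** 0.5)
        return gamma * normalized + beta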
Example #60
def get_losses(d_out_real, d_out_fake, x_real_onehot, x_fake_onehot_appr,
               gen_o, discriminator, config):
    batch_size = config['batch_size']
    gan_type = config['gan_type']

    if gan_type == 'standard':  # the non-saturating GAN loss
        d_loss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=d_out_real, labels=tf.ones_like(d_out_real)))
        d_loss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=d_out_fake, labels=tf.zeros_like(d_out_fake)))
        d_loss = d_loss_real + d_loss_fake

        g_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=d_out_fake, labels=tf.ones_like(d_out_fake)))

    elif gan_type == 'JS':  # the vanilla GAN loss
        d_loss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=d_out_real, labels=tf.ones_like(d_out_real)))
        d_loss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=d_out_fake, labels=tf.zeros_like(d_out_fake)))
        d_loss = d_loss_real + d_loss_fake

        g_loss = -d_loss_fake

    elif gan_type == 'KL':  # the GAN loss implicitly minimizing KL-divergence
        d_loss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=d_out_real, labels=tf.ones_like(d_out_real)))
        d_loss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=d_out_fake, labels=tf.zeros_like(d_out_fake)))
        d_loss = d_loss_real + d_loss_fake

        g_loss = tf.reduce_mean(-d_out_fake)

    elif gan_type == 'hinge':  # the hinge loss
        d_loss_real = tf.reduce_mean(tf.nn.relu(1.0 - d_out_real))
        d_loss_fake = tf.reduce_mean(tf.nn.relu(1.0 + d_out_fake))
        d_loss = d_loss_real + d_loss_fake

        g_loss = -tf.reduce_mean(d_out_fake)

    elif gan_type == 'tv':  # the total variation distance
        d_loss = tf.reduce_mean(tf.tanh(d_out_fake) - tf.tanh(d_out_real))
        g_loss = tf.reduce_mean(-tf.tanh(d_out_fake))

    elif gan_type == 'wgan-gp':  # WGAN-GP
        d_loss = tf.reduce_mean(d_out_fake) - tf.reduce_mean(d_out_real)
        GP = gradient_penalty(discriminator, x_real_onehot, x_fake_onehot_appr,
                              config)
        d_loss += GP

        g_loss = -tf.reduce_mean(d_out_fake)

    elif gan_type == 'LS':  # LS-GAN
        d_loss_real = tf.reduce_mean(tf.squared_difference(d_out_real, 1.0))
        d_loss_fake = tf.reduce_mean(tf.square(d_out_fake))
        d_loss = d_loss_real + d_loss_fake

        g_loss = tf.reduce_mean(tf.squared_difference(d_out_fake, 1.0))

    elif gan_type == 'RSGAN':  # relativistic standard GAN
        d_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=d_out_real - d_out_fake,
                labels=tf.ones_like(d_out_real)))
        g_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=d_out_fake - d_out_real,
                labels=tf.ones_like(d_out_fake)))

    else:
        raise NotImplementedError("Divergence '%s' is not implemented" %
                                  gan_type)

    # EPS is assumed to be a small constant (e.g. 1e-10) defined elsewhere in
    # this module, guarding against log(0).
    log_pg = tf.reduce_mean(tf.log(gen_o + EPS))  # [1], measures the log p_g(x)

    return log_pg, g_loss, d_loss
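
# `gradient_penalty` (used by the 'wgan-gp' branch above) is not defined in
# this snippet. A minimal sketch following Gulrajani et al. (2017), assuming
# `config` is a dict, the one-hot tensors have shape
# [batch_size, seq_len, vocab_size], and `discriminator` maps such a tensor
# to per-sample scores; the body is an illustrative assumption, not the
# original implementation:
def gradient_penalty(discriminator, x_real_onehot, x_fake_onehot_appr, config):
    batch_size = config['batch_size']
    # Random per-sample interpolation between real and generated data.
    alpha = tf.random_uniform([batch_size, 1, 1], minval=0., maxval=1.)
    interpolated = alpha * x_real_onehot + (1. - alpha) * x_fake_onehot_appr
    grad = tf.gradients(discriminator(interpolated), [interpolated])[0]
    # Penalize deviation of the gradient norm from 1 over non-batch axes.
    slopes = tf.sqrt(tf.reduce_sum(tf.square(grad), axis=[1, 2]) + 1e-12)
    return 10.0 * tf.reduce_mean(tf.square(slopes - 1.))  # lambda = 10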