Example #1
    def pretrained_visual_encoder(self, features, hparams):
        # we want the exact hparams used for training this VAE
        vae_hparams = trainer_lib.create_hparams(
            hparams.vae_hparam_set,
            hparams.vae_hparams,
            data_dir=hparams.vae_data_dir,
            problem_name=hparams.vae_problem)

        # go back to root variable scope
        with tf.variable_scope(tf.VariableScope(tf.AUTO_REUSE, ''),
                               reuse=tf.AUTO_REUSE,
                               auxiliary_name_scope=False):
            vae = image_vae.ImageVAE(
                vae_hparams,
                mode=self._hparams.mode,
                problem_hparams=vae_hparams.problem_hparams)
            # the real input to the vae is the rendered target, features['targets_psr']
            vae_features = copy.copy(features)
            vae_features['inputs'] = tf.reshape(
                vae_features['targets_psr'][:, -1, :], [-1, 64, 64, 1])
            vae_features['targets'] = vae_features['inputs']
            # we want vae to return bottleneck
            vae_features['bottleneck'] = tf.zeros((0, 128))
            sampled_bottleneck, _ = vae(vae_features)
            vae.initialize_from_ckpt(hparams.vae_ckpt_dir)

            if tf.executing_eagerly():
                # in eager mode the first call above ran with unrestored weights,
                # so recompute the bottleneck now that the checkpoint is loaded
                sampled_bottleneck, _ = vae(vae_features)

        return sampled_bottleneck
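
Example #1 assumes the surrounding model's hparams already carry the pretrained VAE's configuration. A minimal sketch of that wiring; the values (and the hparam-set/problem names) are assumptions, not taken from the source:

    # hypothetical registration of the VAE hparams on the parent model
    hparams.add_hparam('vae_hparam_set', 'image_vae')          # assumed set name
    hparams.add_hparam('vae_hparams', '')                      # extra overrides
    hparams.add_hparam('vae_data_dir', '/path/to/vae_data')    # assumed path
    hparams.add_hparam('vae_problem', 'some_vae_problem')      # assumed problem
    hparams.add_hparam('vae_ckpt_dir', '/path/to/vae_ckpt')    # assumed path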
Example #2
        def infer_step(logits_so_far, current_hidden):
            """Inference step of LSTM while loop."""
            # unflatten hidden:
            current_hidden = tuple(tf.nn.rnn_cell.LSTMStateTuple(c=s[0], h=s[1])
                                   for s in current_hidden)

            # put logits_so_far through top
            tm = self._problem_hparams.modality['targets']
            # need to reuse top params
            reset_scope = tf.variable_scope(tf.VariableScope(tf.AUTO_REUSE, ''),
                                            reuse=tf.AUTO_REUSE,
                                            auxiliary_name_scope=False)
            top_scope = tf.variable_scope('svg_decoder/{}_modality'.format(tm),
                                          reuse=tf.AUTO_REUSE)
            with reset_scope, top_scope:
                samples_so_far = self.hparams.top['targets'](
                    logits_so_far, None, self.hparams, self.problem_hparams.vocab_size)
            # append a zero pad to the samples. this effectively shifts the
            # samples right; unlike shift_right it keeps the last element, so an
            # empty samples_so_far is no longer empty after padding
            samples_so_far = tf.concat([zero_pad, samples_so_far], axis=1)
            shifted_targets = common_layers.flatten4d3d(samples_so_far)
            # take only the very last timestep; it becomes the actual input to the rnn
            shifted_targets = shifted_targets[:, -1:, :]

            # tile and append the bottleneck to inputs
            sln_offset = 0
            if hparams.condition_on_sln:
                sln_offset = 51
            pre_tile_y = tf.reshape(
                bottleneck,
                [common_layers.shape_list(bottleneck)[0], 1,
                 hparams.bottleneck_bits + hparams.num_categories + sln_offset])
            overlay_x = tf.tile(pre_tile_y,
                                [1, common_layers.shape_list(shifted_targets)[1], 1])
            inputs = tf.concat([shifted_targets, overlay_x], -1)

            seq_len_batch = tf.ones([common_layers.shape_list(inputs)[0]])

            # RUN PRE-LSTM LAYER
            with tf.variable_scope('pre_decoder', reuse=tf.AUTO_REUSE):
                inputs = tf.layers.dense(
                    inputs, hparams.hidden_size, name='bottom')
                inputs = tf.nn.tanh(inputs)

            # RUN LSTM
            with tf.variable_scope('lstm_decoder', reuse=tf.AUTO_REUSE):
                next_step, next_state = tf.nn.dynamic_rnn(
                    layers, inputs, seq_len_batch, initial_state=current_hidden,
                    dtype=tf.float32, time_major=False)

            next_step = tf.expand_dims(next_step, [1])

            logits_so_far = tf.concat([logits_so_far, next_step], 1)
            # flatten state
            next_state = tuple((s.c, s.h) for s in next_state)

            return logits_so_far, next_state
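
infer_step is written to be driven one timestep at a time by tf.while_loop during decoding. A minimal, hypothetical driver; decode_length, batch_size, hidden_size and flat_initial_state are assumptions standing in for values built elsewhere in the surrounding infer code:

        # grow logits_so_far until decode_length steps have been produced
        logits_so_far = tf.zeros([batch_size, 0, 1, hidden_size])

        def not_finished(logits, unused_state):
            return tf.shape(logits)[1] < decode_length

        logits_so_far, _ = tf.while_loop(
            not_finished, infer_step,
            loop_vars=[logits_so_far, flat_initial_state],
            shape_invariants=[
                tf.TensorShape([None, None, 1, None]),
                tf.nest.map_structure(
                    lambda _: tf.TensorShape([None, None]), flat_initial_state),
            ],
            back_prop=False)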
Example #3
    def pretrained_visual_encoder(self, features, hparams, train):
        # we want the exact hparams used for training this VAE
        vae_hparams = trainer_lib.create_hparams(
            hparams.vae_hparam_set,
            hparams.vae_hparams,
            data_dir=hparams.vae_data_dir,
            problem_name=hparams.vae_problem)

        # go back to root variable scope
        with tf.variable_scope(tf.VariableScope(tf.AUTO_REUSE, ''),
                               reuse=tf.AUTO_REUSE,
                               auxiliary_name_scope=False):
            vae = image_vae_joint.ImageVAE(
                vae_hparams,
                mode=self._hparams.mode,
                problem_hparams=vae_hparams.problem_hparams)

            # source image feat
            vae_features_source = copy.copy(features)
            vae_features_source['inputs'] = tf.reshape(
                vae_features_source['source_psr'][:, -1, :], [-1, 64, 64, 1])
            vae_features_source['targets'] = vae_features_source['inputs']
            vae_features_source['cls'] = vae_features_source['targets_cls']
            # the bottleneck-only request is disabled here:
            # vae_features_source['bottleneck'] = tf.zeros((0, 128))
            # we want the vae to return everything: bottleneck, decoder output and losses
            sampled_bottleneck_source, dec_out_source, losses_source = vae.vae_internal(
                vae_features_source, hparams, train)

            if tf.executing_eagerly():
                sampled_bottleneck_source, dec_out_source, losses_source = vae.vae_internal(
                    vae_features_source, hparams, train)

            # the real input to the vae is the rendered target, features['targets_psr']
            vae_features_target = copy.copy(features)
            vae_features_target['inputs'] = tf.reshape(
                vae_features_target['targets_psr'][:, -1, :], [-1, 64, 64, 1])
            vae_features_target['targets'] = vae_features_target['inputs']
            vae_features_target['cls'] = vae_features_target['targets_cls']
            # again, the bottleneck-only request stays disabled:
            # vae_features_target['bottleneck'] = tf.zeros((0, 128))
            sampled_bottleneck_target, dec_out_target, losses_target = vae.vae_internal(
                vae_features_target, hparams, train)

            if tf.executing_eagerly():
                sampled_bottleneck_target, dec_out_target, losses_target = vae.vae_internal(
                    vae_features_target, hparams, train)

            vae.initialize_from_ckpt(hparams.vae_ckpt_dir)

            vae_losses = {}
            for k in losses_source.keys():
                vae_losses[k] = losses_source[k] + losses_target[k]
        return sampled_bottleneck_target - sampled_bottleneck_source, dec_out_source, dec_out_target, vae_losses
Example #4
def build_bidirectional_lstm(layer_sizes, use_cudnn, dropout_keep_prob,
                             residual, is_training, name_or_scope):
    """Build the Tensorflow graph for a bidirectional LSTM."""
    if use_cudnn and residual:
        raise ValueError('Residual connections not supported in cuDNN.')

    if isinstance(name_or_scope, tf.VariableScope):
        name = name_or_scope.name
        reuse = name_or_scope.reuse
    else:
        name = name_or_scope
        reuse = None

    cells_fw = []
    cells_bw = []
    for i, layer_size in enumerate(layer_sizes):
        if use_cudnn:
            cells_fw.append(
                cudnn_lstm_layer([layer_size],
                                 dropout_keep_prob,
                                 is_training,
                                 name_or_scope=tf.VariableScope(
                                     reuse, name +
                                     '/cell_%d/bidirectional_rnn/fw' % i)))
            cells_bw.append(
                cudnn_lstm_layer([layer_size],
                                 dropout_keep_prob,
                                 is_training,
                                 name_or_scope=tf.VariableScope(
                                     reuse, name +
                                     '/cell_%d/bidirectional_rnn/bw' % i)))
        else:
            cells_fw.append(
                rnn_cell([layer_size], dropout_keep_prob, residual,
                         is_training))
            cells_bw.append(
                rnn_cell([layer_size], dropout_keep_prob, residual,
                         is_training))

    return cells_fw, cells_bw
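
The forward and backward cell lists built above are meant to feed a stacked bidirectional RNN. A minimal sketch of that, assuming the TF 1.x contrib API; sequence and sequence_length are hypothetical inputs:

    from tensorflow.contrib import rnn  # TF 1.x only

    # hypothetical inputs: sequence is [batch, time, depth], sequence_length is [batch]
    cells_fw, cells_bw = build_bidirectional_lstm(
        layer_sizes=[256, 256], use_cudnn=False, dropout_keep_prob=0.9,
        residual=False, is_training=True, name_or_scope='encoder')
    outputs, states_fw, states_bw = rnn.stack_bidirectional_dynamic_rnn(
        cells_fw, cells_bw, sequence, sequence_length=sequence_length,
        dtype=tf.float32)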
Example #5
def maybe_convert_to_variable(tensor):
    """Read value of a tensor from a variable when possible.

  This function is intended to make tensors from inside the TPU while loop
  available on the CPU by reading it from the variable to which the tensor was
  written earlier. Note that the read may not reflect any writes that happened
  in the same session.run(), unless control dependencies are added.

  Args:
    tensor: A tf.Tensor.

  Returns:
    A tf.Tensor. If input tensor is an output of reading a ResourceVariable, we
    return an equivalent tensor produced in the current context. Otherwise, we
    return the original input tensor.
  """
    op = tensor.op
    if is_on_cpu() and tensor in var_store:
        return var_store[tensor]
    # walk through Identity ops to find the op that actually produced the value
    while op.type == 'Identity':
        assert len(op.inputs) == 1
        op = op.inputs[0].op
    if op.type != 'ReadVariableOp':
        # No need to convert.
        return tensor
    with tf.variable_scope(
            # Reset the scope because variable_name contains all the scopes we need.
            name_or_scope=tf.VariableScope(''),
            # We are looking for a reference to an existing variable, so we want to
            # raise an exception if variable is not found.
            reuse=True,
    ):
        variable_name = get_variable_name(op)
        tf.logging.info('Converting tensor %s --> variable %s', tensor,
                        variable_name)
        try:
            return tf.get_variable(variable_name)
        except ValueError:
            tf.logging.info(
                'Variable %s was not created with tf.get_variable(). '
                'Attempting to find it in GLOBAL_VARIABLES collection.',
                variable_name)
        global_vars = tensor.graph.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES)
        matched_vars = [
            v for v in global_vars if v.name == variable_name + ':0'
        ]
        if not matched_vars:
            raise ValueError(
                'Variable %s is in GraphDef but not in the live graph.' %
                variable_name)
        assert len(matched_vars) == 1
        return matched_vars[0]
Example #6
    def cls_embedding(self, sources_cls, sources_fnt, targets_cls,
                      targets_fnt):
        cls_size = 52
        cls_embedding_size = 16
        fnt_size = 36632
        fnt_embedding_size = 128
        print(common_layers.shape_list(sources_cls))
        print(common_layers.shape_list(sources_fnt))
        print(common_layers.shape_list(targets_cls))
        print(common_layers.shape_list(targets_fnt))

        with tf.variable_scope(tf.VariableScope(tf.AUTO_REUSE, ''),
                               reuse=tf.AUTO_REUSE,
                               auxiliary_name_scope=False):
            W_cls = tf.Variable(
                tf.random.uniform([cls_size, cls_embedding_size], -1.0, 1.0))
            embedded_sources_cls = tf.nn.embedding_lookup(W_cls, sources_cls)
            embedded_sources_cls = tf.squeeze(embedded_sources_cls, 1)
            embedded_targets_cls = tf.nn.embedding_lookup(W_cls, targets_cls)
            embedded_targets_cls = tf.squeeze(embedded_targets_cls, 1)

            W_fnt = tf.Variable(
                tf.random.uniform([fnt_size, fnt_embedding_size], -1.0, 1.0))
            embedded_sources_fnt = tf.nn.embedding_lookup(W_fnt, sources_fnt)
            embedded_sources_fnt = tf.squeeze(embedded_sources_fnt, 1)
            embedded_targets_fnt = tf.nn.embedding_lookup(W_fnt, targets_fnt)
            embedded_targets_fnt = tf.squeeze(embedded_targets_fnt, 1)

            src_cls = tf.layers.dense(embedded_sources_cls,
                                      16,
                                      activation=None)
            # src_cls = tf.nn.relu(src_cls)
            src_fnt = tf.layers.dense(embedded_sources_fnt,
                                      32,
                                      activation=None)
            # src_fnt = tf.nn.relu(src_fnt)
            tgt_cls = tf.layers.dense(embedded_targets_cls,
                                      16,
                                      activation=None)
            # tgt_cls = tf.nn.relu(tgt_cls)
            tgt_fnt = tf.layers.dense(embedded_targets_fnt,
                                      32,
                                      activation=None)
            # tgt_fnt = tf.nn.relu(tgt_fnt)
            emd = tf.concat([src_cls, src_fnt, tgt_cls, tgt_fnt], -1)
            ret = tf.layers.dense(emd, 32, activation='relu')
        return ret
Example #7
    def recursive_decode(initial_input, path=None):
      """Recursive hierarchical decode function."""
      path = path or []
      level = len(path)

      if level == num_levels:
        with tf.variable_scope('core_decoder', reuse=tf.AUTO_REUSE):
          return base_decode_fn(initial_input, path)

      scope = tf.VariableScope(
          tf.AUTO_REUSE, 'decoder/hierarchical_level_%d' % level)
      num_steps = self._level_lengths[level]
      with tf.variable_scope(scope):
        state = lstm_utils.initial_cell_state_from_embedding(
            self._hier_cells[level], initial_input, name='initial_state')
      if level not in self._disable_autoregression:
        # The initial input should be the same size as the tensors returned by
        # next level.
        if self._hierarchical_encoder:
          input_size = self._hierarchical_encoder.level(0).output_depth
        elif level == num_levels - 1:
          input_size = sum(tf.nest.flatten(self._core_decoder.state_size))
        else:
          input_size = sum(
              tf.nest.flatten(self._hier_cells[level + 1].state_size))
        next_input = tf.zeros([batch_size, input_size])
      lower_level_embeddings = []
      for i in range(num_steps):
        if level in self._disable_autoregression:
          next_input = tf.zeros([batch_size, 1])
        else:
          next_input = tf.concat([next_input, initial_input], axis=1)
        with tf.variable_scope(scope):
          output, state = self._hier_cells[level](next_input, state, scope)
        next_input = recursive_decode(output, path + [i])
        lower_level_embeddings.append(next_input)
      if self._hierarchical_encoder:
        # Return the encoding of the outputs using the appropriate level of the
        # hierarchical encoder.
        enc_level = num_levels - level
        return self._hierarchical_encoder.level(enc_level).encode(
            sequence=tf.stack(lower_level_embeddings, axis=1),
            sequence_length=tf.fill([batch_size], num_steps))
      else:
        # Return the final state.
        return tf.concat(tf.nest.flatten(state), axis=-1)
Example #8
  def build(self, hparams, is_training=True):
    self._total_length = hparams.max_seq_len
    if self._total_length != np.prod(self._level_lengths):
      raise ValueError(
          'The product of the HierarchicalLstmEncoder level lengths (%d) must '
          'equal the padded input sequence length (%d).' % (
              np.prod(self._level_lengths), self._total_length))
    tf.logging.info('\nHierarchical Encoder:\n'
                    '  input length: %d\n'
                    '  level lengths: %s\n',
                    self._total_length,
                    self._level_lengths)
    self._hierarchical_encoders = []
    num_splits = int(np.prod(self._level_lengths))
    for i, l in enumerate(self._level_lengths):
      num_splits //= l
      tf.logging.info('Level %d splits: %d', i, num_splits)
      h_encoder = self._core_encoder_cls()
      h_encoder.build(
          hparams, is_training,
          name_or_scope=tf.VariableScope(
              tf.AUTO_REUSE, 'encoder/hierarchical_level_%d' % i))
      self._hierarchical_encoders.append((num_splits, h_encoder))
Example #9
    def vis_encoder(self, sources_psr, targets_psr, targets_cls):
        base_depth = 32
        num_categories = 52
        bottleneck_bits = 32
        sources_psr = tf.reshape(sources_psr, [-1, 64, 64, 1])
        targets_psr = tf.reshape(targets_psr, [-1, 64, 64, 1])
        with tf.variable_scope(tf.VariableScope(tf.AUTO_REUSE, ''),
                               reuse=tf.AUTO_REUSE,
                               auxiliary_name_scope=False):
            ret = targets_psr
            clss = targets_cls
            clss = tf.reshape(clss, [-1])

            # conv layer, followed by instance norm + FiLM
            ret = tf.layers.Conv2D(base_depth,
                                   5,
                                   1,
                                   padding='SAME',
                                   activation=None)(ret)
            ret = ops.conditional_instance_norm(ret, clss, num_categories)
            ret = tf.nn.relu(ret)

            ret = tf.layers.Conv2D(base_depth,
                                   5,
                                   2,
                                   padding='SAME',
                                   activation=None)(ret)
            ret = ops.conditional_instance_norm(ret, clss, num_categories)
            ret = tf.nn.relu(ret)

            ret = tf.layers.Conv2D(2 * base_depth,
                                   5,
                                   1,
                                   padding='SAME',
                                   activation=None)(ret)
            ret = ops.conditional_instance_norm(ret, clss, num_categories)
            ret = tf.nn.relu(ret)

            ret = tf.layers.Conv2D(2 * base_depth,
                                   5,
                                   2,
                                   padding='SAME',
                                   activation=None)(ret)
            ret = ops.conditional_instance_norm(ret, clss, num_categories)
            ret = tf.nn.relu(ret)

            # new conv layer, to bring shape down
            ret = tf.layers.Conv2D(2 * bottleneck_bits,
                                   4,
                                   2,
                                   padding='SAME',
                                   activation=None)(ret)
            ret = ops.conditional_instance_norm(ret, clss, num_categories)
            ret = tf.nn.relu(ret)

            # new conv layer, to bring shape down
            ret = tf.layers.Conv2D(2 * bottleneck_bits,
                                   4,
                                   2,
                                   padding='SAME',
                                   activation=None)(ret)
            ret = ops.conditional_instance_norm(ret, clss, num_categories)
            ret = tf.nn.relu(ret)
            # new conv layer, to bring shape down
            ret = tf.layers.Conv2D(2 * bottleneck_bits,
                                   4,
                                   2,
                                   padding='SAME',
                                   activation=None)(ret)
            ret = ops.conditional_instance_norm(ret, clss, num_categories)
            ret = tf.nn.relu(ret)

            # ret is now [batch, 2, 2, 2 * bottleneck_bits] for 64x64 inputs;
            # flatten and project down to bottleneck_bits
            ret = tf.layers.flatten(ret)
            ret = tf.layers.dense(ret, bottleneck_bits, activation=None)
        return ret
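
A hedged call-site sketch for vis_encoder; the feature keys mirror those used in the earlier examples, but this exact call is an assumption rather than code from the source:

        # hypothetical call inside the parent model's body()
        visual_bottleneck = self.vis_encoder(
            features['source_psr'], features['targets_psr'],
            features['targets_cls'])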
Example #10
def absolute_variable_scope(scope: str, **kwargs) -> tf.variable_scope:
    """Forcefully enter the specified variable scope, ignoring any surrounding scopes."""
    return tf.variable_scope(tf.VariableScope(name=scope, **kwargs),
                             auxiliary_name_scope=False)
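
A short usage sketch for absolute_variable_scope; the scope and variable names here are illustrative only, not taken from the source:

    # enters 'shared' regardless of the caller's current scope, so the same
    # variables can be created once and looked up again from anywhere
    with absolute_variable_scope('shared', reuse=tf.AUTO_REUSE):
        w = tf.get_variable('w', shape=[16, 16])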