def pretrained_visual_encoder(self, features, hparams):
  # We want the exact hparams that were used to train this VAE.
  vae_hparams = trainer_lib.create_hparams(
      hparams.vae_hparam_set, hparams.vae_hparams,
      data_dir=hparams.vae_data_dir, problem_name=hparams.vae_problem)

  # Go back to the root variable scope.
  with tf.variable_scope(tf.VariableScope(tf.AUTO_REUSE, ''),
                         reuse=tf.AUTO_REUSE, auxiliary_name_scope=False):
    vae = image_vae.ImageVAE(vae_hparams, mode=self._hparams.mode,
                             problem_hparams=vae_hparams.problem_hparams)
    # The real input to the vae will be features['rendered_targets'].
    vae_features = copy.copy(features)
    vae_features['inputs'] = tf.reshape(
        vae_features['targets_psr'][:, -1, :], [-1, 64, 64, 1])
    vae_features['targets'] = vae_features['inputs']
    # Setting an (empty) 'bottleneck' feature makes the vae return its
    # bottleneck rather than sampling a new one.
    vae_features['bottleneck'] = tf.zeros((0, 128))
    sampled_bottleneck, _ = vae(vae_features)
    vae.initialize_from_ckpt(hparams.vae_ckpt_dir)

    if tf.executing_eagerly():
      # In eager mode, call again so the weights restored from the checkpoint
      # actually take effect.
      sampled_bottleneck, _ = vae(vae_features)

  return sampled_bottleneck
def infer_step(logits_so_far, current_hidden):
  """Inference step of the LSTM while loop.

  Closes over `zero_pad`, `bottleneck`, `hparams`, and `layers` from the
  enclosing decoder body.
  """
  # Unflatten the hidden state.
  current_hidden = tuple(tf.nn.rnn_cell.LSTMStateTuple(c=s[0], h=s[1])
                         for s in current_hidden)

  # Put logits_so_far through top.
  tm = self._problem_hparams.modality['targets']
  # We need to reuse the top params.
  reset_scope = tf.variable_scope(tf.VariableScope(tf.AUTO_REUSE, ''),
                                  reuse=tf.AUTO_REUSE,
                                  auxiliary_name_scope=False)
  top_scope = tf.variable_scope('svg_decoder/{}_modality'.format(tm),
                                reuse=tf.AUTO_REUSE)
  with reset_scope, top_scope:
    samples_so_far = self.hparams.top['targets'](
        logits_so_far, None, self.hparams, self.problem_hparams.vocab_size)

  # Append a zero pad to the samples. This effectively shifts the samples
  # right but, unlike shift_right, does not remove the last element, so an
  # empty samples_so_far is no longer empty after padding.
  samples_so_far = tf.concat([zero_pad, samples_so_far], axis=1)
  shifted_targets = common_layers.flatten4d3d(samples_so_far)
  # Take only the very last step; it is the actual input to the rnn.
  shifted_targets = shifted_targets[:, -1:, :]

  # Tile the bottleneck and append it to the inputs.
  sln_offset = 0
  if hparams.condition_on_sln:
    sln_offset = 51
  pre_tile_y = tf.reshape(
      bottleneck,
      [common_layers.shape_list(bottleneck)[0], 1,
       hparams.bottleneck_bits + hparams.num_categories + sln_offset])
  overlay_x = tf.tile(pre_tile_y,
                      [1, common_layers.shape_list(shifted_targets)[1], 1])
  inputs = tf.concat([shifted_targets, overlay_x], -1)

  seq_len_batch = tf.ones([common_layers.shape_list(inputs)[0]])

  # Run the pre-LSTM layer.
  with tf.variable_scope('pre_decoder', reuse=tf.AUTO_REUSE):
    inputs = tf.layers.dense(inputs, hparams.hidden_size, name='bottom')
    inputs = tf.nn.tanh(inputs)

  # Run the LSTM.
  with tf.variable_scope('lstm_decoder', reuse=tf.AUTO_REUSE):
    next_step, next_state = tf.nn.dynamic_rnn(
        layers, inputs, seq_len_batch, initial_state=current_hidden,
        dtype=tf.float32, time_major=False)

  next_step = tf.expand_dims(next_step, [1])
  logits_so_far = tf.concat([logits_so_far, next_step], 1)

  # Flatten the state back out for the while loop.
  next_state = tuple((s.c, s.h) for s in next_state)
  return logits_so_far, next_state
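# A minimal, self-contained sketch of the shift-right-by-padding trick used in
# infer_step above (the helper name is illustrative; it assumes a 3-D input
# with a static last dimension and the module-level TF1-style `tf` import):
def _shift_right_by_padding_demo(samples_so_far):
  """Prepends a zero frame instead of calling shift_right.

  Because the last element is kept, an empty samples_so_far still yields one
  (all-zero) step after padding.
  """
  batch = tf.shape(samples_so_far)[0]
  depth = samples_so_far.get_shape().as_list()[-1]
  zero_pad = tf.zeros([batch, 1, depth], dtype=samples_so_far.dtype)
  return tf.concat([zero_pad, samples_so_far], axis=1)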
def pretrained_visual_encoder(self, features, hparams, train):
  # We want the exact hparams that were used to train this VAE.
  vae_hparams = trainer_lib.create_hparams(
      hparams.vae_hparam_set, hparams.vae_hparams,
      data_dir=hparams.vae_data_dir, problem_name=hparams.vae_problem)

  # Go back to the root variable scope.
  with tf.variable_scope(tf.VariableScope(tf.AUTO_REUSE, ''),
                         reuse=tf.AUTO_REUSE, auxiliary_name_scope=False):
    vae = image_vae_joint.ImageVAE(
        vae_hparams, mode=self._hparams.mode,
        problem_hparams=vae_hparams.problem_hparams)

    # Source image features. Unlike pretrained_visual_encoder above, we do
    # not set an empty 'bottleneck' feature: we want vae_internal to return
    # everything (bottleneck, decoder output, and losses).
    vae_features_source = copy.copy(features)
    vae_features_source['inputs'] = tf.reshape(
        vae_features_source['source_psr'][:, -1, :], [-1, 64, 64, 1])
    vae_features_source['targets'] = vae_features_source['inputs']
    vae_features_source['cls'] = vae_features_source['targets_cls']
    sampled_bottleneck_source, dec_out_source, losses_source = (
        vae.vae_internal(vae_features_source, hparams, train))
    if tf.executing_eagerly():
      sampled_bottleneck_source, dec_out_source, losses_source = (
          vae.vae_internal(vae_features_source, hparams, train))

    # The real input to the vae will be features['rendered_targets'].
    vae_features_target = copy.copy(features)
    vae_features_target['inputs'] = tf.reshape(
        vae_features_target['targets_psr'][:, -1, :], [-1, 64, 64, 1])
    vae_features_target['targets'] = vae_features_target['inputs']
    vae_features_target['cls'] = vae_features_target['targets_cls']
    sampled_bottleneck_target, dec_out_target, losses_target = (
        vae.vae_internal(vae_features_target, hparams, train))
    if tf.executing_eagerly():
      sampled_bottleneck_target, dec_out_target, losses_target = (
          vae.vae_internal(vae_features_target, hparams, train))

    vae.initialize_from_ckpt(hparams.vae_ckpt_dir)

  vae_losses = {}
  for k in losses_source:
    vae_losses[k] = losses_source[k] + losses_target[k]

  return (sampled_bottleneck_target - sampled_bottleneck_source,
          dec_out_source, dec_out_target, vae_losses)
def build_bidirectional_lstm(layer_sizes, use_cudnn, dropout_keep_prob,
                             residual, is_training, name_or_scope):
  """Builds the TensorFlow graph for a bidirectional LSTM."""
  if use_cudnn and residual:
    raise ValueError('Residual connections not supported in cuDNN.')

  if isinstance(name_or_scope, tf.VariableScope):
    name = name_or_scope.name
    reuse = name_or_scope.reuse
  else:
    name = name_or_scope
    reuse = None

  cells_fw = []
  cells_bw = []
  for i, layer_size in enumerate(layer_sizes):
    if use_cudnn:
      cells_fw.append(
          cudnn_lstm_layer([layer_size], dropout_keep_prob, is_training,
                           name_or_scope=tf.VariableScope(
                               reuse,
                               name + '/cell_%d/bidirectional_rnn/fw' % i)))
      cells_bw.append(
          cudnn_lstm_layer([layer_size], dropout_keep_prob, is_training,
                           name_or_scope=tf.VariableScope(
                               reuse,
                               name + '/cell_%d/bidirectional_rnn/bw' % i)))
    else:
      cells_fw.append(
          rnn_cell([layer_size], dropout_keep_prob, residual, is_training))
      cells_bw.append(
          rnn_cell([layer_size], dropout_keep_prob, residual, is_training))

  return cells_fw, cells_bw
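# Hedged usage sketch for the non-cuDNN path of build_bidirectional_lstm: the
# fw/bw cell lists are shaped for tf.contrib.rnn.stack_bidirectional_dynamic_rnn
# (an assumption based on the fw/bw scope naming; the real call site is not
# shown here, and `inputs`/`lengths` are illustrative placeholders):
#
#   cells_fw, cells_bw = build_bidirectional_lstm(
#       layer_sizes=[256, 256], use_cudnn=False, dropout_keep_prob=0.9,
#       residual=False, is_training=True, name_or_scope='encoder')
#   outputs, _, _ = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
#       cells_fw, cells_bw, inputs, sequence_length=lengths, dtype=tf.float32)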
def maybe_convert_to_variable(tensor):
  """Reads the value of a tensor from a variable when possible.

  This function is intended to make tensors from inside the TPU while loop
  available on the CPU by reading them from the variable to which the tensor
  was written earlier. Note that the read may not reflect any writes that
  happened in the same session.run(), unless control dependencies are added.

  Args:
    tensor: A tf.Tensor.

  Returns:
    A tf.Tensor. If the input tensor is an output of reading a
    ResourceVariable, we return an equivalent tensor produced in the current
    context. Otherwise, we return the original input tensor.
  """
  op = tensor.op
  if is_on_cpu() and tensor in var_store:
    return var_store[tensor]
  while op.type == 'Identity':
    assert len(op.inputs) == 1
    op = op.inputs[0].op
  if op.type != 'ReadVariableOp':
    # No need to convert.
    return tensor
  with tf.variable_scope(
      # Reset the scope because variable_name contains all the scopes we need.
      name_or_scope=tf.VariableScope(''),
      # We are looking for a reference to an existing variable, so we want to
      # raise an exception if the variable is not found.
      reuse=True,
  ):
    variable_name = get_variable_name(op)
    tf.logging.info('Converting tensor %s --> variable %s', tensor,
                    variable_name)
    try:
      return tf.get_variable(variable_name)
    except ValueError:
      tf.logging.info(
          'Variable %s was not created with tf.get_variable(). '
          'Attempting to find it in the GLOBAL_VARIABLES collection.',
          variable_name)
      global_vars = tensor.graph.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
      matched_vars = [v for v in global_vars if v.name == variable_name + ':0']
      if not matched_vars:
        raise ValueError('Variable %s is in GraphDef but not in the live '
                         'graph.' % variable_name)
      assert len(matched_vars) == 1
      return matched_vars[0]
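# Hedged sketch of what maybe_convert_to_variable resolves (the scope and
# variable names are illustrative, and get_variable_name is the module's own
# helper, not shown here):
#
#   with tf.variable_scope('loop_vars', use_resource=True):
#     v = tf.get_variable('counter', shape=[], dtype=tf.float32)
#   t = tf.identity(v.read_value())      # Identity -> ReadVariableOp chain
#   maybe_convert_to_variable(t)         # resolved back to 'loop_vars/counter'
#   maybe_convert_to_variable(tf.zeros([]))  # not a variable read: unchanged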
def cls_embedding(self, sources_cls, sources_fnt, targets_cls, targets_fnt):
  cls_size = 52
  cls_embedding_size = 16
  fnt_size = 36632
  fnt_embedding_size = 128

  with tf.variable_scope(tf.VariableScope(tf.AUTO_REUSE, ''),
                         reuse=tf.AUTO_REUSE, auxiliary_name_scope=False):
    # NB: variable_scope reuse applies to tf.get_variable, not tf.Variable,
    # so these embedding tables are not shared by the AUTO_REUSE above.
    W_cls = tf.Variable(
        tf.random.uniform([cls_size, cls_embedding_size], -1.0, 1.0))
    embedded_sources_cls = tf.nn.embedding_lookup(W_cls, sources_cls)
    embedded_sources_cls = tf.squeeze(embedded_sources_cls, 1)
    embedded_targets_cls = tf.nn.embedding_lookup(W_cls, targets_cls)
    embedded_targets_cls = tf.squeeze(embedded_targets_cls, 1)

    W_fnt = tf.Variable(
        tf.random.uniform([fnt_size, fnt_embedding_size], -1.0, 1.0))
    embedded_sources_fnt = tf.nn.embedding_lookup(W_fnt, sources_fnt)
    embedded_sources_fnt = tf.squeeze(embedded_sources_fnt, 1)
    embedded_targets_fnt = tf.nn.embedding_lookup(W_fnt, targets_fnt)
    embedded_targets_fnt = tf.squeeze(embedded_targets_fnt, 1)

    src_cls = tf.layers.dense(embedded_sources_cls, 16, activation=None)
    src_fnt = tf.layers.dense(embedded_sources_fnt, 32, activation=None)
    tgt_cls = tf.layers.dense(embedded_targets_cls, 16, activation=None)
    tgt_fnt = tf.layers.dense(embedded_targets_fnt, 32, activation=None)

    emd = tf.concat([src_cls, src_fnt, tgt_cls, tgt_fnt], -1)
    # tf.layers.dense expects a callable activation, not the string 'relu'.
    ret = tf.layers.dense(emd, 32, activation=tf.nn.relu)

  return ret
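# Self-contained toy of the lookup-then-squeeze pattern used in cls_embedding
# (toy sizes, not the real vocabularies; assumes the TF1-style `tf` import).
# Ids arrive with a trailing singleton axis, so the lookup yields
# [batch, 1, embed] and the squeeze drops the middle axis:
def _embedding_lookup_demo():
  ids = tf.constant([[3], [7]])                   # [batch=2, 1]
  table = tf.random.uniform([10, 4], -1.0, 1.0)   # [vocab=10, embed=4]
  emb = tf.nn.embedding_lookup(table, ids)        # [2, 1, 4]
  return tf.squeeze(emb, 1)                       # [2, 4]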
def recursive_decode(initial_input, path=None):
  """Recursive hierarchical decode function."""
  path = path or []
  level = len(path)

  if level == num_levels:
    with tf.variable_scope('core_decoder', reuse=tf.AUTO_REUSE):
      return base_decode_fn(initial_input, path)

  scope = tf.VariableScope(
      tf.AUTO_REUSE, 'decoder/hierarchical_level_%d' % level)
  num_steps = self._level_lengths[level]
  with tf.variable_scope(scope):
    state = lstm_utils.initial_cell_state_from_embedding(
        self._hier_cells[level], initial_input, name='initial_state')

  if level not in self._disable_autoregression:
    # The initial input should be the same size as the tensors returned by
    # next level.
    if self._hierarchical_encoder:
      input_size = self._hierarchical_encoder.level(0).output_depth
    elif level == num_levels - 1:
      input_size = sum(tf.nest.flatten(self._core_decoder.state_size))
    else:
      input_size = sum(
          tf.nest.flatten(self._hier_cells[level + 1].state_size))
    next_input = tf.zeros([batch_size, input_size])

  lower_level_embeddings = []
  for i in range(num_steps):
    if level in self._disable_autoregression:
      next_input = tf.zeros([batch_size, 1])
    else:
      next_input = tf.concat([next_input, initial_input], axis=1)
    with tf.variable_scope(scope):
      output, state = self._hier_cells[level](next_input, state, scope)
    next_input = recursive_decode(output, path + [i])
    lower_level_embeddings.append(next_input)

  if self._hierarchical_encoder:
    # Return the encoding of the outputs using the appropriate level of the
    # hierarchical encoder.
    enc_level = num_levels - level
    return self._hierarchical_encoder.level(enc_level).encode(
        sequence=tf.stack(lower_level_embeddings, axis=1),
        sequence_length=tf.fill([batch_size], num_steps))
  else:
    # Return the final state.
    return tf.concat(tf.nest.flatten(state), axis=-1)
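# Worked example of the recursion above (illustrative numbers): with
# num_levels = 2 and level_lengths = [4, 4], the level-0 call runs 4 steps,
# each spawning a level-1 call that itself runs 4 steps, so base_decode_fn
# (the core decoder) is reached 4 * 4 = 16 times, once per output segment.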
def build(self, hparams, is_training=True):
  self._total_length = hparams.max_seq_len
  if self._total_length != np.prod(self._level_lengths):
    raise ValueError(
        'The product of the HierarchicalLstmEncoder level lengths (%d) must '
        'equal the padded input sequence length (%d).' % (
            np.prod(self._level_lengths), self._total_length))
  tf.logging.info('\nHierarchical Encoder:\n'
                  '  input length: %d\n'
                  '  level lengths: %s\n',
                  self._total_length,
                  self._level_lengths)
  self._hierarchical_encoders = []
  num_splits = int(np.prod(self._level_lengths))
  for i, l in enumerate(self._level_lengths):
    num_splits //= l
    tf.logging.info('Level %d splits: %d', i, num_splits)
    h_encoder = self._core_encoder_cls()
    h_encoder.build(
        hparams, is_training,
        name_or_scope=tf.VariableScope(
            tf.AUTO_REUSE, 'encoder/hierarchical_level_%d' % i))
    self._hierarchical_encoders.append((num_splits, h_encoder))
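# Worked example of the split bookkeeping above (illustrative numbers): with
# level_lengths = [2, 4] and max_seq_len = 8, num_splits starts at 8, so
# level 0 logs 8 // 2 = 4 splits and level 1 logs 4 // 4 = 1 split; the
# product of the level lengths (8) matches the padded input length, as the
# ValueError check requires.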
def vis_encoder(self, sources_psr, targets_psr, targets_cls):
  base_depth = 32
  num_categories = 52
  bottleneck_bits = 32

  sources_psr = tf.reshape(sources_psr, [-1, 64, 64, 1])
  targets_psr = tf.reshape(targets_psr, [-1, 64, 64, 1])

  with tf.variable_scope(tf.VariableScope(tf.AUTO_REUSE, ''),
                         reuse=tf.AUTO_REUSE, auxiliary_name_scope=False):
    ret = targets_psr
    clss = targets_cls
    clss = tf.reshape(clss, [-1])

    # Conv layer, followed by instance norm + FiLM.
    ret = tf.layers.Conv2D(base_depth, 5, 1, padding='SAME',
                           activation=None)(ret)
    ret = ops.conditional_instance_norm(ret, clss, num_categories)
    ret = tf.nn.relu(ret)

    ret = tf.layers.Conv2D(base_depth, 5, 2, padding='SAME',
                           activation=None)(ret)
    ret = ops.conditional_instance_norm(ret, clss, num_categories)
    ret = tf.nn.relu(ret)

    ret = tf.layers.Conv2D(2 * base_depth, 5, 1, padding='SAME',
                           activation=None)(ret)
    ret = ops.conditional_instance_norm(ret, clss, num_categories)
    ret = tf.nn.relu(ret)

    ret = tf.layers.Conv2D(2 * base_depth, 5, 2, padding='SAME',
                           activation=None)(ret)
    ret = ops.conditional_instance_norm(ret, clss, num_categories)
    ret = tf.nn.relu(ret)

    # New conv layer, to bring the spatial shape down.
    ret = tf.layers.Conv2D(2 * bottleneck_bits, 4, 2, padding='SAME',
                           activation=None)(ret)
    ret = ops.conditional_instance_norm(ret, clss, num_categories)
    ret = tf.nn.relu(ret)

    # New conv layer, to bring the spatial shape down.
    ret = tf.layers.Conv2D(2 * bottleneck_bits, 4, 2, padding='SAME',
                           activation=None)(ret)
    ret = ops.conditional_instance_norm(ret, clss, num_categories)
    ret = tf.nn.relu(ret)

    # New conv layer, to bring the spatial shape down.
    ret = tf.layers.Conv2D(2 * bottleneck_bits, 4, 2, padding='SAME',
                           activation=None)(ret)
    ret = ops.conditional_instance_norm(ret, clss, num_categories)
    ret = tf.nn.relu(ret)

    # Flatten and project down to the bottleneck.
    ret = tf.layers.flatten(ret)
    ret = tf.layers.dense(ret, bottleneck_bits, activation=None)

  return ret
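# Shape walk-through for vis_encoder, inferred from the strides above: five
# stride-2 SAME convs take the 64x64 input through 32 -> 16 -> 8 -> 4 -> 2,
# ending at [batch, 2, 2, 2 * bottleneck_bits] = [batch, 2, 2, 64], i.e. 256
# features after flattening, before the final dense projection to
# bottleneck_bits.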
def absolute_variable_scope(scope: str, **kwargs) -> tf.variable_scope:
  """Forcefully enters the specified variable scope, ignoring any
  surrounding scopes."""
  return tf.variable_scope(tf.VariableScope(name=scope, **kwargs),
                           auxiliary_name_scope=False)
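# Hedged usage sketch (the scope and variable names are illustrative). Note
# that tf.VariableScope's first positional argument is `reuse` (see the
# tf.VariableScope(tf.AUTO_REUSE, ...) calls above), so callers are expected
# to supply it through **kwargs:
#
#   with absolute_variable_scope('shared', reuse=tf.AUTO_REUSE):
#     w = tf.get_variable('w', shape=[128, 128])
#   # 'w' lives at 'shared/w' no matter how deeply the call site is nested.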