def testStep(self):
    """Build the agent's step op on all-zero inputs and run it 100 times."""
    agent = self._get_agent_instance()
    batch_size_ph = tf.placeholder_with_default(B, ())
    prev_state = agent.initial_state(bs=batch_size_ph)

    step_type = np.zeros((B, ), dtype=np.int32)
    reward = np.zeros((B, ), dtype=np.float32)
    obs = dict(
        features=np.zeros((B, N_NODES), dtype=np.float32),
        graph_features=self._get_graph_features_step(),
        node_mask=np.ones((B, N_NODES), dtype=np.int32),
    )

    # hack that works for now!
    step_type, reward, obs, prev_state = agent.step_preprocess(
        step_type, reward, obs, prev_state)

    with tf.variable_scope('step', reuse=tf.AUTO_REUSE):
        # Every numpy leaf is turned into a graph constant before the step.
        step_output = agent.step(
            nest.map_structure(tf.constant, step_type),
            nest.map_structure(tf.constant, reward),
            nest.map_structure(tf.constant, obs),
            prev_state)

    sess = self.session()
    for init_op in (tf.global_variables_initializer(),
                    tf.local_variables_initializer()):
        sess.run(init_op)

    for _ in range(100):
        sess.run(step_output)
        print('.', end='')
    print('')
    print('Done!')
def dynamic_decode(decoder_cell, max_iter):
    """Run `decoder_cell` step by step inside a `tf.while_loop`.

    The loop stops once every entry of the "finished" flags is True. A step
    whose time index is >= `max_iter` forces the flags to True.

    Args:
        decoder_cell: object exposing `output_dtype`, `initialize()`,
            `step(time, states, inputs, finished)` and
            `finalize(outputs, states)`.
        max_iter: iteration bound, converted to an int32 tensor.

    Returns:
        Tuple `(final_outputs, final_cell_states)`; the outputs are stacked
        over time, finalized by the cell and passed through
        `transpose_batch_time`.
    """
    max_iter = tf.convert_to_tensor(max_iter, dtype=tf.int32)

    def _empty_array(dtype):
        # Dynamically sized TensorArray, starts empty.
        return tf.TensorArray(dtype=dtype, size=0, dynamic_size=True)

    start_step = tf.constant(0, dtype=tf.int32)
    output_arrays = nest.map_structure(_empty_array,
                                       decoder_cell.output_dtype)
    init_states, init_inputs, init_finished = decoder_cell.initialize()

    def _not_all_finished(step, arrays, states, step_inputs, finished):
        return tf.logical_not(tf.reduce_all(finished))

    def _decode_one_step(step, arrays, states, step_inputs, finished):
        outputs, next_states, next_inputs, next_finished = decoder_cell.step(
            step, states, step_inputs, finished)

        def _record(array, value):
            # write() returns the updated TensorArray.
            return array.write(step, value)

        arrays = nest.map_structure(_record, arrays, outputs)
        hit_limit = tf.greater_equal(step, max_iter)
        next_finished = tf.logical_or(hit_limit, next_finished)
        return (step + 1, arrays, next_states, next_inputs, next_finished)

    with tf.variable_scope("decoding"):
        loop_result = tf.while_loop(
            _not_all_finished,
            _decode_one_step,
            loop_vars=[start_step, output_arrays, init_states, init_inputs,
                       init_finished],
            back_prop=False)

    final_arrays = loop_result[1]
    final_cell_states = loop_result[2]

    # Stack the per-step outputs, let the cell post-process them, then
    # swap the leading axes.
    final_outputs = nest.map_structure(lambda array: array.stack(),
                                       final_arrays)
    final_outputs = decoder_cell.finalize(final_outputs, final_cell_states)
    final_outputs = nest.map_structure(transpose_batch_time, final_outputs)

    return final_outputs, final_cell_states
def write(self, values, step=None):
    """Forward `values` to the base loggers when the tracker fires.

    Args:
        values: structure of values to log.
        step: step to report; defaults to the internal write counter.

    The tracker is advanced once per call, so the logging period is
    measured in write calls and not necessarily in `step`.
    """
    step = self._step if step is None else step

    if self._tracker.track_increment():
        if not self._is_average:
            to_write = values
        else:
            # Fold the new values into the history, then report its mean.
            if self._history is None:
                self._history = nest.map_structure(self._make_history,
                                                   values)
            else:
                self._history = nest.map_structure(self._add_to_history,
                                                   self._history, values)
            to_write = nest.map_structure(np.mean, self._history)

        for base_logger in self._base_loggers:
            base_logger.write(to_write, step=step)

    self._step += 1
def _prepare_memory(memory, memory_sequence_length, check_inner_dims_defined):
    """Convert to tensor and possibly mask `memory`.

    Args:
        memory: `Tensor`, shaped `[batch_size, max_time, ...]`.
        memory_sequence_length: `int32` `Tensor`, shaped `[batch_size]`.
        check_inner_dims_defined: Python boolean. If `True`, the `memory`
            argument's shape is checked to ensure all but the two outermost
            dimensions are fully defined.

    Returns:
        A (possibly masked), checked, new `memory`.

    Raises:
        ValueError: If `check_inner_dims_defined` is `True` and not
            `memory.shape[2:].is_fully_defined()`.
    """
    # `memory` may be a nested structure; every leaf becomes a tensor.
    memory = nest.map_structure(
        lambda m: tf.convert_to_tensor(m, name="memory"), memory)
    if memory_sequence_length is not None:
        memory_sequence_length = tf.convert_to_tensor(
            memory_sequence_length, name="memory_sequence_length")
    if check_inner_dims_defined:
        def _check_dims(m):
            if not m.get_shape()[2:].is_fully_defined():
                raise ValueError("Expected memory %s to have fully defined inner dims, "
                                 "but saw shape: %s" % (m.name, m.get_shape()))
        nest.map_structure(_check_dims, memory)
    if memory_sequence_length is None:
        seq_len_mask = None
    else:
        # The mask length and dtype are taken from the first flattened
        # memory leaf.
        seq_len_mask = tf.sequence_mask(
            memory_sequence_length,
            maxlen=tf.shape(nest.flatten(memory)[0])[1],
            dtype=nest.flatten(memory)[0].dtype)
        # Prefer the static batch size, fall back to the dynamic one.
        seq_len_batch_size = (
            tf.dimension_value(memory_sequence_length.shape[0])
            or tf.shape(memory_sequence_length)[0])

    def _maybe_mask(m, seq_len_mask):
        """Multiply `m` by the sequence mask when lengths were supplied."""
        rank = m.get_shape().ndims
        rank = rank if rank is not None else tf.rank(m)
        # One trailing singleton per dimension beyond the first two, so the
        # reshaped mask broadcasts against `m`.
        extra_ones = tf.ones(rank - 2, dtype=tf.int32)
        m_batch_size = tf.dimension_value(
            m.shape[0]) or tf.shape(m)[0]
        if memory_sequence_length is not None:
            message = ("memory_sequence_length and memory tensor batch sizes do not "
                       "match.")
            # The reshape and product are created under the batch-size
            # assertion.
            with tf.control_dependencies([
                    tf.assert_equal(
                        seq_len_batch_size, m_batch_size, message=message)]):
                seq_len_mask = tf.reshape(
                    seq_len_mask,
                    tf.concat((tf.shape(seq_len_mask), extra_ones), 0))
                return m * seq_len_mask
        else:
            return m

    return nest.map_structure(lambda m: _maybe_mask(m, seq_len_mask), memory)
def _get_graph_features_update(self):
    """Return GRAPH_FEATURES stacked B times, then (T + 1) times.

    Each leaf gains two new leading axes: the outer one has length T + 1
    and the inner one has length B.
    """
    def _stack_leading(*arrays):
        return np.stack(arrays, axis=0)

    batch_copies = [GRAPH_FEATURES] * B
    batched = nest.map_structure(_stack_leading, *batch_copies)

    time_copies = [batched] * (T + 1)
    return nest.map_structure(_stack_leading, *time_copies)
def loop_tf(loop_fn, inputs, persistent_initializer, transient_initializer,
            n=None, time_major=False):
    """Scan `loop_fn` over the time axis of `inputs` with `tf.while_loop`.

    Args:
        loop_fn: callable `(input_t, persistent, transient)` returning
            `(new_persistent, new_transient)`.
        inputs: tensor indexed as `inputs[t]` when `time_major`, otherwise
            as `inputs[:, t]`.
        persistent_initializer: structure of tensors; the per-step values of
            this state are recorded and returned as sequences.
        transient_initializer: structure carried between steps but not
            recorded.
        n: size of the recording TensorArrays. Defaults to the dynamic
            sequence length of `inputs` (a size is mandatory for
            `tf.TensorArray`, so `None` cannot be forwarded as-is).
        time_major: whether time is axis 0 (True) or axis 1 (False).

    Returns:
        Structure matching `persistent_initializer` holding the stacked
        per-step persistent values. When `time_major` is False the first
        two axes are swapped after stacking.
    """
    time_axis = 0 if time_major else 1
    # Loop-invariant, so computed once outside the loop condition.
    seq_len = tf.shape(inputs)[time_axis]
    if n is None:
        n = seq_len

    def create_tensor_array(initial_tensor: tf.Tensor):
        return tf.TensorArray(initial_tensor.dtype,
                              size=n,
                              element_shape=initial_tensor.get_shape())

    tensor_arrays = nest.map_structure(create_tensor_array,
                                       persistent_initializer)

    def while_fn(current_iteration, persistent_values, transient_values,
                 current_tensor_arrays):
        if time_major:
            input_values = inputs[current_iteration]
        else:
            input_values = inputs[:, current_iteration]

        new_persistent, new_transient = loop_fn(input_values,
                                                persistent_values,
                                                transient_values)

        # Record every leaf of the new persistent state at this step.
        written = [
            ta.write(current_iteration, value)
            for ta, value in zip(nest.flatten(current_tensor_arrays),
                                 nest.flatten(new_persistent))
        ]
        new_tensor_arrays = nest.pack_sequence_as(current_tensor_arrays,
                                                  written)
        return (current_iteration + 1, new_persistent, new_transient,
                new_tensor_arrays)

    def while_cond(current_iteration, *unused_state):
        return tf.less(current_iteration, seq_len)

    _, final_persistent, final_transient, final_tensor_arrays = tf.while_loop(
        while_cond, while_fn,
        (0, persistent_initializer, transient_initializer, tensor_arrays))

    final_sequence_tensors = nest.map_structure(lambda ta: ta.stack(),
                                                final_tensor_arrays)

    def make_batch_major(tensor):
        # Swap the first two axes, leave the rest untouched.
        permutation = np.arange(len(tensor.get_shape()))
        permutation[:2] = permutation[:2][::-1]
        return tf.transpose(tensor, permutation)

    if not time_major:
        final_sequence_tensors = nest.map_structure(make_batch_major,
                                                    final_sequence_tensors)

    return final_sequence_tensors
def _get_graph_features_update(self, large_graph=True):
    """Return graph features stacked B times, then (T + 1) times.

    Args:
        large_graph: when true the features come from `self._large_graph()`,
            otherwise from the module-level `GRAPH_FEATURES`.
    """
    def _stack_leading(*arrays):
        return np.stack(arrays, axis=0)

    base_features = self._large_graph() if large_graph else GRAPH_FEATURES

    batched = nest.map_structure(_stack_leading, *([base_features] * B))
    return nest.map_structure(_stack_leading, *([batched] * (T + 1)))
def testUpdate(self):
    """Build the agent's update ops on synthetic inputs and run 3 updates."""
    agent = self._get_agent_instance()
    batch_size_ph = tf.placeholder_with_default(B, ())
    sess = self.session()
    init_state = agent.initial_state(bs=batch_size_ph)
    init_state_val = sess.run(init_state)

    seq_shape = (T + 1, B)
    node_shape = (T + 1, B, N_NODES)

    step_type = np.zeros(seq_shape, dtype=np.int32)
    reward = np.zeros(seq_shape, dtype=np.float32)
    discount = np.zeros(seq_shape, dtype=np.float32)

    def _single_node_mask(node_index):
        # int32 mask that is 1 only at `node_index` along the node axis.
        mask = np.zeros(node_shape, dtype=np.int32)
        mask[:, :, node_index] = 1
        return mask

    var_type_mask = _single_node_mask(0)
    constraint_type_mask = _single_node_mask(1)
    obj_type_mask = _single_node_mask(2)

    obs = dict(
        features=np.zeros(node_shape, dtype=np.float32),
        graph_features=self._get_graph_features_update(),
        node_mask=np.ones(node_shape, dtype=np.int32),
        var_type_mask=var_type_mask,
        constraint_type_mask=constraint_type_mask,
        obj_type_mask=obj_type_mask,
    )

    stacked_states = np.vstack([init_state_val] * T)
    step_output = StepOutput(
        action=np.zeros((T, B), dtype=np.int32),
        logits=np.zeros((T, B, N_NODES), dtype=np.float32),
        next_state=np.zeros_like(stacked_states))

    step_output, _, step_type, reward, obs, discount = (
        agent.update_preprocess(step_output, None, step_type, reward, obs,
                                discount))

    with tf.variable_scope('update', reuse=tf.AUTO_REUSE):
        agent.build_update_ops(
            nest.map_structure(tf.constant, step_output),
            tf.zeros_like(np.vstack([init_state_val] * (T + 1))),
            nest.map_structure(tf.constant, step_type),
            nest.map_structure(tf.constant, reward),
            nest.map_structure(tf.constant, obs),
            nest.map_structure(tf.constant, discount))

    for init_op in (tf.global_variables_initializer(),
                    tf.local_variables_initializer()):
        sess.run(init_op)

    for _ in range(3):
        agent.update(sess, {}, {})
        print('.', end='')
    print('')
    print('Done!')
def unstack_data_dict(stacked_data_dict):
    """Split a stacked data dict into a flat list of unstacked data dicts.

    Every non-None value is split along its leading axis. If the resulting
    `'n_node'` entries still have one or more dimensions, the pieces are
    unstacked recursively.

    Args:
        stacked_data_dict: dict with an `'n_node'` array; values are arrays
            sharing the leading axis, or None.

    Returns:
        List of dicts with the same keys. None values stay None in every
        output dict.
    """
    def _split_leading(arr):
        if arr is None:
            return None
        # One view per index of the leading axis, with that axis removed.
        return [arr[i] for i in range(arr.shape[0])]

    per_key = nest.map_structure(_split_leading, stacked_data_dict)
    batch_size = len(per_key['n_node'])

    # None entries have nothing to index, so they are passed through.
    data_dicts = [
        {key: (None if parts is None else parts[i])
         for key, parts in per_key.items()}
        for i in range(batch_size)
    ]

    unstacked_data_dicts = []
    for data_dict in data_dicts:
        if data_dict['n_node'].ndim > 0:
            # Still stacked along another leading axis.
            unstacked_data_dicts.extend(unstack_data_dict(data_dict))
        else:
            unstacked_data_dicts.append(data_dict)
    return unstacked_data_dicts
def initialize(batch_size, time, emit_input):
    """Write the "SOS" token id into `emit_input` at `time`.

    The id is looked up in `word2idx`, tiled `batch_size` times and written
    to every TensorArray of `emit_input`.

    Returns:
        Tuple `(emit_input, time)` with the updated arrays and `time`
        advanced by one.
    """
    for token in ["SOS"]:
        token_id = tf.constant(word2idx[token], dtype=tf.int32)
        batch_ids = tf.tile(tf.reshape(token_id, [-1]), [batch_size])

        def _write(array, value, at=time):
            return array.write(at, value)

        emit_input = nest.map_structure(_write, emit_input, batch_ids)
        time += 1
    return emit_input, time
def rnn_model(inputs, shape, embedding_matrix):
    """Make an unrolled RNN over the inputs. Not optimised for GPU.

    Args:
        inputs: ids looked up in `embedding_matrix`.
        shape: sequence of GRU layer sizes, one cell per entry; the last
            entry is the width fed to the output layer.
        embedding_matrix: embedding table; its first static dimension is
            used as the vocabulary size.

    Returns:
        Output of a linear dense layer, reshaped to
        `[input_shape[0] or -1, input_shape[1], vocab_size]`, where
        `input_shape` is the static shape of the embedded inputs.
    """
    with tf.variable_scope('rnn'):
        inputs = tf.nn.embedding_lookup(embedding_matrix, inputs)
        input_shape = inputs.get_shape().as_list()
        vocab_size = embedding_matrix.get_shape()[0].value
        cells = [tf.nn.rnn_cell.GRUCell(n) for n in shape]
        cell = tf.nn.rnn_cell.MultiRNNCell(cells)
        # won't work with LSTMs
        # One zero-initialised state variable per layer, sized by the first
        # static dimension of the embedded inputs.
        initial_state = tuple(
            tf.get_variable('state_{}'.format(i),
                            shape=[input_shape[0], c.state_size],
                            dtype=tf.float32,
                            initializer=tf.zeros_initializer())
            for i, c in enumerate(cells))
        outputs, final_state = tf.nn.dynamic_rnn(cell, inputs,
                                                 initial_state=initial_state)
        # we're always going to roll this over every time the output is
        # evaluated
        state_updates = nest.map_structure(tf.assign, initial_state,
                                           final_state)
        state_updates = nest.flatten(state_updates)
        # The reshape is created under the assigns, so anything computed
        # from `outputs` depends on the state variables being updated.
        with tf.control_dependencies(state_updates):
            outputs = tf.reshape(outputs, [-1, shape[-1]])
        outputs = tf.layers.dense(outputs, vocab_size, activation=None)
        outputs = tf.reshape(
            outputs, [input_shape[0] or -1, input_shape[1], vocab_size])
    return outputs
def _traj_spec(self):
    """Return the graph features with a new leading axis of length 1.

    Every leaf of `self._get_graph_features()` is deep-copied and expanded
    at axis 0.
    """
    def expand_spec(spec):
        # np.expand_dims returns a new array rather than modifying its
        # argument, so its result has to be the return value.
        return np.expand_dims(copy.deepcopy(spec), axis=0)

    return nest.map_structure(expand_spec, self._get_graph_features())
def zero_state(self, batch_size, dtype):
    """Return the initial `AttentionWrapperState` for this wrapper.

    Args:
        batch_size: batch size forwarded to the wrapped cell, the batch-size
            checks and the attention mechanisms.
        dtype: data type of the created state tensors.

    Returns:
        An `AttentionWrapperState` whose cell state is either the supplied
        initial cell state or the wrapped cell's zero state, guarded by the
        batch-size checks.
    """
    with tf.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
        if self._initial_cell_state is not None:
            cell_state = self._initial_cell_state
        else:
            cell_state = self._cell.zero_state(batch_size, dtype)
        error_message = (
            "When calling zero_state of AttentionWrapper %s: " % self._base_name +
            "Non-matching batch sizes between the memory "
            "(encoder output) and the requested batch size. Are you using "
            "the BeamSearchDecoder? If so, make sure your encoder output has "
            "been tiled to beam_width via tf.contrib.seq2seq.tile_batch, and "
            "the batch_size= argument passed to zero_state is "
            "batch_size * beam_width.")
        # The identity ops carry the batch-size checks as control inputs.
        with tf.control_dependencies(
                self._batch_size_checks(batch_size, error_message)):
            cell_state = nest.map_structure(
                lambda s: tf.identity(s, name="checked_cell_state"),
                cell_state)
        return AttentionWrapperState(
            cell_state=cell_state,
            time=tf.zeros([], dtype=tf.int32),
            attention=_zero_state_tensors(self._attention_layer_size, batch_size,
                                          dtype),
            alignments=self._item_or_tuple(
                attention_mechanism.initial_alignments(batch_size, dtype)
                for attention_mechanism in self._attention_mechanisms),
            # One empty, dynamically sized TensorArray per mechanism when
            # history is enabled, otherwise an empty tuple per mechanism.
            alignment_history=self._item_or_tuple(
                tf.TensorArray(dtype=dtype, size=0, dynamic_size=True)
                if self._alignment_history else ()
                for _ in self._attention_mechanisms))
def distill_loss_old(student_logits, teacher_logits, masks):
    """Masked per-head KL divergence between student and teacher logits.

    For every head this computes `sum_i p_s[i] * (log p_s[i] - log p_t[i])`
    over the last axis, where `p_s` / `p_t` are the softmaxes of the student
    / teacher logits, and multiplies the result by the head's mask.

    Args:
        student_logits: structured logits compatible with
            nest.map_structure.
        teacher_logits: logits with the same structure as `student_logits`.
        masks: multiplicative masks with the same structure.

    Returns:
        head_distill_loss: per-head loss. The same structure as inputs.
    """
    def _masked_kl(s_logits, t_logits, mask):
        # Subtract the row maximum before exponentiating.
        s_shifted = s_logits - tf.reduce_max(s_logits, axis=-1,
                                             keep_dims=True)
        t_shifted = t_logits - tf.reduce_max(t_logits, axis=-1,
                                             keep_dims=True)
        s_exp = tf.exp(s_shifted)
        t_exp = tf.exp(t_shifted)
        s_norm = tf.reduce_sum(s_exp, axis=-1, keep_dims=True)
        t_norm = tf.reduce_sum(t_exp, axis=-1, keep_dims=True)
        s_prob = s_exp / s_norm
        log_ratio = s_shifted - tf.log(s_norm) - t_shifted + tf.log(t_norm)
        return tf.reduce_sum(s_prob * log_ratio, axis=-1) * mask

    return nest.map_structure(_masked_kl, student_logits, teacher_logits,
                              masks)
def step_output_spec(self):
    """Return a dict of `ArraySpec`s describing `self._step_output`.

    Each tensor contributes its numpy dtype, shape and name.
    """
    def _spec_of(tensor):
        return ArraySpec(dtype=tensor.dtype.as_numpy_dtype,
                         shape=tensor.shape,
                         name=tensor.name)

    specs = nest.map_structure(_spec_of, self._step_output)
    return dict(specs._asdict())
def _mk_phs(self, traj_spec):
    """Create one placeholder per spec and store them in `self._traj_phs`.

    Placeholder names are `'learner/<spec name>_ph'` with every ':' in the
    spec name replaced by '_'.
    """
    def _placeholder_for(spec):
        ph_name = 'learner/' + spec.name.replace(':', '_') + '_ph'
        return tf.placeholder(dtype=spec.dtype,
                              shape=spec.shape,
                              name=ph_name)

    self._traj_phs = nest.map_structure(_placeholder_for, traj_spec)
def _zero_state_tensors(state_size, batch_size, dtype):
    """Create tensors of zeros based on state_size, batch_size, and dtype.

    Args:
        state_size: (possibly nested) structure of state sizes.
        batch_size: batch size combined with each state size via `_concat`.
        dtype: data type of the created tensors.

    Returns:
        Structure matching `state_size` whose leaves are all-zero tensors.
    """
    def get_state_shape(s):
        """Combine s with batch_size to get a proper tensor shape."""
        c = _concat(batch_size, s)
        # The contract is an all-zero state, so build zeros rather than
        # random samples.
        return tf.zeros(c, dtype=dtype)

    return nest.map_structure(get_state_shape, state_size)
def _get_graph_features(self):
    """Return `self._large_graph()` stacked B times along a new axis 0.

    Only the batch axis is added here; no time axis is stacked.
    """
    def _stack_leading(*arrays):
        return np.stack(arrays, axis=0)

    batch_copies = [self._large_graph()] * B
    return nest.map_structure(_stack_leading, *batch_copies)
def body(time, outputs_ta, parents):
    """Gather the step-`time` outputs by `parents` and record them.

    Returns:
        Tuple `(time + 1, outputs_ta, parents)`; `parents` is passed on
        unchanged.
    """
    # Slice out what was predicted at this time step.
    step_outputs = nest.map_structure(lambda t: t[time], final_outputs)

    def _select_by_parents(t):
        return gather_helper(t, parents, self._batch_size, self._beam_size)

    gathered = nest.map_structure(_select_by_parents, step_outputs)
    new_output = DecoderOutput(logits=gathered.logits, ids=gathered.ids)

    def _record(array, value):
        return array.write(time, value)

    outputs_ta = nest.map_structure(_record, outputs_ta, new_output)
    return (time + 1), outputs_ta, parents
def zero_state(self, batch_size, dtype):
    """Return an initial (zero) state tuple for this `AttentionWrapper`.

    **NOTE** Please see the initializer documentation for details of how
    to call `zero_state` if using an `AttentionWrapper` with a
    `BeamSearchDecoder`.

    Args:
        batch_size: `0D` integer tensor: the batch size.
        dtype: The internal state data type.

    Returns:
        An `AttentionWrapperState` tuple containing zeroed out tensors and,
        possibly, empty `TensorArray` objects.

    Raises:
        ValueError: (or, possibly at runtime, InvalidArgument), if
            `batch_size` does not match the output size of the encoder passed
            to the wrapper object at initialization time.
    """
    with tf.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
        if self._initial_cell_state is not None:
            cell_state = self._initial_cell_state
        else:
            cell_state = self._cell.zero_state(batch_size, dtype)
        error_message = (
            "When calling zero_state of AttentionWrapper %s: " % self._base_name +
            "Non-matching batch sizes between the memory "
            "(encoder output) and the requested batch size. Are you using "
            "the BeamSearchDecoder? If so, make sure your encoder output has "
            "been tiled to beam_width via tf.contrib.seq2seq.tile_batch, and "
            "the batch_size= argument passed to zero_state is "
            "batch_size * beam_width.")
        # The identity ops carry the batch-size checks as control inputs.
        with tf.control_dependencies(
                self._batch_size_checks(batch_size, error_message)):
            cell_state = nest.map_structure(
                lambda s: tf.identity(s, name="checked_cell_state"),
                cell_state)
        # Materialised as a list because it is used twice below: for the
        # alignments and for the history element shapes.
        initial_alignments = [
            attention_mechanism.initial_alignments(batch_size, dtype)
            for attention_mechanism in self._attention_mechanisms
        ]
        return AttentionWrapperState(
            cell_state=cell_state,
            time=tf.zeros([], dtype=tf.int32),
            attention=_zero_state_tensors(self._attention_layer_size, batch_size,
                                          dtype),
            alignments=self._item_or_tuple(initial_alignments),
            attention_state=self._item_or_tuple(
                attention_mechanism.initial_state(batch_size, dtype)
                for attention_mechanism in self._attention_mechanisms),
            # One empty TensorArray per mechanism when history is enabled,
            # shaped like that mechanism's initial alignment.
            alignment_history=self._item_or_tuple(
                tf.TensorArray(dtype,
                               size=0,
                               dynamic_size=True,
                               element_shape=alignment.shape)
                if self._alignment_history else ()
                for alignment in initial_alignments))
def __init__(
    self,
    obs_spec,
    step_output_spec,
):
    """Set up an empty trajectory and its spec.

    Args:
        obs_spec: observation spec structure; each leaf goes through
            `expand_spec`.
        step_output_spec: step-output spec structure; each leaf goes
            through `expand_spec`.
    """
    self._trajs = None

    def _unshaped(dtype, name):
        # Don't use shape in the spec since it's unknown
        return ArraySpec(dtype=dtype, shape=(None, None), name=name)

    self._traj_spec = dict(
        step_type=_unshaped(np.int8, 'traj_step_type_spec'),
        reward=_unshaped(np.float32, 'traj_reward_spec'),
        discount=_unshaped(np.float32, 'traj_discount_spec'),
        observation=nest.map_structure(expand_spec, obs_spec),
        step_output=nest.map_structure(expand_spec, step_output_spec),
    )
def __init__(self, inputs, labels, keep_prob, time_sizes=[300, 300],
             note_sizes=[100, 50]):
    """Build the time- and note-axis LSTM stacks and the training ops.

    Args:
        inputs: input tensor, shape (batch, time, note, feature).
        labels: label tensor, shape (batch, time, note, out).
        keep_prob: output keep probability for the dropout wrappers.
        time_sizes: LSTM layer sizes of the time-axis stack.
        note_sizes: LSTM layer sizes of the note-axis stack.
    """
    self.inputs = inputs  # input shape (batch, time, note, feature)
    self.labels = labels  # label shape (batch, time, note, out)
    self.batch_size = tf.shape(self.inputs)[0]
    self.keep_prob = keep_prob
    # Copy the size lists: the defaults are shared list objects, and storing
    # them directly would let a mutation on one instance leak into every
    # later instance.
    self.time_sizes = list(time_sizes)
    self.note_sizes = list(note_sizes)

    def _stacked_lstm(sizes):
        # One dropout-wrapped LSTM cell per entry of `sizes`.
        return tf.contrib.rnn.MultiRNNCell([
            tf.contrib.rnn.DropoutWrapper(
                tf.contrib.rnn.LSTMCell(sz, state_is_tuple=True),
                output_keep_prob=self.keep_prob) for sz in sizes
        ], state_is_tuple=True)

    def _feedable_state(cell, batch, collection):
        # The zero state is wrapped in placeholders-with-default so a caller
        # can feed a state; the placeholders are registered in `collection`.
        state = nest.map_structure(
            lambda x: tf.placeholder_with_default(x, x.shape, x.op.name),
            cell.zero_state(batch, tf.float32))
        for tensor in nest.flatten(state):
            tf.add_to_collection(collection, tensor)
        return state

    self.time_lstm_cell = _stacked_lstm(self.time_sizes)
    self.time_state = _feedable_state(self.time_lstm_cell,
                                      self.batch_size * NOTE_LEN,
                                      'time_state_input')

    self.note_lstm_cell = _stacked_lstm(self.note_sizes)
    self.note_state = _feedable_state(self.note_lstm_cell,
                                      self.batch_size * SEQ_LEN,
                                      'note_state_input')

    self.final_time_state, self.final_note_state, self.prediction \
        = self.forward_pass()
    self.loss = self.loss_function()
    self.optimize = self.optimizer()
def _initial_state(self):
    """Return the initial `BeamDecoderCellStates`.

    The decoder's initial states are passed through `tile_beam` with the
    beam size, and paired with all-zero accumulated log-probabilities of
    shape `[batch_size, beam_size]`.
    """
    def _tile(state):
        return tile_beam(state, self._beam_size)

    tiled_states = nest.map_structure(_tile, self._dec_init_states)
    zero_log_probs = tf.zeros([self._batch_size, self._beam_size],
                              dtype=self._dtype)
    return BeamDecoderCellStates(tiled_states, zero_log_probs)
def body(f_time_index, output_ta, f_parents):
    """Gather the outputs at `f_time_index` by `f_parents` and record them.

    Returns:
        Tuple `(f_time_index + 1, output_ta, parents)` where `parents` is
        the (ungathered) `parents` field of this step's outputs.
    """
    # Slice out what was predicted at this time step.
    step_outputs = nest.map_structure(lambda t: t[f_time_index],
                                      final_outputs)

    def _select_by_parents(t):
        return gather_helper(t, f_parents, self._batch_size,
                             self._beam_size)

    gathered = nest.map_structure(_select_by_parents, step_outputs)
    new_output = DecoderOutput(logits=gathered.logits, ids=gathered.ids)

    def _record(array, value):
        return array.write(f_time_index, value)

    output_ta = nest.map_structure(_record, output_ta, new_output)
    return (f_time_index + 1), output_ta, step_outputs.parents
def _weight_mean_with_function(f, args, max_batch):
    """Evaluate `f` in chunks of at most `max_batch` and weight-average.

    Args:
        f: function handed to `_calculate_function_with_batch_size`.
        args: sequence of inputs; `len(args[0])` is taken as the total
            batch size.
        max_batch: maximum chunk size.

    Returns:
        Structure produced by mapping the weighted mean over the chunked
        outputs.
    """
    batch_size = len(args[0])

    # One weight per chunk: `max_batch` for every full chunk, plus the
    # remainder for a trailing partial chunk. This is built explicitly
    # because `a + [] if cond else [r]` parses as `(a + []) if cond else
    # [r]`, which drops the full-chunk weights.
    full_chunks, remainder = divmod(batch_size, max_batch)
    chunk_sizes = [max_batch] * full_chunks
    if remainder:
        chunk_sizes.append(remainder)
    weight = np.array(chunk_sizes)
    weight_sum = np.sum(weight)

    outputs = _calculate_function_with_batch_size(args, f, max_batch)

    def _weight_mean(*chunk_values):
        return sum(w * a for w, a in zip(weight, chunk_values)) / weight_sum

    # NOTE(review): `_weight_mean` is variadic but receives a single
    # structure here; confirm against `_calculate_function_with_batch_size`
    # whether `*outputs` was intended.
    outputs = nest.map_structure(_weight_mean, outputs)
    return outputs
def body(time, outputs_ta, state, inputs, finished):
    """Run one decoder step and record its output at index `time`.

    Returns:
        Tuple `(time + 1, outputs_ta, new_state, new_inputs, new_finished)`;
        the finished flags are forced to True once
        `time >= maximum_iterations`.
    """
    step_result = decoder_cell.step(time, state, inputs, finished)
    new_output, new_state, new_inputs, new_finished = step_result

    def _record(array, value):
        return array.write(time, value)

    outputs_ta = nest.map_structure(_record, outputs_ta, new_output)

    hit_limit = tf.greater_equal(time, maximum_iterations)
    new_finished = tf.logical_or(hit_limit, new_finished)

    return (time + 1, outputs_ta, new_state, new_inputs, new_finished)
def get_training_dataset(features_file,
                         labels_file,
                         features_vocab_file,
                         labels_vocab_file,
                         batch_size,
                         batch_type="examples",
                         share_vocab=False,
                         intercept=False,
                         shuffle_buffer_size=None,
                         bucket_width=None,
                         maximum_features_length=None,
                         maximum_labels_length=None,
                         single_pass=False):
    """Build the zipped, filtered and padded-batched training dataset.

    Args:
        features_file: text file with one features example per line.
        labels_file: text file with one labels example per line.
        features_vocab_file: vocabulary file for the features.
        labels_vocab_file: vocabulary file for the labels; only read when
            `share_vocab` is false.
        batch_size: forwarded to `batch_pad_dataset`.
        batch_type: forwarded to `batch_pad_dataset`.
        share_vocab: when true, labels reuse the features vocabulary lookup.
        intercept: forwarded to `filter_length`.
        shuffle_buffer_size: shuffle buffer size, or None to skip shuffling.
        bucket_width: forwarded to `batch_pad_dataset`.
        maximum_features_length: forwarded to `filter_length`.
        maximum_labels_length: forwarded to `filter_length`.
        single_pass: when false the dataset repeats indefinitely.

    Returns:
        The resulting `tf.data` dataset.
    """
    features_dataset = tf.data.TextLineDataset(features_file)
    features_vocab = Vocab(vocabulary_file=features_vocab_file)
    features_vocab = features_vocab.vocabulary_lookup()
    features_dataset = features_dataset.map(
        lambda args: make_features(args, vocabulary=features_vocab))

    labels_dataset = tf.data.TextLineDataset(labels_file)
    # Test truthiness: `share_vocab is not None` is also true for the
    # default False, which would silently ignore `labels_vocab_file`.
    if share_vocab:
        labels_vocab = features_vocab
    else:
        labels_vocab = Vocab(vocabulary_file=labels_vocab_file)
        labels_vocab = labels_vocab.vocabulary_lookup()
    labels_dataset = labels_dataset.map(
        lambda args: make_labels(args, vocabulary=labels_vocab))

    dataset = tf.data.Dataset.zip((features_dataset, labels_dataset))
    if shuffle_buffer_size is not None:
        dataset = dataset.shuffle(shuffle_buffer_size)

    dataset = dataset.apply(
        filter_length(maximum_features_length=maximum_features_length,
                      maximum_labels_length=maximum_labels_length,
                      intercept=intercept))

    padded_shapes = nest.map_structure(lambda shape: shape.as_list(),
                                       dataset.output_shapes)
    dataset = dataset.apply(
        batch_pad_dataset(batch_size=batch_size,
                          padded_shapes=padded_shapes,
                          batch_type=batch_type,
                          bucket_width=bucket_width))

    if not single_pass:
        dataset = dataset.repeat()
    return dataset
def _build_rnn(self, x, log=no_op):
    """Build a multi-layer LSTM over `x` with a feedable initial state.

    Args:
        x: input passed to `tf.nn.dynamic_rnn` with `time_major=False`.
        log: logging callback, called as `log(label)` or `log(label, value)`.

    Returns:
        Tuple `(output, state1, state0)`: the RNN outputs, the final state,
        and the initial-state placeholders. Each placeholder defaults to the
        cell's zero state and leaves its leading dimension unspecified.
    """
    log('- build-rnn -')
    # `values` has to be passed by keyword: the second positional parameter
    # of tf.name_scope is `default_name`.
    with tf.name_scope('build_rnn', values=[x]):
        log('rnn-input', x)

        with slim.arg_scope(self._arg_scope()):
            lstms = [
                tf.nn.rnn_cell.LSTMCell(cfg.LSTM_SIZE)
                for _ in range(cfg.NUM_LSTM)
            ]
            cell = tf.nn.rnn_cell.MultiRNNCell(lstms)

            def _feedable(zero):
                # Same name as the zero-state op; leading dim left open.
                return tf.placeholder_with_default(
                    zero, [None] + list(zero.shape)[1:], zero.op.name)

            state0 = nest.map_structure(
                _feedable, cell.zero_state(self.batch_size_, tf.float32))

            with tf.variable_scope('rnn', reuse=self.reuse_):
                output, state1 = tf.nn.dynamic_rnn(cell=cell,
                                                   inputs=x,
                                                   initial_state=state0,
                                                   time_major=False,
                                                   dtype=tf.float32)
    log('-------------')
    return output, state1, state0
def __init__(self, obs_spec, step_output_spec, batch_size, discount_factor,
             traj_length):
    """Set up the batched trajectory and its per-item chopping buffers.

    Args:
        obs_spec: observation spec dict.
        step_output_spec: step-output spec structure.
        batch_size: number of experiences in the batch.
        discount_factor: stored as `self._discount_factor`.
        traj_length: stored as `self._traj_len`.
    """
    self._batch_size = batch_size
    self._traj_len = traj_length
    self._discount_factor = discount_factor

    def _unshaped(dtype, name):
        # Don't use shape in the spec since it's unknown
        return ArraySpec(dtype=dtype, shape=(None, None), name=name)

    self._traj_spec = dict(
        step_type=_unshaped(np.int8, 'traj_step_type_spec'),
        reward=_unshaped(np.float32, 'traj_reward_spec'),
        discount=_unshaped(np.float32, 'traj_discount_spec'),
        observation=nest.map_structure(expand_spec, obs_spec),
        step_output=nest.map_structure(expand_spec, step_output_spec),
    )

    # self._trajs[i] = trajectory of the ith experience in the batch.
    self._trajs = None
    # list of timesteps that have been backtracked
    # and ready to be split into chunks to be shipped out.
    # _finished_timesteps[i] = Finished timesteps for the ith item of the
    # batch.
    self._finished_timesteps = None

    # used to chop the trajectory into chunks: a copy of the observation
    # spec extended with a bootstrap value entry.
    chopping_obs_spec = copy.deepcopy(obs_spec)
    chopping_obs_spec['bootstrap_value'] = ArraySpec(
        dtype=np.float32, shape=(None, ), name='bootstrap_value_spec')
    self._chopping_trajs = [
        BaseTrajectory(chopping_obs_spec, step_output_spec)
        for _ in range(batch_size)
    ]
    self._len = 0
def body(time_index, output_ta, cell_states, inputs, decode_finished):
    """Run one decoder step and record its output at `time_index`.

    Returns:
        Tuple `(time_index + 1, output_ta, new_cell_states, new_inputs,
        new_decode_finished)`; the finished flags are forced to True once
        `time_index >= max_iter`.
    """
    (new_output, new_cell_states, new_inputs,
     new_decode_finished) = decoder_cell.step(time_index, cell_states,
                                              inputs, decode_finished)

    def _record(array, value):
        # write() returns the updated TensorArray.
        return array.write(time_index, value)

    output_ta = nest.map_structure(_record, output_ta, new_output)

    hit_limit = tf.greater_equal(time_index, max_iter)
    new_decode_finished = tf.logical_or(hit_limit, new_decode_finished)

    return (time_index + 1, output_ta, new_cell_states, new_inputs,
            new_decode_finished)