def testScan_MultiOutputMismatchedInitializer(self):
    """Scan must reject a fn whose output structure differs from the initializer."""

    def two_outputs(acc, _):
        # Returns a pair although the initializer below is a single value.
        return (acc, -acc)

    expected_message = "two structures don't have the same number of elements"
    with self.test_session():
        values = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
        scalar_init = np.array(1.0)
        with self.assertRaisesRegexp(ValueError, expected_message):
            tf.scan(two_outputs, values, scalar_init)
def train(x=x, size_bt=size_bt, BV_t=BV_t, BH_t=BH_t):
    """Build the scan-based bias sequences and the RBM sample/cost ops.

    The incoming ``BV_t`` / ``BH_t`` values are not read; both names are
    rebound to the freshly scanned and reshaped bias tensors before being
    handed to ``RBM.build_rbm``.

    Returns:
        The tuple ``(x, sample, cost, params, size_bt)``.
    """
    visible_bias_seed = tf.zeros([1, n_visible], tf.float32)
    hidden_bias_seed = tf.zeros([1, n_hidden], tf.float32)

    # Recurrent states, one per leading-axis slice of x.
    rnn_states = tf.scan(rnn_recurrence, x, initializer=u0)

    visible_bias_seq = tf.scan(
        visible_bias_recurrence, rnn_states, visible_bias_seed)
    BV_t = tf.reshape(visible_bias_seq, [size_bt, n_visible])

    hidden_bias_seq = tf.scan(
        hidden_bias_recurrence, rnn_states, hidden_bias_seed)
    BH_t = tf.reshape(hidden_bias_seq, [size_bt, n_hidden])

    sample, cost = RBM.build_rbm(x, W, BV_t, BH_t, k=15)
    return x, sample, cost, params, size_bt
def testScan_Simple(self):
    """Running product via scan, without and with an explicit initializer."""

    def multiply(acc, item):
        return tf.mul(acc, item)

    with self.test_session():
        elems = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], name="data")
        v = tf.constant(2.0, name="v")

        # Without an initializer the first element seeds the accumulator.
        unseeded = tf.scan(multiply, elems)
        self.assertAllEqual(
            [1.0, 2.0, 6.0, 24.0, 120.0, 720.0], unseeded.eval())

        # With an initializer every element is multiplied in.
        seeded = tf.scan(multiply, elems, initializer=v)
        self.assertAllEqual(
            [2.0, 4.0, 12.0, 48.0, 240.0, 1440.0], seeded.eval())
def testScanVaryingShape(self):
    """Smoke-test scan and its gradients when one input dimension is None."""

    def running_sum(acc, row):
        return acc + row

    with self.test_session() as sess:
        x = tf.placeholder(dtype=tf.float32, shape=[None, 2])
        x_t = tf.transpose(x)

        # Scan over dimension 0 of x, whose static size is None.
        result = tf.scan(running_sum, x)
        # Scan over dimension 0 of the transpose, whose static size is 2.
        result_t = tf.scan(running_sum, x_t, infer_shape=False)

        # Gradients must be constructible for both variants.
        result_grad = tf.gradients(result, [x])[0]
        result_t_grad = tf.gradients(result_t, [x_t])[0]

        # Only checks that everything evaluates; no values are asserted.
        fetches = [result, result_t, result_grad, result_t_grad]
        sess.run(fetches, feed_dict={x: [[1.0, 2.0]]})
def tensorflow_test():
    """Build a nested ``tf.scan`` graph and run the variable initializer.

    ``tf.scan`` calls its function as ``fn(accumulator, element)``; the
    local helpers below name their parameters accordingly.

    NOTE(review): ``variable`` is never added into the scanned computation,
    so the gradient requested for it is not connected to ``loss``; the
    result of ``tf.gradients`` is also never used.
    """
    import tensorflow as tf

    nested_input = tf.placeholder(
        tf.float32, shape=[outer_len, inner_len, input_dim])
    variable = tf.Variable(np.float32(1.0))

    def inner_func(acc, item):
        return acc + item

    def outer_func(acc, item):
        # The inner scan runs over the outer accumulator.
        inner_start = tf.zeros([input_dim])
        inner_res = tf.scan(fn=inner_func, elems=acc, initializer=inner_start)
        return item + inner_res

    outer_start = tf.zeros([inner_len, input_dim])
    outputs = tf.scan(fn=outer_func, elems=nested_input, initializer=outer_start)
    loss = tf.reduce_sum(outputs)

    grad = tf.gradients(loss, [variable])
    init_op = tf.initialize_all_variables()
    with tf.Session() as sess:
        sess.run(init_op)
def outer_func(curr, prev):
    """Scan ``inner_func`` over ``curr`` and add the stacked result to ``prev``.

    NOTE(review): when this is passed to ``tf.scan`` as ``fn``, the first
    argument receives the accumulator and the second the current element,
    which is the opposite of what the parameter names suggest; confirm
    against the caller.
    """
    zero_start = tf.zeros([input_dim])
    scanned = tf.scan(fn=inner_func, elems=curr, initializer=zero_start)
    return prev + scanned
def omniglot():
    """Open an interactive session and define the ``update_tensor`` helper.

    Only the part of this function visible in this snippet is reproduced:
    it creates a ``tf.InteractiveSession`` and defines ``update_tensor``.
    """
    sess = tf.InteractiveSession()
    # Disabled scratch code, kept as an inert string literal (never executed).
    """
    def wrapper(v):
        return tf.Print(v, [v], message="Printing v")

    v = tf.Variable(initial_value=np.arange(0, 36).reshape((6, 6)), dtype=tf.float32, name='Matrix')

    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    temp = tf.Variable(initial_value=np.arange(0, 36).reshape((6, 6)), dtype=tf.float32, name='temp')
    temp = wrapper(v)
    #with tf.control_dependencies([temp]):
    temp.eval()
    print 'Hello'"""

    def update_tensor(V, dim2, val):
        """Return V with, in each row, the entry at index dim2[row] replaced by val[row]."""
        # Update tensor V, with index(:,dim2[:]) by val[:]
        val = tf.cast(val, V.dtype)

        def body(_, row_args):
            # Tuple parameters in the signature (`def body(_, (v, d2, chg))`)
            # are a syntax error on Python 3 (PEP 3113); unpack here instead.
            v, d2, chg = row_args
            d2_int = tf.cast(d2, tf.int32)
            # Rebuild the row around the replaced element, then slice it
            # back to the original static row length.
            rebuilt = tf.concat_v2(
                [v[:d2_int], [chg], v[d2_int + 1:]], axis=0)
            return tf.slice(rebuilt, [0], [v.get_shape().as_list()[0]])

        Z = tf.scan(
            body,
            elems=(V, dim2, val),
            initializer=tf.constant(
                1, shape=V.get_shape().as_list()[1:], dtype=tf.float32),
            name="Scan_Update")
        return Z
def build(self, preSoftmaxPi, preSoftmaxA, preSoftmaxB):
    """Create the variables, scaled forward recursion, cost and train op.

    Stores ``preSoftmaxPi`` / ``preSoftmaxA`` / ``preSoftmaxB`` as variables,
    a 1-D int32 placeholder ``tfx``, the cost (negative sum of the log
    scale factors) and an Adam train op on the instance.
    """
    M, _ = preSoftmaxB.shape

    self.preSoftmaxPi = tf.Variable(preSoftmaxPi)
    self.preSoftmaxA = tf.Variable(preSoftmaxA)
    self.preSoftmaxB = tf.Variable(preSoftmaxB)

    pi = tf.nn.softmax(self.preSoftmaxPi)
    A = tf.nn.softmax(self.preSoftmaxA)
    B = tf.nn.softmax(self.preSoftmaxB)

    # Observation sequence fed at run time.
    self.tfx = tf.placeholder(tf.int32, shape=(None,), name='x')

    def recurrence(old_a_old_s, x_t):
        # One forward step: propagate, weight by the emission column for
        # x_t, then normalise and report the normaliser.
        previous_alpha = tf.reshape(old_a_old_s[0], (1, M))
        propagated = tf.matmul(previous_alpha, A)
        weighted = propagated * B[:, x_t]
        flat = tf.reshape(weighted, (M,))
        total = tf.reduce_sum(flat)
        return (flat / total), total

    # The first observation is consumed by the initializer, so the scan
    # only walks tfx[1:]. Its scale is fixed to 1, which contributes
    # log(1) = 0 to the cost.
    remaining = self.tfx[1:]
    start = (pi * B[:, self.tfx[0]], np.float32(1.0))
    alpha, scale = tf.scan(fn=recurrence, elems=remaining, initializer=start)

    self.cost = -tf.reduce_sum(tf.log(scale))
    self.train_op = tf.train.AdamOptimizer(1e-2).minimize(self.cost)
def testScan_MultiInputSameTypeOutput(self):
    """Scan over a pair of inputs, producing a pair of running sums."""

    def add_pairwise(acc, item):
        return (acc[0] + item[0], acc[1] + item[1])

    with self.test_session() as sess:
        elems = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
        scanned = tf.scan(add_pairwise, (elems, -elems))
        positive_sums, negative_sums = sess.run(scanned)
        self.assertAllEqual(np.cumsum(elems), positive_sums)
        self.assertAllEqual(np.cumsum(-elems), negative_sums)
def _marginal_hidden_probs(self):
    """Return the exponentiated per-step log-probabilities of the hidden state.

    The initial log-probabilities are broadcast to the batch shape and, when
    there is more than one step, pushed through the transition
    log-probabilities once per additional step.
    """
    full_shape = tf.concat(
        [self.batch_shape_tensor(), [self._num_states]], axis=0)
    # initial_log_probs :: batch_shape num_states
    initial_log_probs = tf.broadcast_to(self._log_init, full_shape)

    if self._num_steps <= 1:
        # Single step: only add the leading step axis.
        return tf.exp(initial_log_probs[tf.newaxis, ...])

    transition_log_probs = self._log_trans

    def forward_step(log_probs, _):
        return _log_vector_matrix(log_probs, transition_log_probs)

    # The scanned values are ignored; they only fix the number of steps.
    dummy_index = tf.zeros(self._num_steps - 1, dtype=tf.float32)
    later_steps = tf.scan(forward_step, dummy_index,
                          initializer=initial_log_probs,
                          name="forward_log_probs")
    # The scan output excludes the initializer, so prepend it.
    forward_log_probs = tf.concat([[initial_log_probs], later_steps], axis=0)

    # returns :: num_steps batch_shape num_states
    return tf.exp(forward_log_probs)
def define_ppo_epoch(memory, policy_factory, config):
    """Run ``config.optimization_epochs`` PPO steps and return a merged summary op."""
    observation, reward, done, action, old_pdf, value = memory

    # Block gradients from flowing back through the collected rollout.
    observation, action, reward, done, value, old_pdf = [
        tf.stop_gradient(tensor)
        for tensor in (observation, action, reward, done, value, old_pdf)
    ]

    def run_step(unused_acc, unused_epoch):
        # Every iteration re-runs one optimisation step on the same data.
        return define_ppo_step(observation, action, reward, done, value,
                               old_pdf, policy_factory, config)

    ppo_step_rets = tf.scan(
        run_step,
        tf.range(config.optimization_epochs),
        [0., 0., 0., 0., 0., 0.],
        parallel_iterations=1)

    summaries_names = ["policy_loss", "value_loss", "entropy_loss",
                       "policy_gradient", "value_gradient",
                       "entropy_gradient"]
    ppo_summaries = [tf.reduce_mean(ret) for ret in ppo_step_rets]

    summaries = []
    for summary_name, summary in zip(summaries_names, ppo_summaries):
        summaries.append(tf.summary.scalar(summary_name, summary))
    losses_summary = tf.summary.merge(summaries)

    # Chain a Print op per statistic onto the merged summary.
    for summary_name, summary in zip(summaries_names, ppo_summaries):
        losses_summary = tf.Print(
            losses_summary, [summary], summary_name + ": ")
    return losses_summary
def cummax(x, reverse=False, name=None):
    """Compute the cumulative maximum of the tensor `x` along its first axis.

    This operation is similar to the more classic `cumsum`. Only 1D tensors
    are supported for now; there is no `axis` argument.

    Args:
      x: A `Tensor` (or value convertible to one) of a type accepted by
        `tf.maximum`.
      reverse: A `bool` (default: False). If True, the maximum is accumulated
        from the last element towards the first.
      name: A name for the operation (optional).

    Returns:
      A `Tensor` holding the running maximum, in the same element order as `x`.
    """
    with ops.name_scope(name, "Cummax", [x]) as name:
        x = ops.convert_to_tensor(x, name="x")
        # Not very optimal: should directly integrate reverse into tf.scan.
        if reverse:
            x = tf.reverse(x, axis=[0])
        # 'Accumulating' maximum: ensure it is always increasing.
        # tf.maximum already has the (accumulator, element) call shape that
        # tf.scan expects, so no wrapper lambda is needed.
        cmax = tf.scan(tf.maximum, x,
                       initializer=None, parallel_iterations=1,
                       back_prop=False, swap_memory=False)
        if reverse:
            # Undo the reversal so the output lines up with the input.
            cmax = tf.reverse(cmax, axis=[0])
        return cmax
def get_states_b(self):
    """Run the backward pass over time and return all hidden and memory states.

    The backward scan is seeded with the last hidden and memory states of
    the forward pass, and its outputs are flipped back along the first axis
    before being returned.
    """
    time_only = [True, False, False]  # reverse along the first axis only

    forward_hidden, forward_memory = self.get_states_f()

    # Flip along the first axis and take index 0, i.e. the last forward state.
    last_hidden_states = tf.reverse(forward_hidden, time_only)[0, :, :]
    last_memory_states = tf.reverse(forward_memory, time_only)[0, :, :]

    # Seed the backward pass with the final forward hidden/memory pair.
    initial_hidden = tf.pack([last_hidden_states, last_memory_states])

    stacked_states = tf.scan(self.Lstm_b,
                             self.processed_input_rev,
                             initializer=initial_hidden,
                             name='states')

    # Index 0 on axis 1 is the hidden state, index 1 the memory state; flip
    # both back so they follow the original order.
    all_hidden_states = tf.reverse(stacked_states[:, 0, :, :], time_only)
    all_memory_states = tf.reverse(stacked_states[:, 1, :, :], time_only)
    return all_hidden_states, all_memory_states
def feature(self, input_x, name = ''):
    """Project the input with ``self.W`` and extract conv/max-pool features.

    Args:
        input_x: either a rank-2 tensor (looked up in ``self.embbeding_mat``
            first) or an already embedded tensor.
            # assumes batch_size x seq_length [x g_emb_dim] — TODO confirm
        name: unused.

    Returns:
        A 2-D tensor whose second dimension is ``sum(self.num_filters)``.
    """
    if len(input_x.get_shape()) == 2:
        # in case input_x : batch_size x seq_length [tokens]
        input_x = tf.nn.embedding_lookup(self.embbeding_mat, input_x)
    # input_x: batch_size x seq_length x g_emb_dim
    pooled_outputs = []

    # Apply the projection to every leading-axis slice. The previous
    # `tf.scan(lambda a, x: ..., input_x)` had no initializer, so scan used
    # slice 0 as the initial accumulator and emitted it unprojected;
    # `tf.map_fn` applies the function to all slices, including the first.
    embedded_chars = tf.map_fn(lambda seq: tf.matmul(seq, self.W), input_x)
    embedded_chars_expanded = tf.expand_dims(embedded_chars, -1)

    filter_specs = zip(self.filter_sizes, self.num_filters)
    for index, (filter_size, _) in enumerate(filter_specs):
        with tf.name_scope("conv-maxpool-%s-midterm" % filter_size):
            # Convolution Layer
            conv = tf.nn.conv2d(
                embedded_chars_expanded,
                self.W_conv[index],
                strides=[1, 1, 1, 1],
                padding="VALID",
                name="conv")
            # Apply nonlinearity
            h = tf.nn.relu(tf.nn.bias_add(conv, self.b_conv[index]),
                           name="relu")
            # Maxpooling over the outputs
            pooled = tf.nn.max_pool(
                h,
                ksize=[1, self.sequence_length - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="pool")
            pooled_outputs.append(pooled)

    # Combine all the pooled features
    num_filters_total = sum(self.num_filters)
    h_pool = tf.concat(pooled_outputs, 3)
    h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])
    return h_pool_flat
def MiniminibatchLayer(name, n_in, dim_b, dim_c, group_size, inputs):
    """Apply ``MinibatchLayer`` independently to shuffled groups of the batch.

    The inputs are shuffled along the first axis, reshaped into groups of
    ``group_size`` rows of width ``n_in``, passed group by group through
    ``MinibatchLayer``, and flattened to rows of width ``n_in + dim_b``.
    """
    inputs = tf.random_shuffle(inputs)
    inputs = tf.reshape(inputs, [-1, group_size, n_in])

    def apply_to_group(group):
        return MinibatchLayer(name, n_in, dim_b, dim_c, group)

    # The previous `tf.scan(f, inputs)` had no initializer and `f` ignored
    # its accumulator, so scan used group 0 as the initial accumulator and
    # emitted it as-is, bypassing MinibatchLayer. `tf.map_fn` applies the
    # layer to every group.
    outputs = tf.map_fn(apply_to_group, inputs)
    return tf.reshape(outputs, [-1, n_in+dim_b])
def testScanUnknownShape(self):
    """With shapeless placeholders the scan output has no known dims."""

    def passthrough(_, current_input):
        return current_input

    x = tf.placeholder(tf.float32)
    initializer = tf.placeholder(tf.float32)
    y = tf.scan(passthrough, x, initializer=initializer)
    self.assertIs(None, y.get_shape().dims)
def diagonal_neural_gpu(inputs, hparams, name=None):
    """Improved Neural GPU as in https://arxiv.org/abs/1702.08727.

    Returns:
        A pair: index 0 along the leading axis of the stacked scan states,
        and twice the mean of the per-step losses.
    """
    kernel = (hparams.kernel_height, hparams.kernel_width)

    with tf.variable_scope(name, "diagonal_neural_gpu"):

        def step(state_tup, inp):
            """Single step of the improved Neural GPU."""
            state, _ = state_tup
            x = state
            for layer in xrange(hparams.num_hidden_layers):
                # Only the loss of the last layer survives the loop.
                x, new_loss = common_layers.diagonal_conv_gru(
                    x, kernel, hparams.hidden_size,
                    dropout=hparams.dropout, name="dcgru_%d" % layer)
            # Padding input is zeroed-out in the modality, we check this by
            # summing.
            inp_magnitude = tf.reduce_sum(tf.abs(inp), axis=[1, 2])
            padding_inp = tf.less(inp_magnitude, 0.00001)
            # No-op where inp is padding.
            new_state = tf.where(padding_inp, state, x)
            return new_state, new_loss

        scan_elems = tf.transpose(inputs, [1, 0, 2, 3])
        scan_start = (inputs, tf.constant(0.0))
        final_state, losses = tf.scan(
            step, scan_elems, initializer=scan_start,
            parallel_iterations=1, swap_memory=True)
        return final_state[0, :, :, :, :], 2.0 * tf.reduce_mean(losses)
def testScan_MultiInputSingleOutput(self):
    """Scan over a pair of inputs while carrying a single accumulator."""

    def scale_by_pair_sum(acc, pair):
        # (elems + 1) + (-elems) is 1 for every element.
        return acc * (pair[0] + pair[1])

    with self.test_session():
        elems = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
        initializer = np.array(1.0)
        # Multiply a * 1 each time
        scanned = tf.scan(scale_by_pair_sum, (elems + 1, -elems), initializer)
        self.assertAllEqual([1.0, 1.0, 1.0, 1.0, 1.0, 1.0], scanned.eval())
def fast_dlstm(s_t, state_in):
    """Scan a 256-unit LSTM over ``s_t``, dilating the first state component.

    After every step the first component of the new LSTM state is rebuilt
    chunk by chunk through ``conditional_backprop``, switched by a one-hot
    vector of a step counter that cycles modulo 8.

    Returns:
        ``(out_states, cell_states, state_out)`` sliced from the stacked
        scan states.
    """
    chunks = 8
    lstm = rnn.LSTMCell(256, state_is_tuple=True)

    def dilate_one_time_step(one_h, switcher, num_chunks):
        h_size = 256
        chunk_step_size = h_size // num_chunks
        chunk_starts = range(0, h_size, chunk_step_size)
        pieces = []
        for switch_step, h_step in zip(range(num_chunks), chunk_starts):
            piece = conditional_backprop(
                switcher[switch_step],
                one_h[h_step: h_step + chunk_step_size])
            pieces.append(piece)
        stacked = tf.stack(pieces)
        return tf.reshape(stacked, [-1, 256])

    def dlstm_scan_fn(previous_output, current_input):
        out, raw_state = lstm(current_input, previous_output[1])
        counter = previous_output[2]
        basis_i = tf.one_hot(counter, depth=chunks)
        dilated = dilate_one_time_step(
            tf.squeeze(raw_state[0]), basis_i, chunks)
        new_state = rnn.LSTMStateTuple(dilated, raw_state[1])
        # Advance the counter, wrapping around after `chunks` steps.
        counter = counter + tf.constant(1)
        wrapped = tf.mod(counter, chunks)
        return out, new_state, wrapped

    time_major_input = tf.transpose(s_t, [1, 0, 2])
    scan_start = (state_in[1],
                  rnn.LSTMStateTuple(*state_in),
                  tf.constant(0))
    rnn_outputs, final_states, mod_idxs = tf.scan(
        dlstm_scan_fn, time_major_input, initializer=scan_start)

    # Last step, first index on axis 1, for both state components.
    state_out = [final_states[0][-1, 0, :], final_states[1][-1, 0, :]]
    cell_states = final_states[0][:, 0, :]
    out_states = final_states[1][:, 0, :]
    return out_states, cell_states, state_out
def define_ppo_epoch(memory, hparams):
    """Build the minibatched PPO optimisation loop and its merged summary op."""
    observation, reward, done, action, old_pdf, value = memory

    def add_lists_elementwise(l1, l2):
        return [x + y for x, y in zip(l1, l2)]

    # This is to avoid propagating gradients through simulated environment.
    observation = tf.stop_gradient(observation)
    action = tf.stop_gradient(action)
    reward = tf.stop_gradient(reward)
    if hasattr(hparams, "rewards_preprocessing_fun"):
        reward = hparams.rewards_preprocessing_fun(reward)
    done = tf.stop_gradient(done)
    value = tf.stop_gradient(value)
    old_pdf = tf.stop_gradient(old_pdf)

    advantage = calculate_generalized_advantage_estimator(
        reward, value, done, hparams.gae_gamma, hparams.gae_lambda)
    discounted_reward = tf.stop_gradient(advantage + value)

    # Normalise the advantage over both leading axes.
    advantage_mean, advantage_variance = tf.nn.moments(
        advantage, axes=[0, 1], keep_dims=True)
    centered = advantage - advantage_mean
    advantage_normalized = tf.stop_gradient(
        centered / (tf.sqrt(advantage_variance) + 1e-8))

    # NOTE(review): `/` is kept as in the original; under true division this
    # is a float, which is then passed to tf.range — confirm intent.
    number_of_batches = (hparams.epoch_length * hparams.optimization_epochs
                         / hparams.optimization_batch_size)

    slices = (observation, action, discounted_reward, advantage_normalized,
              old_pdf)
    dataset = tf.data.Dataset.from_tensor_slices(slices)
    dataset = dataset.shuffle(buffer_size=hparams.epoch_length,
                              reshuffle_each_iteration=True)
    dataset = dataset.repeat(hparams.optimization_epochs)
    dataset = dataset.batch(hparams.optimization_batch_size)
    iterator = dataset.make_initializable_iterator()
    optimizer = get_optimiser(hparams)

    def accumulate_step(totals, unused_index):
        # Add this minibatch's statistics onto the running totals.
        step_stats = define_ppo_step(iterator.get_next(), optimizer, hparams)
        return add_lists_elementwise(totals, step_stats)

    # The iterator must be (re)initialised before the loop pulls batches.
    with tf.control_dependencies([iterator.initializer]):
        ppo_step_rets = tf.scan(
            accumulate_step,
            tf.range(number_of_batches),
            [0., 0., 0., 0., 0., 0.],
            parallel_iterations=1)

    summaries_names = ["policy_loss", "value_loss", "entropy_loss",
                       "policy_gradient", "value_gradient",
                       "entropy_gradient"]
    ppo_summaries = [tf.reduce_mean(ret) / number_of_batches
                     for ret in ppo_step_rets]

    summaries = []
    for summary_name, summary in zip(summaries_names, ppo_summaries):
        summaries.append(tf.summary.scalar(summary_name, summary))
    losses_summary = tf.summary.merge(summaries)

    # Chain a Print op per statistic onto the merged summary.
    for summary_name, summary in zip(summaries_names, ppo_summaries):
        losses_summary = tf.Print(
            losses_summary, [summary], summary_name + ": ")
    return losses_summary
def testScan_Grad(self):
    """Gradient of a running product with respect to its initializer."""

    def multiply(acc, item):
        return tf.mul(acc, item)

    with self.test_session():
        elems = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], name="data")
        v = tf.constant(2.0, name="v")
        products = tf.scan(multiply, elems, initializer=v)
        grad_wrt_v = tf.gradients(products, v)[0]
        # 1 + 2 + 6 + 24 + 120 + 720 = 873
        self.assertAllEqual(873.0, grad_wrt_v.eval())
def testScanShape(self):
    """The static shape of the scan output matches its evaluated shape."""

    def passthrough(_, current_input):
        return current_input

    with self.test_session():
        x = tf.constant([[1, 2, 3], [4, 5, 6]])
        initializer = tf.constant([0, 0, 0])
        y = tf.scan(passthrough, x, initializer=initializer)
        static_shape = y.get_shape()
        self.assertAllEqual(static_shape, y.eval().shape)
def testScan_Control(self):
    """Scan can be built under a control dependency on a placeholder."""

    def multiply(acc, item):
        return item * acc

    with self.test_session() as sess:
        s = tf.placeholder(tf.float32, shape=[None])
        b = tf.placeholder(tf.bool)

        with tf.control_dependencies([b]):
            c = tf.scan(multiply, s)

        actual = sess.run(c, {s: [1, 3, 3], b: True})
        self.assertAllClose(np.array([1.0, 3.0, 9.0]), actual)
def _compute_hidden(self):
    """Scan ``self.rnn_step`` over ``self.inputs`` and return the stacked states."""
    with tf.variable_scope('states'):
        return tf.scan(
            self.rnn_step,
            self.inputs,
            initializer=self.initial_state,
            name='states')
def fast_dlstm(self, s_t, state_in, lstm, chunks, h_size):
    """Scan ``lstm`` over ``s_t``, updating one state chunk per step.

    The full state is treated as ``chunks`` column blocks of width
    ``h_size // chunks``. Each step feeds the block selected by a cycling
    step counter to ``lstm`` and writes the returned sub-state back into
    that block via ``self.conditional_sub_state``.

    Returns:
        ``(rnn_outputs, final_states)`` as stacked by ``tf.scan``.
    """
    chunk_step_size = h_size // chunks

    def get_sub_state(state, state_step):
        # Cut the block belonging to `state_step` out of c and h.
        c, h = state
        h_step = state_step * chunk_step_size
        sub_state_h = h[:, h_step: h_step + chunk_step_size]
        sub_state_c = c[:, h_step: h_step + chunk_step_size]
        sub_state_h.set_shape([1, chunk_step_size])
        sub_state_c.set_shape([1, chunk_step_size])
        return tf.contrib.rnn.LSTMStateTuple(sub_state_c, sub_state_h)

    def build_new_state(new_sub_state, previous_state, state_step):
        # Rebuild the full state block by block, choosing between the new
        # sub-state and the previous block.
        c_previous_state, h_previous_state = previous_state
        c_new_sub_state, h_new_sub_state = new_sub_state
        one_hot_state_step = tf.one_hot(state_step, depth=chunks)

        h_blocks = []
        c_blocks = []
        block_starts = range(0, h_size, chunk_step_size)
        for switch_step, h_step in zip(range(chunks), block_starts):
            is_this_current_step = one_hot_state_step[switch_step]

            h_block = self.conditional_sub_state(
                is_this_current_step,
                h_new_sub_state,
                h_previous_state[:, h_step: h_step + chunk_step_size])
            h_block.set_shape([1, chunk_step_size])

            c_block = self.conditional_sub_state(
                is_this_current_step,
                c_new_sub_state,
                c_previous_state[:, h_step: h_step + chunk_step_size])
            c_block.set_shape([1, chunk_step_size])

            h_blocks.append(h_block)
            c_blocks.append(c_block)

        h_new_state = tf.concat(h_blocks, axis=1)
        c_new_state = tf.concat(c_blocks, axis=1)
        return tf.contrib.rnn.LSTMStateTuple(c_new_state, h_new_state)

    def dlstm_scan_fn(previous_output, current_input):
        full_state = previous_output[1]
        state_step = previous_output[2]
        sub_state = get_sub_state(full_state, state_step)
        out, sub_state_out = lstm(current_input, sub_state)
        state_out = build_new_state(sub_state_out, full_state, state_step)
        # Advance the counter, wrapping around after `chunks` steps.
        state_step = state_step + tf.constant(1)
        new_state_step = tf.mod(state_step, chunks)
        return out, state_out, new_state_step

    # Placeholder for the output slot of the initializer.
    first_input = state_in.c[:, 0: chunk_step_size]
    time_major_input = tf.transpose(s_t, [1, 0, 2])
    scan_start = (first_input, state_in, tf.constant(0))
    rnn_outputs, final_states, mod_idxs = tf.scan(
        dlstm_scan_fn, time_major_input, initializer=scan_start,
        name="dlstm")
    return rnn_outputs, final_states
def testScan_SingleInputMultiOutput(self):
    """Scan over one input while carrying a pair of accumulators."""

    def step(acc, item):
        # The second accumulator flips sign on every step.
        return (acc[0] * item, -acc[1] * item)

    with self.test_session() as sess:
        elems = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
        initializer = (np.array(1.0), np.array(-1.0))
        scanned = tf.scan(step, elems, initializer)
        first, second = sess.run(scanned)
        self.assertAllEqual([1.0, 2.0, 6.0, 24.0, 120.0, 720.0], first)
        self.assertAllEqual([1.0, -2.0, 6.0, -24.0, 120.0, -720.0], second)
def test_scan(self):
    """A copied scan op evaluates to the same running sum as the original."""

    def running_sum(acc, item):
        return acc + item

    expected = [2.0, 5.0, 6.0]
    with self.test_session() as sess:
        ed.set_seed(42)
        op = tf.scan(running_sum, tf.constant([2.0, 3.0, 1.0]))
        copy_op = ed.copy(op)
        result_copy, result = sess.run([copy_op, op])
        self.assertAllClose(result_copy, expected)
        self.assertAllClose(result, expected)
def discounted_return(reward, length, discount):
    """Discounted Monte-Carlo returns."""

    def accumulate(agg, cur):
        return cur + discount * agg

    # Zero out rewards at timesteps at or beyond each sequence's length.
    timestep = tf.range(reward.shape[1].value)
    mask = tf.cast(timestep[None, :] < length[:, None], tf.float32)

    # Scan iterates over the leading axis, so reverse axis 1 and move it to
    # the front before accumulating.
    reversed_reward = tf.reverse(mask * reward, [1])
    leading_time = tf.transpose(reversed_reward, [1, 0])
    zero_return = tf.zeros_like(reward[:, -1])
    scanned = tf.scan(accumulate, leading_time, zero_return,
                      parallel_iterations=1, back_prop=False)

    # Undo the transpose and the reversal.
    restored_axes = tf.transpose(scanned, [1, 0])
    return_ = tf.reverse(restored_axes, [1])
    return tf.check_numerics(tf.stop_gradient(return_), 'return')
def testScan_Scoped(self):
    """Scan creates its variable under the scope once and can reuse it."""
    first_expected = np.array([1, 6, 18, 44, 98, 208])
    reuse_expected = np.array([6, 16, 38, 84, 178, 368])

    with self.test_session() as sess:
        with tf.variable_scope("root") as varscope:
            elems = tf.constant([1, 2, 3, 4, 5, 6], name="data")

            scanned = tf.scan(simple_scoped_fn, elems)
            # Check that we have the one variable we asked for here.
            self.assertEqual(len(tf.trainable_variables()), 1)
            self.assertEqual(tf.trainable_variables()[0].name,
                             "root/body/two:0")
            sess.run([tf.initialize_all_variables()])
            self.assertAllEqual(first_expected, scanned.eval())

            # Now let's reuse our single variable.
            varscope.reuse_variables()
            scanned = tf.scan(simple_scoped_fn, elems, initializer=2)
            self.assertEqual(len(tf.trainable_variables()), 1)
            self.assertAllEqual(reuse_expected, scanned.eval())
def runScan(self, n):
    """Report the wall time of building a running-sum scan over ``n`` elements."""

    def running_sum(acc, item):
        return acc + item

    elems = np.arange(n)
    started = time.time()
    # Only the tf.scan call itself is timed; the result is discarded.
    _ = tf.scan(running_sum, elems, parallel_iterations=1)
    elapsed = time.time() - started
    self.report_benchmark(name='scan', iters=n, wall_time=elapsed)
def bayesian_rnn(cell, inputs, y_i):
    """Scan ``cell`` over ``inputs`` from an all-zero pair and return the second output.

    ``y_i`` is accepted but not used.
    """
    zero_first = tf.zeros([batch_size, embedding_size])
    zero_second = tf.zeros([batch_size, embedding_size])
    c_list, h_list = tf.scan(
        cell, inputs, initializer=(zero_first, zero_second))
    return h_list
def forward_batch(observations, transitions, viterbi=False,
                  return_alpha=False, return_best_sequence=False):
    """
    Takes as input:
        - observations, sequence of shape (batch_size, n_steps, n_classes)
        - transitions, sequence of shape (n_classes, n_classes)
    Probabilities must be given in the log space.
    Compute alpha, matrix of size (n_steps, batch_size, n_classes), such that
    alpha[i, j] represents one of these 2 values:
        - the probability that the real path at node i ends in j
        - the maximum probability of a path finishing in j at node i (Viterbi)
    Returns one of these values:
        - alpha
        - the best sequence (requires viterbi and not return_alpha)
        - the final probability, which can be:
            - the sum of the probabilities of all paths
            - the probability of the best path (Viterbi)
    """
    assert not return_best_sequence or (viterbi and not return_alpha)

    shape_t = transitions.get_shape().dims
    transitions_ = tf.reshape(
        transitions, (1, shape_t[0].value, shape_t[1].value))

    def recurrence(prev, obs):
        # With best-sequence tracking the carry is [scores, backpointers].
        previous = prev[0] if return_best_sequence else prev
        shape_ = tf.shape(previous)
        previous = tf.reshape(previous, (shape_[0], shape_t[0].value, 1))
        obs = tf.reshape(obs, (shape_[0], 1, shape_t[0].value))
        if not viterbi:
            return log_sum_exp(previous + obs + transitions, axis=1)
        scores = previous + obs + transitions_
        out = tf.reduce_max(scores, axis=1)
        if not return_best_sequence:
            return out
        out2 = tf.argmax(scores, axis=1)
        return [out, out2]

    # Move the step axis to the front so tf.scan iterates over steps.
    obs = tf.transpose(observations, (1, 0, 2))
    initial = obs[0]
    ones = tf.ones(tf.shape(initial), dtype=tf.int64)
    if return_best_sequence:
        initial = [initial, ones]
    alpha = tf.scan(fn=recurrence, elems=obs[1:], initializer=initial)

    if return_alpha:
        return alpha

    if return_best_sequence:
        output_info = get_array_arg_max_coordinate(
            tf.cast(tf.argmax(alpha[0][-1], axis=1), tf.int32))

        def recurrence_cal(prev, x):
            sequ = tf.gather_nd(x, prev)
            return get_array_arg_max_coordinate(sequ)

        # Walk the backpointers from the last step to the first.
        sequence = tf.scan(
            fn=recurrence_cal,
            elems=tf.cast(alpha[1][::-1], tf.int32),
            initializer=output_info)
        sequence = sequence[:, :, -1]
        # Restore step order and append the best final class.
        sequence = tf.concat(
            [sequence[::-1],
             [tf.cast(tf.argmax(alpha[0][-1], axis=1), tf.int32)]],
            axis=0)
        return tf.transpose(sequence)

    if viterbi:
        return tf.reduce_max(alpha[-1], axis=1)
    return log_sum_exp(alpha[-1], axis=1)
def det_loop_batch(outputs, inputs):
    """Scan ``det_loop_time`` over ``inputs`` starting from 0.0.

    ``outputs`` is accepted but not used.
    """
    # inputs: num_time_dims x dim_latent x dim_latent
    # The scan runs over the leading (time) axis.
    return tf.scan(fn=det_loop_time, elems=inputs, initializer=0.0)
def build_actor(agent, env, level_name, action_set):
    """Builds the actor loop.

    Unrolls the agent/environment interaction for `FLAGS.unroll_length` steps
    with `tf.scan`, keeping the environment state/output and the agent
    state/output in local variables so that the next unroll continues where
    this one stopped.

    Args:
      agent: callable taking `((action, env_output), agent_state)` and
        returning `(agent_output, agent_state)`; also provides
        `initial_state(batch_size)`.
      env: object providing `initial()` and `step(raw_action, env_state)`.
      level_name: value stored unchanged in the returned `ActorOutput`.
      action_set: tensor indexed with the agent's action to obtain the raw
        action passed to `env.step`.

    Returns:
      An `ActorOutput` whose tensors are wrapped in `tf.stop_gradient`. The
      env/agent outputs hold the values from before the unroll followed by
      the unrolled values along the first axis.
    """
    # Initial values.
    initial_env_output, initial_env_state = env.initial()
    initial_agent_state = agent.initial_state(1)
    initial_action = tf.zeros([1], dtype=tf.int32)
    # Run the agent once only to learn the structure, shapes and dtypes of
    # its output; the values themselves are replaced by zeros below.
    dummy_agent_output, _ = agent(
        (initial_action,
         nest.map_structure(lambda t: tf.expand_dims(t, 0),
                            initial_env_output)),
        initial_agent_state)
    initial_agent_output = nest.map_structure(
        lambda t: tf.zeros(t.shape, t.dtype), dummy_agent_output)

    # All state that needs to persist across training iterations. This includes
    # the last environment output, agent state and last agent output. These
    # variables should never go on the parameter servers.
    def create_state(t):
        # Creates a unique variable scope to ensure the variable name is unique.
        with tf.variable_scope(None, default_name='state'):
            return tf.get_local_variable(t.op.name, initializer=t,
                                         use_resource=True)

    persistent_state = nest.map_structure(
        create_state, (initial_env_state, initial_env_output,
                       initial_agent_state, initial_agent_output))

    def step(input_, unused_i):
        """Steps through the agent and the environment."""
        env_state, env_output, agent_state, agent_output = input_

        # Run agent.
        action = agent_output[0]
        # Add a leading axis of size 1 to every env output before the call.
        batched_env_output = nest.map_structure(
            lambda t: tf.expand_dims(t, 0), env_output)
        agent_output, agent_state = agent((action, batched_env_output),
                                          agent_state)

        # Convert action index to the native action.
        action = agent_output[0][0]
        raw_action = tf.gather(action_set, action)

        env_output, env_state = env.step(raw_action, env_state)

        return env_state, env_output, agent_state, agent_output

    # Run the unroll. `read_value()` is needed to make sure later usage will
    # return the first values and not a new snapshot of the variables.
    first_values = nest.map_structure(lambda v: v.read_value(),
                                      persistent_state)
    _, first_env_output, first_agent_state, first_agent_output = first_values

    # Use scan to apply `step` multiple times, therefore unrolling the agent
    # and environment interaction for `FLAGS.unroll_length`. `tf.scan` forwards
    # the output of each call of `step` as input of the subsequent call of `step`.
    # The unroll sequence is initialized with the agent and environment states
    # and outputs as stored at the end of the previous unroll.
    # `output` stores lists of all states and outputs stacked along the entire
    # unroll. Note that the initial states and outputs (fed through `initializer`)
    # are not in `output` and will need to be added manually later.
    output = tf.scan(step, tf.range(FLAGS.unroll_length), first_values)
    _, env_outputs, _, agent_outputs = output

    # Update persistent state with the last output from the loop.
    assign_ops = nest.map_structure(lambda v, t: v.assign(t[-1]),
                                    persistent_state, output)

    # The control dependency ensures that the final agent and environment states
    # and outputs are stored in `persistent_state` (to initialize next unroll).
    with tf.control_dependencies(nest.flatten(assign_ops)):
        # Remove the batch dimension from the agent state/output.
        first_agent_state = nest.map_structure(lambda t: t[0],
                                               first_agent_state)
        first_agent_output = nest.map_structure(lambda t: t[0],
                                                first_agent_output)
        # The stacked outputs have the unroll axis first, so the batch axis
        # is axis 1 here.
        agent_outputs = nest.map_structure(lambda t: t[:, 0], agent_outputs)

        # Concatenate first output and the unroll along the time dimension.
        full_agent_outputs, full_env_outputs = nest.map_structure(
            lambda first, rest: tf.concat([[first], rest], 0),
            (first_agent_output, first_env_output),
            (agent_outputs, env_outputs))

        output = ActorOutput(
            level_name=level_name, agent_state=first_agent_state,
            env_outputs=full_env_outputs, agent_outputs=full_agent_outputs)

        # No backpropagation should be done here.
        return nest.map_structure(tf.stop_gradient, output)
def _graph_fn_step(self):
    """Build the ops that step actor and environment `self.num_steps` times.

    Only the "tf" backend is handled; for any other backend the function
    falls through and returns None.

    A `tf.scan` over `tf.range(self.num_steps)` repeatedly asks the actor
    component for an action, steps the environment server with it and
    collects the results. After the scan, the time step, the current state
    and (if present) the internal states are written back to their
    variables, and the returned tensors are made dependent on those assigns.

    Returns:
        A DataOpTuple with, in order: terminals, states (rebuilt from their
        flattened components), then optionally actions, rewards, action
        probs and internal states, depending on the corresponding flags.
    """
    if get_backend() == "tf":

        def scan_func(accum, time_delta):
            # Not needed: preprocessed-previous-states (tuple!)
            # `state` is a tuple as well. See comment in ctor for why tf cannot use ContainerSpaces here.
            internal_states = None
            # Slot 1 of the accumulator holds the current state components.
            state = accum[1]
            if self.has_rnn:
                # Internal states, when present, are the last slot.
                internal_states = accum[-1]

            state = tuple(tf.convert_to_tensor(value=s) for s in state)

            flat_state = OrderedDict()
            for i, flat_key in enumerate(
                    self.state_space_actor_flattened.keys()):
                # Add a simple (size 1) batch rank to the state so it'll pass through the NN.
                # - Also have to add a time-rank for RNN processing.
                expanded = state[i]
                for _ in range(1 if self.has_rnn is False else 2):
                    expanded = tf.expand_dims(input=expanded, axis=0)
                # Make None so it'll be recognized as batch-rank by the auto-Space detector.
                flat_state[flat_key] = tf.placeholder_with_default(
                    input=expanded,
                    shape=(None, ) +
                    ((None, ) if self.has_rnn is True else ()) +
                    self.state_space_actor_list[i].shape)

            # Recreate state as the original Space to pass it into the actor-component.
            state = unflatten_op(flat_state)

            # Get action and preprocessed state (as batch-size 1).
            # The API called depends on whether action probs are requested.
            out = (self.actor_component.get_preprocessed_state_and_action
                   if self.add_action_probs is False else
                   self.actor_component.
                   get_preprocessed_state_action_and_action_probs)(
                       state,
                       # Add simple batch rank to internal_states.
                       None if internal_states is None else DataOpTuple(
                           internal_states),  # <- None for non-RNN systems
                       time_precentage=(((self.time_step + time_delta) /
                                         self.max_timesteps)
                                        if self.max_timesteps is not None
                                        else None))

            # Get output depending on whether it contains internal_states or not.
            a = out["action"]
            action_probs = out.get("action_probs")
            current_internal_states = out.get("last_internal_states")

            # Strip the batch (and maybe time) ranks again from the action in case the Env doesn't like it.
            a_no_extra_ranks = a[0, 0] if self.has_rnn is True else a[0]
            # Step through the Env and collect next state (tuple!), reward and terminal as single values
            # (not batched).
            out = self.environment_server.step_flow(a_no_extra_ranks)
            # The last two entries are reward and terminal; the rest is the
            # next state.
            s_, r, t_ = out[:-2], out[-2], out[-1]
            r = tf.cast(r, dtype="float32")

            # Add a and/or r to next_state?
            if self.add_previous_action_to_state is True:
                assert isinstance(
                    s_, tuple
                ), "ERROR: Cannot add previous action to non tuple!"
                s_ = s_ + (a_no_extra_ranks, )
            if self.add_previous_reward_to_state is True:
                assert isinstance(
                    s_, tuple
                ), "ERROR: Cannot add previous reward to non tuple!"
                s_ = s_ + (r, )

            # Note: s_ is packed as tuple.
            # The slot order here must match the `initializer` built below.
            ret = [t_, s_] + \
                ([a_no_extra_ranks] if self.add_action else []) + \
                ([r] if self.add_reward else []) + \
                ([(action_probs[0][0] if self.has_rnn is True else action_probs[0])] if
                 self.add_action_probs is True else []) + \
                ([tuple(current_internal_states)] if self.has_rnn is True else [])

            return tuple(ret)

        # Initialize the tf.scan run.
        initializer = [
            # terminals
            tf.zeros(shape=(), dtype=tf.bool),
            # current (raw) state (flattened components if ContainerSpace).
            tuple(
                map(lambda x: x.read_value(), self.current_state.values()))
        ]
        # Append actions and rewards if needed.
        if self.add_action:
            initializer.append(
                tf.zeros(shape=self.action_space.shape,
                         dtype=self.action_space.dtype))
        if self.add_reward:
            initializer.append(tf.zeros(shape=self.reward_space.shape))
        # Append action probs if needed.
        if self.add_action_probs is True:
            initializer.append(
                tf.zeros(shape=self.action_probs_space.shape))
        # Append internal states if needed.
        if self.current_internal_states is not None:
            initializer.append(
                tuple(
                    tf.placeholder_with_default(
                        internal_s.read_value(),
                        shape=(None, ) +
                        tuple(internal_s.shape.as_list()[1:]))
                    for internal_s in
                    self.current_internal_states.values()))

        # Scan over n time-steps (tf.range produces the time_delta with respect to the current time_step).
        # NOTE(review): an earlier note here said parallel iterations were
        # changed to 1, but the call below does not pass
        # `parallel_iterations` — confirm whether that is intended.
        step_results = list(
            tf.scan(fn=scan_func,
                    elems=tf.range(self.num_steps, dtype="int32"),
                    initializer=tuple(initializer),
                    back_prop=False))

        # Assign all values that need to be passed again into the next scan.
        assigns = [tf.assign_add(self.time_step,
                                 self.num_steps)]  # time step
        # State (or flattened state components).
        for flat_key, var_ref, state_comp in zip(
                self.state_space_actor_flattened.keys(),
                self.current_state.values(), step_results[1]):
            assigns.append(self.assign_variable(
                var_ref,
                state_comp[-1]))  # -1: current state (last observed)

        # Current internal state.
        if self.current_internal_states is not None:
            # TODO: What if internal states is not the last item in the list anymore due to some change.
            slot = -1
            # TODO: What if internal states is a dict? Right now assume some tuple.
            # Remove batch rank from internal states again.
            internal_states_wo_batch = list()
            for i, var_ref in enumerate(
                    self.current_internal_states.values(
                    )):  #range(len(step_results[slot])):
                # 1=batch axis (which has dim=1); 0=time axis.
                internal_states_component = tf.squeeze(
                    step_results[slot][i], axis=1)
                # `[-1:]` keeps a leading axis of size 1 on the stored value.
                assigns.append(
                    self.assign_variable(var_ref,
                                         internal_states_component[-1:]))
                internal_states_wo_batch.append(internal_states_component)
            step_results[slot] = tuple(internal_states_wo_batch)

        # Concatenate first and rest (and make the concatenated tensors (which are the important return information)
        # dependent on the assigns).
        with tf.control_dependencies(control_inputs=assigns):
            full_results = []
            for slot in range(len(step_results)):
                first_values, rest_values = initializer[
                    slot], step_results[slot]
                # Internal states need a slightly different concatenating as the batch rank is missing.
                if self.current_internal_states is not None and slot == len(
                        step_results) - 1:
                    full_results.append(
                        nest.map_structure(self._concat, first_values,
                                           rest_values))
                # States need concatenating (first state needed).
                elif slot == 1:
                    full_results.append(
                        nest.map_structure(
                            lambda first, rest: tf.concat([[first], rest],
                                                          axis=0),
                            first_values, rest_values))
                # Everything else does not need concatenating (saves one op).
                else:
                    full_results.append(step_results[slot])

        # Re-build DataOpDicts of states (from tuple right now).
        rebuild_s = DataOpDict()
        for flat_key, var_ref, s_comp in zip(
                self.state_space_actor_flattened.keys(),
                self.current_state.values(), full_results[1]):
            rebuild_s[flat_key] = s_comp
        rebuild_s = unflatten_op(rebuild_s)
        full_results[1] = rebuild_s

        # Let the auto-infer system know, what time rank we have.
        full_results = DataOpTuple(full_results)
        for o in flatten_op(full_results).values():
            o._time_rank = 0  # which position in the shape is the time-rank?

        return full_results
def cumsum(x):
    """Return the running sum of ``x`` along its first dimension.

    Implemented with ``tf.scan`` and no explicit initializer, so the first
    element of ``x`` seeds the accumulator and every following element is
    added to the running total.
    """

    def _add_to_total(running_total, element):
        return running_total + element

    return tf.scan(_add_to_total, x)
def v_trace_estimation(self, value_preds, actions, dones, rewards, logits):
    """Build the V-trace value targets and policy-gradient advantages.

    Arguments:
        value_preds: state values estimated by the current policy. One extra
            (bootstrap) value is appended, so it is one entry longer than
            the trajectory.
        actions: actions sampled by the behaviour policy.
        dones: terminal signals.
        rewards: immediate rewards returned by the environment.
        logits: logits produced by the behaviour policy.

    Returns:
        ``(value_target, advantages)``: the corrected value targets and the
        state-action dependent advantage estimates, both wrapped in
        ``tf.stop_gradient``.
    """
    gamma = self.config.get('discount', 0.99)
    # Terminal steps get a discount of 0, all others get ``gamma``.
    discounts = tf.to_float(~tf.cast(dones, tf.bool)) * gamma

    def _clip_from_config(key, weights):
        # Truncate the importance weights at the configured threshold; a
        # missing (or falsy) setting leaves them untouched.
        threshold = self.config.get(key, None)
        if not threshold:
            return weights
        return tf.minimum(tf.cast(threshold, tf.float32), weights)

    with tf.device("/cpu:0"):
        # Log-probability of the taken actions under the behaviour policy.
        behaviour_log_p = -tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=actions)
        target_log_p = self.action_dist.log_p(action=actions)

        # Importance ratios; the last entry is dropped.
        log_ratio = (target_log_p - behaviour_log_p)[:-1]
        rhos = tf.exp(log_ratio)

        clipped_rhos = _clip_from_config('rho_clipping', rhos)
        clipped_pg_rhos = _clip_from_config('pg_rho_clipping', rhos)
        cs = tf.minimum(1.0, rhos)

        state_value = value_preds[:-1]
        next_state_value = value_preds[1:]
        bootstrap_value = value_preds[-1]

        deltas = clipped_rhos * (rewards + discounts * next_state_value -
                                 state_value)

        # V-trace vs are calculated through a scan from the back to the
        # beginning of the given trajectory, hence the reversed inputs.
        reversed_inputs = tuple(
            tf.reverse(seq, axis=[0]) for seq in (discounts, cs, deltas))

        def _backward_step(acc, step_inputs):
            discount_t, c_t, delta_t = step_inputs
            return delta_t + discount_t * c_t * acc

        vs_minus_v_xs = tf.scan(fn=_backward_step,
                                elems=reversed_inputs,
                                initializer=tf.zeros_like(bootstrap_value),
                                parallel_iterations=1,
                                back_prop=False,
                                name='scan')
        # Undo the reversal so the result lines up with the inputs again.
        vs_minus_v_xs = tf.reverse(vs_minus_v_xs, [0], name='vs_minus_v_xs')

        # Add V(x_s) to get v_s.
        vs = tf.add(vs_minus_v_xs, state_value, name='vs')

        # Advantage for policy gradient: uses v_{s+1}, bootstrapping the last
        # step with the final value prediction.
        vs_t_plus_1 = tf.concat(
            [vs[1:], tf.expand_dims(bootstrap_value, 0)], axis=0)
        pg_advantages = (clipped_pg_rhos *
                         (rewards + discounts * vs_t_plus_1 - state_value))

        advantages = tf.stop_gradient(pg_advantages)
        value_target = tf.stop_gradient(vs)

    return value_target, advantages
def construct_batched_adjacency_and_feature_matrices(
        size,
        adj_row,
        adj_column,
        adj_values,
        adj_elem_len,
        adj_degrees,
        feature_row,
        feature_column,
        feature_values,
        feature_elem_len,
        input_dim,
        max_degree=5,
        normalize=True,
        split_adj=False,
):
    """Construct a batched (block-diagonal) sparse adjacency matrix and the
    matching dense, row-stacked feature matrix.

    The per-graph row/column indices arrive concatenated. Each graph's
    indices are shifted by the number of nodes in all preceding graphs so
    that the graphs end up on the diagonal of one big matrix.

    Example (a batch of two graphs with 2 and 3 nodes)::

        # first adjacency matrix:  [[1, 1], [1, 1]]
        # second adjacency matrix: [[1, 1, 0], [1, 1, 1], [0, 1, 1]]
        size = tf.constant([2, 3], tf.int64)
        adj_row = tf.constant([0, 0, 1, 1, 0, 0, 1, 1, 1, 2, 2], tf.int64)
        adj_column = tf.constant([0, 1, 0, 1, 0, 1, 0, 1, 2, 1, 2], tf.int64)
        adj_values = tf.constant([1] * 11, tf.float32)
        # 4 stored elements in the first matrix, 7 in the second.
        adj_elem_len = tf.constant([4, 7], tf.int64)
        # One entry per stored element (only read when split_adj is used).
        adj_degrees = tf.constant([2, 2, 2, 2, 2, 2, 3, 3, 3, 2, 2], tf.int64)
        feature_row = tf.constant([0, 1, 0, 1, 2], tf.int64)
        feature_column = tf.constant([2, 3, 1, 2, 3], tf.int64)
        feature_values = tf.constant([4, 5, 1, 2, 3], tf.int64)
        feature_elem_len = tf.constant([2, 3], tf.int64)
        diagonalized_adj, feature_mat = (
            construct_batched_adjacency_and_feature_matrices(
                size, adj_row, adj_column, adj_values, adj_elem_len,
                adj_degrees, feature_row, feature_column, feature_values,
                feature_elem_len, 10, normalize=False, split_adj=False))

        # tf.sparse.to_dense(diagonalized_adj[0]) is expected to be
        # [[1., 1., 0., 0., 0.],
        #  [1., 1., 0., 0., 0.],
        #  [0., 0., 1., 1., 0.],
        #  [0., 0., 1., 1., 1.],
        #  [0., 0., 0., 1., 1.]]
        #
        # feature_mat is expected to be
        # [[0, 0, 4, 0, 0, 0, 0, 0, 0, 0],
        #  [0, 0, 0, 5, 0, 0, 0, 0, 0, 0],
        #  [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
        #  [0, 0, 2, 0, 0, 0, 0, 0, 0, 0],
        #  [0, 0, 0, 3, 0, 0, 0, 0, 0, 0]]

    Parameters:
        size: sizes (number of nodes) of the adjacency matrices.
        adj_row: concatenated row indices of all matrices in a batch.
        adj_column: concatenated column indices of all matrices in a batch.
        adj_values: concatenated values of elements of all matrices in a
            batch.
        adj_elem_len: number of stored (non-zero) elements in each matrix.
        adj_degrees: degree values used to mask the stored elements when
            `split_adj` is active, so it needs one entry per stored element.
            Only read in that branch.
        feature_row: concatenated row indices of all feature matrices.
        feature_column: concatenated column indices of all feature matrices.
        feature_values: concatenated values in all feature matrices.
        feature_elem_len: number of non-zero elements in each feature matrix.
        input_dim: number of columns of the feature matrix.
        max_degree: degrees are clipped to this value when `split_adj` is
            active; one adjacency matrix is produced per degree
            1..max_degree.
        normalize: normalizes the adjacency matrix if True. Takes precedence
            over `split_adj`.
        split_adj: splits the adjacency matrix based on degrees of nodes if
            True. Ignored when `normalize` is True.

    Returns:
        A tuple `(diagonalized_adj, net)`:

        * `diagonalized_adj`: list of `tf.SparseTensor`. It holds a single
          matrix for the normalized and the plain case. With `split_adj` it
          holds `max_degree` matrices followed by a sparse identity matrix
          (the connection to self).
        * `net`: the dense feature matrix with `sum(size)` rows and
          `input_dim` columns.
    """
    with tf.device("/cpu:0"):
        adj_col = adj_column
        # Position of each graph's first stored element in the concatenated
        # index vectors, and the node offset of each graph in the batch.
        start = tf.cumsum(adj_elem_len, exclusive=True)
        offset = tf.cumsum(size, exclusive=True)
        start_size_offset = tf.stack([start, adj_elem_len, offset], 1)

    def offset_index(row_or_column):
        """Return a scan step that shifts one graph's slice of the indices."""

        def split_sum(a, x):
            # x = (start, length, node offset) of one graph. The accumulator
            # `a` is not used: every step emits a full-length vector that is
            # zero everywhere except for this graph's (shifted) indices.
            padded_index = tf.concat(
                [
                    tf.zeros(x[0], tf.int64),
                    row_or_column[x[0]:x[0] + x[1]] + x[2],
                    tf.zeros(
                        tf.shape(row_or_column, out_type=tf.int64)[0] - x[0] -
                        x[1],
                        tf.int64,
                    ),
                ],
                0,
            )
            return padded_index

        return split_sum

    with tf.device("/cpu:0"):
        padded_rows = tf.scan(offset_index(adj_row),
                              start_size_offset,
                              initializer=adj_row)
        padded_columns = tf.scan(offset_index(adj_col),
                                 start_size_offset,
                                 initializer=adj_col)

    # The per-graph vectors do not overlap, so summing them yields the
    # shifted index of every stored element.
    diagonal_row = tf.reduce_sum(padded_rows, axis=0)
    diagonal_col = tf.reduce_sum(padded_columns, axis=0)
    adj_shape = [tf.reduce_sum(size), tf.reduce_sum(size)]

    if normalize:
        diagonalized_adj = tf.SparseTensor(
            indices=tf.transpose(tf.stack([diagonal_row, diagonal_col])),
            values=adj_values,
            dense_shape=adj_shape,
        )
        degree_hat = tf.sparse.reduce_sum(diagonalized_adj, axis=0)
        diagonalized_adj = (diagonalized_adj / tf.sqrt(degree_hat) /
                            tf.expand_dims(tf.sqrt(degree_hat), 1))
        diagonalized_adj = [diagonalized_adj]  # number of channel is 1.
    elif split_adj:
        # degree is the number of edges on each node, including the one to
        # itself. A node with degree 1 is not connected with any other node.
        adj_degrees = tf.clip_by_value(adj_degrees, 0, max_degree)
        diagonalized_adj = []
        for degree in range(1, max_degree + 1):
            row_deg = tf.boolean_mask(diagonal_row,
                                      tf.equal(adj_degrees, degree))
            row_col = tf.boolean_mask(diagonal_col,
                                      tf.equal(adj_degrees, degree))
            diagonalized_adj.append(
                tf.SparseTensor(
                    indices=tf.transpose(tf.stack([row_deg, row_col])),
                    values=tf.boolean_mask(adj_values,
                                           tf.equal(adj_degrees, degree)),
                    dense_shape=adj_shape,
                ))
        diagonalized_adj.append(tf.sparse.eye(
            adj_shape[0]))  # connection to self
    else:
        diagonalized_adj = tf.SparseTensor(
            indices=tf.transpose(tf.stack([diagonal_row, diagonal_col])),
            values=adj_values,
            dense_shape=adj_shape,
        )
        diagonalized_adj = [diagonalized_adj]

    # Feature rows are shifted by the same node offsets; feature columns are
    # shared across graphs and stay untouched.
    start_feature = tf.cumsum(feature_elem_len, exclusive=True)
    start_size_offset_feature = tf.stack(
        [start_feature, feature_elem_len, offset], 1)
    with tf.device("/cpu:0"):
        padded_rows_feature = tf.scan(
            offset_index(feature_row),
            start_size_offset_feature,
            initializer=feature_row,
        )
    stacked_row = tf.reduce_sum(padded_rows_feature, axis=0)
    net = tf.SparseTensor(
        indices=tf.transpose(tf.stack([stacked_row, feature_column])),
        values=feature_values,
        dense_shape=[tf.reduce_sum(size), input_dim],
    )
    # Reorder into canonical row-major order before densifying.
    net = tf.sparse_reorder(net)
    net = tf.sparse_tensor_to_dense(net)
    return diagonalized_adj, net
def qr_layer(inputs, n_units, filter_width=2, pool_type='ifo', kernel_regularizer=None):
    """
    Simple QR layer implementation

    Parameters
    ----------
    inputs: Rank-3 input tensor. Axis 1 is treated as the time axis: it is
        left-padded with ``filter_width - 1`` zeros and scanned over.
    n_units: Number of QR units / dimension of output vectors.
    filter_width: Time-dimension width of the convolution filters.
    pool_type: Type of the QR pooling. Affects number of parameters / parallel
        convolutions. Has to be one of ['f', 'fo', 'ifo'].
    kernel_regularizer: Parameter passed to all internal convolutions
        (candidate, forget, input and output gate).

    Returns
    -------
    The layer output: the hidden states, multiplied by the output gate when
    ``pool_type`` contains 'o'.

    Raises
    ------
    ValueError: If ``pool_type`` is not one of the supported values.
    """
    if pool_type not in ['f', 'fo', 'ifo']:
        raise ValueError("Pool type must be one of 'f', 'fo', 'ifo'")

    # Pad only at the front of the time axis so that a filter never sees
    # future time steps, then add a trailing channel axis for conv2d.
    paddings = [[0, 0], [filter_width - 1, 0], [0, 0]]
    inputs = tf.pad(inputs, paddings=paddings)
    inputs = tf.expand_dims(inputs, -1)
    input_dim = inputs.shape[2]

    def _gate_conv(activation):
        # One convolution spanning the full feature axis, which collapses
        # that axis to size 1 so it can be squeezed away. Every gate shares
        # the same kernel regularizer.
        conv = tf.layers.conv2d(inputs, n_units, (filter_width, input_dim),
                                activation=activation,
                                kernel_regularizer=kernel_regularizer)
        return tf.squeeze(conv, axis=2)

    # Candidate vectors
    candidate = _gate_conv(tf.tanh)

    # Forget gate
    forget_gate = _gate_conv(tf.sigmoid)

    # Input gate
    if 'i' in pool_type:
        # Previously this convolution was the only one created without the
        # kernel regularizer.
        input_gate = _gate_conv(tf.sigmoid)
    else:
        input_gate = 1 - forget_gate

    # Recurrent part of the calculation of c
    # Prepare for tf.scan: move the time axis to the front.
    forget_gate = tf.transpose(forget_gate, [1, 0, 2])
    input_gate = tf.transpose(input_gate, [1, 0, 2])
    candidate = tf.transpose(candidate, [1, 0, 2])
    initializer = tf.zeros(tf.shape(forget_gate)[1:], tf.float32)

    # Apply recurrent step
    hidden = tf.scan(_recurrent_step, (forget_gate, input_gate, candidate),
                     initializer=initializer)

    # Return to proper shape
    hidden = tf.transpose(hidden, [1, 0, 2])

    # Calculate outputs
    if 'o' in pool_type:
        # Output gate
        output_gate = _gate_conv(tf.sigmoid)
        outputs = output_gate * hidden
    else:
        outputs = hidden
    return outputs
def forward(self, x):
    """
    Run this layer's recurrence over the whole input sequence.

    :param x: The input to the hidden layer; ``tf.scan`` iterates over its
        first dimension.
    :return: The values after performing forward propagation in this layer,
        i.e. the stacked results of ``self.recurrence`` starting from
        ``self.h0``.
    """
    hidden_states = tf.scan(fn=self.recurrence,
                            elems=x,
                            initializer=self.h0)
    return hidden_states
# output, state = RNNConvCell(x_i, state) # else: # output, state = RNNCell(x_i, state) # outputs.append(output) """ Use tf.scan instead of naive for loop Remark: scan->a(t) = fn(a(t-1), x), the first param fn(a,x) in scan function, 'a' stands for a recursive op and 'x'stands for a input at some time, but in this example, we need cell(rnn_input, state) as scan function, so we use lambda function transform Cell(rnn_input, state) to Cell(state, rnn_input) with responding to fn(a, x) in scan function. And notes that Cell function return (output, state), so we actuallly need send a[1] which means state to the fn's first input """ outputs = tf.scan(lambda a, x: RNNCell(x, a[1]), tf.transpose(x, [1, 0, 2]), initializer=(state, state))[0] # Linear activation, using rnn inner loop last output preds = [] cost = [] for out_idx in xrange(n_steps - 1, n_steps - loss_len - 1, -1): pred = tf.matmul(outputs[out_idx], weights['out']) + biases['out'] preds.append(pred) # Define loss and optimizer cost.append(tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))) cost = tf.reduce_sum(cost) optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # Evaluate model
def __init__(self, sequence_length, num_classes, embedding_size,
             filter_sizes, num_filters, embedding_size_lex,
             attention_depth_w2v, attention_depth_lex, l2_reg_lambda=0.0,
             l1_reg_lambda=0.0):
    """Build the graph of a text CNN with two attention side-branches.

    The graph consists of:

    * placeholders for pre-embedded inputs (a word-vector input, a lexicon
      input, and a 2-channel input for the convolutions), labels and the
      dropout keep probability;
    * an attention branch for the word-vector input and one for the lexicon
      input: each projects every position with a learned matrix, max-pools
      over the projection depth to get one weight per position, and uses
      these weights to sum the embeddings over the sequence;
    * one convolution + max-pool per entry of `filter_sizes`, applied to the
      2-channel input;
    * dropout, a linear output layer, the loss (cross entropy plus weighted
      L1/L2 penalties on the output layer) and accuracy / F1 metrics.

    Args:
        sequence_length: length of the input sequences.
        num_classes: number of output classes.
        embedding_size: feature size of `input_x` / `input_x_2c`.
        filter_sizes: iterable of convolution filter heights.
        num_filters: number of filters per filter size.
        embedding_size_lex: feature size of `input_x_lexicon`.
        attention_depth_w2v: projection depth of the word-vector attention.
        attention_depth_lex: projection depth of the lexicon attention.
        l2_reg_lambda: weight of the L2 penalty in the loss.
        l1_reg_lambda: weight of the L1 penalty in the loss.
    """
    # Placeholders for input, output and dropout
    self.input_x_2c = tf.placeholder(
        tf.float32, [None, sequence_length, embedding_size, 2],
        name="input_x_2c")
    self.input_x = tf.placeholder(tf.float32,
                                  [None, sequence_length, embedding_size],
                                  name="input_x")
    self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                  name="input_y")
    self.dropout_keep_prob = tf.placeholder(tf.float32,
                                            name="dropout_keep_prob")
    self.input_x_lexicon = tf.placeholder(
        tf.float32, [None, sequence_length, embedding_size_lex],
        name="input_x_lexicon")

    # Keeping track of l2 regularization loss (optional)
    l2_loss = tf.constant(0.0)
    l1_loss = tf.constant(0.0)

    # Embedding layer
    # No lookup happens here: the placeholders already carry embedded
    # inputs, they are only aliased and given a trailing channel axis.
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        self.embedded_chars = self.input_x
        self.embedded_chars_expanded = tf.expand_dims(
            self.embedded_chars, -1)
        print self.embedded_chars_expanded

        # lexicon embedding
        self.embedded_chars_lexicon = self.input_x_lexicon
        self.embedded_chars_expanded_lexicon = tf.expand_dims(
            self.embedded_chars_lexicon, -1)
        print '[self.embedded_chars]', self.embedded_chars
        print '[self.embedded_chars_expanded]', self.embedded_chars_expanded
        print '[self.embedded_chars_lexicon]', self.embedded_chars_lexicon
        print '[self.embedded_chars_expanded_lexicon]', self.embedded_chars_expanded_lexicon

    attention_outputs = []
    with tf.name_scope("pre-attention"):
        # Learned projection matrices of the two attention branches.
        U_shape = [embedding_size, attention_depth_w2v]  # (400, 60)
        self.U_w2v = tf.Variable(tf.truncated_normal(U_shape, stddev=0.1),
                                 name="U_w2v")
        U_shape = [embedding_size_lex, attention_depth_lex]  # (15, 60)
        self.U_lex = tf.Variable(tf.truncated_normal(U_shape, stddev=0.1),
                                 name="U_lex")

        # Per-example transposes (last two axes swapped), used further down
        # to apply the attention weights.
        self.embedded_chars_tr = tf.batch_matrix_transpose(
            self.embedded_chars)
        self.embedded_chars_lexicon_tr = tf.batch_matrix_transpose(
            self.embedded_chars_lexicon)
        print '[self.embedded_chars_lexicon_tr]', self.embedded_chars_lexicon_tr

        # Both scan functions ignore `previous_output`, so tf.scan is used
        # here as a per-example map: each example's matrix is multiplied by
        # the projection matrix.
        def fn_matmul_w2v(previous_output, current_input):
            print(current_input.get_shape())
            current_ouput = tf.matmul(current_input, self.U_w2v)
            print 'previous_output', previous_output
            print 'current_ouput', current_ouput
            return current_ouput

        def fn_matmul_lex(previous_output, current_input):
            print(current_input.get_shape())
            current_ouput = tf.matmul(current_input, self.U_lex)
            print 'previous_output', previous_output
            print 'current_ouput', current_ouput
            return current_ouput

        # The initializer only provides the shape/dtype of one scan output.
        initializer = tf.constant(np.zeros(
            [sequence_length, attention_depth_w2v]), dtype=tf.float32)
        WU_w2v = tf.scan(fn_matmul_w2v, self.embedded_chars,
                         initializer=initializer)
        print '[WU_w2v]', WU_w2v

        initializer = tf.constant(np.zeros(
            [sequence_length, attention_depth_lex]), dtype=tf.float32)
        LU_lex = tf.scan(fn_matmul_lex, self.embedded_chars_lexicon,
                         initializer=initializer)
        print '[LU_lex]', LU_lex

        WU_w2v_expanded = tf.expand_dims(WU_w2v, -1)
        print '[WU_w2v_expanded]', WU_w2v_expanded  # (?, 60(seq_len), 60(depth), 1)

        # Max over the whole projection depth: one value per position.
        w2v_pool = tf.nn.max_pool(WU_w2v_expanded,
                                  ksize=[1, 1, attention_depth_w2v, 1],
                                  strides=[1, 1, 1, 1],
                                  padding='VALID',
                                  name="w2v_pool")
        print '[w2v_pool]', w2v_pool  # (?, 60(seq_len), 1, 1) #select attention for w2v

        LU_lex_expanded = tf.expand_dims(LU_lex, -1)
        print '[LU_lex_expanded]', LU_lex_expanded  # (?, 60(seq_len), 60(depth), 1)

        lex_pool = tf.nn.max_pool(LU_lex_expanded,
                                  ksize=[1, 1, attention_depth_lex, 1],
                                  strides=[1, 1, 1, 1],
                                  padding='VALID',
                                  name="lex_pool")
        print '[lex_pool]', lex_pool  # (?, 60(seq_len), 1, 1) #select attention for lex

        w2v_pool_sq = tf.expand_dims(
            tf.squeeze(w2v_pool, squeeze_dims=[2, 3]), -1)  # (?, 60, 1)
        print '[w2v_pool_sq]', w2v_pool_sq

        lex_pool_sq = tf.expand_dims(
            tf.squeeze(lex_pool, squeeze_dims=[2, 3]), -1)  # (?, 60, 1)
        print '[lex_pool_sq]', lex_pool_sq

        # Weighted sum of the embeddings over the sequence axis:
        # (embedding, seq) x (seq, 1) -> (embedding, 1) per example.
        attentioned_w2v = tf.batch_matmul(self.embedded_chars_tr,
                                          w2v_pool_sq)
        attentioned_lex = tf.batch_matmul(self.embedded_chars_lexicon_tr,
                                          lex_pool_sq)

        attentioned_w2v_sq = tf.squeeze(attentioned_w2v, squeeze_dims=[2])
        attentioned_lex_sq = tf.squeeze(attentioned_lex, squeeze_dims=[2])
        print '[attentioned_w2v]', attentioned_w2v_sq
        print '[attentioned_lex]', attentioned_lex_sq

        attention_outputs.append(attentioned_w2v_sq)
        attention_outputs.append(attentioned_lex_sq)

    # Create a convolution + maxpool layer for each filter size
    pooled_outputs = []
    for i, filter_size in enumerate(filter_sizes):
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            # Convolution Layer
            # The filter spans the full embedding width and both input
            # channels.
            filter_shape = [filter_size, embedding_size, 2, num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1),
                            name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            # l2_loss += tf.nn.l2_loss(W)/1000
            # l2_loss += tf.nn.l2_loss(b)/1000
            conv = tf.nn.conv2d(self.input_x_2c,
                                W,
                                strides=[1, 1, 1, 1],
                                padding="VALID",
                                name="conv")
            # Apply nonlinearity
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            # Maxpooling over the outputs
            pooled = tf.nn.max_pool(
                h,
                ksize=[1, sequence_length - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="pool")
            pooled_outputs.append(pooled)

    # Combine all the pooled features
    num_filters_total = num_filters * len(filter_sizes)
    # tf.concat is called with the axis as first argument here
    # (axis-first argument order).
    self.h_pool = tf.concat(3, pooled_outputs)
    self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])
    # Append the two attention vectors to the flattened CNN features.
    self.appended_pool = tf.concat(
        1, [self.h_pool_flat, attention_outputs[0], attention_outputs[1]])
    print '[self.appended_pool]', self.appended_pool
    # Width of the combined feature vector fed to the output layer.
    num_filters_total = num_filters_total + embedding_size + embedding_size_lex

    # Add dropout
    with tf.name_scope("dropout"):
        self.h_drop = tf.nn.dropout(self.appended_pool,
                                    self.dropout_keep_prob)

    # Final (unnormalized) scores and predictions
    with tf.name_scope("output"):
        W = tf.get_variable(
            "W",
            shape=[num_filters_total, num_classes],
            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
        # Only the output layer contributes to the penalties; the L2 terms
        # are additionally scaled down by a fixed factor of 30.
        l2_loss += tf.nn.l2_loss(W) / 30
        l2_loss += tf.nn.l2_loss(b) / 30
        l1_loss += tf.reduce_sum(tf.abs(W))
        l1_loss += tf.reduce_sum(tf.abs(b))
        self._b = b
        self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
        self.predictions = tf.argmax(self.scores, 1, name="predictions")

    # CalculateMean cross-entropy loss
    with tf.name_scope("loss"):
        # Positional (logits, labels) call.
        losses = tf.nn.softmax_cross_entropy_with_logits(
            self.scores, self.input_y)
        self.loss = tf.reduce_mean(
            losses) + l2_reg_lambda * l2_loss + l1_reg_lambda * l1_loss

    # Accuracy
    with tf.name_scope("accuracy"):
        self.golds = tf.argmax(self.input_y, 1, name="golds")
        correct_predictions = tf.equal(self.predictions,
                                       tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"),
                                       name="accuracy")

    # Average of the F1 scores of class 2 ("positive") and class 0
    # ("negative").
    # NOTE(review): nothing here guards against an empty selection or a zero
    # (precision + recall) denominator -- confirm how callers handle that.
    with tf.name_scope("avg_f1"):
        self.golds = tf.argmax(self.input_y, 1, name="golds")
        self.preds = self.predictions

        # positive recall
        pos_gold_sel = tf.equal(self.golds, 2)  # positive_gold
        posg_golds = tf.boolean_mask(self.golds, pos_gold_sel)
        posg_preds = tf.boolean_mask(self.preds, pos_gold_sel)
        correct_predictions_pr = tf.equal(posg_golds, posg_preds)
        pos_r = tf.reduce_mean(tf.cast(correct_predictions_pr, "float"),
                               name="pos_recall")

        # positive precision
        pos_pred_sel = tf.equal(self.preds, 2)  # positive_pred
        posp_golds = tf.boolean_mask(self.golds, pos_pred_sel)
        posp_preds = tf.boolean_mask(self.preds, pos_pred_sel)
        correct_predictions_pp = tf.equal(posp_golds, posp_preds)
        pos_p = tf.reduce_mean(tf.cast(correct_predictions_pp, "float"),
                               name="pos_precision")

        # negative recall
        neg_gold_sel = tf.equal(self.golds, 0)  # negative_gold
        negg_golds = tf.boolean_mask(self.golds, neg_gold_sel)
        negg_preds = tf.boolean_mask(self.preds, neg_gold_sel)
        correct_predictions_nr = tf.equal(negg_golds, negg_preds)
        self.neg_r = tf.reduce_mean(tf.cast(correct_predictions_nr, "float"),
                                    name="neg_recall")

        # negative precision
        neg_pred_sel = tf.equal(self.preds, 0)  # negative_pred
        negp_golds = tf.boolean_mask(self.golds, neg_pred_sel)
        negp_preds = tf.boolean_mask(self.preds, neg_pred_sel)
        correct_predictions_np = tf.equal(negp_golds, negp_preds)
        self.neg_p = tf.reduce_mean(tf.cast(correct_predictions_np, "float"),
                                    name="neg_precision")

        # F1 = 2PR / (P + R), expressed in percent.
        self.f1_neg = 2 * self.neg_p * self.neg_r / (self.neg_p + self.neg_r) * 100
        self.f1_pos = 2 * pos_p * pos_r / (pos_p + pos_r) * 100
        self.avg_f1 = (self.f1_neg + self.f1_pos) / 2
def memory_augmented_neural_network(input_var, target_var, \
    batch_size=16, nb_class=5, memory_shape=(128, 40), \
    controller_size=200, input_size=20 * 20, nb_reads=4):
    """Build the graph of a memory-augmented network unrolled with tf.scan.

    At every time step a gated recurrent controller consumes the current
    input concatenated with the previous step's one-hot label, writes to an
    external memory, and reads from it using cosine similarity between its
    keys and the memory rows. The hidden state and read vector of every step
    are mapped to class probabilities by a linear layer plus softmax.

    Args:
        input_var: input tensor of shape (batch_size, time, input_dim).
        target_var: label indices of shape (batch_size, time).
        batch_size: number of sequences per batch (used in static reshapes).
        nb_class: number of classes.
        memory_shape: (number of memory rows, width of a memory row).
        controller_size: size of the controller's hidden and cell state.
        input_size: feature size of one input step.
        nb_reads: number of read heads.

    Returns:
        A tuple `(output_var, output_var_flatten, params)`:
        class probabilities of shape (batch_size, time, nb_class), the same
        probabilities flattened to (batch_size * time, nb_class), and the
        list of weight variables (`gamma` is not part of that list).
    """
    ## input_var has dimensions (batch_size, time, input_dim)
    ## target_var has dimensions (batch_size, time) (label indices)

    # Initial values of the scan state: memory, cell state, hidden state,
    # read vector, read weights and usage weights.
    M_0 = shared_float32(1e-6 * np.ones((batch_size, ) + memory_shape),
                         name='memory')
    c_0 = shared_float32(np.zeros((batch_size, controller_size)),
                         name='memory_cell_state')
    h_0 = shared_float32(np.zeros((batch_size, controller_size)),
                         name='hidden_state')
    r_0 = shared_float32(np.zeros((batch_size, nb_reads * memory_shape[1])),
                         name='read_vector')
    wr_0 = shared_one_hot((batch_size, nb_reads, memory_shape[0]), name='wr')
    wu_0 = shared_one_hot((batch_size, memory_shape[0]), name='wu')

    def shape_high(shape):
        # Returns the shape as a list together with the bound `high` used
        # for the uniform initializer range [-high, high].
        shape = np.array(shape)
        # NOTE(review): `shape` was just converted with np.array, so this
        # isinstance check cannot succeed and only the else branch runs.
        if isinstance(shape, int):
            high = np.sqrt(6. / shape)
            return (list(shape), high)
        else:
            high = np.sqrt(6. / (np.sum(shape[:2]) * np.prod(shape[2:])))
            return (list(shape), high)

    # Create all trainable variables once; `step` fetches them again with
    # reuse=True.
    with tf.variable_scope("Weights"):
        shape, high = shape_high((nb_reads, controller_size, memory_shape[1]))
        W_key = tf.get_variable('W_key',
                                shape=shape,
                                initializer=tf.random_uniform_initializer(
                                    -1 * high, high))
        b_key = tf.get_variable('b_key',
                                shape=(nb_reads, memory_shape[1]),
                                initializer=tf.constant_initializer(0))
        shape, high = shape_high((nb_reads, controller_size, memory_shape[1]))
        W_add = tf.get_variable('W_add',
                                shape=shape,
                                initializer=tf.random_uniform_initializer(
                                    -1 * high, high))
        b_add = tf.get_variable('b_add',
                                shape=(nb_reads, memory_shape[1]),
                                initializer=tf.constant_initializer(0))
        shape, high = shape_high((nb_reads, controller_size, 1))
        W_sigma = tf.get_variable('W_sigma',
                                  shape=shape,
                                  initializer=tf.random_uniform_initializer(
                                      -1 * high, high))
        b_sigma = tf.get_variable('b_sigma',
                                  shape=(nb_reads, 1),
                                  initializer=tf.constant_initializer(0))
        shape, high = shape_high((input_size + nb_class, 4 * controller_size))
        W_xh = tf.get_variable('W_xh',
                               shape=shape,
                               initializer=tf.random_uniform_initializer(
                                   -1 * high, high))
        b_h = tf.get_variable('b_xh',
                              shape=(4 * controller_size),
                              initializer=tf.constant_initializer(0))
        shape, high = shape_high(
            (controller_size + nb_reads * memory_shape[1], nb_class))
        W_o = tf.get_variable('W_o',
                              shape=shape,
                              initializer=tf.random_uniform_initializer(
                                  -1 * high, high))
        b_o = tf.get_variable('b_o',
                              shape=(nb_class),
                              initializer=tf.constant_initializer(0))
        shape, high = shape_high(
            (nb_reads * memory_shape[1], 4 * controller_size))
        W_rh = tf.get_variable('W_rh',
                               shape=shape,
                               initializer=tf.random_uniform_initializer(
                                   -1 * high, high))
        shape, high = shape_high((controller_size, 4 * controller_size))
        W_hh = tf.get_variable('W_hh',
                               shape=shape,
                               initializer=tf.random_uniform_initializer(
                                   -1 * high, high))
        # Decay factor applied to the previous usage weights in `step`.
        gamma = tf.get_variable('gamma',
                                shape=[1],
                                initializer=tf.constant_initializer(0.95))

    def slice_equally(x, size, nb_slice):
        # type: (object, object, object) -> object
        # Split the columns of `x` into `nb_slice` consecutive chunks of
        # width `size`.
        return [x[:, n * size:(n + 1) * size] for n in range(nb_slice)]

    def step(xparameter1, x_t):  #M_tm1, c_tm1, h_tm1, r_tm1, wr_tm1, wu_tm1
        # One time step of the scan: `xparameter1` is the previous state
        # [memory, cell state, hidden state, read vector, read weights,
        # usage weights], `x_t` the current input slice. Returns the new
        # state in the same order.
        M_tm1, c_tm1, h_tm1, r_tm1, wr_tm1, wu_tm1 = xparameter1

        # Fetch the variables created above (reuse=True creates nothing new).
        with tf.variable_scope("Weights", reuse=True):
            W_key = tf.get_variable('W_key',
                                    shape=(nb_reads, controller_size,
                                           memory_shape[1]))
            b_key = tf.get_variable('b_key',
                                    shape=(nb_reads, memory_shape[1]))
            W_add = tf.get_variable('W_add',
                                    shape=(nb_reads, controller_size,
                                           memory_shape[1]))
            b_add = tf.get_variable('b_add',
                                    shape=(nb_reads, memory_shape[1]))
            W_sigma = tf.get_variable('W_sigma',
                                      shape=(nb_reads, controller_size, 1))
            b_sigma = tf.get_variable('b_sigma', shape=(nb_reads, 1))
            W_xh = tf.get_variable('W_xh',
                                   shape=(input_size + nb_class,
                                          4 * controller_size))
            b_h = tf.get_variable('b_xh', shape=(4 * controller_size))
            W_o = tf.get_variable('W_o',
                                  shape=(controller_size +
                                         nb_reads * memory_shape[1],
                                         nb_class))
            b_o = tf.get_variable('b_o', shape=(nb_class))
            W_rh = tf.get_variable('W_rh',
                                   shape=(nb_reads * memory_shape[1],
                                          4 * controller_size))
            W_hh = tf.get_variable('W_hh',
                                   shape=(controller_size,
                                          4 * controller_size))
            gamma = tf.get_variable('gamma',
                                    shape=[1],
                                    initializer=tf.constant_initializer(0.95))

        #pt = M_tm1[0:2]
        #pt = tf.Print(pt, [pt], message='Prinitng W_key: ')
        #x_t = tf.transpose(X_t, perm=[1, 0, 2])[ix]
        #with tf.control_dependencies([pt]):

        # Gated controller update from the input, the previous read vector
        # and the previous hidden state; the pre-activations are split into
        # forget / input / output gates and the update.
        preactivations = tf.matmul(x_t, W_xh) + tf.matmul(
            r_tm1, W_rh) + tf.matmul(h_tm1, W_hh) + b_h
        gf_, gi_, go_, u_ = slice_equally(preactivations, controller_size, 4)
        gf = tf.sigmoid(gf_)
        gi = tf.sigmoid(gi_)
        go = tf.sigmoid(go_)
        # NOTE(review): the update `u` uses a sigmoid like the gates do --
        # confirm this is intended.
        u = tf.sigmoid(u_)

        c_t = gf * c_tm1 + gi * u
        h_t = go * tf.tanh(c_t)  #(batch_size, controller_size)

        # Per-head key, add vector and write gate derived from h_t.
        h_t_W_key = tf.matmul(h_t,
                              tf.reshape(W_key, shape=(controller_size, -1)))
        k_t = tf.tanh(
            tf.reshape(h_t_W_key,
                       shape=(batch_size, nb_reads, memory_shape[1])) +
            b_key)  #(batch_size, nb_reads, memory_shape[1])
        h_t_W_add = tf.matmul(h_t,
                              tf.reshape(W_add, shape=(controller_size, -1)))
        a_t = tf.tanh(
            tf.reshape(h_t_W_add,
                       shape=(batch_size, nb_reads, memory_shape[1])) +
            b_add)
        h_t_W_sigma = tf.matmul(
            h_t, tf.reshape(W_sigma, shape=(controller_size, -1)))
        sigma_t = tf.sigmoid(
            tf.reshape(h_t_W_sigma, shape=(batch_size, nb_reads, 1)) +
            b_sigma)  #(batch_size, nb_reads, 1)

        # Memory row indices ordered by tf.nn.top_k over the previous usage
        # weights; the first nb_reads indices of every example are kept.
        _, temp_indices = tf.nn.top_k(wu_tm1, memory_shape[0])
        wlu_tm1 = tf.slice(temp_indices, [0, 0],
                           [batch_size, nb_reads])  #(batch_size, nb_reads)

        # NOTE(review): `sigma_t_wr_tm_1` is not used afterwards; the
        # product below relies on broadcasting of `sigma_t` instead.
        sigma_t_wr_tm_1 = tf.tile(
            sigma_t, tf.stack([1, 1, wr_tm1.get_shape().as_list()[2]]))
        ww_t = tf.reshape(
            sigma_t * wr_tm1,
            (batch_size * nb_reads,
             memory_shape[0]))  #(batch_size*nb_reads, memory_shape[0])
        #with tf.variable_scope("ww_t"):
        ww_t = update_tensor(ww_t, tf.reshape(wlu_tm1, [-1]),
                             1.0 - tf.reshape(sigma_t, shape=[-1])
                             )  #Update tensor done using index slicing
        ww_t = tf.reshape(ww_t, (batch_size, nb_reads, memory_shape[0]))

        with tf.variable_scope("M_t"):
            print('wlu_tm1 : ', wlu_tm1.get_shape().as_list())
            # update_tensor is applied with zeros at the row selected by
            # wlu_tm1[:, 0], then the weighted add vectors are added.
            M_t = update_tensor(
                M_tm1, wlu_tm1[:, 0],
                tf.constant(0., shape=[batch_size, memory_shape[1]])
            )  #Update tensor done using sparse to dense
            M_t = tf.add(
                M_t, tf.matmul(tf.transpose(ww_t, perm=[0, 2, 1]), a_t)
            )  #(batch_size, memory_size[0], memory_size[1])

        # Read weights: softmax over the similarity between each key and
        # every memory row.
        K_t = cosine_similarity(k_t, M_t)
        wr_t = tf.nn.softmax(
            tf.reshape(K_t, (batch_size * nb_reads, memory_shape[0])))
        wr_t = tf.reshape(
            wr_t, (batch_size, nb_reads,
                   memory_shape[0]))  #(batch_size, nb_reads, memory_size[0])

        # Usage: decayed previous usage plus the current read and write
        # weights.
        wu_t = gamma * wu_tm1 + tf.reduce_sum(wr_t, axis=1) + tf.reduce_sum(
            ww_t, axis=1)  #(batch_size, memory_size[0])
        # Read vector: the reads of all heads, flattened per example.
        r_t = tf.reshape(tf.matmul(wr_t, M_t), [batch_size, -1])

        return [M_t, c_t, h_t, r_t, wr_t, wu_t]

    #Model Part:
    sequence_length_var = target_var.get_shape().as_list()[
        1]  #length of the input
    output_shape_var = (batch_size * sequence_length_var, nb_class
                        )  #(batch_size*sequence_length_vat,nb_class)

    # Input concat with time offset
    # The labels are shifted one step to the right (zeros first), so the
    # label of step t is only visible at step t + 1.
    one_hot_target_flattened = tf.one_hot(tf.reshape(target_var, [-1]),
                                          depth=nb_class)
    one_hot_target = tf.reshape(
        one_hot_target_flattened,
        (batch_size, sequence_length_var,
         nb_class))  #(batch_size, sequence_var_length, nb_class)
    offset_target_var = tf.concat(
        [
            tf.zeros_like(tf.expand_dims(one_hot_target[:, 0], 1)),
            one_hot_target[:, :-1]
        ],
        axis=1)  #(batch_size, sequence_var_length, nb_class)
    l_input_var = tf.concat(
        [input_var, offset_target_var],
        axis=2)  #(batch_size, sequence_var_length, input_size+nb_class)

    #ix = tf.variable(0,dtype=tf.int32)
    #cond = lambda M_0, c_0, h_0, r_0, wr_0, wu_0, ix: ix < sequence_length_var
    # Scan over time: the input is transposed to time-major first.
    l_ntm_var = tf.scan(
        step,
        elems=tf.transpose(l_input_var, perm=[1, 0, 2]),
        initializer=[M_0, c_0, h_0, r_0, wr_0, wu_0],
        name="Scan_MANN_Last")  #Set of all above parameters, as list
    # Entries 2 and 3 of the scan result are h_t and r_t; concatenate them
    # and go back to batch-major order.
    l_ntm_output_var = tf.transpose(
        tf.concat(l_ntm_var[2:4], axis=2), perm=[1, 0, 2]
    )  #h_t & r_t, size=(batch_size, sequence_var_length, controller_size+nb_reads*memory_size[1])

    # Output layer applied to all (example, time step) pairs at once.
    l_input_var_W_o = tf.matmul(
        tf.reshape(l_ntm_output_var,
                   shape=(batch_size * sequence_length_var, -1)), W_o)
    output_var_preactivation = tf.add(
        tf.reshape(l_input_var_W_o,
                   (batch_size, sequence_length_var, nb_class)), b_o)
    output_var_flatten = tf.nn.softmax(
        tf.reshape(output_var_preactivation, output_shape_var))
    output_var = tf.reshape(output_var_flatten,
                            output_var_preactivation.get_shape().as_list())

    #Parameters
    params = [
        W_key, b_key, W_add, b_add, W_sigma, b_sigma, W_xh, W_rh, W_hh, b_h,
        W_o, b_o
    ]

    return output_var, output_var_flatten, params
# Vanilla RNN classifier built by hand with tf.scan.
# Inputs: sequences of shape (batch, time_steps, element_size) and labels of
# shape (batch, num_classes).
x_place=tf.placeholder(tf.float32,[None,time_steps,element_size])
y_place=tf.placeholder(tf.float32,[None,num_classes])

# Recurrent weights: input-to-hidden, hidden-to-hidden and hidden bias.
# NOTE(review): both weight matrices start at zero -- confirm this is the
# intended initialization.
with tf.name_scope('rnn_weights') as scope:
    wl=tf.Variable(tf.zeros((element_size,hidden_layer_size)))
    wh=tf.Variable(tf.zeros((hidden_layer_size,hidden_layer_size)))
    bl=tf.Variable(tf.zeros((hidden_layer_size)))

def rnn_step(previous_hidden_layer,x):
    # One recurrence step: new hidden state from the current input and the
    # previous hidden state.
    initial=tf.tanh(tf.matmul(x,wl)+tf.matmul(previous_hidden_layer,wh)+bl)
    return initial

# tf.scan iterates over the first axis, so make the input time-major.
processed_inputs=tf.transpose(x_place,perm=[1,0,2])
print('processed_inputs ',processed_inputs.get_shape())
# The initial hidden state has a fixed batch dimension of `batch_size`.
initial_hidden=tf.zeros((batch_size,hidden_layer_size))
# Hidden states of all time steps, stacked along the first axis.
all_hidden_states=tf.scan(rnn_step,processed_inputs,initializer=initial_hidden)
print('all_hidden_states',all_hidden_states.get_shape())

# Output layer weights.
with tf.name_scope('linear_weights') as scope:
    WL=tf.Variable(tf.truncated_normal((hidden_layer_size,num_classes)))
    BL=tf.Variable(tf.truncated_normal([num_classes]))

def linear_layer(hidden_state):
    # Map one hidden state to class logits.
    return tf.matmul(hidden_state,WL)+BL

# The linear layer is applied to every time step, but only the logits of
# the last step are used below.
all_outputs=tf.map_fn(linear_layer,all_hidden_states)
print('all_outputs',all_outputs.get_shape())
output=all_outputs[-1]
print(output.get_shape())

# Loss, optimizer step and accuracy on the last-step logits.
with tf.name_scope('accuracy'):
    loss=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=output,labels=y_place))
    train_step=tf.train.RMSPropOptimizer(0.001,0.9).minimize(loss)
    accuracy=tf.reduce_mean(tf.cast(tf.equal(tf.argmax(output,1),tf.argmax(y_place,1)),tf.float32))
# finally the training part
def call(self, x, mask=None):
    """Compute the quadratic term A = -.5 * (a - mu)^T * P * (a - mu).

    `x` must be a list of three tensors `[L_flat, mu, a]`. `P` is built per
    batch element from `L_flat`, depending on `self.mode`:

    * 'full': `L_flat` holds the entries of a lower-triangular matrix L
      whose diagonal is exponentiated (plus epsilon); P = L * L^T.
    * 'diag': `L_flat` holds the diagonal of P, used as is.

    Both the Theano and the TensorFlow Keras backend are supported.

    Args:
        x: list `[L_flat, mu, a]`.
        mask: not used.

    Returns:
        A 2-dimensional tensor with one row per batch element.

    Raises:
        RuntimeError: if the Keras backend is neither Theano nor TensorFlow.
    """
    # TODO: validate input shape
    assert (len(x) == 3)
    L_flat = x[0]
    mu = x[1]
    a = x[2]

    if self.mode == 'full':
        # Create L and L^T matrix, which we use to construct the positive-definite matrix P.
        L = None
        LT = None
        if K.backend() == 'theano':
            import theano.tensor as T
            import theano

            def fn(x, L_acc, LT_acc):
                # Scatter the flat values into the lower triangle, then
                # replace the diagonal by its exponential (plus epsilon).
                # The accumulators are not used.
                x_ = K.zeros((self.nb_actions, self.nb_actions))
                x_ = T.set_subtensor(x_[np.tril_indices(self.nb_actions)], x)
                diag = K.exp(T.diag(x_)) + K.epsilon()
                x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)], diag)
                return x_, x_.T

            outputs_info = [
                K.zeros((self.nb_actions, self.nb_actions)),
                K.zeros((self.nb_actions, self.nb_actions)),
            ]
            results, _ = theano.scan(fn=fn, sequences=L_flat, outputs_info=outputs_info)
            L, LT = results
        elif K.backend() == 'tensorflow':
            import tensorflow as tf

            # Number of elements in a triangular matrix.
            nb_elems = (self.nb_actions * self.nb_actions + self.nb_actions) // 2

            # Create mask for the diagonal elements in L_flat. This is used to exponentiate
            # only the diagonal elements, which is done before gathering.
            # Row `r` of the flattened lower triangle ends on its diagonal
            # element, so the diagonal positions are 0, 2, 5, ...
            diag_indeces = [0]
            for row in range(1, self.nb_actions):
                diag_indeces.append(diag_indeces[-1] + (row + 1))
            diag_mask = np.zeros(1 + nb_elems)  # +1 for the leading zero
            diag_mask[np.array(diag_indeces) + 1] = 1
            diag_mask = K.variable(diag_mask)

            # Add leading zero element to each element in the L_flat. We use this zero
            # element when gathering L_flat into a lower triangular matrix L.
            nb_rows = tf.shape(L_flat)[0]
            zeros = tf.expand_dims(tf.tile(K.zeros((1,)), [nb_rows]), 1)
            try:
                # Old TF behavior.
                L_flat = tf.concat(1, [zeros, L_flat])
            except TypeError:
                # New TF behavior
                L_flat = tf.concat([zeros, L_flat], 1)

            # Create mask that can be used to gather elements from L_flat and put them
            # into a lower triangular matrix.
            # Index 0 (everything above the diagonal) points at the leading
            # zero element added above.
            tril_mask = np.zeros((self.nb_actions, self.nb_actions), dtype='int32')
            tril_mask[np.tril_indices(self.nb_actions)] = range(1, nb_elems + 1)

            # Finally, process each element of the batch.
            init = [
                K.zeros((self.nb_actions, self.nb_actions)),
                K.zeros((self.nb_actions, self.nb_actions)),
            ]

            def fn(a, x):
                # Exponentiate everything. This is much easier than only exponentiating
                # the diagonal elements, and, usually, the action space is relatively low.
                x_ = K.exp(x) + K.epsilon()
                # Only keep the diagonal elements.
                x_ *= diag_mask
                # Add the original, non-diagonal elements.
                x_ += x * (1. - diag_mask)
                # Finally, gather everything into a lower triangular matrix.
                L_ = tf.gather(x_, tril_mask)
                return [L_, tf.transpose(L_)]

            tmp = tf.scan(fn, L_flat, initializer=init)
            if isinstance(tmp, (list, tuple)):
                # TensorFlow 0.10 now returns a tuple of tensors.
                L, LT = tmp
            else:
                # Old TensorFlow < 0.10 returns a shared tensor.
                L = tmp[:, 0, :, :]
                LT = tmp[:, 1, :, :]
        else:
            raise RuntimeError('Unknown Keras backend "{}".'.format(K.backend()))
        assert L is not None
        assert LT is not None
        P = K.batch_dot(L, LT)
    elif self.mode == 'diag':
        if K.backend() == 'theano':
            import theano.tensor as T
            import theano

            def fn(x, P_acc):
                # Place the flat values on the diagonal of a zero matrix.
                # The accumulator is not used.
                x_ = K.zeros((self.nb_actions, self.nb_actions))
                x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)], x)
                return x_

            outputs_info = [
                K.zeros((self.nb_actions, self.nb_actions)),
            ]
            P, _ = theano.scan(fn=fn, sequences=L_flat, outputs_info=outputs_info)
        elif K.backend() == 'tensorflow':
            import tensorflow as tf

            # Create mask that can be used to gather elements from L_flat and put them
            # into a diagonal matrix.
            diag_mask = np.zeros((self.nb_actions, self.nb_actions), dtype='int32')
            diag_mask[np.diag_indices(self.nb_actions)] = range(1, self.nb_actions + 1)

            # Add leading zero element to each element in the L_flat. We use this zero
            # element when gathering L_flat into a diagonal matrix: all
            # off-diagonal positions of the mask point at it.
            nb_rows = tf.shape(L_flat)[0]
            zeros = tf.expand_dims(tf.tile(K.zeros((1,)), [nb_rows]), 1)
            try:
                # Old TF behavior.
                L_flat = tf.concat(1, [zeros, L_flat])
            except TypeError:
                # New TF behavior
                L_flat = tf.concat([zeros, L_flat], 1)

            # Finally, process each element of the batch.
            def fn(a, x):
                x_ = tf.gather(x, diag_mask)
                return x_

            P = tf.scan(fn, L_flat, initializer=K.zeros((self.nb_actions, self.nb_actions)))
        else:
            raise RuntimeError('Unknown Keras backend "{}".'.format(K.backend()))
    # NOTE(review): for any other `self.mode` value `P` is never assigned,
    # so the assert below fails with a NameError rather than an
    # AssertionError -- confirm modes are validated elsewhere.
    assert P is not None
    assert K.ndim(P) == 3

    # Combine a, mu and P into a scalar (over the batches). What we compute here is
    # -.5 * (a - mu)^T * P * (a - mu), where * denotes the dot-product. Unfortunately
    # TensorFlow handles vector * P slightly suboptimal, hence we convert the vectors to
    # 1xd/dx1 matrices and finally flatten the resulting 1x1 matrix into a scalar. All
    # operations happen over the batch size, which is dimension 0.
    prod = K.batch_dot(K.expand_dims(a - mu, 1), P)
    prod = K.batch_dot(prod, K.expand_dims(a - mu, -1))
    A = -.5 * K.batch_flatten(prod)
    assert K.ndim(A) == 2
    return A
def tf_discounted_cumulative_reward(self, terminal, reward, discount=None, final_reward=0.0, horizon=0):
    """
    Builds the TensorFlow ops that turn a sequence of single rewards into the
    matching sequence of discounted cumulative rewards.

    Example:
    single rewards = 2.0 1.0 0.0 0.5 1.0 -1.0
    terminal = False, False, False, False True False
    gamma = 0.95
    final_reward = 100.0 (only matters for last episode (r=-1.0) as this episode has no terminal signal)
    horizon=3
    output = 2.95 1.45 1.38 1.45 1.0 94.0

    Args:
        terminal: Tensor (bool) with the is-terminal flags. More than one entry may be True.
            When the very last entry is False, `final_reward` is used as the value that
            follows the last single reward.
        reward: Tensor (float) with the single rewards.
        discount (float): Discount factor (gamma). Falls back to `self.discount` when None.
        final_reward (float): Value to bootstrap from when the last episode in the sequence
            does not terminate. Not used when horizon == 1 (the scan then starts from 0.0),
            and has no effect when discount == 0.0.
        horizon (int): n-step horizon length; 0 (default) means an infinite horizon.

    Returns:
        Discounted cumulative reward tensor, in the same order as the `reward` input.
    """
    if discount is None:
        # Fall back to the model-wide gamma.
        discount = self.discount

    def accumulate_reward(running, step):
        """One scan step of the (n-step) discounted sum; restarts on terminal."""
        rew, is_terminal, is_over_horizon, sub = step
        # Past the horizon: drop the discounted reward from `horizon` steps back.
        beyond_horizon = rew + running * discount - sub
        within_horizon = rew + running * discount
        continued = tf.where(condition=is_over_horizon, x=beyond_horizon, y=within_horizon)
        # A terminal step starts a fresh accumulation.
        return tf.where(condition=is_terminal, x=rew, y=continued)

    def count_steps(steps_so_far, is_terminal):
        """One scan step of the episode-length counter; restarts at 1 on terminal."""
        restart = tf.ones(shape=(), dtype=tf.int32)
        return tf.where(condition=is_terminal, x=restart, y=steps_so_far + 1)

    def scale_by_horizon_discount(x):
        return (discount ** horizon) * x

    # The cumulation runs right-to-left, whereas tf.scan runs left-to-right: flip the inputs.
    reward = tf.reverse(tensor=reward, axis=(0,))  # e.g. -1.0 1.0 0.5 0.0 1.0 2.0
    terminal = tf.reverse(tensor=terminal, axis=(0,))  # e.g. F T F F F F

    # Number of steps until the end of each episode (a True flag restarts the count).
    lengths = tf.scan(fn=count_steps, elems=terminal, initializer=0)  # e.g. 1 1 2 3 4 5
    horizon_limit = tf.fill(dims=tf.shape(lengths), value=horizon)
    off_horizon = tf.greater(lengths, horizon_limit)  # e.g. F F F F T T

    if horizon > 0:
        # Value to subtract at each step once the horizon has been exceeded.
        horizon_subtractions = tf.map_fn(scale_by_horizon_discount, reward, dtype=tf.float32)
        # Shift right by `horizon` entries, padding the front with 0.0 ...
        horizon_subtractions = tf.concat([np.zeros(shape=(horizon,)), horizon_subtractions], axis=0)
        # ... and cut back to the length of the reward sequence.
        # e.g. 0.0, 0.0, 0.0, -1.0*g^3, 1.0*g^3, 0.5*g^3
        horizon_subtractions = tf.slice(horizon_subtractions, begin=(0,), size=tf.shape(reward))
    else:
        # Infinite horizon (special case horizon=0): nothing is ever subtracted.
        horizon_subtractions = tf.zeros(shape=tf.shape(reward))

    if horizon != 1:
        start_value = final_reward
    else:
        start_value = 0.0

    # The actual scan: add the discounted previous sum and subtract the horizon correction.
    reward = tf.scan(
        fn=accumulate_reward,
        elems=(reward, terminal, off_horizon, horizon_subtractions),
        initializer=start_value
    )
    # Flip back so the output lines up with the input sequences.
    return tf.reverse(tensor=reward, axis=(0,))
ends_indices = tf.stack([all_ends[:, 0], range_ends], axis=1) ind = tf.reduce_max(tf.scatter_nd( ends_indices, all_ends[:, 1], [tf.shape(q)[0], tf.shape(all_ends)[0]]), axis=-1) range_ind = tf.range(tf.shape(ind)[0]) mask_ends = tf.cast( tf.scatter_nd(tf.stack([ind, range_ind], axis=1), tf.ones_like(range_ind), [tf.reduce_max(ind) + 1, tf.shape(ind)[0]]), bool) # A bit of a trick. With the locations of the ends of the mask (the last periods in # each of the contexts) as 1 and the rest as 0, we can scan with exclusive or # (starting from all 1). For each context in the batch, this will result in 1s # up until the marker (the location of that last period) and 0s afterwards. mask = tf.scan(tf.logical_xor, mask_ends, tf.ones_like(range_ind, dtype=bool)) # We score each possible word inversely with their Euclidean distance to the regressed word. # The highest score (lowest distance) will correspond to the selected word. logits = -tf.reduce_sum(tf.square(context * tf.transpose( tf.expand_dims(tf.cast(mask, tf.float32), -1), [1, 0, 2]) - logit), axis=-1) print("Done with building model") print("Training...") # Training # gold_standard: The real answers. gold_standard = tf.placeholder(tf.float32, [None, 1, D], "answer") with tf.variable_scope('accuracy'):
def construct_model(self, images, actions, rewards):
    """Build convolutional lstm video predictor using CDNA, or DNA.

    The first frame is processed outside of `tf.scan` so that all variables
    get created; frames `1..-2` are then processed by the scan.

    Args:
        images: ground truth image sequence, indexed by time step along the
            first axis (the batch size is read from axis 1).
            NOTE(review): originally documented as a list of 4D ?xWxHxC
            tensors; the slicing fed to tf.scan suggests a time-major
            tensor -- confirm against the caller.
        actions: action sequence, indexed by time step along the first axis.
        rewards: reward sequence, indexed by time step along the first axis.

    Returns:
        gen_images: predicted future image frames
        gen_rewards: predicted future rewards
        latent_mean: one-element list with the mean of the approximated
            posterior, or `[None]` when `hparams.stochastic_model` is off.
        latent_std: one-element list with the std of the approximated
            posterior, or `[None]` when `hparams.stochastic_model` is off.
    """
    context_frames = self.hparams.video_num_input_frames
    batch_size = common_layers.shape_list(images)[1]
    ss_func = self.get_scheduled_sample_func(batch_size)

    def process_single_frame(prev_outputs, inputs):
        """Process a single frame of the video."""
        cur_image, cur_reward, action = inputs
        time_step, prev_image, prev_reward, lstm_states = prev_outputs

        generated_items = [prev_image, prev_reward]
        groundtruth_items = [cur_image, cur_reward]
        # After the context frames, scheduled sampling may feed predictions back in.
        done_warm_start = tf.greater(time_step, context_frames - 1)
        input_image, input_reward = self.get_scheduled_sample_inputs(
            done_warm_start, groundtruth_items, generated_items, ss_func)

        # Prediction
        pred_image, lstm_states = self.construct_predictive_tower(
            input_image, input_reward, action, lstm_states, latent)

        if self.hparams.reward_prediction:
            reward_input_image = pred_image
            if self.hparams.reward_prediction_stop_gradient:
                reward_input_image = tf.stop_gradient(reward_input_image)
            pred_reward = self.reward_prediction(
                reward_input_image, input_reward, action, latent)
        else:
            pred_reward = input_reward

        time_step += 1
        outputs = (time_step, pred_image, pred_reward, lstm_states)
        return outputs

    # Latent tower
    latent = None
    # Bind the posterior statistics up front so the deterministic model does
    # not hit an UnboundLocalError at the return statement below.
    latent_mean, latent_std = None, None
    if self.hparams.stochastic_model:
        latent_mean, latent_std = self.construct_latent_tower(images)
        latent = self.get_gaussian_latent(latent_mean, latent_std)

    # HACK: Do first step outside to initialize all the variables
    lstm_states = [None] * 7
    inputs = images[0], rewards[0], actions[0]
    prev_outputs = (tf.constant(0),
                    tf.zeros_like(images[0]),
                    tf.zeros_like(rewards[0]),
                    lstm_states)

    initializers = process_single_frame(prev_outputs, inputs)
    first_gen_images = tf.expand_dims(initializers[1], axis=0)
    first_gen_rewards = tf.expand_dims(initializers[2], axis=0)

    inputs = (images[1:-1], rewards[1:-1], actions[1:-1])

    outputs = tf.scan(process_single_frame, inputs, initializers)
    gen_images, gen_rewards = outputs[1:3]

    # Prepend the frame that was generated outside of the scan.
    gen_images = tf.concat((first_gen_images, gen_images), axis=0)
    gen_rewards = tf.concat((first_gen_rewards, gen_rewards), axis=0)

    return gen_images, gen_rewards, [latent_mean], [latent_std]
def rnnrbm():
    """Build the RNN-RBM graph and return its placeholders, cost and parameters.

    Returns:
        A tuple `(x, cost, generate, W, bh, bv, x, lr, Wuh, Wuv, Wvu, Wuu, bu, u0)`
        where `x` is the data placeholder (it appears twice in the tuple),
        `cost` is the free-energy cost built by `RBM.get_free_energy_cost`,
        `generate` is a function that builds the music-generation ops, `lr`
        is the learning-rate placeholder and the remaining entries are the
        model variables.
    """
    # The placeholder variable that holds our data
    x = tf.placeholder(tf.float32, [None, n_visible])
    # The learning rate. We set and change this value during training.
    lr = tf.placeholder(tf.float32)
    # the batch size
    size_bt = tf.shape(x)[0]

    # Here we set aside the space for each of the variables.
    # We initialize these variables when we load saved parameters in
    # rnn_rbm_train.py or rnn_rbm_generate.py
    W = tf.Variable(tf.zeros([n_visible, n_hidden]), name="W")
    Wuh = tf.Variable(tf.zeros([n_hidden_recurrent, n_hidden]), name="Wuh")
    Wuv = tf.Variable(tf.zeros([n_hidden_recurrent, n_visible]), name="Wuv")
    Wvu = tf.Variable(tf.zeros([n_visible, n_hidden_recurrent]), name="Wvu")
    Wuu = tf.Variable(tf.zeros([n_hidden_recurrent, n_hidden_recurrent]), name="Wuu")
    bh = tf.Variable(tf.zeros([1, n_hidden]), name="bh")
    bv = tf.Variable(tf.zeros([1, n_visible]), name="bv")
    bu = tf.Variable(tf.zeros([1, n_hidden_recurrent]), name="bu")
    u0 = tf.Variable(tf.zeros([1, n_hidden_recurrent]), name="u0")
    BH_t = tf.Variable(tf.zeros([1, n_hidden]), name="BH_t")
    BV_t = tf.Variable(tf.zeros([1, n_visible]), name="BV_t")

    def rnn_recurrence(u_tm1, sl):
        # Iterate through the data in the batch and generate the values of the RNN hidden nodes
        sl = tf.reshape(sl, [1, n_visible])
        u_t = (tf.tanh(bu + tf.matmul(sl, Wvu) + tf.matmul(u_tm1, Wuu)))
        return u_t

    def visible_bias_recurrence(bv_t, u_tm1):
        # Iterate through the values of the RNN hidden nodes and generate the values of the
        # visible bias vectors (the incoming accumulator is overwritten, not used)
        bv_t = tf.add(bv, tf.matmul(u_tm1, Wuv))
        return bv_t

    def hidden_bias_recurrence(bh_t, u_tm1):
        # Iterate through the values of the RNN hidden nodes and generate the values of the
        # hidden bias vectors (the incoming accumulator is overwritten, not used)
        bh_t = tf.add(bh, tf.matmul(u_tm1, Wuh))
        return bh_t

    def generate_recurrence(count, k, u_tm1, primer, x, music):
        # This function builds and runs the gibbs steps for each RBM in the chain to generate music
        # Get the bias vectors from the current state of the RNN
        bv_t = tf.add(bv, tf.matmul(u_tm1, Wuv))
        bh_t = tf.add(bh, tf.matmul(u_tm1, Wuh))

        # Run the Gibbs step to get the music output. Prime the RBM with the previous musical output.
        x_out = RBM.gibbs_sample(primer, W, bv_t, bh_t, k=25)

        # Update the RNN hidden state based on the musical output and current hidden state.
        u_t = (tf.tanh(bu + tf.matmul(x_out, Wvu) + tf.matmul(u_tm1, Wuu)))
        # Add the new output to the musical piece
        music = tf.concat(axis=0, values=[music, x_out])
        return count + 1, k, u_t, x_out, x, music

    def generate(num, x=x, size_bt=size_bt, u0=u0, n_visible=n_visible, prime_length=100):
        """
        Build the ops that generate music.

        Args:
            num (int): The number of time steps to generate
            x (tf.placeholder): The data vector. We can use feed_dict to set this to the music primer.
            size_bt (tf.float32): The batch size (not used inside this function)
            u0 (tf.Variable): The initial state of the RNN
            n_visible (int): The size of the data vectors
            prime_length (int): The number of time steps into the primer song that we use
                before beginning to generate music
        Returns:
            The generated music, as a tf.Tensor
        """
        Uarr = tf.scan(rnn_recurrence, x, initializer=u0)
        # RNN state after the primer; the index must be a Python int.
        U = Uarr[int(np.floor(prime_length / midi_manipulation.num_timesteps)), :, :]

        time_steps = tf.constant(1, tf.int32)
        iterations = tf.constant(num)
        primer = tf.zeros([1, n_visible], tf.float32)
        music = tf.zeros([1, n_visible], tf.float32)
        loop_vars = [time_steps, iterations, U, primer, x, music]
        [_, _, _, _, _, music] = tf.while_loop(
            lambda count, num_iter, *args: count < num_iter,
            generate_recurrence,
            loop_vars,
            shape_invariants=[
                time_steps.get_shape(),
                iterations.get_shape(),
                U.get_shape(),
                primer.get_shape(),
                x.get_shape(),
                # `music` grows by one row per iteration; its width is the
                # data-vector size (previously hard-coded as 780).
                tf.TensorShape([None, n_visible]),
            ])
        return music

    # Reshape our bias matrices to be the same size as the batch.
    # NOTE(review): the returned assign ops are discarded, so in graph mode
    # they are presumably never executed -- confirm whether they are needed.
    tf.assign(BH_t, tf.tile(BH_t, [size_bt, 1]))
    tf.assign(BV_t, tf.tile(BV_t, [size_bt, 1]))
    # Scan through the rnn and generate the value for each hidden node in the batch
    u_t = tf.scan(rnn_recurrence, x, initializer=u0)
    # Scan through the rnn and generate the visible and hidden biases for each RBM in the batch
    BV_t = tf.reshape(
        tf.scan(visible_bias_recurrence, u_t, tf.zeros([1, n_visible], tf.float32)),
        [size_bt, n_visible])
    BH_t = tf.reshape(
        tf.scan(hidden_bias_recurrence, u_t, tf.zeros([1, n_hidden], tf.float32)),
        [size_bt, n_hidden])
    # Get the free energy cost from each of the RBMs in the batch
    cost = RBM.get_free_energy_cost(x, W, BV_t, BH_t, k=15)
    return x, cost, generate, W, bh, bv, x, lr, Wuh, Wuv, Wvu, Wuu, bu, u0
def define_ppo_epoch(memory, policy_factory, config):
    """Build the ops for one PPO optimization epoch over collected rollouts.

    Args:
        memory: 6-tuple `(observation, reward, done, action, old_pdf, value)`.
        policy_factory: forwarded unchanged to `define_ppo_step`.
        config: hyper-parameter object; reads `gae_gamma`, `gae_lambda`,
            `epoch_length`, `optimization_epochs`, `optimization_batch_size`
            and, when present, `rewards_preprocessing_fun`.

    Returns:
        The merged loss summary, wrapped in `tf.Print` ops that print each
        averaged statistic.
    """
    observation, reward, done, action, old_pdf, value = memory

    # Block gradients so that nothing propagates through the simulated environment.
    observation = tf.stop_gradient(observation)
    action = tf.stop_gradient(action)
    reward = tf.stop_gradient(reward)
    if hasattr(config, "rewards_preprocessing_fun"):
        reward = config.rewards_preprocessing_fun(reward)
    done = tf.stop_gradient(done)
    value = tf.stop_gradient(value)
    old_pdf = tf.stop_gradient(old_pdf)

    advantage = calculate_generalized_advantage_estimator(
        reward, value, done, config.gae_gamma, config.gae_lambda)

    discounted_reward = tf.stop_gradient(advantage + value)

    # Normalize the advantage over its first two axes.
    advantage_mean, advantage_variance = tf.nn.moments(
        advantage, axes=[0, 1], keep_dims=True)
    centered = advantage - advantage_mean
    scale = tf.sqrt(advantage_variance) + 1e-8
    advantage_normalized = tf.stop_gradient(centered / scale)

    number_of_batches = (config.epoch_length * config.optimization_epochs
                         / config.optimization_batch_size)

    training_tensors = (observation, action, discounted_reward,
                        advantage_normalized, old_pdf)
    dataset = tf.data.Dataset.from_tensor_slices(training_tensors)
    dataset = dataset.shuffle(buffer_size=config.epoch_length,
                              reshuffle_each_iteration=True)
    dataset = dataset.repeat(config.optimization_epochs)
    dataset = dataset.batch(config.optimization_batch_size)
    iterator = dataset.make_initializable_iterator()
    optimizer = get_optimiser(config)

    def accumulate_step(totals, _):
        """Run one PPO step on the next batch and add its stats to the totals."""
        step_rets = define_ppo_step(
            iterator.get_next(), policy_factory, optimizer, config)
        return [total + ret for total, ret in zip(totals, step_rets)]

    with tf.control_dependencies([iterator.initializer]):
        ppo_step_rets = tf.scan(
            accumulate_step,
            tf.range(number_of_batches),
            [0., 0., 0., 0., 0., 0.],
            parallel_iterations=1)

    ppo_summaries = []
    for ret in ppo_step_rets:
        ppo_summaries.append(tf.reduce_mean(ret) / number_of_batches)

    summaries_names = [
        "policy_loss", "value_loss", "entropy_loss",
        "policy_gradient", "value_gradient", "entropy_gradient",
    ]

    summaries = []
    for label, stat in zip(summaries_names, ppo_summaries):
        summaries.append(tf.summary.scalar(label, stat))

    losses_summary = tf.summary.merge(summaries)

    # Chain one Print op per statistic onto the merged summary.
    for label, stat in zip(summaries_names, ppo_summaries):
        losses_summary = tf.Print(losses_summary, [stat], label + ": ")

    return losses_summary
def _build_precision_matrix(self):
    """Assemble the blocks of the precision matrix Sinv and decompose it.

    Reads `self.r_psi_sqrt`, `self.A`, `self.Q_inv`, `self.Q0_inv`,
    `self.dim_latent` and `self.num_time_pts`; writes `self.c_psi_inv`,
    `self.AQ0_invA_Q_inv`, `self.AQ_invA_Q_inv`, `self.AQ0_inv`,
    `self.AQ_inv`, `self.Sinv_diag` and `self.chol_decomp_Sinv`.
    """
    # get inverse of data-dependent covariances
    # (product of r_psi_sqrt with itself, last two axes swapped; r_psi_sqrt
    # is rank 4, as fixed by the length-4 permutation)
    self.c_psi_inv = tf.matmul(
        self.r_psi_sqrt,
        tf.transpose(self.r_psi_sqrt, perm=[0, 1, 3, 2]),
        name='precision_diag_data_dep')

    if self.dim_latent > 1:
        # matrix-valued dynamics: A Q^-1 A^T + Q^-1 and -A Q^-1
        self.AQ0_invA_Q_inv = tf.matmul(
            tf.matmul(self.A, self.Q0_inv), self.A, transpose_b=True) \
            + self.Q_inv
        self.AQ_invA_Q_inv = tf.matmul(
            tf.matmul(self.A, self.Q_inv), self.A, transpose_b=True) \
            + self.Q_inv
        self.AQ0_inv = tf.matmul(-self.A, self.Q0_inv)
        self.AQ_inv = tf.matmul(-self.A, self.Q_inv)
    else:
        # single latent dimension: same expressions with element-wise products
        self.AQ0_invA_Q_inv = tf.multiply(tf.multiply(self.A, self.Q0_inv), self.A) + self.Q_inv
        self.AQ_invA_Q_inv = tf.multiply(tf.multiply(self.A, self.Q_inv), self.A) + self.Q_inv
        self.AQ0_inv = tf.multiply(-self.A, self.Q0_inv)
        self.AQ_inv = tf.multiply(-self.A, self.Q_inv)

    # put together components of precision matrix Sinv in tensor of
    # shape [batch_size, num_time_pts, dim_latent, dim_latent]
    Sinv_diag = tf.tile(tf.expand_dims(self.AQ_invA_Q_inv, 0),
                        [self.num_time_pts - 2, 1, 1])
    # static diagonal blocks: Q0_inv first, then the Q0-based block, then
    # (num_time_pts - 2) copies of the Q-based block
    Sinv_diag = tf.concat([
        tf.expand_dims(self.Q0_inv, 0),
        tf.expand_dims(self.AQ0_invA_Q_inv, 0),
        Sinv_diag
    ], axis=0, name='precision_diag_static')
    # add the data-dependent part to the static diagonal blocks
    self.Sinv_diag = tf.add(Sinv_diag, self.c_psi_inv, name='precision_diag')

    Sinv_ldiag = tf.tile(tf.expand_dims(self.AQ_inv, 0),
                         [self.num_time_pts - 2, 1, 1],
                         name='precision_lower_diag')
    # lower off-diagonal blocks: the Q0-based block followed by the tiled Q-based blocks
    Sinv_ldiag0 = tf.concat([tf.expand_dims(self.AQ0_inv, 0), Sinv_ldiag], axis=0)

    # we now have Sinv (represented as diagonal and off-diagonal
    # blocks); to sample from the posterior we need the square root
    # of the inverse of Sinv; fortunately this is fast given the
    # tridiagonal block structure of Sinv. First we'll compute the
    # Cholesky decomposition of Sinv, then calculate the inverse using
    # that decomposition

    # get cholesky decomposition for each element in batch
    def scan_chol(_, inputs):
        """inputs refer to diagonal blocks, outputs the L/U matrices"""
        # the accumulator is ignored; the off-diagonal blocks come from the closure
        # NOTE(review): blk_tridiag_chol is defined elsewhere; its exact
        # return structure is not visible here
        chol_decomp_Sinv = blk_tridiag_chol(inputs, Sinv_ldiag0)
        return chol_decomp_Sinv

    self.chol_decomp_Sinv = tf.scan(
        fn=scan_chol, elems=self.Sinv_diag,
        initializer=[Sinv_diag, Sinv_ldiag0],  # throwaway to get scan
        name='precision_chol_decomp')  # to behave
def build(self, We, Wx, Wh, bh, h0, Wo, bo):
    """Create the variables, graph, loss and training op, then initialize them.

    Args:
        We: initial value of the word-embedding matrix.
        Wx: initial value of the input-to-hidden weights.
        Wh: initial value of the hidden-to-hidden weights.
        bh: initial value of the hidden bias.
        h0: initial value of the initial hidden state.
        Wo: initial value of the hidden-to-output weights.
        bo: initial value of the output bias.

    Side effects:
        Sets the variables, `self.params`, the placeholders `self.tfX` /
        `self.tfY`, `self.output_probs`, `self.cost`, `self.predict_op` and
        `self.train_op`, and runs the global variable initializer in
        `self.session`.
    """
    # make them tf Variables
    self.We = tf.Variable(We)
    self.Wx = tf.Variable(Wx)
    self.Wh = tf.Variable(Wh)
    self.bh = tf.Variable(bh)
    self.h0 = tf.Variable(h0)
    self.Wo = tf.Variable(Wo)
    self.bo = tf.Variable(bo)
    self.params = [
        self.We, self.Wx, self.Wh, self.bh, self.h0, self.Wo, self.bo
    ]

    # for easy access
    V = self.V
    M = self.M

    # placeholders
    self.tfX = tf.placeholder(tf.int32, shape=(None, ), name='X')
    self.tfY = tf.placeholder(tf.int32, shape=(None, ), name='Y')

    # convert word indexes to word vectors
    # this would be equivalent to doing
    # We[tfX] in Numpy / Theano
    # or:
    # X_one_hot = one_hot_encode(X)
    # X_one_hot.dot(We)
    # Look up rows of the *variable* self.We: using the raw `We` argument
    # would bake the embedding in as a constant that is never trained.
    XW = tf.nn.embedding_lookup(self.We, self.tfX)

    # NOTE(review): self.Wx is created and listed in self.params but never
    # applied to XW below -- confirm whether this is intentional.
    def recurrence(h_t1, xWe_t):
        # returns h(t)
        h_t1 = tf.reshape(h_t1, (1, M))
        h_t = self.f(xWe_t + tf.matmul(h_t1, self.Wh) + self.bh)
        h_t = tf.reshape(h_t, (M, ))
        return h_t

    h = tf.scan(
        fn=recurrence,
        elems=XW,
        initializer=self.h0,
    )

    # output
    logits = tf.matmul(h, self.Wo) + self.bo
    prediction = tf.argmax(logits, 1)
    self.output_probs = tf.nn.softmax(logits)

    # sampled softmax expects the class dimension first, so transpose Wo
    nce_weights = tf.transpose(self.Wo, [1, 0])
    nce_biases = self.bo

    h = tf.reshape(h, (-1, M))
    labels = tf.reshape(self.tfY, (-1, 1))

    self.cost = tf.reduce_mean(
        tf.nn.sampled_softmax_loss(
            weights=nce_weights,
            biases=nce_biases,
            labels=labels,
            inputs=h,
            num_sampled=50,  # number of negative samples
            num_classes=V))

    self.predict_op = prediction
    self.train_op = tf.train.AdamOptimizer(1e-2).minimize(self.cost)

    # init all variables
    init = tf.global_variables_initializer()
    self.session.run(init)
def forward(observations, transitions, viterbi=False, return_alpha=False, return_best_sequence=False):
    """
    Run the forward (or Viterbi) recursion over a sequence of scores.

    Inputs (all probabilities in log space):
        - observations, sequence of shape (n_steps, n_classes)
        - transitions, matrix of shape (n_classes, n_classes)

    The recursion builds alpha, one row per step after the first, where
    alpha[i, j] is either
        - the log-sum-exp over all paths ending in class j (default), or
        - the score of the best path ending in class j (viterbi=True).

    Returns, depending on the flags:
        - alpha itself (return_alpha=True)
        - the best class sequence (return_best_sequence=True; requires
          viterbi=True and return_alpha=False)
        - otherwise the final score: the log-sum-exp over all paths, or the
          score of the best path when viterbi=True
    """
    assert not return_best_sequence or (viterbi and not return_alpha)

    def recurrence(prev, obs):
        # When tracking back-pointers the accumulator is [scores, pointers].
        previous = prev[0] if return_best_sequence else prev
        previous = tf.expand_dims(previous, 1)
        obs = tf.expand_dims(obs, 0)
        scores = previous + obs + transitions
        if not viterbi:
            return log_sum_exp(scores, axis=0)
        best = tf.reduce_max(scores, axis=0)
        if not return_best_sequence:
            return best
        return [best, tf.argmax(scores, axis=0)]

    first_scores = observations[0]
    ones = tf.ones(tf.shape(first_scores), dtype=tf.int64)
    if return_best_sequence:
        initial = [first_scores, ones]
    else:
        initial = first_scores

    alpha = tf.scan(
        fn=recurrence,
        elems=observations[1:],
        initializer=initial
    )

    if return_alpha:
        return alpha

    if return_best_sequence:
        # Start from the best final class and follow the back-pointers in reverse.
        last_best = tf.cast(tf.argmax(alpha[0][-1], axis=0), tf.int32)
        sequence = tf.scan(
            fn=lambda previous, beta_i: beta_i[previous],
            elems=tf.cast(alpha[1][::-1], tf.int32),
            initializer=last_best
        )
        path_head = sequence[::-1]
        path_tail = [tf.cast(tf.argmax(alpha[0][-1], axis=0), tf.int32)]
        sequence = tf.concat([path_head, path_tail], axis=0)
        return sequence

    if viterbi:
        return tf.reduce_max(alpha[-1], axis=0)
    return log_sum_exp(alpha[-1], axis=0)
def integrate_steps(
    model: models.TimeStepModel,
    state: KeyedTensors,
    steps: ArrayLike,
    initial_time: float = 0.0,
    axis: int = 0,
    xla_compile: bool = False,
) -> KeyedTensors:
    """Integrate a model forward, saving the state at the requested steps.

    Args:
        model: model to integrate.
        state: starting value of the state.
        steps: step counts at which the solution is saved.
        initial_time: accepted for interface compatibility; currently unused.
        axis: axis in the result tensors along which the saved solutions are
            stacked.
        xla_compile: whether to wrap the per-save integration with
            `_xla_decorator`.

    Returns:
        Time evolved states at the steps given in `steps`. Evolving tensors
        gain one extra dimension holding the saved values; constant tensors
        are broadcast along that same dimension.
    """
    # TODO(shoyer): explicitly include time?
    del initial_time  # unused

    state = nest.map_structure(tf.convert_to_tensor, state)
    steps = tf.convert_to_tensor(steps, dtype=tf.int32)

    # Split the state into the part that never changes and the part that is advanced.
    constant_state = {
        key: tensor for key, tensor in state.items()
        if key in model.equation.constant_keys
    }
    evolving_state = {
        key: tensor for key, tensor in state.items()
        if key in model.equation.evolving_keys
    }

    def advance_until_saved_step(evolving_state, start_stop):
        """Integrate until the next step at which to save results."""
        start, stop = start_stop

        def not_yet_at_stop(_, i):
            return i < stop

        def take_one_step(current, i):
            # The model sees the evolving tensors together with the constants.
            advanced = model.take_time_step({**current, **constant_state})
            return (advanced, i + 1)

        result, _ = tf.while_loop(
            not_yet_at_stop,
            take_one_step,
            loop_vars=(evolving_state, start),
        )
        return result

    if xla_compile:
        advance_until_saved_step = _xla_decorator(advance_until_saved_step)

    # Each scan iteration integrates from the previous save point to the next.
    starts = tf.concat([[0], steps[:-1]], axis=0)
    integrated = tf.scan(advance_until_saved_step, [starts, steps],
                         initializer=evolving_state)

    def repeat_for_each_save(x):
        return tf.broadcast_to(x, steps.shape.as_list() + x.shape.as_list())

    integrated_constants = nest.map_structure(repeat_for_each_save, constant_state)
    integrated.update(integrated_constants)

    return tensor_ops.moveaxis(integrated, 0, axis)
def prediction_pmstrnn(self):
    """Unroll `model_step_pmstrnn` over time with `tf.scan`.

    Uses the training inputs when `self._isThisTrain` is set and the
    window inputs otherwise. The initial states of all six recurrent
    layers are looked up by `self._idxd` from the `_myInit_*` variables.

    Returns:
        `(pred_prop, c_p1, c_p2, c_p3, pred_vision, c_v1, c_v2, c_v3,
        input_t, init_state, self._idxd)`: the transposed scan outputs
        followed by the scan inputs, the initial state and the lookup
        indices.
    """
    print('=' * 100)
    print('@ prediction _pmstrnn')

    # Pick the input source, then swap the first two axes so that scan
    # iterates over what was axis 1 of the inputs.
    if self._isThisTrain:
        vision_in, prop_in = self._v_in, self._prop_in
    else:
        vision_in, prop_in = self._windInput_vision, self._windInput_prop
    v_t = tf.transpose(vision_in, perm=[1, 0, 2, 3])
    m_t = tf.transpose(prop_in, perm=[1, 0, 2, 3])
    input_t = (m_t, v_t)

    def lookup_initial_state(h_table, y_table, flat_units=None):
        """Read the initial h / y for the current indices as an LSTMStateTuple(y, h).

        h: internal states, y: activation value. When `flat_units` is
        given, both are reshaped to [-1, flat_units].
        """
        h_init = tf.nn.embedding_lookup(h_table, self._idxd)
        if flat_units is not None:
            h_init = tf.reshape(h_init, [-1, flat_units])
        y_init = tf.nn.embedding_lookup(y_table, self._idxd)
        if flat_units is not None:
            y_init = tf.reshape(y_init, [-1, flat_units])
        return tf.nn.rnn_cell.LSTMStateTuple(y_init, h_init)

    # Prop. Fast / Mid / Slow
    new_c_p1 = lookup_initial_state(self._myInit_p1_h, self._myInit_p1_y, self._p1_unit)
    new_c_p2 = lookup_initial_state(self._myInit_p2_h, self._myInit_p2_y, self._p2_unit)
    new_c_p3 = lookup_initial_state(self._myInit_p3_h, self._myInit_p3_y)
    # Vision Fast / Mid / Slow
    new_c_v1 = lookup_initial_state(self._myInit_v1_h, self._myInit_v1_y)
    new_c_v2 = lookup_initial_state(self._myInit_v2_h, self._myInit_v2_y)
    new_c_v3 = lookup_initial_state(self._myInit_v3_h, self._myInit_v3_y)

    if self._isThisTrain:
        first_prop, first_vision = self._prop_init, self._v_init
    else:
        first_prop, first_vision = self._windInit_prop, self._windInit_vision
    init_state = (first_prop, new_c_p1, new_c_p2, new_c_p3,
                  first_vision, new_c_v1, new_c_v2, new_c_v3)

    def scan_step(previous, current):
        # model_step_pmstrnn takes (inputs, state), the reverse of scan's order.
        return self.model_step_pmstrnn(current, previous)

    scan_outputs = tf.scan(scan_step, input_t, initializer=init_state)
    (pred_prop_t, c_p1_t, c_p2_t, c_p3_t,
     pred_vision_t, c_v1_t, c_v2_t, c_v3_t) = scan_outputs

    # Undo the axis swap on the predictions and reorder the stacked states.
    pred_prop = tf.transpose(pred_prop_t, perm=[1, 0, 2, 3], name='pred_prop')
    c_p1 = tf.transpose(c_p1_t, perm=[2, 1, 0, 3], name='states_propFast')
    c_p2 = tf.transpose(c_p2_t, perm=[2, 1, 0, 3], name='states_propMid')
    c_p3 = tf.transpose(c_p3_t, perm=[2, 1, 0, 3, 4, 5], name='states_propSlow')

    pred_vision = tf.transpose(pred_vision_t, perm=[1, 0, 2, 3], name='pred_vision')
    c_v1 = tf.transpose(c_v1_t, perm=[2, 1, 0, 3, 4, 5], name='states_visionFast')
    c_v2 = tf.transpose(c_v2_t, perm=[2, 1, 0, 3, 4, 5], name='states_visionMid')
    c_v3 = tf.transpose(c_v3_t, perm=[2, 1, 0, 3, 4, 5], name='states_visionSlow')

    return (pred_prop, c_p1, c_p2, c_p3,
            pred_vision, c_v1, c_v2, c_v3,
            input_t, init_state, self._idxd)
name='inputs') y = tf.placeholder(tf.float32, shape=[None, NUM_CLASSES], name='labels') # Training data set in required batch size batch_x, batch_y = mnist.train.next_batch(BATCH_SIZE) # Reshape data to get 28 sequences of 28 pixels batch_x = batch_x.reshape((BATCH_SIZE, TIME_STEPS, INPUT_SIZE)) # Transpose the input data, tensorflow scan interates on the first dimension of the input data. # Initialize hidden states of RNN processed_input = tf.transpose(_inputs, perm=[1, 0, 2]) initial_hidden = tf.zeros([BATCH_SIZE, HIDDEN_LAYER_SIZE]) # Compute states for all rnn steps all_hidden_states = tf.scan(rnn.rnn, processed_input, initializer=initial_hidden, name='states') # Compute linear layer all_outputs = tf.map_fn(rnn.linear_layer, all_hidden_states) # We need only the final layer output output = all_outputs[-1] # Compute loss (Here we are doing cross_entropy) cross_entropy = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits_v2(logits=output, labels=y)) # Training using Adam optimizer train_step = tf.train.AdamOptimizer().minimize(cross_entropy) # Compute prediction correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(output, 1)) # Compute accuracy
def scan(cls, node, input_dict, strict):
    """Translate an ONNX Scan node into TensorFlow ops built on `tf.scan`.

    Args:
        cls: handler class; `DOMAIN`, `VERSION` and `SINCE_VERSION` are read
            from it. Version 8 nodes carry an extra leading `sequence_lens`
            input and are processed per batch element with `tf.map_fn`.
        node: the Scan node; its `attrs`, `inputs` and `outputs` are read.
        input_dict: mapping from input names to already-converted tensors.
        strict: forwarded to `onnx_graph_to_tensorflow_ops` for the body graph.

    Returns:
        A list with the final values of the state variables followed by the
        scan outputs.
    """
    current_opset = [make_opsetid(cls.DOMAIN, cls.VERSION)]

    # the loop body sub-graph
    body = node.attrs["body"]

    # in version 8, node.inputs[0] is the sequence_lens
    node_inputs = node.inputs if cls.SINCE_VERSION != 8 else \
        node.inputs[1:]
    # M
    num_scan_inputs = int(node.attrs["num_scan_inputs"])
    # N = num_inputs - M
    num_state_vars = len(node_inputs) - num_scan_inputs
    # K = num_outputs - N
    num_scan_outputs = len(node.outputs) - num_state_vars

    """ Function to run subgraph used with tf.scan """

    def run_subgraph(a, b):
        # a: accumulator passed by tf.scan (state variables come first)
        # b: the current slice of every scan input
        input_values = {}
        # set the input values for the subgraph
        # set the values for the state variables
        for i in range(num_state_vars):
            input_values[body.input[i].name] = a[i]
        # set the values for the scan inputs
        for i in range(num_scan_inputs):
            input_values[body.input[i + num_state_vars].name] = b[i]

        # get the tensor operations for the onnx graph
        tensor_dict = \
            onnx_tf.backend.onnx_graph_to_tensorflow_ops(
                graph_def=body,
                input_values=input_values,
                opset=current_opset,
                strict=strict)
        # return sequence of tensors for every subgraph output
        outputs = [tensor_dict[output.name] for output in body.output]
        return outputs

    # axes and directions default to 0 for every scan input / output
    scan_input_axes = node.attrs.get("scan_input_axes",
                                     [0] * num_scan_inputs)
    scan_input_directions = node.attrs.get(
        "directions" if cls.SINCE_VERSION == 8 else "scan_input_directions",
        [0] * num_scan_inputs)
    scan_output_axes = node.attrs.get("scan_output_axes",
                                      [0] * num_scan_outputs)
    scan_output_directions = node.attrs.get("scan_output_directions",
                                            [0] * num_scan_outputs)

    # if version 8 read the sequence_lens from the first input
    # (sequence_lens is only bound on the version 8 path)
    if cls.SINCE_VERSION == 8:
        sequence_lens = input_dict[node.inputs[0]] \
            if node.inputs[0] != '' else None

    inputs = [input_dict[node_input] for node_input in node_inputs]

    # state variables come first in `inputs`; the rest are the scan inputs
    scan_inputs = inputs[num_state_vars:]
    # loop over all the scan inputs and apply transpose depending
    # on input axes provided and also reverse the scan inputs if
    # reverse direction for scan is provided
    for i in range(num_scan_inputs):
        # if input axes are different than 0, use transpose to scan over
        # the provided axes
        if scan_input_axes[i] != 0:
            transpose_perm = cls._calc_transpose_perm_input(
                tf.rank(scan_inputs[i]), scan_input_axes[i])
            scan_inputs[i] = tf.transpose(scan_inputs[i], transpose_perm)

        # check for reverse direction scans
        if scan_input_directions[i] == 1:
            # version 8 has a batch dimension
            axis = 0 if cls.SINCE_VERSION != 8 else 1
            scan_inputs[i] = tf.reverse(scan_inputs[i], [axis])

    state_vars_init = inputs[:num_state_vars]

    scan_outputs_init = []
    # generate sequence of zero tensors for all scan outputs
    # with the correct shape and dtype
    for scan_output in body.output[num_state_vars:]:
        tensor_type = scan_output.type.tensor_type
        # dimensions that are symbolic or non-positive become None
        shape = [
            d.dim_value if (d.dim_value > 0 and d.dim_param == "") else None
            for d in tensor_type.shape.dim
        ]
        dtype = data_type.onnx2tf(tensor_type.elem_type)
        scan_outputs_init.append(tf.zeros(shape, dtype=dtype))

    # tf.scan initializer is state_variables_init + scan_outputs_init
    initializer = state_vars_init + scan_outputs_init

    if cls.SINCE_VERSION == 8:
        # version == 8
        # function to process the batches. it is used with tf.map_fn
        def run_batches(x):
            # state vars initial values per batch
            initial = x[0]
            # scan inputs per batch
            scan_inputs = x[1]
            # sequence length for the batch
            seq_len = x[2]

            # slice the input to the current sequence len
            scan_inputs = [
                scan_input[:seq_len, ...] for scan_input in scan_inputs
            ]

            # run scan on the current batch
            out = tf.scan(run_subgraph,
                          scan_inputs,
                          initializer=initial + scan_outputs_init)

            # pad to the original shape with zeros
            # (only the leading axis is padded, up to the unsliced length)
            paddings = [[
                0,
                tf.shape(x[1][0], out_type=seq_len.dtype)[0] - seq_len
            ]]
            for i in range(len(out)):
                pads = tf.concat([
                    paddings,
                    tf.zeros([(tf.rank(out[i]) - 1), 2], dtype=tf.int32)
                ], axis=0)
                out[i] = tf.pad(out[i], pads)
            return out

        if sequence_lens is None:
            # if sequence_lens is None, fill it with the shape of
            # the input axis 1
            sequence_lens = tf.fill([tf.shape(scan_inputs[0])[0]],
                                    tf.shape(scan_inputs[0],
                                             out_type=tf.int32)[1])

        output_types = [
            data_type.onnx2tf(output.type.tensor_type.elem_type)
            for output in body.output
        ]
        # run scan for every batch
        out = tf.map_fn(run_batches,
                        (state_vars_init, scan_inputs, sequence_lens),
                        dtype=output_types)

        state_vars_outputs = []
        # extract the final values of the state variables
        # (the entry at index sequence_len - 1 of each batch element)
        for state_var in out[:num_state_vars]:
            state_vars_outputs.append(
                tf.map_fn(lambda x: x[0][x[1] - 1],
                          (state_var, sequence_lens), state_var.dtype))
    else:
        # version > 8
        # run the scan
        out = tf.scan(run_subgraph, scan_inputs, initializer=initializer)

        # extract the final values of the state variables
        state_vars_outputs = [
            state_var[tf.shape(state_var)[0] - 1]
            for state_var in out[:num_state_vars]
        ]

    scan_outputs = out[num_state_vars:]

    # post process the scan outputs depending on the directions and
    # axes provided.
    for i in range(num_scan_outputs):
        # check for reverse direction scan outputs
        if scan_output_directions[i] == 1:
            scan_outputs[i] = tf.reverse(scan_outputs[i], [0])

        if scan_output_axes[i] != 0:
            transpose_perm = cls._calc_transpose_perm_output(
                tf.rank(scan_outputs[i]), scan_output_axes[i])
            scan_outputs[i] = tf.transpose(scan_outputs[i], transpose_perm)

    return state_vars_outputs + scan_outputs
def tf_discount_rewards(tf_r):
    """Build the op that turns per-step rewards into discounted returns.

    The rewards are flipped along the first dimension, accumulated with
    ``running * gamma + reward`` via ``tf.scan`` (no initializer, so the
    first scanned element seeds the accumulator), and flipped back.  The
    result therefore satisfies ``out[t] = tf_r[t] + gamma * out[t + 1]``.

    Args:
        tf_r: reward tensor; the original comment gives its layout as
            ``[game_steps, 1]``.

    Returns:
        The tensor of discounted rewards produced by the final reverse.

    Note:
        ``gamma`` is not a parameter; it is looked up in the enclosing scope.
    """
    def _accumulate(running, reward):
        # One step of the backwards recursion.
        return running * gamma + reward

    # Legacy boolean form of tf.reverse: flip dimension 0, keep dimension 1.
    flip_first_dim = [True, False]

    rewards_backwards = tf.reverse(tf_r, flip_first_dim)
    returns_backwards = tf.scan(_accumulate, rewards_backwards)
    return tf.reverse(returns_backwards, flip_first_dim)
def noniso_KLD(self, mu, log_sigma_sq):
    """Return the KL-style penalty between a diagonal Gaussian and the prior.

    Evaluates, per sample,

        0.5 * ( tr(P @ diag(exp(log_sigma_sq)))
                + (mu_prior - mu) P (mu_prior - mu)^T
                - k
                + log det(cov_prior)
                - sum(log_sigma_sq) )

    where ``P = inverse(self.tf_cov_prior)`` and ``k = self.cov_prior.shape[0]``.

    Args:
        mu: means of the approximate posterior.
        log_sigma_sq: log-variances of the approximate posterior.
            # assumes both are [batch, k]: they are reduced over axis 1 and
            # combined with the k x k prior -- TODO confirm with callers.

    Returns:
        A tensor with one value per sample (every tensor term is reduced
        over axis 1).

    Reads ``self.tf_cov_prior`` / ``self.tf_mu_prior`` (tensors) and
    ``self.cov_prior`` (NumPy array used for the constant terms).
    """
    # Build the inverse once instead of once per term.
    prior_precision = tf.matrix_inverse(self.tf_cov_prior)
    mean_diff = tf.subtract(self.tf_mu_prior, mu)

    # trace(P @ diag(s)) == sum_i P[i, i] * s[i].
    # The previous implementation mapped the matmul with tf.scan and no
    # initializer; tf.scan then copies elems[0] straight to the output, so
    # the first sample's trace was computed WITHOUT the prior precision.
    trace_term = tf.reduce_sum(
        tf.multiply(tf.matrix_diag_part(prior_precision),
                    tf.exp(log_sigma_sq)),
        1)

    # Row-wise quadratic form (mu_prior - mu) P (mu_prior - mu)^T.
    quadratic_term = tf.reduce_sum(
        tf.multiply(tf.matmul(mean_diff, prior_precision), mean_diff),
        1)

    return 0.5 * (
        trace_term
        + quadratic_term
        - float(self.cov_prior.shape[0])
        + np.log(np.linalg.det(self.cov_prior))
        - tf.reduce_sum(log_sigma_sq, 1)
    )
def overshooting(cell, target, embedded, prev_action, length, amount,
                 ignore_input=False):
    """Perform open loop rollouts from the posteriors at every step.

    First, we apply the encoder to embed raw inputs and apply the model to
    obtain posterior states for every time step. Then, we perform `amount`
    long open loop rollouts from these posteriors.

    Note that the actions should be those leading to the current time step.
    So under common convention, it contains the last actions while
    observations are the current ones.

    Input:

      target, embedded:
        [A B C D E F] [A B C D E  ]

      prev_action:
        [0 A B C D E] [0 A B C D  ]

      length:
        [6 5]

      amount:
        3

    Output:

      prior, posterior, target:
        [A B C D E F] [A B C D E  ]
        [B C D E F  ] [B C D E    ]
        [C D E F    ] [C D E      ]
        [D E F      ] [D E        ]

      mask:
        [1 1 1 1 1 1] [1 1 1 1 1 0]
        [1 1 1 1 1 0] [1 1 1 1 0 0]
        [1 1 1 1 0 0] [1 1 1 0 0 0]
        [1 1 1 0 0 0] [1 1 0 0 0 0]

    Args:
      cell: RNN cell handed to `tf.nn.dynamic_rnn`; its per-step output is
        unpacked as a `(prior, posterior)` pair.
      target: Targets aligned with `embedded`; shifted together with it.
      embedded: Embedded observations (possibly a nested structure; its
        first flattened tensor defines the batch and time sizes).
      prev_action: Actions leading to each time step.
      length: Valid sequence length per batch entry.
      amount: Number of additional shifted copies produced by the scan,
        i.e. the overshooting distance.
      ignore_input: If true, the closed loop pass runs with the `use_obs`
        flag set to False everywhere.

    Returns:
      Tuple `(target, prior, posterior, mask)` -- note the order differs
      from the illustration heading above. `mask` is cast to `tf.bool`.
    """
    # Closed loop unroll to get posterior states, which are the starting
    # points for open loop unrolls. We don't need the last time step, since
    # we have no targets for unrolls from it.
    use_obs = tf.ones(
        tf.shape(nested.flatten(embedded)[0][:, :, :1])[:3], tf.bool)
    use_obs = tf.cond(
        tf.convert_to_tensor(ignore_input),
        lambda: tf.zeros_like(use_obs, tf.bool),
        lambda: use_obs)
    # NOTE: a debug `print(cell._layers)` used to sit here; it raised
    # AttributeError for any cell without that private attribute.
    (prior, posterior), _ = tf.nn.dynamic_rnn(
        cell, (embedded, prev_action, use_obs), length,
        dtype=tf.float32, swap_memory=True)

    # Arrange inputs for every iteration in the open loop unroll. Every loop
    # iteration below corresponds to one row in the docstring illustration.
    max_length = shape.shape(nested.flatten(embedded)[0])[1]
    first_output = {
        'observ': embedded,
        'prev_action': prev_action,
        'posterior': posterior,
        'target': target,
        'mask': tf.sequence_mask(length, max_length, tf.int32),
    }
    # Shift one step left along time and zero-fill the freed last slot.
    progress_fn = lambda tensor: tf.concat(
        [tensor[:, 1:], 0 * tensor[:, :1]], 1)
    other_outputs = tf.scan(
        lambda past_output, _: nested.map(progress_fn, past_output),
        tf.range(amount), first_output)
    # Prepend the unshifted row so the stack has `amount + 1` entries.
    sequences = nested.map(
        lambda lhs, rhs: tf.concat([lhs[None], rhs], 0),
        first_output, other_outputs)

    # Merge batch and time dimensions of steps to compute unrolls from every
    # time step as one batch. The time dimension becomes the number of
    # overshooting distances.
    sequences = nested.map(
        lambda tensor: _merge_dims(tensor, [1, 2]), sequences)
    sequences = nested.map(
        lambda tensor: tf.transpose(
            tensor, [1, 0] + list(range(2, tensor.shape.ndims))),
        sequences)
    merged_length = tf.reduce_sum(sequences['mask'], 1)

    # Mask out padding frames; unnecessary if the input is already masked.
    sequences = nested.map(
        lambda tensor: tensor * tf.cast(
            _pad_dims(sequences['mask'], tensor.shape.ndims), tensor.dtype),
        sequences)

    # Compute open loop rollouts.
    use_obs = tf.zeros(tf.shape(sequences['mask']), tf.bool)[..., None]
    # Each rollout starts from the posterior of the preceding step; the
    # first step gets a zero state.
    prev_state = nested.map(
        lambda tensor: tf.concat([0 * tensor[:, :1], tensor[:, :-1]], 1),
        posterior)
    prev_state = nested.map(
        lambda tensor: _merge_dims(tensor, [0, 1]), prev_state)
    (priors, _), _ = tf.nn.dynamic_rnn(
        cell, (sequences['observ'], sequences['prev_action'], use_obs),
        merged_length, prev_state)

    # Restore batch dimension.
    target, prior, posterior, mask = nested.map(
        functools.partial(
            _restore_batch_dim, batch_size=shape.shape(length)[0]),
        (sequences['target'], priors, sequences['posterior'],
         sequences['mask']))
    mask = tf.cast(mask, tf.bool)
    return target, prior, posterior, mask