def testScan_MultiOutputMismatchedInitializer(self):
    # The step function returns a pair while the initializer is a single
    # array, so constructing the scan is expected to raise ValueError.
    values = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
    scalar_start = np.array(1.0)

    def emit_pair(acc, _):
        return (acc, -acc)

    mismatch_message = "two structures don't have the same number of elements"
    with self.test_session():
        with self.assertRaisesRegexp(ValueError, mismatch_message):
            tf.scan(emit_pair, values, scalar_start)
Example #2
0
 def train(x=x, size_bt=size_bt, BV_t=BV_t, BH_t=BH_t):
     """Build the scan-based bias sequences and the RBM sample/cost ops.

     `rnn_recurrence` is unrolled over `x` starting from `u0`; the resulting
     sequence is scanned twice more to produce the visible and hidden bias
     sequences, which are reshaped and handed to `RBM.build_rbm`.

     Args:
         x: Sequence tensor scanned by `rnn_recurrence` and passed to the RBM.
         size_bt: Leading dimension used when reshaping both bias sequences.
         BV_t: Ignored on input; rebound to the reshaped visible biases.
         BH_t: Ignored on input; rebound to the reshaped hidden biases.

     Returns:
         The tuple `(x, sample, cost, params, size_bt)`.
     """
     visible_bias_start = tf.zeros([1, n_visible], tf.float32)
     hidden_bias_start = tf.zeros([1, n_hidden], tf.float32)

     # Recurrent state sequence, one entry per element of x.
     state_sequence = tf.scan(rnn_recurrence, x, initializer=u0)

     visible_bias_sequence = tf.scan(
         visible_bias_recurrence, state_sequence, visible_bias_start)
     BV_t = tf.reshape(visible_bias_sequence, [size_bt, n_visible])

     hidden_bias_sequence = tf.scan(
         hidden_bias_recurrence, state_sequence, hidden_bias_start)
     BH_t = tf.reshape(hidden_bias_sequence, [size_bt, n_hidden])

     sample, cost = RBM.build_rbm(x, W, BV_t, BH_t, k=15)
     return x, sample, cost, params, size_bt
    def testScan_Simple(self):
        # Running products via tf.scan, first without and then with an
        # explicit initializer.
        def running_product(acc, item):
            return tf.mul(acc, item)

        with self.test_session():
            data = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], name="data")
            seed = tf.constant(2.0, name="v")

            unseeded = tf.scan(running_product, data)
            self.assertAllEqual(
                [1.0, 2.0, 6.0, 24.0, 120.0, 720.0], unseeded.eval())

            seeded = tf.scan(running_product, data, initializer=seed)
            self.assertAllEqual(
                [2.0, 4.0, 12.0, 48.0, 240.0, 1440.0], seeded.eval())
    def testScanVaryingShape(self):
        # Smoke test: scans over a placeholder whose first dimension is
        # unknown, and over its transpose, must build and evaluate together
        # with their gradients.
        def running_sum(acc, row):
            return acc + row

        with self.test_session() as sess:
            x = tf.placeholder(dtype=tf.float32, shape=[None, 2])
            x_t = tf.transpose(x)

            # Leading dimension of x is None.
            scanned = tf.scan(running_sum, x)
            # Leading dimension of x_t is 2; shape inference is disabled here.
            scanned_t = tf.scan(running_sum, x_t, infer_shape=False)

            # Gradients must be constructible for both scans.
            scanned_grad = tf.gradients(scanned, [x])[0]
            scanned_t_grad = tf.gradients(scanned_t, [x_t])[0]

            fetches = [scanned, scanned_t, scanned_grad, scanned_t_grad]
            sess.run(fetches, feed_dict={x: [[1.0, 2.0]]})
def tensorflow_test():
    """Build a scan-inside-scan graph and run the variable initializer.

    The outer scan walks `nested_input`; at every outer step an inner scan is
    built over the outer accumulator. A loss and its gradient with respect to
    `variable` are added to the graph, but only the init op is executed.
    """
    import tensorflow as tf

    nested_input = tf.placeholder(
        tf.float32, shape=[outer_len, inner_len, input_dim])
    variable = tf.Variable(np.float32(1.0))

    def inner_func(acc, item):
        # The "+ variable" term is disabled, so `variable` does not take part
        # in this computation.
        return acc + item

    def outer_func(acc, item):
        # The inner scan iterates over the outer accumulator (first argument).
        inner_start = tf.zeros([input_dim])
        inner_res = tf.scan(fn=inner_func, elems=acc, initializer=inner_start)
        return item + inner_res

    outer_start = tf.zeros([inner_len, input_dim])
    outputs = tf.scan(fn=outer_func, elems=nested_input, initializer=outer_start)

    loss = tf.reduce_sum(outputs)
    grad = tf.gradients(loss, [variable])

    init_op = tf.initialize_all_variables()

    with tf.Session() as sess:
        sess.run(init_op)
 def outer_func(curr, prev):
     """Scan `inner_func` over `curr` from a zero vector and add `prev`.

     Args:
         curr: Tensor used as the elements of the inner scan.
         prev: Tensor added to the inner scan result.

     Returns:
         `prev + tf.scan(inner_func, curr, zeros([input_dim]))`.
     """
     zero_start = tf.zeros([input_dim])
     inner_res = tf.scan(fn=inner_func, elems=curr, initializer=zero_start)
     return prev + inner_res
Example #7
0
def omniglot():
    """Open an interactive session and define the `update_tensor` helper.

    NOTE(review): as visible here, the function only creates the session and
    the nested helper; it neither calls the helper nor returns a value.
    """

    sess = tf.InteractiveSession()

    # The string below is disabled debugging code kept as an inert literal.
    """    def wrapper(v):
        return tf.Print(v, [v], message="Printing v")

    v = tf.Variable(initial_value=np.arange(0, 36).reshape((6, 6)), dtype=tf.float32, name='Matrix')

    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    temp = tf.Variable(initial_value=np.arange(0, 36).reshape((6, 6)), dtype=tf.float32, name='temp')
    temp = wrapper(v)
    #with tf.control_dependencies([temp]):
    temp.eval()
    print 'Hello'"""

    def update_tensor(V, dim2, val):  # Update tensor V, with index(:,dim2[:]) by val[:]
        """Return a copy of V where, in each row i, entry dim2[i] is val[i].

        Args:
            V: Tensor whose leading axis is scanned row by row.
            dim2: Per-row index of the entry to replace.
            val: Per-row replacement value; cast to V's dtype.

        Returns:
            The stacked, updated rows produced by `tf.scan`.
        """
        val = tf.cast(val, V.dtype)

        def body(_, elem):
            # Unpack inside the body: tuple parameters in a signature were
            # removed by PEP 3113 and are a SyntaxError on Python 3.
            v, d2, chg = elem
            d2_int = tf.cast(d2, tf.int32)
            patched = tf.concat_v2([v[:d2_int], [chg], v[d2_int+1:]], axis=0)
            # Slice back to the statically known row length.
            return tf.slice(patched, [0], [v.get_shape().as_list()[0]])

        # The initializer must have the same dtype as the step output, which
        # is V's dtype (val has already been cast to it), not always float32.
        initial_row = tf.constant(1, shape=V.get_shape().as_list()[1:], dtype=val.dtype)
        Z = tf.scan(body, elems=(V, dim2, val), initializer=initial_row, name="Scan_Update")
        return Z
    def build(self, preSoftmaxPi, preSoftmaxA, preSoftmaxB):
        """Create the variables, the scaled forward recursion, cost and train op.

        Args:
            preSoftmaxPi: Initial value for the variable softmaxed into `pi`.
            preSoftmaxA: Initial value for the variable softmaxed into `A`.
            preSoftmaxB: Two-dimensional initial value for the variable
                softmaxed into `B`; its first dimension is the state count
                used by the recurrence.

        Side effects:
            Sets `self.preSoftmaxPi`, `self.preSoftmaxA`, `self.preSoftmaxB`,
            `self.tfx`, `self.cost` and `self.train_op`.
        """
        num_states, _ = preSoftmaxB.shape

        self.preSoftmaxPi = tf.Variable(preSoftmaxPi)
        self.preSoftmaxA = tf.Variable(preSoftmaxA)
        self.preSoftmaxB = tf.Variable(preSoftmaxB)

        initial_dist = tf.nn.softmax(self.preSoftmaxPi)
        transition = tf.nn.softmax(self.preSoftmaxA)
        emission = tf.nn.softmax(self.preSoftmaxB)

        # Observed symbol sequence.
        self.tfx = tf.placeholder(tf.int32, shape=(None,), name='x')

        def recurrence(old_a_old_s, x_t):
            # Only the previous scaled alpha (element 0) is carried forward.
            prev_alpha = tf.reshape(old_a_old_s[0], (1, num_states))
            unscaled = tf.matmul(prev_alpha, transition) * emission[:, x_t]
            unscaled = tf.reshape(unscaled, (num_states,))
            step_scale = tf.reduce_sum(unscaled)
            return (unscaled / step_scale), step_scale

        # The first observation is consumed by the initializer, so the scan
        # only walks tfx[1:]. The initial scale of 1 contributes log(1) = 0
        # to the cost.
        remaining_observations = self.tfx[1:]
        first_alpha = initial_dist * emission[:, self.tfx[0]]
        alpha, scale = tf.scan(
            fn=recurrence,
            elems=remaining_observations,
            initializer=(first_alpha, np.float32(1.0)),
        )

        log_scales = tf.log(scale)
        self.cost = -tf.reduce_sum(log_scales)
        optimizer = tf.train.AdamOptimizer(1e-2)
        self.train_op = optimizer.minimize(self.cost)
 def testScan_MultiInputSameTypeOutput(self):
     # With a tuple of inputs and no initializer, the scan output is a pair
     # of running sums, one per input.
     def pairwise_running_sum(acc, pair):
         return (acc[0] + pair[0], acc[1] + pair[1])

     with self.test_session() as sess:
         elems = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
         scanned = tf.scan(pairwise_running_sum, (elems, -elems))
         positive_sums, negative_sums = sess.run(scanned)
         self.assertAllEqual(np.cumsum(elems), positive_sums)
         self.assertAllEqual(np.cumsum(-elems), negative_sums)
  def _marginal_hidden_probs(self):
    """Return exp of the per-step log-probabilities over states.

    The initial log-probabilities are broadcast to
    `batch_shape + [num_states]`. When there is more than one step they are
    pushed through `_log_vector_matrix` with the transition log-probs
    `num_steps - 1` times, and all steps are stacked along a new leading
    axis.
    """
    log_init = self._log_init
    start_shape = tf.concat([self.batch_shape_tensor(), [self._num_states]],
                            axis=0)
    start_log_probs = tf.broadcast_to(log_init, start_shape)
    # start_log_probs :: batch_shape num_states

    if self._num_steps > 1:
      log_trans = self._log_trans

      def advance(log_probs, _):
        # The scanned element is a placeholder; only the carry matters.
        return _log_vector_matrix(log_probs, log_trans)

      # One dummy element per transition to drive the scan.
      step_driver = tf.zeros(self._num_steps - 1, dtype=tf.float32)

      later_log_probs = tf.scan(advance, step_driver,
                                initializer=start_log_probs,
                                name="forward_log_probs")

      # Prepend step 0, which the scan output does not include.
      all_log_probs = tf.concat([[start_log_probs], later_log_probs], axis=0)
    else:
      all_log_probs = start_log_probs[tf.newaxis, ...]

    # returns :: num_steps batch_shape num_states
    return tf.exp(all_log_probs)
Example #11
0
def define_ppo_epoch(memory, policy_factory, config):
  """Build the ops for one PPO epoch and return the merged loss summary.

  Args:
    memory: Tuple `(observation, reward, done, action, old_pdf, value)`.
    policy_factory: Forwarded unchanged to `define_ppo_step`.
    config: Provides `optimization_epochs`; also forwarded to
      `define_ppo_step`.

  Returns:
    The merged summary tensor, wrapped in one `tf.Print` per summary.
  """
  observation, reward, done, action, old_pdf, value = memory

  # Block gradients from flowing back into the collected rollout.
  observation, action, reward, done, value, old_pdf = [
      tf.stop_gradient(tensor)
      for tensor in (observation, action, reward, done, value, old_pdf)]

  def run_step(unused_accumulator, unused_epoch_index):
    # Each scan iteration simply performs one optimisation step.
    return define_ppo_step(
        observation, action, reward, done, value,
        old_pdf, policy_factory, config)

  ppo_step_rets = tf.scan(
      run_step,
      tf.range(config.optimization_epochs),
      [0., 0., 0., 0., 0., 0.],
      parallel_iterations=1)

  ppo_summaries = [tf.reduce_mean(ret) for ret in ppo_step_rets]
  summaries_names = ["policy_loss", "value_loss", "entropy_loss",
                     "policy_gradient", "value_gradient", "entropy_gradient"]
  named_summaries = list(zip(summaries_names, ppo_summaries))

  scalar_summaries = [tf.summary.scalar(label, value_op)
                      for label, value_op in named_summaries]
  losses_summary = tf.summary.merge(scalar_summaries)

  # Chain a Print op per value so each is logged when the summary is run.
  for label, value_op in named_summaries:
    losses_summary = tf.Print(losses_summary, [value_op], label + ": ")

  return losses_summary
Example #12
0
def cummax(x, reverse=False, name=None):
    """Compute the cumulative maximum of `x` along its first dimension.

    Analogous to `cumsum`, but accumulating with `tf.maximum`. The original
    author notes that only 1D tensors are supported for now. The underlying
    scan is built with `back_prop=False`.

    Args:
        x: A `Tensor` (or value convertible to one).
        reverse: A `bool` (default: False). When true, `x` is reversed along
            axis 0 before the scan and the result is reversed back.
        name: A name for the operation (optional).

    Returns:
        A `Tensor` holding the running maximum.
    """
    def running_max(values):
        # Accumulating maximum: each output is the max of everything so far.
        return tf.scan(tf.maximum, values,
                       initializer=None, parallel_iterations=1,
                       back_prop=False, swap_memory=False)

    with ops.name_scope(name, "Cummax", [x]) as name:
        x = ops.convert_to_tensor(x, name="x")
        if not reverse:
            return running_max(x)
        # Not very optimal: reversal is done around the scan, not inside it.
        flipped = tf.reverse(x, axis=[0])
        return tf.reverse(running_max(flipped), axis=[0])
    def get_states_b(self):
        """Run the backward recurrence, seeded by the forward pass's last states.

        The forward hidden/memory sequences from `get_states_f` are reversed
        along the first axis to pick their last entries, which are packed
        into the initial state of a scan of `self.Lstm_b` over
        `self.processed_input_rev`. The scan output is split into its hidden
        (index 0) and memory (index 1) parts and reversed along the first
        axis again.

        Returns:
            Tuple `(all_hidden_states, all_memory_states)`.
        """
        # Reverse only the first axis (old boolean-mask form of tf.reverse).
        first_axis_only = [True, False, False]

        forward_hidden, forward_memory = self.get_states_f()

        # Entry 0 of the reversed sequence is the last forward state.
        last_hidden_states = tf.reverse(forward_hidden, first_axis_only)[0, :, :]
        last_memory_states = tf.reverse(forward_memory, first_axis_only)[0, :, :]

        initial_hidden = tf.pack([last_hidden_states, last_memory_states])

        stacked_states = tf.scan(self.Lstm_b,
                                 self.processed_input_rev,
                                 initializer=initial_hidden,
                                 name='states')

        # Undo the reversal so the outputs follow the original ordering.
        hidden_reversed = stacked_states[:, 0, :, :]
        all_hidden_states = tf.reverse(hidden_reversed, first_axis_only)
        memory_reversed = stacked_states[:, 1, :, :]
        all_memory_states = tf.reverse(memory_reversed, first_axis_only)

        return all_hidden_states, all_memory_states
Example #14
0
    def feature(self, input_x, name = ''):
        """Project the input with `self.W`, then apply conv + max-pool per filter size.

        Args:
            input_x: Either rank-2 token ids (batch_size x seq_length), which
                are embedded via `self.embbeding_mat`, or an already embedded
                rank-3 tensor (batch_size x seq_length x g_emb_dim).
            name: Unused; kept for interface compatibility.

        Returns:
            Tensor of shape [-1, sum(self.num_filters)] with the concatenated
            pooled features.
        """
        if len(input_x.get_shape()) == 2:
            # incase input_x : batch_size x seq_length [tokens]
            input_x = tf.nn.embedding_lookup(self.embbeding_mat, input_x)
        # input_x:  batch_size x seq_length x g_emb_dim

        # Apply W to every batch entry independently. The previous
        # `tf.scan(lambda a, x: ...)` had no initializer, so tf.scan used the
        # first batch entry as the initial accumulator and emitted it
        # unchanged: entry 0 was never multiplied by W. The accumulator was
        # ignored anyway, so this is a plain map.
        embedded_chars = tf.map_fn(lambda x: tf.matmul(x, self.W), input_x)
        embedded_chars_expanded = tf.expand_dims(embedded_chars, -1)

        pooled_outputs = []
        # zip() is kept so the loop length still follows the shorter of the
        # two configuration lists, as before.
        filter_specs = zip(self.filter_sizes, self.num_filters)
        for index, (filter_size, _) in enumerate(filter_specs):
            with tf.name_scope("conv-maxpool-%s-midterm" % filter_size):
                # Convolution Layer
                conv = tf.nn.conv2d(
                    embedded_chars_expanded,
                    self.W_conv[index],
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, self.b_conv[index]), name="relu")
                # Maxpooling over the outputs
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, self.sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = sum(self.num_filters)
        h_pool = tf.concat(pooled_outputs, 3)
        h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])
        return h_pool_flat
Example #15
0
def MiniminibatchLayer(name, n_in, dim_b, dim_c, group_size, inputs):
    """Shuffle `inputs`, split into groups and apply `MinibatchLayer` per group.

    Args:
        name, n_in, dim_b, dim_c: Forwarded to `MinibatchLayer`.
        group_size: Number of rows per group after the reshape.
        inputs: Tensor reshaped to [-1, group_size, n_in] after shuffling.

    Returns:
        The scanned outputs reshaped to [-1, n_in + dim_b].
    """
    shuffled = tf.random_shuffle(inputs)
    grouped = tf.reshape(shuffled, [-1, group_size, n_in])

    def per_group(unused_accumulator, group):
        return MinibatchLayer(name, n_in, dim_b, dim_c, group)

    # NOTE(review): no initializer is given, so tf.scan seeds the accumulator
    # with the first group and emits it as the first output without calling
    # `per_group` on it. Confirm this is intended; tf.map_fn may be what was
    # meant.
    scanned = tf.scan(per_group, grouped)
    return tf.reshape(scanned, [-1, n_in+dim_b])
 def testScanUnknownShape(self):
   # With shapeless placeholders for both the elements and the initializer,
   # the static shape of the scan result must stay fully unknown.
   elems = tf.placeholder(tf.float32)
   start = tf.placeholder(tf.float32)

   def passthrough(unused_acc, item):
     return item

   scanned = tf.scan(passthrough, elems, initializer=start)
   self.assertIs(None, scanned.get_shape().dims)
Example #17
0
def diagonal_neural_gpu(inputs, hparams, name=None):
  """Improved Neural GPU as in https://arxiv.org/abs/1702.08727.

  Args:
    inputs: Rank-4 tensor; it is scanned after swapping its first two axes
      and is also used, untransposed, as the initial state.
    hparams: Provides `num_hidden_layers`, `kernel_height`, `kernel_width`,
      `hidden_size` and `dropout`.
    name: Optional variable-scope name.

  Returns:
    Tuple of (entry 0 of the stacked scan states, 2.0 * mean of the losses).
  """
  with tf.variable_scope(name, "diagonal_neural_gpu"):

    def step(carry, inp):
      """One scan step: apply the stacked diagonal conv-GRU layers."""
      prev_state, _ = carry
      hidden = prev_state
      for layer in xrange(hparams.num_hidden_layers):
        # Only the loss of the last layer survives the loop.
        hidden, new_loss = common_layers.diagonal_conv_gru(
            hidden, (hparams.kernel_height, hparams.kernel_width),
            hparams.hidden_size,
            dropout=hparams.dropout,
            name="dcgru_%d" % layer)
      # Padding input is zeroed-out in the modality, detected by summing.
      input_magnitude = tf.reduce_sum(tf.abs(inp), axis=[1, 2])
      is_padding = tf.less(input_magnitude, 0.00001)
      # Keep the previous state wherever the input is padding.
      next_state = tf.where(is_padding, prev_state, hidden)
      return next_state, new_loss

    scan_elems = tf.transpose(inputs, [1, 0, 2, 3])
    stacked_states, losses = tf.scan(
        step,
        scan_elems,
        initializer=(inputs, tf.constant(0.0)),
        parallel_iterations=1,
        swap_memory=True)
    # NOTE(review): index 0 selects the first entry along the scan axis even
    # though the original variable was called `final_state` - confirm.
    return stacked_states[0, :, :, :, :], 2.0 * tf.reduce_mean(losses)
 def testScan_MultiInputSingleOutput(self):
     # The two inputs sum to 1 element-wise, so the accumulator is multiplied
     # by 1 at every step and stays at its initial value.
     def scale_by_pair_sum(acc, pair):
         return acc * (pair[0] + pair[1])

     with self.test_session():
         elems = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
         start = np.array(1.0)
         scanned = tf.scan(scale_by_pair_sum, (elems + 1, -elems), start)
         expected = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
         self.assertAllEqual(expected, scanned.eval())
Example #19
0
    def fast_dlstm(s_t, state_in):
        """Unroll a 256-unit LSTM with tf.scan, dilating state element 0 per step.

        At each step, element 0 of the LSTM's new state is split into 8
        chunks; each chunk goes through `conditional_backprop` with a one-hot
        switch selecting the chunk for the current step counter, which cycles
        modulo 8.

        Args:
            s_t: Input tensor; its first two axes are swapped before scanning.
            state_in: Pair used both as `LSTMStateTuple(*state_in)` and, via
                `state_in[1]`, as the placeholder for the first scan output.

        Returns:
            Tuple `(out_states, cell_states, state_out)`, all taken at
            position 0 of the second axis of the stacked states.
        """
        hidden_units = 256
        chunks = 8

        def dilate_one_time_step(one_h, switcher, num_chunks):
            width = hidden_units // num_chunks
            chunk_starts = range(0, hidden_units, width)
            gated = [
                conditional_backprop(switcher[chunk], one_h[start: start + width])
                for chunk, start in zip(range(num_chunks), chunk_starts)
            ]
            return tf.reshape(tf.stack(gated), [-1, hidden_units])

        lstm = rnn.LSTMCell(hidden_units, state_is_tuple=True)

        def dlstm_scan_fn(previous_output, current_input):
            out, state_out = lstm(current_input, previous_output[1])
            step_counter = previous_output[2]
            switches = tf.one_hot(step_counter, depth=chunks)
            dilated = dilate_one_time_step(tf.squeeze(state_out[0]), switches, chunks)
            state_out = rnn.LSTMStateTuple(dilated, state_out[1])
            # Advance the counter, wrapping at the number of chunks.
            next_counter = tf.mod(step_counter + tf.constant(1), chunks)
            return out, state_out, next_counter

        scan_inputs = tf.transpose(s_t, [1, 0, 2])
        scan_start = (state_in[1], rnn.LSTMStateTuple(*state_in), tf.constant(0))
        rnn_outputs, final_states, mod_idxs = tf.scan(
            dlstm_scan_fn, scan_inputs, initializer=scan_start)

        first_stack = final_states[0]
        second_stack = final_states[1]
        # Last scan step, position 0 of the second axis, for both elements.
        state_out = [first_stack[-1, 0, :], second_stack[-1, 0, :]]
        cell_states = first_stack[:, 0, :]
        out_states = second_stack[:, 0, :]
        return out_states, cell_states, state_out
Example #20
0
def define_ppo_epoch(memory, hparams):
  """Build the ops for one PPO epoch over shuffled mini-batches.

  Args:
    memory: Tuple `(observation, reward, done, action, old_pdf, value)`.
    hparams: Provides `gae_gamma`, `gae_lambda`, `epoch_length`,
      `optimization_epochs`, `optimization_batch_size` and, optionally,
      `rewards_preprocessing_fun`.

  Returns:
    The merged summary tensor, wrapped in one `tf.Print` per summary.
  """
  observation, reward, done, action, old_pdf, value = memory

  # This is to avoid propagating gradients through simulated environment.
  observation, action, reward = [
      tf.stop_gradient(tensor) for tensor in (observation, action, reward)]
  if hasattr(hparams, "rewards_preprocessing_fun"):
    reward = hparams.rewards_preprocessing_fun(reward)
  done, value, old_pdf = [
      tf.stop_gradient(tensor) for tensor in (done, value, old_pdf)]

  advantage = calculate_generalized_advantage_estimator(
      reward, value, done, hparams.gae_gamma, hparams.gae_lambda)

  discounted_reward = tf.stop_gradient(advantage + value)

  advantage_mean, advantage_variance = tf.nn.moments(advantage, axes=[0, 1],
                                                     keep_dims=True)
  centered_advantage = advantage - advantage_mean
  # The small epsilon guards against division by a zero deviation.
  advantage_spread = tf.sqrt(advantage_variance) + 1e-8
  advantage_normalized = tf.stop_gradient(
      centered_advantage / advantage_spread)

  def add_lists_elementwise(l1, l2):
    return [x + y for x, y in zip(l1, l2)]

  number_of_batches = (hparams.epoch_length * hparams.optimization_epochs
                       / hparams.optimization_batch_size)

  rollout = (observation, action, discounted_reward, advantage_normalized,
             old_pdf)
  dataset = (
      tf.data.Dataset.from_tensor_slices(rollout)
      .shuffle(buffer_size=hparams.epoch_length,
               reshuffle_each_iteration=True)
      .repeat(hparams.optimization_epochs)
      .batch(hparams.optimization_batch_size))
  iterator = dataset.make_initializable_iterator()
  optimizer = get_optimiser(hparams)

  def accumulate_step(totals, unused_batch_index):
    # Add this mini-batch's step results onto the running totals.
    return add_lists_elementwise(
        totals, define_ppo_step(iterator.get_next(), optimizer, hparams))

  # The iterator must be initialised before the scan pulls batches from it.
  with tf.control_dependencies([iterator.initializer]):
    ppo_step_rets = tf.scan(
        accumulate_step,
        tf.range(number_of_batches),
        [0., 0., 0., 0., 0., 0.],
        parallel_iterations=1)

  ppo_summaries = [tf.reduce_mean(ret) / number_of_batches
                   for ret in ppo_step_rets]
  summaries_names = ["policy_loss", "value_loss", "entropy_loss",
                     "policy_gradient", "value_gradient", "entropy_gradient"]
  named_summaries = list(zip(summaries_names, ppo_summaries))

  scalar_summaries = [tf.summary.scalar(label, value_op)
                      for label, value_op in named_summaries]
  losses_summary = tf.summary.merge(scalar_summaries)

  # Chain a Print op per value so each is logged when the summary is run.
  for label, value_op in named_summaries:
    losses_summary = tf.Print(losses_summary, [value_op], label + ": ")

  return losses_summary
  def testScan_Grad(self):
    # Each output is v times a running product of the data, so the gradient
    # of the outputs w.r.t. v is 1 + 2 + 6 + 24 + 120 + 720 = 873.
    def running_product(acc, item):
      return tf.mul(acc, item)

    with self.test_session():
      data = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], name="data")
      seed = tf.constant(2.0, name="v")

      scanned = tf.scan(running_product, data, initializer=seed)
      seed_grad = tf.gradients(scanned, seed)[0]
      self.assertAllEqual(873.0, seed_grad.eval())
 def testScanShape(self):
   # The statically inferred shape of the scan output must equal the shape
   # of the evaluated result.
   def passthrough(unused_acc, row):
     return row

   with self.test_session():
     rows = tf.constant([[1, 2, 3], [4, 5, 6]])
     start = tf.constant([0, 0, 0])
     scanned = tf.scan(passthrough, rows, initializer=start)
     self.assertAllEqual(scanned.get_shape(), scanned.eval().shape)
    def testScan_Control(self):
        # A scan built under a control dependency on a fed placeholder must
        # still evaluate to the running product of its input.
        def running_product(acc, item):
            return item * acc

        with self.test_session() as sess:
            s = tf.placeholder(tf.float32, shape=[None])
            b = tf.placeholder(tf.bool)

            with tf.control_dependencies([b]):
                scanned = tf.scan(running_product, s)

            observed = sess.run(scanned, {s: [1, 3, 3], b: True})
            self.assertAllClose(np.array([1.0, 3.0, 9.0]), observed)
Example #24
0
    def _compute_hidden(self):
        """Unroll `self.rnn_step` over `self.inputs` and return the stacked states.

        The scan starts from `self.initial_state` and is built inside the
        'states' variable scope.
        """
        with tf.variable_scope('states'):
            return tf.scan(self.rnn_step, self.inputs,
                           initializer=self.initial_state, name='states')
Example #25
0
    def fast_dlstm(self, s_t, state_in, lstm, chunks, h_size):
        """Unroll `lstm` with tf.scan, updating one chunk of the state per step.

        The full (c, h) state of width `h_size` is divided into `chunks`
        equal slices. Each step feeds the slice selected by a cycling step
        counter to `lstm`, then rebuilds the full state with
        `self.conditional_sub_state` choosing, per slice, between the new
        sub-state and the previous values.

        Args:
            s_t: Input tensor; its first two axes are swapped before scanning.
            state_in: Initial state exposing `.c`; used as the scan's state.
            lstm: Callable `(input, sub_state) -> (output, new_sub_state)`.
            chunks: Number of slices the state is divided into.
            h_size: Full width of the state.

        Returns:
            Tuple `(rnn_outputs, final_states)` as stacked by tf.scan.
        """
        chunk_width = h_size // chunks

        def get_sub_state(state, state_step):
            full_c, full_h = state
            begin = state_step * chunk_width
            h_part = full_h[:, begin: begin + chunk_width]
            c_part = full_c[:, begin: begin + chunk_width]
            # The slice start is a tensor, so pin the static shape by hand.
            h_part.set_shape([1, chunk_width])
            c_part.set_shape([1, chunk_width])
            return tf.contrib.rnn.LSTMStateTuple(c_part, h_part)

        def build_new_state(new_sub_state, previous_state, state_step):
            prev_c, prev_h = previous_state
            fresh_c, fresh_h = new_sub_state
            h_parts, c_parts = [], []
            active_chunk = tf.one_hot(state_step, depth=chunks)

            for chunk, begin in zip(range(chunks), range(0, h_size, chunk_width)):
                is_active = active_chunk[chunk]
                h_piece = self.conditional_sub_state(
                    is_active, fresh_h, prev_h[:, begin: begin + chunk_width])
                h_piece.set_shape([1, chunk_width])
                c_piece = self.conditional_sub_state(
                    is_active, fresh_c, prev_c[:, begin: begin + chunk_width])
                c_piece.set_shape([1, chunk_width])
                h_parts.append(h_piece)
                c_parts.append(c_piece)

            merged_h = tf.concat(h_parts, axis=1)
            merged_c = tf.concat(c_parts, axis=1)
            return tf.contrib.rnn.LSTMStateTuple(merged_c, merged_h)

        def dlstm_scan_fn(previous_output, current_input):
            carried_state = previous_output[1]
            state_step = previous_output[2]

            sub_state = get_sub_state(carried_state, state_step)
            out, sub_state_out = lstm(current_input, sub_state)
            state_out = build_new_state(sub_state_out, carried_state, state_step)
            # Advance the counter, wrapping at the number of chunks.
            new_state_step = tf.mod(state_step + tf.constant(1), chunks)

            return out, state_out, new_state_step

        # Placeholder for the first scan output: first chunk of the initial c.
        first_input = state_in.c[:, 0: chunk_width]
        scan_inputs = tf.transpose(s_t, [1, 0, 2])
        scan_start = (first_input, state_in, tf.constant(0))
        rnn_outputs, final_states, mod_idxs = tf.scan(
            dlstm_scan_fn, scan_inputs, initializer=scan_start, name="dlstm")

        return rnn_outputs, final_states
    def testScan_SingleInputMultiOutput(self):
        # One input with a two-element accumulator: a running product and a
        # sign-alternating running product.
        def step(acc, item):
            return (acc[0] * item, -acc[1] * item)

        with self.test_session() as sess:
            elems = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
            start = (np.array(1.0), np.array(-1.0))
            products, alternating = sess.run(tf.scan(step, elems, start))

            self.assertAllEqual(
                [1.0, 2.0, 6.0, 24.0, 120.0, 720.0], products)
            self.assertAllEqual(
                [1.0, -2.0, 6.0, -24.0, 120.0, -720.0], alternating)
Example #27
0
  def test_scan(self):
    # A copy of a scan op made with ed.copy must evaluate to the same running
    # sum as the original op.
    def running_sum(acc, item):
      return acc + item

    with self.test_session() as sess:
      ed.set_seed(42)
      original = tf.scan(running_sum, tf.constant([2.0, 3.0, 1.0]))
      duplicate = ed.copy(original)

      duplicate_value, original_value = sess.run([duplicate, original])
      expected = [2.0, 5.0, 6.0]
      self.assertAllClose(duplicate_value, expected)
      self.assertAllClose(original_value, expected)
Example #28
0
def discounted_return(reward, length, discount):
  """Discounted Monte-Carlo returns.

  Args:
    reward: Tensor indexed as [row, timestep]; `reward.shape[1]` must be
      statically known.
    length: Per-row number of valid timesteps; later timesteps are masked to
      zero before accumulation.
    discount: Factor applied to the accumulated return at every step.

  Returns:
    Tensor shaped like `reward`, with gradients stopped and wrapped in
    `tf.check_numerics`.
  """
  timestep = tf.range(reward.shape[1].value)
  mask = tf.cast(timestep[None, :] < length[:, None], tf.float32)

  # Walk the timesteps backwards: flip axis 1 and move it to the front so
  # tf.scan iterates over it.
  flipped = tf.reverse(mask * reward, [1])
  scan_elems = tf.transpose(flipped, [1, 0])
  zero_start = tf.zeros_like(reward[:, -1])
  accumulated = tf.scan(
      lambda agg, cur: cur + discount * agg,
      scan_elems,
      zero_start,
      parallel_iterations=1,
      back_prop=False)

  # Restore the original [row, timestep] layout and ordering.
  return_ = tf.reverse(tf.transpose(accumulated, [1, 0]), [1])
  return tf.check_numerics(tf.stop_gradient(return_), 'return')
  def testScan_Scoped(self):
    # A scan whose step function creates a variable must create exactly one
    # variable under the enclosing scope and reuse it on a second scan.
    with self.test_session() as sess:
      with tf.variable_scope("root") as varscope:
        elems = tf.constant([1, 2, 3, 4, 5, 6], name="data")

        first_scan = tf.scan(simple_scoped_fn, elems)
        # Check that we have the one variable we asked for here.
        trainable = tf.trainable_variables()
        self.assertEqual(len(trainable), 1)
        self.assertEqual(trainable[0].name, "root/body/two:0")
        sess.run([tf.initialize_all_variables()])
        first_expected = np.array([1, 6, 18, 44, 98, 208])
        self.assertAllEqual(first_expected, first_scan.eval())

        # Now let's reuse our single variable.
        varscope.reuse_variables()
        second_scan = tf.scan(simple_scoped_fn, elems, initializer=2)
        self.assertEqual(len(tf.trainable_variables()), 1)
        second_expected = np.array([6, 16, 38, 84, 178, 368])
        self.assertAllEqual(second_expected, second_scan.eval())
Example #30
0
  def runScan(self, n):
    """Time the construction of a running-sum tf.scan over `np.arange(n)`.

    Only the call that builds the scan is timed; the result is never
    evaluated here. The wall time is reported under the name 'scan'.
    """
    elems = np.arange(n)
    started = time.time()
    tf.scan(lambda a, x: a + x, elems, parallel_iterations=1)
    elapsed = time.time() - started

    self.report_benchmark(name='scan', iters=n, wall_time=elapsed)
Example #31
0
def bayesian_rnn(cell, inputs, y_i):
    """Unroll `cell` over `inputs` with tf.scan and return the second output.

    Args:
        cell: Step function `(state_pair, input) -> state_pair`.
        inputs: Tensor scanned along its first axis.
        y_i: Unused.

    Returns:
        The stacked second element of the scanned state pair.
    """
    state_shape = [batch_size, embedding_size]
    initial_c = tf.zeros(state_shape)
    initial_h = tf.zeros(state_shape)
    _, h_list = tf.scan(cell, inputs, initializer=(initial_c, initial_h))
    return h_list
Example #32
0
def forward_batch(observations, transitions, viterbi=False,
            return_alpha=False, return_best_sequence=False):
    """Run the batched forward (or Viterbi) recursion in log space.

    Args:
        observations: Log-space scores of shape
            (batch_size, n_steps, n_classes).
        transitions: Log-space scores of shape (n_classes, n_classes) with a
            statically known shape.
        viterbi: If true, use max instead of log-sum-exp when combining paths.
        return_alpha: If true, return the stacked scan output `alpha`. It is
            scanned over steps 1..n_steps-1; step 0 only seeds the recursion.
        return_best_sequence: If true, also track the argmax at every step
            and return the backtracked sequence. Requires `viterbi=True` and
            `return_alpha=False`.

    Returns:
        One of:
            - `alpha`, when `return_alpha` is set;
            - the transposed best sequence, when `return_best_sequence` is set;
            - otherwise the final score per batch entry: the max over classes
              (Viterbi) or the log-sum-exp over classes.
    """
    assert not return_best_sequence or (viterbi and not return_alpha)

    shape_t = transitions.get_shape().dims
    n_classes = shape_t[0].value
    # Leading axis of 1 so the transitions broadcast over the batch.
    transitions_ = tf.reshape(transitions, (1, n_classes, shape_t[1].value))

    def recurrence(prev, obs):
        # With best-sequence tracking the carry is [scores, argmax].
        previous = prev[0] if return_best_sequence else prev
        batch = tf.shape(previous)[0]
        previous = tf.reshape(previous, (batch, n_classes, 1))
        obs = tf.reshape(obs, (batch, 1, n_classes))
        # Both branches use the same broadcast-ready transitions; the result
        # is numerically what the rank-2 `transitions` would give.
        scores = previous + obs + transitions_
        if not viterbi:
            return log_sum_exp(scores, axis=1)
        out = tf.reduce_max(scores, axis=1)
        if return_best_sequence:
            return [out, tf.argmax(scores, axis=1)]
        return out

    obs = tf.transpose(observations, (1, 0, 2))
    initial = obs[0]
    if return_best_sequence:
        # Dummy argmax slot for the carry; only built when it is needed.
        initial = [initial, tf.ones(tf.shape(initial), dtype=tf.int64)]
    alpha = tf.scan(
        fn=recurrence,
        elems=obs[1:],
        initializer=initial
    )
    if return_alpha:
        return alpha
    if return_best_sequence:
        # Best final class per batch entry; computed once and reused below.
        last_best = tf.cast(tf.argmax(alpha[0][-1], axis=1), tf.int32)
        output_info = get_array_arg_max_coordinate(last_best)

        def recurrence_cal(prev, x):
            sequ = tf.gather_nd(x, prev)
            return get_array_arg_max_coordinate(sequ)

        # Backtrack through the stored argmax tables, last step first.
        sequence = tf.scan(
            fn=recurrence_cal,
            elems=tf.cast(alpha[1][::-1], tf.int32),
            initializer=output_info
        )
        sequence = sequence[:, :, -1]
        sequence = tf.concat([sequence[::-1], [last_best]], axis=0)
        return tf.transpose(sequence)
    if viterbi:
        return tf.reduce_max(alpha[-1], axis=1)
    return log_sum_exp(alpha[-1], axis=1)
Example #33
0
 def det_loop_batch(outputs, inputs):
     """Scan `det_loop_time` over `inputs`, starting from 0.0.

     Args:
         outputs: Unused.
         inputs: Tensor scanned along its first axis; the original author
             notes it as num_time_dims x dim_latent x dim_latent.

     Returns:
         The stacked results of the scan.
     """
     return tf.scan(fn=det_loop_time, elems=inputs, initializer=0.0)
Example #34
0
def build_actor(agent, env, level_name, action_set):
  """Builds the actor loop.

  Unrolls `FLAGS.unroll_length` agent/environment interaction steps with
  `tf.scan`, keeps the state needed to resume in local variables, and
  returns the unroll (prefixed with the values it started from).

  Args:
    agent: Object providing `initial_state(batch_size)` and callable as
      `agent((action, env_output), agent_state)`, returning
      `(agent_output, agent_state)`.
    env: Object providing `initial()` and `step(raw_action, env_state)`,
      both returning `(env_output, env_state)`.
    level_name: Passed through unchanged into the returned `ActorOutput`.
    action_set: Indexed with `tf.gather` to turn the agent's action index
      into the action handed to `env.step`.

  Returns:
    An `ActorOutput` whose tensors have all been wrapped in
    `tf.stop_gradient`.
  """
  # Initial values.
  initial_env_output, initial_env_state = env.initial()
  initial_agent_state = agent.initial_state(1)
  initial_action = tf.zeros([1], dtype=tf.int32)
  # The agent is called once only to learn the structure, shapes and dtypes
  # of its output; the actual initial agent output is all zeros.
  dummy_agent_output, _ = agent(
      (initial_action,
       nest.map_structure(lambda t: tf.expand_dims(t, 0), initial_env_output)),
      initial_agent_state)
  initial_agent_output = nest.map_structure(
      lambda t: tf.zeros(t.shape, t.dtype), dummy_agent_output)

  # All state that needs to persist across training iterations. This includes
  # the last environment output, agent state and last agent output. These
  # variables should never go on the parameter servers.
  def create_state(t):
    # Creates a unique variable scope to ensure the variable name is unique.
    with tf.variable_scope(None, default_name='state'):
      return tf.get_local_variable(t.op.name, initializer=t, use_resource=True)

  persistent_state = nest.map_structure(
      create_state, (initial_env_state, initial_env_output, initial_agent_state,
                     initial_agent_output))

  def step(input_, unused_i):
    """Steps through the agent and the environment."""
    env_state, env_output, agent_state, agent_output = input_

    # Run agent.
    # The previous action (first element of the previous agent output) is fed
    # back in together with the env output, which gets a leading axis of 1.
    action = agent_output[0]
    batched_env_output = nest.map_structure(lambda t: tf.expand_dims(t, 0),
                                            env_output)
    agent_output, agent_state = agent((action, batched_env_output), agent_state)

    # Convert action index to the native action.
    action = agent_output[0][0]
    raw_action = tf.gather(action_set, action)

    env_output, env_state = env.step(raw_action, env_state)

    # Same structure and order as `input_`, as `tf.scan` requires.
    return env_state, env_output, agent_state, agent_output

  # Run the unroll. `read_value()` is needed to make sure later usage will
  # return the first values and not a new snapshot of the variables.
  first_values = nest.map_structure(lambda v: v.read_value(), persistent_state)
  _, first_env_output, first_agent_state, first_agent_output = first_values

  # Use scan to apply `step` multiple times, therefore unrolling the agent
  # and environment interaction for `FLAGS.unroll_length`. `tf.scan` forwards
  # the output of each call of `step` as input of the subsequent call of `step`.
  # The unroll sequence is initialized with the agent and environment states
  # and outputs as stored at the end of the previous unroll.
  # `output` stores lists of all states and outputs stacked along the entire
  # unroll. Note that the initial states and outputs (fed through `initializer`)
  # are not in `output` and will need to be added manually later.
  output = tf.scan(step, tf.range(FLAGS.unroll_length), first_values)
  _, env_outputs, _, agent_outputs = output

  # Update persistent state with the last output from the loop.
  assign_ops = nest.map_structure(lambda v, t: v.assign(t[-1]),
                                  persistent_state, output)

  # The control dependency ensures that the final agent and environment states
  # and outputs are stored in `persistent_state` (to initialize next unroll).
  with tf.control_dependencies(nest.flatten(assign_ops)):
    # Remove the batch dimension from the agent state/output.
    first_agent_state = nest.map_structure(lambda t: t[0], first_agent_state)
    first_agent_output = nest.map_structure(lambda t: t[0], first_agent_output)
    agent_outputs = nest.map_structure(lambda t: t[:, 0], agent_outputs)

    # Concatenate first output and the unroll along the time dimension.
    full_agent_outputs, full_env_outputs = nest.map_structure(
        lambda first, rest: tf.concat([[first], rest], 0),
        (first_agent_output, first_env_output), (agent_outputs, env_outputs))

    # Note: `agent_state` is the state the unroll started from, not the state
    # after the last step.
    output = ActorOutput(
        level_name=level_name, agent_state=first_agent_state,
        env_outputs=full_env_outputs, agent_outputs=full_agent_outputs)

    # No backpropagation should be done here.
    return nest.map_structure(tf.stop_gradient, output)
    def _graph_fn_step(self):
        """Builds the graph that steps `self.num_steps` times through the env.

        Only the "tf" backend is handled in the code visible here; for any
        other backend this method falls through without an explicit return.

        Each scan iteration feeds the current state (plus internal states if
        `self.has_rnn`) through the actor component, executes the resulting
        action via `self.environment_server.step_flow` and emits a tuple of
        (terminal, next-state tuple, [action], [reward], [action probs],
        [internal states]); the bracketed slots exist only when the matching
        `add_*` / `has_rnn` flag is set.

        After the scan, `self.time_step` is advanced by `self.num_steps` and
        the `current_state` (and `current_internal_states`) variables are
        assigned the last values observed.

        Returns:
            A DataOpTuple of the per-slot results. Slot 1 (states) is rebuilt
            into its unflattened form and, like the internal-states slot, is
            prefixed with the value the scan started from. Every flattened op
            in the result gets `_time_rank = 0`.
        """
        if get_backend() == "tf":

            def scan_func(accum, time_delta):
                # Not needed: preprocessed-previous-states (tuple!)
                # `state` is a tuple as well. See comment in ctor for why tf cannot use ContainerSpaces here.
                internal_states = None
                state = accum[1]
                if self.has_rnn:
                    internal_states = accum[-1]

                state = tuple(tf.convert_to_tensor(value=s) for s in state)

                flat_state = OrderedDict()
                for i, flat_key in enumerate(
                        self.state_space_actor_flattened.keys()):
                    # Add a simple (size 1) batch rank to the state so it'll pass through the NN.
                    # - Also have to add a time-rank for RNN processing.
                    expanded = state[i]
                    for _ in range(1 if self.has_rnn is False else 2):
                        expanded = tf.expand_dims(input=expanded, axis=0)
                    # Make None so it'll be recognized as batch-rank by the auto-Space detector.
                    flat_state[flat_key] = tf.placeholder_with_default(
                        input=expanded,
                        shape=(None, ) + ((None, ) if self.has_rnn is True else
                                          ()) +
                        self.state_space_actor_list[i].shape)

                # Recreate state as the original Space to pass it into the actor-component.
                state = unflatten_op(flat_state)

                # Get action and preprocessed state (as batch-size 1).
                # NOTE(review): the keyword below is spelled `time_precentage`;
                # confirm it matches the callee's parameter name.
                out = (self.actor_component.get_preprocessed_state_and_action
                       if self.add_action_probs is False else
                       self.actor_component.
                       get_preprocessed_state_action_and_action_probs)(
                           state,
                           # Add simple batch rank to internal_states.
                           None if internal_states is None else DataOpTuple(
                               internal_states),  # <- None for non-RNN systems
                           time_precentage=(((self.time_step + time_delta) /
                                             self.max_timesteps)
                                            if self.max_timesteps is not None
                                            else None))

                # Get output depending on whether it contains internal_states or not.
                a = out["action"]
                action_probs = out.get("action_probs")
                current_internal_states = out.get("last_internal_states")

                # Strip the batch (and maybe time) ranks again from the action in case the Env doesn't like it.
                a_no_extra_ranks = a[0, 0] if self.has_rnn is True else a[0]
                # Step through the Env and collect next state (tuple!), reward and terminal as single values
                # (not batched).
                out = self.environment_server.step_flow(a_no_extra_ranks)
                # Everything but the last two items is the next state; the
                # last two are reward and terminal, in that order.
                s_, r, t_ = out[:-2], out[-2], out[-1]
                r = tf.cast(r, dtype="float32")

                # Add a and/or r to next_state?
                if self.add_previous_action_to_state is True:
                    assert isinstance(
                        s_, tuple
                    ), "ERROR: Cannot add previous action to non tuple!"
                    s_ = s_ + (a_no_extra_ranks, )
                if self.add_previous_reward_to_state is True:
                    assert isinstance(
                        s_, tuple
                    ), "ERROR: Cannot add previous reward to non tuple!"
                    s_ = s_ + (r, )

                # Note: s_ is packed as tuple.
                # Slot order must match `initializer` built below.
                ret = [t_, s_] + \
                    ([a_no_extra_ranks] if self.add_action else []) + \
                    ([r] if self.add_reward else []) + \
                    ([(action_probs[0][0] if self.has_rnn is True else action_probs[0])] if
                     self.add_action_probs is True else []) + \
                    ([tuple(current_internal_states)] if self.has_rnn is True else [])

                return tuple(ret)

            # Initialize the tf.scan run.
            initializer = [
                # terminals
                tf.zeros(shape=(), dtype=tf.bool),
                # current (raw) state (flattened components if ContainerSpace).
                tuple(
                    map(lambda x: x.read_value(), self.current_state.values()))
            ]
            # Append actions and rewards if needed.
            if self.add_action:
                initializer.append(
                    tf.zeros(shape=self.action_space.shape,
                             dtype=self.action_space.dtype))
            if self.add_reward:
                initializer.append(tf.zeros(shape=self.reward_space.shape))
            # Append action probs if needed.
            if self.add_action_probs is True:
                initializer.append(
                    tf.zeros(shape=self.action_probs_space.shape))
            # Append internal states if needed.
            if self.current_internal_states is not None:
                initializer.append(
                    tuple(
                        tf.placeholder_with_default(
                            internal_s.read_value(),
                            shape=(None, ) +
                            tuple(internal_s.shape.as_list()[1:])) for
                        internal_s in self.current_internal_states.values()))

            # Scan over n time-steps (tf.range produces the time_delta with respect to the current time_step).
            # NOTE: Changed parallel to 1, to resolve parallel issues.
            # NOTE(review): no `parallel_iterations` argument is passed in the
            # call below, so the note above may be stale -- confirm.
            step_results = list(
                tf.scan(fn=scan_func,
                        elems=tf.range(self.num_steps, dtype="int32"),
                        initializer=tuple(initializer),
                        back_prop=False))

            # Assign all values that need to be passed again into the next scan.
            assigns = [tf.assign_add(self.time_step,
                                     self.num_steps)]  # time step
            # State (or flattened state components).
            for flat_key, var_ref, state_comp in zip(
                    self.state_space_actor_flattened.keys(),
                    self.current_state.values(), step_results[1]):
                assigns.append(self.assign_variable(
                    var_ref,
                    state_comp[-1]))  # -1: current state (last observed)

            # Current internal state.
            if self.current_internal_states is not None:
                # TODO: What if internal states is not the last item in the list anymore due to some change.
                slot = -1
                # TODO: What if internal states is a dict? Right now assume some tuple.
                # Remove batch rank from internal states again.
                internal_states_wo_batch = list()
                for i, var_ref in enumerate(
                        self.current_internal_states.values(
                        )):  #range(len(step_results[slot])):
                    # 1=batch axis (which has dim=1); 0=time axis.
                    internal_states_component = tf.squeeze(
                        step_results[slot][i], axis=1)
                    # `[-1:]` keeps a leading axis of size 1 on the value
                    # written back to the variable.
                    assigns.append(
                        self.assign_variable(var_ref,
                                             internal_states_component[-1:]))
                    internal_states_wo_batch.append(internal_states_component)
                step_results[slot] = tuple(internal_states_wo_batch)

            # Concatenate first and rest (and make the concatenated tensors (which are the important return information)
            # dependent on the assigns).
            with tf.control_dependencies(control_inputs=assigns):
                full_results = []
                for slot in range(len(step_results)):
                    first_values, rest_values = initializer[
                        slot], step_results[slot]
                    # Internal states need a slightly different concatenating as the batch rank is missing.
                    if self.current_internal_states is not None and slot == len(
                            step_results) - 1:
                        full_results.append(
                            nest.map_structure(self._concat, first_values,
                                               rest_values))
                    # States need concatenating (first state needed).
                    elif slot == 1:
                        full_results.append(
                            nest.map_structure(
                                lambda first, rest: tf.concat([[first], rest],
                                                              axis=0),
                                first_values, rest_values))
                    # Everything else does not need concatenating (saves one op).
                    else:
                        full_results.append(step_results[slot])

            # Re-build DataOpDicts of states (from tuple right now).
            rebuild_s = DataOpDict()
            for flat_key, var_ref, s_comp in zip(
                    self.state_space_actor_flattened.keys(),
                    self.current_state.values(), full_results[1]):
                rebuild_s[flat_key] = s_comp
            rebuild_s = unflatten_op(rebuild_s)
            full_results[1] = rebuild_s

            # Let the auto-infer system know, what time rank we have.
            full_results = DataOpTuple(full_results)
            for o in flatten_op(full_results).values():
                o._time_rank = 0  # which position in the shape is the time-rank?

            return full_results
Example #36
0
 def cumsum(x):
     """Running sum of `x` along its first axis, expressed as a tf.scan."""
     # No initializer is given, so the scan is seeded with the first element.
     return tf.scan(lambda running_total, item: running_total + item, x)
Example #37
0
    def v_trace_estimation(self, value_preds, actions, dones, rewards, logits):
        """Computes V-trace value targets and policy-gradient advantages.

        Arguments:
            value_preds: state values estimated by the current policy. It
                          carries one extra trailing entry (the bootstrap
                          value), which is split off below.
            actions: actions sampled by the behaviour policy.
            dones: terminal signals; a done step gets a discount of zero.
            rewards: immediate rewards returned by the env.
            logits: logits produced by the behaviour policy.

        Returns:
            (value_target, advantages), both wrapped in `tf.stop_gradient`.
        """
        gamma = self.config.get('discount', 0.99)
        not_done = ~tf.cast(dones, tf.bool)
        discounts = tf.to_float(not_done) * gamma

        with tf.device("/cpu:0"):
            # Importance weights between the target and behaviour policies.
            behaviour_log_p = -tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=actions)
            target_log_p = self.action_dist.log_p(action=actions)

            # The last entry is dropped before exponentiating.
            log_ratio = target_log_p - behaviour_log_p
            rhos = tf.exp(log_ratio[:-1])

            # Optional clipping; a missing (or falsy) setting means no clipping.
            rho_cap = self.config.get('rho_clipping', None)
            clipped_rhos = (tf.minimum(tf.cast(rho_cap, tf.float32), rhos)
                            if rho_cap else rhos)

            pg_rho_cap = self.config.get('pg_rho_clipping', None)
            clipped_pg_rhos = (tf.minimum(tf.cast(pg_rho_cap, tf.float32), rhos)
                               if pg_rho_cap else rhos)

            cs = tf.minimum(1.0, rhos)

            next_state_value = value_preds[1:]
            state_value = value_preds[:-1]
            bootstrap_value = value_preds[-1]

            deltas = clipped_rhos * (rewards + discounts * next_state_value -
                                     state_value)

            # The recursion runs from the end of the trajectory to its start,
            # so every per-step sequence is reversed before scanning.
            reversed_inputs = (
                tf.reverse(discounts, axis=[0]),
                tf.reverse(cs, axis=[0]),
                tf.reverse(deltas, axis=[0]),
            )

            def backward_step(carry, step_inputs):
                discount_t, c_t, delta_t = step_inputs
                return delta_t + discount_t * c_t * carry

            vs_minus_v_xs = tf.scan(fn=backward_step,
                                    elems=reversed_inputs,
                                    initializer=tf.zeros_like(bootstrap_value),
                                    parallel_iterations=1,
                                    back_prop=False,
                                    name='scan')
            # Restore chronological order.
            vs_minus_v_xs = tf.reverse(vs_minus_v_xs, [0],
                                       name='vs_minus_v_xs')
            # v_s = (v_s - V(x_s)) + V(x_s).
            vs = tf.add(vs_minus_v_xs, state_value, name='vs')

            # Advantage for the policy gradient uses v_{s+1}, with the
            # bootstrap value standing in after the final step.
            vs_t_plus_1 = tf.concat(
                [vs[1:], tf.expand_dims(bootstrap_value, 0)], axis=0)
            pg_advantages = (clipped_pg_rhos *
                             (rewards + discounts * vs_t_plus_1 - state_value))

            advantages = tf.stop_gradient(pg_advantages)
            value_target = tf.stop_gradient(vs)

            return value_target, advantages
Example #38
0
def construct_batched_adjacency_and_feature_matrices(
    size,
    adj_row,
    adj_column,
    adj_values,
    adj_elem_len,
    adj_degrees,
    feature_row,
    feature_column,
    feature_values,
    feature_elem_len,
    input_dim,
    max_degree=5,
    normalize=True,
    split_adj=False,
):
    """
    Constructs a batched (block-diagonal) sparse adjacency matrix together
    with the matching dense feature matrix.

    Every graph's row/column indices are shifted by the number of nodes in
    the graphs before it, so the individual adjacency matrices end up on the
    diagonal of one large matrix and the feature rows are stacked in the
    same node order.

    For example, to make a batch of two adjacency matrices of 2 and 3 nodes:

    Example:
        >>> # first adjacency matrix: [[1, 1], [1, 1]]
        >>> # second adjacency matrix: [[1, 1, 0], [1, 1, 1], [0, 1, 1]]
        >>> import tensorflow as tf
        >>> tf.enable_eager_execution()
        >>> size = tf.constant([2, 3], tf.int64)
        >>> adj_row = tf.constant([0, 0, 1, 1, 0, 0, 1, 1, 1, 2, 2], tf.int64)
        >>> adj_column = tf.constant([0, 1, 0, 1, 0, 1, 0, 1, 2, 1, 2], tf.int64)
        >>> adj_values = tf.constant([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], tf.float32)
        >>> # 4 non-zero entries in the first matrix, 7 in the second
        >>> adj_elem_len = tf.constant([4, 7], tf.int64)
        >>> adj_degrees = None  # only read when split_adj=True
        >>> feature_row = tf.constant([0, 1, 0, 1, 2], tf.int64)
        >>> feature_column = tf.constant([2, 3, 1, 2, 3], tf.int64)
        >>> feature_values = tf.constant([4, 5, 1, 2, 3], tf.int64)
        >>> feature_elem_len = tf.constant([2, 3], tf.int64)
        >>> diagonalized_adj, feature_mat = construct_batched_adjacency_and_feature_matrices(
        ...     size, adj_row, adj_column, adj_values, adj_elem_len, adj_degrees,
        ...     feature_row, feature_column, feature_values, feature_elem_len,
        ...     10, normalize=False, split_adj=False)
        >>> tf.sparse.to_dense(diagonalized_adj[0]).numpy()
        array([[1., 1., 0., 0., 0.],
               [1., 1., 0., 0., 0.],
               [0., 0., 1., 1., 0.],
               [0., 0., 1., 1., 1.],
               [0., 0., 0., 1., 1.]], dtype=float32)
        >>> feature_mat.numpy()
        array([[0, 0, 4, 0, 0, 0, 0, 0, 0, 0],
               [0, 0, 0, 5, 0, 0, 0, 0, 0, 0],
               [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
               [0, 0, 2, 0, 0, 0, 0, 0, 0, 0],
               [0, 0, 0, 3, 0, 0, 0, 0, 0, 0]])

    Parameters:
        size: sizes (node counts) of the adjacency matrices.
        adj_row: concatenated row indices of all matrices in a batch.
        adj_column: concatenated column indices of all matrices in a batch.
        adj_values: concatenated values of elements of all matrices in a batch.
        adj_elem_len: number of non-zero elements in each matrix.
        adj_degrees: degrees used to split the adjacency matrix; only read
            when `split_adj` takes effect. It is used as a mask over the
            concatenated non-zero entries, so it presumably holds one degree
            per entry -- TODO confirm against the caller.
        feature_row: concatenated row indices of all feature matrices.
        feature_column: concatenated column indices of all feature matrices.
        feature_values: concatenated values in all feature matrices.
        feature_elem_len: number of non-zero elements in each feature matrix.
        input_dim: dimension of each node in feature matrices.
        max_degree: degrees are clipped to this value, and one adjacency
            matrix is produced per degree in 1..max_degree when splitting.
        normalize: normalizes the adjacency matrix if True. Takes precedence
            over `split_adj`.
        split_adj: splits the adjacency matrix based on degrees of nodes if
            True (and `normalize` is False).
    Returns:
        A tuple `(diagonalized_adj, features)`:
        `diagonalized_adj` is a list of `tf.SparseTensor`s -- a single
        element unless the matrix was split, in which case it has
        `max_degree + 1` elements (one per degree, then a sparse identity
        for self-connections); `features` is the dense
        `[sum(size), input_dim]` feature matrix.

    """
    with tf.device("/cpu:0"):
        # For each graph: where its entries start in the concatenated index
        # vectors, how many entries it has, and the node offset that moves it
        # onto its own diagonal block.
        start = tf.cumsum(adj_elem_len, exclusive=True)
        offset = tf.cumsum(size, exclusive=True)
        start_size_offset = tf.stack([start, adj_elem_len, offset], 1)

    def offset_index(row_or_column):
        def split_sum(a, x):
            # x = [start, length, node offset]. Only this graph's slice is
            # kept (shifted by the offset); everything else is zero, so that
            # summing the scan output over graphs yields the shifted indices.
            padded_index = tf.concat(
                [
                    tf.zeros(x[0], tf.int64),
                    row_or_column[x[0]:x[0] + x[1]] + x[2],
                    tf.zeros(
                        tf.shape(row_or_column, out_type=tf.int64)[0] - x[0] -
                        x[1],
                        tf.int64,
                    ),
                ],
                0,
            )
            return padded_index

        return split_sum

    with tf.device("/cpu:0"):
        # The initializer only fixes the shape/dtype of the scan output; the
        # accumulator itself is ignored by `split_sum`.
        padded_rows = tf.scan(offset_index(adj_row),
                              start_size_offset,
                              initializer=adj_row)
        padded_columns = tf.scan(offset_index(adj_column),
                                 start_size_offset,
                                 initializer=adj_column)
    diagonal_row = tf.reduce_sum(padded_rows, axis=0)
    diagonal_col = tf.reduce_sum(padded_columns, axis=0)
    adj_shape = [tf.reduce_sum(size), tf.reduce_sum(size)]

    if normalize or not split_adj:
        # Single-channel case: one block-diagonal matrix, optionally scaled.
        diagonalized_adj = tf.SparseTensor(
            indices=tf.transpose(tf.stack([diagonal_row, diagonal_col])),
            values=adj_values,
            dense_shape=adj_shape,
        )
        if normalize:
            # Divide each entry by sqrt(column sum) and sqrt(row-indexed sum).
            degree_hat = tf.sparse.reduce_sum(diagonalized_adj, axis=0)
            diagonalized_adj = (diagonalized_adj / tf.sqrt(degree_hat) /
                                tf.expand_dims(tf.sqrt(degree_hat), 1))
        diagonalized_adj = [diagonalized_adj]  # number of channel is 1.
    else:
        # degree is the number of edges on each node, including the one to itself.
        # A node with degree 1 is not connected with any other node.
        adj_degrees = tf.clip_by_value(adj_degrees, 0, max_degree)
        diagonalized_adj = []
        for degree in range(1, max_degree + 1):
            degree_mask = tf.equal(adj_degrees, degree)
            row_deg = tf.boolean_mask(diagonal_row, degree_mask)
            col_deg = tf.boolean_mask(diagonal_col, degree_mask)
            diagonalized_adj.append(
                tf.SparseTensor(
                    indices=tf.transpose(tf.stack([row_deg, col_deg])),
                    values=tf.boolean_mask(adj_values, degree_mask),
                    dense_shape=adj_shape,
                ))
        diagonalized_adj.append(tf.sparse.eye(
            adj_shape[0]))  # connection to self

    # Feature rows get the same per-graph node offset as the adjacency rows;
    # feature columns are shared across graphs and need no shift.
    start_feature = tf.cumsum(feature_elem_len, exclusive=True)
    start_size_offset_feature = tf.stack(
        [start_feature, feature_elem_len, offset], 1)
    with tf.device("/cpu:0"):
        padded_rows_feature = tf.scan(
            offset_index(feature_row),
            start_size_offset_feature,
            initializer=feature_row,
        )
    stacked_row = tf.reduce_sum(padded_rows_feature, axis=0)
    net = tf.SparseTensor(
        indices=tf.transpose(tf.stack([stacked_row, feature_column])),
        values=feature_values,
        dense_shape=[tf.reduce_sum(size), input_dim],
    )
    # Reorder the indices into canonical order before densifying.
    net = tf.sparse_reorder(net)
    net = tf.sparse_tensor_to_dense(net)
    return diagonalized_adj, net
Example #39
0
def qr_layer(inputs,
             n_units,
             filter_width=2,
             pool_type='ifo',
             kernel_regularizer=None):
    """
    Simple QR layer implementation

    The input is left-padded along axis 1 by `filter_width - 1` steps, so
    each convolution output at a step only sees that step and the ones
    before it. Gates and candidates come from parallel convolutions; the
    recurrent pooling is then run over time with `tf.scan`.

    Parameters
    ----------
    inputs:
        Rank-3 tensor; axis 1 is the time axis that gets padded and scanned
        (presumably [batch, time, features] -- confirm against the caller).
    n_units:
        Number of QR units / dimension of output vectors.
    filter_width:
        Time-dimension width of the convolution filters.
    pool_type:
        Type of the QR pooling. Affects number of parameters / parallel
        convolutions. Has to be one of ['f', 'fo', 'ifo'].
    kernel_regularizer:
        Parameter passed to all internal convolutions.

    Returns
    -------
    The hidden states, multiplied by the output gate when `pool_type`
    contains 'o', in the same batch/time axis order as `inputs`.

    Raises
    ------
    ValueError
        If `pool_type` is not one of 'f', 'fo', 'ifo'.
    """
    if pool_type not in ['f', 'fo', 'ifo']:
        raise ValueError("Pool type must be one of 'f', 'fo', 'ifo'")
    paddings = [[0, 0], [filter_width - 1, 0], [0, 0]]
    inputs = tf.pad(inputs, paddings=paddings)
    # Add a trailing channel axis so conv2d can span the full feature width.
    inputs = tf.expand_dims(inputs, -1)
    input_dim = inputs.shape[2]

    # Candidate vectors
    candidate = tf.layers.conv2d(inputs,
                                 n_units, (filter_width, input_dim),
                                 activation=tf.tanh,
                                 kernel_regularizer=kernel_regularizer)
    candidate = tf.squeeze(candidate, axis=2)

    # Forget gate
    forget_gate = tf.layers.conv2d(inputs,
                                   n_units, (filter_width, input_dim),
                                   activation=tf.sigmoid,
                                   kernel_regularizer=kernel_regularizer)
    forget_gate = tf.squeeze(forget_gate, axis=2)

    # Input gate
    if 'i' in pool_type:
        # Regularized like every other convolution in this layer.
        input_gate = tf.layers.conv2d(inputs,
                                      n_units, (filter_width, input_dim),
                                      activation=tf.sigmoid,
                                      kernel_regularizer=kernel_regularizer)
        input_gate = tf.squeeze(input_gate, axis=2)
    else:
        # Without a dedicated input gate it is tied to the forget gate.
        input_gate = 1 - forget_gate

    # Recurrent part of the calculation of c
    # Prepare for tf.scan, which iterates over the leading axis.
    forget_gate = tf.transpose(forget_gate, [1, 0, 2])
    input_gate = tf.transpose(input_gate, [1, 0, 2])
    candidate = tf.transpose(candidate, [1, 0, 2])
    # The initial state follows the gates' dtype so non-float32 inputs do
    # not hit a dtype mismatch inside tf.scan.
    initializer = tf.zeros(tf.shape(forget_gate)[1:], forget_gate.dtype)

    # Apply recurrent step
    hidden = tf.scan(_recurrent_step, (forget_gate, input_gate, candidate),
                     initializer=initializer)
    # Return to proper shape
    hidden = tf.transpose(hidden, [1, 0, 2])

    # Calculate outputs
    if 'o' in pool_type:
        # Output gate
        output_gate = tf.layers.conv2d(inputs,
                                       n_units, (filter_width, input_dim),
                                       activation=tf.sigmoid,
                                       kernel_regularizer=kernel_regularizer)
        output_gate = tf.squeeze(output_gate, axis=2)

        outputs = output_gate * hidden
    else:
        outputs = hidden

    return outputs
Example #40
0
 def forward(self, x):
     """
     Apply `self.recurrence` along the first axis of `x` with `tf.scan`,
     starting from `self.h0`.

     :param x: The input to the hidden layer.
     :return: The values after performing forward propagation in this layer.
     """
     hidden_states = tf.scan(fn=self.recurrence, elems=x, initializer=self.h0)
     return hidden_states
Example #41
0
#        output, state = RNNConvCell(x_i, state)
#    else:
#        output, state = RNNCell(x_i, state)
#    outputs.append(output)

"""
Use tf.scan instead of a naive for loop.
Remark: scan computes a(t) = fn(a(t-1), x(t)). In scan's first parameter
fn(a, x), 'a' stands for the accumulated (recurrent) value and 'x' stands for
the input at a given time step. In this example we need Cell(rnn_input, state)
as the scan function, so a lambda adapts Cell(rnn_input, state) to the
fn(a, x) argument order that scan expects.
Note that the Cell function returns (output, state), so we actually need to
pass a[1], i.e. the state, as the cell's state argument.
"""
# Scan the cell over the input after swapping its first two axes, so the scan
# iterates over what was axis 1. The accumulator is an (output, state) pair
# seeded with (state, state); only its second element is fed back into the
# cell, and `[0]` keeps the stacked per-step outputs.
outputs = tf.scan(lambda a, x: RNNCell(x, a[1]), tf.transpose(x, [1, 0, 2]), 
                  initializer=(state, state))[0]
    
    
# Linear activation, using rnn inner loop last output
preds = []
cost = []
# Walk backwards over the last `loss_len` steps, starting at the final one.
for out_idx in xrange(n_steps - 1, n_steps - loss_len - 1, -1):
    pred = tf.matmul(outputs[out_idx], weights['out']) + biases['out']
    preds.append(pred)
    # Define loss and optimizer
    # Every selected step is scored against the same labels `y`.
    cost.append(tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)))
# Collapse the per-step mean losses into a single scalar objective.
cost = tf.reduce_sum(cost)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
Example #42
0
    def __init__(self,
                 sequence_length,
                 num_classes,
                 embedding_size,
                 filter_sizes,
                 num_filters,
                 embedding_size_lex,
                 attention_depth_w2v,
                 attention_depth_lex,
                 l2_reg_lambda=0.0,
                 l1_reg_lambda=0.0):

        # Placeholders for input, output and dropout
        self.input_x_2c = tf.placeholder(
            tf.float32, [None, sequence_length, embedding_size, 2],
            name="input_x_2c")
        self.input_x = tf.placeholder(tf.float32,
                                      [None, sequence_length, embedding_size],
                                      name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")

        self.input_x_lexicon = tf.placeholder(
            tf.float32, [None, sequence_length, embedding_size_lex],
            name="input_x_lexicon")

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)
        l1_loss = tf.constant(0.0)

        # Embedding layer
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            self.embedded_chars = self.input_x
            self.embedded_chars_expanded = tf.expand_dims(
                self.embedded_chars, -1)
            print self.embedded_chars_expanded

            # lexicon embedding
            self.embedded_chars_lexicon = self.input_x_lexicon
            self.embedded_chars_expanded_lexicon = tf.expand_dims(
                self.embedded_chars_lexicon, -1)

            print '[self.embedded_chars]', self.embedded_chars
            print '[self.embedded_chars_expanded]', self.embedded_chars_expanded

            print '[self.embedded_chars_lexicon]', self.embedded_chars_lexicon
            print '[self.embedded_chars_expanded_lexicon]', self.embedded_chars_expanded_lexicon

        attention_outputs = []
        with tf.name_scope("pre-attention"):
            U_shape = [embedding_size, attention_depth_w2v]  # (400, 60)
            self.U_w2v = tf.Variable(tf.truncated_normal(U_shape, stddev=0.1),
                                     name="U_w2v")
            U_shape = [embedding_size_lex, attention_depth_lex]  # (15, 60)
            self.U_lex = tf.Variable(tf.truncated_normal(U_shape, stddev=0.1),
                                     name="U_lex")

            self.embedded_chars_tr = tf.batch_matrix_transpose(
                self.embedded_chars)
            self.embedded_chars_lexicon_tr = tf.batch_matrix_transpose(
                self.embedded_chars_lexicon)
            print '[self.embedded_chars_lexicon_tr]', self.embedded_chars_lexicon_tr

            def fn_matmul_w2v(previous_output, current_input):
                print(current_input.get_shape())
                current_ouput = tf.matmul(current_input, self.U_w2v)
                print 'previous_output', previous_output
                print 'current_ouput', current_ouput
                return current_ouput

            def fn_matmul_lex(previous_output, current_input):
                print(current_input.get_shape())
                current_ouput = tf.matmul(current_input, self.U_lex)
                print 'previous_output', previous_output
                print 'current_ouput', current_ouput
                return current_ouput

            initializer = tf.constant(np.zeros(
                [sequence_length, attention_depth_w2v]),
                                      dtype=tf.float32)
            WU_w2v = tf.scan(fn_matmul_w2v,
                             self.embedded_chars,
                             initializer=initializer)
            print '[WU_w2v]', WU_w2v

            initializer = tf.constant(np.zeros(
                [sequence_length, attention_depth_lex]),
                                      dtype=tf.float32)
            LU_lex = tf.scan(fn_matmul_lex,
                             self.embedded_chars_lexicon,
                             initializer=initializer)
            print '[LU_lex]', LU_lex

            WU_w2v_expanded = tf.expand_dims(WU_w2v, -1)
            print '[WU_w2v_expanded]', WU_w2v_expanded  # (?, 60(seq_len), 60(depth), 1)

            w2v_pool = tf.nn.max_pool(WU_w2v_expanded,
                                      ksize=[1, 1, attention_depth_w2v, 1],
                                      strides=[1, 1, 1, 1],
                                      padding='VALID',
                                      name="w2v_pool")

            print '[w2v_pool]', w2v_pool  # (?, 60(seq_len), 1, 1) #select attention for w2v

            LU_lex_expanded = tf.expand_dims(LU_lex, -1)
            print '[LU_lex_expanded]', LU_lex_expanded  # (?, 60(seq_len), 60(depth), 1)

            lex_pool = tf.nn.max_pool(LU_lex_expanded,
                                      ksize=[1, 1, attention_depth_lex, 1],
                                      strides=[1, 1, 1, 1],
                                      padding='VALID',
                                      name="lex_pool")

            print '[lex_pool]', lex_pool  # (?, 60(seq_len), 1, 1) #select attention for lex

            w2v_pool_sq = tf.expand_dims(
                tf.squeeze(w2v_pool, squeeze_dims=[2, 3]), -1)  # (?, 60, 1)
            print '[w2v_pool_sq]', w2v_pool_sq

            lex_pool_sq = tf.expand_dims(
                tf.squeeze(lex_pool, squeeze_dims=[2, 3]), -1)  # (?, 60, 1)
            print '[lex_pool_sq]', lex_pool_sq

            attentioned_w2v = tf.batch_matmul(self.embedded_chars_tr,
                                              w2v_pool_sq)
            attentioned_lex = tf.batch_matmul(self.embedded_chars_lexicon_tr,
                                              lex_pool_sq)

            attentioned_w2v_sq = tf.squeeze(attentioned_w2v, squeeze_dims=[2])
            attentioned_lex_sq = tf.squeeze(attentioned_lex, squeeze_dims=[2])

            print '[attentioned_w2v]', attentioned_w2v_sq
            print '[attentioned_lex]', attentioned_lex_sq
            attention_outputs.append(attentioned_w2v_sq)
            attention_outputs.append(attentioned_lex_sq)

        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 2, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1),
                                name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]),
                                name="b")

                # l2_loss += tf.nn.l2_loss(W)/1000
                # l2_loss += tf.nn.l2_loss(b)/1000

                conv = tf.nn.conv2d(self.input_x_2c,
                                    W,
                                    strides=[1, 1, 1, 1],
                                    padding="VALID",
                                    name="conv")
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                # Maxpooling over the outputs
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = num_filters * len(filter_sizes)
        self.h_pool = tf.concat(3, pooled_outputs)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        self.appended_pool = tf.concat(
            1, [self.h_pool_flat, attention_outputs[0], attention_outputs[1]])
        print '[self.appended_pool]', self.appended_pool
        num_filters_total = num_filters_total + embedding_size + embedding_size_lex

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.appended_pool,
                                        self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            W = tf.get_variable(
                "W",
                shape=[num_filters_total, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(W) / 30
            l2_loss += tf.nn.l2_loss(b) / 30
            l1_loss += tf.reduce_sum(tf.abs(W))
            l1_loss += tf.reduce_sum(tf.abs(b))
            self._b = b
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # CalculateMean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(
                self.scores, self.input_y)
            self.loss = tf.reduce_mean(
                losses) + l2_reg_lambda * l2_loss + l1_reg_lambda * l1_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            self.golds = tf.argmax(self.input_y, 1, name="golds")
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")

        with tf.name_scope("avg_f1"):
            self.golds = tf.argmax(self.input_y, 1, name="golds")
            self.preds = self.predictions

            # positive recall
            pos_gold_sel = tf.equal(self.golds, 2)  # positive_gold
            posg_golds = tf.boolean_mask(self.golds, pos_gold_sel)
            posg_preds = tf.boolean_mask(self.preds, pos_gold_sel)
            correct_predictions_pr = tf.equal(posg_golds, posg_preds)
            pos_r = tf.reduce_mean(tf.cast(correct_predictions_pr, "float"),
                                   name="pos_recall")

            # positive precision
            pos_pred_sel = tf.equal(self.preds, 2)  # positive_pred
            posp_golds = tf.boolean_mask(self.golds, pos_pred_sel)
            posp_preds = tf.boolean_mask(self.preds, pos_pred_sel)
            correct_predictions_pp = tf.equal(posp_golds, posp_preds)
            pos_p = tf.reduce_mean(tf.cast(correct_predictions_pp, "float"),
                                   name="pos_precision")

            # negative recall
            neg_gold_sel = tf.equal(self.golds, 0)  # positive_gold
            negg_golds = tf.boolean_mask(self.golds, neg_gold_sel)
            negg_preds = tf.boolean_mask(self.preds, neg_gold_sel)
            correct_predictions_nr = tf.equal(negg_golds, negg_preds)
            self.neg_r = tf.reduce_mean(tf.cast(correct_predictions_nr,
                                                "float"),
                                        name="neg_recall")

            # negative precision
            neg_pred_sel = tf.equal(self.preds, 0)  # positive_pred
            negp_golds = tf.boolean_mask(self.golds, neg_pred_sel)
            negp_preds = tf.boolean_mask(self.preds, neg_pred_sel)
            correct_predictions_np = tf.equal(negp_golds, negp_preds)
            self.neg_p = tf.reduce_mean(tf.cast(correct_predictions_np,
                                                "float"),
                                        name="neg_precision")

            self.f1_neg = 2 * self.neg_p * self.neg_r / (self.neg_p +
                                                         self.neg_r) * 100
            self.f1_pos = 2 * pos_p * pos_r / (pos_p + pos_r) * 100

            self.avg_f1 = (self.f1_neg + self.f1_pos) / 2
Example #43
0
def memory_augmented_neural_network(input_var, target_var,
                                    batch_size=16, nb_class=5, memory_shape=(128, 40),
                                    controller_size=200, input_size=20 * 20, nb_reads=4):
    """Build the TensorFlow graph of a memory-augmented neural network.

    The recurrence is expressed with ``tf.scan`` over the time axis. Each step
    runs a gated controller on the current input, the previous read vector and
    the previous hidden state, updates the external memory and reads from it.

    Args:
        input_var: Input tensor of dimensions (batch_size, time, input_dim).
        target_var: Label-index tensor of dimensions (batch_size, time). Its
            static time dimension is read with ``get_shape()`` and used in
            reshapes.
        batch_size: Batch size; used in fixed-size reshapes and initial states.
        nb_class: Number of classes (depth of the one-hot targets and of the
            output layer).
        memory_shape: Shape of the memory held per batch element.
        controller_size: Width of the controller hidden and cell states.
        input_size: Width of one input vector; ``W_xh`` has
            ``input_size + nb_class`` rows.
        nb_reads: Number of read vectors produced per step.

    Returns:
        A tuple ``(output_var, output_var_flatten, params)`` where
        ``output_var`` is the softmax output reshaped to
        (batch_size, sequence_length, nb_class), ``output_var_flatten`` is the
        same softmax with shape (batch_size * sequence_length, nb_class), and
        ``params`` is the list of weight and bias variables (``gamma`` is not
        included).
    """
    ## input_var has dimensions (batch_size, time, input_dim)
    ## target_var has dimensions (batch_size, time) (label indices)

    # Initial recurrent state. shared_float32 / shared_one_hot are helpers
    # defined outside this function.
    M_0 = shared_float32(1e-6 * np.ones((batch_size, ) + memory_shape),
                         name='memory')
    c_0 = shared_float32(np.zeros((batch_size, controller_size)),
                         name='memory_cell_state')
    h_0 = shared_float32(np.zeros((batch_size, controller_size)),
                         name='hidden_state')
    r_0 = shared_float32(np.zeros((batch_size, nb_reads * memory_shape[1])),
                         name='read_vector')
    wr_0 = shared_one_hot((batch_size, nb_reads, memory_shape[0]), name='wr')
    wu_0 = shared_one_hot((batch_size, memory_shape[0]), name='wu')

    def shape_high(shape):
        """Return ``(shape_as_list, high)`` for a uniform initializer.

        ``high`` is ``sqrt(6 / (sum(shape[:2]) * prod(shape[2:])))``; for a
        bare integer size it is ``sqrt(6 / size)``.
        """
        # Test for a scalar size before the array conversion; after
        # np.array() the isinstance check could never succeed.
        if isinstance(shape, int):
            return ([shape], np.sqrt(6. / shape))
        shape = np.array(shape)
        high = np.sqrt(6. / (np.sum(shape[:2]) * np.prod(shape[2:])))
        return (list(shape), high)

    with tf.variable_scope("Weights"):
        shape, high = shape_high((nb_reads, controller_size, memory_shape[1]))
        W_key = tf.get_variable('W_key',
                                shape=shape,
                                initializer=tf.random_uniform_initializer(
                                    -1 * high, high))
        b_key = tf.get_variable('b_key',
                                shape=(nb_reads, memory_shape[1]),
                                initializer=tf.constant_initializer(0))
        shape, high = shape_high((nb_reads, controller_size, memory_shape[1]))
        W_add = tf.get_variable('W_add',
                                shape=shape,
                                initializer=tf.random_uniform_initializer(
                                    -1 * high, high))
        b_add = tf.get_variable('b_add',
                                shape=(nb_reads, memory_shape[1]),
                                initializer=tf.constant_initializer(0))
        shape, high = shape_high((nb_reads, controller_size, 1))
        W_sigma = tf.get_variable('W_sigma',
                                  shape=shape,
                                  initializer=tf.random_uniform_initializer(
                                      -1 * high, high))
        b_sigma = tf.get_variable('b_sigma',
                                  shape=(nb_reads, 1),
                                  initializer=tf.constant_initializer(0))
        shape, high = shape_high((input_size + nb_class, 4 * controller_size))
        W_xh = tf.get_variable('W_xh',
                               shape=shape,
                               initializer=tf.random_uniform_initializer(
                                   -1 * high, high))
        b_h = tf.get_variable('b_xh',
                              shape=[4 * controller_size],
                              initializer=tf.constant_initializer(0))
        shape, high = shape_high(
            (controller_size + nb_reads * memory_shape[1], nb_class))
        W_o = tf.get_variable('W_o',
                              shape=shape,
                              initializer=tf.random_uniform_initializer(
                                  -1 * high, high))
        b_o = tf.get_variable('b_o',
                              shape=[nb_class],
                              initializer=tf.constant_initializer(0))
        shape, high = shape_high(
            (nb_reads * memory_shape[1], 4 * controller_size))
        W_rh = tf.get_variable('W_rh',
                               shape=shape,
                               initializer=tf.random_uniform_initializer(
                                   -1 * high, high))
        shape, high = shape_high((controller_size, 4 * controller_size))
        W_hh = tf.get_variable('W_hh',
                               shape=shape,
                               initializer=tf.random_uniform_initializer(
                                   -1 * high, high))
        gamma = tf.get_variable('gamma',
                                shape=[1],
                                initializer=tf.constant_initializer(0.95))

    def slice_equally(x, size, nb_slice):
        """Split ``x`` along axis 1 into ``nb_slice`` chunks of width ``size``."""
        return [x[:, n * size:(n + 1) * size] for n in range(nb_slice)]

    def step(xparameter1, x_t):
        """Advance the recurrence by one time step.

        ``xparameter1`` is the previous state
        ``[M_tm1, c_tm1, h_tm1, r_tm1, wr_tm1, wu_tm1]`` and ``x_t`` the input
        slice for this step; the new state is returned in the same order.
        """
        M_tm1, c_tm1, h_tm1, r_tm1, wr_tm1, wu_tm1 = xparameter1
        # Re-fetch the shared variables created in the "Weights" scope above.
        with tf.variable_scope("Weights", reuse=True):
            W_key = tf.get_variable('W_key',
                                    shape=(nb_reads, controller_size,
                                           memory_shape[1]))
            b_key = tf.get_variable('b_key', shape=(nb_reads, memory_shape[1]))
            W_add = tf.get_variable('W_add',
                                    shape=(nb_reads, controller_size,
                                           memory_shape[1]))
            b_add = tf.get_variable('b_add', shape=(nb_reads, memory_shape[1]))
            W_sigma = tf.get_variable('W_sigma',
                                      shape=(nb_reads, controller_size, 1))
            b_sigma = tf.get_variable('b_sigma', shape=(nb_reads, 1))
            W_xh = tf.get_variable('W_xh',
                                   shape=(input_size + nb_class,
                                          4 * controller_size))
            b_h = tf.get_variable('b_xh', shape=[4 * controller_size])
            W_rh = tf.get_variable('W_rh',
                                   shape=(nb_reads * memory_shape[1],
                                          4 * controller_size))
            W_hh = tf.get_variable('W_hh',
                                   shape=(controller_size,
                                          4 * controller_size))
            gamma = tf.get_variable('gamma',
                                    shape=[1],
                                    initializer=tf.constant_initializer(0.95))

        # Controller: four gate pre-activations computed in one matmul each
        # for the input, the previous read vector and the previous hidden state.
        preactivations = tf.matmul(x_t, W_xh) + tf.matmul(
            r_tm1, W_rh) + tf.matmul(h_tm1, W_hh) + b_h
        gf_, gi_, go_, u_ = slice_equally(preactivations, controller_size, 4)
        gf = tf.sigmoid(gf_)
        gi = tf.sigmoid(gi_)
        go = tf.sigmoid(go_)
        # NOTE(review): a standard LSTM applies tanh to the candidate update;
        # sigmoid is kept to preserve existing behavior -- confirm intent.
        u = tf.sigmoid(u_)

        c_t = gf * c_tm1 + gi * u
        h_t = go * tf.tanh(c_t)  #(batch_size, controller_size)

        h_t_W_key = tf.matmul(h_t,
                              tf.reshape(W_key, shape=(controller_size, -1)))
        k_t = tf.tanh(
            tf.reshape(h_t_W_key,
                       shape=(batch_size, nb_reads, memory_shape[1])) +
            b_key)  #(batch_size, nb_reads, memory_shape[1])
        h_t_W_add = tf.matmul(h_t,
                              tf.reshape(W_add, shape=(controller_size, -1)))
        a_t = tf.tanh(
            tf.reshape(h_t_W_add,
                       shape=(batch_size, nb_reads, memory_shape[1])) + b_add)
        h_t_W_sigma = tf.matmul(
            h_t, tf.reshape(W_sigma, shape=(controller_size, -1)))
        sigma_t = tf.sigmoid(
            tf.reshape(h_t_W_sigma, shape=(batch_size, nb_reads, 1)) +
            b_sigma)  #(batch_size, nb_reads, 1)

        # NOTE(review): tf.nn.top_k returns the largest entries first, so
        # these are the indices of the nb_reads highest values of wu_tm1. If
        # the lowest-valued slots were intended (as the name "wlu" may
        # suggest), the selection should come from the other end -- confirm.
        _, temp_indices = tf.nn.top_k(wu_tm1, memory_shape[0])
        wlu_tm1 = tf.slice(temp_indices, [0, 0],
                           [batch_size, nb_reads])  #(batch_size, nb_reads)

        # sigma_t broadcasts against wr_tm1 along the last axis.
        ww_t = tf.reshape(
            sigma_t * wr_tm1,
            (batch_size * nb_reads,
             memory_shape[0]))  #(batch_size*nb_reads, memory_shape[0])
        ww_t = update_tensor(ww_t, tf.reshape(wlu_tm1, [-1]), 1.0 - tf.reshape(
            sigma_t, shape=[-1]))  #Update tensor done using index slicing
        ww_t = tf.reshape(ww_t, (batch_size, nb_reads, memory_shape[0]))

        with tf.variable_scope("M_t"):
            print('wlu_tm1 : ', wlu_tm1.get_shape().as_list())
            M_t = update_tensor(
                M_tm1, wlu_tm1[:, 0],
                tf.constant(0., shape=[
                    batch_size, memory_shape[1]
                ]))  #Update tensor done using sparse to dense
        M_t = tf.add(
            M_t, tf.matmul(tf.transpose(ww_t, perm=[0, 2, 1]),
                           a_t))  #(batch_size, memory_size[0], memory_size[1])
        K_t = cosine_similarity(k_t, M_t)

        wr_t = tf.nn.softmax(
            tf.reshape(K_t, (batch_size * nb_reads, memory_shape[0])))
        wr_t = tf.reshape(
            wr_t, (batch_size, nb_reads,
                   memory_shape[0]))  #(batch_size, nb_reads, memory_size[0])

        wu_t = gamma * wu_tm1 + tf.reduce_sum(wr_t, axis=1) + tf.reduce_sum(
            ww_t, axis=1)  #(batch_size, memory_size[0])

        r_t = tf.reshape(tf.matmul(wr_t, M_t), [batch_size, -1])

        return [M_t, c_t, h_t, r_t, wr_t, wu_t]

    #Model Part:
    sequence_length_var = target_var.get_shape().as_list()[
        1]  #length of the input
    output_shape_var = (batch_size * sequence_length_var, nb_class
                        )  #(batch_size*sequence_length_var, nb_class)

    # Input concat with time offset: each step sees the one-hot label of the
    # previous step (all zeros at the first step).
    one_hot_target_flattened = tf.one_hot(tf.reshape(target_var, [-1]),
                                          depth=nb_class)
    one_hot_target = tf.reshape(
        one_hot_target_flattened,
        (batch_size, sequence_length_var,
         nb_class))  #(batch_size, sequence_var_length, nb_class)
    offset_target_var = tf.concat(
        [
            tf.zeros_like(tf.expand_dims(one_hot_target[:, 0], 1)),
            one_hot_target[:, :-1]
        ],
        axis=1)  #(batch_size, sequence_var_length, nb_class)
    l_input_var = tf.concat(
        [input_var, offset_target_var],
        axis=2)  #(batch_size, sequence_var_length, input_size+nb_class)

    # tf.scan iterates over axis 0, so the time axis is moved to the front.
    l_ntm_var = tf.scan(
        step,
        elems=tf.transpose(l_input_var, perm=[1, 0, 2]),
        initializer=[M_0, c_0, h_0, r_0, wr_0, wu_0],
        name="Scan_MANN_Last")  #Set of all above parameters, as list
    l_ntm_output_var = tf.transpose(
        tf.concat(l_ntm_var[2:4], axis=2), perm=[1, 0, 2]
    )  #h_t & r_t, size=(batch_size, sequence_var_length, controller_size+nb_reads*memory_size[1])

    l_input_var_W_o = tf.matmul(
        tf.reshape(l_ntm_output_var,
                   shape=(batch_size * sequence_length_var, -1)), W_o)
    output_var_preactivation = tf.add(
        tf.reshape(l_input_var_W_o,
                   (batch_size, sequence_length_var, nb_class)), b_o)
    output_var_flatten = tf.nn.softmax(
        tf.reshape(output_var_preactivation, output_shape_var))
    output_var = tf.reshape(output_var_flatten,
                            output_var_preactivation.get_shape().as_list())

    #Parameters
    params = [
        W_key, b_key, W_add, b_add, W_sigma, b_sigma, W_xh, W_rh, W_hh, b_h,
        W_o, b_o
    ]

    return output_var, output_var_flatten, params
Example #44
0
# Input placeholders; the leading dimension is left unspecified (None).
x_place = tf.placeholder(tf.float32, shape=[None, time_steps, element_size])
y_place = tf.placeholder(tf.float32, shape=[None, num_classes])

# Recurrent-layer parameters, all zero-initialized.
with tf.name_scope('rnn_weights') as scope:
    wl = tf.Variable(tf.zeros([element_size, hidden_layer_size]))
    wh = tf.Variable(tf.zeros([hidden_layer_size, hidden_layer_size]))
    # The bias size is passed as a bare value rather than a sequence.
    bl = tf.Variable(tf.zeros(hidden_layer_size))


def rnn_step(previous_hidden_layer, x):
    """Return ``tanh(x @ wl + previous_hidden_layer @ wh + bl)``."""
    input_term = tf.matmul(x, wl)
    recurrent_term = tf.matmul(previous_hidden_layer, wh)
    return tf.tanh(input_term + recurrent_term + bl)
# tf.scan iterates over the leading axis, so swap the first two axes of x_place.
processed_inputs = tf.transpose(x_place, perm=[1, 0, 2])
print('processed_inputs ', processed_inputs.get_shape())

# Run rnn_step over the transposed input, starting from an all-zero state.
initial_hidden = tf.zeros([batch_size, hidden_layer_size])
all_hidden_states = tf.scan(
    rnn_step, processed_inputs, initializer=initial_hidden)
print('all_hidden_states', all_hidden_states.get_shape())

# Output-layer parameters, drawn from a truncated normal distribution.
with tf.name_scope('linear_weights') as scope:
    WL = tf.Variable(tf.truncated_normal([hidden_layer_size, num_classes]))
    BL = tf.Variable(tf.truncated_normal([num_classes]))
def linear_layer(hidden_state):
    """Apply the affine map ``hidden_state @ WL + BL``."""
    projected = tf.matmul(hidden_state, WL)
    return projected + BL
# Apply the linear layer to every entry of all_hidden_states.
all_outputs = tf.map_fn(linear_layer, all_hidden_states)
print('all_outputs', all_outputs.get_shape())

# Keep only the last entry along the leading axis.
output = all_outputs[-1]
print(output.get_shape())

# Loss, optimizer step and accuracy all live under the 'accuracy' name scope.
with tf.name_scope('accuracy'):
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=y_place))
    train_step = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(loss)
    accuracy = tf.reduce_mean(
        tf.cast(
            tf.equal(tf.argmax(output, 1), tf.argmax(y_place, 1)),
            tf.float32))
# finally the training part
Example #45
0
    def call(self, x, mask=None):
        """Compute ``-.5 * (a - mu)^T * P * (a - mu)`` for every batch row.

        ``P`` is built from ``L_flat`` according to ``self.mode``: in 'full'
        mode as ``L * L^T`` with ``L`` lower triangular and an exponentiated
        diagonal, in 'diag' mode as a diagonal matrix.

        Args:
            x: Sequence of exactly three tensors ``[L_flat, mu, a]``.
            mask: Not used by this method.

        Returns:
            A 2-D tensor holding the flattened result of the batched product.

        Raises:
            RuntimeError: If the Keras backend is neither 'theano' nor
                'tensorflow', or if ``self.mode`` is neither 'full' nor
                'diag'.
        """
        # TODO: validate input shape

        assert (len(x) == 3)
        L_flat = x[0]
        mu = x[1]
        a = x[2]

        if self.mode == 'full':
            # Create L and L^T matrix, which we use to construct the positive-definite matrix P.
            L = None
            LT = None
            if K.backend() == 'theano':
                import theano.tensor as T
                import theano

                def fn(x, L_acc, LT_acc):
                    # The accumulators are ignored; each output depends only on x.
                    x_ = K.zeros((self.nb_actions, self.nb_actions))
                    x_ = T.set_subtensor(x_[np.tril_indices(self.nb_actions)], x)
                    diag = K.exp(T.diag(x_)) + K.epsilon()
                    x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)], diag)
                    return x_, x_.T

                outputs_info = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]
                results, _ = theano.scan(fn=fn, sequences=L_flat, outputs_info=outputs_info)
                L, LT = results
            elif K.backend() == 'tensorflow':
                import tensorflow as tf

                # Number of elements in a triangular matrix.
                nb_elems = (self.nb_actions * self.nb_actions + self.nb_actions) // 2

                # Create mask for the diagonal elements in L_flat. This is used to exponentiate
                # only the diagonal elements, which is done before gathering.
                diag_indices = [0]
                for row in range(1, self.nb_actions):
                    diag_indices.append(diag_indices[-1] + (row + 1))
                diag_mask = np.zeros(1 + nb_elems)  # +1 for the leading zero
                diag_mask[np.array(diag_indices) + 1] = 1
                diag_mask = K.variable(diag_mask)

                # Add leading zero element to each element in the L_flat. We use this zero
                # element when gathering L_flat into a lower triangular matrix L.
                nb_rows = tf.shape(L_flat)[0]
                zeros = tf.expand_dims(tf.tile(K.zeros((1,)), [nb_rows]), 1)
                try:
                    # Old TF behavior.
                    L_flat = tf.concat(1, [zeros, L_flat])
                except TypeError:
                    # New TF behavior
                    L_flat = tf.concat([zeros, L_flat], 1)

                # Create mask that can be used to gather elements from L_flat and put them
                # into a lower triangular matrix.
                tril_mask = np.zeros((self.nb_actions, self.nb_actions), dtype='int32')
                tril_mask[np.tril_indices(self.nb_actions)] = range(1, nb_elems + 1)

                # Finally, process each element of the batch.
                init = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]

                def fn(a, x):
                    # Exponentiate everything. This is much easier than only exponentiating
                    # the diagonal elements, and, usually, the action space is relatively low.
                    x_ = K.exp(x) + K.epsilon()
                    # Only keep the diagonal elements.
                    x_ *= diag_mask
                    # Add the original, non-diagonal elements.
                    x_ += x * (1. - diag_mask)
                    # Finally, gather everything into a lower triangular matrix.
                    L_ = tf.gather(x_, tril_mask)
                    return [L_, tf.transpose(L_)]

                tmp = tf.scan(fn, L_flat, initializer=init)
                if isinstance(tmp, (list, tuple)):
                    # TensorFlow 0.10 now returns a tuple of tensors.
                    L, LT = tmp
                else:
                    # Old TensorFlow < 0.10 returns a shared tensor.
                    L = tmp[:, 0, :, :]
                    LT = tmp[:, 1, :, :]
            else:
                raise RuntimeError('Unknown Keras backend "{}".'.format(K.backend()))
            assert L is not None
            assert LT is not None
            P = K.batch_dot(L, LT)
        elif self.mode == 'diag':
            if K.backend() == 'theano':
                import theano.tensor as T
                import theano

                def fn(x, P_acc):
                    # The accumulator is ignored; each output depends only on x.
                    x_ = K.zeros((self.nb_actions, self.nb_actions))
                    x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)], x)
                    return x_

                outputs_info = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]
                P, _ = theano.scan(fn=fn, sequences=L_flat, outputs_info=outputs_info)
            elif K.backend() == 'tensorflow':
                import tensorflow as tf

                # Create mask that can be used to gather elements from L_flat and put them
                # into a diagonal matrix.
                diag_mask = np.zeros((self.nb_actions, self.nb_actions), dtype='int32')
                diag_mask[np.diag_indices(self.nb_actions)] = range(1, self.nb_actions + 1)

                # Add leading zero element to each element in the L_flat. We use this zero
                # element when gathering L_flat into a diagonal matrix: every
                # off-diagonal index of diag_mask is 0 and so picks up this zero.
                nb_rows = tf.shape(L_flat)[0]
                zeros = tf.expand_dims(tf.tile(K.zeros((1,)), [nb_rows]), 1)
                try:
                    # Old TF behavior.
                    L_flat = tf.concat(1, [zeros, L_flat])
                except TypeError:
                    # New TF behavior
                    L_flat = tf.concat([zeros, L_flat], 1)

                # Finally, process each element of the batch.
                def fn(a, x):
                    x_ = tf.gather(x, diag_mask)
                    return x_

                P = tf.scan(fn, L_flat, initializer=K.zeros((self.nb_actions, self.nb_actions)))
            else:
                raise RuntimeError('Unknown Keras backend "{}".'.format(K.backend()))
        else:
            # Without this branch P would be unbound below and the failure
            # would surface as an unrelated UnboundLocalError.
            raise RuntimeError('Unknown mode "{}".'.format(self.mode))
        assert P is not None
        assert K.ndim(P) == 3

        # Combine a, mu and P into a scalar (over the batches). What we compute here is
        # -.5 * (a - mu)^T * P * (a - mu), where * denotes the dot-product. Unfortunately
        # TensorFlow handles vector * P slightly suboptimal, hence we convert the vectors to
        # 1xd/dx1 matrices and finally flatten the resulting 1x1 matrix into a scalar. All
        # operations happen over the batch size, which is dimension 0.
        prod = K.batch_dot(K.expand_dims(a - mu, 1), P)
        prod = K.batch_dot(prod, K.expand_dims(a - mu, -1))
        A = -.5 * K.batch_flatten(prod)
        assert K.ndim(A) == 2
        return A
Example #46
0
    def tf_discounted_cumulative_reward(self, terminal, reward, discount=None, final_reward=0.0, horizon=0):
        """
        Builds the TensorFlow operations that turn a sequence of single rewards into the
        sequence of discounted cumulative rewards.

        Example:
        single rewards = 2.0 1.0 0.0 0.5 1.0 -1.0
        terminal = False, False, False, False True False
        gamma = 0.95
        final_reward = 100.0 (only matters for last episode (r=-1.0) as this episode has no terminal signal)
        horizon=3
        output = 2.95 1.45 1.38 1.45 1.0 94.0

        Args:
            terminal: Tensor (bool) with the is-terminal sequence. It may contain more than one
                True value. If its very last element is False (not terminating), `final_reward`
                is assumed to follow the last value of the single rewards sequence.
            reward: Tensor (float) with the sequence of single rewards.
            discount (float): The discount factor (gamma). If None, `self.discount` is used.
            final_reward (float): Reward value to use if the last episode in the sequence does
                not terminate (terminal sequence ends with False). Ignored if horizon == 1 or
                discount == 0.0.
            horizon (int): The length of the horizon (e.g. for n-step cumulative rewards in
                continuous tasks without terminal signals). 0 (default) means an infinite
                horizon. horizon=1 gives the same results as a discount factor of 0.0.

        Returns:
            Discounted cumulative reward tensor with the same shape as `reward`.
        """

        # Fall back to the model's own discount factor.
        gamma = self.discount if discount is None else discount

        def accumulate_reward(running, step):
            """Scan step for the (n-step) discounted sum; restarts on a terminal flag."""
            rew, is_terminal, is_over_horizon, sub = step
            carried = rew + running * gamma
            # Past the horizon length, drop the discounted reward from `horizon` steps back.
            within_episode = tf.where(
                condition=is_over_horizon,
                x=(carried - sub),
                y=carried
            )
            # A terminal step starts a fresh accumulation.
            return tf.where(condition=is_terminal, x=rew, y=within_episode)

        def episode_length(count, is_terminal):
            """Scan step counting steps since the last terminal flag (which counts as 1)."""
            restart = tf.ones(shape=(), dtype=tf.int32)
            return tf.where(condition=is_terminal, x=restart, y=count + 1)

        # The accumulation runs right-to-left while tf.scan runs left-to-right, so both
        # sequences are flipped first.
        reversed_reward = tf.reverse(tensor=reward, axis=(0,))
        # e.g. -1.0 1.0 0.5 0.0 1.0 2.0
        reversed_terminal = tf.reverse(tensor=terminal, axis=(0,))
        # e.g. F T F F F F

        # Steps until the end of each episode, as determined by the terminal signals.
        lengths = tf.scan(fn=episode_length, elems=reversed_terminal, initializer=0)
        # e.g. 1 1 2 3 4 5
        off_horizon = tf.greater(lengths, tf.fill(dims=tf.shape(lengths), value=horizon))
        # e.g. F F F F T T

        # Per-step value to subtract once the horizon is exceeded.
        if horizon > 0:
            scaled = tf.map_fn(lambda x: (gamma ** horizon) * x, reversed_reward, dtype=tf.float32)
            # Shift right by `horizon` positions, padding the front with 0.0 ...
            padded = tf.concat([np.zeros(shape=(horizon,)), scaled], axis=0)
            # ... and cut back to the original length.
            horizon_subtractions = tf.slice(padded, begin=(0,), size=tf.shape(reversed_reward))
            # e.g. 0.0, 0.0, 0.0, -1.0*g^3, 1.0*g^3, 0.5*g^3
        else:
            # Infinite horizon (horizon=0): nothing is ever subtracted.
            horizon_subtractions = tf.zeros(shape=tf.shape(reversed_reward))

        # With horizon == 1 the assumed final reward is not used.
        if horizon == 1:
            start_value = 0.0
        else:
            start_value = final_reward

        # Main scan: add the discounted previous value and subtract the matching
        # horizon correction at every step.
        cumulative = tf.scan(
            fn=accumulate_reward,
            elems=(reversed_reward, reversed_terminal, off_horizon, horizon_subtractions),
            initializer=start_value
        )
        # Flip back so the result lines up with the input sequences.
        return tf.reverse(tensor=cumulative, axis=(0,))
Example #47
0
        ends_indices = tf.stack([all_ends[:, 0], range_ends], axis=1)
        ind = tf.reduce_max(tf.scatter_nd(
            ends_indices, all_ends[:, 1],
            [tf.shape(q)[0], tf.shape(all_ends)[0]]),
                            axis=-1)
        range_ind = tf.range(tf.shape(ind)[0])
        mask_ends = tf.cast(
            tf.scatter_nd(tf.stack([ind, range_ind], axis=1),
                          tf.ones_like(range_ind),
                          [tf.reduce_max(ind) + 1,
                           tf.shape(ind)[0]]), bool)
        # A bit of a trick. With the locations of the ends of the mask (the last periods in
        #  each of the contexts) as 1 and the rest as 0, we can scan with exclusive or
        #  (starting from all 1). For each context in the batch, this will result in 1s
        #  up until the marker (the location of that last period) and 0s afterwards.
        mask = tf.scan(tf.logical_xor, mask_ends,
                       tf.ones_like(range_ind, dtype=bool))

    # We score each possible word inversely with their Euclidean distance to the regressed word.
    #  The highest score (lowest distance) will correspond to the selected word.
    logits = -tf.reduce_sum(tf.square(context * tf.transpose(
        tf.expand_dims(tf.cast(mask, tf.float32), -1), [1, 0, 2]) - logit),
                            axis=-1)

print("Done with building model")
print("Training...")

# Training

# gold_standard: The real answers.
gold_standard = tf.placeholder(tf.float32, [None, 1, D], "answer")
with tf.variable_scope('accuracy'):
Example #48
0
    def construct_model(self, images, actions, rewards):
        """Build convolutional lstm video predictor using CDNA, or DNA.

        The first frame is processed eagerly (outside `tf.scan`) so that all
        variables get created, and its outputs are then used as the scan
        initializer for the remaining frames.

        Args:
          images: ground truth image sequence, indexed by timestep along the
                  first axis (assumes time-major, batch on axis 1 -- TODO
                  confirm against caller).
          actions: action sequence, indexed by timestep along the first axis.
          rewards: reward sequence, indexed by timestep along the first axis.

        Returns:
          gen_images: predicted future image frames
          gen_rewards: predicted future rewards
          latent_mean: single-element list with the mean of the approximated
                       posterior, or `[None]` when `hparams.stochastic_model`
                       is false.
          latent_std: single-element list with the std of the approximated
                      posterior, or `[None]` when `hparams.stochastic_model`
                      is false.

        Raises:
          ValueError: not raised directly here; presumably propagated from the
                      predictive tower if more than 1 mask is specified for
                      the DNA model -- TODO confirm.
        """
        context_frames = self.hparams.video_num_input_frames

        batch_size = common_layers.shape_list(images)[1]
        ss_func = self.get_scheduled_sample_func(batch_size)

        # Latent tower. The posterior statistics default to None so that the
        # return statement below is valid for deterministic models as well
        # (previously they were unbound in that case).
        latent = None
        latent_mean = None
        latent_std = None
        if self.hparams.stochastic_model:
            latent_mean, latent_std = self.construct_latent_tower(images)
            latent = self.get_gaussian_latent(latent_mean, latent_std)

        def process_single_frame(prev_outputs, inputs):
            """Process a single frame of the video."""
            cur_image, cur_reward, action = inputs
            time_step, prev_image, prev_reward, lstm_states = prev_outputs

            # Choose between ground truth and the model's own previous
            # prediction; `done_warm_start` is true once the context frames
            # have been consumed.
            generated_items = [prev_image, prev_reward]
            groundtruth_items = [cur_image, cur_reward]
            done_warm_start = tf.greater(time_step, context_frames - 1)
            input_image, input_reward = self.get_scheduled_sample_inputs(
                done_warm_start, groundtruth_items, generated_items, ss_func)

            # Prediction
            pred_image, lstm_states = self.construct_predictive_tower(
                input_image, input_reward, action, lstm_states, latent)

            if self.hparams.reward_prediction:
                reward_input_image = pred_image
                if self.hparams.reward_prediction_stop_gradient:
                    reward_input_image = tf.stop_gradient(reward_input_image)
                pred_reward = self.reward_prediction(reward_input_image,
                                                     input_reward, action,
                                                     latent)
            else:
                # Without a reward head, pass the input reward straight through.
                pred_reward = input_reward

            time_step += 1
            outputs = (time_step, pred_image, pred_reward, lstm_states)

            return outputs

        # HACK: Do first step outside to initialize all the variables
        lstm_states = [None] * 7
        inputs = images[0], rewards[0], actions[0]
        prev_outputs = (tf.constant(0), tf.zeros_like(images[0]),
                        tf.zeros_like(rewards[0]), lstm_states)

        initializers = process_single_frame(prev_outputs, inputs)
        first_gen_images = tf.expand_dims(initializers[1], axis=0)
        first_gen_rewards = tf.expand_dims(initializers[2], axis=0)

        # Remaining steps: everything after the first frame, excluding the
        # final timestep.
        inputs = (images[1:-1], rewards[1:-1], actions[1:-1])

        outputs = tf.scan(process_single_frame, inputs, initializers)
        gen_images, gen_rewards = outputs[1:3]

        # Prepend the eagerly computed first prediction to the scanned ones.
        gen_images = tf.concat((first_gen_images, gen_images), axis=0)
        gen_rewards = tf.concat((first_gen_rewards, gen_rewards), axis=0)

        return gen_images, gen_rewards, [latent_mean], [latent_std]
Example #49
0
def rnnrbm():
    """Build the RNN-RBM graph and return its handles.

    Returns:
        A tuple `(x, cost, generate, W, bh, bv, x, lr, Wuh, Wuv, Wvu, Wuu, bu,
        u0)` where `x` is the data placeholder (it appears twice, kept for
        backward compatibility), `cost` is the free-energy cost tensor,
        `generate` is a function that builds the music-generation graph, `lr`
        is the learning-rate placeholder and the remaining entries are the
        model variables.
    """
    # The placeholder variable that holds our data
    x = tf.placeholder(tf.float32, [None, n_visible])
    # The learning rate. We set and change this value during training.
    lr = tf.placeholder(tf.float32)
    # the batch size (dynamic: number of rows fed into x)
    size_bt = tf.shape(x)[0]

    # Here we set aside the space for each of the variables.
    # We initialize these variables when we load saved parameters in rnn_rbm_train.py or rnn_rbm_generate.py
    W = tf.Variable(tf.zeros([n_visible, n_hidden]), name="W")
    Wuh = tf.Variable(tf.zeros([n_hidden_recurrent, n_hidden]), name="Wuh")
    Wuv = tf.Variable(tf.zeros([n_hidden_recurrent, n_visible]), name="Wuv")
    Wvu = tf.Variable(tf.zeros([n_visible, n_hidden_recurrent]), name="Wvu")
    Wuu = tf.Variable(tf.zeros([n_hidden_recurrent, n_hidden_recurrent]),
                      name="Wuu")
    bh = tf.Variable(tf.zeros([1, n_hidden]), name="bh")
    bv = tf.Variable(tf.zeros([1, n_visible]), name="bv")
    bu = tf.Variable(tf.zeros([1, n_hidden_recurrent]), name="bu")
    u0 = tf.Variable(tf.zeros([1, n_hidden_recurrent]), name="u0")
    BH_t = tf.Variable(tf.zeros([1, n_hidden]), name="BH_t")
    BV_t = tf.Variable(tf.zeros([1, n_visible]), name="BV_t")

    def rnn_recurrence(u_tm1, sl):
        # Iterate through the data in the batch and generate the values of the RNN hidden nodes
        sl = tf.reshape(sl, [1, n_visible])
        u_t = (tf.tanh(bu + tf.matmul(sl, Wvu) + tf.matmul(u_tm1, Wuu)))
        return u_t

    def visible_bias_recurrence(bv_t, u_tm1):
        # Iterate through the values of the RNN hidden nodes and generate the values of the visible bias vectors.
        # The incoming accumulator is ignored; each step depends only on u_tm1.
        bv_t = tf.add(bv, tf.matmul(u_tm1, Wuv))
        return bv_t

    def hidden_bias_recurrence(bh_t, u_tm1):
        # Iterate through the values of the RNN hidden nodes and generate the values of the hidden bias vectors.
        # The incoming accumulator is ignored; each step depends only on u_tm1.
        bh_t = tf.add(bh, tf.matmul(u_tm1, Wuh))
        return bh_t

    def generate_recurrence(count, k, u_tm1, primer, x, music):
        # This function builds and runs the gibbs steps for each RBM in the chain to generate music.
        # `k` (the iteration limit) and `x` are loop-carried unchanged.
        # Get the bias vectors from the current state of the RNN
        bv_t = tf.add(bv, tf.matmul(u_tm1, Wuv))
        bh_t = tf.add(bh, tf.matmul(u_tm1, Wuh))

        # Run the Gibbs step to get the music output. Prime the RBM with the previous musical output.
        x_out = RBM.gibbs_sample(primer, W, bv_t, bh_t, k=25)

        # Update the RNN hidden state based on the musical output and current hidden state.
        u_t = (tf.tanh(bu + tf.matmul(x_out, Wvu) + tf.matmul(u_tm1, Wuu)))

        # Add the new output to the musical piece
        music = tf.concat(axis=0, values=[music, x_out])

        return count + 1, k, u_t, x_out, x, music

    def generate(num,
                 x=x,
                 size_bt=size_bt,
                 u0=u0,
                 n_visible=n_visible,
                 prime_length=100):
        """
            This function handles generating music. This function is one of the outputs of the rnnrbm function
            Args:
                num (int): The loop runs while the counter (starting at 1) is below `num`
                x (tf.placeholder): The data vector. We can use feed_dict to set this to the music primer.
                size_bt (tf.Tensor): The batch size (unused here; kept for interface compatibility)
                u0 (tf.Variable): The initial state of the RNN
                n_visible (int): The size of the data vectors
                prime_length (int): The number of time steps into the primer song that we use before beginning to generate music
            Returns:
                The generated music, as a tf.Tensor. Its first row is the all-zero seed row.

        """
        # Run the RNN over the primer and pick the hidden state at the requested position.
        Uarr = tf.scan(rnn_recurrence, x, initializer=u0)
        U = Uarr[int(np.floor(prime_length /
                              midi_manipulation.num_timesteps)), :, :]

        time_steps = tf.constant(1, tf.int32)
        iterations = tf.constant(num)
        # Initial RBM primer (all zeros) and the seed row of the generated piece.
        primer = tf.zeros([1, n_visible], tf.float32)
        music = tf.zeros([1, n_visible], tf.float32)
        loop_vars = [time_steps, iterations, U, primer, x, music]

        # `music` grows by one row per iteration, so its first dimension must
        # be left unknown. The second dimension follows n_visible instead of a
        # hard-coded width so the invariant matches the tensor created above.
        shape_invariants = [
            time_steps.get_shape(),
            iterations.get_shape(),
            U.get_shape(),
            primer.get_shape(),
            x.get_shape(),
            tf.TensorShape([None, n_visible]),
        ]

        [_, _, _, _, _, music
         ] = tf.while_loop(lambda count, num_iter, *args: count < num_iter,
                           generate_recurrence,
                           loop_vars,
                           shape_invariants=shape_invariants)

        return music

    # Reshape our bias matrices to be the same size as the batch.
    # NOTE(review): the returned assign ops are never run or returned, so these
    # two lines only add nodes to the graph; kept to leave the graph unchanged.
    tf.assign(BH_t, tf.tile(BH_t, [size_bt, 1]))
    tf.assign(BV_t, tf.tile(BV_t, [size_bt, 1]))

    # Scan through the rnn and generate the value for each hidden node in the batch
    u_t = tf.scan(rnn_recurrence, x, initializer=u0)

    # Scan through the rnn and generate the visible and hidden biases for each RBM in the batch.
    # From here on BV_t / BH_t refer to these tensors, not to the variables above.
    BV_t = tf.reshape(
        tf.scan(visible_bias_recurrence, u_t,
                tf.zeros([1, n_visible], tf.float32)), [size_bt, n_visible])
    BH_t = tf.reshape(
        tf.scan(hidden_bias_recurrence, u_t,
                tf.zeros([1, n_hidden], tf.float32)), [size_bt, n_hidden])

    # Get the free energy cost from each of the RBMs in the batch
    cost = RBM.get_free_energy_cost(x, W, BV_t, BH_t, k=15)
    return x, cost, generate, W, bh, bv, x, lr, Wuh, Wuv, Wvu, Wuu, bu, u0
Example #50
0
def define_ppo_epoch(memory, policy_factory, config):
    """Build the ops that run the PPO optimization steps for one epoch.

    Args:
        memory: 6-tuple `(observation, reward, done, action, old_pdf, value)`
            of collected rollout tensors.
        policy_factory: forwarded unchanged to `define_ppo_step`.
        config: object providing `gae_gamma`, `gae_lambda`, `epoch_length`,
            `optimization_epochs`, `optimization_batch_size` and, optionally,
            `rewards_preprocessing_fun`.

    Returns:
        The merged loss/gradient summary tensor, wrapped in `tf.Print` ops so
        that each summary value is printed when it is evaluated.
    """
    observation, reward, done, action, old_pdf, value = memory

    # Cut the gradient flow here so nothing is back-propagated through the
    # simulated environment.
    observation = tf.stop_gradient(observation)
    action = tf.stop_gradient(action)
    reward = tf.stop_gradient(reward)
    if hasattr(config, "rewards_preprocessing_fun"):
        reward = config.rewards_preprocessing_fun(reward)
    done = tf.stop_gradient(done)
    value = tf.stop_gradient(value)
    old_pdf = tf.stop_gradient(old_pdf)

    advantage = calculate_generalized_advantage_estimator(
        reward, value, done, config.gae_gamma, config.gae_lambda)

    discounted_reward = tf.stop_gradient(advantage + value)

    # Standardize the advantages over the first two axes.
    advantage_mean, advantage_variance = tf.nn.moments(
        advantage, axes=[0, 1], keep_dims=True)
    centered_advantage = advantage - advantage_mean
    advantage_scale = tf.sqrt(advantage_variance) + 1e-8
    advantage_normalized = tf.stop_gradient(
        centered_advantage / advantage_scale)

    number_of_batches = (config.epoch_length * config.optimization_epochs /
                         config.optimization_batch_size)

    # Shuffled, repeated and batched view over the rollout.
    training_tensors = (observation, action, discounted_reward,
                        advantage_normalized, old_pdf)
    dataset = (
        tf.data.Dataset.from_tensor_slices(training_tensors)
        .shuffle(buffer_size=config.epoch_length,
                 reshuffle_each_iteration=True)
        .repeat(config.optimization_epochs)
        .batch(config.optimization_batch_size))
    iterator = dataset.make_initializable_iterator()
    optimizer = get_optimiser(config)

    def accumulate_step(running_totals, _):
        """Run one PPO step and add its outputs onto the running totals."""
        step_outputs = define_ppo_step(iterator.get_next(), policy_factory,
                                       optimizer, config)
        return [
            total + current
            for total, current in zip(running_totals, step_outputs)
        ]

    # The iterator must be (re)initialized before the sequential scan runs.
    with tf.control_dependencies([iterator.initializer]):
        ppo_step_rets = tf.scan(
            accumulate_step,
            tf.range(number_of_batches),
            [0., 0., 0., 0., 0., 0.],
            parallel_iterations=1)

    ppo_summaries = []
    for ret in ppo_step_rets:
        ppo_summaries.append(tf.reduce_mean(ret) / number_of_batches)

    summaries_names = [
        "policy_loss", "value_loss", "entropy_loss", "policy_gradient",
        "value_gradient", "entropy_gradient"
    ]

    summaries = []
    for label, summary_value in zip(summaries_names, ppo_summaries):
        summaries.append(tf.summary.scalar(label, summary_value))
    losses_summary = tf.summary.merge(summaries)

    # Chain one tf.Print per summary onto the merged summary tensor.
    for label, summary_value in zip(summaries_names, ppo_summaries):
        losses_summary = tf.Print(losses_summary, [summary_value],
                                  label + ": ")

    return losses_summary
Example #51
0
    def _build_precision_matrix(self):
        """Assemble the block-tridiagonal precision matrix and factorize it.

        Sets `self.c_psi_inv`, the four static building blocks
        (`AQ0_invA_Q_inv`, `AQ_invA_Q_inv`, `AQ0_inv`, `AQ_inv`),
        `self.Sinv_diag` and `self.chol_decomp_Sinv`.
        """
        # Data-dependent part of the precision: r_psi_sqrt times its transpose
        # over the last two axes.
        r_psi_sqrt_t = tf.transpose(self.r_psi_sqrt, perm=[0, 1, 3, 2])
        self.c_psi_inv = tf.matmul(self.r_psi_sqrt,
                                   r_psi_sqrt_t,
                                   name='precision_diag_data_dep')

        # Matrix products for multi-dimensional latents, elementwise products
        # for the one-dimensional case.
        if self.dim_latent > 1:
            product = tf.matmul

            def quadratic_form(precision):
                left = tf.matmul(self.A, precision)
                return tf.matmul(left, self.A, transpose_b=True)
        else:
            product = tf.multiply

            def quadratic_form(precision):
                left = tf.multiply(self.A, precision)
                return tf.multiply(left, self.A)

        self.AQ0_invA_Q_inv = quadratic_form(self.Q0_inv) + self.Q_inv
        self.AQ_invA_Q_inv = quadratic_form(self.Q_inv) + self.Q_inv
        self.AQ0_inv = product(-self.A, self.Q0_inv)
        self.AQ_inv = product(-self.A, self.Q_inv)

        def repeat_over_interior(block, name=None):
            """Stack `block` `num_time_pts - 2` times along a new leading axis."""
            return tf.tile(tf.expand_dims(block, 0),
                           [self.num_time_pts - 2, 1, 1],
                           name=name)

        # Static diagonal blocks: Q0_inv first, then AQ0_invA_Q_inv, then the
        # repeated AQ_invA_Q_inv block.
        interior_diag = repeat_over_interior(self.AQ_invA_Q_inv)
        static_diag = tf.concat(
            [
                tf.expand_dims(self.Q0_inv, 0),
                tf.expand_dims(self.AQ0_invA_Q_inv, 0),
                interior_diag,
            ],
            axis=0,
            name='precision_diag_static')
        self.Sinv_diag = tf.add(static_diag,
                                self.c_psi_inv,
                                name='precision_diag')

        # Lower off-diagonal blocks: AQ0_inv first, then the repeated AQ_inv.
        interior_ldiag = repeat_over_interior(self.AQ_inv,
                                              name='precision_lower_diag')
        lower_diag = tf.concat(
            [tf.expand_dims(self.AQ0_inv, 0), interior_ldiag], axis=0)

        # Sinv is now available as diagonal and off-diagonal blocks. Sampling
        # from the posterior needs the square root of its inverse, which is
        # obtained from a Cholesky decomposition that exploits the
        # block-tridiagonal structure.

        def chol_for_element(_, diag_blocks):
            """Factorize one element's diagonal blocks; the accumulator is ignored."""
            return blk_tridiag_chol(diag_blocks, lower_diag)

        # The initializer is a throwaway value whose only purpose is to give
        # tf.scan the structure of the per-element output.
        self.chol_decomp_Sinv = tf.scan(
            fn=chol_for_element,
            elems=self.Sinv_diag,
            initializer=[static_diag, lower_diag],
            name='precision_chol_decomp')
Example #52
0
    def build(self, We, Wx, Wh, bh, h0, Wo, bo):
        """Create the variables, graph, loss and train op, then initialize.

        Args:
            We: initial word-embedding matrix.
            Wx, Wh, bh, h0: initial values of the recurrent-layer parameters.
            Wo, bo: initial values of the output-layer weights and bias.

        Side effects:
            Sets `self.params`, the `tfX` / `tfY` placeholders,
            `self.output_probs`, `self.cost`, `self.predict_op` and
            `self.train_op`, and runs the global variable initializer in
            `self.session`.
        """
        # make them tf Variables
        self.We = tf.Variable(We)
        self.Wx = tf.Variable(Wx)
        self.Wh = tf.Variable(Wh)
        self.bh = tf.Variable(bh)
        self.h0 = tf.Variable(h0)
        self.Wo = tf.Variable(Wo)
        self.bo = tf.Variable(bo)
        self.params = [
            self.We, self.Wx, self.Wh, self.bh, self.h0, self.Wo, self.bo
        ]

        # for easy access
        V = self.V
        M = self.M

        # placeholders
        self.tfX = tf.placeholder(tf.int32, shape=(None, ), name='X')
        self.tfY = tf.placeholder(tf.int32, shape=(None, ), name='Y')

        # convert word indexes to word vectors
        # this would be equivalent to doing
        # We[tfX] in Numpy / Theano
        # or:
        # X_one_hot = one_hot_encode(X)
        # X_one_hot.dot(We)
        # Look up in the Variable (not the raw initial value) so the embedding
        # is part of the trainable graph and receives gradient updates.
        XW = tf.nn.embedding_lookup(self.We, self.tfX)

        # NOTE(review): self.Wx is created and listed in self.params but never
        # used in the graph; the embedded input is added to the recurrent term
        # directly. Confirm whether an input projection is missing.
        def recurrence(h_t1, xWe_t):
            # returns h(t)
            h_t1 = tf.reshape(h_t1, (1, M))
            h_t = self.f(xWe_t + tf.matmul(h_t1, self.Wh) + self.bh)
            h_t = tf.reshape(h_t, (M, ))
            return h_t

        h = tf.scan(
            fn=recurrence,
            elems=XW,
            initializer=self.h0,
        )

        # output
        logits = tf.matmul(h, self.Wo) + self.bo
        prediction = tf.argmax(logits, 1)
        self.output_probs = tf.nn.softmax(logits)

        # sampled_softmax_loss takes the weights transposed relative to Wo
        nce_weights = tf.transpose(self.Wo, [1, 0])
        nce_biases = self.bo

        h = tf.reshape(h, (-1, M))
        labels = tf.reshape(self.tfY, (-1, 1))

        self.cost = tf.reduce_mean(
            tf.nn.sampled_softmax_loss(
                weights=nce_weights,
                biases=nce_biases,
                labels=labels,
                inputs=h,
                num_sampled=50,  # number of negative samples
                num_classes=V))

        self.predict_op = prediction
        self.train_op = tf.train.AdamOptimizer(1e-2).minimize(self.cost)
        # self.train_op = tf.train.MomentumOptimizer(1e-3, 0.9).minimize(self.cost)

        # init all variables
        init = tf.global_variables_initializer()
        self.session.run(init)
Example #53
0
def forward(observations, transitions, viterbi=False,
            return_alpha=False, return_best_sequence=False):
    """
    Run the forward (or Viterbi) recursion over a sequence in log space.

    Inputs:
        - observations, sequence of shape (n_steps, n_classes)
        - transitions, sequence of shape (n_classes, n_classes)
    Both must hold log-probabilities.

    The recursion starts from observations[0] and scans over the remaining
    steps to build alpha, where each entry is either
        - the log-sum-exp over all paths ending in that class, or
        - the maximum score of a path ending in that class (Viterbi).

    Returns, depending on the flags:
        - return_alpha: the scan output alpha itself
        - return_best_sequence (requires viterbi and not return_alpha):
          the best class sequence recovered from the back-pointers
        - otherwise: the final score, i.e. the max (Viterbi) or the
          log-sum-exp of the last alpha row
    """
    assert not return_best_sequence or (viterbi and not return_alpha)

    def recurrence(prev, obs):
        # When back-pointers are tracked the accumulator is [scores, pointers].
        prev_scores = prev[0] if return_best_sequence else prev
        prev_scores = tf.expand_dims(prev_scores, 1)
        obs = tf.expand_dims(obs, 0)
        if not viterbi:
            return log_sum_exp(prev_scores + obs + transitions, axis=0)
        scores = prev_scores + obs + transitions
        best_scores = tf.reduce_max(scores, axis=0)
        if not return_best_sequence:
            return best_scores
        best_previous = tf.argmax(scores, axis=0)
        return [best_scores, best_previous]

    initial = observations[0]
    # Placeholder back-pointers for the first step.
    ones = tf.ones(tf.shape(initial), dtype=tf.int64)
    if return_best_sequence:
        initial = [initial, ones]
    alpha = tf.scan(
        fn=recurrence,
        elems=observations[1:],
        initializer=initial
    )

    if return_alpha:
        return alpha

    if return_best_sequence:
        # Start from the best final class and follow the back-pointers from
        # the last step to the first.
        output_info = tf.cast(tf.argmax(alpha[0][-1], axis=0), tf.int32)

        def follow_pointer(previous, beta_i):
            return beta_i[previous]

        sequence = tf.scan(
            fn=follow_pointer,
            elems=tf.cast(alpha[1][::-1], tf.int32),
            initializer=output_info
        )
        # Restore chronological order and append the final class.
        forward_path = sequence[::-1]
        final_class = tf.cast(tf.argmax(alpha[0][-1], axis=0), tf.int32)
        sequence = tf.concat([forward_path, [final_class]], axis=0)
        return sequence

    if viterbi:
        return tf.reduce_max(alpha[-1], axis=0)
    return log_sum_exp(alpha[-1], axis=0)
Example #54
0
def integrate_steps(
    model: models.TimeStepModel,
    state: KeyedTensors,
    steps: ArrayLike,
    initial_time: float = 0.0,
    axis: int = 0,
    xla_compile: bool = False,
) -> KeyedTensors:
    """Integrate the model and save the state at the requested step counts.

    Args:
      model: model to integrate.
      state: starting value of the state.
      steps: step counts at which the solution is saved. Integration starts
        at step 0 and each entry is the stop point of one stretch of
        consecutive time steps.
      initial_time: initial time for time integration (currently unused).
      axis: axis in result tensors along which the integrated solution is
        stacked.
      xla_compile: whether to compile with XLA or not.

    Returns:
      Time evolved states at the step counts given in `steps`. Evolving
      entries come from the scan; constant entries are broadcast along the
      new leading dimension before it is moved to `axis`.
    """
    # TODO(shoyer): explicitly include time?
    del initial_time  # unused

    state = nest.map_structure(tf.convert_to_tensor, state)
    steps = tf.convert_to_tensor(steps, dtype=tf.int32)

    # Split the state into entries that stay fixed and entries that evolve.
    constant_state = {}
    for key, value in state.items():
        if key in model.equation.constant_keys:
            constant_state[key] = value
    evolving_state = {}
    for key, value in state.items():
        if key in model.equation.evolving_keys:
            evolving_state[key] = value

    def advance_one_step(state):
        merged = dict(state)
        merged.update(constant_state)
        return model.take_time_step(merged)

    def advance_until_saved_step(evolving_state, start_stop):
        """Integrate until the next step at which to save results."""
        start, stop = start_stop

        def before_stop(_, i):
            return i < stop

        def take_step(current, i):
            return advance_one_step(current), i + 1

        result, _ = tf.while_loop(
            before_stop,
            take_step,
            loop_vars=(evolving_state, start),
        )
        return result

    if xla_compile:
        advance_until_saved_step = _xla_decorator(advance_until_saved_step)

    # Each stretch begins where the previous one stopped; the first at 0.
    starts = tf.concat([[0], steps[:-1]], axis=0)
    integrated = tf.scan(advance_until_saved_step, [starts, steps],
                         initializer=evolving_state)

    def repeat_for_every_step(x):
        target_shape = steps.shape.as_list() + x.shape.as_list()
        return tf.broadcast_to(x, target_shape)

    integrated_constants = nest.map_structure(repeat_for_every_step,
                                              constant_state)
    integrated.update(integrated_constants)

    return tensor_ops.moveaxis(integrated, 0, axis)
    def prediction_pmstrnn(self):
        """Unroll `model_step_pmstrnn` over the input sequence with tf.scan.

        Returns:
            An 11-tuple: the proprioception prediction, the three
            proprioception state tensors (fast, mid, slow), the vision
            prediction, the three vision state tensors (fast, mid, slow), the
            transposed scan inputs `input_t`, the scan initial state and
            `self._idxd`.
        """
        print('=' * 100)
        print('@ prediction _pmstrnn')

        # Pick the training or the windowed inputs, then swap the first two
        # axes so that tf.scan iterates over what was axis 1.
        if self._isThisTrain:
            vision_source, prop_source = self._v_in, self._prop_in
        else:
            vision_source, prop_source = (self._windInput_vision,
                                          self._windInput_prop)
        v_t = tf.transpose(vision_source, perm=[1, 0, 2, 3])
        m_t = tf.transpose(prop_source, perm=[1, 0, 2, 3])
        input_t = (m_t, v_t)

        def initial_state(h_table, y_table, units=None):
            """Look up the initial (h, y) for `self._idxd` as an LSTMStateTuple.

            h: internal states, y: activation value. When `units` is given,
            both lookups are flattened to [-1, units].
            """
            h_init = tf.nn.embedding_lookup(h_table, self._idxd)
            if units is not None:
                h_init = tf.reshape(h_init, [-1, units])
            y_init = tf.nn.embedding_lookup(y_table, self._idxd)
            if units is not None:
                y_init = tf.reshape(y_init, [-1, units])
            return tf.nn.rnn_cell.LSTMStateTuple(y_init, h_init)

        # Proprioception: fast, mid, slow
        new_c_p1 = initial_state(self._myInit_p1_h, self._myInit_p1_y,
                                 self._p1_unit)
        new_c_p2 = initial_state(self._myInit_p2_h, self._myInit_p2_y,
                                 self._p2_unit)
        new_c_p3 = initial_state(self._myInit_p3_h, self._myInit_p3_y)

        # Vision: fast, mid, slow
        new_c_v1 = initial_state(self._myInit_v1_h, self._myInit_v1_y)
        new_c_v2 = initial_state(self._myInit_v2_h, self._myInit_v2_y)
        new_c_v3 = initial_state(self._myInit_v3_h, self._myInit_v3_y)

        if self._isThisTrain:
            first_prop, first_vision = self._prop_init, self._v_init
        else:
            first_prop, first_vision = (self._windInit_prop,
                                        self._windInit_vision)
        init_state = (first_prop, new_c_p1, new_c_p2, new_c_p3,
                      first_vision, new_c_v1, new_c_v2, new_c_v3)

        def scan_step(previous, current_input):
            # model_step_pmstrnn takes the input first and the state second.
            return self.model_step_pmstrnn(current_input, previous)

        scan_outputs = tf.scan(scan_step, input_t, initializer=init_state)

        (pred_prop_t, c_p1_t, c_p2_t, c_p3_t,
         pred_vision_t, c_v1_t, c_v2_t, c_v3_t) = scan_outputs

        prediction_perm = [1, 0, 2, 3]
        flat_state_perm = [2, 1, 0, 3]
        wide_state_perm = [2, 1, 0, 3, 4, 5]

        pred_prop = tf.transpose(pred_prop_t,
                                 perm=prediction_perm,
                                 name='pred_prop')
        c_p1 = tf.transpose(c_p1_t,
                            perm=flat_state_perm,
                            name='states_propFast')
        c_p2 = tf.transpose(c_p2_t,
                            perm=flat_state_perm,
                            name='states_propMid')
        c_p3 = tf.transpose(c_p3_t,
                            perm=wide_state_perm,
                            name='states_propSlow')

        pred_vision = tf.transpose(pred_vision_t,
                                   perm=prediction_perm,
                                   name='pred_vision')
        c_v1 = tf.transpose(c_v1_t,
                            perm=wide_state_perm,
                            name='states_visionFast')
        c_v2 = tf.transpose(c_v2_t,
                            perm=wide_state_perm,
                            name='states_visionMid')
        c_v3 = tf.transpose(c_v3_t,
                            perm=wide_state_perm,
                            name='states_visionSlow')

        return (pred_prop, c_p1, c_p2, c_p3, pred_vision, c_v1, c_v2, c_v3,
                input_t, init_state, self._idxd)
                         name='inputs')
# Placeholder for the labels, one row of NUM_CLASSES values per example.
y = tf.placeholder(tf.float32, shape=[None, NUM_CLASSES], name='labels')

# Fetch one training batch of the required batch size
batch_x, batch_y = mnist.train.next_batch(BATCH_SIZE)
# Reshape each example into TIME_STEPS rows of INPUT_SIZE values
# (presumably 28 sequences of 28 pixels -- confirm against the constants)
batch_x = batch_x.reshape((BATCH_SIZE, TIME_STEPS, INPUT_SIZE))

# Swap the first two axes of the input: tf.scan iterates over the first
# dimension of its input, so the axis to scan over has to come first.
processed_input = tf.transpose(_inputs, perm=[1, 0, 2])
# Initialize the hidden state of the RNN with zeros
initial_hidden = tf.zeros([BATCH_SIZE, HIDDEN_LAYER_SIZE])

# Compute states for all rnn steps
all_hidden_states = tf.scan(rnn.rnn,
                            processed_input,
                            initializer=initial_hidden,
                            name='states')

# Apply the linear layer to the hidden state of every step
all_outputs = tf.map_fn(rnn.linear_layer, all_hidden_states)
# We need only the output of the final step
output = all_outputs[-1]

# Compute loss (mean softmax cross-entropy between `output` and the labels)
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=output, labels=y))
# Training using Adam optimizer (default hyperparameters)
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)
# Compare the predicted class with the labelled class for each example
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(output, 1))
# Compute accuracy
Example #57
0
    def scan(cls, node, input_dict, strict):
        """Build the TensorFlow ops for a Scan node by running its body with tf.scan.

        The node's inputs are split into state variables (the leading ones)
        and scan inputs (the trailing ``num_scan_inputs`` ones). The body
        subgraph is converted once per scan step inside ``run_subgraph``.
        When ``cls.SINCE_VERSION == 8`` the first node input is the optional
        ``sequence_lens`` and the scan is run per batch entry via tf.map_fn.

        Args:
            node: the node being converted; its ``attrs`` ("body",
                "num_scan_inputs" and the optional axes/directions
                attributes), ``inputs`` and ``outputs`` are read.
            input_dict: mapping indexed by the names in ``node.inputs`` that
                yields the corresponding input tensors.
            strict: forwarded unchanged to
                ``onnx_tf.backend.onnx_graph_to_tensorflow_ops``.

        Returns:
            A list holding the final values of the state variables followed
            by the (direction/axis post-processed) scan outputs.
        """
        current_opset = [make_opsetid(cls.DOMAIN, cls.VERSION)]

        body = node.attrs["body"]

        # in version 8, node.inputs[0] is the sequence_lens
        node_inputs = node.inputs if cls.SINCE_VERSION != 8 else \
            node.inputs[1:]
        # M
        num_scan_inputs = int(node.attrs["num_scan_inputs"])
        # N = num_inputs - M
        num_state_vars = len(node_inputs) - num_scan_inputs
        # K = num_outputs - N
        num_scan_outputs = len(node.outputs) - num_state_vars
        """
            Function to run subgraph used with tf.scan
        """
        def run_subgraph(a, b):
            """Run the body subgraph for one scan step.

            ``a`` is indexed per state variable and ``b`` per scan input;
            returns the list of tensors for every body output.
            """
            input_values = {}
            # set the input values for the subgraph
            # set the values for the state variables
            for i in range(num_state_vars):
                input_values[body.input[i].name] = a[i]
            # set the values for the scan inputs
            for i in range(num_scan_inputs):
                input_values[body.input[i + num_state_vars].name] = b[i]

            # get the tensor operations for the onnx graph
            tensor_dict = \
                onnx_tf.backend.onnx_graph_to_tensorflow_ops(
                    graph_def=body,
                    input_values=input_values,
                    opset=current_opset,
                    strict=strict)
            # return sequence of tensors for every subgraph output
            outputs = [tensor_dict[output.name] for output in body.output]
            return outputs

        # optional attributes; each defaults to 0 for every input/output
        scan_input_axes = node.attrs.get("scan_input_axes",
                                         [0] * num_scan_inputs)
        # version 8 stores the input directions under the name "directions"
        scan_input_directions = node.attrs.get(
            "directions" if cls.SINCE_VERSION == 8 else
            "scan_input_directions", [0] * num_scan_inputs)
        scan_output_axes = node.attrs.get("scan_output_axes",
                                          [0] * num_scan_outputs)
        scan_output_directions = node.attrs.get("scan_output_directions",
                                                [0] * num_scan_outputs)

        # if version 8 read the sequence_lens from the first input
        # (an empty input name means no sequence_lens was provided)
        if cls.SINCE_VERSION == 8:
            sequence_lens = input_dict[node.inputs[0]] \
                            if node.inputs[0] != '' else None

        inputs = [input_dict[node_input] for node_input in node_inputs]

        # the slice is a new list, so the in-place updates below do not
        # touch `inputs`
        scan_inputs = inputs[num_state_vars:]
        # loop over all the scan inputs and apply transpose depending
        # on input axes provided and also reverse the scan inputs if
        # reverse direction for scan is provided
        for i in range(num_scan_inputs):
            # if input axes are different than 0, use transpose to scan over
            # the provided axes
            if scan_input_axes[i] != 0:
                transpose_perm = cls._calc_transpose_perm_input(
                    tf.rank(scan_inputs[i]), scan_input_axes[i])
                scan_inputs[i] = tf.transpose(scan_inputs[i], transpose_perm)

            # check for reverse direction scans
            if scan_input_directions[i] == 1:
                # version 8 has a batch dimension
                axis = 0 if cls.SINCE_VERSION != 8 else 1
                scan_inputs[i] = tf.reverse(scan_inputs[i], [axis])

        state_vars_init = inputs[:num_state_vars]

        scan_outputs_init = []
        # generate sequence of zero tensors for all scan outputs
        # with the correct shape and dtype
        for scan_output in body.output[num_state_vars:]:
            tensor_type = scan_output.type.tensor_type
            # a dimension is kept only when it has a positive dim_value and
            # no dim_param; every other dimension becomes None
            shape = [
                d.dim_value if
                (d.dim_value > 0 and d.dim_param == "") else None
                for d in tensor_type.shape.dim
            ]
            dtype = data_type.onnx2tf(tensor_type.elem_type)
            scan_outputs_init.append(tf.zeros(shape, dtype=dtype))

        # tf.scan initializer is state_variables_init + scan_outputs_init
        initializer = state_vars_init + scan_outputs_init

        if cls.SINCE_VERSION == 8:
            # version == 8
            # function to process the batches. it is used with tf.map_fn
            def run_batches(x):
                """Run the scan for one batch entry.

                ``x`` is the (state vars init, scan inputs, sequence length)
                triple that tf.map_fn slices out for the entry.
                """
                # state vars initial values per batch
                initial = x[0]
                # scan inputs per batch
                scan_inputs = x[1]
                # sequence length for the batch
                seq_len = x[2]

                # slice the input to the current sequence len
                scan_inputs = [
                    scan_input[:seq_len, ...] for scan_input in scan_inputs
                ]

                # run scan on the current batch
                out = tf.scan(run_subgraph,
                              scan_inputs,
                              initializer=initial + scan_outputs_init)

                # pad to the original shape with zeros
                # (first axis only: unsliced length of the first scan input
                # minus seq_len, appended at the end)
                paddings = [[
                    0,
                    tf.shape(x[1][0], out_type=seq_len.dtype)[0] - seq_len
                ]]
                for i in range(len(out)):
                    # all remaining axes get [0, 0] padding
                    pads = tf.concat([
                        paddings,
                        tf.zeros([(tf.rank(out[i]) - 1), 2], dtype=tf.int32)
                    ],
                                     axis=0)
                    out[i] = tf.pad(out[i], pads)
                return out

            if sequence_lens is None:
                # if sequence_lens is None, fill it with the shape of
                # the input axis 1
                sequence_lens = tf.fill([tf.shape(scan_inputs[0])[0]],
                                        tf.shape(scan_inputs[0],
                                                 out_type=tf.int32)[1])

            # tf.map_fn needs the output dtypes because they differ from
            # the structure of its input triple
            output_types = [
                data_type.onnx2tf(output.type.tensor_type.elem_type)
                for output in body.output
            ]
            # run scan for every batch
            out = tf.map_fn(run_batches,
                            (state_vars_init, scan_inputs, sequence_lens),
                            dtype=output_types)

            state_vars_outputs = []
            # extract the final values of the state variables
            # (per batch entry: the value at index sequence length - 1)
            for state_var in out[:num_state_vars]:
                state_vars_outputs.append(
                    tf.map_fn(lambda x: x[0][x[1] - 1],
                              (state_var, sequence_lens), state_var.dtype))
        else:
            # version > 8
            # run the scan
            out = tf.scan(run_subgraph, scan_inputs, initializer=initializer)

            # extract the final values of the state variables
            # (the last entry along the first axis of each stacked state)
            state_vars_outputs = [
                state_var[tf.shape(state_var)[0] - 1]
                for state_var in out[:num_state_vars]
            ]

        scan_outputs = out[num_state_vars:]

        # post process the scan outputs depending on the directions and
        # axes provided.
        for i in range(num_scan_outputs):
            # check for reverse direction scan outputs
            if scan_output_directions[i] == 1:
                scan_outputs[i] = tf.reverse(scan_outputs[i], [0])

            # move the scanned axis to the requested output position
            if scan_output_axes[i] != 0:
                transpose_perm = cls._calc_transpose_perm_output(
                    tf.rank(scan_outputs[i]), scan_output_axes[i])
                scan_outputs[i] = tf.transpose(scan_outputs[i], transpose_perm)

        return state_vars_outputs + scan_outputs
def tf_discount_rewards(tf_r):
    """Return the discounted cumulative rewards for a reward sequence.

    The rewards are reversed along the first dimension, accumulated with
    ``running * gamma + reward`` via tf.scan, and reversed back, so every
    entry holds the discounted sum of itself and all later rewards.

    Args:
        tf_r: reward tensor; annotated in the original as
            ``[game_steps, 1]``.

    Returns:
        A tensor of discounted rewards laid out like ``tf_r``.

    ``gamma`` is read from the enclosing scope.
    """

    def _accumulate(running, reward):
        # One backward step of the discounted sum.
        return running * gamma + reward

    # NOTE(review): [True, False] is the per-dimension boolean form of
    # tf.reverse; confirm it matches the TensorFlow version in use.
    backwards_rewards = tf.reverse(tf_r, [True, False])
    backwards_returns = tf.scan(_accumulate, backwards_rewards)
    return tf.reverse(backwards_returns, [True, False])
Example #59
0
 def noniso_KLD(self, mu, log_sigma_sq):
     """Return the KL divergence from a diagonal Gaussian to the full-covariance prior.

     Per batch entry this evaluates
     ``0.5 * (tr(P^-1 S) + d^T P^-1 d - k + ln det(P) - sum(log_sigma_sq))``
     where ``P`` is the prior covariance, ``S = diag(exp(log_sigma_sq))``,
     ``d = tf_mu_prior - mu`` and ``k = cov_prior.shape[0]``.

     Args:
         mu: posterior means.
         log_sigma_sq: posterior log-variances.
             NOTE(review): both are assumed to be [batch, k] - confirm
             against the caller.

     Returns:
         A tensor with one KL value per batch entry.

     Reads ``self.tf_cov_prior`` and ``self.tf_mu_prior`` (tensors) and
     ``self.cov_prior`` (array used for ``k`` and the log-determinant).
     """
     # Invert the prior covariance once; it is needed by two terms.
     prior_precision = tf.matrix_inverse(self.tf_cov_prior)
     mean_diff = tf.subtract(self.tf_mu_prior, mu)

     # tf.map_fn rather than tf.scan: without an initializer tf.scan seeds
     # the accumulator with the first element and emits it unchanged, so
     # the first batch entry would skip the multiplication by the prior
     # precision.
     trace_term = tf.trace(
         tf.map_fn(lambda cov: tf.matmul(prior_precision, cov),
                   tf.matrix_diag(tf.exp(log_sigma_sq))))
     mahalanobis_term = tf.reduce_sum(
         tf.multiply(tf.matmul(mean_diff, prior_precision), mean_diff), 1)

     return 0.5 * (trace_term
                   + mahalanobis_term
                   - float(self.cov_prior.shape[0])
                   + np.log(np.linalg.det(self.cov_prior))
                   - tf.reduce_sum(log_sigma_sq, 1))
def overshooting(cell,
                 target,
                 embedded,
                 prev_action,
                 length,
                 amount,
                 ignore_input=False):
    """Perform open loop rollouts from the posteriors at every step.

    The already embedded inputs are run through `cell` in a closed loop
    unroll to obtain posterior states for every time step. Then `amount`
    long open loop rollouts are performed from these posteriors.

    Note that the actions should be those leading to the current time step.
    So under common convention, it contains the last actions while
    observations are the current ones.

    Input:

      target, embedded:
        [A B C D E F] [A B C D E  ]

      prev_action:
        [0 A B C D E] [0 A B C D  ]

      length:
        [6 5]

      amount:
        3

    Output:

      prior, posterior, target:
        [A B C D E F] [A B C D E  ]
        [B C D E F  ] [B C D E    ]
        [C D E F    ] [C D E      ]
        [D E F      ] [D E        ]

      mask:
        [1 1 1 1 1 1] [1 1 1 1 1 0]
        [1 1 1 1 1 0] [1 1 1 1 0 0]
        [1 1 1 1 0 0] [1 1 1 0 0 0]
        [1 1 1 0 0 0] [1 1 0 0 0 0]

    Args:
        cell: RNN cell passed to `tf.nn.dynamic_rnn`; it receives the tuple
            `(embedded, prev_action, use_obs)` as input and its output is
            unpacked as a `(prior, posterior)` pair.
        target: nested structure of target tensors.
        embedded: nested structure of embedded observations; the second
            axis of its first tensor defines the maximum sequence length.
        prev_action: actions leading to each time step.
        length: per-sequence lengths; its first dimension is taken as the
            batch size.
        amount: number of shifted copies produced in addition to the
            unshifted first row.
        ignore_input: when true, the observation flags of the closed loop
            unroll are all False.

    Returns:
        Tuple `(target, prior, posterior, mask)` with the batch dimension
        restored; `mask` is cast to `tf.bool`.
    """
    # Closed loop unroll to get posterior states, which are the starting points
    # for open loop unrolls. We don't need the last time step, since we have no
    # targets for unrolls from it.
    use_obs = tf.ones(
        tf.shape(nested.flatten(embedded)[0][:, :, :1])[:3], tf.bool)
    use_obs = tf.cond(tf.convert_to_tensor(ignore_input),
                      lambda: tf.zeros_like(use_obs, tf.bool), lambda: use_obs)
    # Only the posterior of the closed loop unroll is used below.
    (_, posterior), _ = tf.nn.dynamic_rnn(cell,
                                          (embedded, prev_action, use_obs),
                                          length,
                                          dtype=tf.float32,
                                          swap_memory=True)

    # Arrange inputs for every iteration in the open loop unroll. Every loop
    # iteration below corresponds to one row in the docstring illustration.
    max_length = shape.shape(nested.flatten(embedded)[0])[1]
    first_output = {
        'observ': embedded,
        'prev_action': prev_action,
        'posterior': posterior,
        'target': target,
        'mask': tf.sequence_mask(length, max_length, tf.int32),
    }
    # Shift one step to the left along the time axis and fill the freed last
    # slot with zeros.
    progress_fn = lambda tensor: tf.concat([tensor[:, 1:], 0 * tensor[:, :1]],
                                           1)
    other_outputs = tf.scan(
        lambda past_output, _: nested.map(progress_fn, past_output),
        tf.range(amount), first_output)
    # Prepend the unshifted row to the shifted rows produced by the scan.
    sequences = nested.map(lambda lhs, rhs: tf.concat([lhs[None], rhs], 0),
                           first_output, other_outputs)

    # Merge batch and time dimensions of steps to compute unrolls from every
    # time step as one batch. The time dimension becomes the number of
    # overshooting distances.
    sequences = nested.map(lambda tensor: _merge_dims(tensor, [1, 2]),
                           sequences)
    sequences = nested.map(
        lambda tensor: tf.transpose(tensor, [1, 0] + list(
            range(2, tensor.shape.ndims))), sequences)
    merged_length = tf.reduce_sum(sequences['mask'], 1)

    # Mask out padding frames; unnecessary if the input is already masked.
    sequences = nested.map(
        lambda tensor: tensor * tf.cast(
            _pad_dims(sequences['mask'], tensor.shape.ndims), tensor.dtype),
        sequences)

    # Compute open loop rollouts.
    use_obs = tf.zeros(tf.shape(sequences['mask']), tf.bool)[..., None]
    # The rollout starting at step t begins from the posterior of step t - 1;
    # the first step starts from zeros.
    prev_state = nested.map(
        lambda tensor: tf.concat([0 * tensor[:, :1], tensor[:, :-1]], 1),
        posterior)
    prev_state = nested.map(lambda tensor: _merge_dims(tensor, [0, 1]),
                            prev_state)
    (priors, _), _ = tf.nn.dynamic_rnn(
        cell, (sequences['observ'], sequences['prev_action'], use_obs),
        merged_length, prev_state)

    # Restore batch dimension.
    target, prior, posterior, mask = nested.map(
        functools.partial(_restore_batch_dim,
                          batch_size=shape.shape(length)[0]),
        (sequences['target'], priors, sequences['posterior'],
         sequences['mask']))

    mask = tf.cast(mask, tf.bool)
    return target, prior, posterior, mask