def __init__(self, state_size, window_size, trend, skip):
     self.state_size = state_size
     self.window_size = window_size
     self.half_window = window_size // 2
     self.trend = trend
     self.skip = skip
     tf.reset_default_graph()
     self.INITIAL_FEATURES = np.zeros((4, self.state_size))
     self.X = tf.placeholder(tf.float32, (None, None, self.state_size))
     self.Y = tf.placeholder(tf.float32, (None, self.OUTPUT_SIZE))
     cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple=False)
     self.hidden_layer = tf.placeholder(tf.float32,
                                        (None, 2 * self.LAYER_SIZE))
     self.rnn, self.last_state = tf.nn.dynamic_rnn(
         inputs=self.X,
         cell=cell,
         dtype=tf.float32,
         initial_state=self.hidden_layer)
     tensor_action, tensor_validation = tf.split(self.rnn[:, -1], 2, 1)
     feed_action = tf.layers.dense(tensor_action, self.OUTPUT_SIZE)
     feed_validation = tf.layers.dense(tensor_validation, 1)
     self.logits = feed_validation + tf.subtract(
         feed_action, tf.reduce_mean(feed_action, axis=1, keep_dims=True))
     self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
     self.optimizer = tf.train.AdamOptimizer(
         learning_rate=self.LEARNING_RATE).minimize(self.cost)
     self.sess = tf.InteractiveSession()
     self.sess.run(tf.global_variables_initializer())
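A minimal standalone sketch of the same pattern (TensorFlow 1.x, illustrative sizes that are not taken from the class above): the flat LSTM state, of width 2 * layer_size because state_is_tuple=False, is fed through a placeholder and threaded between successive sess.run calls, which is how the hidden_layer placeholder is meant to be used.

import numpy as np
import tensorflow as tf

# Sketch only: sizes and names are illustrative, not from the original class.
layer_size, state_size = 128, 10

tf.reset_default_graph()
X = tf.placeholder(tf.float32, (None, None, state_size))
hidden = tf.placeholder(tf.float32, (None, 2 * layer_size))
cell = tf.nn.rnn_cell.LSTMCell(layer_size, state_is_tuple=False)
rnn, last_state = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32,
                                    initial_state=hidden)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    state = np.zeros((1, 2 * layer_size), dtype=np.float32)
    for _ in range(3):  # carry the recurrent state across consecutive calls
        window = np.random.randn(1, 4, state_size).astype(np.float32)
        out, state = sess.run([rnn[:, -1], last_state],
                              feed_dict={X: window, hidden: state})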
Example #2
  def _read_weights(self, inputs, memory, prev_read_weights, link):
    """Calculates read weights for each read head.

    The read weights are a combination of following the link graphs in the
    forward or backward directions from the previous read position, and doing
    content-based lookup. The interpolation between these different modes is
    done by `inputs['read_mode']`.

    Args:
      inputs: Controls for this access module. This contains the content-based
          keys to lookup, and the weightings for the different read modes.
      memory: A tensor of shape `[batch_size, memory_size, word_size]`
          containing the current memory contents to do content-based lookup.
      prev_read_weights: A tensor of shape `[batch_size, num_reads,
          memory_size]` containing the previous read locations.
      link: A tensor of shape `[batch_size, num_writes, memory_size,
          memory_size]` containing the temporal write transition graphs.

    Returns:
      A tensor of shape `[batch_size, num_reads, memory_size]` containing the
      read weights for each read head.
    """
    with tf.name_scope(
        'read_weights', values=[inputs, memory, prev_read_weights, link]):
      # c_t^{r, i} - The content weightings for each read head.
      content_weights = self._read_content_weights_mod(
          memory, inputs['read_content_keys'], inputs['read_content_strengths'])

      # Calculates f_t^i and b_t^i.
      forward_weights = self._linkage.directional_read_weights(
          link, prev_read_weights, forward=True)
      backward_weights = self._linkage.directional_read_weights(
          link, prev_read_weights, forward=False)

      backward_mode = inputs['read_mode'][:, :, :self._num_writes]
      forward_mode = (
          inputs['read_mode'][:, :, self._num_writes:2 * self._num_writes])
      content_mode = inputs['read_mode'][:, :, 2 * self._num_writes]

      read_weights = (
          tf.expand_dims(content_mode, 2) * content_weights + tf.reduce_sum(
              tf.expand_dims(forward_mode, 3) * forward_weights, 2) +
          tf.reduce_sum(tf.expand_dims(backward_mode, 3) * backward_weights, 2))

      return read_weights
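A small NumPy sketch of the interpolation above, just to make the shapes concrete (sizes are illustrative, not taken from the module):

import numpy as np

# Shape check of the read-mode interpolation (illustrative sizes only).
batch, num_reads, num_writes, memory_size = 2, 3, 2, 16

content_weights = np.random.rand(batch, num_reads, memory_size)
forward_weights = np.random.rand(batch, num_reads, num_writes, memory_size)
backward_weights = np.random.rand(batch, num_reads, num_writes, memory_size)
read_mode = np.random.rand(batch, num_reads, 1 + 2 * num_writes)
read_mode /= read_mode.sum(axis=-1, keepdims=True)  # softmax-like normalisation

backward_mode = read_mode[:, :, :num_writes]
forward_mode = read_mode[:, :, num_writes:2 * num_writes]
content_mode = read_mode[:, :, 2 * num_writes]

read_weights = (content_mode[..., None] * content_weights
                + (forward_mode[..., None] * forward_weights).sum(axis=2)
                + (backward_mode[..., None] * backward_weights).sum(axis=2))
assert read_weights.shape == (batch, num_reads, memory_size)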
Example #3
    def __init__(
        self,
        learning_rate,
        num_layers,
        size,
        size_layer,
        output_size,
        forget_bias = 0.1,
        lambda_coeff = 0.5
    ):
        def lstm_cell(size_layer):
            return tf.nn.rnn_cell.GRUCell(size_layer)

        rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
            [lstm_cell(size_layer) for _ in range(num_layers)],
            state_is_tuple = False,
        )
        self.X = tf.placeholder(tf.float32, (None, None, size))
        self.Y = tf.placeholder(tf.float32, (None, output_size))
        drop = tf.contrib.rnn.DropoutWrapper(
            rnn_cells, output_keep_prob = forget_bias
        )
        self.hidden_layer = tf.placeholder(
            tf.float32, (None, num_layers * size_layer)
        )
        _, last_state = tf.nn.dynamic_rnn(
            drop, self.X, initial_state = self.hidden_layer, dtype = tf.float32
        )
        
        self.z_mean = tf.layers.dense(last_state, size)
        self.z_log_sigma = tf.layers.dense(last_state, size)
        
        epsilon = tf.random_normal(tf.shape(self.z_log_sigma))
        self.z_vector = self.z_mean + tf.exp(self.z_log_sigma) * epsilon  # reparameterised sample
        
        with tf.variable_scope('decoder', reuse = False):
            rnn_cells_dec = tf.nn.rnn_cell.MultiRNNCell(
                [lstm_cell(size_layer) for _ in range(num_layers)], state_is_tuple = False
            )
            drop_dec = tf.contrib.rnn.DropoutWrapper(
                rnn_cells_dec, output_keep_prob = forget_bias
            )
            # prepend the latent vector as an extra timestep and decode from it
            x = tf.concat([tf.expand_dims(self.z_vector, axis = 1), self.X], axis = 1)
            self.outputs, self.last_state = tf.nn.dynamic_rnn(
                drop_dec, x, initial_state = last_state, dtype = tf.float32
            )
            
        self.logits = tf.layers.dense(self.outputs[-1], output_size)
        self.lambda_coeff = lambda_coeff
        
        self.kl_loss = -0.5 * tf.reduce_sum(1.0 + 2 * self.z_log_sigma - self.z_mean ** 2 - 
                             tf.exp(2 * self.z_log_sigma), 1)
        self.kl_loss = tf.scalar_mul(self.lambda_coeff, self.kl_loss)
        self.cost = tf.reduce_mean(tf.square(self.Y - self.logits) + self.kl_loss)
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
            self.cost
        )
 def __init__(self, input_size, output_size, layer_size, learning_rate):
     self.X = tf.placeholder(tf.float32, (None, input_size))
     self.Y = tf.placeholder(tf.float32, (None, output_size))
     feed_forward = tf.layers.dense(self.X,
                                    layer_size,
                                    activation=tf.nn.relu)
     self.logits = tf.layers.dense(feed_forward, output_size)
     self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
     self.optimizer = tf.train.AdamOptimizer(
         learning_rate=learning_rate).minimize(self.cost)
Example #5
 def __init__(self, input_size, output_size, layer_size, learning_rate):
     self.X = tf.placeholder(tf.float32, (None, input_size))
     self.Y = tf.placeholder(tf.float32, (None, output_size))
     feed = tf.layers.dense(self.X, layer_size, activation=tf.nn.relu)
     tensor_action, tensor_validation = tf.split(feed, 2, 1)
     feed_action = tf.layers.dense(tensor_action, output_size)
     feed_validation = tf.layers.dense(tensor_validation, 1)
     self.logits = feed_validation + tf.subtract(
         feed_action, tf.reduce_mean(feed_action, axis=1, keep_dims=True))
     self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
     self.optimizer = tf.train.AdamOptimizer(
         learning_rate=learning_rate).minimize(self.cost)
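The two heads above implement a dueling decomposition, Q = V + (A - mean(A)); a tiny NumPy sketch with illustrative sizes shows that subtracting the mean leaves a zero-mean advantage in every row:

import numpy as np

# Illustrative sizes only.
batch, num_actions = 2, 3
V = np.random.randn(batch, 1)                 # plays the role of feed_validation
A = np.random.randn(batch, num_actions)       # plays the role of feed_action
Q = V + (A - A.mean(axis=1, keepdims=True))
# The advantage part is zero-mean over actions for every row:
assert np.allclose((Q - V).mean(axis=1), 0.0)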
 def __init__(self, input_size, output_size, layer_size, learning_rate, name):
     with tf.variable_scope(name):
         self.X = tf.placeholder(tf.float32, (None, None, input_size))
         self.Y = tf.placeholder(tf.float32, (None, output_size))
         cell = tf.nn.rnn_cell.LSTMCell(layer_size, state_is_tuple = False)
         self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * layer_size))
         self.rnn,self.last_state = tf.nn.dynamic_rnn(inputs=self.X,cell=cell,
                                                 dtype=tf.float32,
                                                 initial_state=self.hidden_layer)
         self.logits = tf.layers.dense(self.rnn[:,-1], output_size)
         self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
         self.optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(self.cost)
 def __init__(self, state_size, window_size, trend, skip):
     self.state_size = state_size
     self.window_size = window_size
     self.half_window = window_size // 2
     self.trend = trend
     self.skip = skip
     tf.reset_default_graph()
     self.X = tf.placeholder(tf.float32, (None, self.state_size))
     self.Y = tf.placeholder(tf.float32, (None, self.state_size))
     self.ACTION = tf.placeholder(tf.float32, (None,))
     self.REWARD = tf.placeholder(tf.float32, (None,))
     self.batch_size = tf.shape(self.ACTION)[0]
     
     with tf.variable_scope('curiosity_model'):
         action = tf.reshape(self.ACTION, (-1,1))
         state_action = tf.concat([self.X, action], axis=1)
         save_state = tf.identity(self.Y)
         
         feed = tf.layers.dense(state_action, 32, activation=tf.nn.relu)
         self.curiosity_logits = tf.layers.dense(feed, self.state_size)
         self.curiosity_cost = tf.reduce_sum(tf.square(save_state - self.curiosity_logits), axis=1)
         
         self.curiosity_optimizer = tf.train.RMSPropOptimizer(
             self.LEARNING_RATE).minimize(tf.reduce_mean(self.curiosity_cost))
     
     total_reward = tf.add(self.curiosity_cost, self.REWARD)
     
     with tf.variable_scope("q_model"):
         with tf.variable_scope("eval_net"):
             x_action = tf.layers.dense(self.X, 128, tf.nn.relu)
             self.logits = tf.layers.dense(x_action, self.OUTPUT_SIZE)
         
         with tf.variable_scope("target_net"):
             y_action = tf.layers.dense(self.Y, 128, tf.nn.relu)
             y_q = tf.layers.dense(y_action, self.OUTPUT_SIZE)
         
         q_target = total_reward + self.GAMMA * tf.reduce_max(y_q, axis=1)
         action = tf.cast(self.ACTION, tf.int32)
         action_indices = tf.stack([tf.range(self.batch_size, dtype=tf.int32), action], axis=1)
         q = tf.gather_nd(params=self.logits, indices=action_indices)
         self.cost = tf.losses.mean_squared_error(labels=q_target, predictions=q)
         self.optimizer = tf.train.RMSPropOptimizer(self.LEARNING_RATE).minimize(
             self.cost, var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "q_model/eval_net"))
         
     t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='q_model/target_net')
     e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='q_model/eval_net')
     self.target_replace_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
     
     self.sess = tf.InteractiveSession()
     self.sess.run(tf.global_variables_initializer())
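A NumPy sketch of the curiosity bonus used above: the forward model's prediction error on the next state is added to the extrinsic reward before the Q-learning target is formed (the arrays below are illustrative, not real data):

import numpy as np

# Illustrative arrays only.
next_state = np.random.randn(4, 10)
predicted_next_state = next_state + 0.1 * np.random.randn(4, 10)
extrinsic_reward = np.array([1.0, 0.0, -1.0, 0.5])

curiosity_cost = np.sum((next_state - predicted_next_state) ** 2, axis=1)
total_reward = extrinsic_reward + curiosity_cost   # intrinsic bonus drives exploration
print(total_reward)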
Example #8
  def _precedence_weights(self, prev_precedence_weights, write_weights):
    """Calculates the new precedence weights given the current write weights.

    The precedence weights are the "aggregated write weights" for each write
    head, where write weights with sum close to zero will leave the precedence
    weights unchanged, but with sum close to one will replace the precedence
    weights.

    Args:
      prev_precedence_weights: A tensor of shape `[batch_size, num_writes,
          memory_size]` containing the previous precedence weights.
      write_weights: A tensor of shape `[batch_size, num_writes, memory_size]`
          containing the new write weights.

    Returns:
      A tensor of shape `[batch_size, num_writes, memory_size]` containing the
      new precedence weights.
    """
    with tf.name_scope('precedence_weights'):
      write_sum = tf.reduce_sum(write_weights, 2, keep_dims=True)
      return (1 - write_sum) * prev_precedence_weights + write_weights
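A toy NumPy check of the update rule above: write weights summing to roughly zero leave the precedence weights unchanged, while weights summing to roughly one replace them:

import numpy as np

# Toy values, chosen only to illustrate the two extremes.
prev = np.array([[[0.7, 0.2, 0.1]]])            # [batch=1, num_writes=1, memory_size=3]
no_write = np.zeros_like(prev)
full_write = np.array([[[0.0, 0.0, 1.0]]])

def precedence(prev_p, w):
    return (1 - w.sum(axis=2, keepdims=True)) * prev_p + w

print(precedence(prev, no_write))    # unchanged: [[[0.7 0.2 0.1]]]
print(precedence(prev, full_write))  # replaced:  [[[0.  0.  1. ]]]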
    def __init__(self, state_size, window_size, trend, skip):
        self.state_size = state_size
        self.window_size = window_size
        self.half_window = window_size // 2
        self.trend = trend
        self.skip = skip
        tf.reset_default_graph()
        self.INITIAL_FEATURES = np.zeros((4, self.state_size))
        self.X = tf.placeholder(tf.float32, (None, None, self.state_size))
        self.Y = tf.placeholder(tf.float32, (None, None, self.state_size))
        self.hidden_layer = tf.placeholder(tf.float32,
                                           (None, 2 * self.LAYER_SIZE))
        self.ACTION = tf.placeholder(tf.float32, (None,))
        self.REWARD = tf.placeholder(tf.float32, (None,))
        self.batch_size = tf.shape(self.ACTION)[0]
        self.seq_len = tf.shape(self.X)[1]

        with tf.variable_scope('curiosity_model'):
            action = tf.reshape(self.ACTION, (-1, 1, 1))
            repeat_action = tf.tile(action, [1, self.seq_len, 1])
            state_action = tf.concat([self.X, repeat_action], axis=-1)
            save_state = tf.identity(self.Y)
            cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE,
                                           state_is_tuple=False)
            self.rnn, last_state = tf.nn.dynamic_rnn(
                inputs=state_action,
                cell=cell,
                dtype=tf.float32,
                initial_state=self.hidden_layer)
            self.curiosity_logits = tf.layers.dense(self.rnn[:, -1],
                                                    self.state_size)
            self.curiosity_cost = tf.reduce_sum(
                tf.square(save_state[:, -1] - self.curiosity_logits), axis=1)

            self.curiosity_optimizer = tf.train.RMSPropOptimizer(
                self.LEARNING_RATE).minimize(
                    tf.reduce_mean(self.curiosity_cost))

        total_reward = tf.add(self.curiosity_cost, self.REWARD)

        with tf.variable_scope("q_model"):
            with tf.variable_scope("eval_net"):
                cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE,
                                               state_is_tuple=False)
                rnn, self.last_state = tf.nn.dynamic_rnn(
                    inputs=self.X,
                    cell=cell,
                    dtype=tf.float32,
                    initial_state=self.hidden_layer)
                self.logits = tf.layers.dense(rnn[:, -1], self.OUTPUT_SIZE)

            with tf.variable_scope("target_net"):
                cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE,
                                               state_is_tuple=False)
                rnn, last_state = tf.nn.dynamic_rnn(
                    inputs=self.Y,
                    cell=cell,
                    dtype=tf.float32,
                    initial_state=self.hidden_layer)
                y_q = tf.layers.dense(rnn[:, -1], self.OUTPUT_SIZE)

            q_target = total_reward + self.GAMMA * tf.reduce_max(y_q, axis=1)
            action = tf.cast(self.ACTION, tf.int32)
            action_indices = tf.stack(
                [tf.range(self.batch_size, dtype=tf.int32), action], axis=1)
            q = tf.gather_nd(params=self.logits, indices=action_indices)
            self.cost = tf.losses.mean_squared_error(labels=q_target,
                                                     predictions=q)
            self.optimizer = tf.train.RMSPropOptimizer(
                self.LEARNING_RATE).minimize(
                    self.cost,
                    var_list=tf.get_collection(
                        tf.GraphKeys.TRAINABLE_VARIABLES, "q_model/eval_net"))

        t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope='q_model/target_net')
        e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope='q_model/eval_net')
        self.target_replace_op = [
            tf.assign(t, e) for t, e in zip(t_params, e_params)
        ]

        self.sess = tf.InteractiveSession()
        self.sess.run(tf.global_variables_initializer())
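A standalone sketch (TensorFlow 1.x, with hypothetical scope and variable names) of the hard target-network update built above: variables collected from one scope are copied into the other with tf.assign.

import tensorflow as tf

# Sketch only: scope and variable names are illustrative.
tf.reset_default_graph()
with tf.variable_scope('eval_net'):
    e = tf.get_variable('w', initializer=tf.ones((2, 2)))
with tf.variable_scope('target_net'):
    t = tf.get_variable('w', initializer=tf.zeros((2, 2)))

e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='eval_net')
t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='target_net')
target_replace_op = [tf.assign(t_, e_) for t_, e_ in zip(t_params, e_params)]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(target_replace_op)     # typically run every N training steps
    print(sess.run(t))              # now equals the eval-net weights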
Example #10
def _vector_norms(m):
    squared_norms = tf.reduce_sum(m * m, axis=2, keep_dims=True)
    return tf.sqrt(squared_norms + _EPSILON)
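This kind of norm helper typically supports cosine-similarity content lookup; a NumPy sketch of how the epsilon-stabilised norms would be used (sizes, the epsilon value, and the usage itself are illustrative assumptions, not taken from the source module):

import numpy as np

_EPSILON = 1e-6   # illustrative small constant for numerical stability

def vector_norms(m):
    # same computation as the TF helper above, with keepdims for broadcasting
    return np.sqrt(np.sum(m * m, axis=2, keepdims=True) + _EPSILON)

memory = np.random.randn(2, 16, 8)        # [batch, memory_size, word_size]
keys = np.random.randn(2, 3, 8)           # [batch, num_heads, word_size]

dot = np.matmul(keys, np.transpose(memory, (0, 2, 1)))
norms = vector_norms(keys) * np.transpose(vector_norms(memory), (0, 2, 1))
cosine_sim = dot / norms                   # [batch, num_heads, memory_size]
print(cosine_sim.shape)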