# TensorFlow 1.x graph-mode API is assumed throughout these snippets.
import numpy as np
import tensorflow as tf


def write_allocation_weights(self, usage, write_gates, num_writes):
    """Calculates freeness-based locations for writing to.

    This finds unused memory by ranking the memory locations by usage, for
    each write head. (For more than one write head, we use a "simulated new
    usage" which takes into account the fact that the previous write head
    will increase the usage in that area of the memory.)

    Args:
        usage: A tensor of shape `[batch_size, memory_size]` representing
            current memory usage.
        write_gates: A tensor of shape `[batch_size, num_writes]` with values
            in the range [0, 1] indicating how much each write head does
            writing based on the address returned here (and hence how much
            usage increases).
        num_writes: The number of write heads to calculate write weights for.

    Returns:
        tensor of shape `[batch_size, num_writes, memory_size]` containing the
            freeness-based write locations. Note that this isn't scaled by
            `write_gate`; this scaling must be applied externally.
    """
    with tf.name_scope('write_allocation_weights'):
        # Expand the gates over memory locations.
        write_gates = tf.expand_dims(write_gates, -1)

        allocation_weights = []
        for i in range(num_writes):
            allocation_weights.append(self._allocation(usage))
            # Update usage to take into account writing to this new allocation.
            usage += ((1 - usage) * write_gates[:, i, :] * allocation_weights[i])

        # Pack the allocation weights for the write heads into one tensor.
        return tf.stack(allocation_weights, axis=1)
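# `self._allocation` is defined elsewhere in the freeness module; the
# standalone NumPy sketch below is an illustration (not the module's code) of
# the rule it implements: locations are ranked by ascending usage, and each
# location gets weight (1 - usage) times the product of the usages of all
# freer locations, so the least-used slot receives the most allocation.
def allocation_sketch(usage):
    order = np.argsort(usage)  # free list: least-used locations first
    weights = np.zeros_like(usage)
    cumprod = 1.0
    for slot in order:
        weights[slot] = (1.0 - usage[slot]) * cumprod
        cumprod *= usage[slot]
    return weights

# e.g. allocation_sketch(np.array([0.9, 0.1, 0.5])) -> [0.005, 0.9, 0.05],
# concentrating the write on slot 1, the least-used location.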
def directional_read_weights(self, link, prev_read_weights, forward):
    """Calculates the forward or the backward read weights.

    For each read head (at a given address), there are `num_writes` link
    graphs to follow. Thus this function computes a read address for each of
    the `num_reads * num_writes` pairs of read and write heads.

    Args:
        link: tensor of shape `[batch_size, num_writes, memory_size,
            memory_size]` representing the link graphs L_t.
        prev_read_weights: tensor of shape `[batch_size, num_reads,
            memory_size]` containing the previous read weights w_{t-1}^r.
        forward: Boolean indicating whether to follow the "future" direction
            in the link graph (True) or the "past" direction (False).

    Returns:
        tensor of shape `[batch_size, num_reads, num_writes, memory_size]`
    """
    with tf.name_scope('directional_read_weights'):
        # We calculate the forward and backward directions for each pair of
        # read and write heads; hence we need to tile the read weights and do
        # a sort of "outer product" to get this.
        expanded_read_weights = tf.stack(
            [prev_read_weights] * self._num_writes, 1)
        result = tf.matmul(expanded_read_weights, link, adjoint_b=forward)
        # Swap dimensions 1, 2 so order is [batch, reads, writes, memory]:
        return tf.transpose(result, perm=[0, 2, 1, 3])
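# A minimal standalone shape-check sketch of the same computation (the
# batch/reads/writes/memory sizes are made up for illustration, and the link
# rows are not normalised since only shapes are checked here). Each read
# weighting is pushed through every write head's link matrix, producing one
# directional weighting per (read head, write head) pair.
batch, num_reads, num_writes, memory_size = 2, 3, 2, 4
link = tf.random_uniform([batch, num_writes, memory_size, memory_size])
prev_read_weights = tf.random_uniform([batch, num_reads, memory_size])
expanded = tf.stack([prev_read_weights] * num_writes, 1)  # [b, writes, reads, mem]
forward_w = tf.transpose(tf.matmul(expanded, link, adjoint_b=True),
                         perm=[0, 2, 1, 3])               # [b, reads, writes, mem]
with tf.Session() as sess:
    print(sess.run(tf.shape(forward_w)))  # [2 3 2 4]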
def batch_invert_permutation(permutations):
    """Returns batched `tf.invert_permutation` for every row in `permutations`."""
    with tf.name_scope('batch_invert_permutation', values=[permutations]):
        unpacked = tf.unstack(permutations)
        inverses = [
            tf.invert_permutation(permutation) for permutation in unpacked
        ]
        return tf.stack(inverses)
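# A quick usage sketch: inverting [1, 2, 0] yields [2, 0, 1], since position
# j of the inverse holds the index at which j appears in the input row.
perms = tf.constant([[1, 2, 0], [2, 0, 1]], dtype=tf.int32)
with tf.Session() as sess:
    print(sess.run(batch_invert_permutation(perms)))  # [[2 0 1]
                                                      #  [1 2 0]]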
def __init__(self, state_size, window_size, trend, skip):
    self.state_size = state_size
    self.window_size = window_size
    self.half_window = window_size // 2
    self.trend = trend
    self.skip = skip
    tf.reset_default_graph()
    self.X = tf.placeholder(tf.float32, (None, self.state_size))
    self.Y = tf.placeholder(tf.float32, (None, self.state_size))
    self.ACTION = tf.placeholder(tf.float32, (None))
    self.REWARD = tf.placeholder(tf.float32, (None))
    self.batch_size = tf.shape(self.ACTION)[0]

    # Curiosity model: a forward model that predicts the next state from the
    # current state-action pair; its prediction error is the intrinsic reward.
    with tf.variable_scope('curiosity_model'):
        action = tf.reshape(self.ACTION, (-1, 1))
        state_action = tf.concat([self.X, action], axis=1)
        save_state = tf.identity(self.Y)

        feed = tf.layers.dense(state_action, 32, activation=tf.nn.relu)
        self.curiosity_logits = tf.layers.dense(feed, self.state_size)
        self.curiosity_cost = tf.reduce_sum(
            tf.square(save_state - self.curiosity_logits), axis=1)

        self.curiosity_optimizer = tf.train.RMSPropOptimizer(
            self.LEARNING_RATE).minimize(tf.reduce_mean(self.curiosity_cost))

    total_reward = tf.add(self.curiosity_cost, self.REWARD)

    with tf.variable_scope("q_model"):
        with tf.variable_scope("eval_net"):
            x_action = tf.layers.dense(self.X, 128, tf.nn.relu)
            self.logits = tf.layers.dense(x_action, self.OUTPUT_SIZE)

        with tf.variable_scope("target_net"):
            y_action = tf.layers.dense(self.Y, 128, tf.nn.relu)
            y_q = tf.layers.dense(y_action, self.OUTPUT_SIZE)

        # Q-learning target uses the curiosity-augmented reward; only the
        # evaluation network's variables are trained.
        q_target = total_reward + self.GAMMA * tf.reduce_max(y_q, axis=1)
        action = tf.cast(self.ACTION, tf.int32)
        action_indices = tf.stack(
            [tf.range(self.batch_size, dtype=tf.int32), action], axis=1)
        q = tf.gather_nd(params=self.logits, indices=action_indices)
        self.cost = tf.losses.mean_squared_error(labels=q_target, predictions=q)
        self.optimizer = tf.train.RMSPropOptimizer(self.LEARNING_RATE).minimize(
            self.cost,
            var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       "q_model/eval_net"))

    # Ops that copy the online (eval) network weights into the target network.
    t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                 scope='q_model/target_net')
    e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                 scope='q_model/eval_net')
    self.target_replace_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]

    self.sess = tf.InteractiveSession()
    self.sess.run(tf.global_variables_initializer())
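# Hedged usage sketch, not part of the original class: it assumes the class
# above is named `Agent` and defines LEARNING_RATE, GAMMA and OUTPUT_SIZE as
# class attributes, and that `prices`, `states`, `next_states`, `actions` and
# `rewards` are NumPy batches from a replay buffer. One training step updates
# the curiosity model and the eval network, then syncs the target network.
agent = Agent(state_size=10, window_size=10, trend=prices, skip=1)
feed = {
    agent.X: states,        # [batch, state_size]
    agent.Y: next_states,   # [batch, state_size]
    agent.ACTION: actions,  # [batch]
    agent.REWARD: rewards,  # [batch]
}
cost, _ = agent.sess.run([agent.cost, agent.optimizer], feed_dict=feed)
agent.sess.run(agent.curiosity_optimizer, feed_dict=feed)
agent.sess.run(agent.target_replace_op)  # typically run only every N steps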
def batch_gather(values, indices):
    """Returns batched `tf.gather` for every row in the input."""
    with tf.name_scope('batch_gather', values=[values, indices]):
        unpacked = zip(tf.unstack(values), tf.unstack(indices))
        result = [tf.gather(value, index) for value, index in unpacked]
        return tf.stack(result)
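# A quick usage sketch: each row of `indices` selects elements from the
# corresponding row of `values`.
values = tf.constant([[10., 20., 30.], [40., 50., 60.]])
indices = tf.constant([[2, 0, 1], [0, 0, 2]], dtype=tf.int32)
with tf.Session() as sess:
    print(sess.run(batch_gather(values, indices)))  # [[30. 10. 20.]
                                                    #  [40. 40. 60.]]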
def __init__(self, state_size, window_size, trend, skip):
    self.state_size = state_size
    self.window_size = window_size
    self.half_window = window_size // 2
    self.trend = trend
    self.skip = skip
    tf.reset_default_graph()
    self.INITIAL_FEATURES = np.zeros((4, self.state_size))
    self.X = tf.placeholder(tf.float32, (None, None, self.state_size))
    self.Y = tf.placeholder(tf.float32, (None, None, self.state_size))
    self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * self.LAYER_SIZE))
    self.ACTION = tf.placeholder(tf.float32, (None))
    self.REWARD = tf.placeholder(tf.float32, (None))
    self.batch_size = tf.shape(self.ACTION)[0]
    self.seq_len = tf.shape(self.X)[1]

    # Curiosity model: a recurrent forward model that predicts the next state
    # from the state-action sequence; its prediction error on the final step
    # is the intrinsic reward.
    with tf.variable_scope('curiosity_model'):
        action = tf.reshape(self.ACTION, (-1, 1, 1))
        repeat_action = tf.tile(action, [1, self.seq_len, 1])
        state_action = tf.concat([self.X, repeat_action], axis=-1)
        save_state = tf.identity(self.Y)

        cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple=False)
        self.rnn, last_state = tf.nn.dynamic_rnn(
            inputs=state_action, cell=cell,
            dtype=tf.float32, initial_state=self.hidden_layer)
        self.curiosity_logits = tf.layers.dense(self.rnn[:, -1], self.state_size)
        self.curiosity_cost = tf.reduce_sum(
            tf.square(save_state[:, -1] - self.curiosity_logits), axis=1)

        self.curiosity_optimizer = tf.train.RMSPropOptimizer(
            self.LEARNING_RATE).minimize(tf.reduce_mean(self.curiosity_cost))

    total_reward = tf.add(self.curiosity_cost, self.REWARD)

    with tf.variable_scope("q_model"):
        with tf.variable_scope("eval_net"):
            cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple=False)
            rnn, self.last_state = tf.nn.dynamic_rnn(
                inputs=self.X, cell=cell,
                dtype=tf.float32, initial_state=self.hidden_layer)
            self.logits = tf.layers.dense(rnn[:, -1], self.OUTPUT_SIZE)

        with tf.variable_scope("target_net"):
            cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple=False)
            rnn, last_state = tf.nn.dynamic_rnn(
                inputs=self.Y, cell=cell,
                dtype=tf.float32, initial_state=self.hidden_layer)
            y_q = tf.layers.dense(rnn[:, -1], self.OUTPUT_SIZE)

        # Q-learning target uses the curiosity-augmented reward; only the
        # evaluation network's variables are trained.
        q_target = total_reward + self.GAMMA * tf.reduce_max(y_q, axis=1)
        action = tf.cast(self.ACTION, tf.int32)
        action_indices = tf.stack(
            [tf.range(self.batch_size, dtype=tf.int32), action], axis=1)
        q = tf.gather_nd(params=self.logits, indices=action_indices)
        self.cost = tf.losses.mean_squared_error(labels=q_target, predictions=q)
        self.optimizer = tf.train.RMSPropOptimizer(self.LEARNING_RATE).minimize(
            self.cost,
            var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       "q_model/eval_net"))

    # Ops that copy the online (eval) network weights into the target network.
    t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                 scope='q_model/target_net')
    e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                 scope='q_model/eval_net')
    self.target_replace_op = [
        tf.assign(t, e) for t, e in zip(t_params, e_params)
    ]

    self.sess = tf.InteractiveSession()
    self.sess.run(tf.global_variables_initializer())
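# Hedged usage sketch for the recurrent agent above (again assuming a class
# name, here `RecurrentAgent`, with class attributes LAYER_SIZE,
# LEARNING_RATE, GAMMA and OUTPUT_SIZE, and `states`/`next_states` as
# [batch, seq_len, state_size] NumPy arrays). Unlike the feed-forward agent,
# the LSTM state is fed explicitly and the evaluated `last_state` can be
# carried into the next step.
agent = RecurrentAgent(state_size=10, window_size=10, trend=prices, skip=1)
hidden = np.zeros((len(states), 2 * agent.LAYER_SIZE), dtype=np.float32)
feed = {
    agent.X: states,
    agent.Y: next_states,
    agent.ACTION: actions,
    agent.REWARD: rewards,
    agent.hidden_layer: hidden,
}
cost, hidden_next, _ = agent.sess.run(
    [agent.cost, agent.last_state, agent.optimizer], feed_dict=feed)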