# Module-level imports assumed by these snippets (TensorFlow 1.x graph-mode API).
import numpy as np
import tensorflow as tf


def __init__(self, state_size, window_size, trend, skip):
    self.state_size = state_size
    self.window_size = window_size
    self.half_window = window_size // 2
    self.trend = trend
    self.skip = skip
    tf.reset_default_graph()
    self.INITIAL_FEATURES = np.zeros((4, self.state_size))
    self.X = tf.placeholder(tf.float32, (None, None, self.state_size))
    self.Y = tf.placeholder(tf.float32, (None, self.OUTPUT_SIZE))
    cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple=False)
    self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * self.LAYER_SIZE))
    self.rnn, self.last_state = tf.nn.dynamic_rnn(
        inputs=self.X, cell=cell, dtype=tf.float32,
        initial_state=self.hidden_layer)
    # Dueling head: split the last RNN output into advantage and value streams,
    # then recombine as Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)).
    tensor_action, tensor_validation = tf.split(self.rnn[:, -1], 2, 1)
    feed_action = tf.layers.dense(tensor_action, self.OUTPUT_SIZE)
    feed_validation = tf.layers.dense(tensor_validation, 1)
    self.logits = feed_validation + tf.subtract(
        feed_action, tf.reduce_mean(feed_action, axis=1, keep_dims=True))
    self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.LEARNING_RATE).minimize(self.cost)
    self.sess = tf.InteractiveSession()
    self.sess.run(tf.global_variables_initializer())
def _read_weights(self, inputs, memory, prev_read_weights, link):
    """Calculates read weights for each read head.

    The read weights are a combination of following the link graphs in the
    forward or backward directions from the previous read position, and doing
    content-based lookup. The interpolation between these different modes is
    done by `inputs['read_mode']`.

    Args:
      inputs: Controls for this access module. This contains the content-based
          keys to lookup, and the weightings for the different read modes.
      memory: A tensor of shape `[batch_size, memory_size, word_size]`
          containing the current memory contents to do content-based lookup.
      prev_read_weights: A tensor of shape `[batch_size, num_reads,
          memory_size]` containing the previous read locations.
      link: A tensor of shape `[batch_size, num_writes, memory_size,
          memory_size]` containing the temporal write transition graphs.

    Returns:
      A tensor of shape `[batch_size, num_reads, memory_size]` containing the
      read weights for each read head.
    """
    with tf.name_scope(
            'read_weights', values=[inputs, memory, prev_read_weights, link]):
        # c_t^{r, i} - The content weightings for each read head.
        content_weights = self._read_content_weights_mod(
            memory, inputs['read_content_keys'],
            inputs['read_content_strengths'])

        # Calculates f_t^i and b_t^i.
        forward_weights = self._linkage.directional_read_weights(
            link, prev_read_weights, forward=True)
        backward_weights = self._linkage.directional_read_weights(
            link, prev_read_weights, forward=False)

        backward_mode = inputs['read_mode'][:, :, :self._num_writes]
        forward_mode = (
            inputs['read_mode'][:, :, self._num_writes:2 * self._num_writes])
        content_mode = inputs['read_mode'][:, :, 2 * self._num_writes]

        read_weights = (
            tf.expand_dims(content_mode, 2) * content_weights +
            tf.reduce_sum(tf.expand_dims(forward_mode, 3) * forward_weights, 2) +
            tf.reduce_sum(tf.expand_dims(backward_mode, 3) * backward_weights, 2))

        return read_weights
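# A standalone NumPy sketch (illustration only, toy shapes assumed:
# batch_size=1, num_reads=1, num_writes=2, memory_size=3) of how `read_mode`
# is sliced and blended above: the first num_writes entries are backward
# modes, the next num_writes are forward modes, and the last entry is the
# content mode.
import numpy as np

num_writes, memory_size = 2, 3
read_mode = np.array([[[0.1, 0.2, 0.3, 0.1, 0.3]]])        # [1, 1, 2*num_writes + 1]
backward = np.ones((1, 1, num_writes, memory_size)) / memory_size
forward = np.ones((1, 1, num_writes, memory_size)) / memory_size
content = np.ones((1, 1, memory_size)) / memory_size

backward_mode = read_mode[:, :, :num_writes]
forward_mode = read_mode[:, :, num_writes:2 * num_writes]
content_mode = read_mode[:, :, 2 * num_writes]

read_weights = (
    content_mode[..., None] * content
    + (forward_mode[..., None] * forward).sum(2)
    + (backward_mode[..., None] * backward).sum(2))
print(read_weights.shape)  # (1, 1, 3)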
def __init__(
    self,
    learning_rate,
    num_layers,
    size,
    size_layer,
    output_size,
    forget_bias=0.1,
    lambda_coeff=0.5,
):
    def lstm_cell(size_layer):
        return tf.nn.rnn_cell.GRUCell(size_layer)

    rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
        [lstm_cell(size_layer) for _ in range(num_layers)],
        state_is_tuple=False,
    )
    self.X = tf.placeholder(tf.float32, (None, None, size))
    self.Y = tf.placeholder(tf.float32, (None, output_size))
    drop = tf.contrib.rnn.DropoutWrapper(
        rnn_cells, output_keep_prob=forget_bias
    )
    self.hidden_layer = tf.placeholder(
        tf.float32, (None, num_layers * size_layer)
    )
    _, last_state = tf.nn.dynamic_rnn(
        drop, self.X, initial_state=self.hidden_layer, dtype=tf.float32
    )
    # Variational encoder: sample the latent vector with the
    # reparameterization trick, z = mu + exp(log_sigma) * epsilon.
    self.z_mean = tf.layers.dense(last_state, size)
    self.z_log_sigma = tf.layers.dense(last_state, size)
    epsilon = tf.random_normal(tf.shape(self.z_log_sigma))
    self.z_vector = self.z_mean + tf.exp(self.z_log_sigma) * epsilon
    with tf.variable_scope('decoder', reuse=False):
        rnn_cells_dec = tf.nn.rnn_cell.MultiRNNCell(
            [lstm_cell(size_layer) for _ in range(num_layers)],
            state_is_tuple=False,
        )
        drop_dec = tf.contrib.rnn.DropoutWrapper(
            rnn_cells_dec, output_keep_prob=forget_bias
        )
        # Prepend the latent vector to the input sequence for the decoder.
        x = tf.concat([tf.expand_dims(self.z_vector, axis=0), self.X], axis=1)
        self.outputs, self.last_state = tf.nn.dynamic_rnn(
            drop_dec, x, initial_state=last_state, dtype=tf.float32
        )
    self.logits = tf.layers.dense(self.outputs[-1], output_size)
    self.lambda_coeff = lambda_coeff
    self.kl_loss = -0.5 * tf.reduce_sum(
        1.0 + 2 * self.z_log_sigma - self.z_mean ** 2
        - tf.exp(2 * self.z_log_sigma), 1
    )
    self.kl_loss = tf.scalar_mul(self.lambda_coeff, self.kl_loss)
    self.cost = tf.reduce_mean(tf.square(self.Y - self.logits) + self.kl_loss)
    self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
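# A standalone NumPy sketch (illustration only, toy values assumed) of the
# reparameterization and KL term used above: z = mu + exp(log_sigma) * eps
# with eps ~ N(0, I), and KL = -0.5 * sum(1 + 2*log_sigma - mu^2 - exp(2*log_sigma)).
import numpy as np

mu = np.array([[0.5, -0.2]])
log_sigma = np.array([[-1.0, 0.1]])
eps = np.random.randn(*log_sigma.shape)

z = mu + np.exp(log_sigma) * eps
kl = -0.5 * np.sum(1.0 + 2 * log_sigma - mu ** 2 - np.exp(2 * log_sigma), axis=1)
print(z.shape, kl)  # (1, 2) latent sample, one KL value per example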
def __init__(self, input_size, output_size, layer_size, learning_rate):
    self.X = tf.placeholder(tf.float32, (None, input_size))
    self.Y = tf.placeholder(tf.float32, (None, output_size))
    feed_forward = tf.layers.dense(self.X, layer_size, activation=tf.nn.relu)
    self.logits = tf.layers.dense(feed_forward, output_size)
    self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(self.cost)
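# A hypothetical usage sketch (the class name `Model`, data shapes, and
# hyperparameters below are assumed, not from the original source): if the
# __init__ above is the constructor of a class called Model, one TF 1.x
# training step would be wired up like this.
import numpy as np
import tensorflow as tf

tf.reset_default_graph()
model = Model(input_size=4, output_size=3, layer_size=128, learning_rate=1e-3)
sess = tf.Session()
sess.run(tf.global_variables_initializer())

x = np.random.randn(8, 4).astype(np.float32)
y = np.random.randn(8, 3).astype(np.float32)
cost, _ = sess.run([model.cost, model.optimizer],
                   feed_dict={model.X: x, model.Y: y})
print(cost)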
def __init__(self, input_size, output_size, layer_size, learning_rate):
    self.X = tf.placeholder(tf.float32, (None, input_size))
    self.Y = tf.placeholder(tf.float32, (None, output_size))
    feed = tf.layers.dense(self.X, layer_size, activation=tf.nn.relu)
    # Dueling head: split the hidden layer into advantage and value streams.
    tensor_action, tensor_validation = tf.split(feed, 2, 1)
    feed_action = tf.layers.dense(tensor_action, output_size)
    feed_validation = tf.layers.dense(tensor_validation, 1)
    self.logits = feed_validation + tf.subtract(
        feed_action, tf.reduce_mean(feed_action, axis=1, keep_dims=True))
    self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(self.cost)
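# A small NumPy sketch (toy numbers assumed) of the dueling aggregation used
# above: Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)). Subtracting the mean
# keeps the advantage stream zero-mean so the value stream carries V(s).
import numpy as np

advantage = np.array([[1.0, 2.0, 3.0]])   # A(s, a) for 3 actions
value = np.array([[0.5]])                 # V(s)
q = value + (advantage - advantage.mean(axis=1, keepdims=True))
print(q)  # [[-0.5  0.5  1.5]]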
def __init__(self, input_size, output_size, layer_size, learning_rate, name):
    with tf.variable_scope(name):
        self.X = tf.placeholder(tf.float32, (None, None, input_size))
        self.Y = tf.placeholder(tf.float32, (None, output_size))
        cell = tf.nn.rnn_cell.LSTMCell(layer_size, state_is_tuple=False)
        self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * layer_size))
        self.rnn, self.last_state = tf.nn.dynamic_rnn(
            inputs=self.X, cell=cell, dtype=tf.float32,
            initial_state=self.hidden_layer)
        self.logits = tf.layers.dense(self.rnn[:, -1], output_size)
        self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(self.cost)
def __init__(self, state_size, window_size, trend, skip):
    self.state_size = state_size
    self.window_size = window_size
    self.half_window = window_size // 2
    self.trend = trend
    self.skip = skip
    tf.reset_default_graph()
    self.X = tf.placeholder(tf.float32, (None, self.state_size))
    self.Y = tf.placeholder(tf.float32, (None, self.state_size))
    self.ACTION = tf.placeholder(tf.float32, (None))
    self.REWARD = tf.placeholder(tf.float32, (None))
    self.batch_size = tf.shape(self.ACTION)[0]

    # Curiosity module: predict the next state from (state, action); its
    # prediction error is added to the extrinsic reward as an intrinsic bonus.
    with tf.variable_scope('curiosity_model'):
        action = tf.reshape(self.ACTION, (-1, 1))
        state_action = tf.concat([self.X, action], axis=1)
        save_state = tf.identity(self.Y)

        feed = tf.layers.dense(state_action, 32, activation=tf.nn.relu)
        self.curiosity_logits = tf.layers.dense(feed, self.state_size)
        self.curiosity_cost = tf.reduce_sum(
            tf.square(save_state - self.curiosity_logits), axis=1)

        self.curiosity_optimizer = tf.train.RMSPropOptimizer(
            self.LEARNING_RATE).minimize(tf.reduce_mean(self.curiosity_cost))

    total_reward = tf.add(self.curiosity_cost, self.REWARD)

    with tf.variable_scope("q_model"):
        with tf.variable_scope("eval_net"):
            x_action = tf.layers.dense(self.X, 128, tf.nn.relu)
            self.logits = tf.layers.dense(x_action, self.OUTPUT_SIZE)

        with tf.variable_scope("target_net"):
            y_action = tf.layers.dense(self.Y, 128, tf.nn.relu)
            y_q = tf.layers.dense(y_action, self.OUTPUT_SIZE)

        q_target = total_reward + self.GAMMA * tf.reduce_max(y_q, axis=1)
        action = tf.cast(self.ACTION, tf.int32)
        action_indices = tf.stack(
            [tf.range(self.batch_size, dtype=tf.int32), action], axis=1)
        q = tf.gather_nd(params=self.logits, indices=action_indices)
        self.cost = tf.losses.mean_squared_error(labels=q_target, predictions=q)
        self.optimizer = tf.train.RMSPropOptimizer(self.LEARNING_RATE).minimize(
            self.cost,
            var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       "q_model/eval_net"))

    # Op that copies eval-net parameters into the target net.
    t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                 scope='q_model/target_net')
    e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                 scope='q_model/eval_net')
    self.target_replace_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]

    self.sess = tf.InteractiveSession()
    self.sess.run(tf.global_variables_initializer())
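# A standalone NumPy sketch (toy numbers assumed) of the Q-target and the
# action indexing used above: q_target adds the curiosity bonus to the reward
# plus the discounted max target-network Q value, and gather_nd-style indexing
# picks Q(s, a) for the action actually taken.
import numpy as np

gamma = 0.99
total_reward = np.array([1.2, 0.3])            # reward + curiosity_cost per sample
target_q = np.array([[0.5, 1.0], [2.0, 0.1]])  # target_net outputs
q_target = total_reward + gamma * target_q.max(axis=1)

logits = np.array([[0.4, 0.9], [1.5, 0.2]])    # eval_net outputs
actions = np.array([1, 0])
q = logits[np.arange(len(actions)), actions]   # same effect as tf.gather_nd
print(q_target, q)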
def _precedence_weights(self, prev_precedence_weights, write_weights):
    """Calculates the new precedence weights given the current write weights.

    The precedence weights are the "aggregated write weights" for each write
    head, where write weights with sum close to zero will leave the precedence
    weights unchanged, but with sum close to one will replace the precedence
    weights.

    Args:
      prev_precedence_weights: A tensor of shape `[batch_size, num_writes,
          memory_size]` containing the previous precedence weights.
      write_weights: A tensor of shape `[batch_size, num_writes, memory_size]`
          containing the new write weights.

    Returns:
      A tensor of shape `[batch_size, num_writes, memory_size]` containing the
      new precedence weights.
    """
    with tf.name_scope('precedence_weights'):
        write_sum = tf.reduce_sum(write_weights, 2, keep_dims=True)
        return (1 - write_sum) * prev_precedence_weights + write_weights
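# A tiny NumPy check (toy values assumed) of the update above: write weights
# summing to ~0 keep the previous precedence weights, while write weights
# summing to ~1 replace them.
import numpy as np

prev = np.array([[[0.7, 0.2, 0.1]]])    # [batch, num_writes, memory_size]
write = np.array([[[0.0, 0.9, 0.1]]])   # sums to 1.0
write_sum = write.sum(axis=2, keepdims=True)
new_precedence = (1 - write_sum) * prev + write
print(new_precedence)  # [[[0.  0.9 0.1]]] -- previous weights fully replaced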
def __init__(self, state_size, window_size, trend, skip):
    self.state_size = state_size
    self.window_size = window_size
    self.half_window = window_size // 2
    self.trend = trend
    self.skip = skip
    tf.reset_default_graph()
    self.INITIAL_FEATURES = np.zeros((4, self.state_size))
    self.X = tf.placeholder(tf.float32, (None, None, self.state_size))
    self.Y = tf.placeholder(tf.float32, (None, None, self.state_size))
    self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * self.LAYER_SIZE))
    self.ACTION = tf.placeholder(tf.float32, (None))
    self.REWARD = tf.placeholder(tf.float32, (None))
    self.batch_size = tf.shape(self.ACTION)[0]
    self.seq_len = tf.shape(self.X)[1]

    # Curiosity module: predict the next state from the (state, action)
    # sequence; its prediction error is added to the reward as an intrinsic bonus.
    with tf.variable_scope('curiosity_model'):
        action = tf.reshape(self.ACTION, (-1, 1, 1))
        repeat_action = tf.tile(action, [1, self.seq_len, 1])
        state_action = tf.concat([self.X, repeat_action], axis=-1)
        save_state = tf.identity(self.Y)

        cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple=False)
        self.rnn, last_state = tf.nn.dynamic_rnn(
            inputs=state_action, cell=cell, dtype=tf.float32,
            initial_state=self.hidden_layer)
        self.curiosity_logits = tf.layers.dense(self.rnn[:, -1], self.state_size)
        self.curiosity_cost = tf.reduce_sum(
            tf.square(save_state[:, -1] - self.curiosity_logits), axis=1)

        self.curiosity_optimizer = tf.train.RMSPropOptimizer(
            self.LEARNING_RATE).minimize(tf.reduce_mean(self.curiosity_cost))

    total_reward = tf.add(self.curiosity_cost, self.REWARD)

    with tf.variable_scope("q_model"):
        with tf.variable_scope("eval_net"):
            cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple=False)
            rnn, self.last_state = tf.nn.dynamic_rnn(
                inputs=self.X, cell=cell, dtype=tf.float32,
                initial_state=self.hidden_layer)
            self.logits = tf.layers.dense(rnn[:, -1], self.OUTPUT_SIZE)

        with tf.variable_scope("target_net"):
            cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple=False)
            rnn, last_state = tf.nn.dynamic_rnn(
                inputs=self.Y, cell=cell, dtype=tf.float32,
                initial_state=self.hidden_layer)
            y_q = tf.layers.dense(rnn[:, -1], self.OUTPUT_SIZE)

        q_target = total_reward + self.GAMMA * tf.reduce_max(y_q, axis=1)
        action = tf.cast(self.ACTION, tf.int32)
        action_indices = tf.stack(
            [tf.range(self.batch_size, dtype=tf.int32), action], axis=1)
        q = tf.gather_nd(params=self.logits, indices=action_indices)
        self.cost = tf.losses.mean_squared_error(labels=q_target, predictions=q)
        self.optimizer = tf.train.RMSPropOptimizer(self.LEARNING_RATE).minimize(
            self.cost,
            var_list=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       "q_model/eval_net"))

    # Op that copies eval-net parameters into the target net.
    t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                 scope='q_model/target_net')
    e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                 scope='q_model/eval_net')
    self.target_replace_op = [
        tf.assign(t, e) for t, e in zip(t_params, e_params)
    ]

    self.sess = tf.InteractiveSession()
    self.sess.run(tf.global_variables_initializer())
def _vector_norms(m):
    squared_norms = tf.reduce_sum(m * m, axis=2, keep_dims=True)
    return tf.sqrt(squared_norms + _EPSILON)
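# A NumPy equivalent of _vector_norms (illustration only), assuming _EPSILON is
# a small constant defined elsewhere in the module (e.g. 1e-6) that keeps the
# sqrt away from zero for numerical stability.
import numpy as np

_EPSILON = 1e-6
m = np.random.randn(2, 4, 5)  # [batch, rows, word_size]
norms = np.sqrt((m * m).sum(axis=2, keepdims=True) + _EPSILON)
print(norms.shape)  # (2, 4, 1)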