def __init__(
    self,
    learning_rate,
    num_layers,
    size,
    size_layer,
    output_size,
    kernel_size=3,
    n_attn_heads=16,
    dropout=0.9,
):
    # Dilated-CNN encoder-decoder with multi-head attention.
    # `position_encoding` and `cnn_block` are helpers assumed to be defined
    # elsewhere in the notebook. `dropout` is a keep probability.
    self.X = tf.placeholder(tf.float32, (None, None, size))
    self.Y = tf.placeholder(tf.float32, (None, output_size))

    encoder_embedded = tf.layers.dense(self.X, size_layer)
    encoder_embedded += position_encoding(encoder_embedded)
    e = tf.identity(encoder_embedded)

    # Encoder: stacked convolutions with exponentially growing dilation.
    for i in range(num_layers):
        dilation_rate = 2 ** i
        pad_sz = (kernel_size - 1) * dilation_rate
        with tf.variable_scope('block_%d' % i):
            encoder_embedded += cnn_block(
                encoder_embedded, dilation_rate, pad_sz, size_layer, kernel_size
            )

    encoder_output, output_memory = encoder_embedded, encoder_embedded + e
    g = tf.identity(encoder_embedded)

    # Decoder: the same convolutional blocks, each followed by multi-head
    # attention over the encoder output and memory.
    for i in range(num_layers):
        dilation_rate = 2 ** i
        pad_sz = (kernel_size - 1) * dilation_rate
        with tf.variable_scope('decode_%d' % i):
            attn_res = h = cnn_block(
                encoder_embedded, dilation_rate, pad_sz, size_layer, kernel_size
            )
            C = []
            for j in range(n_attn_heads):
                h_ = tf.layers.dense(h, size_layer // n_attn_heads)
                g_ = tf.layers.dense(g, size_layer // n_attn_heads)
                zu_ = tf.layers.dense(encoder_output, size_layer // n_attn_heads)
                ze_ = tf.layers.dense(output_memory, size_layer // n_attn_heads)
                d = tf.layers.dense(h_, size_layer // n_attn_heads) + g_
                dz = tf.matmul(d, tf.transpose(zu_, [0, 2, 1]))
                a = tf.nn.softmax(dz)
                c_ = tf.matmul(a, ze_)
                C.append(c_)
            c = tf.concat(C, 2)
            h = tf.layers.dense(attn_res + c, size_layer)
            h = tf.nn.dropout(h, keep_prob=dropout)
            encoder_embedded += h

    encoder_embedded = tf.sigmoid(encoder_embedded[-1])
    self.logits = tf.layers.dense(encoder_embedded, output_size)
    self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
    self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
def __init__(self, name, input_size, output_size, size_layer, learning_rate):
    # Recurrent critic: scores a (state sequence, action) pair against reward.
    with tf.variable_scope(name):
        self.X = tf.placeholder(tf.float32, (None, None, input_size))
        self.Y = tf.placeholder(tf.float32, (None, output_size))
        self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * size_layer))
        self.REWARD = tf.placeholder(tf.float32, (None, 1))
        cell = tf.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple=False)
        self.rnn, self.last_state = tf.nn.dynamic_rnn(
            inputs=self.X,
            cell=cell,
            dtype=tf.float32,
            initial_state=self.hidden_layer,
        )
        feed_critic = tf.layers.dense(
            self.rnn[:, -1], output_size, activation=tf.nn.relu
        ) + self.Y
        feed_critic = tf.layers.dense(
            feed_critic, size_layer // 2, activation=tf.nn.relu
        )
        self.logits = tf.layers.dense(feed_critic, 1)
        self.cost = tf.reduce_mean(tf.square(self.REWARD - self.logits))
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
def __init__(self, learning_rate, num_layers, size, size_layer, forget_bias=0.8):
    def lstm_cell(size_layer):
        return tf.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple=False)

    rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
        [lstm_cell(size_layer) for _ in range(num_layers)],
        state_is_tuple=False,
    )
    self.X = tf.placeholder(tf.float32, (None, None, size))
    self.Y = tf.placeholder(tf.float32, (None, size))
    # `forget_bias` is used as the dropout keep probability, not as an LSTM
    # forget-gate bias.
    drop = tf.contrib.rnn.DropoutWrapper(rnn_cells, output_keep_prob=forget_bias)
    self.hidden_layer = tf.placeholder(
        tf.float32, (None, num_layers * 2 * size_layer)
    )
    self.outputs, self.last_state = tf.nn.dynamic_rnn(
        drop, self.X, initial_state=self.hidden_layer, dtype=tf.float32
    )
    # outputs[-1] indexes the batch axis; this relies on feeding one sequence
    # per run (batch size 1), yielding a [timesteps, size_layer] tensor.
    self.logits = tf.layers.dense(
        self.outputs[-1],
        size,
        kernel_initializer=tf.glorot_uniform_initializer(),
    )
    self.cost = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=self.Y, logits=self.logits)
    )
    self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
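# Usage sketch for the LSTM model above. Assumptions: the __init__ belongs to
# a class named Model (the notebooks wrap it that way), and training follows
# the batch-of-one convention so that self.outputs[-1] selects the lone
# sequence. Shapes and hyperparameters below are illustrative, not from the
# source.
import numpy as np
import tensorflow as tf

num_layers, size, size_layer = 1, 1, 128
tf.reset_default_graph()
model = Model(0.01, num_layers, size, size_layer)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

batch_x = np.random.rand(1, 50, size)                    # one 50-step sequence
batch_y = np.random.rand(50, size)                       # per-step targets in [0, 1]
init_state = np.zeros((1, num_layers * 2 * size_layer))  # zero LSTM state
_, loss, init_state = sess.run(
    [model.optimizer, model.cost, model.last_state],
    feed_dict={model.X: batch_x, model.Y: batch_y, model.hidden_layer: init_state},
)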
def __init__(self, state_size, window_size, trend, skip):
    self.state_size = state_size
    self.window_size = window_size
    self.half_window = window_size // 2
    self.trend = trend
    self.skip = skip
    tf.reset_default_graph()
    self.INITIAL_FEATURES = np.zeros((4, self.state_size))
    # OUTPUT_SIZE, LAYER_SIZE and LEARNING_RATE are class-level constants.
    self.X = tf.placeholder(tf.float32, (None, None, self.state_size))
    self.Y = tf.placeholder(tf.float32, (None, self.OUTPUT_SIZE))
    cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple=False)
    self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * self.LAYER_SIZE))
    self.rnn, self.last_state = tf.nn.dynamic_rnn(
        inputs=self.X,
        cell=cell,
        dtype=tf.float32,
        initial_state=self.hidden_layer,
    )
    # Dueling head on the last timestep's RNN output.
    tensor_action, tensor_validation = tf.split(self.rnn[:, -1], 2, 1)
    feed_action = tf.layers.dense(tensor_action, self.OUTPUT_SIZE)
    feed_validation = tf.layers.dense(tensor_validation, 1)
    self.logits = feed_validation + tf.subtract(
        feed_action, tf.reduce_mean(feed_action, axis=1, keep_dims=True)
    )
    self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.LEARNING_RATE
    ).minimize(self.cost)
    self.sess = tf.InteractiveSession()
    self.sess.run(tf.global_variables_initializer())
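# Note on the dueling head used above (and in several models below): the last
# layer is split into an advantage stream A(s, a) and a value stream V(s),
# recombined as
#     Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a))
# Mean-centering the advantage keeps the two streams identifiable, since any
# constant shift between V and A cancels out of the combination.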
def __init__(self, state_size, window_size, trend, skip):
    self.state_size = state_size
    self.window_size = window_size
    self.half_window = window_size // 2
    self.trend = trend
    self.skip = skip
    # LAYER_SIZE, OUTPUT_SIZE and LEARNING_RATE are class-level constants.
    self.X = tf.placeholder(tf.float32, (None, self.state_size))
    self.REWARDS = tf.placeholder(tf.float32, (None))
    self.ACTIONS = tf.placeholder(tf.int32, (None))
    feed_forward = tf.layers.dense(self.X, self.LAYER_SIZE, activation=tf.nn.relu)
    self.logits = tf.layers.dense(
        feed_forward, self.OUTPUT_SIZE, activation=tf.nn.softmax
    )
    input_y = tf.one_hot(self.ACTIONS, self.OUTPUT_SIZE)
    # Surrogate log-likelihood of the chosen actions under the policy.
    loglike = tf.log(
        (input_y * (input_y - self.logits) + (1 - input_y) * (input_y + self.logits))
        + 1
    )
    rewards = tf.tile(tf.reshape(self.REWARDS, (-1, 1)), [1, self.OUTPUT_SIZE])
    self.cost = -tf.reduce_mean(loglike * (rewards + 1))
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.LEARNING_RATE
    ).minimize(self.cost)
    self.sess = tf.InteractiveSession()
    self.sess.run(tf.global_variables_initializer())
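# Training the policy-gradient agent above requires discounted returns to feed
# self.REWARDS. A standard sketch, not verbatim repo code; gamma is an assumed
# discount factor:
import numpy as np

def discount_rewards(rewards, gamma=0.95):
    # Walk backwards so each step accumulates its discounted future sum.
    discounted = np.zeros_like(rewards, dtype=np.float32)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        discounted[t] = running
    return discounted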
def __init__(
    self,
    learning_rate,
    num_layers,
    size,
    size_layer,
    output_size,
    forget_bias=0.1,
):
    # GRU stack: the state is num_layers * size_layer wide, since GRUs carry
    # no separate cell state.
    def gru_cell(size_layer):
        return tf.nn.rnn_cell.GRUCell(size_layer)

    rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
        [gru_cell(size_layer) for _ in range(num_layers)],
        state_is_tuple=False,
    )
    self.X = tf.placeholder(tf.float32, (None, None, size))
    self.Y = tf.placeholder(tf.float32, (None, output_size))
    # `forget_bias` is used as the dropout keep probability.
    drop = tf.contrib.rnn.DropoutWrapper(rnn_cells, output_keep_prob=forget_bias)
    self.hidden_layer = tf.placeholder(tf.float32, (None, num_layers * size_layer))
    self.outputs, self.last_state = tf.nn.dynamic_rnn(
        drop, self.X, initial_state=self.hidden_layer, dtype=tf.float32
    )
    # Batch-of-one convention: outputs[-1] is the single sequence.
    self.logits = tf.layers.dense(self.outputs[-1], output_size)
    self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
    self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
def __init__(self, state_size, window_size, trend, skip, batch_size):
    self.state_size = state_size
    self.window_size = window_size
    self.half_window = window_size // 2
    self.trend = trend
    self.skip = skip
    self.action_size = 3
    self.batch_size = batch_size
    self.memory = deque(maxlen=1000)  # replay buffer
    self.inventory = []
    self.gamma = 0.95
    self.epsilon = 0.5
    self.epsilon_min = 0.01
    self.epsilon_decay = 0.999
    tf.reset_default_graph()
    self.sess = tf.InteractiveSession()
    self.X = tf.placeholder(tf.float32, [None, self.state_size])
    self.Y = tf.placeholder(tf.float32, [None, self.action_size])
    feed = tf.layers.dense(self.X, 512, activation=tf.nn.relu)
    # Dueling head: advantage and value streams.
    tensor_action, tensor_validation = tf.split(feed, 2, 1)
    feed_action = tf.layers.dense(tensor_action, self.action_size)
    feed_validation = tf.layers.dense(tensor_validation, 1)
    self.logits = feed_validation + tf.subtract(
        feed_action, tf.reduce_mean(feed_action, axis=1, keep_dims=True)
    )
    self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
    self.optimizer = tf.train.GradientDescentOptimizer(1e-5).minimize(self.cost)
    self.sess.run(tf.global_variables_initializer())
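# Epsilon-greedy action selection for the dueling DQN agent above -- a minimal
# sketch assuming only attributes defined in __init__ (epsilon, action_size,
# sess, logits, X); not verbatim repo code.
import numpy as np

def act(self, state):
    # Explore with probability epsilon, otherwise act greedily on Q-values.
    if np.random.rand() <= self.epsilon:
        return np.random.randint(self.action_size)
    q_values = self.sess.run(self.logits, feed_dict={self.X: [state]})
    return int(np.argmax(q_values[0]))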
def __init__(
    self,
    learning_rate,
    num_layers,
    size,
    size_layer,
    output_size,
    forget_bias=0.1,
    lambda_coeff=0.5,
):
    def gru_cell(size_layer):
        return tf.nn.rnn_cell.GRUCell(size_layer)

    rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
        [gru_cell(size_layer) for _ in range(num_layers)],
        state_is_tuple=False,
    )
    self.X = tf.placeholder(tf.float32, (None, None, size))
    self.Y = tf.placeholder(tf.float32, (None, output_size))
    drop = tf.contrib.rnn.DropoutWrapper(rnn_cells, output_keep_prob=forget_bias)
    self.hidden_layer = tf.placeholder(tf.float32, (None, num_layers * size_layer))
    _, last_state = tf.nn.dynamic_rnn(
        drop, self.X, initial_state=self.hidden_layer, dtype=tf.float32
    )

    # Variational bottleneck: reparameterization trick, z = mu + sigma * eps.
    self.z_mean = tf.layers.dense(last_state, size)
    self.z_log_sigma = tf.layers.dense(last_state, size)
    epsilon = tf.random_normal(tf.shape(self.z_log_sigma))
    self.z_vector = self.z_mean + tf.exp(self.z_log_sigma) * epsilon

    with tf.variable_scope('decoder', reuse=False):
        rnn_cells_dec = tf.nn.rnn_cell.MultiRNNCell(
            [gru_cell(size_layer) for _ in range(num_layers)],
            state_is_tuple=False,
        )
        drop_dec = tf.contrib.rnn.DropoutWrapper(
            rnn_cells_dec, output_keep_prob=forget_bias
        )
        # Prepend the latent vector to the input sequence (valid under the
        # batch-of-one convention used throughout these models).
        x = tf.concat([tf.expand_dims(self.z_vector, axis=0), self.X], axis=1)
        self.outputs, self.last_state = tf.nn.dynamic_rnn(
            drop_dec, x, initial_state=last_state, dtype=tf.float32
        )

    self.logits = tf.layers.dense(self.outputs[-1], output_size)
    self.lambda_coeff = lambda_coeff
    # Closed-form KL(q(z|x) || N(0, I)), scaled by lambda_coeff.
    self.kl_loss = -0.5 * tf.reduce_sum(
        1.0 + 2 * self.z_log_sigma - self.z_mean ** 2 - tf.exp(2 * self.z_log_sigma),
        1,
    )
    self.kl_loss = tf.scalar_mul(self.lambda_coeff, self.kl_loss)
    self.cost = tf.reduce_mean(tf.square(self.Y - self.logits) + self.kl_loss)
    self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
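# The kl_loss above is the closed-form KL divergence between the encoder's
# Gaussian q(z|x) = N(z_mean, exp(z_log_sigma)^2) and the unit prior N(0, I):
#     KL = -1/2 * sum(1 + 2*log_sigma - mu^2 - exp(2*log_sigma))
# lambda_coeff trades the reconstruction error off against this regularizer.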
def __init__(self, input_size, output_size, layer_size, learning_rate):
    self.X = tf.placeholder(tf.float32, (None, input_size))
    self.Y = tf.placeholder(tf.float32, (None, output_size))
    feed_forward = tf.layers.dense(self.X, layer_size, activation=tf.nn.relu)
    self.logits = tf.layers.dense(feed_forward, output_size)
    self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate
    ).minimize(self.cost)
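# Minimal usage sketch for the feed-forward net above. Assumption: the
# __init__ belongs to a class named Model; shapes are illustrative.
import numpy as np
import tensorflow as tf

tf.reset_default_graph()
model = Model(input_size=10, output_size=3, layer_size=256, learning_rate=1e-3)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
pred = sess.run(model.logits, feed_dict={model.X: np.random.randn(4, 10)})
print(pred.shape)  # (4, 3)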
def __init__(self, name, input_size, output_size, size_layer, learning_rate):
    with tf.variable_scope(name):
        self.X = tf.placeholder(tf.float32, (None, input_size))
        self.Y = tf.placeholder(tf.float32, (None, output_size))
        self.REWARD = tf.placeholder(tf.float32, (None, 1))
        feed_critic = tf.layers.dense(self.X, size_layer, activation=tf.nn.relu)
        feed_critic = tf.layers.dense(
            feed_critic, output_size, activation=tf.nn.relu
        ) + self.Y
        feed_critic = tf.layers.dense(
            feed_critic, size_layer // 2, activation=tf.nn.relu
        )
        self.logits = tf.layers.dense(feed_critic, 1)
        self.cost = tf.reduce_mean(tf.square(self.REWARD - self.logits))
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
def __init__(self, input_size, output_size, layer_size, learning_rate):
    self.X = tf.placeholder(tf.float32, (None, input_size))
    self.Y = tf.placeholder(tf.float32, (None, output_size))
    feed = tf.layers.dense(self.X, layer_size, activation=tf.nn.relu)
    # Dueling head (see the note after the first dueling model above).
    tensor_action, tensor_validation = tf.split(feed, 2, 1)
    feed_action = tf.layers.dense(tensor_action, output_size)
    feed_validation = tf.layers.dense(tensor_validation, 1)
    self.logits = feed_validation + tf.subtract(
        feed_action, tf.reduce_mean(feed_action, axis=1, keep_dims=True)
    )
    self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate
    ).minimize(self.cost)
def __init__(self, input_size, output_size, layer_size, learning_rate, name):
    with tf.variable_scope(name):
        self.X = tf.placeholder(tf.float32, (None, None, input_size))
        self.Y = tf.placeholder(tf.float32, (None, output_size))
        cell = tf.nn.rnn_cell.LSTMCell(layer_size, state_is_tuple=False)
        self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * layer_size))
        self.rnn, self.last_state = tf.nn.dynamic_rnn(
            inputs=self.X,
            cell=cell,
            dtype=tf.float32,
            initial_state=self.hidden_layer,
        )
        self.logits = tf.layers.dense(self.rnn[:, -1], output_size)
        self.cost = tf.reduce_sum(tf.square(self.Y - self.logits))
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate
        ).minimize(self.cost)
def __init__(self, name, input_size, output_size, size_layer):
    with tf.variable_scope(name):
        self.X = tf.placeholder(tf.float32, (None, None, input_size))
        self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * size_layer))
        cell = tf.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple=False)
        self.rnn, self.last_state = tf.nn.dynamic_rnn(
            inputs=self.X,
            cell=cell,
            dtype=tf.float32,
            initial_state=self.hidden_layer,
        )
        self.logits = tf.layers.dense(self.rnn[:, -1], output_size)
def __init__(
    self,
    learning_rate,
    num_layers,
    size,
    size_layer,
    output_size,
    forget_bias=0.1,
):
    def rnn_cell(size_layer):
        return tf.nn.rnn_cell.BasicRNNCell(size_layer)

    # Two independent stacks read the series under separate scopes; the model
    # is trained on the difference of their outputs.
    with tf.variable_scope('forward', reuse=False):
        rnn_cells_forward = tf.nn.rnn_cell.MultiRNNCell(
            [rnn_cell(size_layer) for _ in range(num_layers)],
            state_is_tuple=False,
        )
        self.X_forward = tf.placeholder(tf.float32, (None, None, size))
        drop_forward = tf.contrib.rnn.DropoutWrapper(
            rnn_cells_forward, output_keep_prob=forget_bias
        )
        self.hidden_layer_forward = tf.placeholder(
            tf.float32, (None, num_layers * size_layer)
        )
        self.outputs_forward, self.last_state_forward = tf.nn.dynamic_rnn(
            drop_forward,
            self.X_forward,
            initial_state=self.hidden_layer_forward,
            dtype=tf.float32,
        )

    with tf.variable_scope('backward', reuse=False):
        rnn_cells_backward = tf.nn.rnn_cell.MultiRNNCell(
            [rnn_cell(size_layer) for _ in range(num_layers)],
            state_is_tuple=False,
        )
        self.X_backward = tf.placeholder(tf.float32, (None, None, size))
        drop_backward = tf.contrib.rnn.DropoutWrapper(
            rnn_cells_backward, output_keep_prob=forget_bias
        )
        self.hidden_layer_backward = tf.placeholder(
            tf.float32, (None, num_layers * size_layer)
        )
        self.outputs_backward, self.last_state_backward = tf.nn.dynamic_rnn(
            drop_backward,
            self.X_backward,
            initial_state=self.hidden_layer_backward,
            dtype=tf.float32,
        )

    self.outputs = self.outputs_backward - self.outputs_forward
    self.Y = tf.placeholder(tf.float32, (None, output_size))
    self.logits = tf.layers.dense(self.outputs[-1], output_size)
    self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
    self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
def __init__(self, name, input_size, output_size, size_layer):
    # Recurrent actor with a dueling head.
    with tf.variable_scope(name):
        self.X = tf.placeholder(tf.float32, (None, None, input_size))
        self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * size_layer))
        cell = tf.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple=False)
        self.rnn, self.last_state = tf.nn.dynamic_rnn(
            inputs=self.X,
            cell=cell,
            dtype=tf.float32,
            initial_state=self.hidden_layer,
        )
        tensor_action, tensor_validation = tf.split(self.rnn[:, -1], 2, 1)
        feed_action = tf.layers.dense(tensor_action, output_size)
        feed_validation = tf.layers.dense(tensor_validation, 1)
        self.logits = feed_validation + tf.subtract(
            feed_action, tf.reduce_mean(feed_action, axis=1, keep_dims=True)
        )
def __init__(self, state_size, window_size, trend, skip):
    self.state_size = state_size
    self.window_size = window_size
    self.half_window = window_size // 2
    self.trend = trend
    self.skip = skip
    tf.reset_default_graph()
    self.X = tf.placeholder(tf.float32, (None, self.state_size))
    self.Y = tf.placeholder(tf.float32, (None, self.state_size))
    self.ACTION = tf.placeholder(tf.float32, (None))
    self.REWARD = tf.placeholder(tf.float32, (None))
    self.batch_size = tf.shape(self.ACTION)[0]

    # Forward model: predict the next state from (state, action); its error
    # becomes an intrinsic (curiosity) reward.
    with tf.variable_scope('curiosity_model'):
        action = tf.reshape(self.ACTION, (-1, 1))
        state_action = tf.concat([self.X, action], axis=1)
        save_state = tf.identity(self.Y)
        feed = tf.layers.dense(state_action, 32, activation=tf.nn.relu)
        self.curiosity_logits = tf.layers.dense(feed, self.state_size)
        self.curiosity_cost = tf.reduce_sum(
            tf.square(save_state - self.curiosity_logits), axis=1
        )
        self.curiosity_optimizer = tf.train.RMSPropOptimizer(
            self.LEARNING_RATE
        ).minimize(tf.reduce_mean(self.curiosity_cost))

    total_reward = tf.add(self.curiosity_cost, self.REWARD)

    with tf.variable_scope("q_model"):
        with tf.variable_scope("eval_net"):
            x_action = tf.layers.dense(self.X, 128, tf.nn.relu)
            self.logits = tf.layers.dense(x_action, self.OUTPUT_SIZE)
        with tf.variable_scope("target_net"):
            y_action = tf.layers.dense(self.Y, 128, tf.nn.relu)
            y_q = tf.layers.dense(y_action, self.OUTPUT_SIZE)
        q_target = total_reward + self.GAMMA * tf.reduce_max(y_q, axis=1)
        action = tf.cast(self.ACTION, tf.int32)
        action_indices = tf.stack(
            [tf.range(self.batch_size, dtype=tf.int32), action], axis=1
        )
        q = tf.gather_nd(params=self.logits, indices=action_indices)
        self.cost = tf.losses.mean_squared_error(labels=q_target, predictions=q)
        self.optimizer = tf.train.RMSPropOptimizer(self.LEARNING_RATE).minimize(
            self.cost,
            var_list=tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, "q_model/eval_net"
            ),
        )

    # Op that copies the online (eval) network into the target network.
    t_params = tf.get_collection(
        tf.GraphKeys.GLOBAL_VARIABLES, scope='q_model/target_net'
    )
    e_params = tf.get_collection(
        tf.GraphKeys.GLOBAL_VARIABLES, scope='q_model/eval_net'
    )
    self.target_replace_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
    self.sess = tf.InteractiveSession()
    self.sess.run(tf.global_variables_initializer())
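# How the curiosity model above shapes learning: the forward model predicts
# the next state from (state, action), and its squared prediction error
# (curiosity_cost) is added to the extrinsic reward before the Bellman target
# q_target is formed -- poorly predicted transitions earn an exploration bonus.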
def __init__(self, name, input_size, output_size, size_layer, learning_rate):
    # Critic with a dueling head feeding the value estimate.
    with tf.variable_scope(name):
        self.X = tf.placeholder(tf.float32, (None, input_size))
        self.Y = tf.placeholder(tf.float32, (None, output_size))
        self.REWARD = tf.placeholder(tf.float32, (None, 1))
        feed_critic = tf.layers.dense(self.X, size_layer, activation=tf.nn.relu)
        tensor_action, tensor_validation = tf.split(feed_critic, 2, 1)
        feed_action = tf.layers.dense(tensor_action, output_size)
        feed_validation = tf.layers.dense(tensor_validation, 1)
        feed_critic = feed_validation + tf.subtract(
            feed_action, tf.reduce_mean(feed_action, axis=1, keep_dims=True)
        )
        feed_critic = tf.nn.relu(feed_critic) + self.Y
        feed_critic = tf.layers.dense(
            feed_critic, size_layer // 2, activation=tf.nn.relu
        )
        self.logits = tf.layers.dense(feed_critic, 1)
        self.cost = tf.reduce_mean(tf.square(self.REWARD - self.logits))
        self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
def __init__(
    self,
    learning_rate,
    num_layers,
    size,
    size_layer,
    output_size,
    forget_bias=0.1,
):
    def lstm_cell(size_layer):
        return tf.nn.rnn_cell.LSTMCell(size_layer, state_is_tuple=False)

    backward_rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
        [lstm_cell(size_layer) for _ in range(num_layers)],
        state_is_tuple=False,
    )
    forward_rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
        [lstm_cell(size_layer) for _ in range(num_layers)],
        state_is_tuple=False,
    )
    self.X = tf.placeholder(tf.float32, (None, None, size))
    self.Y = tf.placeholder(tf.float32, (None, output_size))
    drop_backward = tf.contrib.rnn.DropoutWrapper(
        backward_rnn_cells, output_keep_prob=forget_bias
    )
    drop_forward = tf.contrib.rnn.DropoutWrapper(
        forward_rnn_cells, output_keep_prob=forget_bias
    )
    self.backward_hidden_layer = tf.placeholder(
        tf.float32, shape=(None, num_layers * 2 * size_layer)
    )
    self.forward_hidden_layer = tf.placeholder(
        tf.float32, shape=(None, num_layers * 2 * size_layer)
    )
    self.outputs, self.last_state = tf.nn.bidirectional_dynamic_rnn(
        drop_forward,
        drop_backward,
        self.X,
        initial_state_fw=self.forward_hidden_layer,
        initial_state_bw=self.backward_hidden_layer,
        dtype=tf.float32,
    )
    # Concatenate forward and backward outputs along the feature axis.
    self.outputs = tf.concat(self.outputs, 2)
    self.logits = tf.layers.dense(self.outputs[-1], output_size)
    self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
    self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
def __init__(self, state_size, window_size, trend, skip):
    self.state_size = state_size
    self.window_size = window_size
    self.half_window = window_size // 2
    self.trend = trend
    self.INITIAL_FEATURES = np.zeros((4, self.state_size))
    self.skip = skip
    tf.reset_default_graph()
    # Online and target networks for both actor and critic.
    self.actor = Actor('actor-original', self.state_size, self.OUTPUT_SIZE,
                       self.LAYER_SIZE)
    self.actor_target = Actor('actor-target', self.state_size,
                              self.OUTPUT_SIZE, self.LAYER_SIZE)
    self.critic = Critic('critic-original', self.state_size, self.OUTPUT_SIZE,
                         self.LAYER_SIZE, self.LEARNING_RATE)
    self.critic_target = Critic('critic-target', self.state_size,
                                self.OUTPUT_SIZE, self.LAYER_SIZE,
                                self.LEARNING_RATE)
    # Deterministic-policy-gradient update: chain the critic's gradient with
    # respect to the action through the actor's weights.
    self.grad_critic = tf.gradients(self.critic.logits, self.critic.Y)
    self.actor_critic_grad = tf.placeholder(tf.float32,
                                            [None, self.OUTPUT_SIZE])
    weights_actor = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      scope='actor')
    self.grad_actor = tf.gradients(self.actor.logits, weights_actor,
                                   -self.actor_critic_grad)
    grads = zip(self.grad_actor, weights_actor)
    self.optimizer = tf.train.AdamOptimizer(
        self.LEARNING_RATE
    ).apply_gradients(grads)
    self.sess = tf.InteractiveSession()
    self.sess.run(tf.global_variables_initializer())
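# The constructor above builds 'original' and 'target' actor/critic pairs but
# defines no copy op between them. A typical hard-update helper -- a sketch
# under the assumption that scope names match those passed to the constructor,
# not verbatim repo code:
import tensorflow as tf

def _assign(self, from_name, to_name):
    # Copy every trainable variable from one scope into the other.
    from_w = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=from_name)
    to_w = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=to_name)
    for i in range(len(from_w)):
        self.sess.run(tf.assign(to_w[i], from_w[i]))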
def __init__(self, name, input_size, output_size, size_layer):
    with tf.variable_scope(name):
        self.X = tf.placeholder(tf.float32, (None, input_size))
        feed_actor = tf.layers.dense(self.X, size_layer, activation=tf.nn.relu)
        tensor_action, tensor_validation = tf.split(feed_actor, 2, 1)
        feed_action = tf.layers.dense(tensor_action, output_size)
        feed_validation = tf.layers.dense(tensor_validation, 1)
        self.logits = feed_validation + tf.subtract(
            feed_action, tf.reduce_mean(feed_action, axis=1, keep_dims=True)
        )
def __init__(self, size_layer, embedded_size, learning_rate, size, output_size,
             num_blocks=2, num_heads=8, min_freq=50):
    # Self-attention encoder. `sinusoidal_position_encoding`, `multihead_attn`
    # and `pointwise_feedforward` are helpers assumed to be defined elsewhere
    # in the notebook; `min_freq` is accepted but unused here.
    self.X = tf.placeholder(tf.float32, (None, None, size))
    self.Y = tf.placeholder(tf.float32, (None, output_size))
    encoder_embedded = tf.layers.dense(self.X, embedded_size)
    encoder_embedded = tf.nn.dropout(encoder_embedded, keep_prob=0.8)
    # Mark non-zero timesteps as valid attention positions.
    x_mean = tf.reduce_mean(self.X, axis=2)
    en_masks = tf.sign(x_mean)
    encoder_embedded += sinusoidal_position_encoding(self.X, en_masks,
                                                     embedded_size)
    for i in range(num_blocks):
        with tf.variable_scope('encoder_self_attn_%d' % i, reuse=tf.AUTO_REUSE):
            encoder_embedded = multihead_attn(
                queries=encoder_embedded,
                keys=encoder_embedded,
                q_masks=en_masks,
                k_masks=en_masks,
                future_binding=False,
                num_units=size_layer,
                num_heads=num_heads,
            )
        with tf.variable_scope('encoder_feedforward_%d' % i, reuse=tf.AUTO_REUSE):
            encoder_embedded = pointwise_feedforward(
                encoder_embedded, embedded_size, activation=tf.nn.relu
            )
    self.logits = tf.layers.dense(encoder_embedded[-1], output_size)
    self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
    self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)
def __init__(self, input_, dimension=2, learning_rate=0.01, hidden_layer=256,
             epoch=20):
    # Two-layer sigmoid autoencoder; trains on `input_` inside the constructor.
    input_size = input_.shape[1]
    self.X = tf.placeholder("float", [None, input_size])
    weights = {
        'encoder_h1': tf.Variable(tf.random_normal([input_size, hidden_layer])),
        'encoder_h2': tf.Variable(tf.random_normal([hidden_layer, dimension])),
        'decoder_h1': tf.Variable(tf.random_normal([dimension, hidden_layer])),
        'decoder_h2': tf.Variable(tf.random_normal([hidden_layer, input_size])),
    }
    biases = {
        'encoder_b1': tf.Variable(tf.random_normal([hidden_layer])),
        'encoder_b2': tf.Variable(tf.random_normal([dimension])),
        'decoder_b1': tf.Variable(tf.random_normal([hidden_layer])),
        'decoder_b2': tf.Variable(tf.random_normal([input_size])),
    }
    first_layer_encoder = tf.nn.sigmoid(
        tf.add(tf.matmul(self.X, weights['encoder_h1']), biases['encoder_b1'])
    )
    # Bottleneck layer: the low-dimensional embedding.
    self.second_layer_encoder = tf.nn.sigmoid(
        tf.add(tf.matmul(first_layer_encoder, weights['encoder_h2']),
               biases['encoder_b2'])
    )
    first_layer_decoder = tf.nn.sigmoid(
        tf.add(tf.matmul(self.second_layer_encoder, weights['decoder_h1']),
               biases['decoder_b1'])
    )
    second_layer_decoder = tf.nn.sigmoid(
        tf.add(tf.matmul(first_layer_decoder, weights['decoder_h2']),
               biases['decoder_b2'])
    )
    self.cost = tf.reduce_mean(tf.pow(self.X - second_layer_decoder, 2))
    self.optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(self.cost)
    self.sess = tf.InteractiveSession()
    self.sess.run(tf.global_variables_initializer())
    for i in range(epoch):
        last_time = time.time()
        _, loss = self.sess.run([self.optimizer, self.cost],
                                feed_dict={self.X: input_})
        if (i + 1) % 10 == 0:
            print('epoch:', i + 1, 'loss:', loss,
                  'time:', time.time() - last_time)
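# The constructor above trains in place, so the 2-D bottleneck codes are
# available immediately afterwards. Sketch, assuming the class is named
# Autoencoder and `data` is a float matrix of shape (n_samples, n_features):
import numpy as np

data = np.random.rand(100, 32).astype(np.float32)
ae = Autoencoder(data, dimension=2, learning_rate=0.01, hidden_layer=256, epoch=20)
embedded = ae.sess.run(ae.second_layer_encoder, feed_dict={ae.X: data})
print(embedded.shape)  # (100, 2)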
def __init__(self, name, input_size, output_size, size_layer):
    with tf.variable_scope(name):
        self.X = tf.placeholder(tf.float32, (None, input_size))
        feed_actor = tf.layers.dense(self.X, size_layer, activation=tf.nn.relu)
        self.logits = tf.layers.dense(feed_actor, output_size)
def __init__(self, state_size, window_size, trend, skip):
    self.state_size = state_size
    self.window_size = window_size
    self.half_window = window_size // 2
    self.trend = trend
    self.skip = skip
    tf.reset_default_graph()
    self.INITIAL_FEATURES = np.zeros((4, self.state_size))
    self.X = tf.placeholder(tf.float32, (None, None, self.state_size))
    self.Y = tf.placeholder(tf.float32, (None, None, self.state_size))
    self.hidden_layer = tf.placeholder(tf.float32, (None, 2 * self.LAYER_SIZE))
    self.ACTION = tf.placeholder(tf.float32, (None))
    self.REWARD = tf.placeholder(tf.float32, (None))
    self.batch_size = tf.shape(self.ACTION)[0]
    self.seq_len = tf.shape(self.X)[1]

    # Recurrent forward model: predict the final next state from the state
    # sequence and the action repeated across timesteps; its error is the
    # curiosity bonus.
    with tf.variable_scope('curiosity_model'):
        action = tf.reshape(self.ACTION, (-1, 1, 1))
        repeat_action = tf.tile(action, [1, self.seq_len, 1])
        state_action = tf.concat([self.X, repeat_action], axis=-1)
        save_state = tf.identity(self.Y)
        cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple=False)
        self.rnn, last_state = tf.nn.dynamic_rnn(
            inputs=state_action,
            cell=cell,
            dtype=tf.float32,
            initial_state=self.hidden_layer,
        )
        self.curiosity_logits = tf.layers.dense(self.rnn[:, -1], self.state_size)
        self.curiosity_cost = tf.reduce_sum(
            tf.square(save_state[:, -1] - self.curiosity_logits), axis=1
        )
        self.curiosity_optimizer = tf.train.RMSPropOptimizer(
            self.LEARNING_RATE
        ).minimize(tf.reduce_mean(self.curiosity_cost))

    total_reward = tf.add(self.curiosity_cost, self.REWARD)

    with tf.variable_scope("q_model"):
        with tf.variable_scope("eval_net"):
            cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple=False)
            rnn, self.last_state = tf.nn.dynamic_rnn(
                inputs=self.X,
                cell=cell,
                dtype=tf.float32,
                initial_state=self.hidden_layer,
            )
            self.logits = tf.layers.dense(rnn[:, -1], self.OUTPUT_SIZE)
        with tf.variable_scope("target_net"):
            cell = tf.nn.rnn_cell.LSTMCell(self.LAYER_SIZE, state_is_tuple=False)
            rnn, last_state = tf.nn.dynamic_rnn(
                inputs=self.Y,
                cell=cell,
                dtype=tf.float32,
                initial_state=self.hidden_layer,
            )
            y_q = tf.layers.dense(rnn[:, -1], self.OUTPUT_SIZE)
        q_target = total_reward + self.GAMMA * tf.reduce_max(y_q, axis=1)
        action = tf.cast(self.ACTION, tf.int32)
        action_indices = tf.stack(
            [tf.range(self.batch_size, dtype=tf.int32), action], axis=1
        )
        q = tf.gather_nd(params=self.logits, indices=action_indices)
        self.cost = tf.losses.mean_squared_error(labels=q_target, predictions=q)
        self.optimizer = tf.train.RMSPropOptimizer(self.LEARNING_RATE).minimize(
            self.cost,
            var_list=tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, "q_model/eval_net"
            ),
        )

    # Op that copies the online (eval) network into the target network.
    t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                 scope='q_model/target_net')
    e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                 scope='q_model/eval_net')
    self.target_replace_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
    self.sess = tf.InteractiveSession()
    self.sess.run(tf.global_variables_initializer())
def __init__(
    self,
    learning_rate,
    num_layers,
    size,
    size_layer,
    output_size,
    kernel_size=3,
    n_attn_heads=16,
    dropout=0.9,
):
    # Convolutional encoder-decoder variant; `layer`, `encoder_block` and
    # `decoder_block` are helpers assumed to be defined elsewhere in the
    # notebook. `dropout` is a keep probability.
    self.X = tf.placeholder(tf.float32, (None, None, size))
    self.Y = tf.placeholder(tf.float32, (None, output_size))
    encoder_embedded = tf.layers.dense(self.X, size_layer)
    e = tf.identity(encoder_embedded)

    for i in range(num_layers):
        z = layer(
            encoder_embedded,
            encoder_block,
            kernel_size,
            size_layer * 2,
            encoder_embedded,
        )
        z = tf.nn.dropout(z, keep_prob=dropout)
        encoder_embedded = z

    encoder_output, output_memory = z, z + e
    g = tf.identity(encoder_embedded)

    for i in range(num_layers):
        attn_res = h = layer(
            encoder_embedded,
            decoder_block,
            kernel_size,
            size_layer * 2,
            residual=tf.zeros_like(encoder_embedded),
        )
        C = []
        for j in range(n_attn_heads):
            h_ = tf.layers.dense(h, size_layer // n_attn_heads)
            g_ = tf.layers.dense(g, size_layer // n_attn_heads)
            zu_ = tf.layers.dense(encoder_output, size_layer // n_attn_heads)
            ze_ = tf.layers.dense(output_memory, size_layer // n_attn_heads)
            d = tf.layers.dense(h_, size_layer // n_attn_heads) + g_
            dz = tf.matmul(d, tf.transpose(zu_, [0, 2, 1]))
            a = tf.nn.softmax(dz)
            c_ = tf.matmul(a, ze_)
            C.append(c_)
        c = tf.concat(C, 2)
        h = tf.layers.dense(attn_res + c, size_layer)
        h = tf.nn.dropout(h, keep_prob=dropout)
        encoder_embedded = h

    encoder_embedded = tf.sigmoid(encoder_embedded[-1])
    self.logits = tf.layers.dense(encoder_embedded, output_size)
    self.cost = tf.reduce_mean(tf.square(self.Y - self.logits))
    self.optimizer = tf.train.AdamOptimizer(learning_rate).minimize(self.cost)