def _layer(layer_input, dims_in, dims_out, layer_id):
    """ Constructs a single layer of the feed-forward network.

    Reads `self` from the enclosing scope (name, options, float type, normalizer, keep probability).

    Args:
        layer_input: tensor that is matrix-multiplied with the [dims_in, dims_out] layer weight.
        dims_in: size of the input's last dimension.
        dims_out: size of the output's last dimension.
        layer_id: integer layer identifier; used in the variable scope name. Layer 1 is treated as the
            first layer, layer `len(self.opt.disc_hidden_list) - 1` as the final one.

    Returns:
        The layer's output tensor.
    """
    scope_id = self.name + '_layer_{:d}'.format(layer_id)
    final_layer_id = len(self.opt.disc_hidden_list) - 1

    def _weight(name, shape):
        # dtype is passed explicitly so the variable follows self.float_type rather than the scope default
        return tf.get_variable(name=name, shape=shape, dtype=self.float_type,
                               initializer=xi(uniform=False, dtype=self.float_type), trainable=True)

    with tf.variable_scope(scope_id):
        # Normalize network input for GAN training; see github.com/soumith/ganhacks
        if layer_id == 1:
            layer_input = tf.tanh(layer_input)
        # Define the matrix multiplication at the basis of each layer
        layer_weight = _weight('layer_weight', [dims_in, dims_out])
        output = tf.matmul(layer_input, layer_weight)
        # Optionally apply activation and normalization function, shortcuts, and dropout
        if layer_id < final_layer_id:
            # Normalization followed by the PReLU non-linearity
            output = self.normalizer(output, dims_out, 'normalized', self.opt.is_train)
            output = prelu(output, scope_id, self.float_type)
            # Shortcut connections
            if self.opt.enable_shortcuts:
                sc_weight_1 = _weight('shortcut_weight_1', [dims_in, dims_out])
                sc_weight_2 = _weight('shortcut_weight_2', [dims_out, dims_out])
                output = _shortcut(layer_input, output, dims_in, dims_out, sc_weight_1, sc_weight_2,
                                   self.opt.is_train)
            # Dropout disabled for the final layer
            output = tf.nn.dropout(output, self.static_keep_prob, name='dropout')
        # Sigmoid point-wise non-linearity applied to output for standard GAN objective
        if layer_id == final_layer_id and self.opt.gan_type == 'NLLGAN':
            output = tf.sigmoid(output)
    return output
def global_attention_subgraph(self):
    """ Creates the trainable parameters of the global 'Luong' attention mechanism used during decoding.

    Publication: arxiv.org/pdf/1508.04025.pdf
    With guidance from:
    github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py

    All variables live in the 'global_decoder_attention' variable scope and are placed on '/gpu:0'.

    Returns:
        A tuple (memory_key_weights, attention_weights, dec_mixture_weights).
    """
    enc_dims = self.opt.enc_hidden_dims
    dec_dims = self.opt.dec_hidden_dims
    attn_dims = self.opt.dec_attention_dims

    def _matrix(var_name, var_shape):
        # Every attention parameter shares the same dtype / initializer / trainability settings
        return tf.get_variable(name=var_name,
                               shape=var_shape,
                               dtype=self.float_type,
                               initializer=xi(uniform=False, dtype=self.float_type),
                               trainable=True)

    with tf.variable_scope('global_decoder_attention'), tf.device('/gpu:0'):
        # Maps encoder 'memories' (hidden states at each time step) onto the decoder's dimensionality
        memory_keys = _matrix('memory_key_weights', [enc_dims, dec_dims])
        # Produces the attention vector describing the alignment between input and output sequences
        alignment = _matrix('attention_weights', [enc_dims + dec_dims, attn_dims])
        # Merges the attention information with the decoder's input during the 'input feeding' step
        mixture = _matrix('mixture_weights', [dec_dims + attn_dims, self.opt.embedding_dims])
    return memory_keys, alignment, mixture
def attention_subgraph(self):
    """ Designates the parameters for the self-attention mechanism used to obtain improved sentence encodings.

    All variables are created in the 'attention' variable scope, placed on '/gpu:0', and are trainable.
    They are declared with dtype=self.float_type explicitly; without it tf.get_variable uses the scope's
    default dtype instead of the dtype handed to the initializer.

    Returns:
        projection_weights: [2 * hidden_dims, attention_dims] matrix initialized with `xi`.
        projection_biases: [attention_dims] vector initialized with zeros.
        context_vector: [attention_dims] vector initialized with `xi`.
    """
    attention_dims = self.opt.attention_dims
    with tf.variable_scope('attention'), tf.device('/gpu:0'):
        projection_weights = tf.get_variable(name='projection_weights',
                                             shape=[self.opt.hidden_dims * 2, attention_dims],
                                             dtype=self.float_type,
                                             initializer=xi(uniform=False, dtype=self.float_type),
                                             trainable=True)
        projection_biases = tf.get_variable(name='projection_biases',
                                            shape=[attention_dims],
                                            dtype=self.float_type,
                                            initializer=tf.zeros_initializer(dtype=self.float_type),
                                            trainable=True)
        context_vector = tf.get_variable(name='context_vector',
                                         shape=[attention_dims],
                                         dtype=self.float_type,
                                         initializer=xi(uniform=False, dtype=self.float_type),
                                         trainable=True)
    return projection_weights, projection_biases, context_vector
def embeddings_subgraph(self):
    """ Builds the embedding table, the embedded (optionally dropped-out) inputs, and the output biases.

    The embedding table is jointly used as the projection matrix for projecting the RNN-generated logits
    into the vocabulary space in the decoder. Everything is created in the 'embeddings' variable scope
    on '/cpu:0'.

    Returns:
        A tuple (embedding_table, input_data, output_embedding_biases).
    """
    vocab_size = self.vocab.n_words
    with tf.variable_scope('embeddings'), tf.device('/cpu:0'):
        table = tf.get_variable(name='embedding_table',
                                shape=[vocab_size, self.opt.embedding_dims],
                                dtype=self.float_type,
                                initializer=xi(uniform=False, dtype=self.float_type),
                                trainable=True)
        # Look up the embeddings of the input indices
        embedded = tf.nn.embedding_lookup(table, self.input_idx, name='embeddings')
        # Dropout on the embedded inputs is controlled by the allow_dropout option
        if self.opt.allow_dropout:
            embedded = tf.nn.dropout(embedded, self.static_keep_prob, name='enc_front_dropout')
        # One bias per vocabulary entry, starting at zero
        vocab_biases = tf.get_variable(name='output_embedding_biases',
                                       shape=[vocab_size],
                                       dtype=self.float_type,
                                       initializer=tf.zeros_initializer(dtype=self.float_type),
                                       trainable=True)
    return table, embedded, vocab_biases
def embeddings_subgraph(self):
    """ Creates the trainable embedding table in the 'embeddings' variable scope on '/cpu:0'.

    Returns:
        The [n_words, embedding_dims] embedding table variable.
    """
    table_shape = [self.vocab.n_words, self.opt.embedding_dims]
    table_init = xi(uniform=False, dtype=self.float_type)
    with tf.variable_scope('embeddings'), tf.device('/cpu:0'):
        return tf.get_variable(name='embedding_table',
                               shape=table_shape,
                               dtype=self.float_type,
                               initializer=table_init,
                               trainable=True)
def encoder_mixture_subgraph(self):
    """ Creates the mixture weights for conditioning decoder outputs on encoder-side sentence encodings.

    The weights are meant to combine the encodings produced by the encoder-side attention mechanism with
    the decoder inputs at every decoding time-step; unused due to the observed inefficacy of encoder-side
    attention for the decoding process.

    Returns:
        The [embedding_dims + enc_attention_dims, embedding_dims] weight matrix, created in the
        'encoder_mixture' variable scope on '/gpu:0'.
    """
    embedding_dims = self.opt.embedding_dims
    mixture_shape = [embedding_dims + self.opt.enc_attention_dims, embedding_dims]
    with tf.variable_scope('encoder_mixture'), tf.device('/gpu:0'):
        mixture = tf.get_variable(name='encoder_mixture_weights',
                                  shape=mixture_shape,
                                  dtype=self.float_type,
                                  initializer=xi(uniform=False, dtype=self.float_type),
                                  trainable=True)
    return mixture
def projection_subgraph(self):
    """ Creates the weights and biases that project RNN outputs into the embedding space.

    The projection is meant to follow the completion of each full pass through the RNN. Both variables
    are created in the 'decoder_projection' variable scope on '/gpu:0'.

    Returns:
        A tuple (projection_weights, projection_biases) of shapes [dec_hidden_dims, embedding_dims]
        and [embedding_dims].
    """
    target_dims = self.opt.embedding_dims
    shared_kwargs = dict(dtype=self.float_type, trainable=True)
    with tf.variable_scope('decoder_projection'), tf.device('/gpu:0'):
        weights = tf.get_variable(name='projection_weights',
                                  shape=[self.opt.dec_hidden_dims, target_dims],
                                  initializer=xi(uniform=False, dtype=self.float_type),
                                  **shared_kwargs)
        # Biases start at zero
        biases = tf.get_variable(name='projection_biases',
                                 shape=[target_dims],
                                 initializer=tf.zeros_initializer(dtype=self.float_type),
                                 **shared_kwargs)
    return weights, biases
def state_projection_subgraph(self):
    """ Derives the decoder's initial LSTM state from the encoder's final state.

    The projection parameters are always created (in the 'state_projection' variable scope on '/gpu:0'),
    but are only applied when neither of the two direct assignment cases below holds.

    Returns:
        A tuple (c_state, h_state, decoder_state); decoder_state is a tuple holding one LSTMStateTuple
        per decoder layer.
    """
    enc_layers = self.opt.enc_num_layers
    dec_layers = self.opt.dec_num_layers
    projected_width = self.opt.dec_hidden_dims * dec_layers

    with tf.variable_scope('state_projection'), tf.device('/gpu:0'):
        weights = tf.get_variable(name='state_projection_weights',
                                  shape=[self.opt.enc_hidden_dims * enc_layers, projected_width],
                                  dtype=self.float_type,
                                  initializer=xi(uniform=False, dtype=self.float_type),
                                  trainable=True)
        biases = tf.get_variable(name='state_projection_biases',
                                 shape=[projected_width],
                                 dtype=self.float_type,
                                 initializer=tf.zeros_initializer(self.float_type),
                                 trainable=True)

        # Unpack the final state representation into two halves along the first axis
        # (presumably cell and hidden states stacked on axis 0 -- confirm against the encoder)
        c_state, h_state = tf.split(self.final_state, 2, axis=0)

        if enc_layers == dec_layers:
            # Same depth: hand encoder states to decoder layers one-to-one
            c_states = tf.split(c_state, enc_layers, axis=1)
            h_states = tf.split(h_state, enc_layers, axis=1)
        elif enc_layers == 1 and dec_layers > 1:
            # Single encoder layer: every decoder layer starts from a copy of its final state
            c_states = [c_state] * dec_layers
            h_states = [h_state] * dec_layers
        else:
            # Otherwise project the encoder state so that it matches the decoder state's dimensionality
            projected_state = tf.nn.xw_plus_b(self.final_state, weights, biases)
            c_state, h_state = tf.split(projected_state, 2, axis=0)
            c_states = tf.split(c_state, dec_layers, axis=1)
            h_states = tf.split(h_state, dec_layers, axis=1)

        # Assemble the LSTM cell state tuples used to initialize the decoder, one per decoder layer
        layer_states = []
        for layer_id in range(dec_layers):
            layer_states.append(tf.contrib.rnn.LSTMStateTuple(c_states[layer_id], h_states[layer_id]))
        decoder_state = tuple(layer_states)
    return c_state, h_state, decoder_state
def embeddings_subgraph(self):
    """ Creates the embedding table and the output biases.

    The embedding table is jointly used as the projection matrix for projecting the RNN-generated logits
    into the vocabulary space in the decoder. Both variables are created in the 'embeddings' variable
    scope on '/cpu:0'.

    Returns:
        A tuple (embedding_table, output_embedding_biases) of shapes [n_words, embedding_dims]
        and [n_words].
    """
    vocab_size = self.vocab.n_words
    shared_kwargs = dict(dtype=self.float_type, trainable=True)
    with tf.variable_scope('embeddings'), tf.device('/cpu:0'):
        table = tf.get_variable(name='embedding_table',
                                shape=[vocab_size, self.opt.embedding_dims],
                                initializer=xi(uniform=False, dtype=self.float_type),
                                **shared_kwargs)
        # One zero-initialized bias per vocabulary entry
        vocab_biases = tf.get_variable(name='output_embedding_biases',
                                       shape=[vocab_size],
                                       initializer=tf.zeros_initializer(dtype=self.float_type),
                                       **shared_kwargs)
    return table, vocab_biases
def embeddings_subgraph(self):
    """ Creates the embedding table and embeds the input indices.

    Both are built in the 'embeddings' variable scope on '/cpu:0'. Dropout is applied to the embedded
    inputs only when the is_train option is set.

    Returns:
        A tuple (embedding_table, input_data).
    """
    table_shape = [self.vocab.n_words, self.opt.embedding_dims]
    with tf.variable_scope('embeddings'), tf.device('/cpu:0'):
        table = tf.get_variable(name='embedding_table',
                                shape=table_shape,
                                dtype=self.float_type,
                                initializer=xi(uniform=False, dtype=self.float_type),
                                trainable=True)
        # Look up the embeddings of the input indices
        embedded = tf.nn.embedding_lookup(table, self.input_idx, name='embeddings')
        if not self.opt.is_train:
            # No dropout outside of training
            return table, embedded
        embedded = tf.nn.dropout(embedded, self.static_keep_prob, name='front_dropout')
    return table, embedded
def state_projection_subgraph(self):
    """ Creates the parameters of the encoder state projection.

    Intended for a state size mismatch between encoder and decoder; unused if encoder and decoder states
    are of identical size. Both variables are created in the 'state_projection' variable scope on
    '/gpu:0'.

    Returns:
        A tuple (state_projection_weights, state_projection_biases) of shapes
        [enc_hidden_dims * enc_num_layers, dec_hidden_dims * dec_num_layers] and
        [dec_hidden_dims * dec_num_layers].
    """
    source_width = self.opt.enc_hidden_dims * self.opt.enc_num_layers
    target_width = self.opt.dec_hidden_dims * self.opt.dec_num_layers
    with tf.variable_scope('state_projection'), tf.device('/gpu:0'):
        weights = tf.get_variable(name='state_projection_weights',
                                  shape=[source_width, target_width],
                                  dtype=self.float_type,
                                  initializer=xi(uniform=False, dtype=self.float_type),
                                  trainable=True)
        biases = tf.get_variable(name='state_projection_biases',
                                 shape=[target_width],
                                 dtype=self.float_type,
                                 initializer=tf.zeros_initializer(self.float_type),
                                 trainable=True)
    return weights, biases
def attention_subgraph(self):
    """ Defines the self-attention mechanism used to obtain improved sentence encodings.

    Takes the hidden states of the topmost RNN layer (self.rnn_outputs) as input; unused, as exploratory
    experiments were unable to show any positive effect on the reconstruction objective.

    Publication: see www.cs.cmu.edu/~diyiy/docs/naacl16.pdf
    Publication code: github.com/ematvey/hierarchical-attention-networks/blob/master/HAN_model.py

    All variables and the all-ones helper tensor use self.float_type, so that the matrix products below
    receive operands of one dtype.

    Returns:
        sentence_encodings: sum over the time axis of the importance-weighted, projected RNN outputs.
    """
    enc_hidden_dims = self.opt.enc_hidden_dims
    enc_attention_dims = self.opt.enc_attention_dims

    with tf.variable_scope('sentence_attention'), tf.device('/gpu:0'):
        # Designate attention parameters
        projection_weights = tf.get_variable(name='projection_weights',
                                             shape=[enc_hidden_dims, enc_attention_dims],
                                             dtype=self.float_type,
                                             initializer=xi(uniform=False, dtype=self.float_type),
                                             trainable=True)
        projection_biases = tf.get_variable(name='projection_biases',
                                            shape=[enc_attention_dims],
                                            dtype=self.float_type,
                                            initializer=tf.zeros_initializer(dtype=self.float_type),
                                            trainable=True)
        context_vector = tf.get_variable(name='context_vector',
                                         shape=[enc_attention_dims],
                                         dtype=self.float_type,
                                         initializer=xi(uniform=False, dtype=self.float_type),
                                         trainable=True)

        # Compute attention values: flatten the RNN outputs to 2D, project them, restore the 3D layout
        memory_values = tf.reshape(self.rnn_outputs, shape=[-1, enc_hidden_dims], name='memory_values')
        projected_memories = tf.nn.tanh(
            tf.nn.xw_plus_b(memory_values, projection_weights, projection_biases), name='projected_memories')
        projected_memories = tf.reshape(projected_memories, shape=[self.batch_length, self.batch_steps, -1])

        # Mask out positions corresponding to padding within the input
        score_mask = tf.sequence_mask(
            self.length_mask, maxlen=tf.reduce_max(self.length_mask), dtype=self.float_type)
        score_mask = tf.expand_dims(score_mask, -1)
        # Replicate the mask along the attention dimension by multiplying with an all-ones tensor
        mask_tiler = tf.ones([self.batch_length, enc_attention_dims, 1], dtype=self.float_type)
        score_mask = tf.matmul(score_mask, mask_tiler, transpose_b=True)
        projected_memories = tf.where(
            tf.cast(score_mask, dtype=tf.bool), projected_memories, tf.zeros_like(projected_memories))

        # Calculate the importance of the individual encoder hidden states for the informativeness of
        # the computed sentence representation
        # NOTE(review): masked positions score 0 and therefore still receive softmax mass -- confirm
        # whether that is intended
        context_product = tf.reduce_sum(
            tf.multiply(projected_memories, context_vector, name='context_product'), axis=2, keep_dims=True)
        attention_weights = tf.nn.softmax(context_product, dim=1, name='importance_weight')

        # Weigh the projected encoder hidden states according to the calculated importance weights
        weighted_memories = tf.multiply(projected_memories, attention_weights)
        # Sentence encodings are the importance-weighted sums over the time axis
        sentence_encodings = tf.reduce_sum(weighted_memories, axis=1, name='sentence_encodings')
    return sentence_encodings
def __init__(self, hidden_size, rnn_cell, filter_dims, filter_nums, strides, all_scope, action_num,
             learning_rate):
    """ Builds a convolutional-recurrent Q-network with separate advantage and value streams.

    Args:
        hidden_size: size of the RNN input/output vectors; split in two for the advantage/value streams.
        rnn_cell: RNN cell handed to tf.nn.dynamic_rnn; must provide zero_state().
        filter_dims: four filter sizes, each unpacked into the leading dimensions of a conv filter shape.
        filter_nums: four output channel counts, one per convolutional layer.
        strides: four stride specifications, each unpacked between the leading and trailing 1 of the
            conv2d strides argument.
        all_scope: prefix for all variable scope names created here.
        action_num: number of actions, i.e. the width of the advantage stream output.
        learning_rate: learning rate of the Adam optimizer.
    """
    self.hidden_size = hidden_size
    self.rnn_cell = rnn_cell
    self.filter_dims = filter_dims
    self.filter_nums = filter_nums
    self.strides = strides
    self.all_scope = all_scope
    self.action_num = action_num
    self.learning_rate = learning_rate
    self.dtype = tf.float32

    # Define placeholders for input, training parameters, and training values
    self.scalar_input = tf.placeholder(shape=[None, 80 * 80 * 3], dtype=self.dtype, name='scalar_input')
    self.trace_length = tf.placeholder(dtype=tf.int32, name='train_duration')
    self.batch_size = tf.placeholder(dtype=tf.int32, name='batch_size')
    # Both below have shape=[batch_size * trace_len]
    self.target_q_holder = tf.placeholder(shape=[None], dtype=self.dtype, name='target_Q_values')
    self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32, name='actions_taken')

    # Reshape the scalar input into image-shape
    cnn_input = tf.reshape(self.scalar_input, shape=[-1, 80, 80, 3])

    # Define ConvNet layers for screen image analysis; filter output calculation: W1 = (W - F + 2P) / S + 1
    # The final layer's flattened output must hold hidden_size values per frame for the reshape below
    layer_output = cnn_input
    in_channels = 3
    for layer_idx, scope_suffix in enumerate(('_cnn_1', '_cnn_2', '_cnn_3', '_cnn_out')):
        out_channels = self.filter_nums[layer_idx]
        with tf.variable_scope(self.all_scope + scope_suffix):
            weight = tf.get_variable(name='weight',
                                     shape=[*self.filter_dims[layer_idx], in_channels, out_channels],
                                     initializer=xi_2d())
            bias = tf.get_variable(name='bias', shape=[out_channels],
                                   initializer=tf.constant_initializer(0.1))
            convolved = tf.nn.conv2d(layer_output, weight, strides=[1, *self.strides[layer_idx], 1],
                                     padding='VALID', name='convolution')
            layer_output = tf.nn.relu(tf.nn.bias_add(convolved, bias), name='output')
        in_channels = out_channels
    cnn_out = layer_output

    # Reshape ConvNet output to [batch_size, trace_len, hidden_size] to be fed into the RNN
    cnn_flat = tf.reshape(cnn_out, shape=[-1])
    rnn_input = tf.reshape(cnn_flat, [self.batch_size, self.trace_length, self.hidden_size], name='RNN_input')

    # Initialize RNN and feed the input
    self.state_in = self.rnn_cell.zero_state(self.batch_size, self.dtype)
    self.rnn_outputs, self.final_state = tf.nn.dynamic_rnn(cell=self.rnn_cell, inputs=rnn_input,
                                                           initial_state=self.state_in,
                                                           scope=self.all_scope + '_rnn', dtype=self.dtype)
    # Concatenate RNN time steps
    rnn_2d = tf.reshape(self.rnn_outputs, shape=[-1, self.hidden_size])  # [batch_size * trace_len, hidden_size]

    # Split RNN output into advantage and value streams which are to guide the agent's policy
    # Floor division keeps the shape entries integral under Python 3
    stream_size = self.hidden_size // 2
    with tf.variable_scope(self.all_scope + '_advantage_and_value'):
        a_w = tf.get_variable(name='advantage_weight', shape=[stream_size, self.action_num],
                              dtype=self.dtype, initializer=xi())
        v_w = tf.get_variable(name='value_weight', shape=[stream_size, 1], dtype=self.dtype, initializer=xi())
        a_stream, v_stream = tf.split(rnn_2d, 2, axis=1)
        self.advantage = tf.matmul(a_stream, a_w, name='advantage')
        self.value = tf.matmul(v_stream, v_w, name='value')
    # Gradients of the advantage stream with respect to the input image
    self.improve_vision = tf.gradients(self.advantage, cnn_input)

    # Predict the next action: Q = V + (A - mean(A))
    self.q_out = tf.add(self.value,
                        tf.subtract(self.advantage, tf.reduce_mean(self.advantage, axis=1, keep_dims=True)),
                        name='predicted_action_distribution')  # shape=[batch_size * trace_len, num_actions]
    self.prediction = tf.argmax(self.q_out, axis=1, name='predicted_action')

    with tf.variable_scope(self.all_scope + 'loss'):
        # Obtain loss by measuring the difference between the prediction and the target Q-value
        actions_one_hot = tf.one_hot(self.action_holder, self.action_num, dtype=self.dtype)
        self.predicted_q = tf.reduce_sum(tf.multiply(self.q_out, actions_one_hot), axis=1,
                                         name='predicted_Q_values')
        # predicted_q and l2_loss have shape=[batch_size * trace_len] -> calculated per step
        self.l2_loss = tf.square(tf.subtract(self.predicted_q, self.target_q_holder), name='l2_loss')
        # Mask first half of the losses to only keep the 'important' values; the kept part takes the
        # remainder so that the mask spans exactly trace_length steps even for odd trace lengths
        dropped_steps = self.trace_length // 2
        kept_steps = self.trace_length - dropped_steps
        mask_drop = tf.zeros(shape=[self.batch_size, dropped_steps])
        mask_keep = tf.ones(shape=[self.batch_size, kept_steps])
        mask = tf.concat([mask_drop, mask_keep], axis=1)  # shape=[batch_size, train_duration]
        flat_mask = tf.reshape(mask, [-1])
        self.loss = tf.reduce_mean(tf.multiply(self.l2_loss, flat_mask), name='total_loss')

        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        self.update_model = optimizer.minimize(self.loss)
def __init__(self, input_dims, hidden_1, hidden_2, hidden_3, num_actions, learning_rate,
             binary_objective=True):
    """ Builds a four-layer feed-forward policy network trained with externally accumulated gradients.

    Args:
        input_dims: size of the state vector fed into the network.
        hidden_1: width of the first hidden layer.
        hidden_2: width of the second hidden layer.
        hidden_3: width of the third hidden layer.
        num_actions: number of available actions; with binary_objective the network emits
            num_actions - 1 scores which are passed through a sigmoid.
        learning_rate: learning rate of the Adam optimizer.
        binary_objective: if True, use the sigmoid output and its log-likelihood; otherwise use a
            softmax over num_actions scores.
    """
    self.input_dims = input_dims
    self.hidden_1 = hidden_1
    self.hidden_2 = hidden_2
    self.hidden_3 = hidden_3
    self.learning_rate = learning_rate
    self.dtype = tf.float32
    self.binary = binary_objective
    self.num_actions = num_actions - 1 if self.binary else num_actions

    self.state = tf.placeholder(shape=[None, self.input_dims], dtype=self.dtype, name='current_state')
    # Actions are fed as floats for the binary objective and as integer indices otherwise
    action_dtype = self.dtype if self.binary else tf.int32
    self.action_holder = tf.placeholder(shape=[None, 1], dtype=action_dtype, name='actions')
    self.reward_holder = tf.placeholder(dtype=self.dtype, name='rewards')
    self.keep_prob = tf.placeholder(dtype=self.dtype, name='keep_prob')

    with tf.variable_scope('layer_1'):
        w1 = tf.get_variable(name='weight', shape=[self.input_dims, self.hidden_1], dtype=self.dtype,
                             initializer=xi())
        o1 = tf.nn.relu(tf.matmul(self.state, w1), name='output')
        d1 = tf.nn.dropout(o1, self.keep_prob)
    with tf.variable_scope('layer_2'):
        w2 = tf.get_variable(name='weight', shape=[self.hidden_1, self.hidden_2], dtype=self.dtype,
                             initializer=xi())
        o2 = tf.nn.relu(tf.matmul(d1, w2), name='output')
        d2 = tf.nn.dropout(o2, self.keep_prob)
    with tf.variable_scope('layer_3'):
        # No dropout after the third hidden layer
        w3 = tf.get_variable(name='weight', shape=[self.hidden_2, self.hidden_3], dtype=self.dtype,
                             initializer=xi())
        o3 = tf.nn.relu(tf.matmul(d2, w3), name='hidden_1')
    with tf.variable_scope('layer_4'):
        w4 = tf.get_variable(name='weight', shape=[self.hidden_3, self.num_actions], dtype=self.dtype,
                             initializer=xi())
        score = tf.matmul(o3, w4, name='score')
        if self.binary:
            self.probability = tf.nn.sigmoid(score, name='action_probability')
        else:
            self.probability = tf.nn.softmax(score, name='action_probabilities')

    # Collects every trainable variable present in the default graph at this point
    self.t_vars = tf.trainable_variables()

    with tf.variable_scope('loss'):
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        # One placeholder per trainable variable, through which gradients are fed to batch_update
        self.gradient_holders = [tf.placeholder(dtype=tf.float32, name=str(_idx) + '_holder')
                                 for _idx in range(len(self.t_vars))]
        if self.binary:
            # NOTE(review): this rebinds self.action_holder from the placeholder to the flipped tensor;
            # kept as-is because callers may feed through this attribute -- confirm against the caller
            self.action_holder = tf.abs(self.action_holder - 1)
            log_lh = tf.log(
                self.action_holder * (self.action_holder - self.probability) +
                (1 - self.action_holder) * (self.action_holder + self.probability))
            self.loss = - tf.reduce_mean(log_lh * self.reward_holder)
        else:
            # Flatten actions and rewards to rank 1 first: adding the [N] range to the [N, 1] action
            # placeholder would broadcast to [N, N] and pair each action with every sample's probabilities
            flat_actions = tf.reshape(self.action_holder, [-1])
            flat_rewards = tf.reshape(self.reward_holder, [-1])
            probability_shape = tf.shape(self.probability)
            # Offset of each sample's row in the flattened probabilities plus the index of the chosen action
            indices = tf.range(0, probability_shape[0]) * probability_shape[1] + flat_actions
            responsible_outputs = tf.gather(tf.reshape(self.probability, [-1]), indices)
            self.loss = - tf.reduce_mean(tf.multiply(tf.log(responsible_outputs), flat_rewards), name='loss')

        self.get_gradients = tf.gradients(self.loss, self.t_vars)
        self.batch_update = optimizer.apply_gradients(zip(self.gradient_holders, self.t_vars))
def __init__(self, hidden_1, hidden_2, hidden_3, input_dims, state_dims, learning_rate):
    """ Builds a feed-forward network predicting the next observation, reward, and done flag.

    Args:
        hidden_1: width of the first hidden layer.
        hidden_2: width of the second hidden layer.
        hidden_3: width of the third hidden layer.
        input_dims: size of the observation vector that is predicted.
        state_dims: size of the previous-state vector fed into the network.
        learning_rate: learning rate of the Adam optimizer.
    """
    self.hidden_1 = hidden_1
    self.hidden_2 = hidden_2
    self.hidden_3 = hidden_3
    self.input_dims = input_dims
    self.state_dims = state_dims
    self.learning_rate = learning_rate
    self.dtype = tf.float32

    def _holder(width, name):
        # Placeholder for a batch of row vectors of the given width
        return tf.placeholder(shape=[None, width], dtype=self.dtype, name=name)

    def _affine_params(weight_name, bias_name, dims_in, dims_out, bias_value):
        # Weight matrix followed by its constant-initialized bias vector
        weight = tf.get_variable(name=weight_name, shape=[dims_in, dims_out], dtype=self.dtype,
                                 initializer=xi())
        bias = tf.get_variable(name=bias_name, shape=[dims_out], dtype=self.dtype,
                               initializer=tf.constant_initializer(bias_value))
        return weight, bias

    self.previous_state = _holder(self.state_dims, 'model_input')
    self.true_observation = _holder(self.input_dims, 'true_obs')
    self.true_reward = _holder(1, 'true_reward')
    self.true_done = _holder(1, 'true_done')
    self.keep_prob = tf.placeholder(dtype=self.dtype, name='keep_prob')

    # Hidden layers; dropout follows the first two only
    with tf.variable_scope('layer_1'):
        weight_1, bias_1 = _affine_params('weights', 'biases', self.state_dims, self.hidden_1, 0.0)
        hidden_out_1 = tf.nn.relu(tf.nn.xw_plus_b(self.previous_state, weight_1, bias_1), name='output')
        dropped_1 = tf.nn.dropout(hidden_out_1, keep_prob=self.keep_prob)
    with tf.variable_scope('layer_2'):
        weight_2, bias_2 = _affine_params('weights', 'biases', self.hidden_1, self.hidden_2, 0.0)
        hidden_out_2 = tf.nn.relu(tf.nn.xw_plus_b(dropped_1, weight_2, bias_2), name='output')
        dropped_2 = tf.nn.dropout(hidden_out_2, self.keep_prob)
    with tf.variable_scope('layer_3'):
        weight_3, bias_3 = _affine_params('weights', 'biases', self.hidden_2, self.hidden_3, 0.0)
        hidden_out_3 = tf.nn.relu(tf.nn.xw_plus_b(dropped_2, weight_3, bias_3), name='output')

    # Three prediction heads on top of the last hidden layer
    with tf.variable_scope('prediction_layer'):
        obs_weight, obs_bias = _affine_params('state_weight', 'state_bias', self.hidden_3, self.input_dims, 0.0)
        reward_weight, reward_bias = _affine_params('reward_weight', 'reward_bias', self.hidden_3, 1, 0.0)
        # The done head's bias starts at 1.0 rather than 0.0
        done_weight, done_bias = _affine_params('done_weight', 'done_bias', self.hidden_3, 1, 1.0)

        predicted_observation = tf.nn.xw_plus_b(hidden_out_3, obs_weight, obs_bias,
                                                name='observation_prediction')
        predicted_reward = tf.nn.xw_plus_b(hidden_out_3, reward_weight, reward_bias, name='reward_prediction')
        done_logits = tf.nn.xw_plus_b(hidden_out_3, done_weight, done_bias, name='done_prediction')
        predicted_done = tf.nn.sigmoid(done_logits)
        # Observation, reward, and done predictions side by side
        self.predicted_state = tf.concat(values=[predicted_observation, predicted_reward, predicted_done],
                                         axis=1, name='state_prediction')

    # Get losses
    with tf.variable_scope('loss'):
        observation_loss = tf.square(tf.subtract(self.true_observation, predicted_observation),
                                     name='observation_loss')
        reward_loss = tf.square(tf.subtract(self.true_reward, predicted_reward), name='reward_loss')
        # Cross-entropy on the done flag: likelihood the prediction assigns to the true value
        done_match = tf.multiply(self.true_done, predicted_done)
        not_done_match = tf.multiply(1 - self.true_done, 1 - predicted_done)
        done_loss = - tf.log(done_match + not_done_match)
        # The done loss is weighted twice as heavily as the other two terms
        self.loss = tf.reduce_mean(1.0 * observation_loss + 1.0 * reward_loss + 2.0 * done_loss,
                                   name='combined_loss')

        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        self.update_model = optimizer.minimize(loss=self.loss)