Example #1
 def _layer(layer_input, dims_in, dims_out, layer_id):
     """ Constructs a single layer of the feed-forward network. """
     scope_id = self.name + '_layer_{:d}'.format(layer_id)
     with tf.variable_scope(scope_id):
         # Normalize network input for GAN training; see github.com/soumith/ganhacks
         if layer_id == 1:
             layer_input = tf.tanh(layer_input)
         # Define the matrix multiplication at the basis of each layer
         layer_weight = tf.get_variable(name='layer_weight', shape=[dims_in, dims_out],
                                        initializer=xi(uniform=False, dtype=self.float_type), trainable=True)
         output = tf.matmul(layer_input, layer_weight)
         # Optionally apply activation and normalization function, shortcuts, and dropout
         if layer_id < (len(self.opt.disc_hidden_list) - 1):
             # Normalization
             output = self.normalizer(output, dims_out, 'normalized', self.opt.is_train)
             output = prelu(output, scope_id, self.float_type)
             # Shortcut connections
             if self.opt.enable_shortcuts:
                 sc_weight_1 = tf.get_variable(name='shortcut_weight_1', shape=[dims_in, dims_out],
                                               initializer=xi(uniform=False, dtype=self.float_type),
                                               trainable=True)
                 sc_weight_2 = tf.get_variable(name='shortcut_weight_2', shape=[dims_out, dims_out],
                                               initializer=xi(uniform=False, dtype=self.float_type),
                                               trainable=True)
                 output = _shortcut(layer_input, output, dims_in, dims_out, sc_weight_1, sc_weight_2,
                                    self.opt.is_train)
             # Dropout (not applied to the final layer)
             output = tf.nn.dropout(output, self.static_keep_prob, name='dropout')
         # Sigmoid point-wise non-linearity applied to output for standard GAN objective
         if layer_id == (len(self.opt.disc_hidden_list) - 1) and self.opt.gan_type == 'NLLGAN':
             output = tf.sigmoid(output)
         return output
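
For orientation, a minimal driver sketch (not part of the source) showing how _layer could be chained into the full feed-forward stack; it assumes self.opt.disc_hidden_list holds the layer widths from input to output, as the index arithmetic above implies:

 def _build_stack(disc_input):
     """ Hypothetical helper: chains _layer over the widths in self.opt.disc_hidden_list. """
     dims = self.opt.disc_hidden_list
     output = disc_input
     # layer_id starts at 1 so that the tanh input normalization in _layer fires exactly once
     for layer_id in range(1, len(dims)):
         output = _layer(output, dims[layer_id - 1], dims[layer_id], layer_id)
     return output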
Example #2
 def global_attention_subgraph(self):
     """ Defines the parameters for the global 'Luong' attention mechanism used during decoding;
     Publication: arxiv.org/pdf/1508.04025.pdf; With guidance from:
     github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py """
     with tf.variable_scope('global_decoder_attention'), tf.device('/gpu:0'):
         # Projects the encoder 'memories' (hidden states at each time step) to match decoder dimensions
         memory_key_weights = tf.get_variable(
             name='memory_key_weights',
             shape=[self.opt.enc_hidden_dims, self.opt.dec_hidden_dims],
             dtype=self.float_type,
             initializer=xi(uniform=False, dtype=self.float_type),
             trainable=True)
         # Used in computing the attention vector describing the alignment between input and output sequences
         attention_weights = tf.get_variable(
             name='attention_weights',
             shape=[
                 self.opt.enc_hidden_dims + self.opt.dec_hidden_dims,
                 self.opt.dec_attention_dims
             ],
             dtype=self.float_type,
             initializer=xi(uniform=False, dtype=self.float_type),
             trainable=True)
         # Used for combining the attention information with the decoder's input during the 'input feeding' step
         dec_mixture_weights = tf.get_variable(
             name='mixture_weights',
             shape=[
                 self.opt.dec_hidden_dims + self.opt.dec_attention_dims,
                 self.opt.embedding_dims
             ],
             dtype=self.float_type,
             initializer=xi(uniform=False, dtype=self.float_type),
             trainable=True)
         return memory_key_weights, attention_weights, dec_mixture_weights
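
These are parameters only; one plausible wiring for a single decoding step under the Luong formulation follows, with illustrative tensor names (memories, dec_state) that are not defined in the source:

 # memories: [batch, src_len, enc_hidden_dims]; dec_state: [batch, dec_hidden_dims]
 keys = tf.tensordot(memories, memory_key_weights, axes=[[2], [0]])
 scores = tf.matmul(keys, tf.expand_dims(dec_state, -1))        # [batch, src_len, 1]
 alignments = tf.nn.softmax(scores, dim=1)
 context = tf.reduce_sum(alignments * memories, axis=1)         # [batch, enc_hidden_dims]
 attention = tf.tanh(tf.matmul(tf.concat([context, dec_state], -1), attention_weights))
 # Input feeding: fold the attentional information back into the next decoder input
 fed_input = tf.matmul(tf.concat([dec_state, attention], -1), dec_mixture_weights)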
Example #3
 def attention_subgraph(self):
     """ Designates the parameters for the self-attention mechanism used to obtain improved sentence encodings. """
     with tf.variable_scope('attention'), tf.device('/gpu:0'):
         projection_weights = tf.get_variable(name='projection_weights',
                                              shape=[self.opt.hidden_dims * 2, self.opt.attention_dims],
                                              initializer=xi(uniform=False, dtype=self.float_type),
                                              trainable=True)
         projection_biases = tf.get_variable(name='projection_biases', shape=[self.opt.attention_dims],
                                             initializer=tf.zeros_initializer(dtype=self.float_type),
                                             trainable=True)
         context_vector = tf.get_variable(name='context_vector', shape=[self.opt.attention_dims],
                                          initializer=xi(uniform=False, dtype=self.float_type), trainable=True)
     return projection_weights, projection_biases, context_vector
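
Example #12 below spells out the full computation; condensed, these three parameters implement the HAN-style scoring roughly as follows (hidden is an illustrative stand-in for the stacked bidirectional RNN outputs):

 # hidden: [batch, steps, hidden_dims * 2]
 projected = tf.nn.tanh(tf.tensordot(hidden, projection_weights, axes=[[2], [0]]) + projection_biases)
 importance = tf.nn.softmax(tf.reduce_sum(projected * context_vector, axis=2, keep_dims=True), dim=1)
 sentence_encoding = tf.reduce_sum(projected * importance, axis=1)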
Example #4
 def embeddings_subgraph(self):
     """ Initializes the embedding table and output biases; embedding table is jointly used as the projection matrix
     for projecting the RNN-generated logits into the vocabulary space in the decoder. """
     with tf.variable_scope('embeddings'), tf.device('/cpu:0'):
         embedding_table = tf.get_variable(
             name='embedding_table',
             shape=[self.vocab.n_words, self.opt.embedding_dims],
             dtype=self.float_type,
             initializer=xi(uniform=False, dtype=self.float_type),
             trainable=True)
         # Embed input indices
         input_data = tf.nn.embedding_lookup(embedding_table,
                                             self.input_idx,
                                             name='embeddings')
         if self.opt.allow_dropout:
             input_data = tf.nn.dropout(input_data,
                                        self.static_keep_prob,
                                        name='enc_front_dropout')
         output_embedding_biases = tf.get_variable(
             name='output_embedding_biases',
             shape=[self.vocab.n_words],
             dtype=self.float_type,
             initializer=tf.zeros_initializer(dtype=self.float_type),
             trainable=True)
     return embedding_table, input_data, output_embedding_biases
Example #5
 def embeddings_subgraph(self):
     """ Initializes the embedding table. """
     with tf.variable_scope('embeddings'), tf.device('/cpu:0'):
         embedding_table = tf.get_variable(name='embedding_table',
                                           shape=[self.vocab.n_words, self.opt.embedding_dims],
                                           dtype=self.float_type,
                                           initializer=xi(uniform=False, dtype=self.float_type),
                                           trainable=True)
         return embedding_table
Example #6
 def encoder_mixture_subgraph(self):
     """ Defines the mixture weights used to condition decoder outputs on sentence encodings produced by the
     encoder-side attention mechanism by combining so obtained encodings with decoder inputs at every decoding
     time-step; unused due to the observed inefficacy of encoder-side attention for the decoding process. """
     with tf.variable_scope('encoder_mixture'), tf.device('/gpu:0'):
         enc_mixture_weights = tf.get_variable(
             name='encoder_mixture_weights',
             shape=[
                 self.opt.embedding_dims + self.opt.enc_attention_dims,
                 self.opt.embedding_dims
             ],
             dtype=self.float_type,
             initializer=xi(uniform=False, dtype=self.float_type),
             trainable=True)
     return enc_mixture_weights
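
A sketch of the (unused) conditioning step these weights were meant for: fusing the attention-derived sentence encoding with the embedded decoder input at each time-step (dec_input and sentence_encoding are illustrative names):

 # dec_input: [batch, embedding_dims]; sentence_encoding: [batch, enc_attention_dims]
 mixed = tf.concat([dec_input, sentence_encoding], axis=-1)
 conditioned_input = tf.matmul(mixed, enc_mixture_weights)  # back to [batch, embedding_dims]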
Example #7
 def projection_subgraph(self):
     """ Defines the weight and bias parameters used to project RNN outputs into the embedding space following
     the completion of each full pass through the RNN. """
     with tf.variable_scope('decoder_projection'), tf.device('/gpu:0'):
         projection_weights = tf.get_variable(
             name='projection_weights',
             shape=[self.opt.dec_hidden_dims, self.opt.embedding_dims],
             dtype=self.float_type,
             initializer=xi(uniform=False, dtype=self.float_type),
             trainable=True)
         projection_biases = tf.get_variable(
             name='projection_biases',
             shape=[self.opt.embedding_dims],
             dtype=self.float_type,
             initializer=tf.zeros_initializer(dtype=self.float_type),
             trainable=True)
     return projection_weights, projection_biases
Example #8
    def state_projection_subgraph(self):
        """ Defines parameters for the encoder state projection, in case of a state size mismatch between
        encoder and decoder; unused if encoder and decoder states are of identical size. """
        with tf.variable_scope('state_projection'), tf.device('/gpu:0'):
            state_projection_weights = tf.get_variable(
                name='state_projection_weights',
                shape=[
                    self.opt.enc_hidden_dims * self.opt.enc_num_layers,
                    self.opt.dec_hidden_dims * self.opt.dec_num_layers
                ],
                dtype=self.float_type,
                initializer=xi(uniform=False, dtype=self.float_type),
                trainable=True)
            state_projection_biases = tf.get_variable(
                name='state_projection_biases',
                shape=[self.opt.dec_hidden_dims * self.opt.dec_num_layers],
                dtype=self.float_type,
                initializer=tf.zeros_initializer(dtype=self.float_type),
                trainable=True)

            # Unpack the final state representation
            c_state, h_state = tf.split(self.final_state, 2, axis=0)
            if self.opt.enc_num_layers == self.opt.dec_num_layers:
                # Assign encoder states to decoder states on a per-layer basis
                c_states = tf.split(c_state, self.opt.enc_num_layers, axis=1)
                h_states = tf.split(h_state, self.opt.enc_num_layers, axis=1)
            elif self.opt.enc_num_layers == 1 and self.opt.dec_num_layers > 1:
                # Initialize each decoder layer with a copy of the final encoder layer
                c_states = [c_state] * self.opt.dec_num_layers
                h_states = [h_state] * self.opt.dec_num_layers
            else:
                # Project encoder's hidden and cell states so as to match the decoder state's dimensionality
                projected_state = tf.nn.xw_plus_b(self.final_state,
                                                  state_projection_weights,
                                                  state_projection_biases)
                c_state, h_state = tf.split(projected_state, 2, axis=0)
                c_states = tf.split(c_state, self.opt.dec_num_layers, axis=1)
                h_states = tf.split(h_state, self.opt.dec_num_layers, axis=1)

            # Assemble the appropriate LSTM cell state tuple used to initialize the decoder
            decoder_state = tuple([
                tf.contrib.rnn.LSTMStateTuple(c_states[layer_id],
                                              h_states[layer_id])
                for layer_id in range(self.opt.dec_num_layers)
            ])
        return c_state, h_state, decoder_state
Example #9
 def embeddings_subgraph(self):
     """ Initializes the embedding table and output biases; embedding table is jointly used as the projection matrix
     for projecting the RNN-generated logits into the vocabulary space in the decoder. """
     with tf.variable_scope('embeddings'), tf.device('/cpu:0'):
         embedding_table = tf.get_variable(
             name='embedding_table',
             shape=[self.vocab.n_words, self.opt.embedding_dims],
             dtype=self.float_type,
             initializer=xi(uniform=False, dtype=self.float_type),
             trainable=True)
         output_embedding_biases = tf.get_variable(
             name='output_embedding_biases',
             shape=[self.vocab.n_words],
             dtype=self.float_type,
             initializer=tf.zeros_initializer(dtype=self.float_type),
             trainable=True)
     return embedding_table, output_embedding_biases
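
The weight tying mentioned in the docstring would look roughly like this, combining the projection parameters from Example #7 with the transposed embedding table (projected is an illustrative name for the embedding-space RNN outputs):

 # projected: [batch * steps, embedding_dims]
 logits = tf.nn.xw_plus_b(projected, tf.transpose(embedding_table),
                          output_embedding_biases)  # [batch * steps, n_words]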
Example #10
 def embeddings_subgraph(self):
     """ Instantiates the embedding table and the embedding lookup operation. """
     with tf.variable_scope('embeddings'), tf.device('/cpu:0'):
         embedding_table = tf.get_variable(
             name='embedding_table',
             shape=[self.vocab.n_words, self.opt.embedding_dims],
             dtype=self.float_type,
             initializer=xi(uniform=False, dtype=self.float_type),
             trainable=True)
         # Embed input indices
         input_data = tf.nn.embedding_lookup(embedding_table,
                                             self.input_idx,
                                             name='embeddings')
         # Optionally apply dropout (at training time only)
         if self.opt.is_train:
             input_data = tf.nn.dropout(input_data,
                                        self.static_keep_prob,
                                        name='front_dropout')
     return embedding_table, input_data
Example #11
 def state_projection_subgraph(self):
     """ Defines parameters for the encoder state projection, in case of a state size mismatch between
     encoder and decoder; unused if encoder and decoder states are of identical size. """
     with tf.variable_scope('state_projection'), tf.device('/gpu:0'):
         state_projection_weights = tf.get_variable(
             name='state_projection_weights',
             shape=[
                 self.opt.enc_hidden_dims * self.opt.enc_num_layers,
                 self.opt.dec_hidden_dims * self.opt.dec_num_layers
             ],
             dtype=self.float_type,
             initializer=xi(uniform=False, dtype=self.float_type),
             trainable=True)
         state_projection_biases = tf.get_variable(
             name='state_projection_biases',
             shape=[self.opt.dec_hidden_dims * self.opt.dec_num_layers],
             dtype=self.float_type,
              initializer=tf.zeros_initializer(dtype=self.float_type),
             trainable=True)
     return state_projection_weights, state_projection_biases
Example #12
 def attention_subgraph(self):
     """ Defines the self-attention mechanism used to obtain improved sentence encodings;
     takes the hidden states of the topmost RNN layer as input;
     unused, as exploratory experiments were unable to show any positive effect on the reconstruction objective. """
     with tf.variable_scope('sentence_attention'), tf.device('/gpu:0'):
         # Publication: see www.cs.cmu.edu/~diyiy/docs/naacl16.pdf
         # Publication code: github.com/ematvey/hierarchical-attention-networks/blob/master/HAN_model.py
         # Designate attention parameters
         projection_weights = tf.get_variable(
             name='projection_weights',
             shape=[self.opt.enc_hidden_dims, self.opt.enc_attention_dims],
             initializer=xi(uniform=False, dtype=self.float_type),
             trainable=True)
         projection_biases = tf.get_variable(
             name='projection_biases',
             shape=[self.opt.enc_attention_dims],
             initializer=tf.zeros_initializer(dtype=self.float_type),
             trainable=True)
         context_vector = tf.get_variable(
             name='context_vector',
             shape=[self.opt.enc_attention_dims],
             initializer=xi(uniform=False, dtype=self.float_type),
             trainable=True)
         # Compute attention values
         memory_values = tf.reshape(self.rnn_outputs,
                                    shape=[-1, self.opt.enc_hidden_dims],
                                    name='memory_values')
         projected_memories = tf.nn.tanh(tf.nn.xw_plus_b(
             memory_values, projection_weights, projection_biases),
                                         name='projected_memories')
         projected_memories = tf.reshape(
             projected_memories,
             shape=[self.batch_length, self.batch_steps, -1])
         # Mask out positions corresponding to padding within the input
         score_mask = tf.sequence_mask(self.length_mask,
                                       maxlen=tf.reduce_max(
                                           self.length_mask),
                                       dtype=self.float_type)
         score_mask = tf.expand_dims(score_mask, -1)
         score_mask = tf.matmul(
             score_mask,
             tf.ones([self.batch_length, self.opt.enc_attention_dims, 1]),
             transpose_b=True)
         projected_memories = tf.where(tf.cast(score_mask, dtype=tf.bool),
                                       projected_memories,
                                       tf.zeros_like(projected_memories))
         # Calculate how strongly each encoder hidden state should contribute to the sentence representation
         context_product = tf.reduce_sum(tf.multiply(
             projected_memories, context_vector, name='context_product'),
                                         axis=2,
                                         keep_dims=True)
         attention_weights = tf.nn.softmax(context_product,
                                           dim=1,
                                           name='importance_weight')
         # Weigh encoder hidden states according to the calculated importance weights
         weighted_memories = tf.multiply(projected_memories,
                                         attention_weights)
         # Sentence encodings are the importance-weighted sums of encoder hidden states / word representations
         sentence_encodings = tf.reduce_sum(weighted_memories,
                                            axis=1,
                                            name='sentence_encodings')
     return sentence_encodings
Example #13
    def __init__(self, hidden_size, rnn_cell, filter_dims, filter_nums, strides, all_scope, action_num, learning_rate):
        self.hidden_size = hidden_size
        self.rnn_cell = rnn_cell
        self.filter_dims = filter_dims
        self.filter_nums = filter_nums
        self.strides = strides
        self.all_scope = all_scope
        self.action_num = action_num
        self.learning_rate = learning_rate
        self.dtype = tf.float32

        # Define placeholders for input, training parameters, and training values
        self.scalar_input = tf.placeholder(shape=[None, 80 * 80 * 3], dtype=self.dtype, name='scalar_input')
        self.trace_length = tf.placeholder(dtype=tf.int32, name='train_duration')
        self.batch_size = tf.placeholder(dtype=tf.int32, name='batch_size')
        # Both below have shape=[batch_size * trace_len]
        self.target_q_holder = tf.placeholder(shape=[None], dtype=self.dtype, name='target_Q_values')
        self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32, name='actions_taken')

        # Reshape the flat scalar input back into image shape
        cnn_input = tf.reshape(self.scalar_input, shape=[-1, 80, 80, 3])

        # Conv output size: W_out = (W - F + 2P) / S + 1; e.g. (80 - 8) / 4 + 1 = 19 for the first layer
        # Define ConvNet layers for screen image analysis
        with tf.variable_scope(self.all_scope + '_cnn_1'):
            w_1 = tf.get_variable(name='weight', shape=[*self.filter_dims[0], 3, self.filter_nums[0]],
                                  initializer=xi_2d())
            b_1 = tf.get_variable(name='bias', shape=[self.filter_nums[0]], initializer=tf.constant_initializer(0.1))
            c_1 = tf.nn.conv2d(cnn_input, w_1, strides=[1, *self.strides[0], 1], padding='VALID', name='convolution')
            o_1 = tf.nn.relu(tf.nn.bias_add(c_1, b_1), name='output')  # shape=[19, 19, 32]

        with tf.variable_scope(self.all_scope + '_cnn_2'):
            w_2 = tf.get_variable(name='weight', shape=[*self.filter_dims[1], self.filter_nums[0], self.filter_nums[1]],
                                  initializer=xi_2d())
            b_2 = tf.get_variable(name='bias', shape=[self.filter_nums[1]], initializer=tf.constant_initializer(0.1))
            c_2 = tf.nn.conv2d(o_1, w_2, strides=[1, *self.strides[1], 1], padding='VALID', name='convolution')
            o_2 = tf.nn.relu(tf.nn.bias_add(c_2, b_2), name='output')  # shape=[8, 8, 64]

        with tf.variable_scope(self.all_scope + '_cnn_3'):
            w_3 = tf.get_variable(name='weight', shape=[*self.filter_dims[2], self.filter_nums[1], self.filter_nums[2]],
                                  initializer=xi_2d())
            b_3 = tf.get_variable(name='bias', shape=[self.filter_nums[2]], initializer=tf.constant_initializer(0.1))
            c_3 = tf.nn.conv2d(o_2, w_3, strides=[1, *self.strides[2], 1], padding='VALID', name='convolution')
            o_3 = tf.nn.relu(tf.nn.bias_add(c_3, b_3), name='output')  # shape=[7, 7, 64]

        with tf.variable_scope(self.all_scope + '_cnn_out'):
            w_4 = tf.get_variable(name='weight', shape=[*self.filter_dims[3], self.filter_nums[2], self.filter_nums[3]],
                                  initializer=xi_2d())
            b_4 = tf.get_variable(name='bias', shape=[self.filter_nums[3]], initializer=tf.constant_initializer(0.1))
            c_4 = tf.nn.conv2d(o_3, w_4, strides=[1, *self.strides[3], 1], padding='VALID', name='convolution')
            cnn_out = tf.nn.relu(tf.nn.bias_add(c_4, b_4), name='output')  # shape=[1, 1, 512]

        # Reshape ConvNet output to [batch_size, trace_len, hidden_size] to be fed into the RNN
        cnn_flat = tf.reshape(cnn_out, shape=[-1])
        rnn_input = tf.reshape(cnn_flat, [self.batch_size, self.trace_length, self.hidden_size], name='RNN_input')

        # Initialize RNN and feed the input
        self.state_in = self.rnn_cell.zero_state(self.batch_size, tf.float32)
        self.rnn_outputs, self.final_state = tf.nn.dynamic_rnn(cell=self.rnn_cell, inputs=rnn_input,
                                                               initial_state=self.state_in,
                                                               scope=self.all_scope + '_rnn', dtype=self.dtype)
        # Flatten RNN outputs across time steps
        rnn_2d = tf.reshape(self.rnn_outputs, shape=[-1, self.hidden_size])  # [batch_size * trace_len, hidden_size]

        # Split RNN output into advantage and value streams which are to guide the agent's policy
        with tf.variable_scope(self.all_scope + '_advantage_and_value'):
            a_w = tf.get_variable(name='advantage_weight', shape=[self.hidden_size // 2, self.action_num],
                                  dtype=self.dtype, initializer=xi())
            v_w = tf.get_variable(name='value_weight', shape=[self.hidden_size // 2, 1], dtype=self.dtype,
                                  initializer=xi())
            a_stream, v_stream = tf.split(rnn_2d, 2, axis=1)
            self.advantage = tf.matmul(a_stream, a_w, name='advantage')
            self.value = tf.matmul(v_stream, v_w, name='value')

        self.improve_vision = tf.gradients(self.advantage, cnn_input)

        # Predict the next action
        self.q_out = tf.add(self.value, tf.subtract(
            self.advantage, tf.reduce_mean(self.advantage, axis=1, keep_dims=True)),
                            name='predicted_action_distribution')  # shape=[batch_size * trace_len, num_actions]
        self.prediction = tf.argmax(self.q_out, axis=1, name='predicted_action')

        with tf.variable_scope(self.all_scope + '_loss'):
            # Obtain loss by measuring the difference between the prediction and the target Q-value
            actions_one_hot = tf.one_hot(self.action_holder, self.action_num, dtype=self.dtype)
            self.predicted_q = tf.reduce_sum(tf.multiply(self.q_out, actions_one_hot), axis=1,
                                             name='predicted_Q_values')
            # predicted_q and l2_loss have shape=[batch_size * trace_len] -> calculated per step
            self.l2_loss = tf.square(tf.subtract(self.predicted_q, self.target_q_holder), name='l2_loss')
            # Mask first half of the losses to only keep the 'important' values
            mask_drop = tf.zeros(shape=[self.batch_size, tf.cast(self.trace_length / 2, dtype=tf.int32)])
            mask_keep = tf.ones(shape=[self.batch_size, tf.cast(self.trace_length / 2, dtype=tf.int32)])
            mask = tf.concat([mask_drop, mask_keep], axis=1)  # shape=[batch_size, train_duration]
            flat_mask = tf.reshape(mask, [-1])
            self.loss = tf.reduce_mean(tf.multiply(self.l2_loss, flat_mask), name='total_loss')

            optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
            self.update_model = optimizer.minimize(self.loss)

    def __init__(self, input_dims, hidden_1, hidden_2, hidden_3, num_actions, learning_rate, binary_objective=True):
        self.input_dims = input_dims
        self.hidden_1 = hidden_1
        self.hidden_2 = hidden_2
        self.hidden_3 = hidden_3
        self.learning_rate = learning_rate
        self.dtype = tf.float32
        self.binary = binary_objective

        if self.binary:
            self.num_actions = num_actions - 1
        else:
            self.num_actions = num_actions

        self.state = tf.placeholder(shape=[None, self.input_dims], dtype=self.dtype, name='current_state')

        if self.binary:
            self.action_holder = tf.placeholder(shape=[None, 1], dtype=self.dtype, name='actions')
        else:
            self.action_holder = tf.placeholder(shape=[None, 1], dtype=tf.int32, name='actions')
        self.reward_holder = tf.placeholder(dtype=self.dtype, name='rewards')
        self.keep_prob = tf.placeholder(dtype=self.dtype, name='keep_prob')

        with tf.variable_scope('layer_1'):
            w1 = tf.get_variable(name='weight', shape=[self.input_dims, self.hidden_1], dtype=self.dtype,
                                 initializer=xi())
            o1 = tf.nn.relu(tf.matmul(self.state, w1), name='output')
            d1 = tf.nn.dropout(o1, self.keep_prob)
        with tf.variable_scope('layer_2'):
            w2 = tf.get_variable(name='weight', shape=[self.hidden_1, self.hidden_2], dtype=self.dtype,
                                 initializer=xi())
            o2 = tf.nn.relu(tf.matmul(d1, w2), name='output')
            d2 = tf.nn.dropout(o2, self.keep_prob)
        with tf.variable_scope('layer_3'):
            w3 = tf.get_variable(name='weight', shape=[self.hidden_2, self.hidden_3], dtype=self.dtype,
                                 initializer=xi())
            o3 = tf.nn.relu(tf.matmul(d2, w3), name='output')
        with tf.variable_scope('layer_4'):
            w4 = tf.get_variable(name='weight', shape=[self.hidden_3, self.num_actions], dtype=self.dtype,
                                 initializer=xi())
            score = tf.matmul(o3, w4, name='score')
            if self.binary:
                self.probability = tf.nn.sigmoid(score, name='action_probability')
            else:
                self.probability = tf.nn.softmax(score, name='action_probabilities')

        self.t_vars = tf.trainable_variables()

        with tf.variable_scope('loss'):
            optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
            self.gradient_holders = list()
            for _idx, var in enumerate(self.t_vars):
                placeholder = tf.placeholder(dtype=tf.float32, name=str(_idx) + '_holder')
                self.gradient_holders.append(placeholder)

            if self.binary:
                self.action_holder = tf.abs(self.action_holder - 1)
                log_lh = tf.log(
                    self.action_holder * (self.action_holder - self.probability) + (1 - self.action_holder) *
                    (self.action_holder + self.probability))
                self.loss = - tf.reduce_mean(log_lh * self.reward_holder)
            else:
                indices = tf.range(0, tf.shape(self.probability)[0]) * tf.shape(self.probability)[1] + \
                          tf.reshape(self.action_holder, [-1])
                responsible_outputs = tf.gather(tf.reshape(self.probability, [-1]), indices)
                self.loss = - tf.reduce_mean(tf.multiply(tf.log(responsible_outputs), self.reward_holder), name='loss')

            self.get_gradients = tf.gradients(self.loss, self.t_vars)
            self.batch_update = optimizer.apply_gradients(zip(self.gradient_holders, self.t_vars))
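
The gradient_holders indirection above exists so that per-episode gradients can be accumulated in a buffer and applied in one step; a minimal sketch of that outer loop (agent, sess, and episode_feed are assumptions, and numpy is imported as np):

 grad_buffer = [np.zeros(var.get_shape().as_list()) for var in agent.t_vars]
 # After each episode, accumulate the episode's gradients into the buffer:
 episode_grads = sess.run(agent.get_gradients, feed_dict=episode_feed)
 for idx, grad in enumerate(episode_grads):
     grad_buffer[idx] += grad
 # Every few episodes, apply the buffered gradients and reset the buffer:
 sess.run(agent.batch_update, feed_dict=dict(zip(agent.gradient_holders, grad_buffer)))
 grad_buffer = [buf * 0 for buf in grad_buffer]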
    def __init__(self, hidden_1, hidden_2, hidden_3, input_dims, state_dims, learning_rate):
        self.hidden_1 = hidden_1
        self.hidden_2 = hidden_2
        self.hidden_3 = hidden_3
        self.input_dims = input_dims
        self.state_dims = state_dims
        self.learning_rate = learning_rate
        self.dtype = tf.float32

        self.previous_state = tf.placeholder(shape=[None, self.state_dims], dtype=self.dtype, name='model_input')
        self.true_observation = tf.placeholder(shape=[None, self.input_dims], dtype=self.dtype, name='true_obs')
        self.true_reward = tf.placeholder(shape=[None, 1], dtype=self.dtype, name='true_reward')
        self.true_done = tf.placeholder(shape=[None, 1], dtype=self.dtype, name='true_done')
        self.keep_prob = tf.placeholder(dtype=self.dtype, name='keep_prob')

        # Define layers
        with tf.variable_scope('layer_1'):
            w_1 = tf.get_variable(name='weights', shape=[self.state_dims, self.hidden_1], dtype=self.dtype,
                                  initializer=xi())
            b_1 = tf.get_variable(name='biases', shape=[self.hidden_1], dtype=self.dtype,
                                  initializer=tf.constant_initializer(0.0))
            o_1 = tf.nn.relu(tf.nn.xw_plus_b(self.previous_state, w_1, b_1), name='output')
            d_1 = tf.nn.dropout(o_1, keep_prob=self.keep_prob)
        with tf.variable_scope('layer_2'):
            w_2 = tf.get_variable(name='weights', shape=[self.hidden_1, self.hidden_2], dtype=self.dtype,
                                  initializer=xi())
            b_2 = tf.get_variable(name='biases', shape=[self.hidden_2], dtype=self.dtype,
                                  initializer=tf.constant_initializer(0.0))
            o_2 = tf.nn.relu(tf.nn.xw_plus_b(d_1, w_2, b_2), name='output')
            d_2 = tf.nn.dropout(o_2, self.keep_prob)
        with tf.variable_scope('layer_3'):
            w_3 = tf.get_variable(name='weights', shape=[self.hidden_2, self.hidden_3], dtype=self.dtype,
                                  initializer=xi())
            b_3 = tf.get_variable(name='biases', shape=[self.hidden_3], dtype=self.dtype,
                                  initializer=tf.constant_initializer(0.0))
            o_3 = tf.nn.relu(tf.nn.xw_plus_b(d_2, w_3, b_3), name='output')
        with tf.variable_scope('prediction_layer'):
            w_obs = tf.get_variable(name='state_weight', shape=[self.hidden_3, self.input_dims], dtype=self.dtype,
                                    initializer=xi())
            b_obs = tf.get_variable(name='state_bias', shape=[self.input_dims], dtype=self.dtype,
                                    initializer=tf.constant_initializer(0.0))
            w_reward = tf.get_variable(name='reward_weight', shape=[self.hidden_3, 1], dtype=self.dtype,
                                       initializer=xi())
            b_reward = tf.get_variable(name='reward_bias', shape=[1], dtype=self.dtype,
                                       initializer=tf.constant_initializer(0.0))
            w_done = tf.get_variable(name='done_weight', shape=[self.hidden_3, 1], dtype=self.dtype,
                                     initializer=xi())
            b_done = tf.get_variable(name='done_bias', shape=[1], dtype=self.dtype,
                                     initializer=tf.constant_initializer(1.0))
            predicted_observation = tf.nn.xw_plus_b(o_3, w_obs, b_obs, name='observation_prediction')
            predicted_reward = tf.nn.xw_plus_b(o_3, w_reward, b_reward, name='reward_prediction')
            predicted_done = tf.nn.sigmoid(tf.nn.xw_plus_b(o_3, w_done, b_done, name='done_prediction'))
            self.predicted_state = tf.concat(values=[predicted_observation, predicted_reward, predicted_done],
                                             axis=1, name='state_prediction')

        # Get losses
        with tf.variable_scope('loss'):
            observation_loss = tf.square(tf.subtract(self.true_observation, predicted_observation),
                                         name='observation_loss')
            reward_loss = tf.square(tf.subtract(self.true_reward, predicted_reward), name='reward_loss')
            # Binary cross-entropy: likelihood assigned to the true done flag (1 if done, 0 otherwise)
            done_loss = tf.multiply(self.true_done, predicted_done) + tf.multiply(1 - self.true_done,
                                                                                  1 - predicted_done)
            done_loss = - tf.log(done_loss)
            self.loss = tf.reduce_mean(1.0 * observation_loss + 1.0 * reward_loss + 2.0 * done_loss,
                                       name='combined_loss')
            optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
            self.update_model = optimizer.minimize(loss=self.loss)
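
Finally, a hedged usage sketch for the environment-model network above; the class name EnvModel and the batch arrays are assumptions for illustration:

 model = EnvModel(hidden_1=256, hidden_2=256, hidden_3=256,
                  input_dims=4, state_dims=5, learning_rate=1e-3)  # EnvModel is a hypothetical name
 with tf.Session() as sess:
     sess.run(tf.global_variables_initializer())
     loss, _ = sess.run([model.loss, model.update_model],
                        feed_dict={model.previous_state: prev_states,  # [batch, state_dims]
                                   model.true_observation: next_obs,   # [batch, input_dims]
                                   model.true_reward: rewards,         # [batch, 1]
                                   model.true_done: dones,             # [batch, 1]
                                   model.keep_prob: 0.8})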