def __init__(self, is_training, length):
    """Build the unrolled recurrent regression graph.

    Creates the input/target placeholders, a recurrent cell selected by
    ``FLAGS.model``, unrolls that cell for ``length`` time steps, projects
    every step's output linearly to ``FLAGS.output_dim`` values and defines
    the mean-squared-error cost.  When ``is_training`` is true it also
    creates the learning-rate variable and an SGD train op whose gradients
    are clipped by global norm.

    Args:
        is_training: If true, dropout is applied to the cell output (only
            when ``FLAGS.keep_prob < 1``) and the training ops are built.
        length: Number of time steps the cell is unrolled for.

    Raises:
        ValueError: If ``FLAGS.model`` is not "rnn", "lstm" or "gru", or if
            ``FLAGS.layer`` is neither 1 nor 2 for the "rnn"/"lstm" models.
    """
    self.batch_size = batch_size = FLAGS.batch_size
    self.num_steps = num_steps = length
    hidden_size = FLAGS.hidden_dim

    # The time dimension is left open in the placeholders; the graph below
    # nevertheless reads exactly `num_steps` slices from it.
    self._input_data = tf.placeholder(tf.float32, [batch_size, None, FLAGS.input_dim])
    self._targets = tf.placeholder(tf.float32, [batch_size, None, FLAGS.output_dim])

    # Pick the basic cell type.
    if FLAGS.model == "rnn":
        base_cell = rnn_cell.BasicRNNCell(num_units=hidden_size)
    elif FLAGS.model == "lstm":
        base_cell = rnn_cell.BasicLSTMCell(num_units=hidden_size,
                                           forget_bias=1.0)
    elif FLAGS.model == "gru":
        base_cell = rnn_cell.GRUCell(num_units=hidden_size)
    else:
        # Format the message; passing the value as a second argument would
        # leave a literal "%s" in the exception text.
        raise ValueError("Invalid model: %s" % FLAGS.model)

    # Dropout on the cell output, training only.
    if is_training and FLAGS.keep_prob < 1:
        base_cell = rnn_cell.DropoutWrapper(base_cell,
                                            output_keep_prob=FLAGS.keep_prob)

    # Stacking.  The "gru" model has always been single-layer regardless of
    # FLAGS.layer; that behaviour is kept as is.
    if FLAGS.model == "gru" or FLAGS.layer == 1:
        cell = base_cell
    elif FLAGS.layer == 2:
        cell = rnn_cell.MultiRNNCell([base_cell] * 2)
    else:
        # Fail with a clear message instead of hitting an unbound `cell`
        # a few lines further down.
        raise ValueError("Invalid layer count: %s" % FLAGS.layer)

    self._initial_state = cell.zero_state(batch_size, tf.float32)

    # Manually unroll the cell over time, sharing weights after step 0.
    outputs = []
    state = self._initial_state
    with tf.variable_scope("RNN"):
        for time_step in range(num_steps):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            (cell_output, state) = cell(self._input_data[:, time_step, :], state)
            outputs.append(cell_output)
    self._final_state = state

    # Concatenating along axis 1 and reshaping yields one row per
    # (batch element, time step) pair, in the same order as `target` below.
    hidden_output = tf.reshape(tf.concat(1, outputs), [-1, hidden_size])

    # Linear read-out layer.
    V_1 = tf.get_variable("v_1", shape=[hidden_size, FLAGS.output_dim],
      initializer=tf.random_uniform_initializer(-tf.sqrt(1./hidden_size),tf.sqrt(1./hidden_size)))
    b_1 = tf.get_variable("b_1", shape=[FLAGS.output_dim], initializer=tf.constant_initializer(0.1))
    logits = tf.add(tf.matmul(hidden_output, V_1), b_1)

    target = tf.reshape(self._targets, [-1, FLAGS.output_dim])
    squared_error = tf.pow(logits - target, 2)
    # The summed (halved) error drives the gradients; the mean is what is
    # reported as the cost.
    training_loss = tf.reduce_sum(squared_error) / 2
    mse = tf.reduce_mean(squared_error)
    self._cost = mse

    if not is_training:
        return

    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(training_loss, tvars), FLAGS.max_grad_norm)
    # NOTE(review): `self.lr` relies on an accessor defined outside this
    # block -- presumably a property returning `self._lr`; confirm.
    optimizer = tf.train.GradientDescentOptimizer(self.lr)
    # Materialise the pairs so the argument is a real sequence on Python 3.
    self._train_op = optimizer.apply_gradients(list(zip(grads, tvars)))
# Example no. 2  (listing-page residue; the stray "0" that followed was presumably a vote counter)
    def __init__(self):
        """Build the graph of the learned (RNN-based) optimizer.

        The graph has three parts:

        * a training part that runs the recurrent cell for ``seq_length``
          steps inside ``tf.while_loop``, moving ``self.point`` by the
          update the cell proposes at each step, and derives
          ``self.total_loss`` from the resulting losses and updates;
        * training ops: clipped gradients (``self.gvs``), one placeholder
          per trainable variable (``self.grads_input``) and
          ``self.train_step``, which applies whatever is fed into those
          placeholders;
        * a comparison part that reuses the same weights for a single cell
          step on externally supplied gradients (``self.input_grads``,
          ``self.initial_rnn_state``) and exposes ``self.update`` and
          ``self.rnn_state_out_compare``.

        All sizes (``m``, ``k``, ``rnn_size``, ``num_rnn_layers``,
        ``seq_length``), ``rnn_type``, ``loss_asymmetry`` and the helpers
        ``snf``, ``inv_scale_grads`` and ``tf_norm`` come from outside this
        method.

        Raises:
            ValueError: If ``rnn_type`` is not 'rnn', 'gru' or 'lstm'.
        """
        # Reject unknown cell types before any graph op is created; the
        # cell selection below would otherwise leave `cell` unbound.
        if rnn_type not in ('rnn', 'gru', 'lstm'):
            raise ValueError("Invalid rnn_type: %s" % rnn_type)

        # Width of the recurrent state carried between steps.  The LSTM
        # state is sized at twice the per-layer width (presumably cell and
        # hidden state concatenated -- confirm against the TF version used).
        # The same width must be used for the training loop and for the
        # comparison placeholder.
        state_width = num_rnn_layers * rnn_size
        if rnn_type == 'lstm':
            state_width *= 2

        # Input
        self.point = tf.placeholder(tf.float32, [m, 1],
                                    'points')  # Used in training only
        self.variances = tf.placeholder(tf.float32, [k, 1], 'variances')
        self.weights = tf.placeholder(tf.float32, [k, 1], 'weights')
        self.hyperplanes = tf.placeholder(
            tf.float32, [m, m, k],
            'hyperplanes')  # Points which define the hyperplanes

        # initial_rnn_state is passed during evaluation but not during training
        # each dimension has an independent hidden state, required in order to simulate Adam, RMSProp etc.
        self.initial_rnn_state = tf.placeholder_with_default(
            input=tf.zeros([m, state_width]),
            shape=[None, state_width])

        # The scope allows these variables to be excluded from being reinitialized during the comparison phase
        with tf.variable_scope("optimizer"):
            if rnn_type == 'rnn':
                cell = rnn_cell.BasicRNNCell(rnn_size)
            elif rnn_type == 'gru':
                cell = rnn_cell.GRUCell(rnn_size)
            else:  # 'lstm'
                cell = rnn_cell.LSTMCell(rnn_size)

            self.cell = rnn_cell.MultiRNNCell([cell] * num_rnn_layers)

            # Arguments passed to the condition and body functions
            time = tf.constant(0)
            point = self.point

            # Loss and gradient at the starting point; the gradient is the
            # cell's input for the first step.
            snf_loss = snf.calc_snf_loss_tf(point, self.hyperplanes,
                                            self.variances, self.weights)
            snf_grads = snf.calc_grads_tf(snf_loss, point)
            # Drops a leading axis of size 1 -- presumably leaving [m, 1];
            # confirm against snf.calc_grads_tf.
            snf_grads = tf.squeeze(snf_grads, [0])

            snf_loss_ta = tf.TensorArray(dtype=tf.float32, size=seq_length)
            update_ta = tf.TensorArray(dtype=tf.float32, size=seq_length)
            # Must match the cell's state width (doubled for LSTM) so the
            # loop variable keeps one shape across iterations.
            rnn_state = tf.zeros([m, state_width])

            loop_vars = [
                time, point, snf_grads, rnn_state, snf_loss_ta, update_ta,
                self.hyperplanes, self.variances, self.weights
            ]

            def condition(time, point, snf_grads, rnn_state, snf_loss_ta,
                          update_ta, hyperplanes, variances, weights):
                """Continue while fewer than `seq_length` steps have run."""
                return tf.less(time, seq_length)

            def body(time, point, snf_grads, rnn_state, snf_loss_ta, update_ta,
                     hyperplanes, variances, weights):
                """Run one optimizer step and record its loss and update."""

                h, rnn_state_out = self.cell(snf_grads, rnn_state)

                # Final layer of the optimizer
                # Cannot use fc_layer due to a 'must be from the same frame' error
                d = np.sqrt(1.0) / np.sqrt(
                    rnn_size + 1)  ### should be sqrt(2, 3 or 6?)
                initializer = tf.random_uniform_initializer(-d, d)
                W = tf.get_variable("W", [rnn_size, 1],
                                    initializer=initializer)

                # No bias, linear activation function
                update = tf.matmul(h, W)
                update = tf.reshape(update, [m, 1])
                update = inv_scale_grads(update)

                new_point = point + update

                snf_loss = snf.calc_snf_loss_tf(new_point, hyperplanes,
                                                variances, weights)

                # Index `time` holds the loss *after* this step's update.
                snf_loss_ta = snf_loss_ta.write(time, snf_loss)
                update_ta = update_ta.write(time, update)

                # NOTE(review): the gradient of the new loss is taken with
                # respect to the old `point`, not `new_point` -- confirm
                # this is intended.
                snf_grads_out = snf.calc_grads_tf(snf_loss, point)
                snf_grads_out = tf.reshape(snf_grads_out, [m, 1])

                time += 1
                return [
                    time, new_point, snf_grads_out, rnn_state_out, snf_loss_ta,
                    update_ta, hyperplanes, variances, weights
                ]

            # Do the computation
            with tf.variable_scope("o1"):
                res = tf.while_loop(condition, body, loop_vars)

            self.new_point = res[1]
            self.rnn_state_out = res[3]
            losses = res[4].pack()
            updates = res[5].pack()

            # Total change in the SNF loss
            # Improvement: 2 - 3 = -1 (small loss)
            # NOTE(review): losses[0] is the loss after the first update,
            # not the loss at the starting point -- confirm this is intended.
            snf_loss_change = losses[seq_length - 1] - losses[0]
            snf_loss_change = tf.maximum(snf_loss_change, loss_asymmetry *
                                         snf_loss_change)  # Asymmetric loss
            self.loss_change_sign = tf.sign(snf_loss_change)

            # Oscillation cost: summed norm of the individual updates
            # divided by the norm of their sum.
            overall_update = tf.zeros([m, 1])
            norm_sum = 0.0

            for i in range(seq_length):
                overall_update += updates[i, :, :]
                norm_sum += tf_norm(updates[i, :, :])

            osc_cost = norm_sum / tf_norm(overall_update)  # > 1

            # The exponent is the sign of the loss change, so the loss
            # change is multiplied by osc_cost when positive and divided by
            # it when negative.
            self.total_loss = snf_loss_change * tf.pow(
                osc_cost, tf.sign(snf_loss_change))

            #===# Model training #===#
            #opt = tf.train.RMSPropOptimizer(0.01,momentum=0.5)
            opt = tf.train.AdamOptimizer()
            trainable_vars = tf.trainable_variables()

            gvs = opt.compute_gradients(self.total_loss, trainable_vars)

            # Element-wise clipped gradients, paired with their variables.
            self.gvs = [(tf.clip_by_value(grad, -1.0, 1.0), var)
                        for (grad, var) in gvs]

            # One placeholder per variable; `train_step` applies whatever
            # gradient values are fed into them.
            self.grads_input = [(tf.placeholder(tf.float32,
                                                shape=v.get_shape()), v)
                                for (g, v) in gvs]
            self.train_step = opt.apply_gradients(self.grads_input)

            #===# Comparison code #===#
            self.input_grads = tf.placeholder(
                tf.float32, [1, None, 1],
                'input_grads')  ### Remove first dimension?
            input_grads = tf.squeeze(self.input_grads, [0])

            # Reuse the weights created inside the while loop for a single
            # stand-alone step.
            with tf.variable_scope("o1", reuse=True):
                h, self.rnn_state_out_compare = self.cell(
                    input_grads, self.initial_rnn_state)

                W = tf.get_variable("W")
                update = tf.matmul(h, W)

                update = tf.reshape(update, [-1, 1])
                self.update = inv_scale_grads(update)