예제 #1
0
def seq2seq_f(cell, encoder_inputs, decoder_inputs, loop_output):
    """Build a sequence-to-sequence network around a single RNN cell.

    Args:
        cell: the RNNCell object used for both encoding and decoding
        encoder_inputs: a list of Tensors to feed the encoder
        decoder_inputs: a list of Tensors to feed the decoder
        loop_output: when true, the decoder is driven by a loop function
            that hands the previous output back as the next input; when
            false, the given decoder_inputs are used as they are

    Returns:
        outputs: the first value returned by the decoder call (a list of
            Tensors generated by the decoder)
        states: the second value returned by the same decoder call
            (NOTE(review): the original docstring described this as the
            encoder's final hidden state; what the library actually
            returns here depends on the TensorFlow version, so confirm)
    """
    if not loop_output:
        # Plain path: the library builds encoder and decoder in one call
        # and the decoder reads the supplied decoder inputs.
        outputs, states = seq2seq.basic_rnn_seq2seq(
            encoder_inputs, decoder_inputs, cell)
        return outputs, states

    def feed_previous(prev, i):
        # Simplest construction: the previous output becomes the next input.
        return prev

    # Run the encoder by hand so that the decoder can be given a loop function.
    _, enc_states = rnn.rnn(cell, encoder_inputs, dtype=tf.float32)
    # The original author notes that rnn.rnn returns every hidden state, not
    # only the last one, hence the [-1] to pick the final encoder state.
    final_enc_state = enc_states[-1]
    outputs, states = seq2seq.rnn_decoder(
        decoder_inputs, final_enc_state, cell, feed_previous)

    # Bounding the outputs to [-1, 1] with tf.tanh was considered by the
    # original author but is not applied.
    return outputs, states
예제 #2
0
    def __init__(self, vocab_size, sequence_length, num_units,
                 max_gradient_norm, batch_size, learning_rate,
                 learning_rate_decay_factor):
        """Build the seq2seq training graph and its bookkeeping ops.

        Args:
            vocab_size: width of every input row and number of logits
                produced per step.
            sequence_length: number of encoder steps; the decoder gets one
                additional step.
            num_units: passed to the LSTM cell and used as the input
                dimension of the output projection.
            max_gradient_norm: global-norm threshold used to clip gradients.
            batch_size: leading dimension of every placeholder.
            learning_rate: initial value of the non-trainable learning-rate
                variable.
            learning_rate_decay_factor: multiplier applied to the learning
                rate each time ``learning_rate_decay_op`` is run.
        """
        self.vocab_size = vocab_size
        self.sequence_length = sequence_length
        self.batch_size = batch_size

        # The learning rate is a graph variable so it can be decayed in place.
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        decayed_rate = self.learning_rate * learning_rate_decay_factor
        self.learning_rate_decay_op = self.learning_rate.assign(decayed_rate)
        self.global_step = tf.Variable(0, trainable=False)

        # Projection applied to each decoder output to obtain vocabulary logits.
        proj_weights = training.utils.gaussian_weights_variable(
            [num_units, self.vocab_size])
        proj_bias = tf.Variable(tf.zeros([self.vocab_size]))

        cell = rnn_cell.LSTMCell(num_units, vocab_size)

        symbol_shape = (batch_size, self.vocab_size)
        weight_shape = (batch_size,)

        # One encoder input, decoder input and target weight per time step.
        self.encoder_inputs = []
        self.decoder_inputs = []
        self.target_weights = []
        for _ in range(sequence_length):
            self.encoder_inputs.append(
                tf.placeholder(tf.float32, shape=symbol_shape))
            self.decoder_inputs.append(
                tf.placeholder(tf.float32, shape=symbol_shape))
            self.target_weights.append(
                tf.placeholder(tf.float32, shape=weight_shape))

        # The decoder has one extra step because it starts with the GO symbol
        # and the targets are shifted by one. The matching extra weight is a
        # constant array of ones; the original author doubted it is useful,
        # and the loss below indeed slices it away.
        self.decoder_inputs.append(
            tf.placeholder(tf.float32, shape=symbol_shape))
        self.target_weights.append(np.ones(weight_shape))

        # The sequence loss needs integer class indices, so take the argmax
        # of every decoder input after the first one.
        targets = []
        for shifted_input in self.decoder_inputs[1:]:
            class_ids = tf.argmax(shifted_input, 1)
            targets.append(tf.cast(class_ids, dtype=tf.int32))

        outputs, self.state = seq2seq.basic_rnn_seq2seq(
            self.encoder_inputs, self.decoder_inputs, cell)

        self.logits = [tf.nn.xw_plus_b(step_output, proj_weights, proj_bias)
                       for step_output in outputs]
        steps = self.sequence_length
        self.loss = seq2seq.sequence_loss(
            self.logits[:steps], targets, self.target_weights[:steps],
            self.vocab_size)

        # Plain SGD on globally clipped gradients.
        trainable = tf.trainable_variables()
        optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        raw_gradients = tf.gradients(self.loss, trainable)
        clipped, self.gradient_norms = tf.clip_by_global_norm(
            raw_gradients, max_gradient_norm)
        self.updates = optimizer.apply_gradients(
            zip(clipped, trainable), global_step=self.global_step)

        self.saver = tf.train.Saver(tf.all_variables())
예제 #3
0
  def testBasicRNNSeq2Seq(self):
    """Check the output count and shapes of basic_rnn_seq2seq on constants."""
    with self.test_session() as sess:
      half_init = tf.constant_initializer(0.5)
      with tf.variable_scope("root", initializer=half_init):
        # Two encoder steps and three decoder steps, each a 2x2 constant.
        encoder_steps = [tf.constant(0.5, shape=[2, 2]) for _ in xrange(2)]
        decoder_steps = [tf.constant(0.4, shape=[2, 2]) for _ in xrange(3)]

        # A GRU with 2 units whose outputs are projected to width 4.
        gru = rnn_cell.GRUCell(2)
        projected_cell = rnn_cell.OutputProjectionWrapper(gru, 4)
        decoder_out, memory = seq2seq.basic_rnn_seq2seq(
            encoder_steps, decoder_steps, projected_cell)
        sess.run([tf.initialize_all_variables()])

        # One output per decoder step, each of the projected width.
        out_values = sess.run(decoder_out)
        self.assertEqual(len(out_values), 3)
        self.assertEqual(out_values[0].shape, (2, 4))

        # The returned memory is expected to have 4 entries of shape (2, 2).
        mem_values = sess.run(memory)
        self.assertEqual(len(mem_values), 4)
        self.assertEqual(mem_values[0].shape, (2, 2))
예제 #4
0
# Build the loss and training op for a copy-style seq2seq model.
# `labels`, `enc_inp`, `memory_dim` and `vocab_size` are defined earlier in
# the script, outside this excerpt.

# Per-step loss weights: one all-ones float32 tensor shaped like each entry
# of `labels`, so every position contributes equally.
weights = [tf.ones_like(labels_t, dtype=tf.float32)
           for labels_t in labels]

# Decoder input: prepend a "GO" token (an all-zeros tensor shaped like the
# first encoder input) and drop the final token of the encoder input.
dec_inp = ([tf.zeros_like(enc_inp[0], dtype=np.float32, name="GO")]
           + enc_inp[:-1])

# Initial memory value for recurrence (disabled; the seq2seq call below is
# not given an explicit initial state).
#prev_mem = tf.zeros((batch_size, memory_dim))

# Debug output of the three input lists' shapes.
print("shapes", np.array(enc_inp).shape, np.array(dec_inp).shape, np.array(labels).shape)
cell = rnn_cell.GRUCell(memory_dim)

dec_outputs, dec_memory = seq2seq.basic_rnn_seq2seq(
    enc_inp, dec_inp, cell)

# NOTE(review): [5,100] is hard-coded -- presumably (sequence length, batch
# size); confirm against how `labels` is built. This also rebinds `labels_t`,
# the name used as the loop variable in the `weights` comprehension above.
labels_t = tf.reshape(labels, [5,100])
print(labels_t)
print(dec_outputs)
# NOTE(review): `labels_t` is a single reshaped tensor while `dec_outputs`
# and `weights` are lists -- TODO confirm sequence_loss accepts this mix in
# the TensorFlow version in use.
loss = seq2seq.sequence_loss(dec_outputs, labels_t, weights, vocab_size)
tf.scalar_summary("loss", loss)
#magnitude = tf.sqrt(tf.reduce_sum(tf.square(dec_memory[1])))
#tf.scalar_summary("magnitude at t=1", magnitude)
# Merge every summary registered so far into a single op.
summary_op = tf.merge_all_summaries()


# Momentum SGD on the sequence loss.
learning_rate = 0.05
momentum = 0.9
optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
train_op = optimizer.minimize(loss)
예제 #5
0
    def __init__(
            self, vocab_size, sequence_length, num_units, max_gradient_norm,
            batch_size, learning_rate, learning_rate_decay_factor):
        """Assemble placeholders, the seq2seq network, loss and update op.

        Args:
            vocab_size: width of every input row and number of logits
                produced per step.
            sequence_length: number of encoder steps; the decoder receives
                one more step than the encoder.
            num_units: passed to the LSTM cell and used as the input
                dimension of the output projection.
            max_gradient_norm: threshold for global-norm gradient clipping.
            batch_size: leading dimension of every placeholder.
            learning_rate: starting value of the learning-rate variable.
            learning_rate_decay_factor: factor the learning rate is
                multiplied by when ``learning_rate_decay_op`` runs.
        """
        self.vocab_size = vocab_size
        self.sequence_length = sequence_length
        self.batch_size = batch_size

        def symbol_placeholder():
            # One float row of width vocab_size per batch element.
            return tf.placeholder(
                tf.float32, shape=(batch_size, self.vocab_size))

        def weight_placeholder():
            # One float loss weight per batch element.
            return tf.placeholder(tf.float32, shape=(batch_size, ))

        # Non-trainable training state: learning rate, its decay op and
        # the global step counter.
        initial_rate = float(learning_rate)
        self.learning_rate = tf.Variable(initial_rate, trainable=False)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        # Projection from decoder outputs to vocabulary logits.
        projection_w = training.utils.gaussian_weights_variable(
            [num_units, self.vocab_size])
        projection_b = tf.Variable(tf.zeros([self.vocab_size]))

        lstm = rnn_cell.LSTMCell(num_units, vocab_size)

        # Local aliases refer to the very same lists stored on the instance.
        encoder_inputs = self.encoder_inputs = []
        decoder_inputs = self.decoder_inputs = []
        target_weights = self.target_weights = []
        for _ in range(sequence_length):
            encoder_inputs.append(symbol_placeholder())
            decoder_inputs.append(symbol_placeholder())
            target_weights.append(weight_placeholder())

        # The decoder has one extra step because it starts with the GO symbol
        # and the targets are shifted by one. The extra weight is a constant
        # array of ones; the original author was unsure it is useful, and the
        # loss below slices it off.
        decoder_inputs.append(symbol_placeholder())
        target_weights.append(np.ones((batch_size, )))

        # Targets for the sequence loss must be integer indices: the argmax
        # of each decoder input from the second one onwards.
        targets = [tf.cast(tf.argmax(shifted, 1), dtype=tf.int32)
                   for shifted in decoder_inputs[1:]]

        outputs, self.state = seq2seq.basic_rnn_seq2seq(
            encoder_inputs, decoder_inputs, lstm)

        self.logits = [tf.nn.xw_plus_b(out, projection_w, projection_b)
                       for out in outputs]
        used_steps = self.sequence_length
        self.loss = seq2seq.sequence_loss(
            self.logits[:used_steps], targets, target_weights[:used_steps],
            self.vocab_size)

        # Gradient descent with gradients clipped by their global norm.
        variables = tf.trainable_variables()
        sgd = tf.train.GradientDescentOptimizer(self.learning_rate)
        grads = tf.gradients(self.loss, variables)
        clipped_grads, self.gradient_norms = tf.clip_by_global_norm(
            grads, max_gradient_norm)
        self.updates = sgd.apply_gradients(
            zip(clipped_grads, variables), global_step=self.global_step)

        self.saver = tf.train.Saver(tf.all_variables())
예제 #6
0
  def __init__(self, is_training, config):
    """Build the seq2seq LSTM graph with class, bbox and ending-signal heads.

    Args:
      is_training: when true, dropout is applied (if config.keep_prob < 1)
        and the training ops are built; otherwise construction stops right
        after the losses.
      config: object supplying the hyper-parameters read below (batch_size,
        num_steps, input_size, num_classes, vid_per_batch, hidden_size,
        cls_weight, bbox_weight, ending_weight, iter_epoch, momentum,
        keep_prob, type, num_layers, cls_init, bbox_init, max_grad_norm).

    Raises:
      ValueError: if config.type is neither 'residual' nor 'basic'.
    """
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    self.input_size = input_size = config.input_size
    self.num_classes = num_classes = config.num_classes
    self.vid_per_batch = config.vid_per_batch
    size = config.hidden_size
    self.cls_weight = config.cls_weight
    self.bbox_weight = config.bbox_weight
    self.ending_weight = config.ending_weight
    self.iter_epoch = config.iter_epoch
    self.momentum = config.momentum

    # placeholders for inputs and outputs
    # NOTE(review): _bbox_weights is created here but not used anywhere in
    # this method -- confirm whether the bbox loss was meant to be weighted.
    self._input_data = inputs = tf.placeholder(tf.float32, [batch_size, num_steps, input_size])
    self._cls_targets = tf.placeholder(tf.int32, [batch_size, num_steps])
    self._bbox_targets = tf.placeholder(tf.float32, [batch_size, num_steps, num_classes * 4])
    self._bbox_weights = tf.placeholder(tf.float32, [batch_size, num_steps, num_classes * 4])
    self._end_targets = tf.placeholder(tf.float32, [batch_size, num_steps])

    if is_training and config.keep_prob < 1:
      inputs = tf.nn.dropout(inputs, config.keep_prob)

    # original inputs: [batch_size, num_steps, input_size]
    # after split + squeeze on axis 1: num_steps * [batch_size, input_size]
    inputs = [tf.squeeze(input_, [1])
              for input_ in tf.split(1, num_steps, inputs)]


    # Pick the recurrent cell implementation requested by the config.
    self.type = config.type
    if self.type == 'residual':
      lstm_cell = ResLSTMCell(size)
    elif self.type == 'basic':
      lstm_cell = tf.models.rnn.rnn_cell.BasicLSTMCell(size)
    else:
      raise ValueError('Unknown LSTM cell type: {}.'.format(self.type))
    if is_training and config.keep_prob < 1:
      lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
          lstm_cell, output_keep_prob=config.keep_prob)
    # NOTE(review): the same cell object is repeated for every layer; whether
    # layers end up sharing weights depends on the TensorFlow version.
    cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

    # TODO: decide initial state
    # NOTE(review): _initial_state is stored but not passed to the seq2seq
    # call below.
    self._initial_state = cell.zero_state(batch_size, tf.float32)

    # The decoder is fed the inputs in reverse order, so its outputs are
    # reversed again to line up with the original step order.
    outputs_rev, state = basic_rnn_seq2seq(inputs, inputs[::-1], cell)
    outputs = outputs_rev[::-1]
    # output: (num_steps * batch_size) * size, steps concatenated along axis 0
    output = tf.reshape(tf.concat(0, outputs), [-1, size])

    # Names of variables initialised from pre-trained files; they are trained
    # with a 100x smaller learning rate further down.
    self._small_lr_vars = []
    # build losses
    # class score
    if config.cls_init:
      # use pre-trained weights to initialize
      # NOTE(review): cPickle.load executes arbitrary code from the file;
      # only point cls_init at trusted files.
      with open(config.cls_init, 'rb') as f:
        log.info("Loading classificiation params from {}".format(config.cls_init))
        cls_w, cls_b = cPickle.load(f)
        softmax_w = tf.get_variable("softmax_w", initializer=tf.constant(cls_w))
        softmax_b = tf.get_variable("softmax_b", initializer=tf.constant(cls_b))
        self._small_lr_vars.append(softmax_w.name)
        self._small_lr_vars.append(softmax_b.name)
    else:
      softmax_w = tf.get_variable("softmax_w", [size, num_classes])
      softmax_b = tf.get_variable("softmax_b", [num_classes], initializer=tf.constant_initializer(0.))
    logits = tf.matmul(output, softmax_w) + softmax_b
    self._cls_scores = tf.nn.softmax(logits, name='cls_scores')
    # transpose cls_targets to make num_steps the leading axis
    cls_targets = tf.reshape(tf.transpose(self._cls_targets), [-1])
    loss_cls_score = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, cls_targets, name='loss_cls_score')
    # Summed cross entropy, averaged over batch_size * num_steps positions.
    self._cls_cost = cls_cost = tf.reduce_sum(loss_cls_score) / batch_size / num_steps

    # bounding box regression: L2 loss
    if config.bbox_init:
      # NOTE(review): same cPickle.load caveat as for cls_init above.
      with open(config.bbox_init, 'rb') as f:
        log.info("Loading bbox regression params from {}".format(config.bbox_init))
        bbox_w, bbox_b = cPickle.load(f)
      bbox_w = tf.get_variable("bbox_w", initializer=tf.constant(bbox_w))
      bbox_b = tf.get_variable("bbox_b", initializer=tf.constant(bbox_b))
      self._small_lr_vars.append(bbox_w.name)
      self._small_lr_vars.append(bbox_b.name)
    else:
      bbox_w = tf.get_variable("bbox_w", [size, num_classes * 4])
      bbox_b = tf.get_variable("bbox_b", [num_classes * 4])
    self._bbox_pred = bbox_pred = tf.matmul(output, bbox_w) + bbox_b
    # permute num_steps and batch_size
    bbox_targets = tf.reshape(tf.transpose(self._bbox_targets, (1, 0, 2)), [-1, 4 * num_classes])
    self._bbox_cost = bbox_cost = tf.nn.l2_loss(bbox_pred - bbox_targets) / batch_size / num_steps / 4.
    #self._bbox_cost = bbox_cost = tf.constant(0.)

    # ending signal: one logit per position, trained with sigmoid cross entropy
    end_w = tf.get_variable("end_w", [size, 1])
    end_b = tf.get_variable("end_b", [1], initializer=tf.constant_initializer(0.))
    end_pred = tf.matmul(output, end_w) + end_b
    end_targets = tf.reshape(tf.transpose(self._end_targets), [-1, 1])
    self._end_probs = tf.nn.sigmoid(end_pred, name='end_probs')
    loss_ending = tf.nn.sigmoid_cross_entropy_with_logits(end_pred, end_targets, name='loss_ending')
    self._end_cost = end_cost = tf.reduce_sum(loss_ending) / batch_size / num_steps

    # Total cost: weighted sum of the three losses.
    self._cost = cost = cls_cost * self.cls_weight + bbox_cost * self.bbox_weight + end_cost * self.ending_weight
    self._final_state = state

    # Inference-only models stop here; no optimizer ops are created.
    if not is_training:
      return

    self._lr = tf.Variable(1.0, trainable=False)
    # Split trainable variables into pre-trained ones (small learning rate)
    # and all the others (normal learning rate).
    tvars = tf.trainable_variables()
    n_tvars = []
    s_tvars = []
    for tvar in tvars:
        if tvar.name in self._small_lr_vars:
            s_tvars.append(tvar)
        else:
            n_tvars.append(tvar)
    # NOTE(review): the two groups are clipped independently and global_norm
    # is overwritten by the second call, so self.global_norm only reflects
    # the normal-learning-rate group. s_tvars is empty when neither cls_init
    # nor bbox_init is set -- TODO confirm the calls below accept empty lists.
    s_grads, global_norm = tf.clip_by_global_norm(tf.gradients(cost, s_tvars),
                                      config.max_grad_norm)
    n_grads, global_norm = tf.clip_by_global_norm(tf.gradients(cost, n_tvars),
                                      config.max_grad_norm)
    # NOTE(review): only self._lr is assigned in this method; self.lr is
    # presumably a property defined elsewhere on the class -- confirm.
    n_optimizer = tf.train.MomentumOptimizer(self.lr, self.momentum)
    s_optimizer = tf.train.MomentumOptimizer(self.lr * 0.01, self.momentum)
    self._train_op = tf.group(
        n_optimizer.apply_gradients(zip(n_grads, n_tvars)),
        s_optimizer.apply_gradients(zip(s_grads, s_tvars)))
    self.global_norm = global_norm
예제 #7
0
# Build the loss and summaries for a copy-style seq2seq model.
# `labels`, `enc_inp`, `batch_size` and `memory_dim` are defined earlier in
# the script, outside this excerpt.

# Per-step loss weights: one all-ones float32 tensor shaped like each entry
# of `labels`, so every position contributes equally.
weights = [tf.ones_like(labels_t, dtype=tf.float32)
           for labels_t in labels]

# Decoder input: prepend a "GO" token (an all-zeros tensor shaped like the
# first encoder input) and drop the final token of the encoder input.
dec_inp = ([tf.zeros_like(enc_inp[0], dtype=np.float32, name="GO")]
           + enc_inp[:-1])

# Initial memory value for recurrence.
# NOTE(review): prev_mem is not used anywhere in this excerpt.
prev_mem = tf.zeros((batch_size, memory_dim))

cell = rnn_cell.BasicLSTMCell(memory_dim)


#enc_inp = np.tile(enc_inp, 2).tolist()
logits, state = seq2seq.basic_rnn_seq2seq(
    enc_inp, dec_inp, cell)#, vocab_size, vocab_size)

# Debug output of the graph inputs and outputs.
for i, inp in enumerate(enc_inp):
    print(i, inp)
print("logits", logits)
print('labels', labels)
loss = seq2seq.sequence_loss(logits, labels, weights)
# NOTE(review): summary_op holds only the loss summary; the magnitude summary
# registered below is not part of it. Use tf.merge_all_summaries() after both
# are registered if the magnitude should be reported as well.
summary_op = tf.scalar_summary("loss", loss)

# L2 magnitude of the returned state. The intermediate is deliberately not
# called `sum`, so the builtin stays usable in the rest of the script.
square = tf.square(state)
sum_of_squares = tf.reduce_sum(square)
magnitude = tf.sqrt(sum_of_squares)
tf.scalar_summary("magnitude at t=1", magnitude)

# Hyper-parameters for the optimizer that is set up after this excerpt.
learning_rate = 0.05
momentum = 0.9