예제 #1
파일: RNNLM.py 프로젝트: yysherlock/pomelo
    def load_data(self, data_dir, debug=False):
        """Load the train/valid/test corpora and encode them as word ids.

        Each split is read with ``get_dataset`` from ``<data_dir>train.txt``,
        ``<data_dir>valid.txt`` and ``<data_dir>test.txt``; every word is
        mapped through ``self.vocab.encode`` and the results are stored on
        ``self.encoded_train``, ``self.encoded_valid`` and
        ``self.encoded_test``.

        Args:
            data_dir: Prefix the file names are appended to by plain string
                concatenation, so a directory path must already end with its
                separator.
            debug: When true, keep only the first ``1024 * 3`` entries of
                each split to make experiments quick.
        """
        self.vocab = Vocab()
        # NOTE(review): the vocabulary is not populated from the training
        # split in this method -- confirm Vocab.encode copes with that.

        def encode_split(name):
            # Encode one split; int32 matches the tf.int32 placeholders the
            # ids are later fed into.
            path = data_dir + "{}.txt".format(name)
            return np.array(
                [self.vocab.encode(word) for word in get_dataset(path)],
                dtype=np.int32)

        self.encoded_train = encode_split('train')
        self.encoded_valid = encode_split('valid')
        self.encoded_test = encode_split('test')

        if debug:
            num_debug = 1024 * 3
            self.encoded_train = self.encoded_train[:num_debug]
            self.encoded_valid = self.encoded_valid[:num_debug]
            self.encoded_test = self.encoded_test[:num_debug]
    def add_placeholders(self):
        """Create the placeholders through which data is fed to the graph.

        A ``None`` entry in a placeholder shape leaves that dimension
        flexible.

        Sets:
            self.input_placeholder: int32 placeholder of shape
                (None, num_steps), named 'Input'.
            self.labels_placeholder: int32 placeholder of shape
                (None, num_steps), named 'Target'.
            self.dropout_placeholder: float32 placeholder with no fixed
                shape, named 'Dropout'.
        """
        step_shape = [None, self.config.num_steps]
        self.input_placeholder = tf.placeholder(
            tf.int32, shape=step_shape, name='Input')
        self.labels_placeholder = tf.placeholder(
            tf.int32, shape=step_shape, name='Target')
        self.dropout_placeholder = tf.placeholder(tf.float32, name='Dropout')

    def add_embedding(self):
        """Add the embedding layer and look up the input word ids.

        Returns:
            inputs: List of length num_steps, each of whose elements is a
                tensor of shape (batch_size, embed_size).
        """
        # The embedding lookup is currently only implemented for the CPU.
        with tf.device('/cpu:0'):
            embedding = tf.get_variable(
                'Embedding', [len(self.vocab), self.config.embed_size])
            # (data_size, num_steps, embed_size)
            looked_up = tf.nn.embedding_lookup(
                embedding, self.input_placeholder)
            # Split along the time axis and drop that axis, so each element
            # is (data_size, embed_size).
            per_step = tf.split(1, self.config.num_steps, looked_up)
            inputs = [tf.squeeze(x, [1]) for x in per_step]
            return inputs

    def add_projection(self, rnn_outputs):
        """Project every RNN output onto the vocabulary.

        Args:
            rnn_outputs: List of length num_steps, each of whose elements
                should be a tensor of shape (batch_size, hidden_size).

        Returns:
            outputs: List of length num_steps, each a tensor of shape
                (batch_size, len(vocab)).
        """
        vocab_size = len(self.vocab)
        with tf.variable_scope('Softmax'):
            U = tf.get_variable('U', [self.config.hidden_size, vocab_size])
            b_2 = tf.get_variable('b_2', [vocab_size])
            # The same projection weights are applied to the hidden state of
            # every time step.
            outputs = [tf.matmul(rnn_output, U) + b_2
                       for rnn_output in rnn_outputs]
        return outputs

    def add_loss_op(self, output):
        """Add the sequence loss op to the computational graph.

        Every position is weighted equally: the weight vector is all ones
        with length batch_size * num_steps, so ``output`` is expected to
        have that many rows.

        Args:
            output: Logits tensor of shape (None, len(vocab)).

        Returns:
            loss: A 0-d tensor (scalar).
        """
        num_targets = self.config.batch_size * self.config.num_steps
        all_ones_weights = [tf.ones([num_targets])]
        # Labels are flattened so they line up row-for-row with the logits.
        flat_labels = tf.reshape(self.labels_placeholder, [-1])
        loss = sequence_loss([output], [flat_labels], all_ones_weights)
        return loss

    def add_training_op(self, loss):
        """Set up the training op.

        Registers a "cost" scalar summary for the loss and minimizes it with
        Adam at learning rate ``self.config.lr``.

        Args:
            loss: Loss tensor to minimize.

        Returns:
            train_op: The op for one training step; running it also
                increments a non-trainable ``global_step`` variable.
        """
        tf.scalar_summary("cost", loss)
        optimizer = tf.train.AdamOptimizer(learning_rate=self.config.lr)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op = optimizer.minimize(loss, global_step=global_step)
        return train_op

    def __init__(self, config):
        """Load the data and build the whole language-model graph.

        Args:
            config: Configuration object; this method reads
                ``config.data_dir`` and the model-building methods read
                further hyper-parameters from it.
        """
        self.config = config
        data_dir = config.data_dir
        # The vocabulary and the placeholders must exist before any layer is
        # built: add_embedding reads self.vocab and self.input_placeholder.
        self.load_data(data_dir)
        self.add_placeholders()

        self.inputs = self.add_embedding()
        self.rnn_outputs = self.add_model(self.inputs)
        self.outputs = self.add_projection(self.rnn_outputs)

        # Softmax is taken in float64 on a cast copy of each step's logits.
        self.predictions = [tf.nn.softmax(tf.cast(o, 'float64'))
                            for o in self.outputs]
        # Concatenate the per-step logits and flatten to one row per target.
        output = tf.reshape(tf.concat(1, self.outputs),
                            [-1, len(self.vocab)])
        self.calculate_loss = self.add_loss_op(output)
        self.train_step = self.add_training_op(self.calculate_loss)

    def add_model(self, inputs):
        """Create the RNN LM model.

        Also sets ``self.initial_state`` (a zero tensor of shape
        (batch_size, hidden_size)) and ``self.final_state`` (the last
        element of the returned list).

        Args:
            inputs: List of length num_steps, each of whose elements should
                be a tensor of shape (batch_size, embed_size).

        Returns:
            outputs: List of length num_steps, each of whose elements is a
                tensor of shape (batch_size, hidden_size).
        """
        with tf.variable_scope('RNN') as scope:
            self.initial_state = tf.zeros(
                [self.config.batch_size, self.config.hidden_size])
            hidden_state = self.initial_state
            rnn_outputs = []
            for tstep, rnn_input in enumerate(inputs):
                # Every step after the first shares the same H, I and b1.
                if tstep > 0:
                    scope.reuse_variables()
                H = tf.get_variable(
                    'H', [self.config.hidden_size, self.config.hidden_size])  # Wh
                I = tf.get_variable(
                    'I', [self.config.embed_size, self.config.hidden_size])  # Wx
                b1 = tf.get_variable('b1', [self.config.hidden_size])
                rnn_input = tf.nn.dropout(rnn_input, self.dropout_placeholder)
                hidden_state = (tf.nn.tanh(tf.matmul(rnn_input, I) + b1)
                                + tf.nn.tanh(tf.matmul(hidden_state, H)))
                output = tf.nn.dropout(hidden_state, self.dropout_placeholder)
                # Collect the per-step output; without this the list stays
                # empty and rnn_outputs[-1] below raises IndexError.
                rnn_outputs.append(output)

        # NOTE(review): this is the dropout-applied output of the last step,
        # not the raw hidden state -- confirm that is the state meant to be
        # carried into the next batch.
        self.final_state = rnn_outputs[-1]

        return rnn_outputs

    def run_epoch(self, session, data, train_op=None, verbose=10):
        """Run one pass over ``data`` and return its perplexity.

        Args:
            session: TensorFlow session used to run the graph.
            data: Sequence of encoded word ids; batched with
                ``rnn_data_iterator``.
            train_op: Op to run on every batch. When omitted, a no-op is
                used and the dropout keep value is set to 1 (evaluation).
            verbose: Print progress every ``verbose`` steps; a falsy value
                disables progress output.

        Returns:
            exp(mean(per-batch loss)) over the epoch.
        """
        config = self.config
        dp = config.dropout
        if not train_op:
            train_op = tf.no_op()
            dp = 1
        batch_size = config.batch_size
        print('batch_size:', batch_size)
        data_len = len(data)
        batch_len = data_len // batch_size
        epoch_size = (batch_len - 1) // config.num_steps
        print('epoch_size:', epoch_size)
        total_steps = sum(
            1 for _ in rnn_data_iterator(
                data, config.batch_size, config.num_steps))

        total_loss = []
        # Evaluate in the session we were given rather than relying on a
        # default session being installed.
        state = self.initial_state.eval(session=session)
        for step, (x, y) in enumerate(
                rnn_data_iterator(data, config.batch_size, config.num_steps)):
            # We need to pass in the initial state and retrieve the final
            # state to give the RNN proper history.
            feed = {self.input_placeholder: x,
                    self.labels_placeholder: y,
                    self.initial_state: state,
                    self.dropout_placeholder: dp}
            loss, state, _ = session.run(
                [self.calculate_loss, self.final_state, train_op],
                feed_dict=feed)
            # Record the batch loss; the perplexity below is computed from
            # this list.
            total_loss.append(loss)
            if verbose and step % verbose == 0:
                sys.stdout.write('\r{} / {} : pp = {}'.format(
                    step, total_steps, np.exp(np.mean(total_loss))))
                sys.stdout.flush()
        if verbose:
            # Return the cursor to the line start after the progress line.
            sys.stdout.write('\r')
        return np.exp(np.mean(total_loss))