Example #1
def get_model(input_vocab_size, output_vocab_size):
    # Build a Seq2Seq model whose hyperparameters all come from the
    # command-line FLAGS; the bucket list and the RNN cell are resolved
    # through the get_buckets and get_rnn_cell helpers.
    return Seq2Seq(input_vocab_size, output_vocab_size,
                   get_buckets(FLAGS.buckets), FLAGS.layer_size,
                   FLAGS.n_layers, FLAGS.max_gradient_norm, FLAGS.batch_size,
                   FLAGS.learning_rate, FLAGS.learning_rate_decay_factor,
                   get_rnn_cell(FLAGS.rnn_cell), FLAGS.n_samples,
                   FLAGS.forward_only)
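In this example get_rnn_cell receives only the cell name from FLAGS.rnn_cell, and its result is passed straight to the Seq2Seq constructor (see Example #4), which suggests it resolves a name to a TF 1.x cell constructor rather than building a cell instance. A minimal sketch under that assumption; the accepted names and the exact mapping are hypothetical:

import tensorflow as tf

def get_rnn_cell(cell_name):
    # Hypothetical sketch: map a cell name such as 'lstm' or 'gru' to the
    # corresponding TF 1.x cell constructor; Seq2Seq can then instantiate it
    # with the configured layer size.
    cells = {
        'lstm': tf.nn.rnn_cell.LSTMCell,
        'gru': tf.nn.rnn_cell.GRUCell,
        'rnn': tf.nn.rnn_cell.BasicRNNCell,
    }
    try:
        return cells[cell_name.lower()]
    except KeyError:
        raise ValueError('unknown RNN cell: {}'.format(cell_name))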
Example #2
    def _define_embedings_(self):
        with tf.variable_scope("words"):
            _word_embeddings = tf.Variable(
                self.embeddings,
                name="_word_embeddings",
                dtype=tf.float32,
                trainable=self.config.train_embeddings)
            word_embeddings = tf.nn.embedding_lookup(_word_embeddings,
                                                     self.word_ids,
                                                     name="word_embeddings")

        with tf.variable_scope("chars"):
            _char_embeddings = tf.get_variable(
                name="_char_embeddings",
                dtype=tf.float32,
                shape=[self.nchars, self.config.dim_char])
            char_embeddings = tf.nn.embedding_lookup(_char_embeddings,
                                                     self.char_ids,
                                                     name="char_embeddings")

            # Flatten [batch, max_words, max_chars, dim_char] into
            # [batch * max_words, max_chars, dim_char] so that every word
            # becomes one sequence for the character-level BiLSTM.
            s = tf.shape(char_embeddings)
            char_embeddings = tf.reshape(
                char_embeddings, shape=[-1, s[-2], self.config.dim_char])
            word_lengths = tf.reshape(self.word_lengths, shape=[-1])

            cell_fw = get_rnn_cell(self.config.char_rnn_size,
                                   "LSTM",
                                   state_is_tuple=True)
            cell_bw = get_rnn_cell(self.config.char_rnn_size,
                                   "LSTM",
                                   state_is_tuple=True)

            # Keep only the final hidden states of the forward and backward
            # character LSTMs; the per-step outputs are discarded.
            _, ((_, output_fw), (_, output_bw)) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw,
                cell_bw,
                char_embeddings,
                sequence_length=word_lengths,
                dtype=tf.float32)

            # Concatenate both directions and restore the word dimension:
            # [batch, max_words, 2 * char_rnn_size].
            output = tf.concat([output_fw, output_bw], axis=-1)
            output = tf.reshape(
                output, shape=[-1, s[1], 2 * self.config.char_rnn_size])

            # Each word is represented by its word embedding concatenated
            # with the character-level BiLSTM summary.
            word_embeddings = tf.concat([word_embeddings, output], axis=-1)
        self.word_embeddings = tf.nn.dropout(word_embeddings, self.dropout)
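In this example (and the next) get_rnn_cell takes a hidden size and a cell type string and must return a ready-to-use cell instance, unlike the single-argument variant in Example #1. A minimal sketch under that assumption; the supported type strings and the keyword handling are guesses:

import tensorflow as tf

def get_rnn_cell(num_units, cell_type, state_is_tuple=True):
    # Hypothetical sketch: build a TF 1.x RNN cell instance of the requested
    # type and hidden size.
    if cell_type == "LSTM":
        return tf.nn.rnn_cell.LSTMCell(num_units, state_is_tuple=state_is_tuple)
    if cell_type == "GRU":
        return tf.nn.rnn_cell.GRUCell(num_units)
    raise ValueError("unsupported RNN cell type: {}".format(cell_type))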
Example #3
    def _define_logits_(self):
        with tf.variable_scope("bi-lstm"):
            cell_fw = get_rnn_cell(self.config.word_rnn_size, "LSTM")
            cell_bw = get_rnn_cell(self.config.word_rnn_size, "LSTM")
            (output_fw, output_bw), _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw,
                cell_bw,
                self.word_embeddings,
                sequence_length=self.sequence_lengths,
                dtype=tf.float32)
            output = tf.concat([output_fw, output_bw], axis=-1)

        # Flatten to [batch * time, 2 * word_rnn_size] and project every
        # timestep to either a single score ("lstm_sig") or ntags logits.
        ntime_steps = tf.shape(output)[1]
        output = tf.reshape(output, [-1, 2 * self.config.word_rnn_size])
        pred_dim = 1 if self.config.model == "lstm_sig" else self.ntags
        pred = create_feedforward(output, 2 * self.config.word_rnn_size,
                                  pred_dim, self.create_initializer, "linear",
                                  "projection")

        if self.config.model == "lstm_sig":
            pred = tf.sigmoid(pred)
            self.logits = tf.reshape(pred, [-1, ntime_steps])
        else:
            self.logits = tf.reshape(pred, [-1, ntime_steps, self.ntags])
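create_feedforward is not shown in these examples; judging from the call above it builds a single projection layer from 2 * word_rnn_size to pred_dim inside a named variable scope. A minimal sketch under that assumption (the initializer handling and the activation argument are hypothetical):

import tensorflow as tf

def create_feedforward(inputs, input_dim, output_dim, create_initializer,
                       activation, scope):
    # Hypothetical sketch: a single fully connected layer, W x + b.
    # create_initializer is assumed to return a TF initializer when called.
    with tf.variable_scope(scope):
        W = tf.get_variable("W", shape=[input_dim, output_dim],
                            dtype=tf.float32, initializer=create_initializer())
        b = tf.get_variable("b", shape=[output_dim], dtype=tf.float32,
                            initializer=tf.zeros_initializer())
        out = tf.matmul(inputs, W) + b
        return out if activation == "linear" else tf.nn.relu(out)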
Example #4
    def __init__(self,
                 input_vocab_size,
                 output_vocab_size,
                 buckets=get_buckets(DEFAULT_BUCKETS_STRING),
                 layer_size=DEFAULT_LAYER_SIZE,
                 n_layers=DEFAULT_N_LAYERS,
                 max_gradient_norm=DEFAULT_MAX_GRADIENT_NORM,
                 batch_size=DEFAULT_BATCH_SIZE,
                 learning_rate=DEFAULT_LEARNING_RATE,
                 learning_rate_decay_factor=DEFAULT_LEARNING_RATE_DECAY_FACTOR,
                 rnn_cell=get_rnn_cell(DEFAULT_RNN_CELL),
                 n_samples=DEFAULT_N_SAMPLES,
                 forward_only=DEFAULT_FORWARD_ONLY):
        logging.info('initializing Seq2Seq model')
        buckets = get_sorted_buckets(buckets)

        self.input_vocab_size = input_vocab_size
        self.output_vocab_size = output_vocab_size
        self.buckets = buckets
        self.layer_size = layer_size
        self.n_layers = n_layers
        self.max_gradient_norm = max_gradient_norm
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.learning_rate_decay_factor = learning_rate_decay_factor
        self.rnn_cell = rnn_cell
        self.n_samples = n_samples
        self.forward_only = forward_only

        logging.debug('saving params')
        self._save_params()

        logging.debug('assigning learning rate')
        # The learning rate is stored as a non-trainable variable so it can be
        # decayed in-graph by running learning_rate_decay_operation.
        self.learning_rate = tf.Variable(float(self.learning_rate),
                                         trainable=False)
        self.learning_rate_decay_operation = self.learning_rate.assign(
            self.learning_rate * self.learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        logging.debug('creating placeholders')
        # One placeholder per timestep, sized for the largest bucket.
        self.encoder_inputs = [
            self._get_val_placeholder('encoder', i)
            for i in xrange(buckets[-1][0])
        ]

        # Decoder inputs and target weights get one extra slot because the
        # targets are the decoder inputs shifted by one position.
        self.decoder_inputs = [
            self._get_val_placeholder('decoder', i)
            for i in xrange(buckets[-1][1] + 1)
        ]
        self.target_weights = [
            self._get_val_placeholder('weight', i, dtype=tf.float32)
            for i in xrange(buckets[-1][1] + 1)
        ]

        self.outputs, self.losses = self._get_model_with_buckets()

        logging.debug('building saver')
        self.saver = tf.train.Saver(tf.global_variables())

        if not forward_only:
            # Gradient clipping and parameter update ops are only built when
            # the model is used for training.
            out = self._get_gradient_norms_and_updates()
            self.gradient_norms, self.updates = out

        self.embedding_operation = self._get_embedding_operation()
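The _get_val_placeholder helper is not included in the snippet; from its calls above it appears to create one 1-D placeholder per timestep, defaulting to int32 token ids with a float32 override for the target weights. A minimal sketch under that assumption, written as a method of the same class and assuming tensorflow is imported as tf at module level (the naming scheme is a guess):

    def _get_val_placeholder(self, name, i, dtype=tf.int32):
        # Hypothetical sketch: one [batch_size]-shaped placeholder per
        # timestep, named e.g. 'encoder0', 'encoder1', ... as in the TF 1.x
        # bucketed seq2seq tutorial.
        return tf.placeholder(dtype, shape=[None],
                              name='{}{}'.format(name, i))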