def get_model(input_vocab_size, output_vocab_size):
    # Build a Seq2Seq model configured entirely from command-line FLAGS.
    return Seq2Seq(input_vocab_size,
                   output_vocab_size,
                   get_buckets(FLAGS.buckets),
                   FLAGS.layer_size,
                   FLAGS.n_layers,
                   FLAGS.max_gradient_norm,
                   FLAGS.batch_size,
                   FLAGS.learning_rate,
                   FLAGS.learning_rate_decay_factor,
                   get_rnn_cell(FLAGS.rnn_cell),
                   FLAGS.n_samples,
                   FLAGS.forward_only)
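# A minimal usage sketch, not from the repo: it assumes FLAGS has been defined
# via tf.app.flags elsewhere and that the vocabulary sizes come from prepared
# data files. The function name and sizes are illustrative only.
def example_get_model_usage():
    with tf.Session() as sess:
        model = get_model(input_vocab_size=40000, output_vocab_size=40000)
        sess.run(tf.global_variables_initializer())
        # model.saver can then restore a checkpoint, e.g.:
        # model.saver.restore(sess, 'checkpoints/seq2seq.ckpt')
        return model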
def _define_embedings_(self):
    # Word-level embeddings, optionally fine-tuned during training.
    with tf.variable_scope("words"):
        _word_embeddings = tf.Variable(
            self.embeddings,
            name="_word_embeddings",
            dtype=tf.float32,
            trainable=self.config.train_embeddings)
        word_embeddings = tf.nn.embedding_lookup(
            _word_embeddings, self.word_ids, name="word_embeddings")

    # Character-level embeddings fed through a bidirectional LSTM; the final
    # forward/backward states form a per-word character representation.
    with tf.variable_scope("chars"):
        _char_embeddings = tf.get_variable(
            name="_char_embeddings",
            dtype=tf.float32,
            shape=[self.nchars, self.config.dim_char])
        char_embeddings = tf.nn.embedding_lookup(
            _char_embeddings, self.char_ids, name="char_embeddings")

        # Flatten (batch, words, chars, dim) to (batch * words, chars, dim)
        # so that each word becomes its own sequence for the char RNN.
        s = tf.shape(char_embeddings)
        char_embeddings = tf.reshape(
            char_embeddings, shape=[-1, s[-2], self.config.dim_char])
        word_lengths = tf.reshape(self.word_lengths, shape=[-1])

        cell_fw = get_rnn_cell(self.config.char_rnn_size, "LSTM",
                               state_is_tuple=True)
        cell_bw = get_rnn_cell(self.config.char_rnn_size, "LSTM",
                               state_is_tuple=True)
        # Keep only the final hidden states (the h of each LSTMStateTuple).
        _, ((_, output_fw), (_, output_bw)) = tf.nn.bidirectional_dynamic_rnn(
            cell_fw, cell_bw, char_embeddings,
            sequence_length=word_lengths, dtype=tf.float32)
        output = tf.concat([output_fw, output_bw], axis=-1)
        output = tf.reshape(
            output, shape=[-1, s[1], 2 * self.config.char_rnn_size])

        # Concatenate the word and character representations.
        word_embeddings = tf.concat([word_embeddings, output], axis=-1)

    self.word_embeddings = tf.nn.dropout(word_embeddings, self.dropout)
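# `get_rnn_cell` is defined elsewhere in the repo; a plausible reconstruction
# of the variant used above (size plus a cell-type string) is sketched below.
# The TF 1.x cell classes are real; the helper itself is an assumption.
def get_rnn_cell_sketch(size, cell_type, state_is_tuple=True):
    if cell_type == "LSTM":
        return tf.contrib.rnn.LSTMCell(size, state_is_tuple=state_is_tuple)
    if cell_type == "GRU":
        return tf.contrib.rnn.GRUCell(size)
    raise ValueError("unknown cell type: %s" % cell_type)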
def _define_logits_(self):
    # Word-level bidirectional LSTM over the combined word + char embeddings.
    with tf.variable_scope("bi-lstm"):
        cell_fw = get_rnn_cell(self.config.word_rnn_size, "LSTM")
        cell_bw = get_rnn_cell(self.config.word_rnn_size, "LSTM")
        (output_fw, output_bw), _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw, cell_bw, self.word_embeddings,
            sequence_length=self.sequence_lengths, dtype=tf.float32)
        output = tf.concat([output_fw, output_bw], axis=-1)

    # Project every time step to the prediction space: a single sigmoid
    # score per token for the "lstm_sig" model, or one logit per tag.
    ntime_steps = tf.shape(output)[1]
    output = tf.reshape(output, [-1, 2 * self.config.word_rnn_size])
    pred_dim = 1 if self.config.model == "lstm_sig" else self.ntags
    pred = create_feedforward(output, 2 * self.config.word_rnn_size,
                              pred_dim, self.create_initializer,
                              "linear", "projection")
    if self.config.model == "lstm_sig":
        pred = tf.sigmoid(pred)
        self.logits = tf.reshape(pred, [-1, ntime_steps])
    else:
        self.logits = tf.reshape(pred, [-1, ntime_steps, self.ntags])
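# `create_feedforward` is also defined elsewhere; the call above suggests a
# single affine projection. This is an assumed sketch matching that call
# signature, not the repo's actual implementation.
def create_feedforward_sketch(inputs, input_dim, output_dim,
                              create_initializer, activation, scope):
    with tf.variable_scope(scope):
        W = tf.get_variable("W", shape=[input_dim, output_dim],
                            dtype=tf.float32,
                            initializer=create_initializer())
        b = tf.get_variable("b", shape=[output_dim], dtype=tf.float32,
                            initializer=tf.zeros_initializer())
        out = tf.matmul(inputs, W) + b
        # "linear" leaves the projection untouched; the sigmoid for the
        # "lstm_sig" model is applied by the caller.
        return out if activation == "linear" else tf.nn.relu(out)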
def __init__(self,
             input_vocab_size,
             output_vocab_size,
             buckets=get_buckets(DEFAULT_BUCKETS_STRING),
             layer_size=DEFAULT_LAYER_SIZE,
             n_layers=DEFAULT_N_LAYERS,
             max_gradient_norm=DEFAULT_MAX_GRADIENT_NORM,
             batch_size=DEFAULT_BATCH_SIZE,
             learning_rate=DEFAULT_LEARNING_RATE,
             learning_rate_decay_factor=DEFAULT_LEARNING_RATE_DECAY_FACTOR,
             rnn_cell=get_rnn_cell(DEFAULT_RNN_CELL),
             n_samples=DEFAULT_N_SAMPLES,
             forward_only=DEFAULT_FORWARD_ONLY):
    logging.info('initializing Seq2Seq model')
    buckets = get_sorted_buckets(buckets)

    self.input_vocab_size = input_vocab_size
    self.output_vocab_size = output_vocab_size
    self.buckets = buckets
    self.layer_size = layer_size
    self.n_layers = n_layers
    self.max_gradient_norm = max_gradient_norm
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.learning_rate_decay_factor = learning_rate_decay_factor
    self.rnn_cell = rnn_cell
    self.n_samples = n_samples
    self.forward_only = forward_only

    logging.debug('saving params')
    self._save_params()

    # The learning rate lives in the graph so it can be decayed in place
    # and checkpointed along with the other variables.
    logging.debug('assigning learning rate')
    self.learning_rate = tf.Variable(float(self.learning_rate),
                                     trainable=False)
    self.learning_rate_decay_operation = self.learning_rate.assign(
        self.learning_rate * self.learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)

    # One placeholder per time step, sized to the largest bucket; the
    # decoder side gets one extra slot for the GO-symbol shift.
    logging.debug('creating placeholders')
    self.encoder_inputs = [self._get_val_placeholder('encoder', i)
                           for i in xrange(buckets[-1][0])]
    self.decoder_inputs = [self._get_val_placeholder('decoder', i)
                           for i in xrange(buckets[-1][1] + 1)]
    self.target_weights = [
        self._get_val_placeholder('weight', i, dtype=tf.float32)
        for i in xrange(buckets[-1][1] + 1)]

    self.outputs, self.losses = self._get_model_with_buckets()

    logging.debug('building saver')
    self.saver = tf.train.Saver(tf.global_variables())

    # Gradient ops are only needed when training.
    if not forward_only:
        out = self._get_gradient_norms_and_updates()
        self.gradient_norms, self.updates = out
    self.embedding_operation = self._get_embedding_operation()
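# Hedged usage sketch: constructing the model directly and annealing the
# learning rate between evaluations. The function name and vocabulary sizes
# are illustrative; everything else follows the constructor above.
def example_seq2seq_training_setup():
    model = Seq2Seq(input_vocab_size=40000, output_vocab_size=40000,
                    forward_only=False)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Multiply the in-graph learning rate by the decay factor.
        sess.run(model.learning_rate_decay_operation)
        return sess.run(model.learning_rate)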