def get_project(sess):
    """
    Fetch the current values of the output projection parameters.

    This function reads ``self`` from the enclosing scope (it is not a
    parameter), so it only works as a closure inside a method.

    :param sess: the session used to evaluate the variables
    :return: a list ``[project_w, project_b]`` as returned by ``sess.run``
    """
    # Width of the projection: explicit target size when set, vocab size otherwise.
    if self.target_size is None:
        n_targets = self.vocab_size
    else:
        n_targets = self.target_size
    # reuse=True: look up the existing variables rather than creating new ones.
    with tf.variable_scope('project', reuse=True):
        weight_shape = [self.cell_list[-1].output_size, n_targets]
        weights = tf.get_variable("project_w", weight_shape, dtype=data_type())
        bias = tf.get_variable("project_b", [n_targets], dtype=data_type())
        fetches = [weights, bias]
        return sess.run(fetches)
def build_rnn(rnn_config):
    """
    Build an instance of RNN from config

    :param rnn_config: a RNNConfig instance
    :return: a compiled model
    :raises NoDataError: if the ``word_to_id`` mapping of ``rnn_config.dataset``
        cannot be loaded
    """
    assert isinstance(rnn_config, RNNConfig)
    try:
        word_to_id = get_dataset(rnn_config.dataset, ['word_to_id'])['word_to_id']
    except Exception:
        # Only ordinary failures are reported as missing data. A bare
        # ``except:`` would also convert KeyboardInterrupt / SystemExit
        # into NoDataError and hide a user interrupt.
        raise NoDataError
    # Each model gets its own fresh graph.
    _rnn = RNN(rnn_config.name, rnn_config.initializer,
               graph=tf.Graph(), word_to_id=word_to_id)
    _rnn.set_input([None], rnn_config.input_dtype,
                   rnn_config.vocab_size, rnn_config.embedding_size)
    # One add_cell call per entry in ``cells``; each entry is unpacked as
    # keyword arguments, and all share the same ``rnn_config.cell``.
    for cell in rnn_config.cells:
        _rnn.add_cell(rnn_config.cell, **cell)
    _rnn.set_output([None, rnn_config.vocab_size], data_type(),
                    rnn_config.use_last_output)
    _rnn.set_target([None], rnn_config.target_dtype, rnn_config.target_size)
    _rnn.set_loss_func(rnn_config.loss_func)
    _rnn.compile()
    return _rnn
def project_output(self, outputs):
    """
    Apply the linear output projection ``outputs * project_w + project_b``.

    No softmax is applied here; the result is the un-normalised scores.

    :param outputs: a 2D tensor whose last dimension equals the output size
        of the last cell in ``self.cell_list``
    :return: a tensor whose last dimension is ``self.target_size`` (or
        ``self.vocab_size`` when no target size is set), or None when the
        model has no projection layer
    """
    if not self.has_project:
        return None

    n_targets = self.vocab_size if self.target_size is None else self.target_size
    with tf.variable_scope('project', initializer=self.initializer):
        weight_shape = [self.cell_list[-1].output_size, n_targets]
        weights = tf.get_variable("project_w", weight_shape, dtype=data_type())
        bias = tf.get_variable("project_b", [n_targets], dtype=data_type())
        projected = tf.matmul(outputs, weights)
        return projected + bias
def get_embedding(sess):
    """
    Evaluate and return the current embedding matrix.

    This function reads ``self`` from the enclosing scope (it is not a
    parameter), so it only works as a closure inside a method.

    :param sess: the session in which the embedding variable is evaluated
    :return: the value of the ``embedding`` variable, of shape
        (vocab_size + 1, embedding_size)
    """
    table_shape = [self.vocab_size + 1, self.embedding_size]
    # reuse=True on the outer scope: fetch the existing variable instead of
    # creating a new one. The device pin mirrors the one used at lookup
    # time (original note: GPU implementation is missing).
    with tf.variable_scope(self.name, reuse=True, initializer=self.initializer), \
            tf.variable_scope('embedding'), \
            tf.device("/cpu:0"):
        table = tf.get_variable("embedding", table_shape, dtype=data_type())
        return table.eval(sess)
def map_to_embedding(self, inputs):
    """
    Look up the embedding vectors for a tensor of word ids.

    :param inputs: an integer tensor of word ids; valid ids index into a
        table of ``vocab_size + 1`` rows
    :return: the result of ``tf.nn.embedding_lookup`` (``inputs`` with a
        trailing ``embedding_size`` dimension), or None when the model has
        no embedding layer
    """
    if not self.has_embedding:
        return None

    # Per the original note, the variable is expected to have been created
    # already in compile() -- TODO confirm against the rest of the class.
    table_shape = [self.vocab_size + 1, self.embedding_size]
    with tf.variable_scope('embedding', initializer=self.initializer):
        # Force CPU since GPU implementation is missing
        with tf.device("/cpu:0"):
            table = tf.get_variable("embedding", table_shape, dtype=data_type())
            return tf.nn.embedding_lookup(table, inputs)
def __init__(self, rnn, batch_size, num_steps, keep_prob=None, name=None, dynamic=True):
    """
    Create an unrolled rnn model with TF tensors

    Builds input/target holder variables, the recurrent network, the
    (optional) output projection, and the loss and accuracy tensors, then
    registers the new model in ``rnn.models``.

    :param rnn: the RNN instance that provides cells, scopes, dtypes,
        shapes and the loss function
    :param batch_size: first dimension of the input and target holders and
        the batch size of the zero initial state
    :param num_steps: second dimension of the input holder (the network is
        built with time_major=False)
    :param keep_prob: when not None and < 1.0, every cell in
        ``rnn.cell_list`` is wrapped with output dropout and dropout is
        also applied to the network inputs
    :param name: stored as ``self.name`` (defaulting to "UnRolled"); the
        raw argument is what is passed to ``tf.name_scope``
    :param dynamic: if True use ``tf.nn.dynamic_rnn``; otherwise use
        ``tf.nn.rnn`` on a list of per-step slices
    """
    assert isinstance(rnn, RNN)
    self._rnn = rnn
    self._cell = rnn.cell
    self.batch_size = batch_size
    self.num_steps = num_steps
    self.name = name or "UnRolled"
    self.dynamic = dynamic
    self.current_state = None
    # Ugly hacks for DropoutWrapper: rebuild the stacked cell from
    # dropout-wrapped copies of the individual cells.
    if keep_prob is not None and keep_prob < 1.0:
        cell_list = [
            DropOutWrapper(cell, output_keep_prob=keep_prob)
            for cell in rnn.cell_list
        ]
        self._cell = MultiRNNCell(cell_list, state_is_tuple=True)
    # The abstract name scope is not that easily dealt with, try to make it transparent to the user
    # NOTE(review): this passes the raw `name` argument, not `self.name`, so
    # the "UnRolled" default is not used for the scope -- confirm intended.
    with tf.name_scope(name):
        reuse = rnn.need_reuse
        with tf.variable_scope(rnn.name, reuse=reuse, initializer=rnn.initializer):
            # Build TF computation Graph
            # Input holder shape: [batch_size, num_steps] + trailing dims of rnn.input_shape.
            input_shape = [batch_size] + [num_steps] + list(
                rnn.input_shape)[1:]
            # The input holder is a non-trainable, zero-initialised Variable
            # rather than a placeholder (old placeholder version kept below).
            # self.input_holders = tf.placeholder(rnn.input_dtype, input_shape, "input_holders")
            zero_initializer = tf.constant_initializer(
                value=0, dtype=rnn.input_dtype)
            self.input_holders = tf.Variable(
                zero_initializer(shape=input_shape),
                trainable=False,
                collections=_input_and_global,
                name='input_holders')
            # self.input_holders = tf.Variable(np.zeros(input_shape))
            # self.batch_size = tf.shape(self.input_holders)[0]
            # NOTE(review): `self.cell` is read here but only `self._cell` is
            # assigned above; presumably a property defined elsewhere in the
            # class returns `self._cell` -- confirm.
            self.state = self.cell.zero_state(self.batch_size, rnn.output_dtype)
            # ugly hacking for EmbeddingWrapper Badness
            # With an embedding layer, ids are shifted by +1 before lookup
            # (presumably to reserve row 0 -- TODO confirm).
            self.inputs = self.input_holders if not rnn.has_embedding \
                else rnn.map_to_embedding(self.input_holders+1)
            if keep_prob is not None and keep_prob < 1.0:
                self.inputs = tf.nn.dropout(self.inputs, keep_prob)
            # Call TF api to create recurrent neural network
            self.input_length = sequence_length(self.inputs)
            if dynamic:
                self.outputs, self.final_state = \
                    tf.nn.dynamic_rnn(self.cell, self.inputs, sequence_length=self.input_length,
                                      initial_state=self.state, dtype=data_type(), time_major=False)
            else:
                # Slice the inputs along axis 1 into one tensor per time step.
                inputs = [self.inputs[:, i] for i in range(num_steps)]
                # Since we do not want it to be dynamic, sequence length is not fed,
                # so that evaluator can fetch gate tensor values.
                outputs, self.final_state = \
                    tf.nn.rnn(self.cell, inputs, initial_state=self.state, dtype=data_type())
                # Re-assemble the per-step outputs along axis 1.
                self.outputs = tf.stack(outputs, axis=1)
            if rnn.use_last_output:
                # Keep a single output per sequence, so targets have no num_steps dimension.
                self.outputs = last_relevant(self.outputs, self.input_length)
                target_shape = [batch_size] + list(rnn.target_shape)[1:]
            else:
                target_shape = [batch_size] + [num_steps] + list(
                    rnn.target_shape)[1:]
            # The target holder is built the same way as the input holder.
            # self.target_holders = tf.placeholder(rnn.target_dtype, target_shape, "target_holders")
            zero_initializer = tf.constant_initializer(
                value=0, dtype=rnn.target_dtype)
            self.target_holders = tf.Variable(
                zero_initializer(shape=target_shape),
                trainable=False,
                collections=_input_and_global,
                name='target_holders')
            if rnn.has_project:
                # Flatten outputs to 2-D (last dimension kept) and targets to 1-D
                outputs = tf.reshape(
                    self.outputs,
                    [-1, self.outputs.get_shape().as_list()[-1]])
                targets = tf.reshape(self.target_holders, [-1])
                # rnn has output project, do manual projection for speed
                self.projected_outputs = rnn.project_output(outputs)
                self.loss = rnn.loss_func(self.projected_outputs, targets)
                # Accuracy: mean of the top-1 hit indicator
                self.accuracy = tf.reduce_mean(
                    tf.cast(
                        tf.nn.in_top_k(self.projected_outputs, targets, 1),
                        data_type()))
            else:
                # No projection layer: projected_outputs is just the flattened
                # outputs, while loss and accuracy use the unflattened tensors.
                self.projected_outputs = tf.reshape(
                    self.outputs,
                    [-1, self.outputs.get_shape().as_list()[-1]])
                self.loss = rnn.loss_func(self.outputs, self.target_holders)
                self.accuracy = tf.reduce_mean(
                    tf.cast(
                        tf.nn.in_top_k(self.outputs, self.target_holders, 1),
                        data_type()))
    # Append self to rnn's model list
    rnn.models.append(self)