# Shared imports assumed by the snippets in this section (TensorFlow 1.x API);
# the snippets originate from different files, so this preamble is a
# best-effort reconstruction of what they need:
import logging
import os
from datetime import datetime

import numpy as np
import tensorflow as tf
from tensorflow.contrib import layers, rnn, seq2seq, slim
from tensorflow.contrib.framework import get_or_create_global_step
from tensorflow.python.ops import variable_scope


def test_dynamic_bigru_output_consumed_only(self):
    units = 5
    batch_size = 1
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.]], dtype=np.float32)
    x_val = np.stack([x_val] * batch_size)

    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")

    # bigru, no scope
    cell1 = rnn.GRUBlockCell(units)
    cell2 = rnn.GRUBlockCell(units)
    outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell1, cell2, x,
                                                 dtype=tf.float32)
    _ = tf.identity(outputs, name="output")

    feed_dict = {"input_1:0": x_val}
    input_names_with_port = ["input_1:0"]
    output_names_with_port = ["output:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       rtol=1e-3)
def test_dynamic_bigru_state_consumed_only(self):
    units = 5
    batch_size = 1
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.]], dtype=np.float32)
    x_val = np.stack([x_val] * batch_size)

    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")

    # bigru, no scope
    cell1 = rnn.GRUBlockCell(units)
    cell2 = rnn.GRUBlockCell(units)
    _, cell_state = tf.nn.bidirectional_dynamic_rnn(cell1, cell2, x,
                                                    dtype=tf.float32)
    _ = tf.identity(cell_state, name="cell_state")

    feed_dict = {"input_1:0": x_val}
    input_names_with_port = ["input_1:0"]
    output_names_with_port = ["cell_state:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       rtol=1e-3, atol=1e-06,
                       graph_validator=lambda g: check_gru_count(g, 1))
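# check_gru_count is provided by the surrounding test harness and is not
# defined in this excerpt. A minimal sketch of what such a validator might
# look like, assuming a tf2onnx-style graph object with a get_nodes() method
# and a node `type` attribute (both names are assumptions here):
def check_gru_count_sketch(graph, expected_count):
    """Return True iff the converted graph contains exactly
    `expected_count` ONNX GRU nodes."""
    gru_nodes = [n for n in graph.get_nodes() if n.type == "GRU"]
    return len(gru_nodes) == expected_count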
def bigru_layer(self):
    embed_ = tf.nn.embedding_lookup(self.embeddings, self.x_input)
    with tf.variable_scope("char_bigru"):
        gru_fw_cell = rnn.GRUBlockCell(self.lstm_dim)
        gru_bw_cell = rnn.GRUBlockCell(self.lstm_dim)
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            gru_fw_cell, gru_bw_cell, embed_, dtype=tf.float32)
    # Concatenate forward and backward outputs along the feature axis.
    x_in_ = tf.concat(outputs, axis=2)
    return x_in_
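# A standalone sketch of the bi-GRU + concat pattern above, with made-up
# sizes (batch=2, time=3, embed=4, units=5); all `demo_*` names are
# illustrative only. Concatenating the forward and backward outputs on
# axis 2 yields shape [2, 3, 2 * 5].
demo_embed = tf.placeholder(tf.float32, [2, 3, 4], name="demo_embed")
with tf.variable_scope("bigru_demo"):
    demo_fw = rnn.GRUBlockCell(5)
    demo_bw = rnn.GRUBlockCell(5)
    demo_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        demo_fw, demo_bw, demo_embed, dtype=tf.float32)
demo_concat = tf.concat(demo_outputs, axis=2)  # shape: [2, 3, 10]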
def test_single_dynamic_gru_seq_length_is_const(self):
    units = 5
    batch_size = 1
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.], [5., 5.]],
                     dtype=np.float32)
    x_val = np.stack([x_val] * batch_size)

    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")

    # no scope
    cell = rnn.GRUBlockCell(units)
    outputs, cell_state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32,
                                            sequence_length=[5])
    _ = tf.identity(outputs, name="output")
    _ = tf.identity(cell_state, name="cell_state")

    feed_dict = {"input_1:0": x_val}
    input_names_with_port = ["input_1:0"]
    output_names_with_port = ["output:0", "cell_state:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       rtol=1e-3, atol=1e-06,
                       graph_validator=lambda g: check_gru_count(g, 1))
def test_single_dynamic_gru_ch_zero_state_initializer(self):
    units = 5
    batch_size = 1
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.], [5., 5.]],
                     dtype=np.float32)
    x_val = np.stack([x_val] * batch_size)

    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")

    # no scope
    cell = rnn.GRUBlockCell(units)

    # defining initial state
    initial_state = cell.zero_state(batch_size, dtype=tf.float32)
    outputs, cell_state = tf.nn.dynamic_rnn(cell, x,
                                            initial_state=initial_state,
                                            dtype=tf.float32)
    _ = tf.identity(outputs, name="output")
    _ = tf.identity(cell_state, name="cell_state")

    feed_dict = {"input_1:0": x_val}
    input_names_with_port = ["input_1:0"]
    output_names_with_port = ["output:0", "cell_state:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       rtol=1e-03)
def lstmnet_link(input_tensor, output_tensor, Hin, pkeep, phase, reuse_weights):
    # input_tensor: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION ]
    # output_tensor: [ BATCH_SIZE, DIMENSION ]
    # Hin: [ BATCH_SIZE, INTERNALSIZE*NLAYERS ]
    with tf.variable_scope('NeuralNet', reuse=tf.AUTO_REUSE) as scope:
        if reuse_weights:
            scope.reuse_variables()

        X = tf.reshape(input_tensor,
                       [config.batch_size, config.link_size, config.dimension])
        # X: [ BATCH_SIZE, LINK_SIZE, DIMENSION ]

        cells = [rnn.GRUBlockCell(config.hidden_layer_size)
                 for _ in range(config.hidden_layer_depth)]
        # "naive dropout" implementation
        dropcells = [rnn.DropoutWrapper(cell, input_keep_prob=pkeep)
                     for cell in cells]
        multicell = rnn.MultiRNNCell(dropcells, state_is_tuple=False)
        # dropout for the softmax layer
        multicell = rnn.DropoutWrapper(multicell, output_keep_prob=pkeep)

        Yr, H = tf.nn.dynamic_rnn(multicell, X, dtype=tf.float32,
                                  initial_state=Hin)
        H = tf.identity(H, name='H')  # just to give it a name
        # Yr: [ BATCH_SIZE, LINK_SIZE, INTERNALSIZE ]
        # H: [ BATCH_SIZE, INTERNALSIZE*NLAYERS ]  (last state in the sequence)

        # Select last output.
        output = tf.transpose(Yr, [1, 0, 2])
        # output: [ LINK_SIZE, BATCH_SIZE, INTERNALSIZE ]
        last = tf.gather(output, int(output.get_shape()[0]) - 1)
        # last: [ BATCH_SIZE, INTERNALSIZE ]

        # Last layer maps the INTERNALSIZE GRU output to function values.
        Y = layers.fully_connected(last, config.dimension, activation_fn=None,
                                   reuse=reuse_weights, scope='NeuralNet')
        # Y: [ BATCH_SIZE, DIMENSION ]
    return H, Y
def test_single_dynamic_gru_seq_length_is_not_const(self):
    units = 5
    batch_size = 6
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.], [5., 5.]],
                     dtype=np.float32)
    x_val = np.stack([x_val] * batch_size)
    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")

    y_val = np.array([4, 3, 4, 5, 2, 1], dtype=np.int32)
    seq_length = tf.placeholder(tf.int32, y_val.shape, name="input_2")

    # no scope
    cell = rnn.GRUBlockCell(units)
    outputs, cell_state = tf.nn.dynamic_rnn(
        cell, x, dtype=tf.float32, sequence_length=tf.identity(seq_length))
    _ = tf.identity(outputs, name="output")
    _ = tf.identity(cell_state, name="cell_state")

    feed_dict = {"input_1:0": x_val, "input_2:0": y_val}
    input_names_with_port = ["input_1:0", "input_2:0"]
    output_names_with_port = ["output:0", "cell_state:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       rtol=1e-03)
def test_multiple_dynamic_gru(self):
    units = 5
    batch_size = 1
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]], dtype=np.float32)
    x_val = np.stack([x_val] * batch_size)
    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")
    _ = tf.placeholder(tf.float32, x_val.shape, name="input_2")
    gru_output_list = []
    gru_cell_state_list = []

    # no scope
    cell = rnn.GRUBlockCell(units)
    outputs, cell_state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
    gru_output_list.append(outputs)
    gru_cell_state_list.append(cell_state)

    # given scope
    cell = rnn.GRUBlockCell(units)
    with variable_scope.variable_scope("root1") as scope:
        outputs, cell_state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32,
                                                sequence_length=[4],
                                                scope=scope)
    gru_output_list.append(outputs)
    gru_cell_state_list.append(cell_state)

    _ = tf.identity(gru_output_list, name="output")
    _ = tf.identity(gru_cell_state_list, name="cell_state")

    feed_dict = {"input_1:0": x_val}
    input_names_with_port = ["input_1:0"]
    output_names_with_port = ["output:0", "cell_state:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       rtol=1e-3)
def build_cell(idx):
    with tf.variable_scope('decoder_cell', initializer=self.default_init(idx)):
        cell = rnn.GRUBlockCell(self.hparams.rnn_depth)
        has_dropout = (hparams.decoder_input_dropout[idx] < 1
                       or hparams.decoder_state_dropout[idx] < 1
                       or hparams.decoder_output_dropout[idx] < 1)
        if self.is_train and has_dropout:
            attn_depth = (attn_features.shape[-1].value
                          if attn_features is not None else 0)
            input_size = (attn_depth + prediction_inputs.shape[-1].value + 1
                          if idx == 0 else self.hparams.rnn_depth)
            cell = rnn.DropoutWrapper(
                cell, dtype=tf.float32, input_size=input_size,
                variational_recurrent=hparams.decoder_variational_dropout[idx],
                input_keep_prob=hparams.decoder_input_dropout[idx],
                output_keep_prob=hparams.decoder_output_dropout[idx],
                state_keep_prob=hparams.decoder_state_dropout[idx],
                seed=self.seed + idx)
        return cell
def build_model(self):
    config = self.config
    data_generator = self.data_generator

    logging.info('Building the model...')

    # Placeholders
    self.inputs = tf.placeholder(dtype=tf.int32, shape=[None, None],
                                 name='inputs')
    self.inputs_length = tf.placeholder(dtype=tf.int32, shape=[None],
                                        name='inputs_length')
    self.targets = tf.placeholder(dtype=tf.int32, shape=[None, None],
                                  name='targets')
    self.targets_length = tf.placeholder(dtype=tf.int32, shape=[None],
                                         name='targets_length')

    vocab_size = len(data_generator.vocab)
    embeddings = tf.get_variable(name='embeddings',
                                 shape=[vocab_size, config.word_dim],
                                 dtype=tf.float32)

    with tf.variable_scope('decoder'):
        with tf.variable_scope('output') as output_scope:
            # This variable-scope-trick is used to ensure that
            # output_fn has a proper scope regardless of a caller's
            # scope.
            def output_fn(cell_outputs):
                return layers.fully_connected(inputs=cell_outputs,
                                              num_outputs=vocab_size,
                                              activation_fn=None,
                                              scope=output_scope)

    self.rnn_cell = rnn.GRUBlockCell(config.sentence_dim)
    self.encoder_state = self.encode(cell=self.rnn_cell,
                                     embeddings=embeddings,
                                     inputs=self.inputs,
                                     inputs_length=self.inputs_length,
                                     scope='encoder')
    self.decoder_outputs = self.decode_train(
        cell=self.rnn_cell,
        embeddings=embeddings,
        encoder_state=self.encoder_state,
        targets=self.targets[:, :-1],
        targets_length=self.targets_length - 1,
        scope='decoder')
    self.generated = self.decode_inference(
        cell=self.rnn_cell,
        embeddings=embeddings,
        encoder_state=self.encoder_state,
        output_fn=output_fn,
        vocab_size=vocab_size,
        bos_id=data_generator.vocab['<EOS>'],
        eos_id=data_generator.vocab['<EOS>'],
        max_length=config.max_length,
        scope='decoder',
        reuse=True)
    self.loss = self.loss(decoder_outputs=self.decoder_outputs,
                          output_fn=output_fn,
                          targets=self.targets[:, 1:],
                          targets_length=self.targets_length - 1)

    self.global_step = get_or_create_global_step()
    self.train_op = slim.optimize_loss(loss=self.loss,
                                       global_step=self.global_step,
                                       learning_rate=None,
                                       optimizer=tf.train.AdamOptimizer(),
                                       clip_gradients=5.0)

    self.summary_writer = tf.summary.FileWriter(
        logdir=os.path.join(config.save_dir, 'log'))
    self.summary = tf.summary.merge_all()

    tf.get_variable_scope().set_initializer(
        tf.random_normal_initializer(mean=0.0, stddev=0.01))
    tf.global_variables_initializer().run()

    self.saver = tf.train.Saver(max_to_keep=20)
def test_single_dynamic_gru_random_weights2(self):
    hidden_size = 128
    batch_size = 1
    x_val = np.random.randn(1, 133).astype('f')
    x_val = np.stack([x_val] * batch_size)

    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")

    # no scope
    cell = rnn.GRUBlockCell(hidden_size)
    outputs, cell_state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
    _ = tf.identity(outputs, name="output")
    _ = tf.identity(cell_state, name="cell_state")

    feed_dict = {"input_1:0": x_val}
    input_names_with_port = ["input_1:0"]
    output_names_with_port = ["output:0", "cell_state:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       0.01)
def test_single_dynamic_gru_placeholder_input(self):
    units = 5
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]], dtype=np.float32)
    x_val = np.stack([x_val] * 1)
    x = tf.placeholder(tf.float32, shape=(None, 4, 2), name="input_1")

    # no scope
    cell = rnn.GRUBlockCell(units)
    outputs, cell_state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
    # by default zero initializer is used
    _ = tf.identity(outputs, name="output")
    _ = tf.identity(cell_state, name="cell_state")

    feed_dict = {"input_1:0": x_val}
    input_names_with_port = ["input_1:0"]
    output_names_with_port = ["output:0", "cell_state:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       rtol=1e-3)
def test_single_dynamic_gru_random_weights(self):
    hidden_size = 5
    batch_size = 6
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]], dtype=np.float32)
    x_val = np.stack([x_val] * batch_size)

    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")

    # no scope
    cell = rnn.GRUBlockCell(hidden_size)
    outputs, cell_state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
    _ = tf.identity(outputs, name="output")
    _ = tf.identity(cell_state, name="cell_state")

    feed_dict = {"input_1:0": x_val}
    input_names_with_port = ["input_1:0"]
    output_names_with_port = ["output:0", "cell_state:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       0.0001)
def test_dynamic_gru_output_consumed_only(self):
    units = 5
    batch_size = 6
    x_val = np.array([[1., 1.], [2., 2.], [3., 3.]], dtype=np.float32)
    x_val = np.stack([x_val] * batch_size)

    x = tf.placeholder(tf.float32, x_val.shape, name="input_1")
    cell1 = rnn.GRUBlockCell(units)
    outputs, _ = tf.nn.dynamic_rnn(cell1, x, dtype=tf.float32)
    _ = tf.identity(outputs, name="output")

    feed_dict = {"input_1:0": x_val}
    input_names_with_port = ["input_1:0"]
    output_names_with_port = ["output:0"]
    self.run_test_case(feed_dict, input_names_with_port, output_names_with_port,
                       0.0001, graph_validator=lambda g: check_gru_count(g, 1))
def build_cell(idx): with tf.variable_scope("decoder_cell", initializer=default_init()): cell = rnn.GRUBlockCell(hparams.rnn_depth) has_dropout = ( hparams.decoder_input_dropout[idx] < 1 or hparams.decoder_state_dropout[idx] < 1 or hparams.decoder_output_dropout[idx] < 1 ) if is_train and has_dropout: input_size = prediction_inputs.shape[-1].value + 1 if idx == 0 else hparams.rnn_depth cell = rnn.DropoutWrapper( cell, dtype=tf.float32, input_size=input_size, variational_recurrent=hparams.decoder_variational_dropout[idx], input_keep_prob=hparams.decoder_input_dropout[idx], output_keep_prob=hparams.decoder_output_dropout[idx], state_keep_prob=hparams.decoder_state_dropout[idx], ) return cell
def build_cell(idx):
    with tf.variable_scope('rnn_cell', initializer=default_init(self.seed + idx)):
        cell = rnn.GRUBlockCell(hparams.rnn_depth)
        has_dropout = (hparams.encoder_input_dropout[idx] < 1
                       or hparams.encoder_state_dropout[idx] < 1
                       or hparams.encoder_output_dropout[idx] < 1)
        if self.is_train and has_dropout:
            input_size = (train_inputs.shape[-1].value + 1
                          if idx == 0 else hparams.rnn_depth)
            cell = rnn.DropoutWrapper(
                cell, dtype=tf.float32, input_size=input_size,
                variational_recurrent=hparams.encoder_variational_dropout[idx],
                input_keep_prob=hparams.encoder_input_dropout[idx],
                output_keep_prob=hparams.encoder_output_dropout[idx],
                state_keep_prob=hparams.encoder_state_dropout[idx],
                seed=self.seed + idx)
        return cell
def _lstmnet(
        features,  # This is batch_features from input_fn
        labels,    # This is batch_labels from input_fn
        mode,      # An instance of tf.estimator.ModeKeys
        params,
        is_test):
    with tf.variable_scope('EncoderNet') as scope:
        if is_test:
            scope.reuse_variables()
        if mode == tf.estimator.ModeKeys.TRAIN and not is_test:
            # Train graph
            pkeep = params['pkeep']
        else:
            # Test or inference graph
            pkeep = 1.0

        x = tf.feature_column.input_layer(
            features, feature_columns=params['feature_columns'])
        X = tf.reshape(x, shape=[x.get_shape()[0],
                                 params['sequence_length'],
                                 params['dimension']])
        X = tf.identity(X, name='X')
        # X: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION ]

        if labels is not None:
            Labels = tf.reshape(labels, shape=[x.get_shape()[0],
                                               params['sequence_length'],
                                               params['dimension']])
        else:
            Labels = None

        encoder_Hin = params['encoder_Hin']
        # encoder_Hin: [ BATCH_SIZE, ENCODER_INTERNALSIZE * ENCODER_NLAYERS ]

        seqlen = tf.Variable(params['sequence_length'], name='seqlen')
        seqlen = tf.reshape(seqlen, shape=[1])
        seqdescr = tf.tile(seqlen, multiples=[x.get_shape()[0]])
        # seqdescr: [ BATCH_SIZE ]

        inital_time_sample = params['decoder_inital_time_sample']
        # inital_time_sample: [ BATCH_SIZE, DIMENSION ]

        encoder_cells = [rnn.GRUBlockCell(params['encoder_hidden_layer_size'])
                         for _ in range(params['encoder_hidden_layer_depth'])]
        # "naive dropout" implementation
        encoder_dropcells = [rnn.DropoutWrapper(cell, input_keep_prob=pkeep)
                             for cell in encoder_cells]
        encoder_multicell = rnn.MultiRNNCell(encoder_dropcells,
                                             state_is_tuple=False)
        # Input wrapper to keep symmetry with decoder
        encoder_multicell = rnn.InputProjectionWrapper(
            encoder_multicell, num_proj=params['bottleneck_size'],
            activation=None)
        # No dropout in the bottleneck layer!
        # encoder_multicell = rnn.DropoutWrapper(encoder_multicell,
        #                                        output_keep_prob=pkeep)

        encoded_Yr, encoded_H = tf.nn.dynamic_rnn(
            encoder_multicell, X, dtype=tf.float32,
            initial_state=encoder_Hin, scope='EncoderNet',
            parallel_iterations=params['parallel_iters'])
        encoded_H = tf.identity(encoded_H, name='encoded_H')  # just to give it a name
        encoded_Yr = tf.identity(encoded_Yr, name='encoded_Yr')
        # encoded_Yr: [ BATCH_SIZE, SEQUENCE_LENGTH, ENCODER_INTERNALSIZE ]
        # encoded_H: [ BATCH_SIZE, ENCODER_INTERNALSIZE * ENCODER_NLAYERS ]
        #   (the last state in the sequence)

        encoded_V = tf.reshape(encoded_H, [x.get_shape()[0], -1])
        # encoded_V: [ BATCH_SIZE, BOTTLENECK_SIZE ]

    with tf.variable_scope('NetDecoder') as scope:
        if is_test:
            scope.reuse_variables()
        if mode == tf.estimator.ModeKeys.TRAIN and not is_test:
            pkeep = params['pkeep']
        else:
            pkeep = 1.0

        decoder_Hin = encoded_H
        # decoder_Hin: [ BATCH_SIZE, DECODER_INTERNALSIZE * DECODER_NLAYERS ]

        decoder_cells = [rnn.GRUBlockCell(params['decoder_hidden_layer_size'])
                         for _ in range(params['decoder_hidden_layer_depth'])]
        # "naive dropout" implementation
        decoder_dropcells = [rnn.DropoutWrapper(cell, input_keep_prob=pkeep)
                             for cell in decoder_cells]
        decoder_multicell = rnn.MultiRNNCell(decoder_dropcells,
                                             state_is_tuple=False)
        # dropout for the softmax layer
        decoder_multicell = rnn.DropoutWrapper(decoder_multicell,
                                               output_keep_prob=pkeep)
        # dense layer to adjust dimensions
        decoder_multicell = rnn.OutputProjectionWrapper(decoder_multicell,
                                                        params['dimension'],
                                                        activation=None)

        custom_Helper = create_fixed_len_numeric_training_helper(
            inital_time_sample, params['sequence_length'], X.dtype)
        # helper = tf.contrib.seq2seq.TrainingHelper(inputs=Labels,
        #                                            sequence_length=seqdescr,
        #                                            time_major=False)

        decoder = seq2seq.BasicDecoder(cell=decoder_multicell,
                                       helper=custom_Helper,
                                       initial_state=decoder_Hin)
        decoded_Yr, decoded_H, _ = tf.contrib.seq2seq.dynamic_decode(
            decoder=decoder, output_time_major=False, impute_finished=False,
            maximum_iterations=None,
            parallel_iterations=params['parallel_iters'])
        decoded_Yr = decoded_Yr.rnn_output
        decoded_Yr.set_shape([decoded_Yr.get_shape()[0],
                              params['sequence_length'],
                              decoded_Yr.get_shape()[2]])
        decoded_H = tf.identity(decoded_H, name='decoded_H')
        decoded_Yr = tf.identity(decoded_Yr, name='decoded_Yr')
        # decoded_Yr: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION ]
        # decoded_H: [ BATCH_SIZE, DECODER_INTERNALSIZE * DECODER_NLAYERS ]
        #   (the last state in the sequence)

    return decoded_Yr, encoded_V  # encoded_V = encoded_H reshaped
def _lstmnet(
        features,  # This is batch_features from input_fn
        labels,    # This is batch_labels from input_fn
        mode,      # An instance of tf.estimator.ModeKeys
        params,
        is_test):
    with tf.variable_scope('NeuralNet') as scope:
        if is_test:
            scope.reuse_variables()

        x = tf.feature_column.input_layer(
            features, feature_columns=params['feature_columns'])
        X = tf.reshape(x, shape=[x.get_shape()[0],
                                 params['sequence_length'],
                                 params['input_dimension']])
        # X: [ BATCH_SIZE, SEQUENCE_LENGTH, INPUT_DIMENSION ]
        Hin = params['Hin']
        # Hin: [ BATCH_SIZE, INTERNALSIZE * NLAYERS ]

        if mode == tf.estimator.ModeKeys.TRAIN and not is_test:
            pkeep = params['pkeep']
        else:
            pkeep = 1.0

        cells = [rnn.GRUBlockCell(params['hidden_layer_size'])
                 for _ in range(params['hidden_layer_depth'])]
        # "naive dropout" implementation
        dropcells = [rnn.DropoutWrapper(cell, input_keep_prob=pkeep)
                     for cell in cells]
        multicell = rnn.MultiRNNCell(dropcells, state_is_tuple=False)
        # dropout for the softmax layer
        multicell = rnn.DropoutWrapper(multicell, output_keep_prob=pkeep)

        Yr, H = tf.nn.dynamic_rnn(multicell, X, dtype=tf.float32,
                                  initial_state=Hin, scope='NeuralNet',
                                  parallel_iterations=params['parallel_iters'])
        H = tf.identity(H, name='H')  # just to give it a name
        # Yr: [ BATCH_SIZE, SEQUENCE_LENGTH, INTERNALSIZE ]
        # H: [ BATCH_SIZE, INTERNALSIZE*NLAYERS ]  (last state in the sequence)

        # Select last output.
        output = tf.transpose(Yr, [1, 0, 2])
        # output: [ SEQUENCE_LENGTH, BATCH_SIZE, INTERNALSIZE ]
        last = tf.gather(output, int(output.get_shape()[0]) - 1)
        # last: [ BATCH_SIZE, INTERNALSIZE ]

        # Last layer maps the INTERNALSIZE GRU output to logits
        # (one-hot-encoding the answer, using the new API):
        YLogits = layers.fully_connected(last, params['output_dimension'],
                                         activation_fn=None)
        # YLogits: [ BATCH_SIZE, OUTPUT_DIMENSION ]
    return YLogits
def lstmnet(input_tensor, label_tensor, global_step, phase, reuse_weights):
    # input_tensor: [ BATCH_SIZE, SEQUENCE_LENGTH, INPUT_DIMENSION ]
    # label_tensor: [ BATCH_SIZE ]
    # global_step: [ 1 ]
    with tf.variable_scope('NeuralNet', reuse=tf.AUTO_REUSE) as scope:
        if reuse_weights:
            scope.reuse_variables()

        X = tf.reshape(input_tensor, [config.batch_size,
                                      config.sequence_length,
                                      config.input_dimension])
        # X: [ BATCH_SIZE, SEQUENCE_LENGTH, INPUT_DIMENSION ]
        pkeep = tf.placeholder(tf.float32)
        Hin = tf.placeholder(
            tf.float32,
            [config.batch_size,
             config.hidden_layer_size * config.hidden_layer_depth],
            name='Hin')
        # Hin: [ BATCH_SIZE, INTERNALSIZE * NLAYERS ]

        cells = [rnn.GRUBlockCell(config.hidden_layer_size)
                 for _ in range(config.hidden_layer_depth)]
        # "naive dropout" implementation
        dropcells = [rnn.DropoutWrapper(cell, input_keep_prob=pkeep)
                     for cell in cells]
        multicell = rnn.MultiRNNCell(dropcells, state_is_tuple=False)
        # dropout for the softmax layer
        multicell = rnn.DropoutWrapper(multicell, output_keep_prob=pkeep)

        Yr, H = tf.nn.dynamic_rnn(multicell, X, dtype=tf.float32,
                                  initial_state=Hin,
                                  parallel_iterations=config.batch_size)
        H = tf.identity(H, name='H')  # just to give it a name
        Yr_shaped = tf.reshape(Yr, [config.batch_size,
                                    config.sequence_length,
                                    config.hidden_layer_size])
        # Yr: [ BATCH_SIZE, SEQUENCE_LENGTH, INTERNALSIZE ]
        # H: [ BATCH_SIZE, INTERNALSIZE*NLAYERS ]  (last state in the sequence)

        Yr_lazy = Yr_shaped[:, config.lazy_cell_num:, :]
        # Yr_lazy: [ BATCH_SIZE, LABEL_LENGTH, INTERNALSIZE ]
        Yr_lazys = tf.split(Yr_lazy, config.label_length, axis=1)
        # Yr_lazys: [ LABEL_LENGTH ][ BATCH_SIZE, INTERNALSIZE ]

        # Append a fully connected layer after each non-lazy GRU cell output
        Ys = list()
        reuse = reuse_weights
        for Yl in Yr_lazys:
            Yl = tf.reshape(Yl, [config.batch_size, config.hidden_layer_size])
            with tf.variable_scope('NeuralNetFullyConnLayer',
                                   reuse=tf.AUTO_REUSE) as scope:
                if reuse:
                    scope.reuse_variables()
                Y = layers.fully_connected(Yl, config.output_dimension,
                                           activation_fn=None,
                                           reuse=reuse_weights,
                                           scope='NeuralNetFullyConnLayer')
                reuse = True
            Ys.append(Y)
        YLogits = tf.stack(Ys, axis=1, name='Ys')
        # YLogits: [ BATCH_SIZE, LABEL_LENGTH, OUTPUT_DIMENSION ]

    with tf.variable_scope('TrainingAndLoss', reuse=tf.AUTO_REUSE) as scope:
        if reuse_weights:
            scope.reuse_variables()
        starter_learning_rate = config.learning_rate
        learning_rate = tf.train.inverse_time_decay(starter_learning_rate,
                                                    global_step,
                                                    config.decay_steps,
                                                    config.decay_rate)
        y_ = tf.reshape(label_tensor, [config.batch_size])
        # y_: [ BATCH_SIZE ]  int(s) identifying the correct function
        # One-hot encode y_
        yo_ = tf.one_hot(y_, config.output_dimension, 1.0, 0.0)
        yos_ = tf.reshape(yo_, shape=[config.batch_size, 1,
                                      config.output_dimension])
        # yos_: [ BATCH_SIZE, 1, OUTPUT_DIMENSION ]
        yot_ = tf.tile(yos_, [1, config.label_length, 1])
        # yot_: [ BATCH_SIZE, LABEL_LENGTH, OUTPUT_DIMENSION ]
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(labels=yot_,
                                                       logits=YLogits))
        # use the decayed learning rate in the optimizer
        train_op = tf.train.RMSPropOptimizer(
            learning_rate=learning_rate,
            decay=config.decay_rate).minimize(cross_entropy)

    # accuracy
    with tf.name_scope('Summary') as scope:
        # select last output:
        output = tf.transpose(YLogits, [1, 0, 2])
        # output: [ LABEL_LENGTH, BATCH_SIZE, OUTPUT_DIMENSION ]
        Ylast = tf.gather(output, int(output.get_shape()[0]) - 1)
        # Ylast: [ BATCH_SIZE, OUTPUT_DIMENSION ]
        correct_prediction = tf.equal(tf.argmax(Ylast, 1), tf.argmax(yo_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar(phase + "/loss", cross_entropy)
        tf.summary.scalar(phase + "/acc", accuracy)

    summary_op = tf.summary.merge_all()
    return Hin, pkeep, train_op, summary_op
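# A hypothetical driver for lstmnet (sketch only): input_tensor, label_tensor
# and global_step are assumed to already exist as tensors, and 0.8 is an
# arbitrary keep probability. The recurrent state placeholder Hin must be fed
# a zero state of shape [ BATCH_SIZE, INTERNALSIZE * NLAYERS ].
Hin, pkeep, train_op, summary_op = lstmnet(input_tensor, label_tensor,
                                           global_step, 'train', False)
zero_state = np.zeros([config.batch_size,
                       config.hidden_layer_size * config.hidden_layer_depth],
                      dtype=np.float32)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run([train_op, summary_op],
             feed_dict={Hin: zero_state, pkeep: 0.8})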
def _lstmnet(
        features,  # This is batch_features from input_fn
        labels,    # This is batch_labels from input_fn
        mode,      # An instance of tf.estimator.ModeKeys
        params,
        is_test):
    with tf.variable_scope('EncoderNet') as scope:
        if is_test:
            scope.reuse_variables()
        if mode == tf.estimator.ModeKeys.TRAIN and not is_test:
            pkeep = params['pkeep']
        else:
            pkeep = 1.0

        x = tf.feature_column.input_layer(
            features, feature_columns=params['feature_columns'])
        X = tf.reshape(x, shape=[x.get_shape()[0],
                                 params['sequence_length'],
                                 params['dimension']])
        # X: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION ]
        encoder_Hin = params['encoder_Hin']
        # encoder_Hin: [ BATCH_SIZE, ENCODER_INTERNALSIZE * ENCODER_NLAYERS ]

        encoder_cells = [rnn.GRUBlockCell(params['encoder_hidden_layer_size'])
                         for _ in range(params['encoder_hidden_layer_depth'])]
        # "naive dropout" implementation
        encoder_dropcells = [rnn.DropoutWrapper(cell, input_keep_prob=pkeep)
                             for cell in encoder_cells]
        encoder_multicell = rnn.MultiRNNCell(encoder_dropcells,
                                             state_is_tuple=False)
        # dropout for the softmax layer
        encoder_multicell = rnn.DropoutWrapper(encoder_multicell,
                                               output_keep_prob=pkeep)

        encoder_Yr, encoder_H = tf.nn.dynamic_rnn(
            encoder_multicell, X, dtype=tf.float32,
            initial_state=encoder_Hin, scope='EncoderNet',
            parallel_iterations=params['parallel_iters'])
        encoder_H = tf.identity(encoder_H, name='encoder_H')  # just to give it a name
        # encoder_Yr: [ BATCH_SIZE, SEQUENCE_LENGTH, ENCODER_INTERNALSIZE ]
        # encoder_H: [ BATCH_SIZE, ENCODER_INTERNALSIZE * ENCODER_NLAYERS ]
        #   (the last state in the sequence)

        # Select last output.
        encoder_output = tf.transpose(encoder_Yr, [1, 0, 2])
        # encoder_output: [ SEQUENCE_LENGTH, BATCH_SIZE, ENCODER_INTERNALSIZE ]
        last = tf.gather(encoder_output,
                         int(encoder_output.get_shape()[0]) - 1)
        # last: [ BATCH_SIZE, ENCODER_INTERNALSIZE ]

        # Last layer maps the encoder output to the bottleneck representation
        bottleneck = layers.fully_connected(last, params['bottleneck_size'],
                                            activation_fn=tf.nn.relu)
        encoded_V = bottleneck
        # bottleneck: [ BATCH_SIZE, BOTTLENECK_SIZE ]

    with tf.variable_scope('NetDecoder') as scope:
        if is_test:
            scope.reuse_variables()
        if mode == tf.estimator.ModeKeys.TRAIN and not is_test:
            pkeep = params['pkeep']
        else:
            pkeep = 1.0

        decoder_Hin = params['decoder_Hin']
        # decoder_Hin: [ BATCH_SIZE, DECODER_INTERNALSIZE * DECODER_NLAYERS ]

        # tile the bottleneck layer along the time axis
        tiled_bottleneck = tf.tile(tf.expand_dims(bottleneck, axis=1),
                                   multiples=[1, params['sequence_length'], 1])
        # tiled_bottleneck: [ BATCH_SIZE, SEQUENCE_LENGTH, BOTTLENECK_SIZE ]

        decoder_cells = [rnn.GRUBlockCell(params['decoder_hidden_layer_size'])
                         for _ in range(params['decoder_hidden_layer_depth'])]
        # "naive dropout" implementation
        decoder_dropcells = [rnn.DropoutWrapper(cell, input_keep_prob=pkeep)
                             for cell in decoder_cells]
        decoder_multicell = rnn.MultiRNNCell(decoder_dropcells,
                                             state_is_tuple=False)
        # dropout for the softmax layer
        decoder_multicell = rnn.DropoutWrapper(decoder_multicell,
                                               output_keep_prob=pkeep)
        # dense layer to adjust dimensions
        decoder_multicell = rnn.OutputProjectionWrapper(decoder_multicell,
                                                        params['dimension'])

        decoded_Yr, decoded_H = tf.nn.dynamic_rnn(
            decoder_multicell, tiled_bottleneck, dtype=tf.float32,
            initial_state=decoder_Hin, scope='NetDecoder',
            parallel_iterations=params['parallel_iters'])
        decoded_H = tf.identity(decoded_H, name='decoded_H')  # just to give it a name
        # decoded_Yr: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION ]
        # decoded_H: [ BATCH_SIZE, DECODER_INTERNALSIZE * DECODER_NLAYERS ]
        #   (the last state in the sequence)

    return decoded_Yr, encoded_V  # encoded_V = bottleneck
def main():
    data_path = args.data
    vocab_path = args.vocab
    save_dir = args.save_dir
    word_dim = args.word_dim
    sentence_dim = args.sentence_dim
    omit_prob = args.omit_prob
    swap_prob = args.swap_prob
    config_path = args.config
    batch_size = args.batch_size
    max_epoch = args.max_epoch
    max_length = args.max_length

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Check whether all needed options are given
    if config_path is not None:
        assert (word_dim is None and sentence_dim is None
                and omit_prob is None and swap_prob is None), (
            'Model hyperparameter options must not be provided when '
            'the "config" option is given.')
        config = ModelConfig.load(config_path)
    else:
        assert not (word_dim is None or sentence_dim is None
                    or omit_prob is None or swap_prob is None), (
            'All model hyperparameter options must be provided when '
            'the "config" option is not given.')
        config = ModelConfig(word_dim=word_dim, sentence_dim=sentence_dim,
                             omit_prob=omit_prob, swap_prob=swap_prob)
        config_path = os.path.join(save_dir, 'config.ini')
        config.save(config_path)

    logging.info('Initializing the data generator...')
    data_generator = DataGenerator(data_path=data_path,
                                   vocab_path=vocab_path,
                                   eos_symbol='<EOS>',
                                   unk_symbol='<UNK>',
                                   omit_prob=config.omit_prob,
                                   swap_prob=config.swap_prob,
                                   batch_size=batch_size,
                                   max_length=max_length,
                                   max_epoch=max_epoch)

    with tf.Graph().as_default() as graph:
        with tf.Session() as sess:
            logging.info('Building the model...')

            # Placeholders
            inputs = tf.placeholder(dtype=tf.int32, shape=[None, None],
                                    name='inputs')
            inputs_length = tf.placeholder(dtype=tf.int32, shape=[None],
                                           name='inputs_length')
            targets = tf.placeholder(dtype=tf.int32, shape=[None, None],
                                     name='targets')
            targets_length = tf.placeholder(dtype=tf.int32, shape=[None],
                                            name='targets_length')

            vocab_size = len(data_generator.vocab)
            embeddings = tf.get_variable(name='embeddings',
                                         shape=[vocab_size, config.word_dim],
                                         dtype=tf.float32)

            with tf.variable_scope('decoder'):
                with tf.variable_scope('output') as output_scope:
                    # This variable-scope-trick is used to ensure that
                    # output_fn has a proper scope regardless of a caller's
                    # scope.
                    def output_fn(cell_outputs):
                        return layers.fully_connected(inputs=cell_outputs,
                                                      num_outputs=vocab_size,
                                                      activation_fn=None,
                                                      scope=output_scope)

            rnn_cell = rnn.GRUBlockCell(config.sentence_dim)
            encoder_state = sae.encode(cell=rnn_cell,
                                       embeddings=embeddings,
                                       inputs=inputs,
                                       inputs_length=inputs_length,
                                       scope='encoder')
            decoder_outputs = sae.decode_train(cell=rnn_cell,
                                               embeddings=embeddings,
                                               encoder_state=encoder_state,
                                               targets=targets[:, :-1],
                                               targets_length=targets_length - 1,
                                               scope='decoder')
            generated = sae.decode_inference(
                cell=rnn_cell,
                embeddings=embeddings,
                encoder_state=encoder_state,
                output_fn=output_fn,
                vocab_size=vocab_size,
                bos_id=data_generator.vocab['<EOS>'],
                eos_id=data_generator.vocab['<EOS>'],
                max_length=max_length,
                scope='decoder',
                reuse=True)
            loss = sae.loss(decoder_outputs=decoder_outputs,
                            output_fn=output_fn,
                            targets=targets[:, 1:],
                            targets_length=targets_length - 1)

            global_step = get_or_create_global_step()
            train_op = slim.optimize_loss(loss=loss,
                                          global_step=global_step,
                                          learning_rate=None,
                                          optimizer=tf.train.AdamOptimizer(),
                                          clip_gradients=5.0)

            summary_writer = tf.summary.FileWriter(
                logdir=os.path.join(save_dir, 'log'), graph=graph)
            summary = tf.summary.merge_all()

            tf.get_variable_scope().set_initializer(
                tf.random_normal_initializer(mean=0.0, stddev=0.01))
            tf.global_variables_initializer().run()

            saver = tf.train.Saver(max_to_keep=20)

            logging.info('Training starts!')
            for data_batch in data_generator:
                (inputs_v, inputs_length_v,
                 targets_v, targets_length_v) = data_batch
                summary_v, global_step_v, _ = sess.run(
                    fetches=[summary, global_step, train_op],
                    feed_dict={inputs: inputs_v,
                               inputs_length: inputs_length_v,
                               targets: targets_v,
                               targets_length: targets_length_v})
                summary_writer.add_summary(summary=summary_v,
                                           global_step=global_step_v)

                if global_step_v % 100 == 0:
                    logging.info('{} Iter #{}, Epoch {:.2f}'.format(
                        datetime.now(), global_step_v,
                        data_generator.progress))
                    num_samples = 2
                    (inputs_sample_v, inputs_length_sample_v,
                     targets_sample_v, targets_length_sample_v) = (
                        data_generator.sample(num_samples))
                    generated_v = sess.run(
                        fetches=generated,
                        feed_dict={inputs: inputs_sample_v,
                                   inputs_length: inputs_length_sample_v})
                    for i in range(num_samples):
                        logging.info('-' * 60)
                        logging.info('Sample #{}'.format(i))
                        inputs_sample_words = data_generator.ids_to_words(
                            inputs_sample_v[i][:inputs_length_sample_v[i]])
                        targets_sample_words = data_generator.ids_to_words(
                            targets_sample_v[i][1:targets_length_sample_v[i]])
                        generated_words = data_generator.ids_to_words(
                            generated_v[i])
                        if '<EOS>' in generated_words:
                            eos_index = generated_words.index('<EOS>')
                            generated_words = generated_words[:eos_index + 1]
                        logging.info('Input: {}'.format(
                            ' '.join(inputs_sample_words)))
                        logging.info('Target: {}'.format(
                            ' '.join(targets_sample_words)))
                        logging.info('Generated: {}'.format(
                            ' '.join(generated_words)))
                    logging.info('-' * 60)

                if global_step_v % 500 == 0:
                    save_path = os.path.join(save_dir, 'model.ckpt')
                    real_save_path = saver.save(sess=sess,
                                                save_path=save_path,
                                                global_step=global_step_v)
                    logging.info('Saved the checkpoint to: {}'.format(
                        real_save_path))
def build_cell(idx):
    # with tf.variable_scope('encoder_cell', initializer=default_init(seed + idx)):
    cell = rnn.GRUBlockCell(num_units=hparams.rnn_depth)
    return cell
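# Hypothetical usage of build_cell: the per-layer cells are typically stacked
# into a MultiRNNCell, mirroring the pattern used elsewhere in this section
# (hparams.encoder_rnn_layers is an assumed attribute name, for illustration):
encoder_cells = [build_cell(i) for i in range(hparams.encoder_rnn_layers)]
encoder_multicell = rnn.MultiRNNCell(encoder_cells, state_is_tuple=False)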
def _convlstmnet(
        features,  # This is batch_features from input_fn
        labels,    # This is batch_labels from input_fn
        mode,      # An instance of tf.estimator.ModeKeys
        params,
        is_test):
    with tf.variable_scope('EncoderNet') as scope:
        if is_test:
            scope.reuse_variables()
        if mode == tf.estimator.ModeKeys.TRAIN and not is_test:
            pkeep = params['pkeep']
        else:
            pkeep = 1.0

        x = tf.feature_column.input_layer(
            features, feature_columns=params['feature_columns'])
        X = tf.reshape(x, shape=[x.get_shape()[0],
                                 params['sequence_length'],
                                 params['dimension'], 1])
        # X: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION, 1 ]

        # Convolutional layer 1
        conv1 = tf.layers.conv2d(inputs=X, filters=6, kernel_size=[5, 1],
                                 padding="same", activation=tf.nn.relu)
        # conv1: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION, 6 ]

        # Conv layer 2 with some stride
        conv2 = tf.layers.conv2d(inputs=conv1, filters=10, kernel_size=[5, 1],
                                 padding="same", strides=(2, 1),
                                 activation=tf.nn.relu)
        # conv2: [ BATCH_SIZE, SEQUENCE_LENGTH/2, DIMENSION, 10 ]

        # Conv layer 3 with a big filter size and stride
        conv3 = tf.layers.conv2d(inputs=conv2, filters=15, kernel_size=[8, 1],
                                 padding="same", strides=(4, 1),
                                 activation=tf.nn.relu)
        # conv3: [ BATCH_SIZE, SEQUENCE_LENGTH/8, DIMENSION, 15 ]

        # flatten:
        conv3_flat = tf.reshape(conv3, [conv3.get_shape()[0],
                                        7 * params['dimension'] * 15])
        dense = tf.layers.dense(inputs=conv3_flat, units=128,
                                activation=tf.nn.relu)
        # tf.layers.dropout expects the *drop* probability, so convert the
        # keep probability
        dropout = tf.layers.dropout(
            inputs=dense, rate=1.0 - params['pkeep'],
            training=mode == tf.estimator.ModeKeys.TRAIN)

        # Last layer maps the dense output to the bottleneck representation
        bottleneck = layers.fully_connected(dropout,
                                            params['bottleneck_size'],
                                            activation_fn=tf.nn.relu)
        encoded_V = bottleneck
        # bottleneck: [ BATCH_SIZE, BOTTLENECK_SIZE ]

    with tf.variable_scope('NetDecoder') as scope:
        if is_test:
            scope.reuse_variables()
        if mode == tf.estimator.ModeKeys.TRAIN and not is_test:
            pkeep = params['pkeep']
        else:
            pkeep = 1.0

        decoder_Hin = params['decoder_Hin']
        # decoder_Hin: [ BATCH_SIZE, DECODER_INTERNALSIZE * DECODER_NLAYERS ]

        # tile the bottleneck layer along the time axis
        tiled_bottleneck = tf.tile(tf.expand_dims(bottleneck, axis=1),
                                   multiples=[1, params['sequence_length'], 1])
        # tiled_bottleneck: [ BATCH_SIZE, SEQUENCE_LENGTH, BOTTLENECK_SIZE ]

        decoder_cells = [rnn.GRUBlockCell(params['decoder_hidden_layer_size'])
                         for _ in range(params['decoder_hidden_layer_depth'])]
        # "naive dropout" implementation
        decoder_dropcells = [rnn.DropoutWrapper(cell, input_keep_prob=pkeep)
                             for cell in decoder_cells]
        decoder_multicell = rnn.MultiRNNCell(decoder_dropcells,
                                             state_is_tuple=False)
        # dropout for the softmax layer
        decoder_multicell = rnn.DropoutWrapper(decoder_multicell,
                                               output_keep_prob=pkeep)
        # dense layer to adjust dimensions
        decoder_multicell = rnn.OutputProjectionWrapper(decoder_multicell,
                                                        params['dimension'])

        decoder_Yr, decoder_H = tf.nn.dynamic_rnn(
            decoder_multicell, tiled_bottleneck, dtype=tf.float32,
            initial_state=decoder_Hin, scope='NetDecoder',
            parallel_iterations=params['parallel_iters'])
        decoder_H = tf.identity(decoder_H, name='decoder_H')  # just to give it a name
        # decoder_Yr: [ BATCH_SIZE, SEQUENCE_LENGTH, DIMENSION ]
        # decoder_H: [ BATCH_SIZE, DECODER_INTERNALSIZE * DECODER_NLAYERS ]
        #   (the last state in the sequence)

    return decoder_Yr, encoded_V
def main():
    model_path = args.model
    config_path = args.config
    vocab_path = args.vocab
    test_data_path = args.test_data
    out_path = args.out
    batch_size = args.batch_size

    config = ModelConfig.load(config_path)
    data_generator = DataGenerator(data_path=test_data_path,
                                   vocab_path=vocab_path,
                                   eos_symbol='<EOS>',
                                   unk_symbol='<UNK>',
                                   omit_prob=0.0,
                                   swap_prob=0.0,
                                   batch_size=batch_size,
                                   max_length=10000,
                                   max_epoch=1)
    out_file = open(out_path, 'w')

    with tf.Graph().as_default():
        with tf.Session() as sess:
            inputs = tf.placeholder(dtype=tf.int32, shape=[None, None])
            inputs_length = tf.placeholder(dtype=tf.int32, shape=[None])

            vocab_size = len(data_generator.vocab)
            embeddings = tf.get_variable(name='embeddings',
                                         shape=[vocab_size, config.word_dim],
                                         dtype=tf.float32)

            with tf.variable_scope('decoder'):
                with tf.variable_scope('output') as output_scope:
                    # This variable-scope-trick is used to ensure that
                    # output_fn has a proper scope regardless of a caller's
                    # scope.
                    def output_fn(cell_outputs):
                        return layers.fully_connected(inputs=cell_outputs,
                                                      num_outputs=vocab_size,
                                                      activation_fn=None,
                                                      scope=output_scope)

            rnn_cell = rnn.GRUBlockCell(config.sentence_dim)
            sent_vec = sae.encode(cell=rnn_cell,
                                  embeddings=embeddings,
                                  inputs=inputs,
                                  inputs_length=inputs_length,
                                  scope='encoder')

            saver = tf.train.Saver()
            saver.restore(sess=sess, save_path=model_path)

            for data_batch in data_generator:
                inputs_v, inputs_length_v, _, _ = data_batch
                sent_vec_v = sess.run(
                    fetches=sent_vec,
                    feed_dict={inputs: inputs_v,
                               inputs_length: inputs_length_v})
                for vec in sent_vec_v:
                    out_file.write(','.join('{:.5f}'.format(x) for x in vec))
                    out_file.write('\n')

    out_file.close()