def __init__(self, hidden_num, inputs, cell=None, optimizer=None, reverse=True, decode_without_input=False): """ Args: hidden_num : number of hidden elements of each LSTM unit. inputs : a list of input tensors with size (batch_num x elem_num) cell : an rnn cell object (the default option is `tf.python.ops.rnn_cell.LSTMCell`) optimizer : optimizer for rnn (the default option is `tf.train.AdamOptimizer`) reverse : Option to decode in reverse order. decode_without_input : Option to decode without input. """ self.batch_num = inputs[0].get_shape().as_list()[0] self.elem_num = inputs[0].get_shape().as_list()[1] if cell is None: self._enc_cell = LSTMCell(hidden_num) self._dec_cell = LSTMCell(hidden_num) else : self._enc_cell = cell self._dec_cell = cell with tf.variable_scope('encoder'): self.z_codes, self.enc_state = tf.nn.rnn( self._enc_cell, inputs, dtype=tf.float32) with tf.variable_scope('decoder') as vs: dec_weight_ = tf.Variable( tf.truncated_normal([hidden_num, self.elem_num], dtype=tf.float32), name="dec_weight") dec_bias_ = tf.Variable( tf.constant(0.1, shape=[self.elem_num], dtype=tf.float32), name="dec_bias") # if decode_without_input: # dec_inputs = [tf.zeros(tf.shape(inputs[0]), dtype=tf.float32) # for _ in range(len(inputs))] # dec_outputs, dec_state = tf.nn.rnn( # self._dec_cell, dec_inputs, # initial_state=self.enc_state, dtype=tf.float32) """the shape of each tensor dec_output_ : (step_num x hidden_num) dec_weight_ : (hidden_num x elem_num) dec_bias_ : (elem_num) output_ : (step_num x elem_num) input_ : (step_num x elem_num) """ # if reverse: # dec_outputs = dec_outputs[::-1] # dec_output_ = tf.transpose(tf.pack(dec_outputs), [1,0,2]) # dec_weight_ = tf.tile(tf.expand_dims(dec_weight_, 0), [self.batch_num,1,1]) # self.output_ = tf.batch_matmul(dec_output_, dec_weight_) + dec_bias_ else :
def __init__(self, num_units, state_is_tuple=True, cell_type='lstm', scope='bi_rnn'):
    """Create the forward/backward cell pair for one bidirectional RNN layer.

    Args:
        num_units: size handed to each recurrent cell.
        state_is_tuple: forwarded to ``LSTMCell``; not used for GRU cells.
        cell_type: ``'gru'`` builds ``GRUCell`` objects, any other value
            builds ``LSTMCell`` objects.
        scope: stored unchanged on ``self.scope``.
    """
    self.num_units = num_units
    wants_gru = cell_type == 'gru'
    if not wants_gru:
        # Default: everything that is not 'gru' is treated as LSTM.
        self.cell_fw = LSTMCell(self.num_units, state_is_tuple=state_is_tuple)
        self.cell_bw = LSTMCell(self.num_units, state_is_tuple=state_is_tuple)
    else:
        self.cell_fw = GRUCell(self.num_units)
        self.cell_bw = GRUCell(self.num_units)
    self.scope = scope
def __init__(self, num_layers, num_units, cell_type='lstm', scope='stacked_bi_rnn'):
    """Create per-layer forward and backward cells for a stacked bi-RNN.

    Args:
        num_layers: number of cells built for each direction.
        num_units: size handed to every cell.
        cell_type: ``'gru'`` selects ``GRUCell``; any other value selects
            ``LSTMCell``.
        scope: stored unchanged on ``self.scope``.
    """
    self.num_layers = num_layers
    self.num_units = num_units
    # Pick the cell class once; 'gru' is the only non-default choice.
    cell_cls = GRUCell if cell_type == 'gru' else LSTMCell
    # All forward cells are created before any backward cell.
    self.cells_fw = [cell_cls(self.num_units) for _ in range(self.num_layers)]
    self.cells_bw = [cell_cls(self.num_units) for _ in range(self.num_layers)]
    self.scope = scope
def __init__(self, num_layers, num_units, scope='stacked_bi_rnn'):
    """Create per-layer forward and backward ``LSTMCell`` lists.

    Args:
        num_layers: number of cells built for each direction.
        num_units: size handed to every ``LSTMCell``.
        scope: stored unchanged on ``self.scope``.
    """
    self.num_layers = num_layers
    self.num_units = num_units

    def _new_stack():
        # A fresh list of independent cells, one per layer.
        return [LSTMCell(self.num_units) for _ in range(self.num_layers)]

    self.cells_fw = _new_stack()
    self.cells_bw = _new_stack()
    self.scope = scope
def getCell(self, is_training, dp, config):
    """Build the one- or two-layer recurrent cell described by ``config``.

    Args:
        is_training: truthy when the graph is built for training; dropout
            wrappers are only added in that case.
        dp: unused by this method; kept for interface compatibility.
        config: object providing ``cell_type`` ("LSTM", "RNN" or "GRU"),
            ``num_layer`` (1 or 2), ``hidden_size``, ``keep_prob`` and,
            for two layers, ``hidden_size_2``.

    Returns:
        A ``rnn_cell.MultiRNNCell`` wrapping the configured layer cells.

    Raises:
        ValueError: if ``config.cell_type`` is not supported or
            ``config.num_layer`` is not 1 or 2.
    """
    # code for RNN
    mode = "training" if is_training else "testing"
    print("==> Construct ", config.cell_type, " graph for " + mode)

    if config.cell_type == "LSTM":
        def make_cell(size):
            return LSTMCell(size, forget_bias=0.0, state_is_tuple=True)
    elif config.cell_type == "RNN":
        make_cell = BasicRNNCell
    elif config.cell_type == "GRU":
        # GRUCell has no forget gate and a single state tensor, so it does
        # not take `forget_bias` / `state_is_tuple`; passing them raises.
        make_cell = GRUCell
    else:
        raise ValueError("cell type should be GRU,LSTM,RNN")

    # hidden_size_2 is only read when a second layer is requested.
    if config.num_layer == 1:
        layer_sizes = [config.hidden_size]
    elif config.num_layer == 2:
        layer_sizes = [config.hidden_size, config.hidden_size_2]
    else:
        raise ValueError("config.num_layer should be 1 or 2")

    cells = [make_cell(size) for size in layer_sizes]

    # add dropout layer between hidden layers
    if is_training and config.keep_prob < 1:
        cells = [
            DropoutWrapper(layer_cell,
                           input_keep_prob=config.keep_prob,
                           output_keep_prob=config.keep_prob)
            for layer_cell in cells
        ]

    return rnn_cell.MultiRNNCell(cells, state_is_tuple=True)
def build_decoder_cell(rank, u_emb, batch_size, depth=2):
    """Build a stacked LSTM decoder cell and its initial state.

    The first layer is a plain ``LSTMCell``; every further layer is an
    ``LSTMCell`` wrapped in ``ResidualWrapper``.  The first layer's initial
    state is ``LSTMStateTuple(zeros_like(u_emb), u_emb)``; the remaining
    layers start from their own zero state.

    Args:
        rank: number of units of every LSTM layer.
        u_emb: tensor placed in the second slot of the first layer's state.
        batch_size: batch size used for the zero states of layers 1..depth-1.
        depth: number of stacked layers.

    Returns:
        ``(MultiRNNCell, tuple_of_initial_states)``.
    """
    layers = [
        LSTMCell(rank, state_is_tuple=True) if idx == 0
        else ResidualWrapper(LSTMCell(rank, state_is_tuple=True))
        for idx in range(depth)
    ]

    states = [LSTMStateTuple(tf.zeros_like(u_emb), u_emb)]
    # Layers after the first have nothing to be seeded with.
    for upper_layer in layers[1:]:
        states.append(upper_layer.zero_state(batch_size, tf.float32))

    return MultiRNNCell(layers), tuple(states)
def inference_layer(self, inputs):
    """Build the recurrent encoder and the per-position class scores.

    When ``self.dblstm`` is set, the encoder is
    ``deep_bidirectional_dynamic_rnn`` over ``self.num_layers`` cells from
    ``self._dblstm_cell()``; otherwise it is a single
    ``tf.nn.bidirectional_dynamic_rnn`` whose two outputs are concatenated
    on axis 2.  The outputs are flattened to ``[-1, state_dim]``, projected
    with ``softmax_W`` / ``softmax_b`` and reshaped into ``self.scores``.
    When ``self.crf`` is set, a ``transitions`` variable of shape
    ``[num_classes, num_classes]`` is created as
    ``self.transition_params``.

    Args:
        inputs: input tensor for the RNN
            (assumed batch-major ``[batch, time, dim]`` — TODO confirm).
    """
    if self.dblstm:
        with tf.name_scope('deep_bidirectional_rnn'):
            rnn_outputs, _ = deep_bidirectional_dynamic_rnn(
                [self._dblstm_cell() for _ in range(self.num_layers)],
                inputs, sequence_length=self.sequence_lengths)
        state_dim = self.state_dim
    else:
        # Separate forward and backward cells, both with variational dropout
        # on the recurrent state and on the output.
        cell_fw = DropoutWrapper(LSTMCell(num_units=self.state_dim),
                                 variational_recurrent=True,
                                 state_keep_prob=self.dropout_keep_prob,
                                 output_keep_prob=self.dropout_keep_prob,
                                 dtype=tf.float32)
        cell_bw = DropoutWrapper(LSTMCell(num_units=self.state_dim),
                                 variational_recurrent=True,
                                 state_keep_prob=self.dropout_keep_prob,
                                 output_keep_prob=self.dropout_keep_prob,
                                 dtype=tf.float32)
        with tf.name_scope('bidirectional_rnn'):
            rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw, cell_bw, inputs,
                sequence_length=self.sequence_lengths, dtype=tf.float32)
            rnn_outputs = tf.concat(rnn_outputs, 2)
        # Forward and backward outputs are concatenated, doubling the width.
        state_dim = self.state_dim * 2
    with tf.name_scope('linear_projection'):
        softmax_weights = tf.get_variable(
            'softmax_W', [state_dim, self.num_classes],
            initializer=tf.random_normal_initializer(0, 0.01))
        softmax_bias = tf.get_variable('softmax_b', [self.num_classes],
                                       initializer=tf.zeros_initializer)
        # Axis 1 is read as the time axis; remembered so the flattened
        # logits can be reshaped back afterwards.
        time_steps = tf.shape(rnn_outputs)[1]
        rnn_outputs = tf.reshape(
            rnn_outputs, [-1, state_dim],
            name="flatten_rnn_outputs_for_linear_projection")
        logits = tf.nn.xw_plus_b(x=rnn_outputs, weights=softmax_weights,
                                 biases=softmax_bias,
                                 name="softmax_projection")
        self.scores = tf.reshape(logits, [-1, time_steps, self.num_classes],
                                 name="unflatten_logits")
    if self.crf:
        # presumably CRF transition scores; no CRF op is built here
        self.transition_params = tf.get_variable(
            "transitions", [self.num_classes, self.num_classes])
def _build_pre(self):
    """Set up the two-layer LSTM stack and scalar hyperparameters.

    Sets ``self.dimA`` (units per layer), ``self.cellA`` (the stacked cell)
    and the constants ``self.b1``, ``self.b2``, ``self.lr`` and
    ``self.eps`` (presumably Adam-style decay rates, learning rate and
    epsilon — confirm against their use).
    """
    self.dimA = 20
    # Build one LSTMCell per layer.  `[LSTMCell(...)] * 2` would place the
    # same cell object in both layers, which TensorFlow >= 1.1 either
    # rejects or treats as a single set of shared weights.
    self.cellA = MultiRNNCell([LSTMCell(self.dimA) for _ in range(2)])
    self.b1 = 0.95
    self.b2 = 0.95
    self.lr = 0.1
    self.eps = 1e-8
def impress(self, state_code, pre_impress_states):
    """Run a single ``tf.nn.raw_rnn`` step of a 3-layer LSTM on ``state_code``.

    Args:
        state_code: tensor fed as the input of the first (and only) step.
        pre_impress_states: initial state for the stacked cell, or ``None``
            to start from the cell's zero state.

    Returns:
        ``(state_impress_code, final_loop_state)`` where
        ``state_impress_code`` is element 0 along the first axis of the
        transposed emitted outputs and ``final_loop_state`` is whatever
        ``raw_rnn`` returns as loop state.
    """
    # LSTM, 3 layers
    self.impress_lay_num = 3
    with tf.variable_scope('impress', reuse=tf.AUTO_REUSE):
        multirnn_cell = MultiRNNCell(
            [LSTMCell(self.impress_dim) for _ in range(self.impress_lay_num)],
            state_is_tuple=True)

        # Identity test: `== None` is not a reliable None check for tensors
        # or state tuples.
        if pre_impress_states is None:
            initial_state = multirnn_cell.zero_state(self.batch_size,
                                                     tf.float32)
        else:
            initial_state = pre_impress_states

        def loop_fn(time, cell_output, cell_state, loop_state):
            if cell_output is None:  # time = 0
                # initialization
                next_input = state_code
                next_state = initial_state
                emit_output = None
            else:
                next_input = cell_output
                next_state = cell_state
                emit_output = cell_output
            # Stop after exactly one step.
            elements_finished = (time >= 1)
            # No loop state is carried between steps.
            return (elements_finished, next_input, next_state, emit_output,
                    None)

        emit_ta, states, final_loop_state = tf.nn.raw_rnn(multirnn_cell,
                                                          loop_fn)
        # transpose for putting batch dimension to first dimension
        # NOTE(review): the trailing [0] then selects index 0 of that first
        # (batch) axis — confirm this is intended for batch_size > 1.
        state_impress_code = tf.transpose(emit_ta.stack(), [1, 0, 2])[0]
        # NOTE(review): loop_fn never sets a loop state, so
        # final_loop_state carries no RNN state; `states` (the final cell
        # state) is discarded.  Confirm which one callers expect back.
        return state_impress_code, final_loop_state
def RNN(_X, _weights, _biases, lens):
    """Run the recurrent stack and project the last valid output per sequence.

    Args:
        _X: input tensor handed to ``multiPLSTM``.
        _weights: mapping with an ``'out'`` projection matrix.
        _biases: mapping with an ``'out'`` bias vector.
        lens: per-sequence lengths; ``lens - 1`` is used as the index of the
            last valid time step.

    Returns:
        ``matmul(last_outputs, _weights['out']) + _biases['out']``.

    Raises:
        ValueError: if ``FLAGS.unit`` is not "PLSTM", "GRU" or "LSTM".
    """
    # The original built a cell for FLAGS.unit but never used it: the
    # network always goes through multiPLSTM.  Only the validation is kept.
    if FLAGS.unit not in ("PLSTM", "GRU", "LSTM"):
        raise ValueError("Unit '{}' not implemented.".format(FLAGS.unit))

    outputs = multiPLSTM(_X, lens, FLAGS.n_layers, FLAGS.n_hidden, n_input)
    # Keep only the first FLAGS.n_hidden features of every output.
    outputs = tf.slice(outputs, [0, 0, 0], [-1, -1, FLAGS.n_hidden])

    # TODO better (?) in lack of smart indexing
    # Flatten (batch, time) into one axis and gather row
    # b * max_len + (len_b - 1) for every sequence b.
    batch_size = tf.shape(outputs)[0]
    max_len = tf.shape(outputs)[1]
    out_size = int(outputs.get_shape()[2])
    index = tf.range(0, batch_size) * max_len + (lens - 1)
    flat = tf.reshape(outputs, [-1, out_size])
    relevant = tf.gather(flat, index)

    return tf.nn.bias_add(tf.matmul(relevant, _weights['out']),
                          _biases['out'])
def __init__(self, num_units, memory, pmemory, cell_type='lstm'):
    """Store the attention memory and create the wrapped ``LSTMCell``.

    Args:
        num_units: size of the inner ``LSTMCell``.
        memory: tensor whose last static dimension becomes
            ``self.mem_units``.
        pmemory: stored unchanged on ``self.pmemory``.
        cell_type: accepted but not read here; the inner cell is always
            an ``LSTMCell``.
    """
    super(AttentionCell, self).__init__()
    self._cell = LSTMCell(num_units)
    self.num_units = num_units
    self.memory, self.pmemory = memory, pmemory
    # Static size of the last axis of `memory`.
    memory_dims = memory.get_shape().as_list()
    self.mem_units = memory_dims[-1]
def Encoder(self, xs):
    """Encode ``xs`` with a stacked bidirectional LSTM into one vector per item.

    ``xs`` is one-hot encoded (depth ``self.val_size_x``), embedded with
    ``self.WordEmb`` and run through a bidirectional dynamic RNN.  For each
    batch element the concatenated forward/backward output at position
    ``length - 1`` is taken and passed through a ReLU dense layer of
    ``self.encoder_units`` units.

    Args:
        xs: integer-like token ids
            (assumed ``[batch, time]`` — TODO confirm).

    Returns:
        The dense-layer output for the batch.

    Raises:
        ValueError: if neither ``self.args.train`` nor ``self.args.test``
            is set, since no sequence-length placeholder can be chosen.
    """
    encoder_input = tf.one_hot(tf.cast(xs, tf.int32), self.val_size_x)
    encoder_input = self.WordEmb(encoder_input)

    if self.args.train:
        inputs_length = self.inputs_length_PH
    elif self.args.test:
        inputs_length = self.inputs_length_test_PH
    else:
        # Previously this surfaced later as an unbound-local error.
        raise ValueError("Encoder requires args.train or args.test to be set")

    def _new_stack():
        # One independent LSTMCell per layer.
        return MultiRNNCell(
            [LSTMCell(self.encoder_units)
             for _ in range(self.encoder_lay_Num)],
            state_is_tuple=True)

    # The two directions need their own cell objects.  Passing one object
    # as both cell_fw and cell_bw makes them share (or clash over) a single
    # set of weights.
    # NOTE(review): this changes the variable set; checkpoints saved with
    # the shared cell will not restore into this graph.
    (fw_outputs, bw_outputs), _ = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=_new_stack(),
        cell_bw=_new_stack(),
        inputs=encoder_input,
        sequence_length=inputs_length,
        dtype=self.dtype)

    sentence_code = tf.concat((fw_outputs, bw_outputs), axis=2)
    # Pick, per batch element, the output at its last valid position.
    last_outputs = [sentence_code[i, inputs_length[i] - 1, :]
                    for i in range(self.batch_size)]
    encoder_output = tf.stack(last_outputs)
    encoder_output = tf.layers.dense(inputs=encoder_output,
                                     units=self.encoder_units,
                                     activation=tf.nn.relu)
    return encoder_output
def s2v(self):
    """Build the sentence-to-vector part of the graph.

    Creates the word-embedding variable (from ``self.pre_trained_word_emb``
    when given, otherwise uniformly initialised in ``[-sqrt(3), sqrt(3)]``),
    looks up and applies dropout to ``self.sentence``, creates the
    recurrent cell, and dispatches on ``self.rnn_model`` to the matching
    encoder method.  Results are stored on ``self`` (``sentence_emb`` and,
    depending on the model, the extra outputs of that encoder).

    Raises:
        ValueError: if ``self.rnn_model`` is not one of ``'leap-lstm'``,
            ``'rnn'``, ``'brnn'``, ``'skip-rnn-2017'`` or ``'skim-rnn'``.
    """
    sqrt3 = math.sqrt(3.0)
    initializer = tf.random_uniform_initializer(-sqrt3, sqrt3,
                                                dtype=self.dtype)

    # word embedding layer
    if self.pre_trained_word_emb is not None:
        self.word_embeddings = tf.get_variable(
            name='word_embedding',
            initializer=self.pre_trained_word_emb,
            dtype=self.dtype)
    else:
        self.word_embeddings = tf.get_variable(
            name='word_embedding',
            shape=[self.voc_size, self.emb_size],
            initializer=initializer,
            dtype=self.dtype)
    self.embedded_sentence = tf.nn.embedding_lookup(
        self.word_embeddings, self.sentence)
    self.embedded_sentence = tf.nn.dropout(
        self.embedded_sentence, keep_prob=self.keep_word_prob_placeholder)

    # create the rnn cell
    if self.rnn_cell_type.lower() == 'gru':
        rnn_cell = GRUCell
    else:
        rnn_cell = LSTMCell
    rnn_cell = rnn_cell(self.hidden_units)
    if self.use_lstm_dropout:
        rnn_cell = DropoutWrapper(
            rnn_cell, dtype=tf.float32,
            output_keep_prob=self.keep_lstm_prob_placeholder)

    if self.rnn_model == 'leap-lstm':
        self.sentence_emb, self.skip_dis_output = self.leap_lstm(rnn_cell)
    elif self.rnn_model == 'rnn':
        if self.rnn_pattern == 1:
            self.sentence_emb = self.general_rnn(rnn_cell, out='LAST')
        else:
            # used to measure training time
            self.sentence_emb = self.general_rnn_for_pattern(
                rnn_cell, out='LAST')
    elif self.rnn_model == 'brnn':
        self.sentence_emb = self.general_brnn()
    elif self.rnn_model == 'skip-rnn-2017':
        (self.sentence_emb, self.budget_loss, self.updated_states,
         self.rnn_final_states, self.rnn_outputs) = self.skip_rnn_2017()
    elif self.rnn_model == 'skim-rnn':
        small_rnn_cell = LSTMCell(5)  # small size 5
        small_rnn_cell = DropoutWrapper(
            small_rnn_cell, dtype=tf.float32,
            output_keep_prob=self.keep_lstm_prob_placeholder)
        # The skim-rnn settings simply follow the reference source code
        # published on GitHub.
        (self.sentence_emb, self.skip_dis_output,
         self.skim_loss) = self.skim_rnn(rnn_cell, small_rnn_cell)
    else:
        # Previously `print(...); exit()`, which ended the process with
        # exit status 0 and no traceback.  Fail loudly instead.
        raise ValueError(
            "bad rnn model! got {!r}".format(self.rnn_model))
def _build_pre(self, size):
    """Set up a two-layer LSTM stack of width ``size`` and scalar constants.

    Sets ``self.dimA``, ``self.num_of_layers``, ``self.cellA`` and the
    constants ``self.b1``, ``self.b2``, ``self.lr`` and ``self.eps``.

    Args:
        size: number of units of each ``LSTMCell``.
    """
    self.dimA = size
    self.num_of_layers = 2

    # One independent LSTMCell per layer.
    layer_cells = []
    for _ in range(self.num_of_layers):
        layer_cells.append(LSTMCell(num_units=self.dimA))
    self.cellA = MultiRNNCell(layer_cells)

    self.b1 = self.b2 = 0.95
    self.lr = 0.1
    self.eps = 1e-8
def _create_rnn_cell(self):
    """Create one recurrent cell according to ``self.cfg``.

    ``cfg.cell_type == "gru"`` yields a ``GRUCell``, anything else an
    ``LSTMCell``, both with ``cfg.num_units`` units.  The cell is then
    optionally wrapped with output dropout (``cfg.use_dropout``) and a
    residual connection (``cfg.use_residual``), in that order.

    Returns:
        The (possibly wrapped) cell.
    """
    if self.cfg.cell_type == "gru":
        built = GRUCell(self.cfg.num_units)
    else:
        built = LSTMCell(self.cfg.num_units)

    if self.cfg.use_dropout:
        built = DropoutWrapper(built, output_keep_prob=self.keep_prob)

    if self.cfg.use_residual:
        built = ResidualWrapper(built)

    return built
def Decoder(self, encoder_output):
    """Unroll the stacked LSTM decoder with ``tf.nn.raw_rnn``.

    At every step the cell input is the concatenation (axis 1) of an
    embedded token and ``encoder_output``.  In test mode the token is the
    argmax of the projected previous cell output; in train mode it is taken
    from ``self.ys_PH`` at ``time - 1``.  The first input is
    ``encoder_output`` concatenated with itself.

    Args:
        encoder_output: tensor concatenated to every decoder input
            (assumed ``[batch, features]`` — TODO confirm).

    Returns:
        Logits reshaped to ``(self.batch_size, self.max_len, self.val_size)``.
    """
    def loop_fn(time, cell_output, cell_state, loop_state):
        if cell_output is None:  # time = 0
            # initialization
            input = tf.concat((encoder_output, encoder_output), axis=1)
            state = (multirnn_cell.zero_state(self.batch_size, tf.float32))
            emit_output = None
            loop_state = None
            elements_finished = False
        else:
            emit_output = cell_output
            if self.args.test:
                # decoder_units to val_size
                transformed_output = tf.nn.xw_plus_b(
                    cell_output, self.decoder_W, self.decoder_b)
                # decoder_units to vac_size
                # argmax
                transformed_output = tf.argmax(transformed_output, 1)
                transformed_output = tf.one_hot(transformed_output,
                                                self.val_size,
                                                on_value=1.0,
                                                off_value=0.0,
                                                axis=-1)
                # val_size to decoder_units//2
                transformed_output = self.WordEmb(transformed_output)
            elif self.args.train:
                # Teacher forcing: feed the ground-truth token of the
                # previous step.
                ys_onehot = tf.one_hot(self.ys_PH[:, (time - 1)],
                                       self.val_size,
                                       on_value=1.0,
                                       off_value=0.0,
                                       axis=-1)
                transformed_output = self.WordEmb(ys_onehot)
            # NOTE(review): transformed_output is unbound when neither
            # args.test nor args.train is set.
            input = tf.concat([transformed_output, encoder_output], axis=1)
            state = cell_state
            loop_state = None
            elements_finished = (time >= self.max_len)
        return (elements_finished, input, state, emit_output, loop_state)

    # Defined after loop_fn; loop_fn only reads it when raw_rnn calls it.
    multirnn_cell = MultiRNNCell(
        [LSTMCell(self.decoder_units) for _ in range(self.lay_num)],
        state_is_tuple=True)
    emit_ta, final_state, final_loop_state = tf.nn.raw_rnn(
        multirnn_cell, loop_fn)
    # transpose for putting batch dimension to first dimension
    outputs = tf.transpose(emit_ta.stack(), [1, 0, 2])
    # transform decoder_units to val_size
    decoder_output_flat = tf.reshape(outputs, [-1, self.decoder_units])
    decoder_output_transform_flat = tf.nn.xw_plus_b(
        decoder_output_flat, self.decoder_W, self.decoder_b)
    decoder_logits = tf.reshape(
        decoder_output_transform_flat,
        (self.batch_size, self.max_len, self.val_size))
    return decoder_logits
def _create_single_rnn_cell(self, num_units):
    """Create one recurrent cell of ``num_units`` units from ``self.cfg``.

    ``cfg["cell_type"] == "gru"`` yields a ``GRUCell``, anything else an
    ``LSTMCell``.  The cell is then optionally wrapped with output dropout
    (``cfg["use_dropout"]``) and a residual connection
    (``cfg["use_residual"]``), in that order.

    Args:
        num_units: size of the cell.

    Returns:
        The (possibly wrapped) cell.
    """
    if self.cfg["cell_type"] == "gru":
        built = GRUCell(num_units)
    else:
        built = LSTMCell(num_units)

    if self.cfg["use_dropout"]:
        built = DropoutWrapper(built, output_keep_prob=self.rnn_keep_prob)

    if self.cfg["use_residual"]:
        built = ResidualWrapper(built)

    return built
def _build_model_op(self):
    """Build encoder -> self-attention -> projection and set ``self.logits``.

    A bidirectional LSTM encodes ``self.word_embeddings``; its concatenated
    outputs attend over themselves via ``dot_attention``; a ``dense`` layer
    then projects to ``self.cfg.tag_vocab_size``.  The static shape of each
    stage is printed while the graph is built.
    """
    with tf.variable_scope('encoder'):
        fw_cell = LSTMCell(num_units=self.cfg.num_units)
        bw_cell = LSTMCell(num_units=self.cfg.num_units)
        bi_outputs, _ = bidirectional_dynamic_rnn(fw_cell, bw_cell,
                                                  self.word_embeddings,
                                                  self.seq_lengths)
        enc_outputs = tf.concat(bi_outputs, axis=-1)
        enc_shape = enc_outputs.get_shape().as_list()
        print('encoder output shape: {}'.format(enc_shape))

    # A BahdanauAttention-wrapped bidirectional stage ('attention' scope)
    # used to sit here; it is disabled.

    with tf.variable_scope('self_attention'):
        self_att = dot_attention(enc_outputs, enc_outputs,
                                 self.cfg.num_units,
                                 keep_prob=self.keep_prob,
                                 is_train=self.is_train)
        att_shape = self_att.get_shape().as_list()
        print('self-attention output shape: {}'.format(att_shape))

    # A second bidirectional LSTM stage ('decoder' scope) over the
    # self-attention output is likewise disabled.

    with tf.variable_scope('project'):
        self.logits = dense(self_att, self.cfg.tag_vocab_size, use_bias=True)
        logits_shape = self.logits.get_shape().as_list()
        print('projected output (logits) shape: {}'.format(logits_shape))
def generator(z, hidden_units_g, seq_length, batch_size, num_generated_features, reuse=False, parameters=None, cond_dim=0, c=None, learn_scale=True):
    """Build the LSTM generator and return its tanh-squashed output.

    If parameters are supplied, initialise as such: the output weights,
    bias, scale and LSTM initializer are then constant initializers built
    from entries of ``parameters``.

    Args:
        z: input tensor for the RNN
            (assumed ``[batch, seq_length, dim]`` — TODO confirm).
        hidden_units_g: number of LSTM units.
        seq_length: sequence length, used for the RNN ``sequence_length``
            and for the final reshape.
        batch_size: number of sequences, used to build ``sequence_length``.
        num_generated_features: size of the last output axis.
        reuse: when truthy, variables of the ``generator`` scope are reused.
        parameters: optional mapping from variable names
            (e.g. ``'generator/W_out_G:0'``) to initial values.
        cond_dim: when > 0, ``c`` is repeated along axis 1 and concatenated
            to ``z`` on axis 2.
        c: conditioning tensor; must not be ``None`` when ``cond_dim > 0``.
        learn_scale: whether the ``scale_out_G`` variable is trainable.

    Returns:
        Tensor reshaped to ``[-1, seq_length, num_generated_features]``.
    """
    with tf.variable_scope("generator") as scope:
        if reuse:
            scope.reuse_variables()
        if parameters is None:
            W_out_G_initializer = tf.truncated_normal_initializer()
            b_out_G_initializer = tf.truncated_normal_initializer()
            scale_out_G_initializer = tf.constant_initializer(value=1.0)
            lstm_initializer = None
            bias_start = 1.0
        else:
            W_out_G_initializer = tf.constant_initializer(value=parameters['generator/W_out_G:0'])
            b_out_G_initializer = tf.constant_initializer(value=parameters['generator/b_out_G:0'])
            try:
                scale_out_G_initializer = tf.constant_initializer(value=parameters['generator/scale_out_G:0'])
            except KeyError:
                # No stored scale: start from 1 and require it to be learnt.
                scale_out_G_initializer = tf.constant_initializer(value=1)
                assert learn_scale
            lstm_initializer = tf.constant_initializer(value=parameters['generator/rnn/lstm_cell/weights:0'])
            # bias_start is assigned in both branches but not read below.
            bias_start = parameters['generator/rnn/lstm_cell/biases:0']
        W_out_G = tf.get_variable(name='W_out_G', shape=[hidden_units_g, num_generated_features], initializer=W_out_G_initializer)
        b_out_G = tf.get_variable(name='b_out_G', shape=num_generated_features, initializer=b_out_G_initializer)
        # The variable is created, but its only use (scaling the output)
        # is disabled further down.
        scale_out_G = tf.get_variable(name='scale_out_G', shape=1, initializer=scale_out_G_initializer, trainable=learn_scale)
        if cond_dim > 0:
            # CGAN!
            assert not c is None
            # Repeat the condition for every time step, then append it to z.
            repeated_encoding = tf.stack([c]*seq_length, axis=1)
            inputs = tf.concat([z, repeated_encoding], axis=2)
            # (An earlier tile/reshape variant that put the condition
            # before z is disabled.)
        else:
            inputs = z

        cell = LSTMCell(num_units=hidden_units_g, state_is_tuple=True, initializer=lstm_initializer, reuse=reuse)
        # Every sequence is given the full length seq_length.
        rnn_outputs, rnn_states = tf.nn.dynamic_rnn(
            cell=cell, dtype=tf.float32,
            sequence_length=[seq_length]*batch_size, inputs=inputs)
        # Flatten to 2-D for the shared output projection.
        rnn_outputs_2d = tf.reshape(rnn_outputs, [-1, hidden_units_g])
        logits_2d = tf.matmul(rnn_outputs_2d, W_out_G) + b_out_G
        # Disabled: output_2d = tf.multiply(tf.nn.tanh(logits_2d), scale_out_G)
        output_2d = tf.nn.tanh(logits_2d)
        output_3d = tf.reshape(output_2d, [-1, seq_length, num_generated_features])
    return output_3d
def __init__(self, num_layers, num_units, cell_type='lstm', scope='stack_bi_rnn'):
    """Create per-layer forward and backward cells for a stacked bi-RNN.

    Args:
        num_layers: number of cells built for each direction.
        num_units: either one size used for every layer, or a list/tuple
            with exactly ``num_layers`` per-layer sizes.
        cell_type: ``'lstm'`` selects ``LSTMCell``; any other value selects
            ``GRUCell``.
        scope: stored unchanged on ``self.scope``.

    Raises:
        ValueError: if ``num_units`` is a sequence whose length differs
            from ``num_layers``.
    """
    # isinstance also accepts list subclasses and tuples.
    if isinstance(num_units, (list, tuple)):
        # Explicit raise instead of `assert`, which vanishes under -O.
        if len(num_units) != num_layers:
            raise ValueError(
                "if num_units is a list, then its size should equal to "
                "num_layers")
        layer_units = list(num_units)
    else:
        layer_units = [num_units] * num_layers

    cell_cls = LSTMCell if cell_type == 'lstm' else GRUCell
    # All forward cells are created before any backward cell.
    self.cells_fw = [cell_cls(units) for units in layer_units]
    self.cells_bw = [cell_cls(units) for units in layer_units]
    self.num_layers = num_layers
    self.scope = scope
def __init__(self, cfg):
    """Build the classifier graph: embeddings, bi-LSTM, attention, loss.

    Creates the placeholders ``context``, ``seq_len``, ``labels`` and
    ``lr``, looks up constant word embeddings loaded from a pickle file,
    encodes them with a bidirectional LSTM followed by
    ``self.self_attention`` and a tanh projection, computes logits with
    ``self.ffn_layer``, and defines ``self.loss``, ``self.train_op``
    (Adam), ``self.pred`` and ``self.accuracy``.

    Args:
        cfg: configuration providing ``num_classes``, ``num_units`` and
            ``hidden_units``.
    """
    # fed by 'feed_dict'
    self.context = tf.placeholder(name='context', shape=[None, None], dtype=tf.int32)
    self.seq_len = tf.placeholder(name='sequence_length', shape=[None], dtype=tf.int32)
    self.labels = tf.placeholder(name='labels', shape=[None, cfg.num_classes], dtype=tf.float32)
    self.lr = tf.placeholder(name='learning_rate', dtype=tf.float32)

    # NOTE(review): the scope nesting below is reconstructed from a
    # whitespace-collapsed source — confirm against the original file.
    with tf.device('/gpu:0'):
        with tf.variable_scope('context_lookup_table'):
            # NOTE(review): pickle.load executes arbitrary code from the
            # file; only load embeddings from a trusted path.  `params` is
            # a name defined outside this method.
            with open(params['default_word_emb_pkl_path'], 'rb') as f:
                word_emb = pickle.load(f)
            # Embeddings are a constant, so they are not trained.
            word_embeddings = tf.constant(word_emb, dtype=tf.float32)
            # make lookup table for given review context
            context_emb = tf.nn.embedding_lookup(word_embeddings, self.context)

        with tf.variable_scope('context_representation'):
            cell_fw = LSTMCell(num_units = cfg.num_units)
            cell_bw = LSTMCell(num_units = cfg.num_units)
            h,_ = bidirectional_dynamic_rnn(cell_fw, cell_bw, context_emb, sequence_length=self.seq_len, dtype=tf.float32, time_major=False)
            # concat forward and backward hidden states
            h = tf.concat(h, axis=-1)
            h = self.self_attention(h)
            weight = tf.get_variable(name='weight', shape=[2*cfg.num_units, 2*cfg.num_units], dtype=tf.float32)  ###
            # NOTE(review): in the collapsed source a `###` marker sits
            # between the line above and the line below; it is read here as
            # a trailing marker, leaving the projection active — confirm it
            # was not a commented-out line.
            h = tf.nn.tanh(tf.matmul(h, weight))

        with tf.variable_scope('compute_logits'):
            context_logits = self.ffn_layer(h, cfg.hidden_units, cfg.num_classes, scope='ffn_layer')

        with tf.variable_scope('compute_loss'):
            self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=context_logits, labels=self.labels))
            self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.loss)

        with tf.variable_scope('accuracy'):
            # pred is 0 (neg) or 1 (pos)
            self.pred = tf.argmax(tf.nn.softmax(context_logits),1,name='prediction')
            num_correct_pred = tf.equal(self.pred, tf.argmax(self.labels, 1))
            self.accuracy = tf.reduce_mean(tf.cast(num_correct_pred, tf.float32))
def stacked_rnn_step(input_vocabulary_size, hidden_size=13, emb_dim=11, n_layers=2, variable_scope='encdec'):
    """Build a stacked LSTM cell that takes token ids as input.

    The stack is wrapped first in ``InputProjectionWrapper`` (argument
    ``emb_dim``) and then in ``EmbeddingWrapper`` (arguments
    ``input_vocabulary_size`` and ``emb_dim``).

    Args:
        input_vocabulary_size: vocabulary size given to ``EmbeddingWrapper``.
        hidden_size: number of units of each ``LSTMCell``.
        emb_dim: size given to both wrappers.
        n_layers: number of stacked LSTM layers.
        variable_scope: name of the enclosing variable scope.

    Returns:
        The ``EmbeddingWrapper`` cell.
    """
    with tf.variable_scope(variable_scope, reuse=None):
        # stacked LSTM: one LSTMCell per layer.  `[cell] * n_layers` would
        # repeat a single object, which TensorFlow >= 1.1 rejects or treats
        # as one shared set of weights.
        rnn_cell = MultiRNNCell(
            [LSTMCell(hidden_size) for _ in range(n_layers)])
        proj_wrapper = InputProjectionWrapper(rnn_cell, emb_dim)
        embedding_wrapper = EmbeddingWrapper(proj_wrapper,
                                             input_vocabulary_size, emb_dim)
    return embedding_wrapper
def __init__(self, num_layers, num_units, batch_size, input_size, keep_prob=1.0, is_train=None, scope="native_lstm", activation=tf.nn.tanh):
    """Create per-layer (forward, backward) LSTM cells and dropout masks.

    For every layer a pair of ``LSTMCell`` objects is appended to
    ``self.grus`` and a pair of ``Dropout`` masks built from a
    ``[batch_size, 1, layer_input_size]`` tensor of ones is appended to
    ``self.dropout_mask``.  ``self.inits`` is created empty and not
    filled here.

    Args:
        num_layers: number of stacked bidirectional layers.
        num_units: units of every ``LSTMCell``.
        batch_size: first dimension of the dropout masks.
        input_size: feature size of the first layer's input; deeper layers
            use ``2 * num_units``.
        keep_prob: forwarded to ``Dropout``.
        is_train: forwarded to ``Dropout``.
        scope: stored unchanged on ``self.scope``.
        activation: activation passed to every ``LSTMCell``.
    """
    self.num_layers = num_layers
    self.grus = []
    self.inits = []
    self.dropout_mask = []
    self.scope = scope

    for layer_idx in range(num_layers):
        # Layer 0 sees the raw input; later layers see fw+bw outputs.
        layer_input_size = 2 * num_units if layer_idx else input_size

        fw_cell = LSTMCell(num_units, activation=activation)
        bw_cell = LSTMCell(num_units, activation=activation)

        # Learnt initial states (tiled tf.Variable zeros) are disabled.

        fw_mask = Dropout(
            tf.ones([batch_size, 1, layer_input_size], dtype=tf.float32),
            keep_prob=keep_prob, is_train=is_train, mode='')
        bw_mask = Dropout(
            tf.ones([batch_size, 1, layer_input_size], dtype=tf.float32),
            keep_prob=keep_prob, is_train=is_train, mode='')

        self.grus.append((fw_cell, bw_cell))
        self.dropout_mask.append((fw_mask, bw_mask))
def context_representation(inputs, seq_len, num_units, activation=tf.nn.tanh, use_bias=False, reuse=None, name="context_rep"):
    """Encode ``inputs`` with a bi-LSTM, self-attention and a dense layer.

    Args:
        inputs: batch-major input tensor (``time_major=False``).
        seq_len: per-example sequence lengths.
        num_units: units of each LSTM direction, of the self-attention and
            of the final dense layer.
        activation: activation of the final dense layer.
        use_bias: whether the final dense layer has a bias.
        reuse: forwarded to the variable scope and to ``self_attention``.
        name: name of the enclosing variable scope.

    Returns:
        Output of the ``context_project`` dense layer.
    """
    with tf.variable_scope(name, reuse=reuse, dtype=tf.float32):
        fw_cell = LSTMCell(num_units=num_units)
        bw_cell = LSTMCell(num_units=num_units)
        bi_outputs, _ = bidirectional_dynamic_rnn(
            cell_fw=fw_cell,
            cell_bw=bw_cell,
            inputs=inputs,
            sequence_length=seq_len,
            dtype=tf.float32,
            time_major=False,
            scope="bidirectional_dynamic_rnn")
        # Join forward and backward outputs on the feature axis.
        encoded = tf.concat(bi_outputs, axis=-1)

        # self-attention
        attended = self_attention(encoded,
                                  num_units=num_units,
                                  return_alphas=False,
                                  reuse=reuse,
                                  name="self_attention")

        # dense layer project
        return tf.layers.dense(
            attended,
            units=num_units,
            use_bias=use_bias,
            kernel_initializer=tf.glorot_uniform_initializer(),
            activation=activation,
            name="context_project")
def model(data, weights, biases):
    """Build a stacked-LSTM classifier and return its softmax predictions.

    Args:
        data: currently NOT used — the network reads the module-level
            ``train_data_node`` instead.
            NOTE(review): this looks unintended; confirm what callers pass
            before switching the RNN input to ``data``.
        weights: mapping with an ``'out'`` projection matrix.
        biases: mapping with an ``'out'`` bias.

    Returns:
        ``softmax(last @ weights['out'] + biases['out'])``.
    """
    # One LSTMCell per layer; `[cell] * NUM_LAYERS` would repeat a single
    # object, which newer TensorFlow versions reject or treat as one shared
    # set of weights.
    cell = MultiRNNCell([LSTMCell(NUM_NEURONS) for _ in range(NUM_LAYERS)])
    output, _ = tf.nn.rnn(cell, train_data_node, dtype=DATA_TYPE)
    output = tf.transpose(output, [1, 0, 2])
    # NOTE(review): this gathers the last index along axis 0 of the
    # transposed tensor.  If tf.nn.rnn returned a time-major list, axis 0 is
    # now the batch axis rather than time — confirm this selects the
    # intended slice.
    last = tf.gather(output, int(output.get_shape()[0]) - 1)
    prediction = tf.nn.softmax(
        tf.matmul(last, weights['out']) + biases['out'])
    return prediction
def encoder(self):
    """Build the encoder RNN (uni- or bidirectional) inside its own scope.

    Sets ``self.encoder_outputs``, ``self.encoder_final_state`` and
    ``self.hidden_units``.  In the bidirectional case the forward and
    backward outputs are concatenated on axis 2 and the ``c`` / ``h`` state
    parts on axis 1, so ``self.hidden_units`` is ``2 * self.num_units``.
    """
    # Encoder
    with tf.variable_scope(self.model_name + "encoder_model"):
        if self.Bidirection:
            encoder_cell_fw = LSTMCell(self.num_units)
            encoder_cell_bw = LSTMCell(self.num_units)
            ((encoder_fw_outputs, encoder_bw_outputs),
             (encoder_fw_final_state, encoder_bw_final_state)) = (
                tf.nn.bidirectional_dynamic_rnn(
                    cell_fw=encoder_cell_fw,
                    cell_bw=encoder_cell_bw,
                    inputs=self.encoder_inputs_embedded,
                    sequence_length=self.encoder_inputs_length,
                    dtype=tf.float32,
                    time_major=False))
            # Concatenates tensors along one dimension.
            # Stored on self like the unidirectional branch does;
            # previously this was a discarded local, leaving
            # self.encoder_outputs unset in bidirectional mode.
            self.encoder_outputs = tf.concat(
                (encoder_fw_outputs, encoder_bw_outputs), 2)
            encoder_final_state_c = tf.concat(
                (encoder_fw_final_state.c, encoder_bw_final_state.c), 1)
            encoder_final_state_h = tf.concat(
                (encoder_fw_final_state.h, encoder_bw_final_state.h), 1)
            # TF Tuple used by LSTM Cells for state_size, zero_state, and
            # output state.
            self.encoder_final_state = LSTMStateTuple(
                c=encoder_final_state_c, h=encoder_final_state_h)
            self.hidden_units = 2 * self.num_units
        else:
            encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(self.num_units)
            self.encoder_outputs, self.encoder_final_state = (
                tf.nn.dynamic_rnn(
                    cell=encoder_cell,
                    inputs=self.encoder_inputs_embedded,
                    sequence_length=self.encoder_inputs_length,
                    time_major=False,
                    dtype=tf.float32))
            self.hidden_units = self.num_units
def __init__(self, feature_size, eb_dim, hidden_size, max_len_item, max_len_user, item_part_fnum, user_part_fnum, use_hist_u, use_hist_i, emb_initializer):
    """Build the LSTM4Rec graph on top of the base model.

    Two LSTMs summarise the item-side and user-side history embeddings;
    their final states (built with ``state_is_tuple=False``) are selected
    or concatenated according to ``use_hist_i`` / ``use_hist_u`` and fed
    to ``self.build_fc_net`` before ``self.build_loss`` is called.

    Args:
        feature_size, eb_dim, hidden_size, max_len_item, max_len_user,
        item_part_fnum, user_part_fnum, emb_initializer: forwarded
            unchanged to the base-class constructor; ``hidden_size`` is
            also the LSTM size.
        use_hist_u: include the user-side history state in the FC input.
        use_hist_i: include the item-side history state in the FC input.

    Raises:
        ValueError: if both ``use_hist_u`` and ``use_hist_i`` are false,
            since there would be no input for the fully connected net.
    """
    super(LSTM4Rec, self).__init__(feature_size, eb_dim, hidden_size,
                                   max_len_item, max_len_user,
                                   item_part_fnum, user_part_fnum,
                                   use_hist_u, use_hist_i, emb_initializer)

    # RNN layer
    with tf.name_scope('item_rnn'):
        _, item_part_final_state = tf.nn.dynamic_rnn(
            LSTMCell(hidden_size, state_is_tuple=False),
            inputs=self.item_part_emb,
            sequence_length=self.item_len_ph,
            dtype=tf.float32,
            scope='lstm1')
        item_part = item_part_final_state

    with tf.name_scope('user_rnn'):
        _, user_part_final_state = tf.nn.dynamic_rnn(
            LSTMCell(hidden_size, state_is_tuple=False),
            inputs=self.user_part_emb,
            sequence_length=self.user_len_ph,
            dtype=tf.float32,
            scope='lstm2')
        user_part = user_part_final_state

    if use_hist_i and use_hist_u:
        inp = tf.concat([item_part, user_part], axis=1)
    elif use_hist_i:
        inp = item_part
    elif use_hist_u:
        inp = user_part
    else:
        # Previously this fell through and failed with an unbound `inp`.
        raise ValueError(
            "LSTM4Rec needs at least one of use_hist_i / use_hist_u")

    # fully connected layer
    self.build_fc_net(inp)
    self.build_loss()
def Encoder(self):
    """Encode ``self.xs`` with a bidirectional LSTM.

    Returns:
        ``(output, final_state)`` where ``output`` is the element-wise mean
        of the forward and backward outputs and ``final_state`` is an
        ``LSTMStateTuple`` whose ``c`` and ``h`` are the forward and
        backward final states concatenated on axis 1.
    """
    # One entry per batch element, each equal to the number of input
    # time steps.
    inputs_length = tf.fill([tf.shape(self.xs)[0]], self.input_timestep)

    # The two directions need their own cell objects.  Passing one object
    # as both cell_fw and cell_bw makes them share (or clash over) a single
    # set of weights.
    # NOTE(review): this changes the variable set; checkpoints saved with
    # the shared cell will not restore into this graph.
    cell_fw = LSTMCell(self.encoder_units)
    cell_bw = LSTMCell(self.encoder_units)

    # use bidirectional rnn as encoder architecture
    (fw_outputs, bw_outputs), (fw_final_state, bw_final_state) = (
        tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=self.xs,
            sequence_length=inputs_length,
            dtype=self.dtype))

    # merge every forward and backward output as total output
    output = tf.add(fw_outputs, bw_outputs) / 2

    # merge every forward and backward final state as final state
    state_c = tf.concat([fw_final_state.c, bw_final_state.c], axis=1)
    state_h = tf.concat([fw_final_state.h, bw_final_state.h], axis=1)
    final_state = LSTMStateTuple(c=state_c, h=state_h)
    return output, final_state
def build_graph(self):
    """Build one step of the stacked LSTM plus the output layer.

    The stacked cell is applied once to ``self.model_input`` with
    ``self.init_state``; the new state replaces ``self.init_state``.  The
    cell output goes through ``__add_output_layer`` and a ``tf.Print``
    node that logs the output together with ``self.ground_truth``.

    Returns:
        ``self.output_layer``.
    """
    with tf.variable_scope('lstm'):
        # One LSTMCell per layer; `[lstm_cell] * self.layers` would repeat
        # a single object, which newer TensorFlow versions reject or treat
        # as one shared set of weights.
        rnn_cell = MultiRNNCell(
            [LSTMCell(self.layer_size) for _ in range(self.layers)])
        cell_output, self.init_state = rnn_cell(self.model_input,
                                                self.init_state)
        print("%i layers created" % self.layers)
        self.output_layer = self.__add_output_layer(
            "fc_out", cell_output, self.layer_size, self.output_dim)
        # Pass-through node that prints values when it is evaluated.
        self.output_layer = tf.Print(
            self.output_layer,
            [self.output_layer, tf.convert_to_tensor(self.ground_truth)],
            'Value of output layer and ground truth:',
            summarize=6)
        tf.histogram_summary('lstm_output', self.output_layer)
    return self.output_layer
def build_decoder_cell(self):
    """Build the stacked decoder cell with attention on its top layer.

    All layers are ``LSTMCell`` objects of ``config.hidden_units`` units;
    the last one is wrapped in an ``AttentionWrapper`` using Bahdanau
    attention over ``self.encoder_outputs``.  The initial state is the
    encoder's last state per layer, except for the top layer, which uses
    the wrapper's zero state (the wrapper itself is given the encoder's
    top-layer state through ``initial_cell_state``).

    Returns:
        ``(MultiRNNCell, tuple_of_initial_states)``.
    """
    # No beam search currently
    # Attention
    # TODO: other attention mechanism?
    attention_mechanism = BahdanauAttention(
        num_units=self.config.hidden_units,
        memory=self.encoder_outputs,
        memory_sequence_length=self.encoder_inputs_length)

    # One LSTMCell per layer.  `[LSTMCell(...)] * depth` would repeat a
    # single object, so the attention-wrapped top layer and every lower
    # layer would be the same cell — rejected or weight-shared by
    # TensorFlow >= 1.1.
    decoder_cells = [LSTMCell(self.config.hidden_units)
                     for _ in range(self.config.decoder_depth)]
    decoder_initial_state = list(self.encoder_last_state)

    def attn_decoder_input_fn(inputs, attention):
        if not self.config.attn_input_feeding:
            return inputs
        # Essential when use_residual=True
        _input_layer = Dense(self.config.hidden_units,
                             dtype=tf.float32,
                             name='attn_input_feeding')
        return _input_layer(concat([inputs, attention], -1))

    # Add an AttentionWrapper around the last layer of the decoder
    decoder_cells[-1] = AttentionWrapper(
        cell=decoder_cells[-1],
        attention_mechanism=attention_mechanism,
        attention_layer_size=self.config.hidden_units,
        cell_input_fn=attn_decoder_input_fn,
        initial_cell_state=decoder_initial_state[-1],
        alignment_history=False,
        name='Attention_Wrapper')

    # The wrapper has its own state structure, so the top layer starts
    # from the wrapper's zero state.
    decoder_initial_state[-1] = decoder_cells[-1].zero_state(
        batch_size=self.batch_size, dtype=tf.float32)
    decoder_initial_state = tuple(decoder_initial_state)

    return MultiRNNCell(decoder_cells), decoder_initial_state
def __init__(self, data, FLAGS):
    """Build the hierarchical RNN encoder-decoder graph, its loss and accuracy.

    Every utterance of a history is encoded along the word axis, the
    per-utterance states are then encoded along the utterance axis, and the
    final history state is passed to ``rnn_decoder`` as a static input.

    Args:
        data: dataset object. This constructor reads ``train_set`` (indexed
            with ``"features"`` and ``"targets"``), ``test_set`` and the
            ``idx2word_*`` / ``word2idx_*`` vocabularies.
        FLAGS: configuration object; ``print_variables`` and
            ``regularization`` are read here.
    """
    with tf.variable_scope("history_length"):
        history_length = data.train_set["features"].shape[1]

    encoder_lstm_size = 16
    encoder_embedding_size = 16 * 2
    encoder_vocabulary_length = len(data.idx2word_history)
    with tf.variable_scope("encoder_sequence_length"):
        encoder_sequence_length = data.train_set["features"].shape[2]

    decoder_lstm_size = 16
    decoder_embedding_size = 16
    decoder_vocabulary_length = len(data.idx2word_target)
    with tf.variable_scope("decoder_sequence_length"):
        decoder_sequence_length = data.train_set["targets"].shape[1]

    # inference model
    with tf.name_scope("model"):
        features = tf.placeholder("int32", name="features")
        targets = tf.placeholder("int32", name="true_targets")
        # Placeholder is created in the graph but not consumed below.
        use_dropout_prob = tf.placeholder("float32", name="use_dropout_prob")

        with tf.variable_scope("batch_size"):
            batch_size = tf.shape(features)[0]

        encoder_embedding = embedding(
            input=features,
            length=encoder_vocabulary_length,
            size=encoder_embedding_size,
            name="encoder_embedding"
        )

        with tf.name_scope("UtterancesEncoder"):
            with tf.name_scope("RNNForwardUtteranceEncoderCell_1"):
                cell_fw_1 = LSTMCell(
                    num_units=encoder_lstm_size,
                    input_size=encoder_embedding_size,
                    use_peepholes=True
                )
                initial_state_fw_1 = cell_fw_1.zero_state(batch_size, tf.float32)

            with tf.name_scope("RNNBackwardUtteranceEncoderCell_1"):
                cell_bw_1 = LSTMCell(
                    num_units=encoder_lstm_size,
                    input_size=encoder_embedding_size,
                    use_peepholes=True
                )
                initial_state_bw_1 = cell_bw_1.zero_state(batch_size, tf.float32)

            with tf.name_scope("RNNForwardUtteranceEncoderCell_2"):
                cell_fw_2 = LSTMCell(
                    num_units=encoder_lstm_size,
                    input_size=cell_fw_1.output_size + cell_bw_1.output_size,
                    use_peepholes=True,
                )
                initial_state_fw_2 = cell_fw_2.zero_state(batch_size, tf.float32)

            # the input data has this dimensions
            # [
            #   #batch,
            #   #utterance in a history (a dialogue),
            #   #word in an utterance (a sentence),
            #   embedding dimension
            # ]

            # encode all utterances along the word axis
            encoder_states_2d = []
            for utterance in range(history_length):
                # Variables are created for the first utterance and reused
                # for all the following ones.
                encoder_outputs, _ = brnn(
                    cell_fw=cell_fw_1,
                    cell_bw=cell_bw_1,
                    inputs=[encoder_embedding[:, utterance, word, :]
                            for word in range(encoder_sequence_length)],
                    initial_state_fw=initial_state_fw_1,
                    initial_state_bw=initial_state_bw_1,
                    name="RNNUtteranceBidirectionalLayer",
                    reuse=True if utterance > 0 else None,
                )

                _, encoder_states = rnn(
                    cell=cell_fw_2,
                    inputs=encoder_outputs,
                    initial_state=initial_state_fw_2,
                    name="RNNUtteranceForwardEncoder",
                    reuse=True if utterance > 0 else None,
                )

                # Keep only the last state and give it an utterance axis.
                encoder_states = tf.concat(1, tf.expand_dims(encoder_states[-1], 1))
                encoder_states_2d.append(encoder_states)

            encoder_states_2d = tf.concat(1, encoder_states_2d)

        with tf.name_scope("HistoryEncoder"):
            # encode all histories along the utterance axis
            with tf.name_scope("RNNForwardHistoryEncoderCell_1"):
                cell_fw_1 = LSTMCell(
                    num_units=encoder_lstm_size,
                    input_size=cell_fw_2.state_size,
                    use_peepholes=True
                )
                initial_state_fw_1 = cell_fw_1.zero_state(batch_size, tf.float32)

            with tf.name_scope("RNNBackwardHistoryEncoderCell_1"):
                cell_bw_1 = LSTMCell(
                    num_units=encoder_lstm_size,
                    input_size=cell_fw_2.state_size,
                    use_peepholes=True
                )
                # The backward state must come from the backward cell itself,
                # not from the utterance encoder's `cell_fw_2`.
                initial_state_bw_1 = cell_bw_1.zero_state(batch_size, tf.float32)

            with tf.name_scope("RNNForwardHistoryEncoderCell_2"):
                cell_fw_2 = LSTMCell(
                    num_units=encoder_lstm_size,
                    input_size=cell_fw_1.output_size + cell_bw_1.output_size,
                    use_peepholes=True,
                )
                initial_state_fw_2 = cell_fw_2.zero_state(batch_size, tf.float32)

            encoder_outputs, _ = brnn(
                cell_fw=cell_fw_1,
                cell_bw=cell_bw_1,
                inputs=[encoder_states_2d[:, utterance, :]
                        for utterance in range(history_length)],
                initial_state_fw=initial_state_fw_1,
                initial_state_bw=initial_state_bw_1,
                name="RNNHistoryBidirectionalLayer",
                reuse=None,
            )

            _, encoder_states = rnn(
                cell=cell_fw_2,
                inputs=encoder_outputs,
                initial_state=initial_state_fw_2,
                name="RNNHistoryForwardEncoder",
                reuse=None,
            )

        with tf.name_scope("Decoder"):
            use_inputs_prob = tf.placeholder("float32", name="use_inputs_prob")

            with tf.name_scope("RNNDecoderCell"):
                cell = LSTMCell(
                    num_units=decoder_lstm_size,
                    input_size=decoder_embedding_size + cell_fw_2.state_size,
                    use_peepholes=True,
                )
                initial_state = cell.zero_state(batch_size, tf.float32)

            # decode all histories along the utterance axis
            final_encoder_state = encoder_states[-1]

            decoder_states, decoder_outputs, decoder_outputs_softmax = rnn_decoder(
                cell=cell,
                inputs=[targets[:, word] for word in range(decoder_sequence_length)],
                static_input=final_encoder_state,
                initial_state=initial_state,  # final_encoder_state,
                embedding_size=decoder_embedding_size,
                embedding_length=decoder_vocabulary_length,
                sequence_length=decoder_sequence_length,
                name="RNNDecoder",
                reuse=False,
                use_inputs_prob=use_inputs_prob,
            )

            targets_given_features = tf.concat(1, decoder_outputs_softmax)

    if FLAGS.print_variables:
        for v in tf.trainable_variables():
            print(v.name)

    with tf.name_scope("loss"):
        one_hot_labels = dense_to_one_hot(targets, decoder_vocabulary_length)
        # Clip before the log so a zero probability cannot turn the loss
        # into NaN (0 * -inf).
        loss = tf.reduce_mean(
            -one_hot_labels * tf.log(tf.clip_by_value(targets_given_features, 1e-10, 1.0)),
            name="loss")
        # Add an L2 penalty for every variable whose name matches a pattern.
        for v in tf.trainable_variables():
            for n in ["/W_", "/W:", "/B:"]:
                if n in v.name:
                    print("Regularization using", v.name)
                    loss += FLAGS.regularization * tf.reduce_mean(tf.pow(v, 2))
        tf.scalar_summary("loss", loss)

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(one_hot_labels, 2),
                                      tf.argmax(targets_given_features, 2))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        tf.scalar_summary("accuracy", accuracy)

    self.data = data
    self.train_set = data.train_set
    self.test_set = data.test_set
    self.idx2word_history = data.idx2word_history
    self.word2idx_history = data.word2idx_history
    self.idx2word_target = data.idx2word_target
    self.word2idx_target = data.word2idx_target

    self.history_length = history_length
    self.encoder_sequence_length = encoder_sequence_length
    self.features = features
    self.targets = targets
    self.batch_size = batch_size
    self.use_inputs_prob = use_inputs_prob
    self.targets_given_features = targets_given_features
    self.loss = loss
    self.accuracy = accuracy
def train(train_set, test_set, idx2word_history, word2idx_history, idx2word_target, word2idx_target):
    """Build the hierarchical encoder-decoder, train it and print sample predictions.

    Hyper-parameters come from the module-level ``FLAGS``. Summaries are
    written to ``./log`` and the trained model is saved to ``.rnn-model.ckpt``.

    Args:
        train_set: mapping with ``'features'`` (3-D index array) and
            ``'targets'`` (2-D index array) used for training.
        test_set: mapping with the same keys, used for periodic evaluation
            and for the final report.
        idx2word_history: index -> word lookup for history tokens.
        word2idx_history: not used by this function.
        idx2word_target: index -> word lookup for target tokens.
        word2idx_target: not used by this function.
    """
    def strip_markers(indexes, idx2word):
        """Translate indexes to words, dropping the _SOS_/_EOS_ markers."""
        words = (idx2word[idx] for idx in indexes)
        return [w for w in words if w not in ['_SOS_', '_EOS_']]

    with tf.variable_scope("history_length"):
        history_length = train_set['features'].shape[1]

    encoder_lstm_size = 16*4
    encoder_embedding_size = 16*8
    encoder_vocabulary_length = len(idx2word_history)
    with tf.variable_scope("encoder_sequence_length"):
        encoder_sequence_length = train_set['features'].shape[2]

    decoder_lstm_size = 16*4
    decoder_embedding_size = 16*4
    decoder_vocabulary_length = len(idx2word_target)
    with tf.variable_scope("decoder_sequence_length"):
        decoder_sequence_length = train_set['targets'].shape[1]

    # inference model
    with tf.name_scope('model'):
        features = tf.placeholder("int32", name='features')
        targets = tf.placeholder("int32", name='true_targets')
        # Placeholder is created in the graph but not consumed below.
        use_dropout_prob = tf.placeholder("float32", name='use_dropout_prob')

        with tf.variable_scope("batch_size"):
            batch_size = tf.shape(features)[0]

        encoder_embedding = embedding(
            input=features,
            length=encoder_vocabulary_length,
            size=encoder_embedding_size,
            name='encoder_embedding'
        )

        with tf.name_scope("UtterancesEncoder"):
            with tf.name_scope("RNNForwardUtteranceEncoderCell_1"):
                cell_fw_1 = LSTMCell(
                    num_units=encoder_lstm_size,
                    input_size=encoder_embedding_size,
                    use_peepholes=True
                )
                initial_state_fw_1 = cell_fw_1.zero_state(batch_size, tf.float32)

            with tf.name_scope("RNNBackwardUtteranceEncoderCell_1"):
                cell_bw_1 = LSTMCell(
                    num_units=encoder_lstm_size,
                    input_size=encoder_embedding_size,
                    use_peepholes=True
                )
                initial_state_bw_1 = cell_bw_1.zero_state(batch_size, tf.float32)

            with tf.name_scope("RNNForwardUtteranceEncoderCell_2"):
                cell_fw_2 = LSTMCell(
                    num_units=encoder_lstm_size,
                    input_size=cell_fw_1.output_size + cell_bw_1.output_size,
                    use_peepholes=True
                )
                initial_state_fw_2 = cell_fw_2.zero_state(batch_size, tf.float32)

            # the input data has this dimensions
            # [
            #   #batch,
            #   #utterance in a history (a dialogue),
            #   #word in an utterance (a sentence),
            #   embedding dimension
            # ]

            # encode all utterances along the word axis
            encoder_states_2d = []
            for utterance in range(history_length):
                # Variables are created for the first utterance and reused
                # for all the following ones.
                encoder_outputs, _ = brnn(
                    cell_fw=cell_fw_1,
                    cell_bw=cell_bw_1,
                    inputs=[encoder_embedding[:, utterance, word, :]
                            for word in range(encoder_sequence_length)],
                    initial_state_fw=initial_state_fw_1,
                    initial_state_bw=initial_state_bw_1,
                    name='RNNUtteranceBidirectionalLayer',
                    reuse=True if utterance > 0 else None
                )

                _, encoder_states = rnn(
                    cell=cell_fw_2,
                    inputs=encoder_outputs,
                    initial_state=initial_state_fw_2,
                    name='RNNUtteranceForwardEncoder',
                    reuse=True if utterance > 0 else None
                )

                # Keep only the last state and give it an utterance axis.
                encoder_states = tf.concat(1, tf.expand_dims(encoder_states[-1], 1))
                encoder_states_2d.append(encoder_states)

            encoder_states_2d = tf.concat(1, encoder_states_2d)

        with tf.name_scope("HistoryEncoder"):
            # encode all histories along the utterance axis
            with tf.name_scope("RNNFrowardHistoryEncoderCell_1"):
                cell_fw_1 = LSTMCell(
                    num_units=encoder_lstm_size,
                    input_size=cell_fw_2.state_size,
                    use_peepholes=True
                )
                initial_state_fw_1 = cell_fw_1.zero_state(batch_size, tf.float32)

            with tf.name_scope("RNNBackwardHistoryEncoderCell_1"):
                cell_bw_1 = LSTMCell(
                    num_units=encoder_lstm_size,
                    input_size=cell_fw_2.state_size,
                    use_peepholes=True
                )
                # The backward state must come from the backward cell itself,
                # not from the utterance encoder's `cell_fw_2`.
                initial_state_bw_1 = cell_bw_1.zero_state(batch_size, tf.float32)

            with tf.name_scope("RNNFrowardHistoryEncoderCell_2"):
                cell_fw_2 = LSTMCell(
                    num_units=encoder_lstm_size,
                    input_size=cell_fw_1.output_size + cell_bw_1.output_size,
                    use_peepholes=True
                )
                initial_state_fw_2 = cell_fw_2.zero_state(batch_size, tf.float32)

            encoder_outputs, _ = brnn(
                cell_fw=cell_fw_1,
                cell_bw=cell_bw_1,
                inputs=[encoder_states_2d[:, utterance, :]
                        for utterance in range(history_length)],
                initial_state_fw=initial_state_fw_1,
                initial_state_bw=initial_state_bw_1,
                name='RNNHistoryBidirectionalLayer',
                reuse=None
            )

            _, encoder_states = rnn(
                cell=cell_fw_2,
                inputs=encoder_outputs,
                initial_state=initial_state_fw_2,
                name='RNNHistoryForwardEncoder',
                reuse=None
            )

        with tf.name_scope("Decoder"):
            # Starts at 1.0 and is multiplied by the decay factor by
            # `use_inputs_prob_decay_op`.
            use_inputs_prob = tf.Variable(1.0, name='use_inputs_prob', trainable=False)
            use_inputs_prob_decay_op = use_inputs_prob.assign(
                use_inputs_prob * FLAGS.use_inputs_prob_decay)

            with tf.name_scope("RNNDecoderCell"):
                cell = LSTMCell(
                    num_units=decoder_lstm_size,
                    input_size=decoder_embedding_size,
                    use_peepholes=True,
                )

            # decode all histories along the utterance axis
            final_encoder_state = encoder_states[-1]

            decoder_states, decoder_outputs, decoder_outputs_softmax = rnn_decoder(
                cell=cell,
                inputs=[targets[:, word] for word in range(decoder_sequence_length)],
                initial_state=final_encoder_state,
                embedding_size=decoder_embedding_size,
                embedding_length=decoder_vocabulary_length,
                sequence_length=decoder_sequence_length,
                name='RNNDecoder',
                reuse=False,
                use_inputs_prob=use_inputs_prob
            )

            targets_given_features = tf.concat(1, decoder_outputs_softmax)

    if FLAGS.print_variables:
        for v in tf.trainable_variables():
            print(v.name)

    with tf.name_scope('loss'):
        one_hot_labels = dense_to_one_hot(targets, decoder_vocabulary_length)
        # Clip before the log so a zero probability cannot turn the loss
        # into NaN (0 * -inf).
        loss = tf.reduce_mean(
            - one_hot_labels * tf.log(tf.clip_by_value(targets_given_features, 1e-10, 1.0)),
            name='loss')
        # Add an L2 penalty for every variable whose name matches a pattern.
        for v in tf.trainable_variables():
            for n in ['/W_', '/W:', '/B:']:
                if n in v.name:
                    print('Regularization using', v.name)
                    loss += FLAGS.regularization * tf.reduce_mean(tf.pow(v, 2))
        tf.scalar_summary('loss', loss)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(one_hot_labels, 2),
                                      tf.argmax(targets_given_features, 2))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
        tf.scalar_summary('accuracy', accuracy)

    # with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    with tf.Session() as sess:
        # Merge all the summaries and write them out to ./log
        merged = tf.merge_all_summaries()
        writer = tf.train.SummaryWriter('./log', sess.graph_def)
        saver = tf.train.Saver()

        # training
        tvars = tf.trainable_variables()
        learning_rate = tf.Variable(float(FLAGS.learning_rate), trainable=False)

        optimizer = AdamPlusOptimizer(
            learning_rate=learning_rate,
            beta1=FLAGS.beta1,
            beta2=FLAGS.beta2,
            epsilon=FLAGS.epsilon,
            pow=FLAGS.pow,
            use_locking=False,
            name='trainer')

        learning_rate_decay_op = learning_rate.assign(learning_rate * FLAGS.decay)
        global_step = tf.Variable(0, trainable=False)
        gradients = tf.gradients(loss, tvars)

        clipped_gradients, _ = tf.clip_by_global_norm(gradients, FLAGS.max_gradient_norm)
        train_op = optimizer.apply_gradients(zip(clipped_gradients, tvars),
                                             global_step=global_step)

        tf.initialize_all_variables().run()

        # prepare batch indexes
        train_set_size = train_set['features'].shape[0]
        print('Train set size:', train_set_size)
        minibatch_size = FLAGS.batch_size
        print('Batch size:', minibatch_size)
        batch_indexes = [[i, i + minibatch_size]
                         for i in range(0, train_set_size, minibatch_size)]
        print('#Batches:', len(batch_indexes))

        previous_accuracies = []
        previous_losses = []
        for epoch in range(FLAGS.max_epochs):
            print('Batch: ', end=' ', flush=True)
            for b, batch in enumerate(batch_indexes):
                print(b, end=' ', flush=True)
                sess.run(
                    train_op,
                    feed_dict={
                        features: train_set['features'][batch[0]:batch[1]],
                        targets: train_set['targets'][batch[0]:batch[1]],
                    }
                )
            print()
            shuffle(batch_indexes)

            if epoch % max(min(int(FLAGS.max_epochs / 100), 100), 1) == 0:
                summary, lss, acc = sess.run(
                    [merged, loss, accuracy],
                    feed_dict={features: test_set['features'],
                               targets: test_set['targets']})
                writer.add_summary(summary, epoch)
                print()
                print('Epoch: {epoch}'.format(epoch=epoch))
                print(' - accuracy = {acc:f}'.format(acc=acc))
                print(' - loss = {lss:f}'.format(lss=lss))
                print(' - learning rate = {lr:f}'.format(lr=learning_rate.eval()))
                print(' - use inputs prob = {uip:f}'.format(uip=use_inputs_prob.eval()))
                print()

                # decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and lss > max(previous_losses[-3:]):
                    sess.run(learning_rate_decay_op)
                previous_losses.append(lss)

                # stop when reached a threshold maximum or when no improvement in the last 20 steps
                previous_accuracies.append(acc)
                if acc > 0.9999 or max(previous_accuracies) > max(previous_accuracies[-20:]):
                    break

            sess.run(use_inputs_prob_decay_op)

        save_path = saver.save(sess, ".rnn-model.ckpt")
        print()
        print("Model saved in file: %s" % save_path)
        print()

        print('Shape of targets:', test_set['targets'].shape)
        print('Predictions')
        # Keep the evaluated numpy result under its own name instead of
        # rebinding the tensor variable.
        predicted_probs = sess.run(
            targets_given_features,
            feed_dict={features: test_set['features'], targets: test_set['targets']})
        targets_given_features_argmax = np.argmax(predicted_probs, 2)
        print('Shape of predictions:', predicted_probs.shape)
        print('Argmax predictions')
        print()

        # Report roughly ten evenly spaced test histories.
        n_examples = targets_given_features_argmax.shape[0]
        for example in range(0, n_examples, max(int(n_examples/10), 1)):
            print('History', example)

            for j in range(test_set['features'].shape[1]):
                utterance = strip_markers(test_set['features'][example, j], idx2word_history)
                print('U {j}: {c:80}'.format(j=j, c=' '.join(utterance)))

            prediction = strip_markers(targets_given_features_argmax[example], idx2word_target)
            print('P : {t:80}'.format(t=' '.join(prediction)))

            target = strip_markers(test_set['targets'][example], idx2word_target)
            print('T : {t:80}'.format(t=' '.join(target)))
            print()
def __init__(self, data, FLAGS):
    """Build the max-pooling encoder / RNN decoder graph, its loss and accuracy.

    The embedded history is max-pooled first along axis 2 and then along
    axes 1 and 2; the pooled vector is passed to ``rnn_decoder`` as a static
    input.

    Args:
        data: dataset object. This constructor reads ``train_set`` (indexed
            with ``'features'`` and ``'targets'``), ``test_set`` and the
            ``idx2word_*`` / ``word2idx_*`` vocabularies.
        FLAGS: configuration object; ``print_variables`` and
            ``regularization`` are read here.
    """
    with tf.variable_scope("history_length"):
        history_length = data.train_set['features'].shape[1]

    encoder_embedding_size = 32 * 4
    encoder_vocabulary_length = len(data.idx2word_history)
    with tf.variable_scope("encoder_sequence_length"):
        encoder_sequence_length = data.train_set['features'].shape[2]

    decoder_lstm_size = 16 * 2
    decoder_embedding_size = 16 * 2
    decoder_vocabulary_length = len(data.idx2word_target)
    with tf.variable_scope("decoder_sequence_length"):
        decoder_sequence_length = data.train_set['targets'].shape[1]

    # inference model
    with tf.name_scope('model'):
        features = tf.placeholder("int32", name='features')
        targets = tf.placeholder("int32", name='true_targets')
        # Placeholder is created in the graph but not consumed below.
        use_dropout_prob = tf.placeholder("float32", name='use_dropout_prob')

        with tf.variable_scope("batch_size"):
            batch_size = tf.shape(features)[0]

        encoder_embedding = embedding(
            input=features,
            length=encoder_vocabulary_length,
            size=encoder_embedding_size,
            name='encoder_embedding'
        )

        with tf.name_scope("UtterancesEncoder"):
            # No convolution layers are applied here; the embeddings are
            # max-pooled directly along axis 2.
            conv3 = encoder_embedding
            encoded_utterances = tf.reduce_max(conv3, [2], keep_dims=True)

        with tf.name_scope("HistoryEncoder"):
            # Pool the remaining axes 1 and 2; no projection layers follow.
            conv3 = encoded_utterances
            encoded_history = tf.reduce_max(conv3, [1, 2])

        with tf.name_scope("Decoder"):
            use_inputs_prob = tf.placeholder("float32", name='use_inputs_prob')

            with tf.name_scope("RNNDecoderCell"):
                cell = LSTMCell(
                    num_units=decoder_lstm_size,
                    input_size=decoder_embedding_size+encoder_embedding_size,
                    use_peepholes=True,
                )
                initial_state = cell.zero_state(batch_size, tf.float32)

            # decode all histories along the utterance axis
            final_encoder_state = encoded_history

            decoder_states, decoder_outputs, decoder_outputs_softmax = rnn_decoder(
                cell=cell,
                inputs=[targets[:, word] for word in range(decoder_sequence_length)],
                static_input=final_encoder_state,
                initial_state=initial_state,  # final_encoder_state,
                embedding_size=decoder_embedding_size,
                embedding_length=decoder_vocabulary_length,
                sequence_length=decoder_sequence_length,
                name='RNNDecoder',
                reuse=False,
                use_inputs_prob=use_inputs_prob
            )

            targets_given_features = tf.concat(1, decoder_outputs_softmax)

    if FLAGS.print_variables:
        for v in tf.trainable_variables():
            print(v.name)

    with tf.name_scope('loss'):
        one_hot_labels = dense_to_one_hot(targets, decoder_vocabulary_length)
        # Clip before the log so a zero probability cannot turn the loss
        # into NaN (0 * -inf).
        loss = tf.reduce_mean(
            - one_hot_labels * tf.log(tf.clip_by_value(targets_given_features, 1e-10, 1.0)),
            name='loss')
        # Add an L2 penalty for every variable whose name matches a pattern.
        for v in tf.trainable_variables():
            for n in ['/W_', '/W:', '/B:']:
                if n in v.name:
                    print('Regularization using', v.name)
                    loss += FLAGS.regularization * tf.reduce_mean(tf.pow(v, 2))
        tf.scalar_summary('loss', loss)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(one_hot_labels, 2),
                                      tf.argmax(targets_given_features, 2))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
        tf.scalar_summary('accuracy', accuracy)

    self.data = data
    self.train_set = data.train_set
    self.test_set = data.test_set
    self.idx2word_history = data.idx2word_history
    self.word2idx_history = data.word2idx_history
    self.idx2word_target = data.idx2word_target
    self.word2idx_target = data.word2idx_target

    self.history_length = history_length
    self.encoder_sequence_length = encoder_sequence_length
    self.features = features
    self.targets = targets
    self.batch_size = batch_size
    self.use_inputs_prob = use_inputs_prob
    self.targets_given_features = targets_given_features
    self.loss = loss
    self.accuracy = accuracy
n_steps = 5
seq_width = 6

initializer = tf.random_uniform_initializer(-1, 1)

# Sequence fed at run time, laid out as (n_steps, batch_seq_len, seq_width).
seq_input = tf.placeholder(tf.float32, [n_steps, batch_seq_len, seq_width])
# Time step at which the RNN should stop early.
early_stop = tf.placeholder(tf.int32)

# rnn() expects a list with one tensor per time step. tf.split keeps the
# split dimension, so every slice is reshaped to (batch_seq_len, seq_width).
inputs = [
    tf.reshape(step, (batch_seq_len, seq_width))
    for step in tf.split(0, n_steps, seq_input)
]

# Set up the LSTM and unroll it over the per-step inputs.
cell = LSTMCell(size, seq_width, initializer=initializer)
initial_state = cell.zero_state(batch_seq_len, tf.float32)
outputs, states = rnn.rnn(
    cell,
    inputs,
    initial_state=initial_state,
    sequence_length=early_stop,
)

# The initializer op only takes effect once the session runs it; skipping
# this step leads to uninitialized-variable errors.
iop = tf.global_variables_initializer()
session = tf.Session()
session.run(iop)

# Random input of shape (n_steps, batch_seq_len, seq_width), stopping at step 5.
feed = {
    early_stop: 5,
    seq_input: np.random.rand(n_steps, batch_seq_len, seq_width).astype('float32'),
}
def __init__(self, data, FLAGS):
    """Build the hierarchical RNN encoder with a feed-forward action classifier.

    Every utterance of a history is encoded along the word axis, the
    per-utterance states are encoded along the utterance axis, and the final
    history state goes through three linear layers (ReLU + dropout between
    them) ending in a softmax over action templates. Sets
    ``self.predictions``, ``self.loss`` and ``self.accuracy``.

    Args:
        data: dataset object. This constructor reads ``idx2word_history``,
            ``idx2word_action_template``, ``train_set['histories']``,
            ``batch_histories`` and ``batch_actions_template``.
        FLAGS: configuration object; ``print_variables`` is read here.
    """
    super(Model, self).__init__(data, FLAGS)

    encoder_embedding_size = 16
    encoder_lstm_size = 16
    encoder_vocabulary_length = len(data.idx2word_history)
    encoder_sequence_length = data.train_set['histories'].shape[2]
    history_length = data.train_set['histories'].shape[1]

    action_templates_vocabulary_length = len(data.idx2word_action_template)

    with tf.name_scope('data'):
        # The whole dataset lives in non-trainable variables; the current
        # batch is selected with `self.batch_idx`.
        batch_histories = tf.Variable(data.batch_histories, name='histories',
                                      trainable=False)
        batch_actions_template = tf.Variable(data.batch_actions_template, name='actions',
                                             trainable=False)

        histories = tf.gather(batch_histories, self.batch_idx)
        actions_template = tf.gather(batch_actions_template, self.batch_idx)

    with tf.name_scope('model'):
        with tf.variable_scope("batch_size"):
            batch_size = tf.shape(histories)[0]

        encoder_embedding = embedding(
            input=histories,
            length=encoder_vocabulary_length,
            size=encoder_embedding_size,
            name='encoder_embedding'
        )

        with tf.name_scope("UtterancesEncoder"):
            with tf.name_scope("RNNForwardUtteranceEncoderCell_1"):
                cell_fw_1 = LSTMCell(
                    num_units=encoder_lstm_size,
                    input_size=encoder_embedding_size,
                    use_peepholes=True
                )
                initial_state_fw_1 = cell_fw_1.zero_state(batch_size, tf.float32)

            with tf.name_scope("RNNBackwardUtteranceEncoderCell_1"):
                cell_bw_1 = LSTMCell(
                    num_units=encoder_lstm_size,
                    input_size=encoder_embedding_size,
                    use_peepholes=True
                )
                initial_state_bw_1 = cell_bw_1.zero_state(batch_size, tf.float32)

            with tf.name_scope("RNNForwardUtteranceEncoderCell_2"):
                cell_fw_2 = LSTMCell(
                    num_units=encoder_lstm_size,
                    input_size=cell_fw_1.output_size + cell_bw_1.output_size,
                    use_peepholes=True
                )
                initial_state_fw_2 = cell_fw_2.zero_state(batch_size, tf.float32)

            # the input data has this dimensions
            # [
            #   #batch,
            #   #utterance in a history (a dialogue),
            #   #word in an utterance (a sentence),
            #   embedding dimension
            # ]

            # encode all utterances along the word axis
            encoder_states_2d = []
            for utterance in range(history_length):
                # Variables are created for the first utterance and reused
                # for all the following ones.
                encoder_outputs, _ = brnn(
                    cell_fw=cell_fw_1,
                    cell_bw=cell_bw_1,
                    inputs=[encoder_embedding[:, utterance, word, :]
                            for word in range(encoder_sequence_length)],
                    initial_state_fw=initial_state_fw_1,
                    initial_state_bw=initial_state_bw_1,
                    name='RNNUtteranceBidirectionalLayer',
                    reuse=True if utterance > 0 else None
                )

                _, encoder_states = rnn(
                    cell=cell_fw_2,
                    inputs=encoder_outputs,
                    initial_state=initial_state_fw_2,
                    name='RNNUtteranceForwardEncoder',
                    reuse=True if utterance > 0 else None
                )

                # Keep only the last state and give it an utterance axis.
                encoder_states = tf.concat(1, tf.expand_dims(encoder_states[-1], 1))
                encoder_states_2d.append(encoder_states)

            encoder_states_2d = tf.concat(1, encoder_states_2d)

        with tf.name_scope("HistoryEncoder"):
            # encode all histories along the utterance axis
            with tf.name_scope("RNNForwardHistoryEncoderCell_1"):
                cell_fw_1 = LSTMCell(
                    num_units=encoder_lstm_size,
                    input_size=cell_fw_2.state_size,
                    use_peepholes=True
                )
                initial_state_fw_1 = cell_fw_1.zero_state(batch_size, tf.float32)

            with tf.name_scope("RNNBackwardHistoryEncoderCell_1"):
                cell_bw_1 = LSTMCell(
                    num_units=encoder_lstm_size,
                    input_size=cell_fw_2.state_size,
                    use_peepholes=True
                )
                # The backward state must come from the backward cell itself,
                # not from the utterance encoder's `cell_fw_2`.
                initial_state_bw_1 = cell_bw_1.zero_state(batch_size, tf.float32)

            with tf.name_scope("RNNForwardHistoryEncoderCell_2"):
                cell_fw_2 = LSTMCell(
                    num_units=encoder_lstm_size,
                    input_size=cell_fw_1.output_size + cell_bw_1.output_size,
                    use_peepholes=True
                )
                initial_state_fw_2 = cell_fw_2.zero_state(batch_size, tf.float32)

            encoder_outputs, _ = brnn(
                cell_fw=cell_fw_1,
                cell_bw=cell_bw_1,
                inputs=[encoder_states_2d[:, utterance, :]
                        for utterance in range(history_length)],
                initial_state_fw=initial_state_fw_1,
                initial_state_bw=initial_state_bw_1,
                name='RNNHistoryBidirectionalLayer',
                reuse=None
            )

            _, encoder_states = rnn(
                cell=cell_fw_2,
                inputs=encoder_outputs,
                initial_state=initial_state_fw_2,
                name='RNNHistoryForwardEncoder',
                reuse=None
            )

        with tf.name_scope("Decoder"):
            linear_size = cell_fw_2.state_size

            # decode all histories along the utterance axis
            activation = tf.nn.relu(encoder_states[-1])
            activation = tf.nn.dropout(activation, self.dropout_keep_prob)

            projection = linear(
                input=activation,
                input_size=linear_size,
                output_size=linear_size,
                name='linear_projection_1'
            )
            activation = tf.nn.relu(projection)
            activation = tf.nn.dropout(activation, self.dropout_keep_prob)

            projection = linear(
                input=activation,
                input_size=linear_size,
                output_size=linear_size,
                name='linear_projection_2'
            )
            activation = tf.nn.relu(projection)
            activation = tf.nn.dropout(activation, self.dropout_keep_prob)

            projection = linear(
                input=activation,
                input_size=linear_size,
                output_size=action_templates_vocabulary_length,
                name='linear_projection_3'
            )
            self.predictions = tf.nn.softmax(projection, name="softmax_output")

    if FLAGS.print_variables:
        for v in tf.trainable_variables():
            print(v.name)

    with tf.name_scope('loss'):
        one_hot_labels = dense_to_one_hot(actions_template, action_templates_vocabulary_length)
        # Predictions are clipped so the log never sees an exact zero.
        self.loss = tf.reduce_mean(
            - one_hot_labels * tf.log(tf.clip_by_value(self.predictions, 1e-10, 1.0)),
            name='loss')
        tf.scalar_summary('loss', self.loss)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(one_hot_labels, 1),
                                      tf.argmax(self.predictions, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
        tf.scalar_summary('accuracy', self.accuracy)
def __init__(self, data, FLAGS):
    """Assemble the max-pooling history encoder and the RNN action decoder.

    The embedded histories are reduced with a max over axis 2 and then over
    axes 1 and 2; the resulting vector is handed to ``rnn_decoder`` as a
    static input. Sets ``self.predictions``, ``self.loss`` and
    ``self.accuracy``.

    Args:
        data: dataset object. This constructor reads ``idx2word_history``,
            ``idx2word_action``, ``batch_histories`` and ``batch_actions``.
        FLAGS: configuration object; ``print_variables`` is read here.
    """
    super(Model, self).__init__(data, FLAGS)

    # Encoder hyper-parameters.
    encoder_embedding_size = 128
    encoder_vocabulary_length = len(data.idx2word_history)

    # Decoder hyper-parameters.
    decoder_lstm_size = 32
    decoder_embedding_size = 32
    decoder_sequence_length = data.batch_actions.shape[2]
    decoder_vocabulary_length = len(data.idx2word_action)

    with tf.name_scope('data'):
        # The full dataset is stored in non-trainable variables and the
        # current batch is looked up through `self.batch_idx`.
        all_histories = tf.Variable(data.batch_histories, name='histories', trainable=False)
        all_actions = tf.Variable(data.batch_actions, name='actions', trainable=False)

        histories = tf.gather(all_histories, self.batch_idx)
        actions = tf.gather(all_actions, self.batch_idx)

    with tf.name_scope('model'):
        batch_size = tf.shape(histories)[0]

        encoder_embedding = embedding(
            input=histories,
            length=encoder_vocabulary_length,
            size=encoder_embedding_size,
            name='encoder_embedding'
        )

        with tf.name_scope("UtterancesEncoder"):
            # No convolution layers are applied; the embeddings are pooled
            # directly along axis 2.
            encoded_utterances = tf.reduce_max(encoder_embedding, [2], keep_dims=True)

        with tf.name_scope("HistoryEncoder"):
            # Pool the remaining axes 1 and 2; no projection layers follow.
            encoded_history = tf.reduce_max(encoded_utterances, [1, 2])

        with tf.name_scope("Decoder"):
            with tf.name_scope("RNNDecoderCell"):
                decoder_cell = LSTMCell(
                    num_units=decoder_lstm_size,
                    input_size=decoder_embedding_size + encoder_embedding_size,
                    use_peepholes=True,
                )
                zero_state = decoder_cell.zero_state(batch_size, tf.float32)

            # One decoder input per position of the action sequence.
            decoder_inputs = []
            for word in range(decoder_sequence_length):
                decoder_inputs.append(actions[:, word])

            # Only the softmax outputs are used; the decoder starts from the
            # zero state and receives the encoded history at every step.
            _, _, decoder_outputs_softmax = rnn_decoder(
                cell=decoder_cell,
                inputs=decoder_inputs,
                static_input=encoded_history,
                initial_state=zero_state,
                embedding_size=decoder_embedding_size,
                embedding_length=decoder_vocabulary_length,
                sequence_length=decoder_sequence_length,
                name='RNNDecoder',
                reuse=False,
                use_inputs_prob=self.use_inputs_prob
            )

            self.predictions = tf.concat(1, decoder_outputs_softmax)

    if FLAGS.print_variables:
        for variable in tf.trainable_variables():
            print(variable.name)

    with tf.name_scope('loss'):
        one_hot_labels = dense_to_one_hot(actions, decoder_vocabulary_length)
        # Predictions are clipped so the log never sees an exact zero.
        clipped_predictions = tf.clip_by_value(self.predictions, 1e-10, 1.0)
        self.loss = tf.reduce_mean(- one_hot_labels * tf.log(clipped_predictions), name='loss')
        tf.scalar_summary('loss', self.loss)

    with tf.name_scope('accuracy'):
        true_ids = tf.argmax(one_hot_labels, 2)
        predicted_ids = tf.argmax(self.predictions, 2)
        correct_prediction = tf.equal(true_ids, predicted_ids)
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
        tf.scalar_summary('accuracy', self.accuracy)
def train(train_set, test_set, idx2word, word2idx):
    """Train and evaluate an LSTM classifier that maps a sequence to one token.

    The graph embeds the input token ids, runs them through a single LSTM,
    projects the final RNN state with a linear layer and applies a softmax
    over the vocabulary. Training uses Adam on the full training set once per
    epoch; every ``max(FLAGS.max_epochs / 100, 1)`` epochs the test loss and
    accuracy are logged to ``./log`` and printed. After training the model is
    saved to ``model.ckpt`` and the argmax test predictions are printed.

    Args:
        train_set: mapping with ``'features'`` and ``'targets'`` arrays;
            ``features.shape[1]`` is used as the sequence length.
        test_set: mapping with the same keys, used for evaluation.
        idx2word: index-to-word table; only its length (the vocabulary
            size) is used.
        word2idx: unused; kept for interface compatibility.

    Reads ``FLAGS.learning_rate`` and ``FLAGS.max_epochs`` from the
    enclosing scope.
    """
    embedding_size = 5
    vocabulary_length = len(idx2word)
    sequence_size = train_set['features'].shape[1]
    lstm_size = 5

    # inference model
    with tf.name_scope('model'):
        i = tf.placeholder("int32", name='input')
        o = tf.placeholder("int32", name='true_output')

        with tf.variable_scope("batch_size"):
            batch_size = tf.shape(i)[0]

        e = embedding(
            input=i,
            length=vocabulary_length,
            size=embedding_size,
            name='embedding'
        )

        with tf.name_scope("RNNCell"):
            cell = LSTMCell(lstm_size, input_size=embedding_size)
            state = cell.zero_state(batch_size, tf.float32)

        _, states = rnn(
            cell=cell,
            inputs=[e[:, j, :] for j in range(sequence_size)],
            initial_state=state,
            name='RNN'
        )

        final_state = states[-1]

        logits = linear(
            input=final_state,
            input_size=cell.state_size,
            output_size=vocabulary_length,
            name='linear'
        )

        p_o_i = tf.nn.softmax(logits, name="softmax_output")

    with tf.name_scope('loss'):
        one_hot_labels = dense_to_one_hot(o, vocabulary_length)
        # Clip before the log: a softmax output of exactly 0 would otherwise
        # yield 0 * -inf = NaN and poison the loss and its gradients.
        safe_probs = tf.clip_by_value(p_o_i, 1e-10, 1.0)
        loss = tf.reduce_mean(-one_hot_labels * tf.log(safe_probs), name='loss')
        tf.scalar_summary('loss', loss)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(one_hot_labels, 1), tf.argmax(p_o_i, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
        tf.scalar_summary('accuracy', accuracy)

    with tf.Session() as sess:
        # Merge all the summaries and write them out to ./log
        merged = tf.merge_all_summaries()
        writer = tf.train.SummaryWriter('./log', sess.graph_def)
        saver = tf.train.Saver()

        # The optimizer is created before initialization so that its slot
        # variables are covered by initialize_all_variables().
        train_op = tf.train.AdamOptimizer(FLAGS.learning_rate, name='trainer').minimize(loss)
        tf.initialize_all_variables().run()

        train_feed = {i: train_set['features'], o: train_set['targets']}
        test_feed = {i: test_set['features'], o: test_set['targets']}
        report_every = max(int(FLAGS.max_epochs / 100), 1)

        try:
            for epoch in range(FLAGS.max_epochs):
                sess.run(train_op, feed_dict=train_feed)

                if epoch % report_every == 0:
                    summary, lss, acc = sess.run([merged, loss, accuracy], feed_dict=test_feed)
                    writer.add_summary(summary, epoch)
                    print()
                    print('Epoch: {epoch}'.format(epoch=epoch))
                    print(' - accuracy = {acc}'.format(acc=acc))
                    print(' - loss = {lss}'.format(lss=lss))
        finally:
            # Flush buffered summary events to disk even if training fails.
            writer.close()

        save_path = saver.save(sess, "model.ckpt")
        print()
        print("Model saved in file: %s" % save_path)
        print()

        print('Test features')
        print(test_set['features'])
        print('Test targets')
        print(test_set['targets'])

        # Keep the tensor handle intact; store the evaluated array separately.
        predictions = sess.run(p_o_i, feed_dict=test_feed)
        print('Argmax predictions')
        print(np.argmax(predictions, 1).reshape((-1, 1)))
def train(train_set, test_set, idx2word, word2idx):
    """Train and evaluate an LSTM encoder-decoder (sequence-to-sequence) model.

    The encoder embeds the input token ids and runs an LSTM over them; the
    last encoder state initialises an LSTM decoder that emits one softmax
    distribution per target position. Training uses Adam on the full training
    set once per epoch; every ``max(FLAGS.max_epochs / 100, 1)`` epochs the
    test loss and accuracy are logged to ``./log`` and printed. After
    training the model is saved to ``model.ckpt`` and the decoded test
    predictions are printed as ids and as words (``_SOS_`` / ``_EOS_``
    markers are omitted from the word output).

    Args:
        train_set: mapping with ``'features'`` and ``'targets'`` arrays;
            their ``shape[1]`` give the encoder and decoder lengths.
        test_set: mapping with the same keys, used for evaluation.
        idx2word: index-to-word table; its length is the vocabulary size and
            it is indexed with predicted ids when printing.
        word2idx: unused; kept for interface compatibility.

    Reads ``FLAGS.learning_rate`` and ``FLAGS.max_epochs`` from the
    enclosing scope.
    """
    encoder_lstm_size = 5
    encoder_embedding_size = 5
    encoder_vocabulary_length = len(idx2word)
    encoder_sequence_length = train_set['features'].shape[1]

    decoder_lstm_size = 5
    decoder_embedding_size = 5
    decoder_vocabulary_length = len(idx2word)
    decoder_sequence_length = train_set['targets'].shape[1]

    # inference model
    with tf.name_scope('model'):
        i = tf.placeholder("int32", name='input')
        o = tf.placeholder("int32", name='true_output')

        with tf.variable_scope("batch_size"):
            batch_size = tf.shape(i)[0]

        encoder_embedding = embedding(
            input=i,
            length=encoder_vocabulary_length,
            size=encoder_embedding_size,
            name='encoder_embedding'
        )

        with tf.name_scope("RNNEncoderCell"):
            cell = LSTMCell(
                num_units=encoder_lstm_size,
                input_size=encoder_embedding_size,
                use_peepholes=False
            )
            initial_state = cell.zero_state(batch_size, tf.float32)

        _, encoder_states = rnn(
            cell=cell,
            inputs=[encoder_embedding[:, j, :] for j in range(encoder_sequence_length)],
            initial_state=initial_state,
            name='RNNForwardEncoder'
        )

        # The decoder is seeded with the state after the last encoder step.
        final_encoder_state = encoder_states[-1]

        with tf.name_scope("RNNDecoderCell"):
            cell = LSTMCell(
                num_units=decoder_lstm_size,
                input_size=decoder_embedding_size,
                use_peepholes=False,
            )

        _, _, decoder_outputs_softmax = rnn_decoder(
            cell=cell,
            initial_state=final_encoder_state,
            embedding_size=decoder_embedding_size,
            embedding_length=decoder_vocabulary_length,
            sequence_length=decoder_sequence_length,
            name='RNNDecoder'
        )

        p_o_i = tf.concat(1, decoder_outputs_softmax)

    with tf.name_scope('loss'):
        one_hot_labels = dense_to_one_hot(o, decoder_vocabulary_length)
        # Clip before the log: a softmax output of exactly 0 would otherwise
        # yield 0 * -inf = NaN and poison the loss and its gradients.
        safe_probs = tf.clip_by_value(p_o_i, 1e-10, 1.0)
        loss = tf.reduce_mean(-one_hot_labels * tf.log(safe_probs), name='loss')
        tf.scalar_summary('loss', loss)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(one_hot_labels, 2), tf.argmax(p_o_i, 2))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
        tf.scalar_summary('accuracy', accuracy)

    with tf.Session() as sess:
        # Merge all the summaries and write them out to ./log
        merged = tf.merge_all_summaries()
        writer = tf.train.SummaryWriter('./log', sess.graph_def)
        saver = tf.train.Saver()

        # The optimizer is created before initialization so that its slot
        # variables are covered by initialize_all_variables().
        train_op = tf.train.AdamOptimizer(FLAGS.learning_rate, name='trainer').minimize(loss)
        tf.initialize_all_variables().run()

        train_feed = {i: train_set['features'], o: train_set['targets']}
        test_feed = {i: test_set['features'], o: test_set['targets']}
        report_every = max(int(FLAGS.max_epochs / 100), 1)

        try:
            for epoch in range(FLAGS.max_epochs):
                sess.run(train_op, feed_dict=train_feed)

                if epoch % report_every == 0:
                    summary, lss, acc = sess.run([merged, loss, accuracy], feed_dict=test_feed)
                    writer.add_summary(summary, epoch)
                    print()
                    print('Epoch: {epoch}'.format(epoch=epoch))
                    print(' - accuracy = {acc}'.format(acc=acc))
                    print(' - loss = {lss}'.format(lss=lss))
        finally:
            # Flush buffered summary events to disk even if training fails.
            writer.close()

        save_path = saver.save(sess, "model.ckpt")
        print()
        print("Model saved in file: %s" % save_path)
        print()

        print('Test features')
        print(test_set['features'])
        print('Test targets')
        print('Shape of targets:', test_set['targets'].shape)
        print(test_set['targets'])
        print('Predictions')
        # Keep the tensor handle intact; store the evaluated array separately.
        predictions = sess.run(p_o_i, feed_dict=test_feed)
        predicted_ids = np.argmax(predictions, 2)
        print('Shape of predictions:', predictions.shape)
        print('Argmax predictions')
        print(predicted_ids)
        print()

        # Print each predicted sequence as words. Iterating rows directly
        # avoids reusing `i`, which names the input placeholder above.
        for id_row in predicted_ids:
            for word_id in id_row:
                w = idx2word[word_id]
                if w not in ['_SOS_', '_EOS_']:
                    print(w, end=' ')
            print()