def __init__(self, hidden_num, inputs, cell=None, optimizer=None, reverse=True, decode_without_input=False): """ Args: hidden_num : number of hidden elements of each LSTM unit. inputs : a list of input tensors with size (batch_num x elem_num) cell : an rnn cell object (the default option is `tf.python.ops.rnn_cell.LSTMCell`) optimizer : optimizer for rnn (the default option is `tf.train.AdamOptimizer`) reverse : Option to decode in reverse order. decode_without_input : Option to decode without input. """ self.batch_num = inputs[0].get_shape().as_list()[0] self.elem_num = inputs[0].get_shape().as_list()[1] if cell is None: self._enc_cell = LSTMCell(hidden_num) self._dec_cell = LSTMCell(hidden_num) else : self._enc_cell = cell self._dec_cell = cell with tf.variable_scope('encoder'): self.z_codes, self.enc_state = tf.nn.rnn( self._enc_cell, inputs, dtype=tf.float32) with tf.variable_scope('decoder') as vs: dec_weight_ = tf.Variable( tf.truncated_normal([hidden_num, self.elem_num], dtype=tf.float32), name="dec_weight") dec_bias_ = tf.Variable( tf.constant(0.1, shape=[self.elem_num], dtype=tf.float32), name="dec_bias") # if decode_without_input: # dec_inputs = [tf.zeros(tf.shape(inputs[0]), dtype=tf.float32) # for _ in range(len(inputs))] # dec_outputs, dec_state = tf.nn.rnn( # self._dec_cell, dec_inputs, # initial_state=self.enc_state, dtype=tf.float32) """the shape of each tensor dec_output_ : (step_num x hidden_num) dec_weight_ : (hidden_num x elem_num) dec_bias_ : (elem_num) output_ : (step_num x elem_num) input_ : (step_num x elem_num) """ # if reverse: # dec_outputs = dec_outputs[::-1] # dec_output_ = tf.transpose(tf.pack(dec_outputs), [1,0,2]) # dec_weight_ = tf.tile(tf.expand_dims(dec_weight_, 0), [self.batch_num,1,1]) # self.output_ = tf.batch_matmul(dec_output_, dec_weight_) + dec_bias_ else :
def __init__(self, num_units, state_is_tuple=True, cell_type='lstm', scope='bi_rnn'):
    """Create the forward and backward cells of a bidirectional RNN.

    Args:
        num_units: Size handed to each recurrent cell.
        state_is_tuple: Forwarded to ``LSTMCell``; ignored for GRU cells.
        cell_type: ``'gru'`` selects ``GRUCell``; any other value selects
            ``LSTMCell``.
        scope: Stored unchanged as ``self.scope``.
    """
    self.num_units = num_units

    def make_cell():
        # Anything other than 'gru' falls back to the LSTM default.
        if cell_type == 'gru':
            return GRUCell(self.num_units)
        return LSTMCell(self.num_units, state_is_tuple=state_is_tuple)

    self.cell_fw = make_cell()
    self.cell_bw = make_cell()
    self.scope = scope
def __init__(self, num_layers, num_units, scope='stacked_bi_rnn'):
    """Create per-layer forward and backward LSTM cells for a stacked bi-RNN.

    Args:
        num_layers: Number of cells created for each direction.
        num_units: Size handed to every ``LSTMCell``.
        scope: Stored unchanged as ``self.scope``.
    """
    self.num_layers = num_layers
    self.num_units = num_units

    def new_stack():
        # A fresh cell object for every layer.
        stack = []
        for _ in range(self.num_layers):
            stack.append(LSTMCell(self.num_units))
        return stack

    self.cells_fw = new_stack()
    self.cells_bw = new_stack()
    self.scope = scope
def __init__(self, num_layers, num_units, cell_type='lstm', scope='stacked_bi_rnn'):
    """Create per-layer forward and backward cells for a stacked bi-RNN.

    Args:
        num_layers: Number of cells created for each direction.
        num_units: Size handed to every cell.
        cell_type: ``'gru'`` selects ``GRUCell``; any other value selects
            ``LSTMCell``.
        scope: Stored unchanged as ``self.scope``.
    """
    self.num_layers = num_layers
    self.num_units = num_units

    # Resolve the cell class once; everything except 'gru' means LSTM.
    cell_cls = GRUCell if cell_type == 'gru' else LSTMCell

    self.cells_fw = [cell_cls(self.num_units) for _ in range(self.num_layers)]
    self.cells_bw = [cell_cls(self.num_units) for _ in range(self.num_layers)]
    self.scope = scope
def build_decoder_cell(rank, u_emb, batch_size, depth=2):
    """Build a stacked LSTM decoder cell and its initial state.

    The first layer is a plain ``LSTMCell``; every further layer is an
    ``LSTMCell`` wrapped in ``ResidualWrapper``.

    Args:
        rank: Number of units of every LSTM layer.
        u_emb: Tensor used as the ``h`` part of the first layer's initial
            state (the ``c`` part is ``tf.zeros_like(u_emb)``).
        batch_size: Batch size used for the zero states of layers 1..depth-1.
        depth: Number of stacked layers.

    Returns:
        ``(MultiRNNCell, tuple_of_initial_states)``.
    """
    layers = [
        LSTMCell(rank, state_is_tuple=True) if idx == 0
        else ResidualWrapper(LSTMCell(rank, state_is_tuple=True))
        for idx in range(depth)
    ]

    # Layer 0 starts from u_emb; the remaining layers start from zeros.
    first_state = LSTMStateTuple(tf.zeros_like(u_emb), u_emb)
    remaining_states = [
        layer.zero_state(batch_size, tf.float32) for layer in layers[1:]
    ]
    return MultiRNNCell(layers), tuple([first_state] + remaining_states)
def getCell(self, is_training, dp, config):
    """Build the stacked recurrent cell described by ``config``.

    Args:
        is_training: Truthy when the training graph is being built; dropout
            wrappers are only added in that case.
        dp: Not used by this method.
        config: Object providing ``cell_type`` ("LSTM", "RNN" or "GRU"),
            ``num_layer`` (1 or 2), ``hidden_size``, ``hidden_size_2`` (read
            only when ``num_layer == 2``) and ``keep_prob``.

    Returns:
        An ``rnn_cell.MultiRNNCell`` holding one or two cells.

    Raises:
        ValueError: If ``config.cell_type`` or ``config.num_layer`` is not
            supported.
    """
    phase = "training" if is_training else "testing"
    print("==> Construct ", config.cell_type, " graph for " + phase)

    if config.cell_type == "LSTM":
        def make_cell(size):
            return LSTMCell(size, forget_bias=0.0, state_is_tuple=True)
    elif config.cell_type == "RNN":
        make_cell = BasicRNNCell
    elif config.cell_type == "GRU":
        # GRUCell takes neither `forget_bias` nor `state_is_tuple`; passing
        # them (as the previous code did) raises TypeError.
        make_cell = GRUCell
    else:
        raise ValueError("cell type should be GRU,LSTM,RNN")

    if config.num_layer == 1:
        sizes = [config.hidden_size]
    elif config.num_layer == 2:
        sizes = [config.hidden_size, config.hidden_size_2]
    else:
        raise ValueError("config.num_layer should be 1 or 2")

    cells = [make_cell(size) for size in sizes]

    # Dropout on the input and output of every layer, training only.
    if is_training and config.keep_prob < 1:
        cells = [
            DropoutWrapper(cell,
                           input_keep_prob=config.keep_prob,
                           output_keep_prob=config.keep_prob)
            for cell in cells
        ]

    return rnn_cell.MultiRNNCell(cells, state_is_tuple=True)
def inference_layer(self, inputs):
    """Run the recurrent encoder and project its outputs to class scores.

    Uses ``deep_bidirectional_dynamic_rnn`` when ``self.dblstm`` is truthy,
    otherwise a single bidirectional LSTM layer with variational dropout.
    The per-step logits are stored in ``self.scores``; when ``self.crf`` is
    truthy a ``transitions`` variable is also created as
    ``self.transition_params``.

    Args:
        inputs: Input tensor handed to the recurrent layer.
    """
    if self.dblstm:
        with tf.name_scope('deep_bidirectional_rnn'):
            layer_cells = [self._dblstm_cell() for _ in range(self.num_layers)]
            rnn_outputs, _ = deep_bidirectional_dynamic_rnn(
                layer_cells, inputs, sequence_length=self.sequence_lengths)
        state_dim = self.state_dim
    else:
        def dropout_lstm():
            # Same dropout probability on recurrent state and output.
            return DropoutWrapper(LSTMCell(num_units=self.state_dim),
                                  variational_recurrent=True,
                                  state_keep_prob=self.dropout_keep_prob,
                                  output_keep_prob=self.dropout_keep_prob,
                                  dtype=tf.float32)

        cell_fw = dropout_lstm()
        cell_bw = dropout_lstm()
        with tf.name_scope('bidirectional_rnn'):
            rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw, cell_bw, inputs,
                sequence_length=self.sequence_lengths,
                dtype=tf.float32)
            # Join forward and backward outputs along the feature axis.
            rnn_outputs = tf.concat(rnn_outputs, 2)
        state_dim = self.state_dim * 2

    with tf.name_scope('linear_projection'):
        softmax_weights = tf.get_variable(
            'softmax_W', [state_dim, self.num_classes],
            initializer=tf.random_normal_initializer(0, 0.01))
        softmax_bias = tf.get_variable(
            'softmax_b', [self.num_classes],
            initializer=tf.zeros_initializer)

        time_steps = tf.shape(rnn_outputs)[1]
        # Flatten to 2-D for the matmul, then restore the time axis.
        flat_outputs = tf.reshape(
            rnn_outputs, [-1, state_dim],
            name="flatten_rnn_outputs_for_linear_projection")
        logits = tf.nn.xw_plus_b(x=flat_outputs,
                                 weights=softmax_weights,
                                 biases=softmax_bias,
                                 name="softmax_projection")
        self.scores = tf.reshape(logits,
                                 [-1, time_steps, self.num_classes],
                                 name="unflatten_logits")

    if self.crf:
        self.transition_params = tf.get_variable(
            "transitions", [self.num_classes, self.num_classes])
def _build_pre(self):
    """Set the cell size, the two-layer LSTM stack and optimiser constants.

    Sets ``dimA``, ``cellA``, ``b1``, ``b2``, ``lr`` and ``eps`` on the
    instance.
    """
    self.dimA = 20
    # One LSTMCell object per layer. `[LSTMCell(...)] * 2` would place the
    # *same* object in both slots, which TensorFlow >= 1.1 rejects or turns
    # into unintended weight sharing between layers.
    self.cellA = MultiRNNCell([LSTMCell(self.dimA) for _ in range(2)])
    # NOTE(review): b1/b2/lr/eps look like Adam-style hyperparameters;
    # confirm against the code that consumes them.
    self.b1 = 0.95
    self.b2 = 0.95
    self.lr = 0.1
    self.eps = 1e-8
def __init__(self, num_units, memory, pmemory, cell_type='lstm'):
    """Set up the attention cell around an inner ``LSTMCell``.

    Args:
        num_units: Size of the inner ``LSTMCell``.
        memory: Tensor stored as ``self.memory``; the size of its last
            dimension is stored as ``self.mem_units``.
        pmemory: Stored unchanged as ``self.pmemory``.
        cell_type: Accepted but not read here; the inner cell is always an
            ``LSTMCell``.
    """
    super(AttentionCell, self).__init__()
    self._cell = LSTMCell(num_units)
    self.num_units = num_units
    self.memory, self.pmemory = memory, pmemory
    # Width of the memory = static size of its last axis.
    memory_dims = memory.get_shape().as_list()
    self.mem_units = memory_dims[-1]
def impress(self, state_code, pre_impress_states):
    """Run one step of a 3-layer LSTM stack over ``state_code``.

    Args:
        state_code: Tensor fed as the input of the single RNN step.
        pre_impress_states: Initial state for the LSTM stack, or ``None`` to
            start from the zero state.

    Returns:
        ``(state_impress_code, final_loop_state)`` where
        ``state_impress_code`` is element 0 (along the first axis) of the
        emitted outputs after swapping their first two axes, and
        ``final_loop_state`` is the loop state returned by ``tf.nn.raw_rnn``.
    """
    # LSTM, 3 layers
    self.impress_lay_num = 3
    with tf.variable_scope('impress', reuse=tf.AUTO_REUSE):
        multirnn_cell = MultiRNNCell(
            [LSTMCell(self.impress_dim) for _ in range(self.impress_lay_num)],
            state_is_tuple=True)

        # Identity test: `== None` is the wrong check for tensors/tuples.
        if pre_impress_states is None:
            initial_state = multirnn_cell.zero_state(self.batch_size, tf.float32)
        else:
            initial_state = pre_impress_states

        def loop_fn(time, cell_output, cell_state, loop_state):
            if cell_output is None:
                # time = 0: initialization
                next_input = state_code
                next_state = initial_state
                emit_output = None
            else:
                next_input = cell_output
                next_state = cell_state
                emit_output = cell_output
            # Stop after the first step.
            elements_finished = (time >= 1)
            return (elements_finished, next_input, next_state, emit_output, None)

        emit_ta, states, final_loop_state = tf.nn.raw_rnn(multirnn_cell, loop_fn)
        # transpose for putting batch dimension to first dimension
        # NOTE(review): after the transpose, `[0]` selects index 0 of the
        # first axis; confirm that this is the intended slice.
        state_impress_code = tf.transpose(emit_ta.stack(), [1, 0, 2])[0]
        # NOTE(review): loop_fn always returns None as loop state, so
        # `final_loop_state` is None; the final cell state is `states`.
        # Confirm which of the two callers expect back.
        return state_impress_code, final_loop_state
def Encoder(self, xs):
    """Encode ``xs`` with a bidirectional multi-layer LSTM.

    Args:
        xs: Integer-valued tensor of token ids (cast to int32 and one-hot
            encoded with depth ``self.val_size_x``).

    Returns:
        Tensor produced by a ReLU dense layer (``self.encoder_units`` units)
        applied to the concatenated forward/backward outputs taken at
        position ``inputs_length[i] - 1`` of every batch element.

    Raises:
        ValueError: If neither ``self.args.train`` nor ``self.args.test`` is
            set, so no sequence-length placeholder can be chosen.
    """
    encoder_input = tf.one_hot(tf.cast(xs, tf.int32), self.val_size_x)
    encoder_input = self.WordEmb(encoder_input)

    if self.args.train:
        inputs_length = self.inputs_length_PH
    elif self.args.test:
        inputs_length = self.inputs_length_test_PH
    else:
        # Previously this fell through and failed later on an unbound name.
        raise ValueError("Encoder requires args.train or args.test to be set")

    def build_stack():
        return MultiRNNCell(
            [LSTMCell(self.encoder_units) for _ in range(self.encoder_lay_Num)],
            state_is_tuple=True)

    # Separate cell objects per direction: passing one object as both
    # cell_fw and cell_bw either errors out or silently ties the forward and
    # backward weights, depending on the TensorFlow version.
    (fw_outputs, bw_outputs), _ = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=build_stack(),
        cell_bw=build_stack(),
        inputs=encoder_input,
        sequence_length=inputs_length,
        dtype=self.dtype)

    sentence_code = tf.concat((fw_outputs, bw_outputs), axis=2)
    # Pick, per batch element, the output at its last valid time step.
    last_steps = [
        sentence_code[i, inputs_length[i] - 1, :]
        for i in range(self.batch_size)
    ]
    encoder_output = tf.stack(last_steps)
    encoder_output = tf.layers.dense(inputs=encoder_output,
                                     units=self.encoder_units,
                                     activation=tf.nn.relu)
    return encoder_output
def RNN(_X, _weights, _biases, lens):
    """Run ``multiPLSTM`` over ``_X`` and project the last valid output.

    Args:
        _X: Input handed to ``multiPLSTM``.
        _weights: Mapping whose ``'out'`` entry is the projection matrix.
        _biases: Mapping whose ``'out'`` entry is the projection bias.
        lens: Per-example sequence lengths; ``lens - 1`` indexes the output
            step that is kept.

    Returns:
        ``matmul(relevant, _weights['out'])`` with ``_biases['out']`` added.

    Raises:
        ValueError: If ``FLAGS.unit`` is not "PLSTM", "GRU" or "LSTM".
    """
    unit = FLAGS.unit
    # NOTE(review): `cell` is built (and the unit name validated) but never
    # passed on; the network below always comes from multiPLSTM. Confirm.
    if unit == "PLSTM":
        cell = PhasedLSTMCell(FLAGS.n_hidden, use_peepholes=True, state_is_tuple=True)
    elif unit == "GRU":
        cell = GRUCell(FLAGS.n_hidden)
    elif unit == "LSTM":
        cell = LSTMCell(FLAGS.n_hidden, use_peepholes=True, state_is_tuple=True)
    else:
        raise ValueError("Unit '{}' not implemented.".format(FLAGS.unit))

    outputs = multiPLSTM(_X, lens, FLAGS.n_layers, FLAGS.n_hidden, n_input)
    # Keep only the first n_hidden features of every step.
    outputs = tf.slice(outputs, [0, 0, 0], [-1, -1, FLAGS.n_hidden])

    # TODO better (?) in lack of smart indexing
    dyn_shape = tf.shape(outputs)
    n_rows = dyn_shape[0]
    n_steps = dyn_shape[1]
    feature_dim = int(outputs.get_shape()[2])

    # Row index of step (lens - 1) of each example in the flattened tensor.
    flat_index = tf.range(0, n_rows) * n_steps + (lens - 1)
    flattened = tf.reshape(outputs, [-1, feature_dim])
    relevant = tf.gather(flattened, flat_index)

    projected = tf.matmul(relevant, _weights['out'])
    return tf.nn.bias_add(projected, _biases['out'])
def s2v(self):
    """Build the sentence-to-vector part of the graph.

    Creates the word embedding variable, looks up and applies dropout to the
    embedded sentence, builds the recurrent cell selected by
    ``self.rnn_cell_type`` and then dispatches on ``self.rnn_model`` to the
    method that produces ``self.sentence_emb`` (plus model-specific extras).
    An unknown ``self.rnn_model`` prints a message and calls ``exit()``.
    """
    sqrt3 = math.sqrt(3.0)
    initializer = tf.random_uniform_initializer(-sqrt3, sqrt3, dtype=self.dtype)
    # word embedding layer
    if self.pre_trained_word_emb is not None:
        # Start from the supplied pre-trained embedding values.
        self.word_embeddings = tf.get_variable(
            name='word_embedding', initializer=self.pre_trained_word_emb, dtype=self.dtype)
    else:
        # Uniform initialisation in [-sqrt(3), sqrt(3)].
        self.word_embeddings = tf.get_variable(
            name='word_embedding', shape=[self.voc_size, self.emb_size], initializer=initializer, dtype=self.dtype)
    self.embedded_sentence = tf.nn.embedding_lookup(
        self.word_embeddings, self.sentence)
    self.embedded_sentence = tf.nn.dropout(
        self.embedded_sentence, keep_prob=self.keep_word_prob_placeholder)
    # create the rnn cell
    if self.rnn_cell_type.lower() == 'gru':
        rnn_cell = GRUCell
    else:
        rnn_cell = LSTMCell
    # `rnn_cell` held the class until here; instantiate it now.
    rnn_cell = rnn_cell(self.hidden_units)
    if self.use_lstm_dropout:
        rnn_cell = DropoutWrapper(
            rnn_cell, dtype=tf.float32, output_keep_prob=self.keep_lstm_prob_placeholder)
    if self.rnn_model == 'leap-lstm':
        self.sentence_emb, self.skip_dis_output = self.leap_lstm(rnn_cell)
    elif self.rnn_model == 'rnn':
        if self.rnn_pattern == 1:
            self.sentence_emb = self.general_rnn(rnn_cell, out='LAST')
        else:
            self.sentence_emb = self.general_rnn_for_pattern(
                rnn_cell, out='LAST')  # for test the training time
    elif self.rnn_model == 'brnn':
        # Note: this branch does not pass `rnn_cell` on.
        self.sentence_emb = self.general_brnn()
    elif self.rnn_model == 'skip-rnn-2017':
        self.sentence_emb, self.budget_loss, self.updated_states, self.rnn_final_states, self.rnn_outputs = self.skip_rnn_2017(
        )
    elif self.rnn_model == 'skim-rnn':
        small_rnn_cell = LSTMCell(5)  # small size 5
        small_rnn_cell = DropoutWrapper(
            small_rnn_cell, dtype=tf.float32, output_keep_prob=self.keep_lstm_prob_placeholder)
        self.sentence_emb, self.skip_dis_output, self.skim_loss = self.skim_rnn(
            rnn_cell, small_rnn_cell)  # the skim-rnn settings simply follow the source code on GitHub
    else:
        print("bad rnn model!")
        exit()
def _build_pre(self, size):
    """Set the cell size, the LSTM stack and optimiser constants.

    Args:
        size: Number of units of every LSTM layer; stored as ``self.dimA``.
    """
    self.dimA = size
    self.num_of_layers = 2

    # One independent LSTMCell per layer.
    layers = []
    for _ in range(self.num_of_layers):
        layers.append(LSTMCell(num_units=self.dimA))
    self.cellA = MultiRNNCell(layers)

    self.b1, self.b2 = 0.95, 0.95
    self.lr = 0.1
    self.eps = 1e-8
def Decoder(self, encoder_output):
    """Decode ``encoder_output`` with a multi-layer LSTM driven by ``raw_rnn``.

    At every step after the first, the cell input is the embedding of the
    previous token concatenated with ``encoder_output``: the previous token
    is the argmax of the projected cell output when ``self.args.test`` is
    set, or column ``time - 1`` of ``self.ys_PH`` when ``self.args.train``
    is set.

    Args:
        encoder_output: Tensor concatenated (along axis 1) to every decoder
            input.

    Returns:
        Logits reshaped to ``(self.batch_size, self.max_len, self.val_size)``.
    """
    def loop_fn(time, cell_output, cell_state, loop_state):
        if cell_output is None:  #time = 0
            # initialization
            # First input: encoder_output concatenated with itself.
            input = tf.concat((encoder_output, encoder_output), axis=1)
            state = (multirnn_cell.zero_state(self.batch_size, tf.float32))
            emit_output = None
            loop_state = None
            elements_finished = False
        else:
            emit_output = cell_output
            if self.args.test:
                #decoder_units to val_size
                transformed_output = tf.nn.xw_plus_b(
                    cell_output, self.decoder_W, self.decoder_b)
                #decoder_units to vac_size
                #argmax
                transformed_output = tf.argmax(transformed_output, 1)
                transformed_output = tf.one_hot(transformed_output, self.val_size, on_value=1.0, off_value=0.0, axis=-1)
                #val_size to decoder_units//2
                transformed_output = self.WordEmb(transformed_output)
            elif self.args.train:
                # Teacher forcing: feed the ground-truth token of the previous step.
                ys_onehot = tf.one_hot(self.ys_PH[:, (time - 1)], self.val_size, on_value=1.0, off_value=0.0, axis=-1)
                transformed_output = self.WordEmb(ys_onehot)
            # NOTE(review): `transformed_output` is unbound if neither
            # args.test nor args.train is set.
            input = tf.concat([transformed_output, encoder_output], axis=1)
            state = cell_state
            loop_state = None
            elements_finished = (time >= self.max_len)
        return (elements_finished, input, state, emit_output, loop_state)
    # `loop_fn` closes over this name; it is bound before raw_rnn calls it.
    multirnn_cell = MultiRNNCell(
        [LSTMCell(self.decoder_units) for _ in range(self.lay_num)], state_is_tuple=True)
    emit_ta, final_state, final_loop_state = tf.nn.raw_rnn(
        multirnn_cell, loop_fn)
    # transpose for putting batch dimension to first dimension
    outputs = tf.transpose(emit_ta.stack(), [1, 0, 2])
    #transform decoder_units to val_size
    decoder_output_flat = tf.reshape(outputs, [-1, self.decoder_units])
    decoder_output_transform_flat = tf.nn.xw_plus_b(
        decoder_output_flat, self.decoder_W, self.decoder_b)
    decoder_logits = tf.reshape(
        decoder_output_transform_flat, (self.batch_size, self.max_len, self.val_size))
    return decoder_logits
def _create_rnn_cell(self):
    """Create one recurrent cell according to ``self.cfg``.

    Returns:
        A ``GRUCell`` when ``self.cfg.cell_type == "gru"``, otherwise an
        ``LSTMCell``; wrapped in ``DropoutWrapper`` when
        ``self.cfg.use_dropout`` is set and then in ``ResidualWrapper`` when
        ``self.cfg.use_residual`` is set.
    """
    if self.cfg.cell_type == "gru":
        cell = GRUCell(self.cfg.num_units)
    else:
        cell = LSTMCell(self.cfg.num_units)

    # Wrapper order matters: dropout first, residual connection outermost.
    if self.cfg.use_dropout:
        cell = DropoutWrapper(cell, output_keep_prob=self.keep_prob)
    if self.cfg.use_residual:
        cell = ResidualWrapper(cell)
    return cell
def _create_single_rnn_cell(self, num_units):
    """Create one recurrent cell of ``num_units`` according to ``self.cfg``.

    Args:
        num_units: Size handed to the cell constructor.

    Returns:
        A ``GRUCell`` when ``self.cfg["cell_type"] == "gru"``, otherwise an
        ``LSTMCell``; wrapped in ``DropoutWrapper`` when
        ``self.cfg["use_dropout"]`` is set and then in ``ResidualWrapper``
        when ``self.cfg["use_residual"]`` is set.
    """
    if self.cfg["cell_type"] == "gru":
        cell = GRUCell(num_units)
    else:
        cell = LSTMCell(num_units)

    # Wrapper order matters: dropout first, residual connection outermost.
    if self.cfg["use_dropout"]:
        cell = DropoutWrapper(cell, output_keep_prob=self.rnn_keep_prob)
    if self.cfg["use_residual"]:
        cell = ResidualWrapper(cell)
    return cell
def _build_model_op(self):
    """Build encoder, self-attention and projection; set ``self.logits``.

    The encoder is a bidirectional LSTM over ``self.word_embeddings``; its
    concatenated outputs go through ``dot_attention`` (used as
    self-attention) and a dense projection to ``self.cfg.tag_vocab_size``.
    The static shape of each stage is printed.
    """
    with tf.variable_scope('encoder'):
        cell_fw = LSTMCell(num_units=self.cfg.num_units)
        cell_bw = LSTMCell(num_units=self.cfg.num_units)
        # No initial state is supplied, so a dtype must be given; the other
        # bidirectional_dynamic_rnn call sites pass tf.float32 as well.
        outputs, _ = bidirectional_dynamic_rnn(cell_fw, cell_bw,
                                               self.word_embeddings,
                                               sequence_length=self.seq_lengths,
                                               dtype=tf.float32)
        enc_outputs = tf.concat(outputs, axis=-1)
        print('encoder output shape: {}'.format(
            enc_outputs.get_shape().as_list()))

    with tf.variable_scope('self_attention'):
        # The encoder output attends over itself.
        self_att = dot_attention(enc_outputs, enc_outputs,
                                 self.cfg.num_units,
                                 keep_prob=self.keep_prob,
                                 is_train=self.is_train)
        print('self-attention output shape: {}'.format(
            self_att.get_shape().as_list()))

    with tf.variable_scope('project'):
        self.logits = dense(self_att, self.cfg.tag_vocab_size, use_bias=True)
        print('projected output (logits) shape: {}'.format(
            self.logits.get_shape().as_list()))
def generator(z, hidden_units_g, seq_length, batch_size, num_generated_features, reuse=False, parameters=None, cond_dim=0, c=None, learn_scale=True):
    """Build the LSTM generator under the ``"generator"`` variable scope.

    If parameters are supplied, initialise as such: the output weights and
    bias and the LSTM initializer are taken from ``parameters``.

    Args:
        z: Input tensor fed to the LSTM (directly, or concatenated with the
            repeated condition when ``cond_dim > 0``).
        hidden_units_g: Number of LSTM units.
        seq_length: Sequence length; used to repeat ``c``, as the per-example
            ``sequence_length`` and in the final reshape.
        batch_size: Number of entries in the ``sequence_length`` list.
        num_generated_features: Size of the output feature axis.
        reuse: When truthy, reuse the variables of the scope.
        parameters: Optional mapping from variable names (e.g.
            ``'generator/W_out_G:0'``) to initial values.
        cond_dim: Conditioning is enabled when this is > 0.
        c: Condition tensor; must not be None when ``cond_dim > 0``.
        learn_scale: ``trainable`` flag of the ``scale_out_G`` variable.

    Returns:
        ``tanh`` outputs reshaped to ``[-1, seq_length, num_generated_features]``.
    """
    with tf.variable_scope("generator") as scope:
        if reuse:
            scope.reuse_variables()
        if parameters is None:
            W_out_G_initializer = tf.truncated_normal_initializer()
            b_out_G_initializer = tf.truncated_normal_initializer()
            scale_out_G_initializer = tf.constant_initializer(value=1.0)
            lstm_initializer = None
            bias_start = 1.0
        else:
            W_out_G_initializer = tf.constant_initializer(value=parameters['generator/W_out_G:0'])
            b_out_G_initializer = tf.constant_initializer(value=parameters['generator/b_out_G:0'])
            try:
                scale_out_G_initializer = tf.constant_initializer(value=parameters['generator/scale_out_G:0'])
            except KeyError:
                # No stored scale: fall back to 1.
                # NOTE(review): placement of the assert inside the except
                # branch is reconstructed from flattened source; confirm.
                scale_out_G_initializer = tf.constant_initializer(value=1)
                assert learn_scale
            lstm_initializer = tf.constant_initializer(value=parameters['generator/rnn/lstm_cell/weights:0'])
            bias_start = parameters['generator/rnn/lstm_cell/biases:0']
        # NOTE(review): `bias_start` is assigned in both branches above but
        # not read anywhere in this function.
        W_out_G = tf.get_variable(name='W_out_G', shape=[hidden_units_g, num_generated_features], initializer=W_out_G_initializer)
        b_out_G = tf.get_variable(name='b_out_G', shape=num_generated_features, initializer=b_out_G_initializer)
        # Created (and possibly trainable) although the scaled output below
        # is commented out.
        scale_out_G = tf.get_variable(name='scale_out_G', shape=1, initializer=scale_out_G_initializer, trainable=learn_scale)
        if cond_dim > 0:
            # CGAN!
            assert not c is None
            # Repeat the condition along a new axis 1, once per time step.
            repeated_encoding = tf.stack([c]*seq_length, axis=1)
            inputs = tf.concat([z, repeated_encoding], axis=2)
            #repeated_encoding = tf.tile(c, [1, tf.shape(z)[1]])
            #repeated_encoding = tf.reshape(repeated_encoding, [tf.shape(z)[0], tf.shape(z)[1], cond_dim])
            #inputs = tf.concat([repeated_encoding, z], 2)
        else:
            inputs = z
        cell = LSTMCell(num_units=hidden_units_g, state_is_tuple=True, initializer=lstm_initializer, reuse=reuse)
        rnn_outputs, rnn_states = tf.nn.dynamic_rnn(
            cell=cell, dtype=tf.float32, sequence_length=[seq_length]*batch_size, inputs=inputs)
        # Flatten to 2-D for the output projection.
        rnn_outputs_2d = tf.reshape(rnn_outputs, [-1, hidden_units_g])
        logits_2d = tf.matmul(rnn_outputs_2d, W_out_G) + b_out_G
        # output_2d = tf.multiply(tf.nn.tanh(logits_2d), scale_out_G)
        output_2d = tf.nn.tanh(logits_2d)
        output_3d = tf.reshape(output_2d, [-1, seq_length, num_generated_features])
    return output_3d
def __init__(self, num_layers, num_units, cell_type='lstm', scope='stack_bi_rnn'):
    """Create per-layer forward and backward cells for a stacked bi-RNN.

    Args:
        num_layers: Number of layers (cells per direction).
        num_units: Either a single size used for every layer, or a list or
            tuple with one size per layer.
        cell_type: ``'lstm'`` selects ``LSTMCell``; any other value selects
            ``GRUCell``.
        scope: Stored unchanged as ``self.scope``.

    Raises:
        AssertionError: If ``num_units`` is a sequence whose length differs
            from ``num_layers``.
    """
    # isinstance (not `type(...) == list`) so list subclasses and tuples are
    # treated as per-layer sizes too.
    if isinstance(num_units, (list, tuple)):
        assert len(
            num_units
        ) == num_layers, "if num_units is a list, then its size should equal to num_layers"
        layer_sizes = list(num_units)
    else:
        layer_sizes = [num_units] * num_layers

    cell_cls = LSTMCell if cell_type == 'lstm' else GRUCell

    # Independent cell objects for every layer and direction.
    self.cells_fw = [cell_cls(size) for size in layer_sizes]
    self.cells_bw = [cell_cls(size) for size in layer_sizes]
    self.num_layers = num_layers
    self.scope = scope
def __init__(self, cfg):
    """Build the classification graph: embedding lookup, Bi-LSTM, attention, loss.

    Creates the placeholders ``context``, ``seq_len``, ``labels`` and ``lr``
    and sets ``loss``, ``train_op``, ``pred`` and ``accuracy`` on the
    instance.

    Args:
        cfg: Object providing ``num_classes``, ``num_units`` and
            ``hidden_units``.
    """
    # NOTE(review): how far the tf.device block extends is reconstructed
    # from flattened source; confirm against the original layout.
    # fed by 'feed_dict'
    self.context = tf.placeholder(name='context', shape=[None, None], dtype=tf.int32)
    self.seq_len = tf.placeholder(name='sequence_length', shape=[None], dtype=tf.int32)
    self.labels = tf.placeholder(name='labels', shape=[None, cfg.num_classes], dtype=tf.float32)
    self.lr = tf.placeholder(name='learning_rate', dtype=tf.float32)
    with tf.device('/gpu:0'):
        with tf.variable_scope('context_lookup_table'):
            # NOTE(review): pickle.load executes arbitrary code from the
            # file; make sure this path only ever points at trusted data.
            # `params` is a name defined outside this method.
            with open(params['default_word_emb_pkl_path'], 'rb') as f:
                word_emb = pickle.load(f)
            # Held as a constant, so the embeddings are not trained.
            word_embeddings = tf.constant(word_emb, dtype=tf.float32)
            # make lookup table for given review context
            context_emb = tf.nn.embedding_lookup(word_embeddings, self.context)
        with tf.variable_scope('context_representation'):
            cell_fw = LSTMCell(num_units = cfg.num_units)
            cell_bw = LSTMCell(num_units = cfg.num_units)
            h,_ = bidirectional_dynamic_rnn(cell_fw, cell_bw, context_emb, sequence_length=self.seq_len, dtype=tf.float32, time_major=False)
            #concat forward and backward hidden states
            h = tf.concat(h, axis=-1)
            h = self.self_attention(h)
            weight = tf.get_variable(name='weight', shape=[2*cfg.num_units, 2*cfg.num_units], dtype=tf.float32)
            # NOTE(review): in the flattened source the next statement
            # follows a `###` marker, so it is kept as a comment here; if it
            # is meant to be live, `weight` is used, otherwise `weight` is
            # created but unused. Confirm.
            ### h = tf.nn.tanh(tf.matmul(h, weight))
        with tf.variable_scope('compute_logits'):
            context_logits = self.ffn_layer(h, cfg.hidden_units, cfg.num_classes, scope='ffn_layer')
        with tf.variable_scope('compute_loss'):
            self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=context_logits, labels=self.labels))
            self.train_op = tf.train.AdamOptimizer(learning_rate=self.lr).minimize(self.loss)
        with tf.variable_scope('accuracy'):
            #pred is 0 (neg) or 1 (pos)
            self.pred = tf.argmax(tf.nn.softmax(context_logits),1,name='prediction')
            num_correct_pred = tf.equal(self.pred, tf.argmax(self.labels, 1))
            self.accuracy = tf.reduce_mean(tf.cast(num_correct_pred, tf.float32))
def stacked_rnn_step(input_vocabulary_size, hidden_size=13, emb_dim=11, n_layers=2, variable_scope='encdec'):
    """Build a stacked LSTM cell with input projection and embedding wrappers.

    Args:
        input_vocabulary_size: Vocabulary size passed to ``EmbeddingWrapper``.
        hidden_size: Number of units of every LSTM layer.
        emb_dim: Size passed to ``InputProjectionWrapper`` and to
            ``EmbeddingWrapper``.
        n_layers: Number of stacked LSTM layers.
        variable_scope: Name of the enclosing variable scope.

    Returns:
        The ``EmbeddingWrapper`` around the projected, stacked LSTM cell.
    """
    with tf.variable_scope(variable_scope, reuse=None):
        # stacked LSTM -- one cell object per layer. `[cell] * n_layers`
        # repeats a single object, which TensorFlow >= 1.1 rejects or turns
        # into unintended weight sharing between layers.
        rnn_cell = MultiRNNCell(
            [LSTMCell(hidden_size) for _ in range(n_layers)])
        proj_wrapper = InputProjectionWrapper(rnn_cell, emb_dim)
        embedding_wrapper = EmbeddingWrapper(proj_wrapper, input_vocabulary_size, emb_dim)
        return embedding_wrapper
def __init__(self, num_layers, num_units, batch_size, input_size, keep_prob=1.0, is_train=None, scope="native_lstm", activation=tf.nn.tanh):
    """Create per-layer LSTM cell pairs and matching dropout masks.

    Args:
        num_layers: Number of layers.
        num_units: Number of units of every ``LSTMCell``.
        batch_size: First dimension of every dropout mask.
        input_size: Feature size seen by layer 0; deeper layers see
            ``2 * num_units``.
        keep_prob: Forwarded to ``Dropout``.
        is_train: Forwarded to ``Dropout``.
        scope: Stored unchanged as ``self.scope``.
        activation: Activation handed to every ``LSTMCell``.
    """
    self.num_layers = num_layers
    self.scope = scope
    # The attribute keeps its historical name; the cells are LSTM cells.
    self.grus = []
    # Left empty by this constructor.
    self.inits = []
    self.dropout_mask = []

    def make_mask(width):
        # Mask of shape [batch, 1, width] built from a tensor of ones.
        ones = tf.ones([batch_size, 1, width], dtype=tf.float32)
        return Dropout(ones, keep_prob=keep_prob, is_train=is_train, mode='')

    for layer_idx in range(num_layers):
        if layer_idx == 0:
            layer_width = input_size
        else:
            layer_width = 2 * num_units

        cell_fw = LSTMCell(num_units, activation=activation)
        cell_bw = LSTMCell(num_units, activation=activation)
        mask_fw = make_mask(layer_width)
        mask_bw = make_mask(layer_width)

        self.grus.append((cell_fw, cell_bw))
        self.dropout_mask.append((mask_fw, mask_bw))
def context_representation(inputs, seq_len, num_units, activation=tf.nn.tanh, use_bias=False, reuse=None, name="context_rep"):
    """Encode ``inputs`` with a Bi-LSTM, self-attention and a dense projection.

    Args:
        inputs: Batch-major input tensor (``time_major=False``).
        seq_len: Per-example sequence lengths.
        num_units: Size of each LSTM cell, of the attention and of the final
            dense layer.
        activation: Activation of the final dense layer.
        use_bias: Whether the final dense layer has a bias.
        reuse: Reuse flag for the variable scope and the attention.
        name: Name of the enclosing variable scope.

    Returns:
        Output of the ``context_project`` dense layer.
    """
    with tf.variable_scope(name, reuse=reuse, dtype=tf.float32):
        fw_cell = LSTMCell(num_units=num_units)
        bw_cell = LSTMCell(num_units=num_units)
        directional_outputs, _ = bidirectional_dynamic_rnn(
            cell_fw=fw_cell,
            cell_bw=bw_cell,
            inputs=inputs,
            sequence_length=seq_len,
            dtype=tf.float32,
            time_major=False,
            scope="bidirectional_dynamic_rnn")
        # Forward and backward outputs side by side on the last axis.
        merged = tf.concat(directional_outputs, axis=-1)

        # self-attention
        attended = self_attention(merged,
                                  num_units=num_units,
                                  return_alphas=False,
                                  reuse=reuse,
                                  name="self_attention")

        # dense layer project
        projected = tf.layers.dense(
            attended,
            units=num_units,
            use_bias=use_bias,
            kernel_initializer=tf.glorot_uniform_initializer(),
            activation=activation,
            name="context_project")
        return projected
def model(data, weights, biases):
    """Run a stacked LSTM over ``data`` and return softmax predictions.

    Args:
        data: Inputs handed to ``tf.nn.rnn``. Previously this argument was
            ignored and the module-level ``train_data_node`` was always used.
        weights: Mapping whose ``'out'`` entry is the output matrix.
        biases: Mapping whose ``'out'`` entry is the output bias.

    Returns:
        ``softmax(last @ weights['out'] + biases['out'])``.
    """
    # One LSTMCell object per layer rather than `[cell] * NUM_LAYERS`, which
    # repeats a single object.
    cell = MultiRNNCell([LSTMCell(NUM_NEURONS) for _ in range(NUM_LAYERS)])
    output, _ = tf.nn.rnn(cell, data, dtype=DATA_TYPE)
    output = tf.transpose(output, [1, 0, 2])
    # NOTE(review): after the transpose this gathers the last index of
    # axis 0; confirm that this axis is the time axis for the outputs
    # returned by tf.nn.rnn.
    last = tf.gather(output, int(output.get_shape()[0]) - 1)
    prediction = tf.nn.softmax(
        tf.matmul(last, weights['out']) + biases['out'])
    return prediction
def encoder(self):
    """Build the encoder RNN; uni- or bidirectional per ``self.Bidirection``.

    Sets ``self.encoder_outputs``, ``self.encoder_final_state`` and
    ``self.hidden_units``. In the bidirectional case the forward and
    backward outputs and states are concatenated, so ``self.hidden_units``
    is ``2 * self.num_units``.
    """
    #### Encoder
    with tf.variable_scope(self.model_name + "encoder_model"):
        if not self.Bidirection:
            encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(self.num_units)
            self.encoder_outputs, self.encoder_final_state = tf.nn.dynamic_rnn(
                cell=encoder_cell,
                inputs=self.encoder_inputs_embedded,
                sequence_length=self.encoder_inputs_length,
                time_major=False,
                dtype=tf.float32)
            self.hidden_units = self.num_units
        else:
            encoder_cell_fw = LSTMCell(self.num_units)
            encoder_cell_bw = LSTMCell(self.num_units)
            ((encoder_fw_outputs, encoder_bw_outputs),
             (encoder_fw_final_state, encoder_bw_final_state)) = (
                tf.nn.bidirectional_dynamic_rnn(
                    cell_fw=encoder_cell_fw,
                    cell_bw=encoder_cell_bw,
                    inputs=self.encoder_inputs_embedded,
                    sequence_length=self.encoder_inputs_length,
                    dtype=tf.float32,
                    time_major=False))

            # Concatenate both directions along the feature axis. Stored on
            # the instance so both branches expose `self.encoder_outputs`
            # (it used to be a discarded local in this branch).
            self.encoder_outputs = tf.concat(
                (encoder_fw_outputs, encoder_bw_outputs), 2)

            encoder_final_state_c = tf.concat(
                (encoder_fw_final_state.c, encoder_bw_final_state.c), 1)
            encoder_final_state_h = tf.concat(
                (encoder_fw_final_state.h, encoder_bw_final_state.h), 1)

            # TF tuple used by LSTM cells for state_size, zero_state and
            # output state.
            self.encoder_final_state = LSTMStateTuple(
                c=encoder_final_state_c, h=encoder_final_state_h)
            self.hidden_units = 2 * self.num_units
def __init__(self, feature_size, eb_dim, hidden_size, max_len_item, max_len_user, item_part_fnum, user_part_fnum, use_hist_u, use_hist_i, emb_initializer):
    """Build the LSTM4Rec graph: one LSTM per history, then FC net and loss.

    All arguments are forwarded unchanged to the base-class constructor;
    ``hidden_size``, ``use_hist_u`` and ``use_hist_i`` are additionally used
    here.

    Args:
        hidden_size: Number of units of both LSTM cells.
        use_hist_u: Whether the user-side final state feeds the FC net.
        use_hist_i: Whether the item-side final state feeds the FC net.

    Raises:
        ValueError: If both ``use_hist_u`` and ``use_hist_i`` are false, in
            which case there is no input for the FC net.
    """
    super(LSTM4Rec, self).__init__(feature_size, eb_dim, hidden_size,
                                   max_len_item, max_len_user,
                                   item_part_fnum, user_part_fnum,
                                   use_hist_u, use_hist_i, emb_initializer)

    # RNN layer
    # state_is_tuple=False makes each final state a single tensor.
    with tf.name_scope('item_rnn'):
        _, item_part_final_state = tf.nn.dynamic_rnn(
            LSTMCell(hidden_size, state_is_tuple=False),
            inputs=self.item_part_emb,
            sequence_length=self.item_len_ph,
            dtype=tf.float32,
            scope='lstm1')
        item_part = item_part_final_state

    with tf.name_scope('user_rnn'):
        _, user_part_final_state = tf.nn.dynamic_rnn(
            LSTMCell(hidden_size, state_is_tuple=False),
            inputs=self.user_part_emb,
            sequence_length=self.user_len_ph,
            dtype=tf.float32,
            scope='lstm2')
        user_part = user_part_final_state

    if use_hist_i and use_hist_u:
        inp = tf.concat([item_part, user_part], axis=1)
    elif use_hist_i:
        inp = item_part
    elif use_hist_u:
        inp = user_part
    else:
        # Previously this case failed on an unbound `inp` a few lines later.
        raise ValueError(
            "at least one of use_hist_u and use_hist_i must be enabled")

    # fully connected layer
    self.build_fc_net(inp)
    self.build_loss()
def build_graph(self):
    """Build the stacked-LSTM step and the output layer.

    Calls the stacked cell once on ``self.model_input`` with
    ``self.init_state`` (which is overwritten with the new state), feeds the
    cell output to the ``fc_out`` output layer and attaches a print op and a
    histogram summary to it.

    Returns:
        ``self.output_layer``.
    """
    # NOTE(review): how far the 'lstm' variable scope extends is
    # reconstructed from flattened source; confirm, since it affects the
    # variable names of the output layer.
    with tf.variable_scope('lstm'):
        # One LSTMCell object per layer. `[lstm_cell] * self.layers` repeats
        # a single object, which TensorFlow >= 1.1 rejects or turns into
        # unintended weight sharing between layers.
        rnn_cell = MultiRNNCell(
            [LSTMCell(self.layer_size) for _ in range(self.layers)])
        cell_output, self.init_state = rnn_cell(self.model_input, self.init_state)
        print("%i layers created" % self.layers)
        self.output_layer = self.__add_output_layer(
            "fc_out", cell_output, self.layer_size, self.output_dim)
        # Print output layer and ground truth whenever the op is evaluated.
        self.output_layer = tf.Print(
            self.output_layer,
            [self.output_layer, tf.convert_to_tensor(self.ground_truth)],
            'Value of output layer and ground truth:',
            summarize=6)
        tf.histogram_summary('lstm_output', self.output_layer)
        return self.output_layer
def Encoder(self):
    """Encode ``self.xs`` with a bidirectional LSTM.

    Returns:
        ``(output, final_state)`` where ``output`` is the element-wise mean
        of the forward and backward outputs and ``final_state`` is an
        ``LSTMStateTuple`` whose ``c`` and ``h`` are the forward and backward
        final states concatenated along axis 1.
    """
    # One length per batch element, each equal to self.input_timestep.
    inputs_length = tf.fill([tf.shape(self.xs)[0]], self.input_timestep)

    # Separate cell objects per direction: passing one object as both
    # cell_fw and cell_bw either errors out or silently ties the forward and
    # backward weights, depending on the TensorFlow version.
    cell_fw = LSTMCell(self.encoder_units)
    cell_bw = LSTMCell(self.encoder_units)

    # use bidirectional rnn as encoder architecture
    (fw_outputs, bw_outputs), (fw_final_state, bw_final_state) = (
        tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=self.xs,
            sequence_length=inputs_length,
            dtype=self.dtype))

    # merge every forward and backward output as total output
    output = tf.add(fw_outputs, bw_outputs) / 2

    # merge every forward and backward final state as final state
    state_c = tf.concat([fw_final_state.c, bw_final_state.c], axis=1)
    state_h = tf.concat([fw_final_state.h, bw_final_state.h], axis=1)
    final_state = LSTMStateTuple(c=state_c, h=state_h)
    return output, final_state
def build_decoder_cell(self):
    """Build the stacked decoder cell with attention on its last layer.

    The initial state of every layer but the last is taken from
    ``self.encoder_last_state``; the last layer is an ``AttentionWrapper``
    whose initial state is its own zero state (its ``initial_cell_state`` is
    the last encoder state).

    Returns:
        ``(MultiRNNCell, tuple_of_initial_states)``.
    """
    # No beam search currently
    # Attention
    # TODO: other attention mechanism?
    attention_mechanism = BahdanauAttention(
        num_units=self.config.hidden_units,
        memory=self.encoder_outputs,
        memory_sequence_length=self.encoder_inputs_length)

    # One LSTMCell object per layer. `[LSTMCell(...)] * depth` repeats a
    # single object, so every layer (including the one wrapped by
    # AttentionWrapper below) would be the same cell.
    decoder_cells = [
        LSTMCell(self.config.hidden_units)
        for _ in range(self.config.decoder_depth)
    ]
    decoder_initial_state = list(self.encoder_last_state)

    def attn_decoder_input_fn(inputs, attention):
        if not self.config.attn_input_feeding:
            return inputs
        # Essential when use_residual=True
        _input_layer = Dense(self.config.hidden_units,
                             dtype=tf.float32,
                             name='attn_input_feeding')
        return _input_layer(concat([inputs, attention], -1))

    # Add an AttentionWrapper on the last layer of the decoder.
    decoder_cells[-1] = AttentionWrapper(
        cell=decoder_cells[-1],
        attention_mechanism=attention_mechanism,
        attention_layer_size=self.config.hidden_units,
        cell_input_fn=attn_decoder_input_fn,
        initial_cell_state=decoder_initial_state[-1],
        alignment_history=False,
        name='Attention_Wrapper')

    # The wrapper has its own state structure; start it from its zero state.
    decoder_initial_state[-1] = decoder_cells[-1].zero_state(
        batch_size=self.batch_size, dtype=tf.float32)
    decoder_initial_state = tuple(decoder_initial_state)
    return MultiRNNCell(decoder_cells), decoder_initial_state