def _create_rnn_cell(n_neurons, n_layers, keep_prob):
    """Create a (possibly stacked) layer-normalized LSTM cell.

    Parameters
    ----------
    n_neurons : int
        Number of units in each LSTM cell.
    n_layers : int
        Number of stacked recurrent layers.
    keep_prob : float
        Keep probability for the cell's recurrent dropout.

    Returns
    -------
    tf.contrib.rnn.RNNCell
        A single cell, or a MultiRNNCell when n_layers > 1.
    """
    import tensorflow.contrib.rnn as rnn
    cell_fw = rnn.LayerNormBasicLSTMCell(num_units=n_neurons,
                                         dropout_keep_prob=keep_prob)
    # Build deeper recurrent net if using more than 1 layer
    if n_layers > 1:
        cells = [cell_fw]
        for layer_i in range(1, n_layers):
            with tf.variable_scope('{}'.format(layer_i)):
                cell_fw = rnn.LayerNormBasicLSTMCell(
                    num_units=n_neurons, dropout_keep_prob=keep_prob)
            cells.append(cell_fw)
        cell_fw = rnn.MultiRNNCell(cells)
    return cell_fw
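# --- Hypothetical usage sketch (not from the original source): drive the
# cell built by _create_rnn_cell with tf.nn.dynamic_rnn under TF 1.x graph
# mode. The placeholder shape and hyperparameters below are illustrative.
import tensorflow as tf

inputs = tf.placeholder(tf.float32, [None, 50, 128])  # [batch, time, features]
cell = _create_rnn_cell(n_neurons=256, n_layers=2, keep_prob=0.8)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)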
def create_cell(device):
    if rnn_type == "GRU":
        cell = rnn.GRUCell(rnn_size)
    elif rnn_type == "LSTM":
        # Older TF releases lack the `reuse` kwarg; probe BasicLSTMCell's
        # signature as a proxy before passing it.
        if 'reuse' in inspect.signature(
                tf.contrib.rnn.BasicLSTMCell.__init__).parameters:
            cell = rnn.LayerNormBasicLSTMCell(
                rnn_size, forget_bias=0.0,
                reuse=tf.get_variable_scope().reuse)
        else:
            cell = rnn.LayerNormBasicLSTMCell(rnn_size, forget_bias=0.0)
    elif rnn_type == "RWA":
        cell = RWACell(rnn_size)
    elif rnn_type == "RAN":
        cell = RANCell(rnn_size, normalize=self.is_training)
    cell = SwitchableDropoutWrapper(
        rnn.DeviceWrapper(cell, device="/gpu:{}".format(device)),
        is_train=self.is_training)
    return cell
def RNN(self, x, drop, weights, biases):
    # Prepare data shape to match the `rnn` function requirements.
    # Current data input shape: (batch_size, timesteps, n_input)
    # Required shape: 'timesteps' tensors list of shape (batch_size, n_input)

    # Unstack to get a list of 'timesteps' tensors of shape
    # (batch_size, n_input); static_rnn requires inputs to be a list of tensors.
    x = tf.unstack(x, self.timesteps, 1)

    # For dynamic_rnn, the input should instead keep the shape
    # (batch_size, timesteps, n_input) when time_major is False.

    # Define an LSTM cell with TensorFlow
    # lstm_cell = rnn.LSTMCell(self.num_hidden, use_peepholes=True)
    # lstm_cell = rnn.DropoutWrapper(lstm_cell, output_keep_prob=(1 - drop))
    # lstm_cell = cudnn_rnn.CudnnLSTM(self.num_layers, self.num_hidden,
    #                                 dropout=drop)
    lstm_cell = rnn.LayerNormBasicLSTMCell(self.num_hidden,
                                           forget_bias=0.5,
                                           norm_gain=1.0,
                                           norm_shift=0.0,
                                           dropout_keep_prob=(1 - drop))

    # Get LSTM cell output
    outputs, states = tf.nn.static_rnn(lstm_cell, x, dtype=tf.float32)
    # outputs, states = tf.nn.dynamic_rnn(lstm_cell, x, dtype=tf.float32)

    # Linear activation, using the RNN inner loop's last output
    return tf.matmul(outputs[-1], weights["out"]) + biases["out"]
def create_lstm_cell(layer):
    if hyperparameters.layer_norm:
        if hyperparameters.num_proj:
            raise Exception(
                'No support for layer normalization together with a '
                'projection layer.')
        cell = rnn.LayerNormBasicLSTMCell(
            hyperparameters.lstm_state_size,
            # the local variable `dropout` is set to 0 when evaluating
            dropout_keep_prob=1 - dropout,
            layer_norm=hyperparameters.layer_norm)
    else:
        if hyperparameters.num_proj:
            cell = rnn.LSTMCell(hyperparameters.lstm_state_size,
                                num_proj=hyperparameters.num_proj)
        else:
            cell = rnn.LSTMBlockCell(hyperparameters.lstm_state_size,
                                     forget_bias=0)
        if dropout > 0:
            cell = rnn.DropoutWrapper(cell, output_keep_prob=1 - dropout)
    return cell
def _single_cell(unit_type, num_units, forget_bias, dropout, mode,
                 residual_connection=False, device_str=None, residual_fn=None):
    """Create a single RNN cell.

    :param unit_type: RNN cell type
    :param num_units: number of hidden units
    :param forget_bias: forget-gate bias
    :param dropout: dropout rate
    :param mode: run mode (dropout is only applied in train mode)
    :param residual_connection: whether to use a residual connection
    :param device_str: device to place the cell on
    :param residual_fn: residual function
    :return: the (possibly wrapped) RNN cell
    """
    # dropout (= 1 - keep_prob) is set to 0 during eval and infer
    dropout = dropout if mode == tf.contrib.learn.ModeKeys.TRAIN else 0.0

    # Cell Type
    if unit_type == "lstm":
        print(" LSTM, forget_bias=%g" % forget_bias, end='')
        single_cell = rnn.BasicLSTMCell(num_units, forget_bias=forget_bias)
    elif unit_type == "gru":
        print(" GRU", end='')
        single_cell = rnn.GRUCell(num_units)
    elif unit_type == "layer_norm_lstm":
        print(" Layer Normalized LSTM, forget_bias=%g" % forget_bias, end='')
        single_cell = rnn.LayerNormBasicLSTMCell(num_units,
                                                 forget_bias=forget_bias,
                                                 layer_norm=True)
    elif unit_type == "nas":
        print(" NASCell", end='')
        single_cell = rnn.NASCell(num_units)
    else:
        raise ValueError("Unknown unit type %s!" % unit_type)

    # Dropout (= 1 - keep_prob)
    if dropout > 0.0:
        single_cell = rnn.DropoutWrapper(cell=single_cell,
                                         input_keep_prob=(1.0 - dropout))
        print(" %s, dropout=%g " % (type(single_cell).__name__, dropout),
              end='')

    # Residual
    if residual_connection:
        single_cell = rnn.ResidualWrapper(single_cell, residual_fn=residual_fn)
        print(" %s" % type(single_cell).__name__, end='')

    # Device Wrapper
    if device_str:
        single_cell = rnn.DeviceWrapper(single_cell, device_str)
        print(" %s, device=%s" % (type(single_cell).__name__, device_str),
              end='')

    return single_cell
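# --- Hypothetical usage sketch (not from the original source): stack two
# layer-norm LSTM cells built by _single_cell into a MultiRNNCell; the
# hyperparameter values below are illustrative only.
cells = [_single_cell(unit_type="layer_norm_lstm", num_units=512,
                      forget_bias=1.0, dropout=0.2,
                      mode=tf.contrib.learn.ModeKeys.TRAIN)
         for _ in range(2)]
stacked_cell = rnn.MultiRNNCell(cells)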
def _make_rnn_model(self):
    """Make the RNN model."""
    self.y = tf.cast(self.x[:, 1:], dtype=tf.int64)
    # NOTE: the one-hot depth must equal vocab_size for the cross-entropy
    # against the logits below to type-check.
    self.y_emb = tf.one_hot(self.y, depth=self._params.emb_size)
    tf.logging.info('y.shape=%s', self.y.shape)

    lstm_fw_cell_g = contrib_rnn.LayerNormBasicLSTMCell(
        self._params.hidden_lstm_size,
        layer_norm=self._params.norm_lstm,
        dropout_keep_prob=1 - self.dropout_rate)
    lstm_hidden, _ = tf.nn.dynamic_rnn(lstm_fw_cell_g, self.x_emb,
                                       dtype=tf.float32)

    # Stagger the two directional vectors so that the backward RNN does not
    # reveal the token being predicted; see
    # medium.com/@plusepsilon/the-bidirectional-language-model-1f3961d1fb27
    self.logits = tf.layers.dense(lstm_hidden[:, :-1, :],
                                  units=self._params.vocab_size,
                                  activation=None,
                                  name='logits')
    tf.logging.info('shape of logits=%s', self.logits.shape)

    # cross entropy
    self.loss_i_t = tf.nn.softmax_cross_entropy_with_logits(
        labels=self.y_emb, logits=self.logits)
    self.loss_i = tf.reduce_mean(self.loss_i_t, axis=1)
def create_cell(num_units, reuse=False, layer_norm=False,
                cell="gru", scope="rnn"):
    with tf.variable_scope(scope):
        if cell == "gru":
            if layer_norm:
                from neural_toolbox.LayerNormBasicGRUCell import \
                    LayerNormBasicGRUCell
                rnn_cell = LayerNormBasicGRUCell(num_units=num_units,
                                                 layer_norm=layer_norm,
                                                 activation=tf.nn.tanh,
                                                 reuse=reuse)
            else:
                rnn_cell = tfc_rnn.GRUCell(num_units=num_units,
                                           activation=tf.nn.tanh,
                                           reuse=reuse)
        elif cell == "lstm":
            rnn_cell = tfc_rnn.LayerNormBasicLSTMCell(num_units=num_units,
                                                      layer_norm=layer_norm,
                                                      activation=tf.nn.tanh,
                                                      reuse=reuse)
        else:
            raise ValueError("Invalid RNN cell: {}".format(cell))

        return rnn_cell
def __init__(self, word_embeddings, params):
    self._word_embeddings = word_embeddings
    self._params = params
    self._embedding_size = int(self._word_embeddings._dimension_size)
    # self._lstm_cell = rnn.BasicLSTMCell(num_units=self._embedding_size,
    #                                     reuse=tf.AUTO_REUSE)
    self._lstm_cell = rnn.LayerNormBasicLSTMCell(
        num_units=self._embedding_size, reuse=tf.AUTO_REUSE)
    self._cause_word_table = tf.constant(params['cause_word_table'],
                                         name='cause_word_table')
    self._cause_word_table_length = tf.constant(
        params['cause_word_table_length'], name='cause_word_table_length')
def __define_network(self):
    # encoder
    with tf.variable_scope('encoder'):
        lstm_encoder = tfrnn.LayerNormBasicLSTMCell(self.num_units,
                                                    layer_norm=False)
        initial_state = lstm_encoder.zero_state(batch_size=self.batch_size,
                                                dtype=tf.float32)
        _, encoder_state = tf.nn.dynamic_rnn(cell=lstm_encoder,
                                             inputs=self.input,
                                             initial_state=initial_state)

    # decoder: reconstruct the input in reverse order
    with tf.variable_scope('decoder'):
        reversed_input = tf.reverse(self.input, axis=[1])
        reversed_padded_input = tf.pad(reversed_input,
                                       paddings=[[0, 0], [1, 0], [0, 0]])
        lstm_decoder = tfrnn.LayerNormBasicLSTMCell(self.num_units,
                                                    layer_norm=False)
        decoder_outputs, decoder_state = tf.nn.dynamic_rnn(
            cell=lstm_decoder,
            inputs=reversed_padded_input[:, :-1, :],
            initial_state=encoder_state)

    # output layer
    with tf.variable_scope('output'):
        decoder_outputs = tf.reshape(decoder_outputs, [-1, self.num_units])
        output = fc_layer(self.input_size, activation=None,
                          input=decoder_outputs)

    # loss: per-point squared reconstruction error
    self.point_reconstruction_error = tf.squared_difference(
        tf.reshape(reversed_input, [-1, self.input_size]), output)
    self.point_reconstruction_error = tf.reshape(
        self.point_reconstruction_error,
        [self.batch_size, -1, self.input_size])
    self.point_reconstruction_error = tf.reduce_sum(
        self.point_reconstruction_error, axis=-1)
    self.point_reconstruction_error = tf.reverse(
        self.point_reconstruction_error, axis=[1])
    self.loss = tf.reduce_mean(self.point_reconstruction_error)
def decoder(x, decoder_inputs, keep_prob, sequence_length, memory,
            memory_length, first_attention):
    with tf.variable_scope("Decoder") as scope:
        label_embeddings = tf.get_variable(name="embeddings",
                                           shape=[n_classes, embedding_size],
                                           dtype=tf.float32)
        train_inputs_embedded = tf.nn.embedding_lookup(label_embeddings,
                                                       decoder_inputs)
        lstm = rnn.LayerNormBasicLSTMCell(n_hidden,
                                          dropout_keep_prob=keep_prob)
        output_l = layers_core.Dense(n_classes, use_bias=True)

        # Training decoder with Bahdanau attention over the encoder memory
        encoder_state = rnn.LSTMStateTuple(x, x)
        attention_mechanism = BahdanauAttention(
            embedding_size, memory=memory,
            memory_sequence_length=memory_length)
        cell = AttentionWrapper(lstm, attention_mechanism,
                                output_attention=False)
        cell_state = cell.zero_state(dtype=tf.float32,
                                     batch_size=train_batch_size)
        cell_state = cell_state.clone(cell_state=encoder_state,
                                      attention=first_attention)
        train_helper = TrainingHelper(train_inputs_embedded, sequence_length)
        train_decoder = BasicDecoder(cell, train_helper, cell_state,
                                     output_layer=output_l)
        decoder_outputs_train, decoder_state_train, decoder_seq_train = \
            dynamic_decode(train_decoder, impute_finished=True)

        # Inference decoder: tile the memory for beam search
        tiled_inputs = tile_batch(memory, multiplier=beam_width)
        tiled_sequence_length = tile_batch(memory_length,
                                           multiplier=beam_width)
        tiled_first_attention = tile_batch(first_attention,
                                           multiplier=beam_width)
        attention_mechanism = BahdanauAttention(
            embedding_size, memory=tiled_inputs,
            memory_sequence_length=tiled_sequence_length)
        x2 = tile_batch(x, beam_width)
        encoder_state2 = rnn.LSTMStateTuple(x2, x2)
        cell = AttentionWrapper(lstm, attention_mechanism,
                                output_attention=False)
        cell_state = cell.zero_state(
            dtype=tf.float32, batch_size=test_batch_size * beam_width)
        cell_state = cell_state.clone(cell_state=encoder_state2,
                                      attention=tiled_first_attention)
        infer_decoder = BeamSearchDecoder(cell,
                                          embedding=label_embeddings,
                                          start_tokens=[GO] * test_len,
                                          end_token=EOS,
                                          initial_state=cell_state,
                                          beam_width=beam_width,
                                          output_layer=output_l)
        decoder_outputs_infer, decoder_state_infer, decoder_seq_infer = \
            dynamic_decode(infer_decoder, maximum_iterations=4)

        return decoder_outputs_train, decoder_outputs_infer, decoder_state_infer
def __init__(self, config, scope):
    self.scope = scope
    self.config = config
    max_seq_length = config.max_seq_length
    self.global_step = tf.get_variable(
        'global_step', shape=[], dtype='int32',
        initializer=tf.constant_initializer(0), trainable=False)

    # Placeholders
    self.x = tf.placeholder(tf.int32, [None, config.max_docs_length],
                            name="x")  # [batch_size, max_doc_len]
    self.x_mask = tf.placeholder(tf.int32, [None, config.max_docs_length],
                                 name="x_mask")  # [batch_size, max_doc_len]
    if config.model_name.endswith("flat"):
        self.y = tf.placeholder(tf.int32, [None, config.n_classes], name="y")
    else:
        self.y = tf.placeholder(tf.int32, [None, config.max_seq_length],
                                name="y")
    print("y", self.y.get_shape())
    self.y_mask = tf.placeholder(tf.int32, [None, max_seq_length],
                                 name="y_mask")
    self.y_decoder = tf.placeholder(tf.int32, [None, max_seq_length],
                                    name="y-decoder")
    self.x_seq_length = tf.placeholder(tf.int32, [None], name="x_seq_length")
    self.y_seq_length = tf.placeholder(tf.int32, [None], name="y_seq_length")
    self.keep_prob = tf.placeholder(tf.float32, name="keep_prob")

    self.output_l = layers_core.Dense(config.n_classes, use_bias=True)
    if config.model_name == "hclf_baseline":
        config.decode_size = config.hidden_size
    else:
        config.decode_size = 2 * config.hidden_size
    if config.project:
        initializer = tf.random_normal_initializer(stddev=0.1)
        self.W_projection = tf.get_variable(
            'W_projection',
            shape=[config.hidden_size * 2 + config.word_embedding_size,
                   config.decode_size],
            initializer=initializer)
        self.b_projection = tf.get_variable('bias',
                                            shape=[config.decode_size])
    if config.concat_w2v and not config.project:
        config.decode_size = 2 * config.hidden_size + config.word_embedding_size

    self.lstm = rnn.LayerNormBasicLSTMCell(
        config.decode_size, dropout_keep_prob=config.keep_prob)  # lstm for decode
    self.encode_lstm = rnn.LayerNormBasicLSTMCell(
        config.hidden_size, dropout_keep_prob=config.keep_prob)  # lstm for encode

    # TODO config.emb_mat
    # self.word_embeddings = tf.constant(config.emb_mat, dtype=tf.float32,
    #                                    name="word_embeddings")
    self.word_embeddings = tf.get_variable(
        "word_embeddings", dtype='float',
        shape=[config.vocab_size, config.word_embedding_size],
        initializer=get_initializer(config.emb_mat))
    self.label_embeddings = tf.get_variable(
        name="label_embeddings",
        shape=[config.n_classes, config.label_embedding_size],
        dtype=tf.float32)
    self.xx = tf.nn.embedding_lookup(self.word_embeddings, self.x)  # [None, DL, d]
    self.yy = tf.nn.embedding_lookup(self.label_embeddings, self.y_decoder)  # [None, seq_l, d]

    self._build_encode(config)
    self._build_train(config)
    if not config.model_name.endswith("flat"):
        self._build_infer(config)
    self._build_loss(config)
    # self.infer_set = set()
    self.summary = tf.summary.merge(
        tf.get_collection("summaries", scope=self.scope))
def inference(self, reuse=None):
    # initialization
    raw_inputs = self.inputs
    batch_size = self.batch_size
    keep_prob = self.keep_probability
    is_training = self.is_training

    tf.set_random_seed(SEED)  # initialize the random seed at graph level

    lstm_cell = rnn.LayerNormBasicLSTMCell(lstm_cell_size,
                                           dropout_keep_prob=keep_prob,
                                           reuse=reuse,
                                           dropout_prob_seed=SEED)
    initial_state = lstm_cell.zero_state(batch_size, tf.float32)

    init_sw = tf.ones([batch_size, int(bdnn_winlen)]) * 0  # start sign
    self.mean_bps.append(init_sw)

    init_sw = tf.cast(tf.greater(init_sw, 0.4), tf.float32)
    self.sampled_bps.append(init_sw)

    reuse_recurrent = None
    init_glimpse = self.get_glimpse(raw_inputs, init_sw,
                                    reuse=reuse_recurrent)  # (batch_size, glimpse_out)

    inputs = [0] * nGlimpses
    outputs = [0] * nGlimpses
    glimpse = init_glimpse

    for time_step in range(nGlimpses):
        if time_step == 0:
            with tf.variable_scope("core_network", reuse=reuse_recurrent):
                (cell_output, cell_state) = lstm_cell(glimpse, initial_state)
                self.cell_outputs.append(initial_state)
        else:
            reuse_recurrent = True
            with tf.variable_scope("core_network", reuse=reuse_recurrent):
                (cell_output, cell_state) = lstm_cell(glimpse, cell_state)

        inputs[time_step] = glimpse
        outputs[time_step] = cell_output

        if time_step != nGlimpses - 1:  # not final time_step
            glimpse = self.get_next_input(cell_output, reuse=reuse_recurrent)
        else:  # final time_step
            with tf.variable_scope("baseline", reuse=reuse_recurrent):
                baseline = tf.sigmoid(
                    affine_transform(cell_output, 1, name='baseline'))
                self.baselines.append(baseline)

    return outputs
def BiRNN(self, x, weight, bias, initializer, activation):
    x = tf.unstack(tf.reshape(
        x, [tf.shape(x)[0], self.STEPS, int(self.X_DIM / self.STEPS)]),
        self.STEPS, axis=1)

    # lstm_fw_cell = rnn.CoupledInputForgetGateLSTMCell(self.CELL_SIZE,
    #                                                   forget_bias=10.0,
    #                                                   use_peepholes=True,
    #                                                   proj_clip=15.0,
    #                                                   initializer=initializer,
    #                                                   activation=activation)
    # lstm_bw_cell = rnn.CoupledInputForgetGateLSTMCell(self.CELL_SIZE,
    #                                                   forget_bias=10.0,
    #                                                   use_peepholes=True,
    #                                                   proj_clip=15.0,
    #                                                   initializer=initializer,
    #                                                   activation=activation)
    lstm_fw_cell = rnn.LayerNormBasicLSTMCell(self.CELL_SIZE,
                                              forget_bias=10.0,
                                              dropout_keep_prob=self.prob,
                                              activation=activation)
    lstm_bw_cell = rnn.LayerNormBasicLSTMCell(self.CELL_SIZE,
                                              forget_bias=10.0,
                                              dropout_keep_prob=self.prob,
                                              activation=activation)
    # lstm_fw_cell = rnn.GRUCell(self.CELL_SIZE, activation=tf.nn.relu)
    # lstm_bw_cell = rnn.GRUCell(self.CELL_SIZE, activation=tf.nn.relu)

    outputs, _, _ = tf.nn.static_bidirectional_rnn(lstm_fw_cell,
                                                   lstm_bw_cell, x,
                                                   dtype=tf.float32)
    # out = tf.layers.batch_normalization(outputs[-1])
    return tf.matmul(outputs[-1], weight) + bias, outputs
def create_model(self, x, dropout=1, predict=False):
    if not predict:
        self.create_vars()
    lstm_cell = [rnn.LayerNormBasicLSTMCell(self.rnn_size,
                                            activation=tf.nn.relu,
                                            reuse=tf.AUTO_REUSE,
                                            dropout_keep_prob=dropout)
                 for _ in range(self.num_layer)]
    lstm_cell = rnn.MultiRNNCell(lstm_cell)
    outputs, states = tf.nn.dynamic_rnn(lstm_cell, x, dtype=tf.float32)
    # outputs has shape (batch_size, seq_len, rnn_size); unstack turns it
    # into a list of length seq_len, each entry of shape (batch_size, rnn_size)
    outputs = tf.unstack(outputs, self.seq_len, 1)
    return tf.matmul(outputs[-1], self.W) + self.b
def create_cell():
    if self.config.RNN_CELL == 'lnlstm':
        cell = rnn.LayerNormBasicLSTMCell(self.config.ENC_RNN_SIZE)
    elif self.config.RNN_CELL == 'lstm':
        cell = rnn.BasicLSTMCell(self.config.ENC_RNN_SIZE)
    elif self.config.RNN_CELL == 'gru':
        cell = rnn.GRUCell(self.config.ENC_RNN_SIZE)
    else:
        logger.error('rnn_cell {} not supported'.format(self.config.RNN_CELL))
        # Fail fast instead of hitting an unbound `cell` below
        raise ValueError(
            'rnn_cell {} not supported'.format(self.config.RNN_CELL))
    if self.is_training:
        cell = tf.nn.rnn_cell.DropoutWrapper(
            cell, output_keep_prob=self.config.DROPOUT_KEEP)
    return cell
def lstm_cell(self, hparams, train):
    # Recurrent dropout is only active when training
    # (tf.to_float(train) == 0 at eval, so keep_prob == 1).
    keep_prob = 1.0 - hparams.rec_dropout * tf.to_float(train)
    recurrent_dropout_cell = contrib_rnn.LayerNormBasicLSTMCell(
        hparams.hidden_size,
        layer_norm=hparams.layer_norm,
        dropout_keep_prob=keep_prob)
    if hparams.ff_dropout:
        return contrib_rnn.DropoutWrapper(recurrent_dropout_cell,
                                          input_keep_prob=keep_prob)
    return recurrent_dropout_cell
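# --- Hypothetical usage sketch (not from the original source): the cell
# above combines recurrent dropout (dropout_keep_prob inside the layer-norm
# LSTM) with optional input dropout (DropoutWrapper). The HParams container,
# its values, and the `model` instance below are assumptions for illustration.
import collections

HParams = collections.namedtuple(
    "HParams", ["hidden_size", "layer_norm", "rec_dropout", "ff_dropout"])
hparams = HParams(hidden_size=128, layer_norm=True,
                  rec_dropout=0.1, ff_dropout=True)
cell = model.lstm_cell(hparams, train=tf.constant(True))  # `model` assumed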
def __init__(self, ob_space, ac_space, lstm_size=256, **kwargs):
    self.x = x = tf.placeholder(tf.float32, [None] + list(ob_space))

    rank = len(ob_space)
    if rank == 3:  # pixel input
        for i in range(4):
            x = tf.nn.elu(conv2d(x, 32, "l{}".format(i + 1), [3, 3], [2, 2]))
    elif rank == 1:  # plain features
        # x = tf.nn.elu(linear(x, 256, "l1", normalized_columns_initializer(0.01)))
        pass
    else:
        raise TypeError("observation space must have rank 1 or 3, got %d" % rank)

    # introduce a "fake" batch dimension of 1 after flatten so that we can
    # do LSTM over the time dim
    x = tf.expand_dims(flatten(x), [0])

    size = lstm_size
    lnlstm = rnn.LayerNormBasicLSTMCell(size)
    self.state_size = lnlstm.state_size
    step_size = tf.shape(self.x)[:1]

    c_init = np.zeros((1, lnlstm.state_size.c), np.float32)
    h_init = np.zeros((1, lnlstm.state_size.h), np.float32)
    self.state_init = [c_init, h_init]
    c_in = tf.placeholder(tf.float32, [1, lnlstm.state_size.c])
    h_in = tf.placeholder(tf.float32, [1, lnlstm.state_size.h])
    self.state_in = [c_in, h_in]

    state_in = rnn.LSTMStateTuple(c_in, h_in)
    lstm_outputs, lstm_state = tf.nn.dynamic_rnn(lnlstm, x,
                                                 initial_state=state_in,
                                                 sequence_length=step_size,
                                                 time_major=False)
    lstm_c, lstm_h = lstm_state
    x = tf.reshape(lstm_outputs, [-1, size])

    self.logits = linear(x, ac_space, "action",
                         normalized_columns_initializer(0.01))
    self.vf = tf.reshape(
        linear(x, 1, "value", normalized_columns_initializer(1.0)), [-1])
    self.state_out = [lstm_c[:1, :], lstm_h[:1, :]]
    self.sample = categorical_sample(self.logits, ac_space)[0, :]
    self.var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      tf.get_variable_scope().name)
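# --- Hypothetical rollout sketch (not from the original source): thread the
# LSTM state between environment steps. `policy` (an instance of the class
# above), `sess`, and `obs` are assumed to exist.
state = policy.state_init
action, value, c, h = sess.run(
    [policy.sample, policy.vf] + policy.state_out,
    feed_dict={policy.x: [obs],
               policy.state_in[0]: state[0],
               policy.state_in[1]: state[1]})
state = [c, h]  # carried into the next step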
def _which_cell(self):
    """Select the RNN cell type.

    :return: the chosen cell, possibly wrapped with dropout
    """
    cell_tmp = None
    if self.cell_type == 'lstm':
        cell_tmp = rnn.LayerNormBasicLSTMCell(
            self.hidden_unit, dropout_keep_prob=self.dropout_rate)
        # cell_tmp = rnn.BasicLSTMCell(self.hidden_unit)
    elif self.cell_type == 'gru':
        cell_tmp = rnn.GRUCell(self.hidden_unit)
    # Apply dropout if configured
    if self.dropout_rate is not None:
        cell_tmp = rnn.DropoutWrapper(cell_tmp,
                                      output_keep_prob=self.dropout_rate)
    return cell_tmp
def _single_cell(unit_type, num_units, forget_bias, dropout, mode,
                 residual_connection=False, residual_fn=None, trainable=True):
    """Create an instance of a single RNN cell."""
    # dropout (= 1 - keep_prob) is set to 0 during eval and infer
    dropout = dropout if mode == tf.estimator.ModeKeys.TRAIN else 0.0

    # Cell Type
    if unit_type == "lstm":
        single_cell = contrib_rnn.LSTMCell(num_units,
                                           forget_bias=forget_bias,
                                           trainable=trainable)
    elif unit_type == "gru":
        single_cell = contrib_rnn.GRUCell(num_units, trainable=trainable)
    elif unit_type == "layer_norm_lstm":
        single_cell = contrib_rnn.LayerNormBasicLSTMCell(
            num_units,
            forget_bias=forget_bias,
            layer_norm=True,
            trainable=trainable)
    elif unit_type == "nas":
        single_cell = contrib_rnn.NASCell(num_units, trainable=trainable)
    else:
        raise ValueError("Unknown unit type %s!" % unit_type)

    # Dropout (= 1 - keep_prob).
    if dropout > 0.0:
        single_cell = contrib_rnn.DropoutWrapper(
            cell=single_cell, input_keep_prob=(1.0 - dropout))

    # Residual.
    if residual_connection:
        single_cell = contrib_rnn.ResidualWrapper(single_cell,
                                                  residual_fn=residual_fn)

    return single_cell
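# --- Hypothetical usage sketch (not from the original source): dropout is
# only applied in TRAIN mode, so the same arguments yield a DropoutWrapper
# when training and a bare cell otherwise. Values are illustrative.
train_cell = _single_cell("layer_norm_lstm", num_units=256, forget_bias=1.0,
                          dropout=0.2, mode=tf.estimator.ModeKeys.TRAIN)
infer_cell = _single_cell("layer_norm_lstm", num_units=256, forget_bias=1.0,
                          dropout=0.2, mode=tf.estimator.ModeKeys.PREDICT)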
def cell(dims):
    '''Creates an RNN/GRU/LSTM cell to use in multiple layers.'''
    # RNN
    if hps.model_type == "BIRNN":
        cell = rnn.BasicRNNCell(dims)
    # LSTM
    elif hps.model_type == "BILSTM":
        cell = rnn.LSTMCell(dims, state_is_tuple=True)
    # LSTM with peepholes
    elif hps.model_type == "BILSTMP":
        cell = rnn.LSTMCell(dims, state_is_tuple=True, use_peepholes=True)
    # GRU
    elif hps.model_type == "BIGRU":
        cell = rnn.GRUCell(dims)
    # Update Gate RNN
    elif hps.model_type == "BIUGRNN":
        cell = rnn.UGRNNCell(dims)
    # Bi-directional Layer Norm LSTM
    elif hps.model_type == "BILNLSTM":
        cell = rnn.LayerNormBasicLSTMCell(dims)
    return rnn.DropoutWrapper(cell, output_keep_prob=self.dropout)
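# --- Hypothetical usage sketch (not from the original source): wire the
# factory above into a bidirectional dynamic RNN. This assumes it is called
# where `hps` and `self.dropout` are in scope; `inputs` and `seq_len` are
# assumed placeholders, and the layer size is illustrative.
fw_cell, bw_cell = cell(128), cell(128)
outputs, states = tf.nn.bidirectional_dynamic_rnn(
    fw_cell, bw_cell, inputs, sequence_length=seq_len, dtype=tf.float32)
context = tf.concat(outputs, axis=2)  # [batch, time, 2 * dims]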
def DilatedLSTM(s_t, size, state_in, idx_in, chunks=8, norm=False,
                dropout=1.0):
    # lstm = rnn.LSTMCell(size, state_is_tuple=True)
    lstm = rnn.LayerNormBasicLSTMCell(size, layer_norm=norm,
                                      activation=tf.nn.relu,
                                      dropout_keep_prob=dropout)
    c_init = np.zeros((chunks, lstm.state_size.c), np.float32)
    h_init = np.zeros((chunks, lstm.state_size.h), np.float32)
    state_init = [c_init, h_init]

    def dlstm_scan_fn(previous_output, current_input):
        # Only the i-th chunk of the state is read and updated at each step
        i = previous_output[2]
        c = previous_output[1][0]
        h = previous_output[1][1]
        # old_state = [tf.expand_dims(c[i], [0]), tf.expand_dims(h[i], [0])]
        old_state = rnn.LSTMStateTuple(tf.expand_dims(c[i], [0]),
                                       tf.expand_dims(h[i], [0]))
        out, state_out = lstm(current_input, old_state)
        c = tf.stop_gradient(c)
        h = tf.stop_gradient(h)
        co = state_out[0]
        ho = state_out[1]

        # Build a mask that selects row i of the chunked state
        col = tf.expand_dims(tf.one_hot(i, chunks), [1])
        new_mask = col
        for _ in range(size - 1):
            new_mask = tf.concat([new_mask, col], axis=1)
        old_mask = 1 - new_mask

        c_out = c * old_mask + co * new_mask
        h_out = h * old_mask + ho * new_mask
        state_out = [c_out, h_out]
        i += tf.constant(1)
        new_i = tf.mod(i, chunks)
        out = tf.reduce_mean(h_out, axis=0)
        return out, state_out, new_i

    rnn_outputs, final_states, out_idx = tf.scan(
        dlstm_scan_fn,
        tf.transpose(s_t, [1, 0, 2]),
        initializer=(state_in[1][0], state_in, idx_in))
    state_out = [final_states[0][0, :, :], final_states[1][0, :, :]]
    return rnn_outputs, state_init, state_out, out_idx[-1]
def _match_model_fn_v6(features, labels, mode, params):
    '''
    This version uses the original seq2seq, but merges the cause and word
    embedding tables with an LSTM, and uses the input embedding as the
    attention query.
    '''
    '''set parameters'''
    with tf.device('/gpu:0'), tf.variable_scope('model',
                                                reuse=tf.AUTO_REUSE) as scope:
        # set hyper parameters
        embedding_size = params['embedding_size']
        num_units = params['num_units']
        if mode == tf.estimator.ModeKeys.TRAIN:
            dropout_keep_prob = params['dropout_keep_prob']
        else:
            dropout_keep_prob = 1
        beam_width = params['beam_width']
        EOS = params['EOS']
        SOS = params['SOS']
        # set training parameters
        max_sequence_length = params['max_sequence_length']
        max_cause_length = params['max_cause_length']
        vocab_size = params['vocab_size']
        num_causes = EOS + 1

        '''process input and target'''
        # input layer
        input = tf.reshape(features['content'], [-1, max_sequence_length])
        batch_size = tf.shape(input)[0]
        input_length = tf.reshape(features['content_length'], [batch_size])
        cause_label = tf.reshape(labels['cause_label'],
                                 [batch_size, max_cause_length])
        cause_length = tf.reshape(labels['cause_length'], [batch_size])
        # necessary casts
        input = tf.cast(input, dtype=tf.int32)
        input_length = tf.cast(input_length, dtype=tf.int32)
        cause_label = tf.cast(cause_label, dtype=tf.int32)
        cause_length = tf.cast(cause_length, dtype=tf.int32)

        # word embedding layer
        embeddings_word = load_embedding(params['word2vec_model'],
                                         vocab_size, embedding_size)
        embedded_input = gen_array_ops.gather_v2(embeddings_word, input,
                                                 axis=0)
        # cause-label embedding layer
        cause_encoder = CauseEncoder(word_embeddings=embeddings_word,
                                     params=params)
        embedded_cause = cause_encoder.apply(cause_label)
        # cause lookup table
        cause_table = tf.constant(params['cause_table'], dtype=tf.int32)

        encoder_output = encoders(embedded_input, input_length, params, mode)

        '''hierarchical multilabel decoder'''
        # build LSTM cell with attention
        lstm = rnn.LayerNormBasicLSTMCell(
            num_units=num_units, reuse=tf.AUTO_REUSE,
            dropout_keep_prob=dropout_keep_prob)
        # lstm = rnn.DropoutWrapper(lstm, output_keep_prob=dropout_keep_prob)
        attention_mechanism = MyBahdanauAttention(
            num_units=embedding_size,
            memory=encoder_output.attention_values,
            memory_sequence_length=encoder_output.attention_values_length)
        initial_state = rnn.LSTMStateTuple(encoder_output.initial_state,
                                           encoder_output.initial_state)
        cell = MyAttentionWrapper_v2(lstm, attention_mechanism, sot=SOS,
                                     output_attention=False,
                                     name="MyAttentionWrapper")
        cell_state = cell.zero_state(dtype=tf.float32, batch_size=batch_size)
        cell_state = cell_state.clone(cell_state=initial_state,
                                      attention=encoder_output.final_state)
        # extra dense layer to project an RNN output into a classification
        project_dense = Dense(num_causes, _reuse=tf.AUTO_REUSE,
                              _scope='project_dense_scope',
                              name='project_dense')

        # train decoder
        train_helper = MyTrainingHelper(embedded_cause, cause_label,
                                        cause_length)
        train_decoder = MyBasicDecoder(cell, train_helper, cell_state,
                                       lookup_table=cause_table,
                                       output_layer=project_dense,
                                       hie=params['hie'])
        decoder_output_train, decoder_state_train, decoder_len_train = \
            dynamic_decode(train_decoder,
                           maximum_iterations=max_cause_length - 1,
                           parallel_iterations=64, scope='decoder')

        # beam_width = 1
        tiled_memory_sequence_length = tile_batch(
            encoder_output.attention_values_length, multiplier=beam_width)
        tiled_memory = tile_batch(encoder_output.attention_values,
                                  multiplier=beam_width)
        tiled_encoder_output_initial_state = tile_batch(
            encoder_output.initial_state, multiplier=beam_width)
        tiled_initial_state = rnn.LSTMStateTuple(
            tiled_encoder_output_initial_state,
            tiled_encoder_output_initial_state)
        tiled_first_attention = tile_batch(encoder_output.final_state,
                                           multiplier=beam_width)
        attention_mechanism = MyBahdanauAttention(
            num_units=embedding_size,
            memory=tiled_memory,
            memory_sequence_length=tiled_memory_sequence_length)
        cell = MyAttentionWrapper_v2(lstm, attention_mechanism, sot=SOS,
                                     output_attention=False,
                                     name="MyAttentionWrapper")
        cell_state = cell.zero_state(dtype=tf.float32,
                                     batch_size=batch_size * beam_width)
        cell_state = cell_state.clone(cell_state=tiled_initial_state,
                                      attention=tiled_first_attention)
        infer_decoder = MyBeamSearchDecoder(cell,
                                            embedding=cause_encoder,
                                            sots=tf.fill([batch_size], SOS),
                                            start_tokens=tf.fill([batch_size], SOS),
                                            end_token=EOS,
                                            initial_state=cell_state,
                                            beam_width=beam_width,
                                            output_layer=project_dense,
                                            lookup_table=cause_table,
                                            length_penalty_weight=0.7,
                                            hie=params['hie'])
        cause_output_infer, cause_state_infer, cause_length_infer = \
            dynamic_decode(infer_decoder, parallel_iterations=64,
                           maximum_iterations=max_cause_length - 1,
                           scope='decoder')

        # loss
        mask_for_cause = tf.sequence_mask(cause_length - 1,
                                          max_cause_length - 1,
                                          dtype=tf.float32)
        # loss = sequence_loss(logits=padded_train_output, targets=cause_label,
        #                      weights=mask_for_cause, name='loss')
        tmp_padding = tf.pad(
            decoder_output_train.rnn_output,
            [[0, 0],
             [0, max_cause_length - 1 - tf.shape(decoder_output_train.rnn_output)[1]],
             [0, 0]],
            constant_values=0)
        loss = _compute_loss(tmp_padding, cause_label, mask_for_cause,
                             batch_size)

        # predicted_ids: [batch_size, max_cause_length, beam_width]
        predicted_and_cause_ids = tf.transpose(
            cause_output_infer.predicted_ids, perm=[0, 2, 1],
            name='predicted_cause_ids')
        # for monitoring
        cause_label_expanded = tf.reshape(cause_label[:, 1:],
                                          [-1, 1, max_cause_length - 1])
        predicted_and_cause_ids = tf.pad(
            predicted_and_cause_ids,
            [[0, 0], [0, 0],
             [0, max_cause_length - 1 - tf.shape(predicted_and_cause_ids)[2]]],
            constant_values=EOS)
        predicted_and_cause_ids = tf.concat(
            [predicted_and_cause_ids, cause_label_expanded],
            axis=1, name='predicted_and_cause_ids')
        predicted_and_cause_ids = tf.reshape(
            predicted_and_cause_ids,
            [-1, beam_width + 1, max_cause_length - 1])
        predicted_and_cause_ids_train = tf.concat(
            [decoder_output_train.sample_id, cause_label[:, 1:]],
            axis=1, name='predicted_and_cause_ids_train')

        predictions = {
            'predicted_and_cause_ids': predicted_and_cause_ids,
        }
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=predictions)

        if mode == tf.estimator.ModeKeys.TRAIN:
            # warm_up_constant = params['warm_up_steps'] ** (-1.5)
            # embedding_constant = embedding_size ** (-0.5)
            # global_step = tf.to_float(tf.train.get_global_step())
            # learning_rate = tf.minimum(1 / tf.sqrt(global_step),
            #                            warm_up_constant * global_step) * embedding_constant
            # optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
            #                                    beta1=0.9, beta2=0.98,
            #                                    epsilon=1e-9)
            optimizer = tf.train.AdamOptimizer()
            # train_op = optimizer.minimize(loss=loss,
            #                               global_step=tf.train.get_global_step())
            '''using gradient clipping'''
            # loss = tf.Print(loss, [loss, 'to be clear, this is the loss'])
            grads_and_vars = optimizer.compute_gradients(loss)
            clipped_gvs = [
                ele if ele[0] is None else
                (tf.clip_by_value(ele[0], -0.1, 0.1), ele[1])
                for ele in grads_and_vars
            ]
            train_op = optimizer.apply_gradients(
                clipped_gvs, global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                              train_op=train_op)

        # predicted_cause_ids shape = [batch_size, cause_length]
        # cause_label shape = [batch_size, cause_length]
        # select the predicted cause with the highest probability
        # TODO: evaluation
        # bi_predicted_cause_ids = binarizer(predicted_cause_ids[:, 0, :], num_causes)
        # bi_cause_label = binarizer(cause_label, num_causes)
        # TODO: for now, evaluation has to be done outside the estimator
        eval_metric_ops = {
            'predicted_and_cause_ids':
                tf.contrib.metrics.streaming_concat(predicted_and_cause_ids),
            # 'precision': tf.metrics.precision(bi_cause_label, bi_predicted_cause_ids),
            # 'recall': tf.metrics.recall(bi_cause_label, bi_predicted_cause_ids),
            # 'f1-score': f_score(bi_cause_label, bi_predicted_cause_ids),
        }
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                          eval_metric_ops=eval_metric_ops)
keep_prob = tf.placeholder(tf.float32)
xx = tf.nn.embedding_lookup(word_embeddings, x)  # [None, DL, d]
yy = tf.nn.embedding_lookup(label_embeddings, y_decoder)  # [None, seq_l, d]

# encode here
'''
lstm = rnn.LayerNormBasicLSTMCell(hidden_size / 2, dropout_keep_prob=keep_prob)
outputs, output_states = tf.nn.bidirectional_dynamic_rnn(
    lstm, lstm, xx, dtype='float', sequence_length=x_seq_length)
xx_context = tf.concat(outputs, 2)               # [None, DL, 2*hd]
xx_final = tf.concat(output_states, 1)           # [None, 2*hd]
x_mask = tf.cast(x_mask, "float")
first_attention = tf.reduce_mean(xx_context, 1)  # [None, 2*hd]
'''
lstm = rnn.LayerNormBasicLSTMCell(hidden_size, dropout_keep_prob=keep_prob)
outputs, output_states = tf.nn.dynamic_rnn(lstm, xx, dtype='float',
                                           sequence_length=x_seq_length)
xx_context = outputs         # tf.concat(outputs, 2)        [None, DL, 2*hd]
xx_final = output_states[0]  # tf.concat(output_states, 1)  [None, 2*hd]
x_mask = tf.cast(x_mask, "float")
first_attention = tf.reduce_mean(xx_context, 1)  # [None, 2*hd]

# decode
output_l = layers_core.Dense(n_classes, use_bias=True)
encoder_state = rnn.LSTMStateTuple(xx_final, xx_final)
attention_mechanism = BahdanauAttention(hidden_size, memory=xx_context,
                                        memory_sequence_length=x_seq_length)
def _rnn_cell(self):
    cell = rnn.LayerNormBasicLSTMCell(
        self.rnn_hidden_size,
        dropout_keep_prob=1 - self.rnn_dropout_rate)
    return cell
#--------------------------------------Define Graph---------------------------------------------------#
graph = tf.Graph()
with graph.as_default():
    #------------------------------------construct BiLSTM------------------------------------------#
    # placeholders
    X_p = tf.placeholder(dtype=tf.float32, shape=(None, TIME_STEPS, 28),
                         name="input_placeholder")
    y_p = tf.placeholder(dtype=tf.float32, shape=(None, 10),
                         name="pred_placeholder")

    # layer-norm LSTM cell instances, two stacked layers per direction
    ln_forward_1 = rnn.LayerNormBasicLSTMCell(num_units=HIDDEN_UNITS1)
    ln_forward_2 = rnn.LayerNormBasicLSTMCell(num_units=HIDDEN_UNITS)
    ln_forward = rnn.MultiRNNCell(cells=[ln_forward_1, ln_forward_2])

    ln_backward_1 = rnn.LayerNormBasicLSTMCell(num_units=HIDDEN_UNITS1)
    ln_backward_2 = rnn.LayerNormBasicLSTMCell(num_units=HIDDEN_UNITS)
    ln_backward = rnn.MultiRNNCell(cells=[ln_backward_1, ln_backward_2])

    outputs, states = tf.nn.bidirectional_dynamic_rnn(cell_fw=ln_forward,
                                                      cell_bw=ln_backward,
                                                      inputs=X_p,
                                                      dtype=tf.float32)
    outputs_fw = outputs[0]
    outputs_bw = outputs[1]
    # sum the final forward and backward hidden states
    h = outputs_fw[:, -1, :] + outputs_bw[:, -1, :]
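    # --- Hypothetical continuation (not from the original snippet): map the
    # summed final hidden states to the 10 classes and train with
    # cross-entropy against the one-hot labels in y_p.
    logits = tf.layers.dense(h, 10)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_p, logits=logits))
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)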
def get_rnn_cell(self):
    return rnn.DropoutWrapper(
        rnn.LayerNormBasicLSTMCell(self.hidden_dim),
        input_keep_prob=self.dropout_keep_prob_t,
        output_keep_prob=self.dropout_keep_prob_t)
def __init__(self, cause_table, initial_state, final_state, params, mode):
    super(LSTM_Attention, self).__init__(cause_table, initial_state,
                                         final_state, params, mode)
    self._lstm_cell = rnn.LayerNormBasicLSTMCell(
        num_units=params['num_units'],
        dropout_keep_prob=self._dropout_keep_prob)
def single_lnlstm(self, layer_size):
    lnlstm_cell = rnn.LayerNormBasicLSTMCell(
        layer_size, dropout_keep_prob=self.dropout_rate)
    return lnlstm_cell
def declare_model(self, skip_preprocess=False):
    if not skip_preprocess:
        assert self.data_prepared, "Preprocess your data first"
    # n classes (total number of unique characters)
    self.vocab_size = len(list(self.lexicon.keys()))
    # number of tweets to process
    self.num_tweets = self.twitter_data.Clean_Tweets.values.shape[0]

    self.X = tf.placeholder(tf.int32,
                            [self.batch_size, self.fixed_tweet_size - 1],
                            name='X')
    one_hot_encoded = tf.one_hot(self.X, self.vocab_size)
    self.y = tf.placeholder(tf.int32,
                            [self.batch_size, self.fixed_tweet_size - 1],
                            name='y')
    labels = tf.one_hot(self.y, self.vocab_size)
    self.testX = tf.placeholder(tf.int32, [1, None], name='testX')
    test_one_hot_encoded = tf.one_hot(self.testX, self.vocab_size)

    rnn_layers = [rnn.LayerNormBasicLSTMCell(size, forget_bias=1.0)
                  for size in self.model_shape]
    self.multi_rnn_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)
    self.lstm_init_value = self.multi_rnn_cell.zero_state(self.batch_size,
                                                          tf.float32)
    self.test_lstm_init_value = self.multi_rnn_cell.zero_state(1, tf.float32)

    self.outputs, self.states = tf.nn.dynamic_rnn(
        self.multi_rnn_cell, one_hot_encoded,
        initial_state=self.lstm_init_value, dtype=tf.float32)
    self.test_outputs, self.test_states = tf.nn.dynamic_rnn(
        self.multi_rnn_cell, test_one_hot_encoded,
        initial_state=self.test_lstm_init_value, dtype=tf.float32)

    self.flat_outputs = tf.reshape(self.outputs,
                                   [-1, self.model_shape[-1]])
    self.test_flat_outputs = tf.reshape(self.test_outputs,
                                        [-1, self.model_shape[-1]])
    self.logits = tf.layers.dense(self.flat_outputs, self.vocab_size, None,
                                  True, tf.orthogonal_initializer(),
                                  name='dense')
    self.test_logits = tf.layers.dense(self.test_flat_outputs,
                                       self.vocab_size, None, True,
                                       tf.orthogonal_initializer(),
                                       name='testdense')  # might not reuse

    self.loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=self.logits,
            labels=tf.reshape(labels, [-1, self.vocab_size])))
    self.optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(
        self.loss)