def __init__(self,
             num_units,
             norm=True,
             use_peepholes=False,
             cell_clip=None,
             initializer=None,
             num_proj=None,
             proj_clip=None,
             forget_bias=1,
             activation=None,
             reuse=None):
    """Record the hyper-parameters of a `WeightNormLSTMCell`.

    Every argument is stored on the instance; no variables are created here.

    Args:
      num_units: first entry of the state size; also the output size when
        `num_proj` is falsy.
      norm: stored as `self._norm`.
      use_peepholes: stored as `self._use_peepholes`.
      cell_clip: stored as `self._cell_clip`.
      initializer: stored as `self._initializer`.
      num_proj: when truthy, used instead of `num_units` for the second
        entry of the state size and for the output size.
      proj_clip: stored as `self._proj_clip`.
      forget_bias: stored as `self._forget_bias`.
      activation: stored as `self._activation`; `math_ops.tanh` is used
        when the argument is falsy.
      reuse: forwarded to the parent constructor as `_reuse`.
    """
    super(WeightNormLSTMCell, self).__init__(_reuse=reuse)

    self._scope = 'wn_lstm_cell'
    self._num_units = num_units
    self._norm = norm
    self._initializer = initializer
    self._use_peepholes = use_peepholes
    self._cell_clip = cell_clip
    self._num_proj = num_proj
    self._proj_clip = proj_clip
    self._activation = activation or math_ops.tanh
    self._forget_bias = forget_bias

    self._weights_variable_name = "kernel"
    self._bias_variable_name = "bias"

    # A truthy projection size takes the place of num_units for both the
    # second state entry and the cell output.
    output_width = num_proj if num_proj else num_units
    self._state_size = LSTMStateTuple(num_units, output_width)
    self._output_size = output_width
def loop_fn(time, cell_output, cell_state, loop_state):
    """Step callback that feeds inputs and records reconstructed features.

    The five-element return value has the shape of a `tf.nn.raw_rnn`
    `loop_fn` result (presumably that is the caller -- confirm).  The
    function closes over `cell`, `batch_size`, `sequence_length`,
    `rnn_inputs`, `inputs_ta` and `loop_state_ta` from the enclosing scope.

    Args:
      time: scalar step counter.
      cell_output: output of the previous step, or None on the first call.
      cell_state: state of the previous step (ignored on the first call).
      loop_state: TensorArray collecting the reconstructions (ignored on
        the first call, where `loop_state_ta` is used instead).

    Returns:
      Tuple `(elements_finished, next_input, next_cell_state, emit_output,
      next_loop_state)`.
    """
    # Width of the linear reconstruction layer (hard-coded in the original).
    feature_dim = 39

    # `is None` instead of `== None`: identity is what is meant, and it does
    # not depend on how the tensor type overloads `==`.
    is_initial_call = cell_output is None  # time == 0

    if is_initial_call:
        next_cell_state = cell.zero_state(batch_size, tf.float32)
    else:
        next_cell_state = cell_state

    # Check which sequences are finished.
    elements_finished = (time >= tf.cast(sequence_length, tf.int32))
    finished = tf.reduce_all(elements_finished)

    # Read the next input; feed zeros once every sequence is finished.
    next_input = tf.cond(
        finished,
        lambda: tf.zeros([batch_size, rnn_inputs.get_shape()[-1]],
                         dtype=tf.float32),
        lambda: inputs_ta.read(time))

    # True for every batch row whose next input has a non-zero entry.
    switch_embed_flag = tf.cast(
        tf.reduce_max(tf.abs(next_input), axis=1),
        tf.bool,
        name='switch_embed_flag')

    if not is_initial_call:  # time > 0
        if len(cell_state) > 1:  # LSTM case
            # Rows with the flag set have both state parts replaced by zeros.
            # NOTE(review): zeros_like(next_input) assumes the input width
            # equals the width of state entry 0 -- confirm.
            tilde_cell_state = tf.where(switch_embed_flag,
                                        tf.zeros_like(next_input),
                                        next_cell_state[0])
            tilde_cell_output = tf.where(switch_embed_flag,
                                         tf.zeros_like(next_cell_state[1]),
                                         next_cell_state[1])
            next_cell_state = LSTMStateTuple(tilde_cell_state,
                                             tilde_cell_output)
        else:  # GRU case
            next_cell_state = tf.where(switch_embed_flag,
                                       tf.zeros_like(next_input),
                                       next_cell_state)
    else:
        # On the first call the first state entry is seeded with the input.
        next_cell_state = LSTMStateTuple(next_input, next_cell_state[1])

    # Parameters of the reconstruction layer: created on the first call and
    # reused on every later one.
    with tf.variable_scope('linear_transform',
                           reuse=None if is_initial_call else True):
        w_o = tf.get_variable(
            'weights', [cell.output_size, feature_dim],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        b_o = tf.get_variable(
            'bias', [feature_dim],
            initializer=tf.constant_initializer(0.1))

    emit_output = cell_output
    if is_initial_call:
        next_loop_state = loop_state_ta
    else:
        y = tf.add(tf.matmul(cell_output, w_o), b_o, name='reconstruction')
        # cell_output belongs to the previous step, hence index time - 1.
        next_loop_state = loop_state.write(time - 1, y)

    return (elements_finished, next_input, next_cell_state, emit_output,
            next_loop_state)
def call(self, inputs, state):
    """Run the wrapped cell and blend its new state with the previous one.

    Args:
      inputs: input passed through to `self._cell`.
      state: previous state; a `(c, h)` pair when `self.state_size` is an
        `LSTMStateTuple`, otherwise a single value.

    Returns:
      `(new_h, LSTMStateTuple(c, h))` in the LSTM case, otherwise
      `(new_state, new_state)`.  The wrapped cell's own output is not
      returned.
    """
    _, candidate_state = self._cell(inputs, state)

    def _blend(previous, candidate, prob):
        keep = 1 - prob
        if self.is_training:
            # Rescales the output of dropout (tf.nn.dropout scales its
            # output by a factor of 1 / keep_prob).
            return keep * tf.nn.dropout(candidate - previous, keep) + previous
        # Uses expectation at test time.
        return prob * previous + keep * candidate

    if isinstance(self.state_size, LSTMStateTuple):
        prev_c, prev_h = state
        cand_c, cand_h = candidate_state
        prob_c, prob_h = self._zoneout_prob
        blended_c = _blend(prev_c, cand_c, prob_c)
        blended_h = _blend(prev_h, cand_h, prob_h)
        return blended_h, LSTMStateTuple(c=blended_c, h=blended_h)

    blended = _blend(state, candidate_state, self._zoneout_prob)
    return blended, blended
def custom_rnn_seq2seq(encoder_inputs,
                       decoder_inputs,
                       enc_cell,
                       dec_cell,
                       dtype=dtypes.float32,
                       initial_state=None,
                       use_previous=False,
                       scope=None,
                       num_units=0):
    """Encode with a static RNN, transform the state, then decode or infer.

    The final encoder state is passed through two tanh layers (variables
    "v" and "v_prime") to build the `(c, h)` pair that initialises the
    decoder.

    Args:
      encoder_inputs: inputs handed to `core_rnn.static_rnn`.
      decoder_inputs: inputs handed to `seq2seq.rnn_decoder`.
      enc_cell: encoder cell.
      dec_cell: decoder cell.
      dtype: dtype for the encoder.
      initial_state: optional initial encoder state.
      use_previous: when falsy, decode with `seq2seq.rnn_decoder`;
        otherwise call `infer`.
      scope: optional variable scope; defaults to "custom_rnn_seq2seq".
      num_units: forwarded to `infer`.

    Returns:
      Whatever `seq2seq.rnn_decoder` or `infer` returns.
    """
    # NOTE(review): `dim_hidden` is not defined in this function; it must
    # come from the enclosing module.
    with variable_scope.variable_scope(scope or "custom_rnn_seq2seq"):
        _, enc_state = core_rnn.static_rnn(enc_cell,
                                           encoder_inputs,
                                           dtype=dtype,
                                           scope=scope,
                                           initial_state=initial_state)
        # Call get_shape(); the original printed the bound method object.
        print(enc_state.get_shape())

        c = tf.tanh(
            tf.matmul(tf.get_variable("v", [dim_hidden, dim_hidden]),
                      enc_state))
        h_prime_init = tf.tanh(
            tf.matmul(tf.get_variable("v_prime", [dim_hidden, dim_hidden]),
                      c))

        if not use_previous:
            return seq2seq.rnn_decoder(decoder_inputs,
                                       LSTMStateTuple(c, h_prime_init),
                                       dec_cell,
                                       scope=scope)
        return infer(LSTMStateTuple(c, h_prime_init), dec_cell, num_units)
def _construct_decoder_initial_state(self):
    """Build `self._decoder_initial_state` from the two encoder streams.

    A missing stream (video or audio output is None) is replaced by zero
    states shaped after the other stream.  The last-layer states of both
    streams are projected with `_project_state_tuple`; when the decoder has
    more than one layer, the remaining layers take the decoder cells' zero
    state.
    """
    hparams = self._hparams

    def _zero_states_like(reference_output):
        # One zero tensor per field of the reference's first-layer state,
        # all shaped like that state's `c`; the same pair is shared by
        # every layer.
        first_layer = reference_output.final_state[0]
        zero_slice = [
            tf.zeros(shape=tf.shape(first_layer.c), dtype=hparams.dtype)
            for _ in range(len(first_layer))
        ]
        return tuple([
            LSTMStateTuple(c=zero_slice[0], h=zero_slice[1])
            for _ in range(len(hparams.encoder_units_per_layer))
        ])

    def _last_layer(state):
        # Exact type test on purpose: presumably a single LSTMStateTuple is
        # itself a tuple subclass and must not be indexed here.
        return state[-1] if type(state) is tuple else state

    if self._video_output is None:
        video_state = _zero_states_like(self._audio_output)
    else:
        video_state = self._video_output.final_state

    if self._audio_output is None:
        audio_state = _zero_states_like(self._video_output)
    else:
        audio_state = self._audio_output.final_state

    state_tuple = (_last_layer(video_state), _last_layer(audio_state))

    projected = _project_state_tuple(
        state_tuple,
        num_units=hparams.decoder_units_per_layer[0],
        cell_type=hparams.cell_type)
    self._decoder_initial_state = projected

    dec_layers = len(hparams.decoder_units_per_layer)
    if dec_layers > 1:
        zero_state = self._decoder_cells.zero_state(self._batch_size,
                                                    hparams.dtype)
        # Layer 0 gets the projected state, the others start from zero.
        self._decoder_initial_state = tuple(
            [projected] + [zero_state[layer]
                           for layer in range(1, dec_layers)])
def _skipEncoderOutput(self):
    """Run an extra recurrent layer over the encoder output for skip cells.

    Does nothing unless the string 'skip' occurs in `self._cell_type`.
    Otherwise a single-layer RNN (built by `build_rnn_layers`) is run over
    `self._encoder_output.outputs`; its result replaces
    `self._encoder_output`, and `self.skip_infos` receives the update gates,
    their mean count and the budget loss.
    """
    with tf.variable_scope("Decoder") as scope:
        batch_size = self._hparams.batch_size[
            0 if self._mode == 'train' else 1]

        if 'skip' in self._cell_type:
            pre_decoder_cells, pre_initial_state = build_rnn_layers(
                cell_type=self._cell_type,
                num_units_per_layer=[
                    self._hparams.decoder_units_per_layer[-1]
                ],
                use_dropout=self._hparams.use_dropout,
                dropout_probability=self._hparams.decoder_dropout_probability,
                mode=self._mode,
                batch_size=batch_size,
                dtype=self._hparams.dtype,
            )
            print('Decoder_pre_decoder_cells', pre_decoder_cells)
            print('Decoder_pre_initial_state', pre_initial_state)
            print('Decoder _encoder_output', self._encoder_output)

            pre_decoder_cells = MultiRNNCell([pre_decoder_cells])
            new_encoder_output, new_encoder_final_state = tf.nn.dynamic_rnn(
                cell=pre_decoder_cells,
                inputs=self._encoder_output.outputs,
                sequence_length=self._encoder_features_len,
                parallel_iterations=batch_size,
                swap_memory=False,
                dtype=self._hparams.dtype,
                initial_state=pre_initial_state,
                scope=scope)

            # Keep only the (c, h) part of the final state.  A multi-layer
            # state is a sequence of per-layer states; a single state has
            # no elements carrying `.c`/`.h` (or cannot be iterated), which
            # is what the narrowed exception handler detects.
            cell_state = new_encoder_final_state
            try:
                cell_state = [
                    LSTMStateTuple(cs.c, cs.h) for cs in cell_state
                ]
            except (AttributeError, TypeError):
                cell_state = LSTMStateTuple(cell_state.c, cell_state.h)

            # The wrapped cell emits (output, update gate) pairs.
            new_encoder_output, updated_states = new_encoder_output
            print("Decoder_new_encoder_output", new_encoder_output)
            print("Decoder_updated_states", updated_states)

            cost_per_sample = self._hparams.cost_per_sample[2]
            budget_loss = tf.reduce_mean(
                tf.reduce_sum(cost_per_sample * updated_states, 1), 0)
            meanUpdates = tf.reduce_mean(
                tf.reduce_sum(updated_states, 1), 0)
            self.skip_infos = SkipInfoTuple(updated_states, meanUpdates,
                                            budget_loss)

            self._encoder_output = self.get_data(new_encoder_output,
                                                 cell_state)
            print('Encoder_Output in Decoder after skip',
                  self._encoder_output)
def do_validation(loss, curr_epoch):
    """Evaluate on the first validation split and log one summary line.

    Relies on names from the enclosing scope (`epoch_val`, `model`, `sess`,
    the placeholders `x`, `y`, `i_s`, the ops `accuracy`, `cost`, `states`,
    `validation_losses`, `do_test`, the log file `f` and the step `t`).

    Args:
      loss: training loss written to the log line.
      curr_epoch: current epoch; converted with `int()`.

    Raises:
      ZeroDivisionError: if no validation batch was evaluated.
    """
    curr_epoch = int(curr_epoch)
    val_losses = []

    def _zeros(width):
        # np.float64 replaces the `np.float` alias, which was removed in
        # NumPy 1.24; both denote the same 64-bit float dtype.
        return np.zeros((nb_v, width), dtype=np.float64)

    for position, val in enumerate(epoch_val, start=1):
        # Only the first element of epoch_val is evaluated.
        if position >= 2:
            break
        print("Running validation...")

        # Zero initial recurrent state whose structure depends on the model.
        if model == "LSTM":
            val_state = tuple([
                LSTMStateTuple(_zeros(n_hidden), _zeros(n_hidden))
                for _ in range(n_layers)
            ])
        elif model == "HyperDRUM":
            val_state = tuple([
                LSTMStateTuple(_zeros(n_hyper_hidden),
                               _zeros(n_hyper_hidden + n_hidden))
                for _ in range(n_layers)
            ])
        elif model == "FSRUM":
            val_state = tuple([
                tuple([
                    tuple([_zeros(fast_size), _zeros(fast_size)]),
                    _zeros(slow_size)
                ]) for _ in range(n_layers)
            ])
        else:
            val_state = tuple([_zeros(n_hidden) for _ in range(n_layers)])

        for X_val, Y_val in val:
            val_dict = {x: X_val, y: Y_val, i_s: val_state}
            # The state returned by one batch seeds the next one.
            _, val_loss, val_state = sess.run([accuracy, cost, states],
                                              feed_dict=val_dict)
            val_losses.append(val_loss)

    print("Validations:")
    validation_losses.append(sum(val_losses) / len(val_losses))
    print("Validation Loss= " + "{:.6f}".format(validation_losses[-1]))

    test_loss = do_test()

    lr = [v for v in tf.global_variables() if v.name == "learning_rate:0"][0]
    lr = sess.run(lr)
    f.write(
        "Step: %d\t TrLoss: %f\t TestLoss: %f\t ValLoss: %f\t Epoch: %d\t Learning rate: %f\n"
        % (t, loss, test_loss, validation_losses[-1], curr_epoch, lr))
    f.flush()
def __init__(self,
             num_units,
             is_training,
             use_peepholes=False,
             cell_clip=None,
             initializer=orthogonal_initializer(),
             num_proj=None,
             proj_clip=None,
             forget_bias=1.0,
             state_is_tuple=True,
             activation=tf.tanh):
    """Store the configuration of the LSTM cell.

    Args:
      num_units: int, number of units in the LSTM cell.
      is_training: bool, True while training.
      use_peepholes: bool, True to enable diagonal/peephole connections.
      cell_clip: (optional) float; stored as `self.cell_clip`.
      initializer: (optional) initializer for the weight matrices.
      num_proj: (optional) int, output dimensionality of the projection.
        When falsy no projection size is used.
      proj_clip: (optional) stored as `self.proj_clip`.
      forget_bias: stored as `self.forget_bias` (default 1.0).
      state_is_tuple: if True the state size is an `LSTMStateTuple`;
        if False it is the sum of both widths and a warning is logged once.
      activation: activation function of the inner states.
    """
    if not state_is_tuple:
        tf.logging.log_first_n(
            tf.logging.WARN,
            "%s: Using a concatenated state is slower and "
            " will soon be deprecated.  Use state_is_tuple=True.", 1, self)

    self.num_units = num_units
    self.is_training = is_training
    self.use_peepholes = use_peepholes
    self.cell_clip = cell_clip
    self.num_proj = num_proj
    self.proj_clip = proj_clip
    self.initializer = initializer
    self.forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self.state_is_tuple = state_is_tuple
    self.activation = activation

    # Width of h and of the output: the projection size when given,
    # otherwise the number of units.
    hidden_width = num_proj if num_proj else num_units
    if state_is_tuple:
        self._state_size = LSTMStateTuple(num_units, hidden_width)
    else:
        self._state_size = num_units + hidden_width
    self._output_size = hidden_width
def __init__(self,
             num_units,
             use_peepholes=False,
             cell_clip=None,
             initializer=None,
             num_proj=None,
             proj_clip=None,
             num_unit_shards=None,
             num_proj_shards=None,
             forget_bias=1.0,
             state_is_tuple=True,
             activation=None,
             reuse=None,
             normalize_in_to_hidden=False,
             normalize_in_together=True,
             normalize_cell=False,
             normalize_config=None,
             name=None):
    """Store the configuration of a `BNLSTMCell`.

    Args:
      num_units: number of units; first entry of the state size.
      use_peepholes, cell_clip, initializer, proj_clip, forget_bias:
        stored on the instance unchanged.
      num_proj: when truthy, width of `h` and of the output.
      num_unit_shards, num_proj_shards: stored; a warning is logged when
        either is not None.
      state_is_tuple: if False a warning is logged and the state size is
        the sum of both widths instead of an `LSTMStateTuple`.
      activation: defaults to `math_ops.tanh` when falsy.
      reuse: forwarded to the parent constructor as `_reuse`.
      normalize_in_to_hidden: stored as `self._normalize_in_to_hidden`.
      normalize_in_together: only takes effect when
        `normalize_in_to_hidden` is truthy.
      normalize_cell, normalize_config: stored on the instance unchanged.
      name: forwarded to the parent constructor.
    """
    super(BNLSTMCell, self).__init__(_reuse=reuse, name=name)

    if not state_is_tuple:
        logging.warn(
            "%s: Using a concatenated state is slower and will soon be "
            "deprecated.  Use state_is_tuple=True.", self)
    shards_requested = (num_unit_shards is not None
                        or num_proj_shards is not None)
    if shards_requested:
        logging.warn(
            "%s: The num_unit_shards and proj_unit_shards parameters are "
            "deprecated and will be removed in Jan 2017.  "
            "Use a variable scope with a partitioner instead.", self)

    # Inputs must be 2-dimensional.
    self.input_spec = base_layer.InputSpec(ndim=2)

    self._num_units = num_units
    self._use_peepholes = use_peepholes
    self._cell_clip = cell_clip
    self._initializer = initializer
    self._num_proj = num_proj
    self._proj_clip = proj_clip
    self._num_unit_shards = num_unit_shards
    self._num_proj_shards = num_proj_shards
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation or math_ops.tanh

    self._normalize_in_to_hidden = normalize_in_to_hidden
    self._normalize_in_together = (normalize_in_to_hidden
                                   and normalize_in_together)
    self._normalize_cell = normalize_cell
    self._normalize_config = normalize_config

    # Width of h and of the output: projection size if given, else units.
    hidden_width = num_proj if num_proj else num_units
    if state_is_tuple:
        self._state_size = LSTMStateTuple(num_units, hidden_width)
    else:
        self._state_size = num_units + hidden_width
    self._output_size = hidden_width
def forward(self, X):
    """Scan a 2-D grid with an LSTM that sees its upper and left neighbours.

    Inspired in part by
    https://github.com/areiner222/MDLSTM/blob/master/md_lstm.py

    Args:
      X: tensor laid out as batch_size x height x width x channels; height,
        width and channels must be statically known.

    NOTE(review): no `return` statement is visible for this function; the
    stacked `outputs` and `states` are only bound to local names.
    """
    with tf.variable_scope(self.scope):
        _, H, W, C = X.get_shape().as_list()
        N = tf.shape(X)[0]
        num_steps = H * W

        # Flatten the grid row by row into H*W steps: step t is the cell at
        # row t // W, column t % W.
        X = tf.reshape(tf.transpose(X, [1, 2, 0, 3]), [-1, C])
        X = tf.split(X, num_steps, axis=0)

        # Dynamic-sized arrays with timesteps = H*W.  `states` has one extra
        # slot (index H*W) holding the zero state used at the grid borders.
        inputs = tf.TensorArray(dtype=tf.float32, size=num_steps).unstack(X)
        states = tf.TensorArray(dtype=tf.float32,
                                size=num_steps + 1,
                                clear_after_read=False)
        outputs = tf.TensorArray(dtype=tf.float32, size=num_steps)

        # Initialise the border state to zero.
        states = states.write(
            num_steps,
            LSTMStateTuple(tf.zeros([N, self.hidden_dim], tf.float32),
                           tf.zeros([N, self.hidden_dim], tf.float32)))

        # Step counter.
        t = tf.constant(0)

        def body(t_, outputs_, states_):
            # Upper neighbour is step t - W and exists for every t >= W.
            # The original used less_equal, which also gave step t == W
            # (first cell of the second row) the zero state instead of the
            # state of step 0.
            states_1 = tf.cond(tf.less(t_, tf.constant(W)),
                               lambda: states_.read(num_steps),
                               lambda: states_.read(t_ - tf.constant(W)))
            # Left neighbour is step t - 1, absent in the first column.
            states_2 = tf.cond(tf.equal(t_ % W, tf.constant(0)),
                               lambda: states_.read(num_steps),
                               lambda: states_.read(t_ - tf.constant(1)))

            prev_hidden_states = LSTMStateTuple(states_1[0], states_2[0])
            prev_cell_states = LSTMStateTuple(states_1[1], states_2[1])

            out, state = self.step_forward(inputs.read(t_),
                                           prev_hidden_states,
                                           prev_cell_states)
            outputs_ = outputs_.write(t_, out)
            states_ = states_.write(t_, state)
            return t_ + 1, outputs_, states_

        def condition(t_, outputs_, states_):
            return tf.less(t_, tf.constant(num_steps))

        # Steps depend on earlier steps, so they are run one at a time.
        _, outputs, states = tf.while_loop(condition,
                                           body, [t, outputs, states],
                                           parallel_iterations=1)

        # Stack outputs and states into tensors and restore the
        # batch x height x width x hidden layout of the outputs.
        outputs = outputs.stack()
        states = states.stack()
        outputs = tf.transpose(
            tf.reshape(outputs, [H, W, -1, self.hidden_dim]), [2, 0, 1, 3])
def initial_state(self):
    """Return an all-zero `LSTMStateTuple` for the decoder.

    With an encoder present, the zeros take the shape of the encoder state;
    otherwise they are `[hparams.batch_size, hparams.dec_units]` float32
    tensors.
    """
    if self.encoder is not None:
        return LSTMStateTuple(
            c=tf.zeros_like(self.encoder.state.c),
            h=tf.zeros_like(self.encoder.state.h),
        )

    shape = [self.hparams.batch_size, self.hparams.dec_units]
    zero_c = tf.zeros(shape, dtype=tf.float32)
    zero_h = tf.zeros(shape, dtype=tf.float32)
    return LSTMStateTuple(c=zero_c, h=zero_h)
def add(self, values, _struct=None):
    """
    Adds single experience frame to rollout.

    Leaves of `values` are appended to lists stored at the same position
    of the rollout's nested structure; dicts, tuples and `LSTMStateTuple`s
    are walked recursively.

    Args:
        values:     [nested] dictionary of values.
        _struct:    internal accumulator used by the recursion; callers
                    leave it at None.

    Returns:
        None for the top-level call; the updated sub-structure for
        recursive calls.

    Raises:
        RuntimeError: if any value could not be added (the offending
            `values` and `_struct` are printed first).
    """
    top = _struct is None
    if top:
        # Top level: accumulate into the rollout itself.
        _struct = self
        self.size += 1

    try:
        if isinstance(values, dict):
            for key, value in values.items():
                if key not in _struct.keys():
                    _struct[key] = {}
                _struct[key] = self.add(value, _struct[key])

        elif isinstance(values, LSTMStateTuple):
            # Tested before the generic tuple case: placed after it, as in
            # the original, this branch could never run for a state tuple
            # that is itself a tuple subclass, and the state type was lost.
            if not isinstance(_struct, LSTMStateTuple):
                _struct = LSTMStateTuple(0, 0)
            c = self.add(values[0], _struct[0])
            h = self.add(values[1], _struct[1])
            _struct = LSTMStateTuple(c, h)

        elif isinstance(values, tuple):
            if not isinstance(_struct, tuple):
                # Placeholders; each is replaced by a list one level down.
                _struct = ['empty' for entry in values]
            _struct = tuple(
                [self.add(*pair) for pair in zip(values, _struct)])

        else:
            # Leaf: append to the existing list or start a new one.
            if isinstance(_struct, list):
                _struct += [values]
            else:
                _struct = [values]

    except Exception as exc:
        print('values:\n', values)
        print('_struct:\n', _struct)
        raise RuntimeError('failed to add values to rollout') from exc

    if not top:
        return _struct
def pass_messages(self):
    """Run the message-passing loop and store the final hidden states.

    Literal and clause states start with `h` set to the tiled, scaled
    initial vectors (`L_init` / `C_init` divided by sqrt(d)) and `c` set to
    zeros.  After `tf.while_loop` over `self.while_cond` /
    `self.while_body`, the resulting `h` tensors are stored in
    `self.final_lits` and `self.final_clauses`.
    """
    with tf.name_scope('pass_messages'):
        dim = self.opts.d
        scale = tf.sqrt(tf.cast(dim, tf.float32))

        lit_h0 = tf.tile(tf.div(self.L_init, scale), [self.n_lits, 1])
        clause_h0 = tf.tile(tf.div(self.C_init, scale), [self.n_clauses, 1])

        lit_state = LSTMStateTuple(h=lit_h0,
                                   c=tf.zeros([self.n_lits, dim]))
        clause_state = LSTMStateTuple(h=clause_h0,
                                      c=tf.zeros([self.n_clauses, dim]))

        loop_vars = [0, lit_state, clause_state]
        _, lit_state, clause_state = tf.while_loop(self.while_cond,
                                                   self.while_body,
                                                   loop_vars)

    self.final_lits = lit_state.h
    self.final_clauses = clause_state.h
def create_architecture(self):
    """Build the recurrent network with policy, frameskip and value heads.

    Creates the sequence-length and initial-state placeholders, one fully
    connected layer, one recurrent layer (`self.ru_class`) and finally the
    heads from `self.policy_value_frameskip_layer`, whose three results are
    stored in `self.ops.pi`, `self.ops.frameskip_pi` and `self.ops.v`.
    """
    self.vars.sequence_length = tf.placeholder(tf.int64, [1],
                                               name="sequence_length")

    dense = fully_connected(self.get_input_layers(),
                            num_outputs=self.fc_units_num,
                            scope=self._name_scope + "/fc1")
    # Reshaped to [1, -1, fc_units_num] before entering the RNN.
    dense_sequence = tf.reshape(dense, [1, -1, self.fc_units_num])

    self.recurrent_cells = self.ru_class(self._recurrent_units_num)
    state_size = self.recurrent_cells.state_size
    initial_c = tf.placeholder(tf.float32, [1, state_size.c],
                               name="initial_lstm_state_c")
    initial_h = tf.placeholder(tf.float32, [1, state_size.h],
                               name="initial_lstm_state_h")
    self.vars.initial_network_state = LSTMStateTuple(initial_c, initial_h)

    rnn_outputs, self.ops.network_state = tf.nn.dynamic_rnn(
        self.recurrent_cells,
        dense_sequence,
        initial_state=self.vars.initial_network_state,
        sequence_length=self.vars.sequence_length,
        time_major=False,
        scope=self._name_scope)
    flat_outputs = tf.reshape(rnn_outputs, [-1, self._recurrent_units_num])

    self.reset_state()

    heads = self.policy_value_frameskip_layer(flat_outputs)
    self.ops.pi, self.ops.frameskip_pi, self.ops.v = heads
def bidirectional_LSTM(inputs, scope, training):
    """Run a bidirectional zoneout LSTM over `inputs`.

    Args:
      inputs: input tensor for `tf.nn.bidirectional_dynamic_rnn`.
      scope: variable scope name.
      training: passed to both `ZoneoutLSTMCell`s as second argument.

    Returns:
      `(outputs, final_state)`: forward and backward outputs concatenated
      on axis 2, and an `LSTMStateTuple` whose `c` and `h` are the forward
      and backward final states concatenated on axis 1.
    """

    def _make_cell():
        return ZoneoutLSTMCell(
            hp.enc_units,
            training,
            zoneout_factor_cell=hp.z_drop,
            zoneout_factor_output=hp.z_drop,
        )

    with tf.variable_scope(scope):
        forward_cell = _make_cell()
        backward_cell = _make_cell()
        outputs, (fw_state, bw_state) = tf.nn.bidirectional_dynamic_rnn(
            forward_cell, backward_cell, inputs, dtype=tf.float32)

        # Join the c states and the h states of both directions; the result
        # is the initial state handed to the decoder.
        joined_c = tf.concat((fw_state.c, bw_state.c), 1)
        joined_h = tf.concat((fw_state.h, bw_state.h), 1)
        final_state = LSTMStateTuple(c=joined_c, h=joined_h)

        # Concat forward + backward outputs.
        joined_outputs = tf.concat(outputs, axis=2)
        return joined_outputs, final_state
def encoder(self, inputs, seq_len, keep_prob=0.9):
    """Encode `inputs` with a multi-layer bidirectional RNN.

    Args:
      inputs: input tensor; its first dimension is used as batch size.
      seq_len: sequence lengths passed to the RNN.
      keep_prob: forwarded to `self.add_encoder_cell`.

    Returns:
      `(encoder_outputs, encoder_states)`: outputs of both directions
      concatenated on the last axis, and a tuple with one `LSTMStateTuple`
      per layer whose `c` and `h` are the element-wise sums of the forward
      and backward states.
    """
    batch_size = tf.shape(inputs)[0]

    with tf.variable_scope('encoder'):
        cell_args = (self.hidden_size, self.cell_type, self.num_layers,
                     keep_prob)
        forward_cell = self.add_encoder_cell(*cell_args)
        backward_cell = self.add_encoder_cell(*cell_args)

        # Both directions start from the forward cell's zero state.
        zero_state = forward_cell.zero_state(batch_size, dtype=tf.float32)

        raw_outputs, raw_states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=forward_cell,
            cell_bw=backward_cell,
            inputs=inputs,
            sequence_length=seq_len,
            initial_state_fw=zero_state,
            initial_state_bw=zero_state,
            dtype=tf.float32,
            swap_memory=True)

        encoder_outputs = tf.concat(raw_outputs, axis=-1)

        merged_layers = []
        for layer in range(self.num_layers):
            fw_c, fw_h = raw_states[0][layer]
            bw_c, bw_h = raw_states[1][layer]
            # The directions are summed, not concatenated, so the state
            # width stays that of a single direction.
            merged_layers.append(
                LSTMStateTuple(tf.add(fw_c, bw_c), tf.add(fw_h, bw_h)))
        encoder_states = tuple(merged_layers)

    return encoder_outputs, encoder_states
def _init_bidirectional_encoder(self):
    """Build the time-major bidirectional encoder.

    Sets `self.encoder_outputs` to the forward and backward outputs
    concatenated on axis 2, and `self.encoder_state` to the forward and
    backward final states concatenated on axis 1 (field by field for an
    `LSTMStateTuple`).  `self.encoder_state` is left untouched when the
    state is neither an `LSTMStateTuple` nor a `tf.Tensor`.
    """
    with tf.variable_scope("BidirectionalEncoder"):
        rnn_outputs, rnn_states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=self.encoder_cell,
            cell_bw=self.encoder_cell,
            inputs=self.encoder_inputs_embedded,
            sequence_length=self.encoder_inputs_length,
            time_major=True,
            dtype=tf.float32)
        fw_outputs, bw_outputs = rnn_outputs
        fw_state, bw_state = rnn_states

        # Concatenates tensors along one dimension.
        self.encoder_outputs = tf.concat((fw_outputs, bw_outputs), 2)

        # isinstance() also accepts subclasses of the given type, i.e. it
        # takes inheritance into account.
        if isinstance(fw_state, LSTMStateTuple):
            joined_c = tf.concat((fw_state.c, bw_state.c), 1,
                                 name='bidirectional_concat_c')
            joined_h = tf.concat((fw_state.h, bw_state.h), 1,
                                 name='bidirectional_concat_h')
            self.encoder_state = LSTMStateTuple(c=joined_c, h=joined_h)
        elif isinstance(fw_state, tf.Tensor):
            self.encoder_state = tf.concat((fw_state, bw_state), 1,
                                           name='bidirectional_concat')
def BiLSTM(x, seqlen, weights, biases):
    """Apply a time-major bidirectional LSTM followed by a linear layer.

    Args:
      x: input tensor for the RNN.
      seqlen: accepted but not used.
      weights: mapping with key 'out' holding the output weight matrix.
      biases: mapping with key 'out' holding the output bias.

    Returns:
      `matmul(outputs, weights['out']) + biases['out']`, where `outputs`
      are the forward and backward RNN outputs concatenated on axis 2.
    """
    # Dropout on the cell output to reduce overfitting.
    lstm_cell = tf.nn.rnn_cell.DropoutWrapper(LSTMCell(n_hidden),
                                              output_keep_prob=0.5)

    rnn_outputs, rnn_states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=lstm_cell,
        cell_bw=lstm_cell,
        inputs=x,
        time_major=True,
        dtype=tf.float32)
    fw_outputs, bw_outputs = rnn_outputs
    fw_state, bw_state = rnn_states

    encoder_outputs = tf.concat((fw_outputs, bw_outputs), 2)

    # The merged final state is built but does not contribute to the
    # returned value.
    if isinstance(fw_state, LSTMStateTuple):
        joined_c = tf.concat((fw_state.c, bw_state.c), 1,
                             name='bidirectional_concat_c')
        joined_h = tf.concat((fw_state.h, bw_state.h), 1,
                             name='bidirectional_concat_h')
        encoder_state = LSTMStateTuple(c=joined_c, h=joined_h)
    elif isinstance(fw_state, tf.Tensor):
        encoder_state = tf.concat((fw_state, bw_state), 1,
                                  name='bidirectional_concat')

    projected = tf.matmul(encoder_outputs, weights['out'])
    return projected + biases['out']
def __call__(self, inputs, state, scope=None):
    """Run one step of the LSTM cell.

    Args:
      inputs: input tensor; concatenated with `h` on axis 1.
      state: `(c, h)` pair when `self._state_is_tuple`, otherwise one
        tensor that is split in two on axis 1.
      scope: optional scope name; defaults to "basic_lstm_cell".

    Returns:
      `(new_h, new_state)`, the state being an `LSTMStateTuple` or the
      axis-1 concatenation of `new_c` and `new_h`, mirroring the input.
    """
    with _checked_scope(self, scope or "basic_lstm_cell",
                        reuse=self._reuse):
        if self._state_is_tuple:
            prev_c, prev_h = state
        else:
            prev_c, prev_h = tf.split(value=state,
                                      num_or_size_splits=2,
                                      axis=1)

        # Parameters of gates are concatenated into one multiply for
        # efficiency.
        stacked = tf.concat([inputs, prev_h], 1)
        pre_activations = tf.nn.bias_add(tf.matmul(stacked, self.weight),
                                         self.bias)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = tf.split(value=pre_activations,
                              num_or_size_splits=4,
                              axis=1)

        kept = prev_c * tf.sigmoid(f + self._forget_bias)
        written = tf.sigmoid(i) * self._activation(j)
        new_c = kept + written
        new_h = self._activation(new_c) * tf.sigmoid(o)

        if self._state_is_tuple:
            return new_h, LSTMStateTuple(new_c, new_h)
        return new_h, tf.concat([new_c, new_h], 1)
def _encoder(self):
    """Encode the embedded input together with the answer-position feature.

    `self.answer_position` is expanded on axis 2 and concatenated to the
    word embeddings before a bidirectional LSTM is applied.

    Returns:
      `LSTMStateTuple` whose `c` and `h` are the forward and backward final
      states concatenated on axis 1.
    """
    embedded = self._get_embeddings(self.input_placeholder)
    position_feature = tf.expand_dims(self.answer_position, 2)
    rnn_inputs = tf.concat((embedded, position_feature), 2)

    # The same cell object serves both directions.
    lstm_cell = LSTMCell(num_units=self.config.encoder_hidden_state_size)

    rnn_outputs, rnn_states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=lstm_cell,
        cell_bw=lstm_cell,
        inputs=rnn_inputs,
        sequence_length=self.input_length_placeholder,
        dtype=tf.float32)
    fw_outputs, bw_outputs = rnn_outputs
    fw_final, bw_final = rnn_states

    # Built for parity with the original graph; not part of the return.
    encoder_output = tf.concat([fw_outputs, bw_outputs], 2)

    final_c = tf.concat((fw_final.c, bw_final.c), 1)
    final_h = tf.concat((fw_final.h, bw_final.h), 1)
    return LSTMStateTuple(c=final_c, h=final_h)
def __call__(self, inputs, state, scope=None):
    """Run one step of an LSTM cell that merges two predecessor cells.

    @param inputs: input tensor (batch, n)
    @param state: 4-tuple `(c1, c2, h1, h2)` with the cell states and
        hidden units of the two predecessor cells
    @param scope: optional variable scope; defaults to the class name
    @return: `(new_h, LSTMStateTuple(new_c, new_h))`
    """
    with tf.variable_scope(scope or type(self).__name__):
        c1, c2, h1, h2 = state

        # Bias argument is False since layer normalisation adds a bias
        # via its shift.
        gates = _linear([inputs, h1, h2], 5 * self._num_units, False)
        i, j, f1, f2, o = tf.split(value=gates,
                                   num_or_size_splits=5,
                                   axis=1)

        # Layer normalisation on each gate.
        i = ln(i, scope='i/')
        j = ln(j, scope='j/')
        f1 = ln(f1, scope='f1/')
        f2 = ln(f2, scope='f2/')
        o = ln(o, scope='o/')

        # Each predecessor cell state has its own forget gate.
        kept_first = c1 * tf.nn.sigmoid(f1 + self._forget_bias)
        kept_second = c2 * tf.nn.sigmoid(f2 + self._forget_bias)
        written = tf.nn.sigmoid(i) * self._activation(j)
        new_c = kept_first + kept_second + written

        # Layer normalisation also enters the new hidden state.
        new_h = (self._activation(ln(new_c, scope='new_h/'))
                 * tf.nn.sigmoid(o))

        return new_h, LSTMStateTuple(new_c, new_h)
def __call__(self, inputs, state, scope=None):
    """Run one step of a layer-normalised LSTM with a gated input bypass.

    Besides the four usual gates a fifth gate `t` mixes the LSTM output
    with an activated linear transform of the raw input.

    Args:
      inputs: input tensor.
      state: `(c, h)` pair when `self._state_is_tuple`, otherwise one
        tensor split in two on axis 1.
      scope: accepted but not used.

    Returns:
      `(high_h, new_state)`; the state carries `new_c` and `high_h`, as an
      `LSTMStateTuple` or concatenated on axis 1, mirroring the input.
    """
    sigmoid = math_ops.sigmoid

    if self._state_is_tuple:
        prev_c, prev_h = state
    else:
        prev_c, prev_h = array_ops.split(value=state,
                                         num_or_size_splits=2,
                                         axis=1)

    # Parameters of gates are concatenated into one multiply for efficiency.
    with tf.variable_scope("Weight",
                           initializer=tf.orthogonal_initializer()):
        weight_matrix = _linear([inputs, prev_h], 5 * self._num_units, True)
    with tf.variable_scope("transform_input",
                           initializer=tf.orthogonal_initializer()):
        trans_input = _linear([inputs], self._num_units, True)

    # i = input_gate, j = new_input, f = forget_gate, o = output_gate,
    # t = gate mixing the LSTM output with the transformed input
    i, j, f, o, t = tf.split(weight_matrix, num_or_size_splits=5, axis=1)
    i = ln(i, scope='i_LN')
    j = ln(j, scope='j_LN')
    f = ln(f, scope='f_LN')
    o = ln(o, scope='o_LN')
    t = ln(t, scope='t_LN')

    kept = prev_c * sigmoid(f + self._forget_bias)
    written = sigmoid(i) * self._activation(j)
    new_c = kept + written
    new_h = self._activation(ln(new_c, scope='new_c_LN')) * sigmoid(o)

    lstm_part = sigmoid(t) * new_h
    bypass_part = (1.0 - sigmoid(t)) * self._activation(
        ln(trans_input, scope='new_input_LN'))
    high_h = lstm_part + bypass_part

    if self._state_is_tuple:
        return high_h, LSTMStateTuple(new_c, high_h)
    return high_h, tf.concat([new_c, high_h], 1)
def build_bidirectional_encoder(encoder_cell,
                                num_layers,
                                encoder_inputs,
                                encoder_inputs_length,
                                scope=None):
    """Stack `num_layers` bidirectional RNN layers.

    Every layer uses `encoder_cell` for both directions and consumes the
    concatenated outputs of the layer below.

    Args:
      encoder_cell: cell used for every direction of every layer.
      num_layers: number of layers; must be positive.
      encoder_inputs: batch-major input tensor.
      encoder_inputs_length: sequence lengths.
      scope: optional variable scope; defaults to 'basic_encoder'.

    Returns:
      `(encoder_outputs, encoder_final_state)` of the top layer: outputs
      concatenated on axis 2 and an `LSTMStateTuple` with `c` and `h`
      concatenated on axis 1.
    """
    assert num_layers > 0

    with tf.variable_scope(scope or 'basic_encoder'):
        layer_inputs = encoder_inputs
        for layer_id in range(num_layers):
            rnn_outputs, rnn_states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=encoder_cell,
                cell_bw=encoder_cell,
                inputs=layer_inputs,
                sequence_length=encoder_inputs_length,
                time_major=False,
                dtype=tf.float32,
                scope='encoder_l' + str(layer_id))
            fw_state, bw_state = rnn_states

            # [batch_size, enc_seq_len, 2 * enc_hid_size]
            encoder_outputs = tf.concat(rnn_outputs, 2)
            layer_inputs = encoder_outputs

        # Only the top layer's final states are returned.
        # [batch_size, enc_hid_size*2] for c,h
        final_c = tf.concat((fw_state.c, bw_state.c), 1,
                            name='bidirectional_concat_c')
        final_h = tf.concat((fw_state.h, bw_state.h), 1,
                            name='bidirectional_concat_h')
        encoder_final_state = LSTMStateTuple(c=final_c, h=final_h)

        return encoder_outputs, encoder_final_state
def _encoder(self, input_seq, input_seq_length, name="", reuse=False):
    """Encode `input_seq` with a bidirectional LSTM.

    Args:
      input_seq: input tensor.
      input_seq_length: sequence lengths.
      name: accepted but not used.
      reuse: when truthy, variables of the current scope are reused.

    Returns:
      `(encoder_outputs, encoder_state)`: outputs of both directions
      concatenated on axis 2; the state is an `LSTMStateTuple` with `c` and
      `h` concatenated on axis 1 when the forward state is an
      `LSTMStateTuple`, otherwise the unmodified (forward, backward) pair.
    """

    def _make_cell():
        base_cell = BasicLSTMCell(self.h_dim, state_is_tuple=True,
                                  reuse=reuse)
        return SwitchableDropoutWrapper(
            base_cell,
            self.is_train,
            input_keep_prob=self.config.input_keep_prob)

    with tf.variable_scope("Encoder"):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        forward_cell = _make_cell()
        backward_cell = _make_cell()

        encoder_outputs, encoder_state = tf.nn.bidirectional_dynamic_rnn(
            forward_cell,
            backward_cell,
            inputs=input_seq,
            sequence_length=input_seq_length,
            dtype=tf.float32,
            scope='enc')

        # Join outputs since we are using a bidirectional RNN.
        encoder_outputs = tf.concat(encoder_outputs, 2)

        fw_state, bw_state = encoder_state[0], encoder_state[1]
        if isinstance(fw_state, LSTMStateTuple):
            joined_c = tf.concat((fw_state.c, bw_state.c), 1,
                                 name='bidirectional_concat_c')
            joined_h = tf.concat((fw_state.h, bw_state.h), 1,
                                 name='bidirectional_concat_h')
            encoder_state = LSTMStateTuple(c=joined_c, h=joined_h)

        return encoder_outputs, encoder_state
def concatenate_state(fw_state, bw_state):
    """Merge forward and backward RNN states by concatenation on axis 1.

    Args:
      fw_state: forward state: an `LSTMStateTuple`, a `tf.Tensor`, or a
        tuple of such states (one per layer).
      bw_state: backward state with the same structure.

    Returns:
      A state with the structure of `fw_state` whose tensors are the
      forward and backward tensors concatenated on axis 1.

    Raises:
      ValueError: if the states match none of the supported structures.
    """
    if isinstance(fw_state, LSTMStateTuple):
        return LSTMStateTuple(
            c=tf.concat((fw_state.c, bw_state.c), 1,
                        name='bidirectional_concat_c'),
            h=tf.concat((fw_state.h, bw_state.h), 1,
                        name='bidirectional_concat_h'))

    if isinstance(fw_state, tf.Tensor):
        return tf.concat((fw_state, bw_state), 1,
                         name='bidirectional_concat')

    both_tuples = isinstance(fw_state, tuple) and isinstance(bw_state, tuple)
    if both_tuples and len(fw_state) == len(bw_state):
        # multilayer
        return tuple(
            concatenate_state(fw, bw) for fw, bw in zip(fw_state, bw_state))

    raise ValueError('unknown state type: {}'.format((fw_state, bw_state)))
def decode_onestep(self, sess, last_tokens, dec_pre_state, encoder_outputs,
                   source_len):
    '''
    Run the decoder for a single beam-search step.

    Args:
        sess: session used to run the graph
        last_tokens: tokens to be fed as input into the decoder for this
            timestep
        dec_pre_state: list of beam_size LSTMStateTuples from the previous
            timestep
        encoder_outputs: [beam_size, seq_len, hidden_size]
        source_len: value fed to `self.source_len`
    return:
        (top-k indices, top-k log probabilities, list of beam_size
        LSTMStateTuples holding the new decoder state of each hypothesis)
    '''
    beam_size = len(dec_pre_state)

    # Stack the per-hypothesis states along a new leading axis.
    c_rows = [np.expand_dims(hyp.c, axis=0) for hyp in dec_pre_state]
    h_rows = [np.expand_dims(hyp.h, axis=0) for hyp in dec_pre_state]
    batched_state = tf.nn.rnn_cell.LSTMStateTuple(
        np.concatenate(c_rows, axis=0), np.concatenate(h_rows, axis=0))

    feed_dict = {
        self.decoder_input_train: np.transpose(np.array([last_tokens])),
        self.dec_inp_state: batched_state,
        self.encoder_outputs: encoder_outputs,
        self.source_len: source_len,
        self.keep_prob: 1.0
    }
    fetches = {
        'idx': self.topk_idx,
        'probs': self.topk_log_prob,
        'states': self.dec_out_state
    }
    results = sess.run(fetches, feed_dict=feed_dict)

    # Split the batched state back into one tuple per hypothesis.
    new_state = results['states']
    dec_states = [
        LSTMStateTuple(new_state.c[row, :], new_state.h[row, :])
        for row in range(beam_size)
    ]
    return results['idx'], results['probs'], dec_states
def _init_bidirectional_encoder(self):
    '''
    Bidirectional LSTM encoder.

    Sets `self.encoder_outputs` to the forward and backward outputs
    concatenated on axis 2, and `self.encoder_state` to the forward and
    backward final states concatenated on axis 1 (field by field for an
    LSTMStateTuple).  `self.encoder_state` is left untouched when the state
    is neither an LSTMStateTuple nor a tf.Tensor.
    '''
    with tf.variable_scope("BidirectionalEncoder"):
        rnn_outputs, rnn_states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=self.encoder_cell,
            cell_bw=self.encoder_cell,
            inputs=self.encoder_inputs_embedded,
            sequence_length=self.encoder_inputs_length,
            time_major=self.time_major,
            dtype=tf.float32)
        fw_outputs, bw_outputs = rnn_outputs
        fw_state, bw_state = rnn_states

        self.encoder_outputs = tf.concat((fw_outputs, bw_outputs), 2)

        if isinstance(fw_state, LSTMStateTuple):
            joined_c = tf.concat((fw_state.c, bw_state.c), 1,
                                 name='bidirectional_concat_c')
            joined_h = tf.concat((fw_state.h, bw_state.h), 1,
                                 name='bidirectional_concat_h')
            self.encoder_state = LSTMStateTuple(c=joined_c, h=joined_h)
        elif isinstance(fw_state, tf.Tensor):
            self.encoder_state = tf.concat((fw_state, bw_state), 1,
                                           name='bidirectional_concat')
def create_architecture(self):
    """Build the recurrent Q-network.

    Creates the sequence-length and initial-state placeholders, one fully
    connected layer, one recurrent layer (class from `_get_ru_class()`) and
    a linear head with `self.actions_num` outputs.

    Returns:
      The output tensor of the linear "q" layer.
    """
    self.vars.sequence_length = tf.placeholder(tf.int64, [1],
                                               name="sequence_length")

    dense = layers.fully_connected(self.get_input_layers(),
                                   self.fc_units_num,
                                   scope=self._name_scope + "/fc1")
    # Reshaped to [1, -1, fc_units_num] before entering the RNN.
    dense_sequence = tf.reshape(dense, [1, -1, self.fc_units_num])

    self.recurrent_cells = self._get_ru_class()(self._recurrent_units_num)
    state_size = self.recurrent_cells.state_size
    initial_c = tf.placeholder(tf.float32, [1, state_size.c],
                               name="initial_lstm_state_c")
    initial_h = tf.placeholder(tf.float32, [1, state_size.h],
                               name="initial_lstm_state_h")
    self.vars.initial_network_state = LSTMStateTuple(initial_c, initial_h)

    rnn_outputs, self.ops.network_state = tf.nn.dynamic_rnn(
        self.recurrent_cells,
        dense_sequence,
        initial_state=self.vars.initial_network_state,
        sequence_length=self.vars.sequence_length,
        scope=self._name_scope)
    flat_outputs = tf.reshape(rnn_outputs, [-1, self._recurrent_units_num])

    q = layers.linear(flat_outputs,
                      num_outputs=self.actions_num,
                      scope=self._name_scope + "/q")
    self.reset_state()
    return q
def build_cudnn_encoder(encoder_cell,
                        num_layers,
                        encoder_inputs,
                        encoder_inputs_length,
                        time_major=False,
                        scope=None):
    """Encode `encoder_inputs` with a `CudnnLSTM`.

    Args:
      encoder_cell: only its `output_size` is read (number of units).
      num_layers: number of LSTM layers; must be positive.
      encoder_inputs: input tensor, batch-major unless `time_major`; batch
        and feature dimensions must be statically known.
      encoder_inputs_length: accepted but not used.
      time_major: layout of `encoder_inputs` and of the returned output.
      scope: optional variable scope; defaults to 'cudnn_encoder'.

    Returns:
      `(output, state)`: `output` in the layout of the input, `state` an
      `LSTMStateTuple` built from index 0 of the returned c and h tensors.
    """
    assert num_layers > 0

    input_shape = encoder_inputs.get_shape()
    # The batch axis depends on the layout; reading axis 0 unconditionally
    # (as the original did) yields the time dimension for time-major input.
    batch_size = input_shape[1] if time_major else input_shape[0]
    num_units = encoder_cell.output_size
    input_size = input_shape[-1]

    # tf.variable_scope rejects a None name without a default name, so the
    # default argument needs a fallback.
    with tf.variable_scope(scope or 'cudnn_encoder'):
        model = CudnnLSTM(num_layers, num_units, input_size)
        params_size_t = calc_cudnn_num_params(num_layers, num_units,
                                              input_size)

        # The model is fed time-major data.
        if not time_major:
            encoder_inputs = tf.transpose(encoder_inputs, [1, 0, 2])

        input_h = tf.zeros([num_layers, batch_size, num_units])
        input_c = tf.zeros([num_layers, batch_size, num_units])
        params = tf.Variable(tf.random_normal([params_size_t]))

        # NOTE(review): is_training is hard-coded to True.
        output, output_h, output_c = model(is_training=True,
                                           input_data=encoder_inputs,
                                           input_h=input_h,
                                           input_c=input_c,
                                           params=params)
        print("output", output)
        print("output_h", output_h)

        if not time_major:
            output = tf.transpose(output, [1, 0, 2])

        # NOTE(review): only index 0 of the state tensors is returned;
        # presumably that is the first layer -- confirm this is intended
        # for num_layers > 1.
        return output, LSTMStateTuple(c=output_c[0], h=output_h[0])
def DynRNN(cell_model,
           num_units,
           num_layers,
           emb_inps,
           enc_lens,
           keep_prob=1.0,
           bidi=False,
           name_scope="encoder",
           dtype=tf.float32):
    """Create a dynamic RNN over embedded inputs.

    Args:
      cell_model, num_units, num_layers, keep_prob: forwarded to
        `CreateMultiRNNCell`.
      emb_inps: embedded input tensor.
      enc_lens: sequence lengths.
      bidi: build a bidirectional RNN when truthy.
      name_scope: name scope, also used as the RNN variable scope.
      dtype: dtype of the RNN.

    Returns:
      `(enc_outs, enc_states, mem_size, enc_state_size)`.  For a
      unidirectional RNN both sizes equal `num_units`.  For a bidirectional
      RNN outputs are concatenated on axis 2, `enc_states` is a list with
      one merged state per layer (concatenated on axis 1), and both sizes
      equal `2 * num_units`.
    """
    with tf.name_scope(name_scope):
        if not bidi:
            cell = CreateMultiRNNCell(cell_model, num_units, num_layers,
                                      keep_prob, name_scope="cell")
            enc_outs, enc_states = dynamic_rnn(cell=cell,
                                               inputs=emb_inps,
                                               sequence_length=enc_lens,
                                               parallel_iterations=16,
                                               dtype=dtype,
                                               scope=name_scope)
            return enc_outs, enc_states, num_units, num_units

        cell_fw = CreateMultiRNNCell(cell_model, num_units, num_layers,
                                     keep_prob, name_scope="cell_fw")
        cell_bw = CreateMultiRNNCell(cell_model, num_units, num_layers,
                                     keep_prob, name_scope="cell_bw")
        enc_outs, raw_states = bidirectional_dynamic_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=emb_inps,
            sequence_length=enc_lens,
            dtype=dtype,
            parallel_iterations=16,
            scope=name_scope)

        # Merge the two directions layer by layer.
        fw_layers, bw_layers = raw_states
        enc_states = []
        for fw, bw in zip(fw_layers, bw_layers):
            if isinstance(fw, LSTMStateTuple):
                merged = LSTMStateTuple(tf.concat([fw.c, bw.c], axis=1),
                                        tf.concat([fw.h, bw.h], axis=1))
            else:
                merged = tf.concat([fw, bw], 1)
            enc_states.append(merged)

        enc_outs = tf.concat([enc_outs[0], enc_outs[1]], axis=2)
        doubled = 2 * num_units
        return enc_outs, enc_states, doubled, doubled