def orthonormal_VanillaLSTMBuilder(lstm_layers, input_dims, lstm_hiddens,
                                   dropout_x=0., dropout_h=0., debug=False):
    assert lstm_layers == 1, 'only accept one layer lstm'
    # Build one orthonormal matrix covering both the recurrent and input
    # weights, then split it into the h2h and i2h blocks.
    W = orthonormal_initializer(lstm_hiddens, lstm_hiddens + input_dims, debug)
    W_h, W_x = W[:, :lstm_hiddens], W[:, lstm_hiddens:]
    # Tile each block four times, once per LSTM gate.
    lstm_cell = rnn.LSTMCell(
        input_size=input_dims,
        hidden_size=lstm_hiddens,
        i2h_weight_initializer=mx.init.Constant(np.concatenate([W_x] * 4, 0)),
        h2h_weight_initializer=mx.init.Constant(np.concatenate([W_h] * 4, 0)))
    return lstm_cell
def __init__(self, word_emb, vocab, model_params, ctx):
    """Initialize the decoder and its attention module.

    :param word_emb: word embedding model, used to look up word vectors
    :param vocab: vocabulary
    :param model_params: model parameters
    :param ctx: device context
    """
    super(BaseDecoder, self).__init__()
    self.model_params = model_params
    self.cell = rnn.LSTMCell(model_params['decoder_hidden_size'])
    self.attention = BahdanauAttention(model_params['attention_hidden_size'])
    self.word_emb = word_emb
    self.vocab_projection = nn.Dense(vocab.size)
    self.ctx = ctx
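# A hedged sketch (not the repo's actual API) of how one decoding step could
# wire these pieces together: embed the previous token, attend over the
# encoder outputs, run the LSTM cell, and project to the vocabulary. The
# BahdanauAttention call signature and the helper name are assumptions.
def _decode_step_sketch(decoder, prev_words, dec_states, encoder_outputs):
    emb = decoder.word_emb(prev_words)                             # (batch, emb_dim)
    context, attn_weights = decoder.attention(dec_states[0], encoder_outputs)  # assumed signature
    step_input = mx.nd.concat(emb, context, dim=-1)
    output, dec_states = decoder.cell(step_input, dec_states)
    logits = decoder.vocab_projection(output)                      # (batch, vocab.size)
    return logits, dec_states, attn_weights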
def test_combined():
    f = fold.Fold()
    t = TestBlock()
    cell = rnn.LSTMCell(20)
    cell.initialize()
    t.initialize()
    fold_output = []
    for _ in range(3):
        length = np.random.randint(3, 20)
        input_data = mx.nd.random.uniform(shape=(1, length, 5))
        cell_out = [f.record(0, t.predict, r[0])
                    for r in fold.fold_unroll(cell, f, length, input_data)]
        fold_output.extend(cell_out)
    print(f)
    f([fold_output])[0]
def orthonormal_VanillaLSTMBuilder(lstm_layers, input_dims, lstm_hiddens,
                                   dropout_x=0., dropout_h=0., debug=False):
    """Build a standard LSTM cell with variational dropout,
    with weights initialized to be orthonormal (https://arxiv.org/abs/1312.6120).

    Parameters
    ----------
    lstm_layers : int
        Currently only one layer is supported
    input_dims : int
        word vector dimensions
    lstm_hiddens : int
        hidden size
    dropout_x : float
        dropout on inputs, not used in this implementation, see `biLSTM` below
    dropout_h : float
        dropout on hidden states
    debug : bool
        set to True to skip orthonormal initialization

    Returns
    -------
    lstm_cell : VariationalDropoutCell
        An LSTM cell
    """
    assert lstm_layers == 1, 'only accept one layer lstm'
    W = orthonormal_initializer(lstm_hiddens, lstm_hiddens + input_dims, debug)
    W_h, W_x = W[:, :lstm_hiddens], W[:, lstm_hiddens:]
    # Bias is zero except the forget-gate slice (MXNet gate order is i, f, c, o).
    b = nd.zeros((4 * lstm_hiddens,))
    b[lstm_hiddens:2 * lstm_hiddens] = -1.0
    lstm_cell = rnn.LSTMCell(
        input_size=input_dims,
        hidden_size=lstm_hiddens,
        i2h_weight_initializer=mx.init.Constant(np.concatenate([W_x] * 4, 0)),
        h2h_weight_initializer=mx.init.Constant(np.concatenate([W_h] * 4, 0)),
        h2h_bias_initializer=mx.init.Constant(b))
    wrapper = VariationalDropoutCell(lstm_cell, drop_states=dropout_h)
    return wrapper
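# Minimal usage sketch for the builder above, assuming the usual imports
# (import mxnet as mx) and that orthonormal_initializer is defined in this
# module; per the docstring, debug=True skips the orthonormal initialization.
cell = orthonormal_VanillaLSTMBuilder(1, input_dims=100, lstm_hiddens=200,
                                      dropout_h=0.25, debug=True)
cell.initialize()
x = mx.nd.random.uniform(shape=(8, 30, 100))         # (batch, time, input_dims)
outputs, _ = cell.unroll(30, x, merge_outputs=True)
print(outputs.shape)                                 # (8, 30, 200)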
def __init__(self, n_inputs, n_encoder_state, n_decoder_state, n_decoder_output,
             n_sequence_max, n_alignment, n_layers=1, dropout=0.5):
    super(Decoder, self).__init__()
    self.n_sequence_max = n_sequence_max
    self.n_encoder_state = n_encoder_state
    self.n_decoder_state = n_decoder_state
    self.n_layers = n_layers
    self.n_inputs = n_inputs
    with self.name_scope():
        self.dropout = nn.Dropout(dropout)
        # Two-layer MLP that scores each (bidirectional) encoder state.
        self.attention = nn.Sequential()
        with self.attention.name_scope():
            self.attention.add(nn.Dense(n_alignment, activation='relu', flatten=False))
            self.attention.add(nn.Dense(1, activation='sigmoid', flatten=False))
        self.decoder = rnn.LSTMCell(n_decoder_state * 2,
                                    input_size=n_inputs + n_encoder_state * 2)
        # self.dense_output = nn.Dense(n_decoder_output, flatten=False)
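# A speculative sketch (shapes, names, and the helper itself are assumptions)
# of how this attention MLP could produce a context vector for one decoder step:
def _attend_and_step_sketch(dec, x_t, states, encoder_states):
    # encoder_states: (batch, src_len, 2 * n_encoder_state)
    scores = dec.attention(encoder_states)                # (batch, src_len, 1)
    weights = mx.nd.softmax(scores, axis=1)
    context = mx.nd.sum(weights * encoder_states, axis=1) # (batch, 2 * n_encoder_state)
    step_in = mx.nd.concat(x_t, context, dim=-1)          # n_inputs + 2 * n_encoder_state
    out, states = dec.decoder(step_in, states)
    return out, states, weights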
def test_rnn_cell():
    cell = rnn.LSTMCell(7)
    cell.initialize()
    f = fold.Fold()
    regular_result = []
    fold_result = []
    for _ in range(3):
        length = np.random.randint(3, 20)
        input_data = mx.nd.random.uniform(shape=(1, length, 5))
        regular_result.extend(cell.unroll(length, input_data, merge_outputs=False)[0])
        state = cell.begin_state(1)
        outputs = []
        split_input = input_data.split(length, squeeze_axis=True)
        for i in range(length):
            out, state = f.record(0, cell, split_input[i], state).split(2)
            state = state.split(2)
            outputs.append(out)
        fold_result.extend(outputs)
    print(f)
    result = f([fold_result], True)[0]
    assert_almost_equal(result.asnumpy(),
                        mx.nd.concat(*regular_result, dim=0).asnumpy())
def __init__(self, **kwargs):
    super(block2HybridBlock, self).__init__(**kwargs)
    self.stacked_rnn_cells = mx.gluon.rnn.HybridSequentialRNNCell()
    self.stacked_rnn_cells.add(rnn.LSTMCell(hidden_size=20))
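# Usage sketch: HybridSequentialRNNCell behaves like a single recurrent cell,
# so the stack can be unrolled directly (the input feature size is assumed).
net = block2HybridBlock()
net.initialize()
x = mx.nd.random.uniform(shape=(4, 10, 8))            # (batch, time, features)
outputs, states = net.stacked_rnn_cells.unroll(10, x, merge_outputs=True)
print(outputs.shape)                                  # (4, 10, 20)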
def _get_rnn_cell(mode, num_layers, input_size, hidden_size,
                  dropout, weight_dropout,
                  var_drop_in, var_drop_state, var_drop_out,
                  skip_connection, proj_size=None, cell_clip=None, proj_clip=None):
    """Create an RNN cell given the specs.

    Parameters
    ----------
    mode : str
        The type of RNN cell to use. Options are 'lstmpc', 'rnn_tanh',
        'rnn_relu', 'lstm', 'gru'.
    num_layers : int
        The number of RNN cells in the encoder.
    input_size : int
        The initial input size of the RNN cell.
    hidden_size : int
        The hidden size of the RNN cell.
    dropout : float
        The dropout rate to use for encoder output.
    weight_dropout: float
        The dropout rate for the hidden-to-hidden connections.
    var_drop_in: float
        The variational dropout rate for inputs. Won't apply dropout if it equals 0.
    var_drop_state: float
        The variational dropout rate for state inputs on the first state channel.
        Won't apply dropout if it equals 0.
    var_drop_out: float
        The variational dropout rate for outputs. Won't apply dropout if it equals 0.
    skip_connection : bool
        Whether to add skip connections (add the RNN cell input to its output).
    proj_size : int
        The projection size of each LSTMPCellWithClip cell.
        Only available when mode=lstmpc.
    cell_clip : float
        Clip cell state between [-cell_clip, cell_clip] in LSTMPCellWithClip cell.
        Only available when mode=lstmpc.
    proj_clip : float
        Clip projection between [-proj_clip, proj_clip] in LSTMPCellWithClip cell.
        Only available when mode=lstmpc.
    """
    assert mode == 'lstmpc' or proj_size is None, \
        'proj_size takes effect only when mode is lstmpc'
    assert mode == 'lstmpc' or cell_clip is None, \
        'cell_clip takes effect only when mode is lstmpc'
    assert mode == 'lstmpc' or proj_clip is None, \
        'proj_clip takes effect only when mode is lstmpc'

    rnn_cell = rnn.HybridSequentialRNNCell()
    with rnn_cell.name_scope():
        for i in range(num_layers):
            if mode == 'rnn_relu':
                cell = rnn.RNNCell(hidden_size, 'relu', input_size=input_size)
            elif mode == 'rnn_tanh':
                cell = rnn.RNNCell(hidden_size, 'tanh', input_size=input_size)
            elif mode == 'lstm':
                cell = rnn.LSTMCell(hidden_size, input_size=input_size)
            elif mode == 'gru':
                cell = rnn.GRUCell(hidden_size, input_size=input_size)
            elif mode == 'lstmpc':
                cell = LSTMPCellWithClip(hidden_size, proj_size,
                                         cell_clip=cell_clip,
                                         projection_clip=proj_clip,
                                         input_size=input_size)
            if var_drop_in + var_drop_state + var_drop_out != 0:
                cell = contrib.rnn.VariationalDropoutCell(
                    cell, var_drop_in, var_drop_state, var_drop_out)
            if skip_connection:
                cell = rnn.ResidualCell(cell)
            rnn_cell.add(cell)
            if i != num_layers - 1 and dropout != 0:
                rnn_cell.add(rnn.DropoutCell(dropout))

    if weight_dropout:
        apply_weight_drop(rnn_cell, 'h2h_weight', rate=weight_dropout)
    return rnn_cell
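# Minimal usage sketch for the factory above (assuming `import mxnet as mx`
# and the module-level names this function relies on are available):
encoder = _get_rnn_cell(mode='lstm', num_layers=1, input_size=50, hidden_size=100,
                        dropout=0.0, weight_dropout=0.0, var_drop_in=0.0,
                        var_drop_state=0.0, var_drop_out=0.0, skip_connection=False)
encoder.initialize()
x = mx.nd.random.uniform(shape=(4, 35, 50))           # (batch, time, input_size)
outputs, states = encoder.unroll(35, x, merge_outputs=True)
print(outputs.shape)                                  # (4, 35, 100)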
def __init__(self, **kwargs):
    super(NET, self).__init__(**kwargs)
    with self.name_scope():
        self.encoder = rnn.LSTMCell(hidden_size=20)
        self.batchnorm = nn.BatchNorm(axis=2)
        self.dense = nn.Dense(1, flatten=True)
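# A hedged sketch of the forward pass this block suggests (the layout,
# unroll length, and use of the outputs are assumptions):
def _forward_sketch(net, x):                 # x: (batch, time, features)
    outputs, _ = net.encoder.unroll(x.shape[1], x, merge_outputs=True)
    normed = net.batchnorm(outputs)          # normalize over the feature axis (axis=2)
    return net.dense(normed)                 # flatten (time, hidden) and regress to 1 value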
def _get_rnn_cell(mode, num_layers, input_size, hidden_size,
                  dropout, var_drop_in, var_drop_state, var_drop_out,
                  skip_connection, proj_size=None):
    """Create an RNN cell given the specs.

    Parameters
    ----------
    mode : str
        The type of RNN cell to use. Options are 'rnn_tanh', 'rnn_relu',
        'lstm', 'lstmp', 'gru'.
    num_layers : int
        The number of RNN cells in the encoder.
    input_size : int
        The initial input size of the RNN cell.
    hidden_size : int
        The hidden size of the RNN cell.
    dropout : float
        The dropout rate to use for encoder output.
    var_drop_in: float
        The variational dropout rate for inputs. Won't apply dropout if it equals 0.
    var_drop_state: float
        The variational dropout rate for state inputs on the first state channel.
        Won't apply dropout if it equals 0.
    var_drop_out: float
        The variational dropout rate for outputs. Won't apply dropout if it equals 0.
    skip_connection : bool
        Whether to add skip connections (add the RNN cell input to its output).
    proj_size : int
        The projection size of each LSTMPCell cell.
        Only available when mode=lstmp.
    """
    if mode == 'lstmp':
        assert proj_size is not None, \
            'proj_size is required when mode is lstmp'

    rnn_cell = rnn.HybridSequentialRNNCell()
    with rnn_cell.name_scope():
        for i in range(num_layers):
            if mode == 'rnn_relu':
                cell = rnn.RNNCell(hidden_size, 'relu', input_size=input_size)
            elif mode == 'rnn_tanh':
                cell = rnn.RNNCell(hidden_size, 'tanh', input_size=input_size)
            elif mode == 'lstm':
                cell = rnn.LSTMCell(hidden_size, input_size=input_size)
            elif mode == 'lstmp':
                cell = gluon.contrib.rnn.LSTMPCell(hidden_size, proj_size,
                                                   input_size=input_size)
            elif mode == 'gru':
                cell = rnn.GRUCell(hidden_size, input_size=input_size)
            if var_drop_in + var_drop_state + var_drop_out != 0:
                cell = gluon.contrib.rnn.VariationalDropoutCell(
                    cell, var_drop_in, var_drop_state, var_drop_out)
            if skip_connection:
                cell = rnn.ResidualCell(cell)
            rnn_cell.add(cell)
            if i != num_layers - 1 and dropout != 0:
                rnn_cell.add(rnn.DropoutCell(dropout))
    return rnn_cell
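# Usage sketch for the 'lstmp' branch (LSTMPCell projects the hidden state,
# so the unrolled outputs have proj_size features; sizes below are assumed):
cell = _get_rnn_cell(mode='lstmp', num_layers=1, input_size=50, hidden_size=200,
                     dropout=0.0, var_drop_in=0.0, var_drop_state=0.0,
                     var_drop_out=0.0, skip_connection=False, proj_size=64)
cell.initialize()
x = mx.nd.random.uniform(shape=(2, 10, 50))           # (batch, time, input_size)
outputs, states = cell.unroll(10, x, merge_outputs=True)
print(outputs.shape)                                  # (2, 10, 64)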