def __init__(self, args):
    super(HierarchicalEncoder, self).__init__()
    self.args = args
    self.dropout = args.elmo_dropout
    self.input_size = args.elmo_input_size
    self.hidden_size = args.elmo_hidden_size
    self.num_layers = args.elmo_num_layers
    self.cell_size = args.elmo_cell_size
    self.requires_grad = args.elmo_requires_grad

    forward_layers = []
    backward_layers = []

    lstm_input_size = self.input_size
    go_forward = True
    for layer_index in range(self.num_layers):
        # Paired cells per layer; only the traversal direction differs.
        # The two trailing None arguments disable memory-cell and
        # state-projection clipping.
        forward_layer = LstmCellWithProjection(lstm_input_size,
                                               self.hidden_size,
                                               self.cell_size,
                                               go_forward,
                                               self.dropout,
                                               None,
                                               None)
        backward_layer = LstmCellWithProjection(lstm_input_size,
                                                self.hidden_size,
                                                self.cell_size,
                                                not go_forward,
                                                self.dropout,
                                                None,
                                                None)
        # Layers after the first consume the projected hidden states.
        lstm_input_size = self.hidden_size

        self.add_module('forward_layer_{}'.format(layer_index), forward_layer)
        self.add_module('backward_layer_{}'.format(layer_index), backward_layer)
        forward_layers.append(forward_layer)
        backward_layers.append(backward_layer)
    self.forward_layers = forward_layers
    self.backward_layers = backward_layers
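
# A hedged usage sketch (not from the source): HierarchicalEncoder reads its
# hyperparameters from the elmo_* fields of `args`, so any namespace carrying
# those fields works. The values below are illustrative assumptions.
from argparse import Namespace

args = Namespace(
    elmo_dropout=0.1,
    elmo_input_size=128,
    elmo_hidden_size=256,
    elmo_cell_size=512,
    elmo_num_layers=2,
    elmo_requires_grad=False,
)
encoder = HierarchicalEncoder(args)
# add_module registers 'forward_layer_0', 'backward_layer_0', ..., so the
# per-direction cells are visible to encoder.parameters() and optimizers.
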

def initialize_lstm_params(lstm: LstmCellWithProjection) -> Dict[str, np.ndarray]:
    lstm.reset_parameters()
    w_0, b, w_p_0 = extract_lstm_params_with_serialized_order(lstm)
    dict_ret = {
        "W_0": w_0,
        "B": b,
        "W_P_0": w_p_0,
    }
    return dict_ret
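
# A hedged usage sketch: dumping freshly initialized weights in serialized
# order. The exact shapes of W_0, B, and W_P_0 depend on
# extract_lstm_params_with_serialized_order, which is not shown in this
# snippet, so they are printed rather than asserted.
lstm = LstmCellWithProjection(input_size=3, hidden_size=5, cell_size=7)
params = initialize_lstm_params(lstm)
for name, array in params.items():
    print(name, array.shape)  # -> W_0, B, W_P_0 with their numpy shapes
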

def __init__(
    self,
    input_size: int,
    hidden_size: int,
    cell_size: int,
    num_layers: int,
    requires_grad: bool = False,
    recurrent_dropout_probability: float = 0.0,
    memory_cell_clip_value: Optional[float] = None,
    state_projection_clip_value: Optional[float] = None,
) -> None:
    super().__init__(stateful=True)  # Required to be wrapped with a `PytorchSeq2SeqWrapper`.
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.cell_size = cell_size
    self.requires_grad = requires_grad

    forward_layers = []
    backward_layers = []

    lstm_input_size = input_size
    go_forward = True
    for layer_index in range(num_layers):
        forward_layer = LstmCellWithProjection(
            lstm_input_size,
            hidden_size,
            cell_size,
            go_forward,
            recurrent_dropout_probability,
            memory_cell_clip_value,
            state_projection_clip_value,
        )
        backward_layer = LstmCellWithProjection(
            lstm_input_size,
            hidden_size,
            cell_size,
            not go_forward,
            recurrent_dropout_probability,
            memory_cell_clip_value,
            state_projection_clip_value,
        )
        lstm_input_size = hidden_size

        self.add_module("forward_layer_{}".format(layer_index), forward_layer)
        self.add_module("backward_layer_{}".format(layer_index), backward_layer)
        forward_layers.append(forward_layer)
        backward_layers.append(backward_layer)
    self.forward_layers = forward_layers
    self.backward_layers = backward_layers
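
# A hedged sketch (not from the source): this constructor's body matches the
# ElmoLstm constructor further below, so `BiLmEncoder` is a hypothetical name
# for the enclosing class. It shows how layer input sizes change after the
# first layer, because each LstmCellWithProjection projects its cell state
# down to hidden_size.
encoder = BiLmEncoder(
    input_size=128,
    hidden_size=256,
    cell_size=512,
    num_layers=2,
    recurrent_dropout_probability=0.1,
)
assert encoder.forward_layers[0].input_size == 128
assert encoder.forward_layers[1].input_size == 256
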

def allennlp_lstm_cell(c, input, hidden, cell, batch, timestep, repeat, cuda, output):
    # CLI arguments arrive as strings; cast the numeric ones.
    input = int(input)
    hidden = int(hidden)
    cell = int(cell)
    batch = int(batch)
    timestep = int(timestep)
    repeat = int(repeat)

    lstm = LstmCellWithProjection(
        input_size=input,
        hidden_size=hidden,
        cell_size=cell,
    )
    input_tensor = torch.rand(batch, timestep, input)
    initial_hidden_state = torch.ones([1, batch, hidden])
    initial_cell_state = torch.ones([1, batch, cell])

    if cuda == 'cuda':
        lstm = lstm.cuda()
        input_tensor = input_tensor.cuda()
        initial_hidden_state = initial_hidden_state.cuda()
        initial_cell_state = initial_cell_state.cuda()

    durations = []
    for idx in range(repeat):
        # The first sequence is always full-length so the sorted lengths
        # stay valid for the whole batch.
        batch_lengths = [timestep]
        batch_lengths.extend(
            [random.randrange(timestep + 1) for _ in range(batch - 1)])
        batch_lengths = sorted(batch_lengths, reverse=True)

        with torch.no_grad():
            time_start = time.time()
            lstm(
                input_tensor,
                batch_lengths,
                (initial_hidden_state, initial_cell_state),
            )
            # CUDA kernels launch asynchronously; synchronize before reading
            # the clock so the measured duration is accurate.
            if cuda == 'cuda':
                torch.cuda.synchronize()
        durations.append((idx, time.time() - time_start))

    with open(output, 'w') as fout:
        json.dump(
            {
                'type': 'allennlp_lstm_cell',
                'cuda': cuda,
                'durations': durations,
            },
            fout,
            ensure_ascii=False,
            indent=2,
        )
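
# A hedged sketch: the string-to-int casts above suggest this function is
# driven from a command-line dispatcher, but it can be called directly the
# same way. `c` is unused by the body (it appears to be a dispatcher context
# object), and all values below are illustrative.
allennlp_lstm_cell(
    None,                  # c: unused here
    '128', '256', '512',   # input, hidden, cell sizes
    '32', '50',            # batch size, timesteps
    '10',                  # repetitions
    'cpu',                 # pass 'cuda' to benchmark on GPU
    'allennlp_lstm_cell.json',
)
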

def __init__(self,
             input_size: int,
             hidden_size: int,
             cell_size: int,
             num_layers: int,
             requires_grad: bool = False,
             recurrent_dropout_probability: float = 0.0,
             memory_cell_clip_value: Optional[float] = None,
             state_projection_clip_value: Optional[float] = None) -> None:
    # Required to be wrapped with a :class:`PytorchSeq2SeqWrapper`.
    super(ElmoLstm_Oneward, self).__init__(stateful=True)
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.cell_size = cell_size
    self.requires_grad = requires_grad

    oneward_layers = []

    lstm_input_size = input_size
    go_forward = True
    for layer_index in range(num_layers):
        oneward_layer = LstmCellWithProjection(lstm_input_size,
                                               hidden_size,
                                               cell_size,
                                               go_forward,
                                               recurrent_dropout_probability,
                                               memory_cell_clip_value,
                                               state_projection_clip_value)
        lstm_input_size = hidden_size

        self.add_module('oneward_layer_{}'.format(layer_index), oneward_layer)
        oneward_layers.append(oneward_layer)
    self.oneward_layers = oneward_layers
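
# A hedged sketch: ElmoLstm_Oneward stacks cells in a single direction;
# go_forward stays True for every layer, so this is a plain left-to-right
# stack with no backward pass. Sizes below are illustrative.
encoder = ElmoLstm_Oneward(
    input_size=128,
    hidden_size=256,
    cell_size=512,
    num_layers=2,
)
assert len(encoder.oneward_layers) == 2
assert encoder.oneward_layers[1].input_size == 256
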

def __init__(self,
             input_size,
             hidden_size,
             cell_size,
             num_layers,
             requires_grad=False,
             recurrent_dropout_probability=0.0,
             memory_cell_clip_value=None,
             state_projection_clip_value=None):
    # Required to be wrapped with a :class:`PytorchSeq2SeqWrapper`.
    super(ElmoLstm, self).__init__(stateful=True)
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.cell_size = cell_size
    self.requires_grad = requires_grad

    forward_layers = []
    backward_layers = []

    lstm_input_size = input_size
    go_forward = True
    for layer_index in range(num_layers):
        forward_layer = LstmCellWithProjection(
            lstm_input_size, hidden_size, cell_size, go_forward,
            recurrent_dropout_probability, memory_cell_clip_value,
            state_projection_clip_value)
        backward_layer = LstmCellWithProjection(
            lstm_input_size, hidden_size, cell_size, not go_forward,
            recurrent_dropout_probability, memory_cell_clip_value,
            state_projection_clip_value)
        lstm_input_size = hidden_size

        self.add_module('forward_layer_{}'.format(layer_index), forward_layer)
        self.add_module('backward_layer_{}'.format(layer_index), backward_layer)
        forward_layers.append(forward_layer)
        backward_layers.append(backward_layer)
    self.forward_layers = forward_layers
    self.backward_layers = backward_layers
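
# A hedged sketch: although forward_layers/backward_layers are plain Python
# lists, add_module makes each cell a registered submodule, so the stack's
# weights are visible to optimizers and to state_dict(). Sizes illustrative.
elmo_lstm = ElmoLstm(input_size=128, hidden_size=256, cell_size=512, num_layers=2)
registered = dict(elmo_lstm.named_modules())
assert 'forward_layer_0' in registered
assert 'backward_layer_1' in registered
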

def test_elmo_lstm_cell_completes_forward_pass(self):
    input_tensor = torch.rand(4, 5, 3)
    input_tensor[1, 4:, :] = 0.0
    input_tensor[2, 2:, :] = 0.0
    input_tensor[3, 1:, :] = 0.0
    initial_hidden_state = torch.ones([1, 4, 5])
    initial_memory_state = torch.ones([1, 4, 7])

    lstm = LstmCellWithProjection(
        input_size=3,
        hidden_size=5,
        cell_size=7,
        memory_cell_clip_value=2,
        state_projection_clip_value=1,
    )
    output_sequence, lstm_state = lstm(
        input_tensor, [5, 4, 2, 1],
        (initial_hidden_state, initial_memory_state))

    # Padded timesteps must stay zero in the output.
    numpy.testing.assert_array_equal(
        output_sequence.data[1, 4:, :].numpy(), 0.0)
    numpy.testing.assert_array_equal(
        output_sequence.data[2, 2:, :].numpy(), 0.0)
    numpy.testing.assert_array_equal(
        output_sequence.data[3, 1:, :].numpy(), 0.0)

    # Test the state projection clipping (state_projection_clip_value=1).
    numpy.testing.assert_array_less(output_sequence.data.numpy(), 1.0)
    numpy.testing.assert_array_less(-output_sequence.data.numpy(), 1.0)

    # LSTM state should be (num_layers, batch_size, hidden_size).
    assert list(lstm_state[0].size()) == [1, 4, 5]
    # LSTM memory cell should be (num_layers, batch_size, cell_size).
    assert list(lstm_state[1].size()) == [1, 4, 7]

    # Test the memory cell clipping (memory_cell_clip_value=2). The cell
    # state lives in lstm_state[1]; the original checked lstm_state[0],
    # which is the projected hidden state and passes trivially.
    numpy.testing.assert_array_less(lstm_state[1].data.numpy(), 2.0)
    numpy.testing.assert_array_less(-lstm_state[1].data.numpy(), 2.0)

def test_unidirectional_single_layer_lstm_with_allennlp():
    for lstm_cls, is_cpp in [
            (UnidirectionalSingleLayerLstm, True),
            (PyUnidirectionalSingleLayerLstm, False),
    ]:
        input_tensor = torch.rand(4, 5, 3)
        input_tensor[1, 4:, :] = 0.
        input_tensor[2, 2:, :] = 0.
        input_tensor[3, 1:, :] = 0.
        inputs = pack_padded_sequence(input_tensor, [5, 4, 2, 1], batch_first=True)

        initial_hidden_state = torch.ones([1, 4, 5])
        initial_cell_state = torch.ones([1, 4, 7])

        for go_forward in [True, False]:
            allennlp_lstm = LstmCellWithProjection(
                input_size=3,
                hidden_size=5,
                cell_size=7,
                go_forward=go_forward,
                memory_cell_clip_value=2,
                state_projection_clip_value=1,
            )
            lstm = lstm_cls(
                input_size=3,
                hidden_size=5,
                cell_size=7,
                go_forward=go_forward,
                cell_clip=2,
                proj_clip=1,
            )

            # Copy the AllenNLP weights so both implementations compute the
            # same function. The C++ binding exposes named_parameters() as a
            # dict; the Python version exposes plain attributes.
            if is_cpp:
                lstm.named_parameters()['input_linearity_weight'].data.copy_(
                    allennlp_lstm.input_linearity.weight)
                lstm.named_parameters()['hidden_linearity_weight'].data.copy_(
                    allennlp_lstm.state_linearity.weight)
                lstm.named_parameters()['hidden_linearity_bias'].data.copy_(
                    allennlp_lstm.state_linearity.bias)
                lstm.named_parameters()['proj_linearity_weight'].data.copy_(
                    allennlp_lstm.state_projection.weight)
            else:
                lstm.input_linearity_weight.data.copy_(
                    allennlp_lstm.input_linearity.weight)
                lstm.hidden_linearity_weight.data.copy_(
                    allennlp_lstm.state_linearity.weight)
                lstm.hidden_linearity_bias.data.copy_(
                    allennlp_lstm.state_linearity.bias)
                lstm.proj_linearity_weight.data.copy_(
                    allennlp_lstm.state_projection.weight)

            outputs, lstm_state = lstm(
                inputs.data,
                inputs.batch_sizes,
                (initial_hidden_state, initial_cell_state),
            )
            output_sequence, _batch_sizes = pad_packed_sequence(
                PackedSequence(outputs, inputs.batch_sizes),
                batch_first=True,
            )

            allennlp_output_sequence, allennlp_lstm_state = allennlp_lstm(
                input_tensor,
                [5, 4, 2, 1],
                (initial_hidden_state, initial_cell_state),
            )

            # Identical weights and inputs should yield identical outputs.
            numpy.testing.assert_array_equal(
                output_sequence.data.numpy(),
                allennlp_output_sequence.data.numpy())
            numpy.testing.assert_array_equal(
                lstm_state[0].data.numpy(),
                allennlp_lstm_state[0].data.numpy())
            numpy.testing.assert_array_equal(
                lstm_state[1].data.numpy(),
                allennlp_lstm_state[1].data.numpy())