Example #1
    def __init__(self, args):
        super(HierarchicalEncoder, self).__init__()
        self.args = args
        self.dropout = args.elmo_dropout
        self.input_size = args.elmo_input_size
        self.hidden_size = args.elmo_hidden_size
        self.num_layers = args.elmo_num_layers
        self.cell_size = args.elmo_cell_size
        self.requires_grad = args.elmo_requires_grad

        # Build matched stacks of forward and backward LstmCellWithProjection
        # layers; each layer consumes the projected output of the previous one.
        forward_layers = []
        backward_layers = []

        lstm_input_size = self.input_size
        go_forward = True
        for layer_index in range(self.num_layers):
            forward_layer = LstmCellWithProjection(lstm_input_size,
                                                   self.hidden_size,
                                                   self.cell_size, go_forward,
                                                   self.dropout, None, None)
            backward_layer = LstmCellWithProjection(lstm_input_size,
                                                    self.hidden_size,
                                                    self.cell_size,
                                                    not go_forward,
                                                    self.dropout, None, None)
            lstm_input_size = self.hidden_size

            self.add_module('forward_layer_{}'.format(layer_index),
                            forward_layer)
            self.add_module('backward_layer_{}'.format(layer_index),
                            backward_layer)
            forward_layers.append(forward_layer)
            backward_layers.append(backward_layer)
        self.forward_layers = forward_layers
        self.backward_layers = backward_layers
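
The constructor above only reads a handful of elmo_* hyperparameters off args. A minimal wiring sketch, assuming HierarchicalEncoder subclasses torch.nn.Module and that these are the only fields it needs (the values are illustrative, not the project's real configuration):

from argparse import Namespace

args = Namespace(
    elmo_dropout=0.1,
    elmo_input_size=512,
    elmo_hidden_size=512,
    elmo_num_layers=2,
    elmo_cell_size=4096,
    elmo_requires_grad=False,
)
encoder = HierarchicalEncoder(args)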
Example #2
def initialize_lstm_params(lstm: LstmCellWithProjection) -> Dict[str, np.ndarray]:
    # Re-initialize the cell, then export its weights under the names used by
    # ELMo-style serialized weight files.
    lstm.reset_parameters()
    w_0, b, w_p_0 = extract_lstm_params_with_serialized_order(lstm)
    return {
        "W_0": w_0,
        "B": b,
        "W_P_0": w_p_0,
    }
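
A small usage sketch, assuming extract_lstm_params_with_serialized_order is defined in the same module and returns NumPy arrays (the dimensions below are arbitrary test sizes):

from allennlp.modules.lstm_cell_with_projection import LstmCellWithProjection

lstm = LstmCellWithProjection(input_size=3, hidden_size=5, cell_size=7)
params = initialize_lstm_params(lstm)
# W_0, B and W_P_0 follow the naming of ELMo-style serialized weight files;
# their exact shapes depend on the extraction helper.
for name, array in params.items():
    print(name, array.shape)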
Example #3
    def __init__(
        self,
        input_size: int,
        hidden_size: int,
        cell_size: int,
        num_layers: int,
        requires_grad: bool = False,
        recurrent_dropout_probability: float = 0.0,
        memory_cell_clip_value: Optional[float] = None,
        state_projection_clip_value: Optional[float] = None,
    ) -> None:
        super().__init__(stateful=True)

        # Required to be wrapped with a `PytorchSeq2SeqWrapper`.
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.cell_size = cell_size
        self.requires_grad = requires_grad

        forward_layers = []
        backward_layers = []

        lstm_input_size = input_size
        go_forward = True
        for layer_index in range(num_layers):
            forward_layer = LstmCellWithProjection(
                lstm_input_size,
                hidden_size,
                cell_size,
                go_forward,
                recurrent_dropout_probability,
                memory_cell_clip_value,
                state_projection_clip_value,
            )
            backward_layer = LstmCellWithProjection(
                lstm_input_size,
                hidden_size,
                cell_size,
                not go_forward,
                recurrent_dropout_probability,
                memory_cell_clip_value,
                state_projection_clip_value,
            )
            lstm_input_size = hidden_size

            self.add_module("forward_layer_{}".format(layer_index),
                            forward_layer)
            self.add_module("backward_layer_{}".format(layer_index),
                            backward_layer)
            forward_layers.append(forward_layer)
            backward_layers.append(backward_layer)
        self.forward_layers = forward_layers
        self.backward_layers = backward_layers
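
The layer pairs registered above are normally driven one direction at a time, with the two outputs concatenated feature-wise per layer. This is only a simplified, hypothetical sketch of that loop (the real forward pass also handles packing, masking, and carried-over state):

import torch

def run_bilstm_layers(module, inputs, batch_lengths):
    # inputs: (batch, timesteps, input_size); batch_lengths sorted decreasing.
    forward_out, backward_out = inputs, inputs
    layer_outputs = []
    for forward_layer, backward_layer in zip(module.forward_layers,
                                             module.backward_layers):
        forward_out, _ = forward_layer(forward_out, batch_lengths)
        backward_out, _ = backward_layer(backward_out, batch_lengths)
        # Each direction projects down to hidden_size, so the concatenation
        # has 2 * hidden_size features.
        layer_outputs.append(torch.cat([forward_out, backward_out], dim=-1))
    return layer_outputs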
Example #4
def allennlp_lstm_cell(c, input, hidden, cell, batch, timestep, repeat, cuda,
                       output):
    input = int(input)
    hidden = int(hidden)
    cell = int(cell)
    batch = int(batch)
    timestep = int(timestep)
    repeat = int(repeat)

    lstm = LstmCellWithProjection(
        input_size=input,
        hidden_size=hidden,
        cell_size=cell,
    )
    input_tensor = torch.rand(batch, timestep, input)

    initial_hidden_state = torch.ones([1, batch, hidden])
    initial_cell_state = torch.ones([1, batch, cell])

    if cuda == 'cuda':
        lstm = lstm.cuda()
        input_tensor = input_tensor.cuda()
        initial_hidden_state = initial_hidden_state.cuda()
        initial_cell_state = initial_cell_state.cuda()

    durations = []
    for idx in range(repeat):
        # The first sequence spans all timesteps; the remaining lengths are
        # drawn at random and sorted in decreasing order, as the cell expects.
        batch_lengths = [timestep]
        batch_lengths.extend(
            [random.randrange(timestep + 1) for _ in range(batch - 1)])
        batch_lengths = sorted(batch_lengths, reverse=True)

        with torch.no_grad():
            time_start = time.time()
            lstm(
                input_tensor,
                batch_lengths,
                (initial_hidden_state, initial_cell_state),
            )
            durations.append((idx, time.time() - time_start))

    with open(output, 'w') as fout:
        json.dump(
            {
                'type': 'allennlp_lstm_cell',
                'cuda': cuda,
                'durations': durations
            },
            fout,
            ensure_ascii=False,
            indent=2,
        )
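
The benchmark writes its per-repeat timings to a JSON file. A quick post-processing sketch (the file name here is just a placeholder for whatever was passed as output):

import json
import statistics

with open('allennlp_lstm_cell.json') as fin:
    report = json.load(fin)

times = [duration for _idx, duration in report['durations']]
print('mean {:.6f}s / median {:.6f}s over {} runs'.format(
    statistics.mean(times), statistics.median(times), len(times)))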
Example #5
    def __init__(self,
                 input_size: int,
                 hidden_size: int,
                 cell_size: int,
                 num_layers: int,
                 requires_grad: bool = False,
                 recurrent_dropout_probability: float = 0.0,
                 memory_cell_clip_value: Optional[float] = None,
                 state_projection_clip_value: Optional[float] = None) -> None:
        super(ElmoLstm_Oneward, self).__init__(stateful=True)

        # Required to be wrapped with a :class:`PytorchSeq2SeqWrapper`.
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.cell_size = cell_size
        self.requires_grad = requires_grad

        oneward_layers = []

        lstm_input_size = input_size
        go_forward = True
        for layer_index in range(num_layers):
            oneward_layer = LstmCellWithProjection(lstm_input_size,
                                                   hidden_size,
                                                   cell_size,
                                                   go_forward,
                                                   recurrent_dropout_probability,
                                                   memory_cell_clip_value,
                                                   state_projection_clip_value)
            lstm_input_size = hidden_size

            self.add_module('oneward_layer_{}'.format(layer_index), oneward_layer)
            oneward_layers.append(oneward_layer)
        self.oneward_layers = oneward_layers
Example #6
    def __init__(self,
                 input_size,
                 hidden_size,
                 cell_size,
                 num_layers,
                 requires_grad=False,
                 recurrent_dropout_probability=0.0,
                 memory_cell_clip_value=None,
                 state_projection_clip_value=None):
        super(ElmoLstm, self).__init__(stateful=True)

        # Required to be wrapped with a :class:`PytorchSeq2SeqWrapper`.
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.cell_size = cell_size
        self.requires_grad = requires_grad

        forward_layers = []
        backward_layers = []

        lstm_input_size = input_size
        go_forward = True
        for layer_index in range(num_layers):
            forward_layer = LstmCellWithProjection(
                lstm_input_size, hidden_size, cell_size, go_forward,
                recurrent_dropout_probability, memory_cell_clip_value,
                state_projection_clip_value)
            backward_layer = LstmCellWithProjection(
                lstm_input_size, hidden_size, cell_size, not go_forward,
                recurrent_dropout_probability, memory_cell_clip_value,
                state_projection_clip_value)
            lstm_input_size = hidden_size

            self.add_module('forward_layer_{}'.format(layer_index),
                            forward_layer)
            self.add_module('backward_layer_{}'.format(layer_index),
                            backward_layer)
            forward_layers.append(forward_layer)
            backward_layers.append(backward_layer)
        self.forward_layers = forward_layers
        self.backward_layers = backward_layers
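
A minimal instantiation sketch using ELMo-sized hyperparameters (the values below mirror the commonly used original-ELMo configuration and are assumptions, not requirements of the class):

elmo_lstm = ElmoLstm(
    input_size=512,
    hidden_size=512,
    cell_size=4096,
    num_layers=2,
    requires_grad=False,
    recurrent_dropout_probability=0.1,
    memory_cell_clip_value=3.0,
    state_projection_clip_value=3.0,
)
print(sum(p.numel() for p in elmo_lstm.parameters()))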
Example #7
    def test_elmo_lstm_cell_completes_forward_pass(self):
        input_tensor = torch.rand(4, 5, 3)
        input_tensor[1, 4:, :] = 0.0
        input_tensor[2, 2:, :] = 0.0
        input_tensor[3, 1:, :] = 0.0

        initial_hidden_state = torch.ones([1, 4, 5])
        initial_memory_state = torch.ones([1, 4, 7])

        lstm = LstmCellWithProjection(
            input_size=3,
            hidden_size=5,
            cell_size=7,
            memory_cell_clip_value=2,
            state_projection_clip_value=1,
        )
        output_sequence, lstm_state = lstm(
            input_tensor, [5, 4, 2, 1],
            (initial_hidden_state, initial_memory_state))
        numpy.testing.assert_array_equal(
            output_sequence.data[1, 4:, :].numpy(), 0.0)
        numpy.testing.assert_array_equal(
            output_sequence.data[2, 2:, :].numpy(), 0.0)
        numpy.testing.assert_array_equal(
            output_sequence.data[3, 1:, :].numpy(), 0.0)

        # Test the state clipping.
        numpy.testing.assert_array_less(output_sequence.data.numpy(), 1.0)
        numpy.testing.assert_array_less(-output_sequence.data.numpy(), 1.0)

        # LSTM state should be (num_layers, batch_size, hidden_size)
        assert list(lstm_state[0].size()) == [1, 4, 5]
        # LSTM memory cell should be (num_layers, batch_size, cell_size)
        assert list(lstm_state[1].size()) == [1, 4, 7]

        # Test the cell clipping.
        numpy.testing.assert_array_less(lstm_state[0].data.numpy(), 2.0)
        numpy.testing.assert_array_less(-lstm_state[0].data.numpy(), 2.0)

def test_unidirectional_single_layer_lstm_with_allennlp():
    for lstm_cls, is_cpp in [
        (UnidirectionalSingleLayerLstm, True),
        (PyUnidirectionalSingleLayerLstm, False),
    ]:
        input_tensor = torch.rand(4, 5, 3)
        input_tensor[1, 4:, :] = 0.
        input_tensor[2, 2:, :] = 0.
        input_tensor[3, 1:, :] = 0.

        inputs = pack_padded_sequence(input_tensor, [5, 4, 2, 1],
                                      batch_first=True)

        initial_hidden_state = torch.ones([1, 4, 5])
        initial_cell_state = torch.ones([1, 4, 7])

        for go_forward in [True, False]:
            allennlp_lstm = LstmCellWithProjection(
                input_size=3,
                hidden_size=5,
                cell_size=7,
                go_forward=go_forward,
                memory_cell_clip_value=2,
                state_projection_clip_value=1,
            )
            lstm = lstm_cls(
                input_size=3,
                hidden_size=5,
                cell_size=7,
                go_forward=go_forward,
                cell_clip=2,
                proj_clip=1,
            )

            if is_cpp:
                lstm.named_parameters()['input_linearity_weight'].data.copy_(
                    allennlp_lstm.input_linearity.weight)
                lstm.named_parameters()['hidden_linearity_weight'].data.copy_(
                    allennlp_lstm.state_linearity.weight)
                lstm.named_parameters()['hidden_linearity_bias'].data.copy_(
                    allennlp_lstm.state_linearity.bias)
                lstm.named_parameters()['proj_linearity_weight'].data.copy_(
                    allennlp_lstm.state_projection.weight)
            else:
                lstm.input_linearity_weight.data.copy_(
                    allennlp_lstm.input_linearity.weight)
                lstm.hidden_linearity_weight.data.copy_(
                    allennlp_lstm.state_linearity.weight)
                lstm.hidden_linearity_bias.data.copy_(
                    allennlp_lstm.state_linearity.bias)
                lstm.proj_linearity_weight.data.copy_(
                    allennlp_lstm.state_projection.weight)

            outputs, lstm_state = lstm(
                inputs.data,
                inputs.batch_sizes,
                (initial_hidden_state, initial_cell_state),
            )
            output_sequence, _batch_sizes = pad_packed_sequence(
                PackedSequence(outputs, inputs.batch_sizes),
                batch_first=True,
            )

            allennlp_output_sequence, allennlp_lstm_state = allennlp_lstm(
                input_tensor,
                [5, 4, 2, 1],
                (initial_hidden_state, initial_cell_state),
            )

            numpy.testing.assert_array_equal(
                output_sequence.data.numpy(),
                allennlp_output_sequence.data.numpy())
            numpy.testing.assert_array_equal(
                lstm_state[0].data.numpy(),
                allennlp_lstm_state[0].data.numpy())
            numpy.testing.assert_array_equal(
                lstm_state[1].data.numpy(),
                allennlp_lstm_state[1].data.numpy())