Example #1
    def setUp(self):
        self.SEQ_LENGTH = 20
        self.INPUT_DIM = 25
        self.MINIBATCH_SIZE = 30
        self.LSTM_OUT_DIM = 12

        self.h_init = torch.randn(1, self.MINIBATCH_SIZE, self.LSTM_OUT_DIM)
        self.c_init = torch.randn(1, self.MINIBATCH_SIZE, self.LSTM_OUT_DIM)
        hidden = (self.h_init, self.c_init)

        self.x = torch.randn(self.MINIBATCH_SIZE, self.SEQ_LENGTH,
                             self.INPUT_DIM)

        self.original_lstm = LSTM(self.INPUT_DIM,
                                  self.LSTM_OUT_DIM,
                                  batch_first=True)
        self.dp_lstm = DPLSTM(self.INPUT_DIM,
                              self.LSTM_OUT_DIM,
                              batch_first=True)

        self.dp_lstm.initialize_weights([
            self.original_lstm.weight_ih_l0,
            self.original_lstm.weight_hh_l0,
            self.original_lstm.bias_ih_l0,
            self.original_lstm.bias_hh_l0,
        ])

        self.lstm_out, self.lstm_state = self.original_lstm(self.x, hidden)
        self.dplstm_out, self.dplstm_state = self.dp_lstm(self.x, hidden)
Example #2
    def __init__(
        self,
        embedding_size,
        hidden_size,
        output_size,
        num_lstm_layers=1,
        bidirectional=False,
        vocab_size=VOCAB_SIZE,
    ):
        super().__init__()

        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.vocab_size = vocab_size

        self.embedding = nn.Embedding(vocab_size, embedding_size)
        self.lstm = DPLSTM(
            embedding_size,
            hidden_size,
            num_layers=num_lstm_layers,
            bidirectional=bidirectional,
            batch_first=True,
        )
        self.out_layer = nn.Linear(hidden_size, output_size)
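
The constructor in this example wires an embedding table, a DPLSTM and a linear head together, but the snippet ends before any forward pass. The sketch below shows how those three layers are typically chained when batch_first=True. The toy sizes, the last-time-step readout and the opacus.layers import path are assumptions and are not taken from the original class.

import torch
import torch.nn as nn
from opacus.layers import DPLSTM  # assumed import path

# Assumed toy sizes; the original class receives these via its constructor.
vocab_size, embedding_size, hidden_size, output_size = 1000, 16, 32, 4

embedding = nn.Embedding(vocab_size, embedding_size)
lstm = DPLSTM(embedding_size, hidden_size, num_layers=1, batch_first=True)
out_layer = nn.Linear(hidden_size, output_size)

x = torch.randint(0, vocab_size, (8, 20))   # [batch=8, seq_len=20] token ids
emb = embedding(x)                          # [8, 20, embedding_size]
lstm_out, (h_n, c_n) = lstm(emb)            # lstm_out: [8, 20, hidden_size]
logits = out_layer(lstm_out[:, -1, :])      # classify from the last time step
print(logits.shape)                         # torch.Size([8, 4])
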
Example #3
    def test_lstm_batch_second(self):
        # input size: 25, output size: 12, minibatch: 30, sequence length: 20

        # Test batch_first=False case
        layer = DPLSTM(25, 12, 1, batch_first=False)
        x = torch.randn(20, 30, 25)
        self._check_one_layer(layer, x, batch_first=False)
Example #4
    def __init__(self, vocab_size: int, batch_size):
        super().__init__()
        # Vocabulary size includes the four special tokens <unk>, <pad>, <eos>, <sos>
        self.emb = nn.Embedding(vocab_size + 4, 100)
        self.h_init = torch.randn(1, batch_size, 100).cuda()
        self.c_init = torch.randn(1, batch_size, 100).cuda()
        self.hidden = (self.h_init, self.c_init)
        self.lstm = DPLSTM(100, 100, batch_first=True)
        self.fc1 = nn.Linear(100, 2)
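
The module above pre-builds its initial hidden and cell state on the GPU and stores them as self.hidden; its forward pass is not shown. Whatever that forward looks like, the relevant mechanic is that DPLSTM accepts an explicit (h_0, c_0) pair shaped [num_layers * num_directions, batch, hidden_size], exactly like nn.LSTM. A CPU-only sketch of that call, reusing the snippet's 100-dimensional sizes (everything else is assumed):

import torch
from opacus.layers import DPLSTM  # assumed import path

batch_size, seq_len = 4, 7
lstm = DPLSTM(100, 100, batch_first=True)

h0 = torch.randn(1, batch_size, 100)   # [num_layers * num_directions, batch, hidden]
c0 = torch.randn(1, batch_size, 100)

x = torch.randn(batch_size, seq_len, 100)   # [batch, seq_len, input_size]
out, (h_n, c_n) = lstm(x, (h0, c0))
print(out.shape, h_n.shape)                 # [4, 7, 100] and [1, 4, 100]
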
Example #5
    def test_lstm_batch_second(self):
        # input size: 25, output size: 12, minibatch: 30, sequence length: 20
        h_init = torch.zeros(1, 30, 12)
        c_init = torch.zeros(1, 30, 12)
        hidden = (h_init, c_init)

        # Test batch_first=False case
        layer = DPLSTM(25, 12, 1, batch_first=False)
        x = torch.randn(20, 30, 25)
        self._check_one_layer(layer, x, hidden, batch_first=False)
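
Both batch-second tests delegate to a _check_one_layer helper that the snippets never show. The function below is a hedged, free-standing guess at what such a helper might do, inferred only from the call sites above: mirror the DPLSTM with an nn.LSTM that shares its weights, run both, and compare outputs and final states. The attribute reads, the weight copy via load_state_dict and the tolerances are assumptions; the real helper in the test suite may differ.

import torch
import torch.nn as nn
from opacus.layers import DPLSTM  # assumed import path


def check_one_layer(dp_layer, x, hidden=None, batch_first=True, atol=1e-5, rtol=1e-3):
    # Assumes DPLSTM exposes nn.LSTM-like attributes (input_size, hidden_size, ...).
    ref = nn.LSTM(
        dp_layer.input_size,
        dp_layer.hidden_size,
        num_layers=dp_layer.num_layers,
        batch_first=batch_first,
        bidirectional=dp_layer.bidirectional,
    )
    # DPLSTM parameter names mirror nn.LSTM's (as later examples on this page rely on),
    # so the reference weights can be copied over directly.
    dp_layer.load_state_dict(ref.state_dict())

    out, (h_n, c_n) = ref(x, hidden)
    dp_out, (dp_h_n, dp_c_n) = dp_layer(x, hidden)

    torch.testing.assert_close(dp_out, out, atol=atol, rtol=rtol)
    torch.testing.assert_close(dp_h_n, h_n, atol=atol, rtol=rtol)
    torch.testing.assert_close(dp_c_n, c_n, atol=atol, rtol=rtol)


# Example use, mirroring the batch_first=False call above ([seq_len, batch, features]):
layer = DPLSTM(25, 12, 1, batch_first=False)
x = torch.randn(20, 30, 25)
check_one_layer(layer, x, batch_first=False)
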
Example #6
    def __init__(self, input_size, hidden_size, output_size, n_letters, batch_size):
        super(CharNNClassifier, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.batch_size = batch_size

        self.embedding = nn.Embedding(n_letters, input_size)
        self.lstm = DPLSTM(input_size, hidden_size, batch_first=False)
        self.out_layer = nn.Linear(hidden_size, output_size)
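
This classifier builds its DPLSTM with batch_first=False, so every tensor that flows through it uses the [seq_len, batch, features] layout rather than the batch-first layout of the earlier examples. A short sketch of that data flow; all sizes are assumed (the original constructor receives them as arguments), and the last-time-step readout is a guess at what the missing forward method would do.

import torch
import torch.nn as nn
from opacus.layers import DPLSTM  # assumed import path

n_letters, input_size, hidden_size, output_size = 60, 32, 64, 18

embedding = nn.Embedding(n_letters, input_size)
lstm = DPLSTM(input_size, hidden_size, batch_first=False)
out_layer = nn.Linear(hidden_size, output_size)

x = torch.randint(0, n_letters, (15, 8))   # [seq_len=15, batch=8] character ids
emb = embedding(x)                         # [15, 8, input_size]
out, (h_n, c_n) = lstm(emb)                # out: [15, 8, hidden_size]
logits = out_layer(out[-1])                # last time step -> [8, output_size]
print(logits.shape)                        # torch.Size([8, 18])
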
Example #7
    def setUp(self):
        self.SEQ_LENGTH = 20
        self.INPUT_DIM = 25
        self.MINIBATCH_SIZE = 30
        self.LSTM_OUT_DIM = 12
        self.NUM_LAYERS = 3
        self.bidirectional = True
        self.batch_first = False

        self.num_directions = 2 if self.bidirectional else 1

        self.h_init = torch.randn(
            self.NUM_LAYERS * self.num_directions,
            self.MINIBATCH_SIZE,
            self.LSTM_OUT_DIM,
        )
        self.c_init = torch.randn(
            self.NUM_LAYERS * self.num_directions,
            self.MINIBATCH_SIZE,
            self.LSTM_OUT_DIM,
        )

        self.original_lstm = LSTM(
            self.INPUT_DIM,
            self.LSTM_OUT_DIM,
            batch_first=self.batch_first,
            num_layers=self.NUM_LAYERS,
            bidirectional=self.bidirectional,
        )
        self.dp_lstm = DPLSTM(
            self.INPUT_DIM,
            self.LSTM_OUT_DIM,
            batch_first=self.batch_first,
            num_layers=self.NUM_LAYERS,
            bidirectional=self.bidirectional,
        )

        self.dp_lstm.load_state_dict(self.original_lstm.state_dict())
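
This setUp builds a three-layer bidirectional LSTM/DPLSTM pair and copies the weights across, but the snippet stops before any assertion. The method below sketches the kind of forward comparison that would plausibly follow, mainly to make the bidirectional shapes explicit; the method itself and its tolerances are assumptions, not part of the original test.

    def test_lstm_forward(self):
        # batch_first=False here, so the input is [seq_len, batch, input_dim].
        x = torch.randn(self.SEQ_LENGTH, self.MINIBATCH_SIZE, self.INPUT_DIM)
        hidden = (self.h_init, self.c_init)

        out, (h_n, c_n) = self.original_lstm(x, hidden)
        dp_out, (dp_h_n, dp_c_n) = self.dp_lstm(x, hidden)

        # out: [20, 30, 2 * 12]  -> (seq, batch, num_directions * hidden)
        # h_n: [3 * 2, 30, 12]   -> (num_layers * num_directions, batch, hidden)
        torch.testing.assert_close(dp_out, out, atol=1e-5, rtol=1e-3)
        torch.testing.assert_close(dp_h_n, h_n, atol=1e-5, rtol=1e-3)
        torch.testing.assert_close(dp_c_n, c_n, atol=1e-5, rtol=1e-3)
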
Example #8
class DPLSTMTest(unittest.TestCase):
    def setUp(self):
        self.SEQ_LENGTH = 20
        self.INPUT_DIM = 25
        self.MINIBATCH_SIZE = 30
        self.LSTM_OUT_DIM = 12

        self.h_init = torch.randn(1, self.MINIBATCH_SIZE, self.LSTM_OUT_DIM)
        self.c_init = torch.randn(1, self.MINIBATCH_SIZE, self.LSTM_OUT_DIM)
        hidden = (self.h_init, self.c_init)

        self.x = torch.randn(self.MINIBATCH_SIZE, self.SEQ_LENGTH,
                             self.INPUT_DIM)

        self.original_lstm = LSTM(self.INPUT_DIM,
                                  self.LSTM_OUT_DIM,
                                  batch_first=True)
        self.dp_lstm = DPLSTM(self.INPUT_DIM,
                              self.LSTM_OUT_DIM,
                              batch_first=True)

        self.dp_lstm.initialize_weights([
            self.original_lstm.weight_ih_l0,
            self.original_lstm.weight_hh_l0,
            self.original_lstm.bias_ih_l0,
            self.original_lstm.bias_hh_l0,
        ])

        self.lstm_out, self.lstm_state = self.original_lstm(self.x, hidden)
        self.dplstm_out, self.dplstm_state = self.dp_lstm(self.x, hidden)

    def _reset_seeds(self):
        torch.manual_seed(1337)
        torch.cuda.manual_seed(1337)

    def test_lstm_forward(self):
        params_to_test = [
            (self.lstm_out, self.dplstm_out, "LSTM and DPLSTM output"),
            (self.lstm_state[0], self.dplstm_state[0],
             "LSTM and DPLSTM state `h`"),
            (self.lstm_state[1], self.dplstm_state[1],
             "LSTM and DPLSTM state `c`"),
        ]

        for param, dp_param, message in params_to_test:
            assert_allclose(
                actual=param,
                expected=dp_param.expand_as(param),
                atol=10e-5,
                rtol=10e-3,
                msg=f"Tensor value mismatch between {message}",
            )

    def test_lstm_backward(self):
        y = torch.randn(self.MINIBATCH_SIZE, self.SEQ_LENGTH,
                        self.LSTM_OUT_DIM)
        criterion = nn.MSELoss()

        loss = criterion(y, self.lstm_out)
        loss.backward()

        dp_loss = criterion(y, self.dplstm_out)
        dp_loss.backward()

        params_to_test = [
            (
                self.original_lstm.weight_ih_l0.grad,
                self.dp_lstm.ih.weight.grad,
                "LSTM and DPLSTM `weight_ih_l0` gradients",
            ),
            (
                self.original_lstm.bias_ih_l0.grad,
                self.dp_lstm.ih.bias.grad,
                "LSTM and DPLSTM `bias_ih_l0` gradients",
            ),
            (
                self.original_lstm.weight_hh_l0.grad,
                self.dp_lstm.hh.weight.grad,
                "LSTM and DPLSTM `weight_hh_l0` gradients",
            ),
            (
                self.original_lstm.bias_hh_l0.grad,
                self.dp_lstm.hh.bias.grad,
                "LSTM and DPLSTM `bias_hh_l0` gradients",
            ),
        ]

        for param, dp_param, message in params_to_test:
            assert_allclose(
                actual=param,
                expected=dp_param,
                atol=10e-5,
                rtol=10e-3,
                msg=f"Tensor value mismatch between {message}",
            )
Example #9
class SimpleDPLSTMTest(unittest.TestCase):
    def setUp(self):
        self.SEQ_LENGTH = 20
        self.INPUT_DIM = 25
        self.MINIBATCH_SIZE = 30
        self.LSTM_OUT_DIM = 12
        self.NUM_LAYERS = 1
        self.bidirectional = False
        self.batch_first = False

        self.num_directions = 2 if self.bidirectional else 1
        self.h_init = torch.randn(
            self.NUM_LAYERS * self.num_directions,
            self.MINIBATCH_SIZE,
            self.LSTM_OUT_DIM,
        )
        self.c_init = torch.randn(
            self.NUM_LAYERS * self.num_directions,
            self.MINIBATCH_SIZE,
            self.LSTM_OUT_DIM,
        )

        self.original_lstm = LSTM(
            self.INPUT_DIM,
            self.LSTM_OUT_DIM,
            batch_first=self.batch_first,
            num_layers=self.NUM_LAYERS,
            bidirectional=self.bidirectional,
        )
        self.dp_lstm = DPLSTM(
            self.INPUT_DIM,
            self.LSTM_OUT_DIM,
            batch_first=self.batch_first,
            num_layers=self.NUM_LAYERS,
            bidirectional=self.bidirectional,
        )

        self.dp_lstm.load_state_dict(self.original_lstm.state_dict())

    def _reset_seeds(self):
        torch.manual_seed(1337)
        torch.cuda.manual_seed(1337)

    def test_lstm_forward(self):
        x = (
            torch.randn(self.MINIBATCH_SIZE, self.SEQ_LENGTH, self.INPUT_DIM)
            if self.batch_first
            else torch.randn(self.SEQ_LENGTH, self.MINIBATCH_SIZE, self.INPUT_DIM)
        )
        hidden = (self.h_init, self.c_init)

        out, (hn, cn) = self.original_lstm(x, hidden)
        dp_out, (dp_hn, dp_cn) = self.dp_lstm(x, hidden)

        outputs_to_test = [
            (out, dp_out, "LSTM and DPLSTM output"),
            (hn, dp_hn, "LSTM and DPLSTM state `h`"),
            (cn, dp_cn, "LSTM and DPLSTM state `c`"),
        ]

        for output, dp_output, message in outputs_to_test:
            assert_allclose(
                actual=dp_output.expand_as(output),
                expected=output,
                atol=10e-6,
                rtol=10e-5,
                msg=f"Tensor value mismatch between {message}",
            )

    def test_lstm_backward(self):
        x = (
            torch.randn(self.MINIBATCH_SIZE, self.SEQ_LENGTH, self.INPUT_DIM)
            if self.batch_first
            else torch.randn(self.SEQ_LENGTH, self.MINIBATCH_SIZE, self.INPUT_DIM)
        )
        criterion = nn.MSELoss()

        hidden = (self.h_init, self.c_init)

        out, (hn, cn) = self.original_lstm(x, hidden)
        y = torch.zeros_like(out)
        loss = criterion(out, y)
        loss.backward()

        dp_out, (dp_hn, dp_cn) = self.dp_lstm(x, hidden)
        dp_loss = criterion(dp_out, y)
        dp_loss.backward()

        dp_lstm_params = dict(self.dp_lstm.named_parameters())
        for param_name, param in self.original_lstm.named_parameters():
            dp_param = dp_lstm_params[param_name]
            assert_allclose(
                actual=dp_param,
                expected=param,
                atol=10e-5,
                rtol=10e-3,
                msg=f"Tensor value mismatch in the parameter '{param_name}'",
            )
            assert_allclose(
                actual=dp_param.grad,
                expected=param.grad,
                atol=10e-6,
                rtol=10e-5,
                msg=f"Tensor value mismatch in the gradient of parameter '{param_name}'",
            )

    def test_lstm_param_update(self):
        x = (
            torch.randn(self.MINIBATCH_SIZE, self.SEQ_LENGTH, self.INPUT_DIM)
            if self.batch_first
            else torch.randn(self.SEQ_LENGTH, self.MINIBATCH_SIZE, self.INPUT_DIM)
        )
        criterion = nn.MSELoss()

        optimizer = torch.optim.SGD(self.original_lstm.parameters(), lr=0.5)
        dp_optimizer = torch.optim.SGD(self.dp_lstm.parameters(), lr=0.5)

        # Train original LSTM for one step
        logits, (h_n, c_n) = self.original_lstm(x)
        y = torch.zeros_like(logits)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()

        # Train DP LSTM for one step
        dp_logits, (dp_h_n, dp_c_n) = self.dp_lstm(x)
        dp_loss = criterion(dp_logits, y)
        dp_loss.backward()
        dp_optimizer.step()

        dp_lstm_params = dict(self.dp_lstm.named_parameters())
        for param_name, param in self.original_lstm.named_parameters():
            dp_param = dp_lstm_params[param_name]
            assert_allclose(
                actual=dp_param,
                expected=param,
                atol=10e-6,
                rtol=10e-5,
                msg=f"Tensor value mismatch in the parameter '{param_name}'",
            )
            assert_allclose(
                actual=dp_param.grad,
                expected=param.grad,
                atol=10e-6,
                rtol=10e-5,
                msg=f"Tensor value mismatch in the gradient of parameter '{param_name}'",
            )
Example #10
    def __init__(self, *args, **kwargs):
        super().__init__()
        # Thin wrapper: every constructor argument is forwarded to DPLSTM.
        self.dplstm = DPLSTM(*args, **kwargs)
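
The wrapper's constructor simply hands every argument on to DPLSTM; its forward is not shown. A minimal sketch of the delegation such a wrapper would presumably add (the method below is an assumption, not part of the original snippet):

    def forward(self, x, state_init=None):
        # Pass straight through to the wrapped DPLSTM; returns (output, (h_n, c_n)).
        return self.dplstm(x, state_init)
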
Example #11
    def test_lstm(
        self,
        batch_size: int,
        seq_len: int,
        emb_size: int,
        hidden_size: int,
        num_layers: int,
        bidirectional: bool,
        bias: bool,
        batch_first: bool,
        zero_init: bool,
        packed_input_flag: int,
    ):
        lstm = nn.LSTM(
            emb_size,
            hidden_size,
            num_layers=num_layers,
            batch_first=batch_first,
            bidirectional=bidirectional,
            bias=bias,
        )
        dp_lstm = DPLSTM(
            emb_size,
            hidden_size,
            num_layers=num_layers,
            batch_first=batch_first,
            bidirectional=bidirectional,
            bias=bias,
        )

        dp_lstm.load_state_dict(lstm.state_dict())

        if packed_input_flag == 0:
            x = (torch.randn([batch_size, seq_len, emb_size]) if batch_first
                 else torch.randn([seq_len, batch_size, emb_size]))
        elif packed_input_flag == 1:
            x = _gen_packed_data(batch_size,
                                 seq_len,
                                 emb_size,
                                 batch_first,
                                 sorted_=True)
        elif packed_input_flag == 2:
            x = _gen_packed_data(batch_size,
                                 seq_len,
                                 emb_size,
                                 batch_first,
                                 sorted_=False)

        if zero_init:
            self.compare_forward_outputs(
                lstm,
                dp_lstm,
                x,
                output_names=("out", "hn", "cn"),
                atol=1e-5,
                rtol=1e-3,
            )

            self.compare_gradients(
                lstm,
                dp_lstm,
                lstm_train_fn,
                x,
                atol=1e-5,
                rtol=1e-3,
            )

        else:
            num_directions = 2 if bidirectional else 1
            h0 = torch.randn(
                [num_layers * num_directions, batch_size, hidden_size])
            c0 = torch.randn(
                [num_layers * num_directions, batch_size, hidden_size])
            self.compare_forward_outputs(
                lstm,
                dp_lstm,
                x,
                (h0, c0),
                output_names=("out", "hn", "cn"),
                atol=1e-5,
                rtol=1e-3,
            )
            self.compare_gradients(
                lstm,
                dp_lstm,
                lstm_train_fn,
                x,
                (h0, c0),
                atol=1e-5,
                rtol=1e-3,
            )
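
The packed-input branches call a _gen_packed_data helper that the snippet does not include. Below is a hedged sketch of what such a generator might look like, inferred only from its call signature above (batch_size, seq_len, emb_size, batch_first, sorted_); the real helper in the test suite may build its data differently.

import torch
from torch.nn.utils.rnn import pack_padded_sequence


def _gen_packed_data(batch_size, max_seq_len, emb_size, batch_first, sorted_):
    # Random per-sequence lengths in [1, max_seq_len]; keep one sequence at the
    # maximum length so the packed batch spans the full time axis.
    lengths = torch.randint(1, max_seq_len + 1, (batch_size,))
    lengths[0] = max_seq_len
    if sorted_:
        lengths, _ = torch.sort(lengths, descending=True)

    if batch_first:
        padded = torch.randn(batch_size, max_seq_len, emb_size)
    else:
        padded = torch.randn(max_seq_len, batch_size, emb_size)

    # pack_padded_sequence requires descending lengths unless enforce_sorted=False.
    return pack_padded_sequence(
        padded, lengths, batch_first=batch_first, enforce_sorted=sorted_
    )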