Example #1
import torch
from torch.autograd import Variable
from torch.nn import LSTM

# `ChannelManager` is an external helper this example relies on; it is not
# defined in this excerpt.

def main():
    # lstm=LayeredLSTM()
    lstm = LSTM(input_size=47764,
                hidden_size=512,
                num_layers=8,
                batch_first=True)

    # This has no new-sequence reset.
    # I wonder whether the gradient information will grow indefinitely.

    # Even so, I think detaching at the beginning of each new sequence is an arbitrary decision.
    optim = torch.optim.Adam(lstm.parameters())
    lstm.cuda()
    criterion = torch.nn.SmoothL1Loss()

    cm = ChannelManager()
    cm.add_channels(2)
    cm.cat_call("init_states")

    for i in range(2000):
        print(i)
        optim.zero_grad()
        target = Variable(torch.rand(2, 1, 512)).cuda()
        output, states = lstm(cm.cat_call("get_input"),
                              cm.cat_call("get_states", 1))
        cm.distribute_call("push_states", states, dim=1)
        loss = criterion(output, target)
        loss.backward()
        optim.step()

        if i % 3 == 0:
            cm[0].new_sequence_reset()

        if i % 5 == 0:
            cm[1].new_sequence_reset()
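
The comments above ask whether the autograd graph keeps growing when the recurrent states are never detached. `ChannelManager` and its `new_sequence_reset` method are not defined in this excerpt; the sketch below is only a guess at what a single channel might do, assuming a reset simply detaches the cached `(h, c)` pair so backpropagation through time is truncated at sequence boundaries.

import torch

class Channel:
    """Hypothetical single channel: caches the latest LSTM state and can
    detach it at a sequence boundary (truncated BPTT)."""

    def __init__(self, num_layers=8, batch=1, hidden=512, device="cuda"):
        self.h = torch.zeros(num_layers, batch, hidden, device=device)
        self.c = torch.zeros(num_layers, batch, hidden, device=device)

    def push_states(self, states):
        # Remember the most recent (h, c) returned by the LSTM.
        self.h, self.c = states

    def get_states(self):
        return self.h, self.c

    def new_sequence_reset(self):
        # detach() cuts the graph: earlier timesteps are no longer reachable
        # from future backward passes, so memory stops accumulating.
        self.h = self.h.detach()
        self.c = self.c.detach()

Without such a detach, a second loss.backward() would try to walk back through graph segments the first backward already freed, which normally raises the "backward through the graph a second time" error unless retain_graph=True is passed.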
Example #2
# Uses the same imports as Example #1; `sd` is an external helper assumed to
# detach the saved LSTM states in place (see the sketch after this example).
def main0():
    # lstm=LayeredLSTM()
    lstm = LSTM(input_size=47764,
                hidden_size=128,
                num_layers=8,
                batch_first=True)

    # This has no new-sequence reset.
    # I wonder whether the gradient information will grow indefinitely.

    # Even so, I think detaching at the beginning of each new sequence is an arbitrary decision.
    optim = torch.optim.Adam(lstm.parameters())
    lstm.cuda()
    h0 = Variable(torch.rand(8, 2, 128)).cuda()
    c0 = Variable(torch.rand(8, 2, 128)).cuda()
    states = (h0, c0)
    savedetach = [states]

    sd(savedetach)

    for m in range(1000):
        for _ in range(10):
            print(_)
            optim.zero_grad()
            input = Variable(torch.rand(2, 1, 47764)).cuda()
            target = Variable(torch.rand(2, 1, 128)).cuda()
            output, states = lstm(input, savedetach[-1])

            savedetach.append(states)
            sd(savedetach)
            criterion = torch.nn.SmoothL1Loss()
            loss = criterion(output, target)
            loss.backward()
            optim.step()

        # Start a fresh sequence: drop the saved states and re-initialize them
        # with the same (num_layers, batch, hidden) shape used above.
        savedetach.clear()
        h0 = Variable(torch.rand(8, 2, 128)).cuda()
        c0 = Variable(torch.rand(8, 2, 128)).cuda()
        states = (h0, c0)
        savedetach.append(states)
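
The helper `sd` is not shown here. From the way it is called right after fresh states are appended, it presumably detaches the most recently saved `(h, c)` pair in place, so each `loss.backward()` only reaches back to the last save point. A minimal sketch under that assumption:

def sd(saved_states):
    # Detach the newest saved (h, c) tuple in place so the autograd graph
    # does not grow across iterations (truncated backpropagation through time).
    if saved_states:
        h, c = saved_states[-1]
        saved_states[-1] = (h.detach(), c.detach())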
Example #3
import unittest

import torch
import torch.nn as nn
from torch.nn import LSTM
from torch.testing import assert_allclose

# DPLSTM ships with Opacus; the exact import path may differ between versions.
from opacus.layers import DPLSTM


class SimpleDPLSTMTest(unittest.TestCase):
    def setUp(self):
        self.SEQ_LENGTH = 20
        self.INPUT_DIM = 25
        self.MINIBATCH_SIZE = 30
        self.LSTM_OUT_DIM = 12
        self.NUM_LAYERS = 1
        self.bidirectional = False
        self.batch_first = False

        self.num_directions = 2 if self.bidirectional else 1
        self.h_init = torch.randn(
            self.NUM_LAYERS * self.num_directions,
            self.MINIBATCH_SIZE,
            self.LSTM_OUT_DIM,
        )
        self.c_init = torch.randn(
            self.NUM_LAYERS * self.num_directions,
            self.MINIBATCH_SIZE,
            self.LSTM_OUT_DIM,
        )

        self.original_lstm = LSTM(
            self.INPUT_DIM,
            self.LSTM_OUT_DIM,
            batch_first=self.batch_first,
            num_layers=self.NUM_LAYERS,
            bidirectional=self.bidirectional,
        )
        self.dp_lstm = DPLSTM(
            self.INPUT_DIM,
            self.LSTM_OUT_DIM,
            batch_first=self.batch_first,
            num_layers=self.NUM_LAYERS,
            bidirectional=self.bidirectional,
        )

        self.dp_lstm.load_state_dict(self.original_lstm.state_dict())

    def _reset_seeds(self):
        torch.manual_seed(1337)
        torch.cuda.manual_seed(1337)

    def test_lstm_forward(self):
        x = (
            torch.randn(self.MINIBATCH_SIZE, self.SEQ_LENGTH, self.INPUT_DIM)
            if self.batch_first
            else torch.randn(self.SEQ_LENGTH, self.MINIBATCH_SIZE, self.INPUT_DIM)
        )
        hidden = (self.h_init, self.c_init)

        out, (hn, cn) = self.original_lstm(x, hidden)
        dp_out, (dp_hn, dp_cn) = self.dp_lstm(x, hidden)

        outputs_to_test = [
            (out, dp_out, "LSTM and DPLSTM output"),
            (hn, dp_hn, "LSTM and DPLSTM state `h`"),
            (cn, dp_cn, "LSTM and DPLSTM state `c`"),
        ]

        for output, dp_output, message in outputs_to_test:
            assert_allclose(
                actual=dp_output.expand_as(output),
                expected=output,
                atol=10e-6,
                rtol=10e-5,
                msg=f"Tensor value mismatch between {message}",
            )

    def test_lstm_backward(self):
        x = (
            torch.randn(self.MINIBATCH_SIZE, self.SEQ_LENGTH, self.INPUT_DIM)
            if self.batch_first
            else torch.randn(self.SEQ_LENGTH, self.MINIBATCH_SIZE, self.INPUT_DIM)
        )
        criterion = nn.MSELoss()

        hidden = (self.h_init, self.c_init)

        out, (hn, cn) = self.original_lstm(x, hidden)
        y = torch.zeros_like(out)
        loss = criterion(out, y)
        loss.backward()

        dp_out, (dp_hn, dp_cn) = self.dp_lstm(x, hidden)
        dp_loss = criterion(dp_out, y)
        dp_loss.backward()

        dp_lstm_params = dict(self.dp_lstm.named_parameters())
        for param_name, param in self.original_lstm.named_parameters():
            dp_param = dp_lstm_params[param_name]
            assert_allclose(
                actual=dp_param,
                expected=param,
                atol=10e-5,
                rtol=10e-3,
                msg=f"Tensor value mismatch in the parameter '{param_name}'",
            )
            assert_allclose(
                actual=dp_param.grad,
                expected=param.grad,
                atol=10e-6,
                rtol=10e-5,
                msg=f"Tensor value mismatch in the gradient of parameter '{param_name}'",
            )

    def test_lstm_param_update(self):
        x = (
            torch.randn(self.MINIBATCH_SIZE, self.SEQ_LENGTH, self.INPUT_DIM)
            if self.batch_first
            else torch.randn(self.SEQ_LENGTH, self.MINIBATCH_SIZE, self.INPUT_DIM)
        )
        criterion = nn.MSELoss()

        optimizer = torch.optim.SGD(self.original_lstm.parameters(), lr=0.5)
        dp_optimizer = torch.optim.SGD(self.dp_lstm.parameters(), lr=0.5)

        # Train original LSTM for one step
        logits, (h_n, c_n) = self.original_lstm(x)
        y = torch.zeros_like(logits)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()

        # Train DP LSTM for one step
        dp_logits, (dp_h_n, dp_c_n) = self.dp_lstm(x)
        dp_loss = criterion(dp_logits, y)
        dp_loss.backward()
        dp_optimizer.step()

        dp_lstm_params = dict(self.dp_lstm.named_parameters())
        for param_name, param in self.original_lstm.named_parameters():
            dp_param = dp_lstm_params[param_name]
            assert_allclose(
                actual=dp_param,
                expected=param,
                atol=10e-6,
                rtol=10e-5,
                msg=f"Tensor value mismatch in the parameter '{param_name}'",
            )
            assert_allclose(
                actual=dp_param.grad,
                expected=param.grad,
                atol=10e-6,
                rtol=10e-5,
                msg=f"Tensor value mismatch in the gradient of parameter '{param_name}'",
            )
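
The excerpt stops at the class definition. If the file is run directly, a standard unittest entry point (assumed here, not part of the excerpt) executes all three checks:

if __name__ == "__main__":
    unittest.main()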