def test_backward_lstm_cell(self, sizes, _seed):
    """Check that LSTMCellNNCF produces the same gradients as nn.LSTMCell.

    Steps both cells through the sequence; at every step backprops the
    summed cell output and compares the gradients of the initial hidden
    state (h0, c0) and of all four weight/bias tensors.
    """
    p = sizes
    ref_data = TestLSTMCell.generate_lstm_data(p,
                                               batch_first=False,
                                               is_backward=True)
    # Clone inputs outside the autograd graph so the test side has its
    # own leaf tensors with independent .grad accumulators.
    with torch.no_grad():
        test_data = LSTMTestData(*clone_test_data(ref_data))

    ref_rnn = nn.LSTMCell(p.input_size, p.hidden_size)
    TestLSTMCell.set_weights(ref_rnn, ref_data)
    test_rnn = LSTMCellNNCF(p.input_size, p.hidden_size)
    TestLSTMCell.set_weights(test_rnn, test_data)

    for i in range(p.seq_length):
        ref_result = ref_rnn(ref_data.x[i],
                             (ref_data.h0[0], ref_data.c0[0]))
        test_result = test_rnn(test_data.x[i],
                               (test_data.h0[0], test_data.c0[0]))
        # Gradients accumulate across steps on both sides identically,
        # so per-step comparison without zero_grad() is still valid.
        ref_result[0].sum().backward()
        test_result[0].sum().backward()
        ref_grads = get_grads([ref_data.h0[0], ref_data.c0[0]])
        ref_grads += get_grads([
            ref_rnn.weight_ih, ref_rnn.weight_hh, ref_rnn.bias_ih,
            ref_rnn.bias_hh
        ])
        # Fix: the test-side h0/c0 gradients must come from test_data,
        # not ref_data — the original copy-paste made the first two
        # comparisons trivially compare the reference against itself.
        test_grads = get_grads([test_data.h0[0], test_data.c0[0]])
        test_grads += get_grads([
            test_rnn.weight_ih, test_rnn.weight_hh, test_rnn.bias_ih,
            test_rnn.bias_hh
        ])
        for ref, test in zip(ref_grads, test_grads):
            torch.testing.assert_allclose(test, ref)
    def test_backward_lstm(self, sizes, bidirectional, num_layers, bias,
                           batch_first, variable_length, sorted_, is_cuda,
                           empty_initial, dropout, _seed):
        """Compare gradients of the NNCF-wrapped LSTM against nn.LSTM.

        Builds a reference nn.LSTM and a deep-copied, NNCF-replaced twin,
        runs one forward/backward pass through each, and asserts that all
        weight gradients (and, when an initial state is supplied, the
        h0/c0 gradients) agree within a loose tolerance.
        """
        params = sizes
        directions = 2 if bidirectional else 1

        ref_data = TestLSTMCell.generate_lstm_data(params, num_layers,
                                                   directions, variable_length,
                                                   sorted_, batch_first,
                                                   is_cuda, bias,
                                                   empty_initial, True)

        ref_rnn = nn.LSTM(input_size=params.input_size,
                          hidden_size=params.hidden_size,
                          num_layers=num_layers,
                          bidirectional=bidirectional,
                          batch_first=batch_first,
                          bias=bias,
                          dropout=dropout)
        self.set_ref_lstm_weights(ref_data, ref_rnn, num_layers, directions,
                                  bias)
        if empty_initial:
            ref_hidden = None
        else:
            ref_hidden = self.get_ref_lstm_hidden(ref_data)

        test_data = LSTMTestData(*clone_test_data(ref_data))
        test_rnn = replace_lstm(copy.deepcopy(ref_rnn))
        if empty_initial:
            test_hidden = None
        else:
            test_hidden = self.get_test_lstm_hidden(test_data)

        if is_cuda:
            ref_rnn.cuda()
            test_rnn.cuda()

        ref_output, _ = ref_rnn(ref_data.x, ref_hidden)
        test_output, _ = test_rnn(test_data.x, test_hidden)

        # Backprop a scalar derived from the first output element set.
        ref_output[0].sum().backward()
        test_output[0].sum().backward()

        ref_grads = get_grads(self.flatten_nested_lists(ref_rnn.all_weights))
        test_grads = get_grads(self.flatten_nested_lists(test_rnn.all_weights))
        if not empty_initial:
            # TODO: compare gradient of all hidden
            ref_grads += get_grads([ref_data.h0[0], ref_data.c0[0]])
            test_grads += get_grads([test_hidden[0][0], test_hidden[1][0]])
        for ref, test in zip(test_grads, ref_grads):
            torch.testing.assert_allclose(test, ref, rtol=1e-1, atol=1e-1)
# Example #3
        def test_quantize_symmetric_backward(self, _seed, is_signed,
                                             is_weights, input_size, bits,
                                             use_cuda, scale_mode):
            """Validate forward values and gradients of symmetric
            quantization against the asymmetric reference.

            Symmetric quantization is the special case of the asymmetric
            one with input_low = scale * level_low / level_high, so the
            asymmetric reference implementation is reused here.
            """
            ref_input = generate_input(input_size)
            ref_scale = self.generate_scale(ref_input, scale_mode, is_weights)
            level_low, level_high, levels = self.get_range_level(
                is_signed, is_weights, bits)
            test_input, test_scale = get_test_data(
                [ref_input, ref_scale], use_cuda, is_backward=True)

            # Translate the symmetric (scale) parametrization into the
            # asymmetric (input_low, input_range) one for the reference.
            ref_scale = abs(ref_scale) + EPS
            ref_input_low = ref_scale * (level_low / level_high)
            ref_input_range = ref_scale - ref_input_low

            ref_output = ReferenceQuantizeAsymmetric.forward(
                ref_input, ref_input_low, ref_input_range, levels)
            ref_grads = ReferenceQuantizeAsymmetric.backward(
                np.ones(input_size), ref_input, ref_input_low,
                ref_input_range, ref_output, level_low, level_high, True)
            # Symmetric mode has no input_low parameter — drop its gradient.
            del ref_grads[1]

            test_value = symmetric_quantize(
                test_input, levels, level_low, level_high, test_scale, EPS)
            test_value.sum().backward()
            test_grads = get_grads([test_input, test_scale])

            check_equal(ref_output, test_value)
            check_equal(ref_grads, test_grads)
# Example #4
        def test_quantize_asymmetric_backward(self, _seed, input_size, bits,
                                              use_cuda, is_negative_range,
                                              is_weights, scale_mode):
            """Validate forward values and gradients of asymmetric
            quantization against the NumPy reference implementation.
            """
            level_low, level_high, levels = self.get_range_level(bits)
            ref_input = generate_input(input_size)
            ref_input_low, ref_input_range = self.generate_range(
                ref_input, is_negative_range, scale_mode, is_weights)
            test_input, test_input_low, test_input_range = get_test_data(
                [ref_input, ref_input_low, ref_input_range],
                use_cuda,
                is_backward=True)

            # The sign of the (possibly negative) range is needed by the
            # reference backward to mirror the abs() applied below.
            range_sign = np.sign(ref_input_range)
            ref_input_range = abs(ref_input_range) + EPS
            ref_input_low, ref_input_range = ReferenceQuantizeAsymmetric.tune_range(
                ref_input_low, ref_input_range, levels)
            ref_output = ReferenceQuantizeAsymmetric.forward(
                ref_input, ref_input_low, ref_input_range, levels)
            ref_grads = ReferenceQuantizeAsymmetric.backward(
                np.ones(input_size), ref_input, ref_input_low, ref_input_range,
                ref_output, level_low, level_high, range_sign)

            test_value = asymmetric_quantize(test_input,
                                             levels,
                                             level_low,
                                             level_high,
                                             test_input_low,
                                             test_input_range,
                                             eps=EPS)
            test_value.sum().backward()
            test_grads = get_grads(
                [test_input, test_input_low, test_input_range])

            # Consistency fix: also compare the forward outputs, as the
            # symmetric backward test does — gradients alone can agree
            # while the quantized values diverge.
            check_equal(ref_output, test_value)
            check_equal(ref_grads, test_grads)
    def test_binarize_activations_backward(self, _seed, input_size, use_cuda):
        """Validate forward values and gradients of the activation
        binarization op against the NumPy reference implementation.
        """
        ref_input = generate_input(input_size)
        ref_scale, ref_threshold = generate_scale_threshold(input_size)

        test_input, test_scale, test_threshold = get_test_data(
            [ref_input, ref_scale, ref_threshold], use_cuda, is_backward=True)

        ref_value = ReferenceActivationBinarize.forward(
            ref_input, ref_scale, ref_threshold)
        ref_grads = ReferenceActivationBinarize.backward(
            np.ones(input_size), ref_input, ref_scale, ref_value)

        test_value = activation_bin_scale_threshold_op(test_input, test_scale,
                                                       test_threshold)
        test_value.sum().backward()
        test_grads = get_grads([test_input, test_scale, test_threshold])

        # Consistency fix: the forward result was computed (and fed into
        # the reference backward) but never compared to the op's output;
        # the other backward tests check both values and gradients.
        check_equal(ref_value, test_value, rtol=1e-3)
        check_equal(ref_grads, test_grads, rtol=1e-3)