def test_backward_lstm_cell(self, sizes, _seed):
    p = sizes
    ref_data = TestLSTMCell.generate_lstm_data(p, batch_first=False, is_backward=True)
    with torch.no_grad():
        test_data = LSTMTestData(*clone_test_data(ref_data))

    ref_rnn = nn.LSTMCell(p.input_size, p.hidden_size)
    TestLSTMCell.set_weights(ref_rnn, ref_data)
    test_rnn = LSTMCellNNCF(p.input_size, p.hidden_size)
    TestLSTMCell.set_weights(test_rnn, test_data)

    for i in range(p.seq_length):
        ref_result = ref_rnn(ref_data.x[i], (ref_data.h0[0], ref_data.c0[0]))
        test_result = test_rnn(test_data.x[i], (test_data.h0[0], test_data.c0[0]))
        ref_result[0].sum().backward()
        test_result[0].sum().backward()

        # Compare gradients w.r.t. the initial hidden/cell states and all cell weights.
        ref_grads = get_grads([ref_data.h0[0], ref_data.c0[0]])
        ref_grads += get_grads([ref_rnn.weight_ih, ref_rnn.weight_hh, ref_rnn.bias_ih, ref_rnn.bias_hh])
        test_grads = get_grads([test_data.h0[0], test_data.c0[0]])
        test_grads += get_grads([test_rnn.weight_ih, test_rnn.weight_hh, test_rnn.bias_ih, test_rnn.bias_hh])
        for test, ref in zip(test_grads, ref_grads):
            torch.testing.assert_allclose(test, ref)
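
# A minimal sketch of the two helpers the test above relies on. These are
# hypothetical stand-ins (the real clone_test_data / get_grads live in the
# shared test utilities); they only illustrate the assumed behaviour.
import torch


def clone_test_data(data):
    # Detach-clone every tensor so the NNCF run shares no autograd history
    # with the reference run, while keeping requires_grad flags so
    # backward() still produces gradients to compare.
    return [t.clone().detach().requires_grad_(t.requires_grad) for t in data]


def get_grads(variables):
    # Snapshot .grad of each tensor/parameter; cloning prevents later
    # backward() calls from overwriting the values being compared.
    return [var.grad.clone() for var in variables]
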
def test_backward_lstm(self, sizes, bidirectional, num_layers, bias, batch_first,
                       variable_length, sorted_, is_cuda, empty_initial, dropout, _seed):
    num_directions = 2 if bidirectional else 1
    p = sizes
    ref_data = TestLSTMCell.generate_lstm_data(p, num_layers, num_directions, variable_length,
                                               sorted_, batch_first, is_cuda, bias, empty_initial, True)

    ref_rnn = nn.LSTM(input_size=p.input_size, hidden_size=p.hidden_size, num_layers=num_layers,
                      bidirectional=bidirectional, batch_first=batch_first, bias=bias, dropout=dropout)
    self.set_ref_lstm_weights(ref_data, ref_rnn, num_layers, num_directions, bias)
    ref_hidden = None if empty_initial else self.get_ref_lstm_hidden(ref_data)

    test_data = LSTMTestData(*clone_test_data(ref_data))
    # The NNCF module is built from a deep copy of the reference LSTM so both start
    # from identical weights.
    test_rnn = replace_lstm(copy.deepcopy(ref_rnn))
    test_hidden = None if empty_initial else self.get_test_lstm_hidden(test_data)

    if is_cuda:
        ref_rnn.cuda()
        test_rnn.cuda()

    ref_output, _ = ref_rnn(ref_data.x, ref_hidden)
    test_output, _ = test_rnn(test_data.x, test_hidden)
    ref_output[0].sum().backward()
    test_output[0].sum().backward()

    ref_grads = get_grads(self.flatten_nested_lists(ref_rnn.all_weights))
    test_grads = get_grads(self.flatten_nested_lists(test_rnn.all_weights))
    if not empty_initial:
        # TODO: compare gradients of all hidden states, not only the first layer/direction
        ref_grads += get_grads([ref_data.h0[0], ref_data.c0[0]])
        test_grads += get_grads([test_hidden[0][0], test_hidden[1][0]])

    for test, ref in zip(test_grads, ref_grads):
        torch.testing.assert_allclose(test, ref, rtol=1e-1, atol=1e-1)
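
# Hypothetical sketch of flatten_nested_lists (in the test class it is reached
# via self.*). nn.LSTM.all_weights is a list of per-layer, per-direction
# parameter lists ([w_ih, w_hh, b_ih, b_hh] each), so flattening one level of
# nesting yields the flat parameter list that get_grads expects.
def flatten_nested_lists(nested):
    return [item for sublist in nested for item in sublist]
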
def test_binarize_activations_backward(self, _seed, input_size, use_cuda):
    ref_input = generate_input(input_size)
    ref_scale, ref_threshold = generate_scale_threshold(input_size)
    test_input, test_scale, test_threshold = get_test_data(
        [ref_input, ref_scale, ref_threshold], use_cuda, is_backward=True)

    ref_value = ReferenceActivationBinarize.forward(ref_input, ref_scale, ref_threshold)
    ref_grads = ReferenceActivationBinarize.backward(np.ones(input_size), ref_input, ref_scale, ref_value)

    test_value = activation_bin_scale_threshold_op(test_input, test_scale, test_threshold)
    test_value.sum().backward()
    test_grads = get_grads([test_input, test_scale, test_threshold])
    check_equal(test_grads, ref_grads, rtol=1e-3)
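
# Hypothetical sketches of the numpy-side helpers assumed by the backward tests:
# generate_input builds the random reference array, check_equal compares torch
# results against numpy references. The real helpers may differ in details.
import numpy as np


def generate_input(input_size, low=-1.0, high=1.0):
    return ((high - low) * np.random.random_sample(input_size) + low).astype(np.float32)


def check_equal(test, reference, rtol=1e-4):
    for test_item, ref_item in zip(test, reference):
        np.testing.assert_allclose(test_item.cpu().detach().numpy(), ref_item, rtol=rtol)
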
def test_quantize_asymmetric_backward(self, _seed, input_size, bits, use_cuda,
                                      is_weights, is_fp16, scale_mode):
    skip_if_half_on_cpu(is_fp16, use_cuda)
    level_low, level_high, levels = self.get_range_level(bits)
    ref_input = generate_input(input_size)
    if is_fp16:
        ref_input = ref_input.astype(np.float16)
    ref_input_low, ref_input_range = self.generate_range(ref_input, scale_mode, is_weights, is_fp16)
    test_input, test_input_low, test_input_range = get_test_data(
        [ref_input, ref_input_low, ref_input_range], use_cuda, is_backward=True, is_fp16=is_fp16)

    range_sign = np.sign(ref_input_range)
    ref_input_range = abs(ref_input_range) + EPS
    ref_input_low, ref_input_range = ReferenceQuantize.tune_range(ref_input_low, ref_input_range, levels)
    ref_output = ReferenceQuantize.forward(ref_input, ref_input_low, ref_input_range, levels)
    # An incoming gradient of ones matches differentiating the .sum() of the test output below.
    mock_prev_output_grads = np.ones(input_size, dtype=np.float16 if is_fp16 else float)
    ref_grads = ReferenceQuantize.backward(mock_prev_output_grads, ref_input, ref_input_low,
                                           ref_input_range, ref_output, level_low, level_high, range_sign)

    test_value = asymmetric_quantize(test_input, levels, level_low, level_high,
                                     test_input_low, test_input_range, eps=EPS)
    test_value.sum().backward()
    test_grads = get_grads([test_input, test_input_low, test_input_range])
    check_outputs_for_quantization_functions(test_grads, ref_grads, is_fp16)
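
# Minimal numpy sketch of the uniform affine "fake quantization" that
# ReferenceQuantize.forward is assumed to model: quantize onto `levels` grid
# points inside [input_low, input_low + input_range], then dequantize back to
# float. The actual reference may differ in details such as zero-point rounding.
import numpy as np


def fake_quantize_reference(x, input_low, input_range, levels):
    scale = (levels - 1) / input_range                         # grid steps per unit of range
    clipped = np.clip(x, input_low, input_low + input_range)   # saturate to the range
    quantized = np.round((clipped - input_low) * scale)        # integer grid index
    return quantized / scale + input_low                       # back to float values
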
def test_quantize_symmetric_backward(self, _seed, is_signed, is_weights, is_fp16,
                                     input_size, bits, use_cuda, scale_mode):
    skip_if_half_on_cpu(is_fp16, use_cuda)
    ref_input = generate_input(input_size)
    ref_scale = self.generate_scale(ref_input, scale_mode, is_weights)
    level_low, level_high, levels = self.get_range_level(is_signed, bits)
    test_input, test_scale = get_test_data([ref_input, ref_scale], use_cuda,
                                           is_backward=True, is_fp16=is_fp16)

    ref_scale = abs(ref_scale) + EPS
    if is_fp16:
        ref_input = ref_input.astype(np.float16)
        ref_scale = ref_scale.astype(np.float16)

    # Express the symmetric scale as the (input_low, input_range) pair the reference expects.
    ref_input_low = ref_scale * (level_low / level_high)
    ref_input_range = ref_scale - ref_input_low

    ref_output = ReferenceQuantize.forward(ref_input, ref_input_low, ref_input_range, levels)
    mock_prev_output_grads = np.ones(input_size, dtype=np.float16 if is_fp16 else float)
    ref_grads = ReferenceQuantize.backward(mock_prev_output_grads, ref_input, ref_input_low,
                                           ref_input_range, ref_output, level_low, level_high, True)
    # The symmetric op has no separate input_low parameter, so drop its reference gradient.
    del ref_grads[1]

    test_value = symmetric_quantize(test_input, levels, level_low, level_high, test_scale, EPS)
    test_value.sum().backward()
    test_grads = get_grads([test_input, test_scale])

    check_outputs_for_quantization_functions(test_value, ref_output, is_fp16)
    check_outputs_for_quantization_functions(test_grads, ref_grads, is_fp16)
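
# Worked example (hypothetical, for illustration only) of the symmetric range
# construction used above, assuming get_range_level would return
# level_low=-128, level_high=127, levels=256 for signed 8-bit quantization
# (an assumption about that helper, not a quote from it).
def _symmetric_range_example():
    scale = 1.0
    level_low, level_high, levels = -128, 127, 256
    input_low = scale * (level_low / level_high)   # -128/127 ~= -1.0079
    input_range = scale - input_low                #  255/127 ~=  2.0079
    step = input_range / (levels - 1)              # exactly 1/127: one extra level below -scale
    return input_low, input_range, step
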