def test_from_params(self):
    """Build a ``pretrained`` initializer from ``Params``, with and without name overrides."""
    # No overrides supplied: the override mapping should come back empty.
    plain_config = {"type": "pretrained", "weights_file_path": self.temp_file}
    initializer = Initializer.from_params(Params(plain_config))
    assert initializer.weights
    assert initializer.parameter_name_overrides == {}

    # Overrides supplied: they should be stored unchanged.
    name_overrides = {"a": "b", "c": "d"}
    override_config = {
        "type": "pretrained",
        "weights_file_path": self.temp_file,
        "parameter_name_overrides": name_overrides,
    }
    initializer = Initializer.from_params(Params(override_config))
    assert initializer.weights
    assert initializer.parameter_name_overrides == name_overrides
def test_forward_gives_correct_output(self):
    """Compare a two-layer ``Maxout`` with constant parameters against hand-computed output."""
    maxout = Maxout.from_params(
        Params({
            'input_dim': 2,
            'output_dims': 3,
            'pool_sizes': 4,
            'dropout': 0.0,
            'num_layers': 2,
        })
    )

    # Set every parameter matched by ".*" to the constant 1.
    ones_init = Initializer.from_params(Params({"type": "constant", "val": 1.}))
    InitializerApplicator([(".*", ones_init)])(maxout)

    batch = torch.FloatTensor([[-3, 1]])
    result = maxout(batch).data.numpy()
    assert result.shape == (1, 3)

    # The expected output was checked by hand:
    #   layer 1: the matrix multiply gives [-2] * 12; reshaping and maxing
    #            produces [-2, -2, -2], and the bias increments that to
    #            [-1, -1, -1].
    #   layer 2: the matrix multiply gives [-3] * 12; reshaping and maxing
    #            produces [-3, -3, -3], and the bias increments that to
    #            [-2, -2, -2].
    assert_almost_equal(result, [[-2, -2, -2]])
def setUp(self):
    """Create a ``TokenCharactersEncoder`` and stand-alone versions of its two parts.

    All three modules are built from the same configuration and then have
    every parameter set to the constant 1.
    """
    super(TestTokenCharactersEncoder, self).setUp()

    self.vocab = Vocabulary()
    for character in ("1", "2", "3", "4"):
        self.vocab.add_token_to_namespace(character, "token_characters")

    embedding_config = {
        "embedding_dim": 2,
        "vocab_namespace": "token_characters",
    }
    encoder_config = {
        "type": "cnn",
        "embedding_dim": 2,
        "num_filters": 4,
        "ngram_filter_sizes": [1, 2],
        "output_dim": 3,
    }
    params = Params({"embedding": embedding_config, "encoder": encoder_config})

    # The combined encoder receives a deep copy so that the "embedding" and
    # "encoder" sections of ``params`` can still be read below
    # (presumably ``from_params`` consumes what it is given -- TODO confirm).
    self.encoder = TokenCharactersEncoder.from_params(vocab=self.vocab, params=deepcopy(params))
    self.embedding = Embedding.from_params(vocab=self.vocab, params=params["embedding"])
    self.inner_encoder = Seq2VecEncoder.from_params(params["encoder"])

    ones_init = Initializer.from_params(Params({"type": "constant", "val": 1.}))
    apply_init = InitializerApplicator([(".*", ones_init)])
    for module in (self.encoder, self.embedding, self.inner_encoder):
        apply_init(module)
def test_augmented_lstm_computes_same_function_as_pytorch_lstm(self):
    """With identical constant parameters, ``AugmentedLstm`` and ``LSTM`` should agree."""
    augmented_lstm = AugmentedLstm(10, 11)
    pytorch_lstm = LSTM(10, 11, num_layers=1, batch_first=True)

    # Set every parameter of both modules to 1.
    ones_init = Initializer.from_params(Params({"type": "constant", "val": 1.}))
    apply_init = InitializerApplicator([(".*", ones_init)])
    for module in (augmented_lstm, pytorch_lstm):
        apply_init(module)

    # Both modules start from the same all-zero (state, memory) pair.
    start = (torch.zeros([1, 5, 11]), torch.zeros([1, 5, 11]))

    # Use bigger numbers to avoid floating point instability.
    sorted_tensor, sorted_lengths, _, _ = sort_batch_by_length(self.random_tensor * 5.,
                                                               self.sequence_lengths)
    packed_input = pack_padded_sequence(sorted_tensor,
                                        sorted_lengths.data.tolist(),
                                        batch_first=True)

    augmented_output, augmented_state = augmented_lstm(packed_input, start)
    pytorch_output, pytorch_state = pytorch_lstm(packed_input, start)

    pytorch_sequence, _ = pad_packed_sequence(pytorch_output, batch_first=True)
    augmented_sequence, _ = pad_packed_sequence(augmented_output, batch_first=True)

    # Compare the padded outputs first, then the two entries of the final state.
    pairs = [
        (pytorch_sequence, augmented_sequence),
        (pytorch_state[0], augmented_state[0]),
        (pytorch_state[1], augmented_state[1]),
    ]
    for expected, actual in pairs:
        numpy.testing.assert_array_almost_equal(expected.data.numpy(),
                                                actual.data.numpy(),
                                                decimal=4)
def test_l2_regularization(self):
    """Apply ``L2Regularizer(1.0)`` to a small model whose parameters are all 0.5."""
    first_layer = torch.nn.Linear(5, 10)
    second_layer = torch.nn.Linear(10, 5)
    model = torch.nn.Sequential(first_layer, second_layer)

    half_init = Initializer.from_params(Params({"type": "constant", "val": 0.5}))
    InitializerApplicator([(".*", half_init)])(model)

    # The empty pattern is meant to apply the regularizer to every parameter.
    regularizer = RegularizerApplicator([("", L2Regularizer(1.0))])
    penalty = regularizer(model)

    # 28.75 == 115 * 0.5 ** 2, i.e. 115 parameters (weights plus biases of
    # both layers) each contributing its square.
    assert penalty.data.numpy() == 28.75
def test_forward_does_correct_computation(self):
    """Check ``CnnEncoder`` output for constant parameters against hand-written sums."""
    encoder = CnnEncoder(embedding_dim=2, num_filters=1, ngram_filter_sizes=(1, 2))

    ones_init = Initializer.from_params(Params({"type": "constant", "val": 1.}))
    InitializerApplicator([(".*", ones_init)])(encoder)

    tokens = torch.FloatTensor([[[.7, .8], [.1, 1.5]]])
    encoded = encoder(tokens, None)  # second argument (None) is passed through as in the original call

    # Each expected entry is spelled "<sum> + 1.0"; the 1.0 is presumably the
    # constant-initialised bias -- TODO confirm against CnnEncoder.
    expected = numpy.asarray([[1.6 + 1.0, 3.1 + 1.0]])
    assert_almost_equal(encoded.data.numpy(), expected, decimal=6)
def test_l1_regularization(self):
    """Apply ``L1Regularizer(1.0)`` to a small model whose parameters are all -1."""
    first_layer = torch.nn.Linear(5, 10)
    second_layer = torch.nn.Linear(10, 5)
    model = torch.nn.Sequential(first_layer, second_layer)

    minus_one_init = Initializer.from_params(Params({"type": "constant", "val": -1}))
    InitializerApplicator([(".*", minus_one_init)])(model)

    regularizer = RegularizerApplicator([("", L1Regularizer(1.0))])
    penalty = regularizer(model)

    # 115 rather than 100 because the biases are counted too
    # (50 + 10 + 50 + 5 parameters, each of magnitude 1).
    assert penalty.data.numpy() == 115.0
def test_regularizer_applicator_respects_regex_matching(self):
    """Use different regularizers for parameters matching "weight" and "bias"."""
    first_layer = torch.nn.Linear(5, 10)
    second_layer = torch.nn.Linear(10, 5)
    model = torch.nn.Sequential(first_layer, second_layer)

    ones_init = Initializer.from_params(Params({"type": "constant", "val": 1.}))
    InitializerApplicator([(".*", ones_init)])(model)

    regularizer = RegularizerApplicator([
        ("weight", L2Regularizer(0.5)),
        ("bias", L1Regularizer(1.0)),
    ])
    penalty = regularizer(model)

    # 65 == 0.5 * 100 (weights, L2) + 1.0 * 15 (biases, L1) with all values at 1.
    assert penalty.data.numpy() == 65.0
def test_registry_has_builtin_initializers(self):
    """Each registered name must wrap the matching ``torch.nn.init.<name>_`` function."""
    builtin_names = (
        "normal",
        "uniform",
        "orthogonal",
        "constant",
        "dirac",
        "xavier_normal",
        "xavier_uniform",
        "kaiming_normal",
        "kaiming_uniform",
        "sparse",
        "eye",
    )
    for registered_name in builtin_names:
        # The torch function is the registry name plus a trailing underscore.
        expected_function = getattr(torch.nn.init, registered_name + "_")
        wrapper = Initializer.by_name(registered_name)()
        assert wrapper._init_function == expected_function
def test_registry_has_builtin_initializers(self):
    """Each registered name must wrap the ``torch.nn.init`` function of the same name."""
    # NOTE(review): these are the un-suffixed ``torch.nn.init`` spellings; the
    # comparison is by function object, so they must stay in sync with whatever
    # the registry actually wraps -- confirm before switching to the ``*_`` names.
    builtin_names = (
        "normal",
        "uniform",
        "orthogonal",
        "constant",
        "dirac",
        "xavier_normal",
        "xavier_uniform",
        "kaiming_normal",
        "kaiming_uniform",
        "sparse",
        "eye",
    )
    for registered_name in builtin_names:
        expected_function = getattr(torch.nn.init, registered_name)
        wrapper = Initializer.by_name(registered_name)()
        # pylint: disable=protected-access
        assert wrapper._init_function == expected_function
def test_frozen_params(self):
    """Parameters with ``requires_grad = False`` must not add to the penalty.

    The first linear layer is frozen, so only the second layer's parameters
    are expected to be counted by the L1 regularizer.
    """
    model = torch.nn.Sequential(torch.nn.Linear(5, 10), torch.nn.Linear(10, 5))
    constant_init = Initializer.from_params(Params({"type": "constant", "val": -1}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(model)

    # Freeze the parameters of the first linear layer. ``Sequential`` names
    # its parameters "<index>.<attribute>", so match the literal "0." prefix;
    # the previous pattern r"0.*$" matched any name containing a "0" anywhere.
    for name, param in model.named_parameters():
        if re.match(r"0\.", name):
            param.requires_grad = False

    value = RegularizerApplicator([("", L1Regularizer(1.0))])(model)
    # 55 because only the second layer counts, including its bias (5*10 + 5).
    assert value.data.numpy() == 55
def test_stacked_bidirectional_lstm_dropout_version_is_different(self, dropout_name: str):
    """A ``StackedBidirectionalLstm`` with dropout must differ from one without.

    ``dropout_name`` selects which dropout keyword is set to 0.9 and which
    results are compared; any other value raises ``ValueError``.
    """
    stacked_lstm = StackedBidirectionalLstm(input_size=10, hidden_size=11, num_layers=3)

    known_dropouts = ('layer_dropout_probability', 'recurrent_dropout_probability')
    if dropout_name not in known_dropouts:
        raise ValueError('Do not recognise the following dropout name '
                         f'{dropout_name}')
    dropped_stacked_lstm = StackedBidirectionalLstm(input_size=10,
                                                    hidden_size=11,
                                                    num_layers=3,
                                                    **{dropout_name: 0.9})

    # Initialize all weights to be == 0.5.
    half_init = Initializer.from_params(Params({"type": "constant", "val": 0.5}))
    apply_init = InitializerApplicator([(".*", half_init)])
    apply_init(stacked_lstm)
    apply_init(dropped_stacked_lstm)

    initial_state = torch.randn([3, 5, 11])
    initial_memory = torch.randn([3, 5, 11])
    hidden = (initial_state, initial_memory)

    tensor = torch.rand([5, 7, 10])
    sequence_lengths = torch.LongTensor([7, 7, 7, 7, 7])

    sorted_tensor, sorted_lengths, _, _ = sort_batch_by_length(tensor, sequence_lengths)
    packed_input = pack_padded_sequence(sorted_tensor,
                                        sorted_lengths.data.tolist(),
                                        batch_first=True)

    stacked_output, stacked_state = stacked_lstm(packed_input, hidden)
    dropped_output, dropped_state = dropped_stacked_lstm(packed_input, hidden)

    dropped_output_sequence, _ = pad_packed_sequence(dropped_output, batch_first=True)
    stacked_output_sequence, _ = pad_packed_sequence(stacked_output, batch_first=True)

    # "Almost equal" must FAIL here: the dropped version is expected to differ.
    if dropout_name == 'layer_dropout_probability':
        with pytest.raises(AssertionError):
            numpy.testing.assert_array_almost_equal(dropped_output_sequence.data.numpy(),
                                                    stacked_output_sequence.data.numpy(),
                                                    decimal=4)
    elif dropout_name == 'recurrent_dropout_probability':
        # Compare both entries of the final state.
        for index in (0, 1):
            with pytest.raises(AssertionError):
                numpy.testing.assert_array_almost_equal(dropped_state[index].data.numpy(),
                                                        stacked_state[index].data.numpy(),
                                                        decimal=4)
def test_augmented_lstm_computes_same_function_as_pytorch_lstm(self):
    """``AugmentedLstm`` and ``LSTM`` with the same constant parameters should agree."""
    augmented_lstm = AugmentedLstm(10, 11)
    pytorch_lstm = LSTM(10, 11, num_layers=1, batch_first=True)

    # Initialize all weights to be == 1.
    ones_init = Initializer.from_params(Params({"type": "constant", "val": 1.}))
    apply_init = InitializerApplicator([(".*", ones_init)])
    apply_init(augmented_lstm)
    apply_init(pytorch_lstm)

    initial_state = torch.zeros([1, 5, 11])
    initial_memory = torch.zeros([1, 5, 11])
    hidden = (initial_state, initial_memory)

    # Use bigger numbers to avoid floating point instability.
    scaled_input = self.random_tensor * 5.
    sorted_tensor, sorted_lengths, _, _ = sort_batch_by_length(scaled_input, self.sequence_lengths)
    packed_input = pack_padded_sequence(sorted_tensor,
                                        sorted_lengths.data.tolist(),
                                        batch_first=True)

    augmented_output, augmented_state = augmented_lstm(packed_input, hidden)
    pytorch_output, pytorch_state = pytorch_lstm(packed_input, hidden)

    pytorch_sequence, _ = pad_packed_sequence(pytorch_output, batch_first=True)
    augmented_sequence, _ = pad_packed_sequence(augmented_output, batch_first=True)

    def assert_close(expected, actual):
        # Compare two tensors to four decimal places.
        numpy.testing.assert_array_almost_equal(expected.data.numpy(),
                                                actual.data.numpy(),
                                                decimal=4)

    assert_close(pytorch_sequence, augmented_sequence)
    assert_close(pytorch_state[0], augmented_state[0])
    assert_close(pytorch_state[1], augmented_state[1])
def test_forward_gives_correct_output(self):
    """Compare a two-layer ReLU ``FeedForward`` with constant parameters to hand-computed output."""
    feedforward = FeedForward.from_params(
        Params({
            'input_dim': 2,
            'hidden_dims': 3,
            'activations': 'relu',
            'num_layers': 2,
        })
    )

    ones_init = Initializer.from_params(Params({"type": "constant", "val": 1.}))
    InitializerApplicator([(".*", ones_init)])(feedforward)

    batch = torch.FloatTensor([[-3, 1]])
    result = feedforward(batch).data.numpy()
    assert result.shape == (1, 3)

    # Checked by hand: ReLU makes the first hidden layer's output [0, 0, 0];
    # the second layer then adds its bias, giving [1, 1, 1].
    assert_almost_equal(result, [[1, 1, 1]])
def test_dropout_version_is_different_to_no_dropout(self):
    """An ``AugmentedLstm`` with recurrent dropout must differ from one without."""
    augmented_lstm = AugmentedLstm(10, 11)
    dropped_augmented_lstm = AugmentedLstm(10, 11, recurrent_dropout_probability=0.9)

    # Initialize all weights to be == 0.5.
    half_init = Initializer.from_params(Params({"type": "constant", "val": 0.5}))
    apply_init = InitializerApplicator([(".*", half_init)])
    apply_init(augmented_lstm)
    apply_init(dropped_augmented_lstm)

    initial_state = torch.randn([1, 5, 11])
    initial_memory = torch.randn([1, 5, 11])
    hidden = (initial_state, initial_memory)

    # If we use numbers as big as in the PyTorch comparison test, the dropout has no effect.
    sorted_tensor, sorted_lengths, _, _ = sort_batch_by_length(
        self.random_tensor, self.sequence_lengths
    )
    packed_input = pack_padded_sequence(
        sorted_tensor, sorted_lengths.data.tolist(), batch_first=True
    )

    augmented_output, augmented_state = augmented_lstm(packed_input, hidden)
    dropped_output, dropped_state = dropped_augmented_lstm(packed_input, hidden)

    dropped_sequence, _ = pad_packed_sequence(dropped_output, batch_first=True)
    augmented_sequence, _ = pad_packed_sequence(augmented_output, batch_first=True)

    # Each "almost equal" check must FAIL: outputs first, then both state entries.
    pairs = [
        (dropped_sequence, augmented_sequence),
        (dropped_state[0], augmented_state[0]),
        (dropped_state[1], augmented_state[1]),
    ]
    for dropped, plain in pairs:
        with pytest.raises(AssertionError):
            numpy.testing.assert_array_almost_equal(
                dropped.data.numpy(), plain.data.numpy(), decimal=4
            )
def test_forward_gives_correct_output(self):
    """Run a two-layer ``Maxout`` with constant parameters and check the hand-computed result."""
    maxout_config = {
        "input_dim": 2,
        "output_dims": 3,
        "pool_sizes": 4,
        "dropout": 0.0,
        "num_layers": 2,
    }
    maxout = Maxout.from_params(Params(maxout_config))

    # Every parameter matched by ".*" is set to 1.
    ones_init = Initializer.from_params(Params({"type": "constant", "val": 1.0}))
    apply_init = InitializerApplicator([(".*", ones_init)])
    apply_init(maxout)

    batch = torch.FloatTensor([[-3, 1]])
    result = maxout(batch).data.numpy()
    assert result.shape == (1, 3)

    # This output was checked by hand.
    # First layer: the matrix multiply gives [-2] * 12, reshaping and maxing
    # produces [-2, -2, -2], and the bias increments these to [-1, -1, -1].
    # Second layer: the matrix multiply gives [-3] * 12, reshaping and maxing
    # produces [-3, -3, -3], and the bias increments these to [-2, -2, -2].
    assert_almost_equal(result, [[-2, -2, -2]])
def test_from_params_none(self):
    """``Initializer.from_params`` is called with ``params=None``; the test passes if nothing is raised."""
    absent_params = None
    Initializer.from_params(params=absent_params)
def test_from_params_string(self):
    """``Initializer.from_params`` is called with the bare string "eye"; the test passes if nothing is raised."""
    initializer_name = "eye"
    Initializer.from_params(params=initializer_name)