Example #1
    def reset_parameters(self):
        # Use sensible default initializations for parameters.
        block_orthogonal(self.linear_for_r.weight.data, [self.hidden_size, self.hidden_size + self.input_size])
        block_orthogonal(self.linear_for_f.weight.data, [self.hidden_size, self.hidden_size])
        block_orthogonal(self.linear_for_new_f.weight.data, [self.hidden_size, self.hidden_size + self.input_size + self.hidden_size])
        block_orthogonal(self.linear_for_new_r.weight.data, [self.hidden_size, self.hidden_size + self.input_size])
        block_orthogonal(self.linear_for_b.weight.data, [self.hidden_size, self.hidden_size])
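Every example on this page calls the same initializer. For reference, here is a minimal sketch of what block_orthogonal(tensor, split_sizes, gain) does, modeled on the AllenNLP implementation; the real function raises allennlp's ConfigurationError (as Example #9 below tests), while this sketch raises ValueError to stay self-contained:

import itertools
import torch

def block_orthogonal_sketch(tensor, split_sizes, gain=1.0):
    # Every tensor dimension must be evenly divisible by its split size.
    sizes = list(tensor.size())
    if any(size % split != 0 for size, split in zip(sizes, split_sizes)):
        raise ValueError(f"tensor dimensions {sizes} must be divisible by {split_sizes}")
    data = tensor.data
    # Iterate over the Cartesian product of block start indices and
    # orthogonally initialize each [split_sizes]-shaped block in place.
    starts_per_dim = [range(0, size, split) for size, split in zip(sizes, split_sizes)]
    for block_starts in itertools.product(*starts_per_dim):
        block_slice = tuple(slice(start, start + split)
                            for start, split in zip(block_starts, split_sizes))
        data[block_slice] = torch.nn.init.orthogonal_(
            data[block_slice].contiguous(), gain=gain)

Initializing block by block rather than the whole matrix at once is what makes this useful for gated RNNs: each gate's sub-matrix gets its own orthogonal initialization, which is the pattern every reset_parameters on this page relies on.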
Example #2
    def reset_parameters(self):
        self.bias.data.zero_()
        weight_index = 0
        bias_index = 0
        for i in range(self.num_layers):
            input_size = self.input_size if i == 0 else self.hidden_size

            # Create a tensor of the right size and initialize it.
            init_tensor = self.weight.new_zeros(input_size,
                                                self.hidden_size * 6)
            block_orthogonal(init_tensor, [input_size, self.hidden_size])
            # Copy it into the flat weight.
            self.weight.data[weight_index: weight_index + init_tensor.nelement()]\
                .view_as(init_tensor).copy_(init_tensor)
            weight_index += init_tensor.nelement()

            # Same for the recurrent connection weight.
            init_tensor = self.weight.new_zeros(self.hidden_size,
                                                self.hidden_size * 5)
            block_orthogonal(init_tensor, [self.hidden_size, self.hidden_size])
            self.weight.data[weight_index: weight_index + init_tensor.nelement()]\
                .view_as(init_tensor).copy_(init_tensor)
            weight_index += init_tensor.nelement()

            # Set the forget bias to 1.
            self.bias.data[bias_index + self.hidden_size:bias_index +
                           2 * self.hidden_size].fill_(1)
            bias_index += 5 * self.hidden_size
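Examples #2 and #3 write block-initialized tensors into a single flat parameter vector. Here is the view_as/copy_ idiom they use, in isolation; the sizes are hypothetical, chosen only for illustration:

import torch
from allennlp.nn.initializers import block_orthogonal  # or the sketch above

input_size, hidden_size = 3, 4  # hypothetical sizes
flat_weight = torch.zeros(input_size * hidden_size * 6)

# Build a tensor with the layer's real 2-D shape and initialize it block-wise.
init_tensor = flat_weight.new_zeros(input_size, hidden_size * 6)
block_orthogonal(init_tensor, [input_size, hidden_size])

# View the matching slice of the flat parameter with the same shape and copy
# the values in; weight_index then advances by init_tensor.nelement() so the
# next layer's slice starts where this one ends.
flat_weight[:init_tensor.nelement()].view_as(init_tensor).copy_(init_tensor)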
Example #3
    def reset_parameters(self) -> None:
        self.bias.data.zero_()
        weight_index = 0
        bias_index = 0
        for i in range(self.num_layers):
            input_size = self.input_size if i == 0 else self.hidden_size

            # Create a tensor of the right size and initialize it.
            init_tensor = self.weight.new_zeros(input_size, self.hidden_size * 6)
            block_orthogonal(init_tensor, [input_size, self.hidden_size])
            # Copy it into the flat weight.
            self.weight.data[weight_index: weight_index + init_tensor.nelement()]\
                .view_as(init_tensor).copy_(init_tensor)
            weight_index += init_tensor.nelement()

            # Same for the recurrent connection weight.
            init_tensor = self.weight.new_zeros(self.hidden_size, self.hidden_size * 5)
            block_orthogonal(init_tensor, [self.hidden_size, self.hidden_size])
            self.weight.data[weight_index: weight_index + init_tensor.nelement()]\
                .view_as(init_tensor).copy_(init_tensor)
            weight_index += init_tensor.nelement()

            # Set the forget bias to 1.
            self.bias.data[bias_index + self.hidden_size:bias_index + 2 * self.hidden_size].fill_(1)
            bias_index += 5 * self.hidden_size
Example #4
    def reset_parameters(self):
        # Use sensible default initializations for parameters.
        block_orthogonal(self.input_linearity.weight.data, [self.cell_size, self.input_size])
        block_orthogonal(self.state_linearity.weight.data, [self.cell_size, self.hidden_size])

        self.state_linearity.bias.data.fill_(0.0)
        # Initialize forget gate biases to 1.0, as per "An Empirical
        # Exploration of Recurrent Network Architectures" (Jozefowicz, 2015).
        self.state_linearity.bias.data[self.cell_size : 2 * self.cell_size].fill_(1.0)
Example #5
    def reset_parameters(self):
        # Use sensible default initializations for parameters.
        block_orthogonal(self.input_linearity.weight.data, [self.hidden_size, self.input_size])
        block_orthogonal(self.state_linearity.weight.data, [self.hidden_size, self.hidden_size])

        self.state_linearity.bias.data.fill_(0.0)
        # Initialize forget gate biases to 1.0, as per "An Empirical
        # Exploration of Recurrent Network Architectures" (Jozefowicz, 2015).
        self.state_linearity.bias.data[self.hidden_size:2 * self.hidden_size].fill_(1.0)
Example #6
    def reset_parameters(self):
        try:
            from allennlp.nn.initializers import block_orthogonal
            block_orthogonal(self.weight_ih,
                             [self.hidden_size, self.input_size])
            block_orthogonal(self.weight_hh,
                             [self.hidden_size, self.hidden_size])
        except ImportError:
            # Fall back to this module's own initializer when allennlp
            # is not installed.
            self.weight_initializer(self.weight_ih)
            self.weight_initializer(self.weight_hh)
        # `I` is presumably torch.nn.init, imported under that alias elsewhere.
        I.zeros_(self.bias_hh)
        I.zeros_(self.bias_ih)
Example #7
    def test_block_orthogonal_can_initialize(self):
        tensor = torch.zeros([10, 6])
        block_orthogonal(tensor, [5, 3])
        tensor = tensor.data.numpy()

        def test_block_is_orthogonal(block) -> None:
            matrix_product = block.T @ block
            numpy.testing.assert_array_almost_equal(matrix_product,
                                                    numpy.eye(matrix_product.shape[-1]), 6)
        test_block_is_orthogonal(tensor[:5, :3])
        test_block_is_orthogonal(tensor[:5, 3:])
        test_block_is_orthogonal(tensor[5:, 3:])
        test_block_is_orthogonal(tensor[5:, :3])
Example #8
    def test_block_orthogonal_can_initialize(self):
        # Variable is the legacy (pre-0.4) PyTorch autograd wrapper; this is
        # an older variant of Example #7.
        tensor = Variable(torch.zeros([10, 6]))
        block_orthogonal(tensor, [5, 3])
        tensor = tensor.data.numpy()

        def test_block_is_orthogonal(block) -> None:
            matrix_product = block.T @ block
            numpy.testing.assert_array_almost_equal(matrix_product,
                                                    numpy.eye(matrix_product.shape[-1]), 6)
        test_block_is_orthogonal(tensor[:5, :3])
        test_block_is_orthogonal(tensor[:5, 3:])
        test_block_is_orthogonal(tensor[5:, 3:])
        test_block_is_orthogonal(tensor[5:, :3])
Example #9
    def test_block_orthogonal_raises_on_mismatching_dimensions(self):
        # 10 is not evenly divisible by 7, so these split sizes are invalid.
        tensor = torch.zeros([10, 6, 8])
        with pytest.raises(ConfigurationError):
            block_orthogonal(tensor, [7, 2, 1])
Example #10
    def reset_parameters(self):
        # Use sensible default initializations for parameters.
        block_orthogonal(self.h_context_linearity.weight.data,
                         [self.hidden_size, self.hidden_size])
        block_orthogonal(self.h_input_linearity.weight.data,
                         [self.hidden_size, self.hidden_size])
        block_orthogonal(self.h_global_linearity.weight.data,
                         [self.hidden_size, self.hidden_size])

        block_orthogonal(self.g_input_linearity.weight.data,
                         [self.hidden_size, self.hidden_size])
        block_orthogonal(self.g_hidden_linearity.weight.data,
                         [self.hidden_size, self.hidden_size])
        block_orthogonal(self.g_avg_linearity.weight.data,
                         [self.hidden_size, self.hidden_size])

        self.h_input_linearity.bias.data.fill_(0.0)
        self.g_input_linearity.bias.data.fill_(0.0)
Example #11
    def reset_parameters(self):
        # Use sensible default initializations for parameters.
        block_orthogonal(self.input_linearity.weight.data, [self.hidden_size, self.input_size])
        block_orthogonal(self.state_linearity.weight.data, [self.hidden_size, self.hidden_size])