def reset_parameters(self):
    # Use sensible default initializations for parameters.
    block_orthogonal(self.linear_for_r.weight.data,
                     [self.hidden_size, self.hidden_size + self.input_size])
    block_orthogonal(self.linear_for_f.weight.data,
                     [self.hidden_size, self.hidden_size])
    block_orthogonal(self.linear_for_new_f.weight.data,
                     [self.hidden_size, self.hidden_size + self.input_size + self.hidden_size])
    block_orthogonal(self.linear_for_new_r.weight.data,
                     [self.hidden_size, self.hidden_size + self.input_size])
    block_orthogonal(self.linear_for_b.weight.data,
                     [self.hidden_size, self.hidden_size])
def reset_parameters(self):
    self.bias.data.zero_()
    weight_index = 0
    bias_index = 0
    for i in range(self.num_layers):
        input_size = self.input_size if i == 0 else self.hidden_size

        # Create a tensor of the right size and initialize it.
        init_tensor = self.weight.new_zeros(input_size, self.hidden_size * 6)
        block_orthogonal(init_tensor, [input_size, self.hidden_size])
        # Copy it into the flat weight.
        self.weight.data[weight_index: weight_index + init_tensor.nelement()]\
            .view_as(init_tensor).copy_(init_tensor)
        weight_index += init_tensor.nelement()

        # Same for the recurrent connection weight.
        init_tensor = self.weight.new_zeros(self.hidden_size, self.hidden_size * 5)
        block_orthogonal(init_tensor, [self.hidden_size, self.hidden_size])
        self.weight.data[weight_index: weight_index + init_tensor.nelement()]\
            .view_as(init_tensor).copy_(init_tensor)
        weight_index += init_tensor.nelement()

        # Set the forget bias to 1.
        self.bias.data[bias_index + self.hidden_size:bias_index + 2 * self.hidden_size].fill_(1)
        bias_index += 5 * self.hidden_size
def reset_parameters(self) -> None:
    self.bias.data.zero_()
    weight_index = 0
    bias_index = 0
    for i in range(self.num_layers):
        input_size = self.input_size if i == 0 else self.hidden_size

        # Create a tensor of the right size and initialize it.
        init_tensor = self.weight.new_zeros(input_size, self.hidden_size * 6)
        block_orthogonal(init_tensor, [input_size, self.hidden_size])
        # Copy it into the flat weight.
        self.weight.data[weight_index: weight_index + init_tensor.nelement()]\
            .view_as(init_tensor).copy_(init_tensor)
        weight_index += init_tensor.nelement()

        # Same for the recurrent connection weight.
        init_tensor = self.weight.new_zeros(self.hidden_size, self.hidden_size * 5)
        block_orthogonal(init_tensor, [self.hidden_size, self.hidden_size])
        self.weight.data[weight_index: weight_index + init_tensor.nelement()]\
            .view_as(init_tensor).copy_(init_tensor)
        weight_index += init_tensor.nelement()

        # Set the forget bias to 1.
        self.bias.data[bias_index + self.hidden_size:bias_index + 2 * self.hidden_size].fill_(1)
        bias_index += 5 * self.hidden_size
def reset_parameters(self):
    # Use sensible default initializations for parameters.
    block_orthogonal(self.input_linearity.weight.data, [self.cell_size, self.input_size])
    block_orthogonal(self.state_linearity.weight.data, [self.cell_size, self.hidden_size])

    self.state_linearity.bias.data.fill_(0.0)
    # Initialize forget gate biases to 1.0 as per An Empirical
    # Exploration of Recurrent Network Architectures, (Jozefowicz, 2015).
    self.state_linearity.bias.data[self.cell_size:2 * self.cell_size].fill_(1.0)
def reset_parameters(self):
    # Use sensible default initializations for parameters.
    block_orthogonal(self.input_linearity.weight.data, [self.hidden_size, self.input_size])
    block_orthogonal(self.state_linearity.weight.data, [self.hidden_size, self.hidden_size])

    self.state_linearity.bias.data.fill_(0.0)
    # Initialize forget gate biases to 1.0 as per An Empirical
    # Exploration of Recurrent Network Architectures, (Jozefowicz, 2015).
    self.state_linearity.bias.data[self.hidden_size:2 * self.hidden_size].fill_(1.0)
def reset_parameters(self):
    try:
        from allennlp.nn.initializers import block_orthogonal
        block_orthogonal(self.weight_ih, [self.hidden_size, self.input_size])
        block_orthogonal(self.weight_hh, [self.hidden_size, self.hidden_size])
    except ImportError:
        self.weight_initializer(self.weight_ih)
        self.weight_initializer(self.weight_hh)
    I.zeros_(self.bias_hh)
    I.zeros_(self.bias_ih)
def test_block_orthogonal_can_initialize(self):
    tensor = torch.zeros([10, 6])
    block_orthogonal(tensor, [5, 3])
    tensor = tensor.data.numpy()

    def test_block_is_orthogonal(block) -> None:
        matrix_product = block.T @ block
        numpy.testing.assert_array_almost_equal(matrix_product,
                                                numpy.eye(matrix_product.shape[-1]), 6)

    test_block_is_orthogonal(tensor[:5, :3])
    test_block_is_orthogonal(tensor[:5, 3:])
    test_block_is_orthogonal(tensor[5:, 3:])
    test_block_is_orthogonal(tensor[5:, :3])
def test_block_orthogonal_can_initialize(self):
    tensor = Variable(torch.zeros([10, 6]))
    block_orthogonal(tensor, [5, 3])
    tensor = tensor.data.numpy()

    def test_block_is_orthogonal(block) -> None:
        matrix_product = block.T @ block
        numpy.testing.assert_array_almost_equal(matrix_product,
                                                numpy.eye(matrix_product.shape[-1]), 6)

    test_block_is_orthogonal(tensor[:5, :3])
    test_block_is_orthogonal(tensor[:5, 3:])
    test_block_is_orthogonal(tensor[5:, 3:])
    test_block_is_orthogonal(tensor[5:, :3])
def test_block_orthogonal_raises_on_mismatching_dimensions(self):
    tensor = torch.zeros([10, 6, 8])
    with pytest.raises(ConfigurationError):
        block_orthogonal(tensor, [7, 2, 1])
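# For reference, a minimal sketch of the kind of block-orthogonal initializer the
# snippets above rely on. This is an illustration written against the behaviour the
# tests exercise (orthogonal sub-blocks, error on non-divisible dimensions); the real
# allennlp.nn.initializers.block_orthogonal may differ in details such as the error
# type raised.
import itertools
from typing import List

import torch


def block_orthogonal(tensor: torch.Tensor,
                     split_sizes: List[int],
                     gain: float = 1.0) -> None:
    sizes = list(tensor.size())
    if any(size % split != 0 for size, split in zip(sizes, split_sizes)):
        raise ValueError("Every tensor dimension must be divisible by its split size.")
    # Start offsets of every block along every dimension, e.g.
    # [0, 5] x [0, 3] for a (10, 6) tensor split into (5, 3) blocks.
    indexes = [list(range(0, size, split)) for size, split in zip(sizes, split_sizes)]
    with torch.no_grad():
        for block_start in itertools.product(*indexes):
            block_slice = tuple(slice(start, start + step)
                                for start, step in zip(block_start, split_sizes))
            # Orthogonally initialize each block independently and assign the result
            # back, since the sliced view may not be contiguous.
            tensor[block_slice] = torch.nn.init.orthogonal_(
                tensor[block_slice].contiguous(), gain=gain)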
def reset_parameters(self):
    # Use sensible default initializations for parameters.
    block_orthogonal(self.h_context_linearity.weight.data, [self.hidden_size, self.hidden_size])
    block_orthogonal(self.h_input_linearity.weight.data, [self.hidden_size, self.hidden_size])
    block_orthogonal(self.h_global_linearity.weight.data, [self.hidden_size, self.hidden_size])
    block_orthogonal(self.g_input_linearity.weight.data, [self.hidden_size, self.hidden_size])
    block_orthogonal(self.g_hidden_linearity.weight.data, [self.hidden_size, self.hidden_size])
    block_orthogonal(self.g_avg_linearity.weight.data, [self.hidden_size, self.hidden_size])

    self.h_input_linearity.bias.data.fill_(0.0)
    self.g_input_linearity.bias.data.fill_(0.0)
def reset_parameters(self):
    # Use sensible default initializations for parameters.
    block_orthogonal(self.input_linearity.weight.data, [self.hidden_size, self.input_size])
    block_orthogonal(self.state_linearity.weight.data, [self.hidden_size, self.hidden_size])
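# A small, hypothetical module sketching where reset_parameters methods like the ones
# above typically live: the linear layers are created in __init__ and reset_parameters
# is called once at the end of construction. The class name and gate layout here are
# illustrative assumptions, not taken from any of the snippets above.
import torch
from allennlp.nn.initializers import block_orthogonal


class MinimalRecurrentCell(torch.nn.Module):
    def __init__(self, input_size: int, hidden_size: int) -> None:
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        # 4 * hidden_size rows: input, forget, cell and output gate weights stacked.
        self.input_linearity = torch.nn.Linear(input_size, 4 * hidden_size, bias=False)
        self.state_linearity = torch.nn.Linear(hidden_size, 4 * hidden_size, bias=True)
        self.reset_parameters()

    def reset_parameters(self) -> None:
        # Same pattern as the snippets above: one orthogonal block per gate,
        # zero biases, forget-gate bias filled with 1.0.
        block_orthogonal(self.input_linearity.weight.data, [self.hidden_size, self.input_size])
        block_orthogonal(self.state_linearity.weight.data, [self.hidden_size, self.hidden_size])
        self.state_linearity.bias.data.fill_(0.0)
        self.state_linearity.bias.data[self.hidden_size:2 * self.hidden_size].fill_(1.0)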