def reset_parameters(self):
        """Reset the module's weights, then re-draw the key/query/value projections.

        After the generic ``weights_init`` pass, the three projection weight
        matrices are re-sampled from a zero-mean normal distribution whose
        standard deviation is ``sqrt(2 / (size + head_size))``.
        """
        weights_init(self)

        # The projections are declared with the full (all-heads) width purely
        # for convenience and scalability; the effective output of each head
        # is only head_size wide, so the std is derived from head_size rather
        # than from the declared output width.
        kq_std = math.sqrt(2.0 / (self.kq_size + self.kq_head_size))
        for projection in (self.key_transform, self.query_transform):
            nn.init.normal_(projection.weight, mean=0, std=kq_std)

        value_std = math.sqrt(
            2.0 / (self.value_size + self.value_head_size))
        nn.init.normal_(self.value_transform.weight, mean=0, std=value_std)
예제 #2
0
 def reset_parameters(self):
     """Reset the module's weights and (re)create ``length_scale_param``.

     The length scale ``sigma`` is chosen so that a query at distance
     ``self.max_dist`` receives the weight ``self.max_dist_weight``::

         exp(-(max_dist / sigma) ** p) = max_dist_weight
         =>  sigma = max_dist / (-log(max_dist_weight)) ** (1 / p)

     The value stored in the parameter is the inverse softplus of ``sigma``
     (presumably because the forward pass applies softplus to
     ``length_scale_param`` -- confirm against the caller).

     Raises:
         ValueError: if the derived ``sigma`` is not strictly positive, in
             which case the inverse softplus is undefined.
     """
     weights_init(self)

     max_dist_sigma = self.max_dist / (
         (-math.log(self.max_dist_weight))**(1 / self.p))

     if max_dist_sigma <= 0:
         raise ValueError(
             "length scale must be positive to invert softplus, "
             f"got {max_dist_sigma!r}")

     # inverse softplus: log(exp(y) - 1) == y + log(1 - exp(-y)).
     # The right-hand form never evaluates exp(y), so it cannot overflow for
     # large y, and expm1 keeps full precision when y is close to zero.
     max_dist_param = max_dist_sigma + math.log(-math.expm1(-max_dist_sigma))

     # NOTE(review): this binds a brand-new Parameter object on every call;
     # an optimizer built before a later reset would still reference the old
     # one -- confirm this method is only invoked during construction.
     self.length_scale_param = nn.Parameter(torch.tensor([max_dist_param]))
 def reset_parameters(self):
     """Reset this module by delegating entirely to ``weights_init``.

     No module-specific initialisation is performed here; ``weights_init``
     is defined elsewhere and receives the module itself.
     """
     weights_init(self)
예제 #4
0
    def reset_parameters(self):
        """Reset the module's weights and re-create ``bias`` and ``temperature``.

        Both attributes are rebuilt as one-element parameters holding ``0.0``.
        ``temperature`` is then passed to ``init_param_`` (defined elsewhere;
        presumably it overwrites the value in place, per the trailing
        underscore -- confirm).
        """
        weights_init(self)

        # The bias keeps its zero starting value.
        self.bias = nn.Parameter(torch.tensor([0.0]))

        # The temperature only uses zero as a placeholder until init_param_
        # has run on it.
        temperature = nn.Parameter(torch.tensor([0.0]))
        self.temperature = temperature
        init_param_(self.temperature)