def reset_parameters(self):
    """Re-initialise the module, then override the K/Q/V projection weights.

    After the generic ``weights_init`` pass, the key, query and value
    projection weights are redrawn from a zero-mean normal distribution.
    The standard deviation is computed from the per-head size rather than
    only the full projection size, because the effective output of each
    projection is one head, not the concatenation of all heads (the layers
    are merely coded as a single wide projection for convenience and
    scalability).
    """
    weights_init(self)

    # Keys and queries share the same sizes, hence the same std.
    kq_std = math.sqrt(2.0 / (self.kq_size + self.kq_head_size))
    for projection in (self.key_transform, self.query_transform):
        nn.init.normal_(projection.weight, mean=0, std=kq_std)

    # Values may have a different size, so their std is computed separately.
    value_std = math.sqrt(2.0 / (self.value_size + self.value_head_size))
    nn.init.normal_(self.value_transform.weight, mean=0, std=value_std)
def reset_parameters(self):
    """Re-initialise the module and (re)create ``length_scale_param``.

    The length scale ``sigma`` is chosen so that a query at the maximum
    distance receives the weight ``self.max_dist_weight``::

        exp(-(max_dist / sigma) ** p) = max_dist_weight
        =>  sigma = max_dist / (-log(max_dist_weight)) ** (1 / p)

    The stored parameter is the inverse softplus of ``sigma``
    (presumably because a softplus is applied to it where it is used;
    confirm against the forward pass).

    Raises:
        ValueError: if the computed ``sigma`` is not strictly positive
            (the inverse softplus is undefined there), or if
            ``max_dist_weight`` is not a positive number.
    """
    weights_init(self)

    # Length scale giving weight `max_dist_weight` to a maximum-distance query.
    max_dist_sigma = self.max_dist / (
        (-math.log(self.max_dist_weight)) ** (1 / self.p))

    # Inverse softplus: log(exp(y) - 1), rewritten as y + log(1 - exp(-y)).
    # This form does not overflow for large y (exp(y) overflows a float
    # above ~709) and, thanks to expm1, does not cancel to log(0) for very
    # small positive y.
    max_dist_param = max_dist_sigma + math.log(-math.expm1(-max_dist_sigma))

    self.length_scale_param = nn.Parameter(torch.tensor([max_dist_param]))
def reset_parameters(self):
    """Re-initialise this module by delegating to ``weights_init``."""
    weights_init(self)
def reset_parameters(self):
    """Re-initialise the module and (re)create ``bias`` and ``temperature``.

    Runs the generic ``weights_init`` pass, then replaces ``self.bias`` and
    ``self.temperature`` with fresh one-element parameters holding ``0.0``.
    ``temperature`` is finally handed to ``init_param_`` (defined elsewhere),
    which is expected to set its actual starting value in place.
    """
    weights_init(self)

    # Each attribute gets its own freshly allocated tensor; bias first,
    # temperature second, as in the original assignment order.
    for attr_name in ("bias", "temperature"):
        setattr(self, attr_name, nn.Parameter(torch.tensor([0.0])))

    init_param_(self.temperature)