예제 #1
0
 def reset_parameters(self):
     """Re-initialize every named parameter of this module.

     Logs a banner containing the class name, then passes each
     ``(name, parameter)`` pair yielded by ``self.named_parameters()`` to
     ``init_with_xavier_uniform``.
     """
     cls_name = self.__class__.__name__
     banner = '===== Initialize %s with Xavier uniform distribution =====' % cls_name
     logger.info(banner)

     for param_name, param in self.named_parameters():
         init_with_xavier_uniform(param_name, param)
예제 #2
0
 def reset_parameters(self):
     """Re-initialize the ``CausalConv1d`` layers found in ``self.pe``.

     Logs a banner containing the class name, then walks ``self.pe``.
     Entries that are not ``CausalConv1d`` instances are skipped; for the
     rest, each ``(name, parameter)`` pair is passed to
     ``init_with_xavier_uniform``.
     """
     cls_name = self.__class__.__name__
     logger.info('===== Initialize %s with Xavier uniform distribution =====' % cls_name)

     for sub_layer in self.pe:
         # Only the causal convolutions are touched; everything else in
         # ``self.pe`` is left as it is.
         if not isinstance(sub_layer, CausalConv1d):
             continue
         for param_name, param in sub_layer.named_parameters():
             init_with_xavier_uniform(param_name, param)
예제 #3
0
 def reset_parameters(self):
     """Re-initialize the parameters of the three convolution layers.

     Logs a banner containing the class name, then passes every
     ``(name, parameter)`` pair of ``self.pointwise_conv1``,
     ``self.pointwise_conv2`` and ``self.depthwise_conv`` (in that order)
     to ``init_with_xavier_uniform``.
     """
     cls_name = self.__class__.__name__
     logger.info('===== Initialize %s with Xavier uniform distribution =====' % cls_name)

     conv_layers = (
         self.pointwise_conv1,
         self.pointwise_conv2,
         self.depthwise_conv,
     )
     for conv in conv_layers:
         for param_name, param in conv.named_parameters():
             init_with_xavier_uniform(param_name, param)
예제 #4
0
 def reset_parameters(self, bias):
     """Re-initialize the key/query projections and the optional conv1d.

     The weights of ``self.w_key`` and ``self.w_query`` are drawn with
     ``nn.init.xavier_uniform_`` using a gain of ``1 / sqrt(2)``. When
     ``self.conv1d`` is set, its parameters are passed to
     ``init_with_xavier_uniform`` after a second banner is logged.

     Args:
         bias: when truthy, the biases of ``self.w_key`` and
             ``self.w_query`` are filled with zeros.
     """
     logger.info('===== Initialize %s with Xavier uniform distribution =====' % self.__class__.__name__)

     # NOTE: see https://github.com/pytorch/fairseq/blob/master/fairseq/modules/multihead_attention.py
     gain = 1 / math.sqrt(2)
     projections = (self.w_key, self.w_query)

     # Weights first (key, then query), then the biases.
     for proj in projections:
         nn.init.xavier_uniform_(proj.weight, gain=gain)
     if bias:
         for proj in projections:
             nn.init.constant_(proj.bias, 0.)

     conv = self.conv1d
     if conv is None:
         return

     logger.info('===== Initialize %s with Xavier uniform distribution =====' % conv.__class__.__name__)
     for param_name, param in conv.named_parameters():
         init_with_xavier_uniform(param_name, param)