def reset_parameters(self):
    """Re-initialize every named parameter of this module.

    Logs a banner containing the concrete class name, then passes each
    ``(name, parameter)`` pair yielded by ``self.named_parameters()`` to
    ``init_with_xavier_uniform``.
    """
    banner = '===== Initialize %s with Xavier uniform distribution =====' % self.__class__.__name__
    logger.info(banner)

    for name, param in self.named_parameters():
        init_with_xavier_uniform(name, param)
def reset_parameters(self):
    """Re-initialize the parameters of the ``CausalConv1d`` layers in ``self.pe``.

    Logs a banner containing the concrete class name, then walks
    ``self.pe`` in order. Entries that are not ``CausalConv1d`` instances
    are skipped; for the rest, every ``(name, parameter)`` pair is passed
    to ``init_with_xavier_uniform``.
    """
    banner = '===== Initialize %s with Xavier uniform distribution =====' % self.__class__.__name__
    logger.info(banner)

    # Lazily filter so layers are visited in the same order as self.pe.
    causal_convs = (module for module in self.pe if isinstance(module, CausalConv1d))
    for conv in causal_convs:
        for name, param in conv.named_parameters():
            init_with_xavier_uniform(name, param)
def reset_parameters(self):
    """Re-initialize the parameters of the three convolution sub-layers.

    Logs a banner containing the concrete class name, then visits
    ``pointwise_conv1``, ``pointwise_conv2`` and ``depthwise_conv`` (in
    that order) and passes each of their ``(name, parameter)`` pairs to
    ``init_with_xavier_uniform``.
    """
    banner = '===== Initialize %s with Xavier uniform distribution =====' % self.__class__.__name__
    logger.info(banner)

    # Resolve all three sub-layers up front, before any parameter is touched.
    conv_layers = (
        self.pointwise_conv1,
        self.pointwise_conv2,
        self.depthwise_conv,
    )
    for conv in conv_layers:
        for name, param in conv.named_parameters():
            init_with_xavier_uniform(name, param)
def reset_parameters(self, bias):
    """Re-initialize the key/query projections and the optional ``conv1d``.

    The weights of ``w_key`` and ``w_query`` are drawn with
    ``nn.init.xavier_uniform_`` using a gain of ``1 / sqrt(2)``.

    Args:
        bias: when truthy, the bias terms of ``w_key`` and ``w_query`` are
            set to zero.

    If ``self.conv1d`` is not ``None``, a second banner is logged with its
    class name and each of its ``(name, parameter)`` pairs is passed to
    ``init_with_xavier_uniform``.
    """
    logger.info('===== Initialize %s with Xavier uniform distribution =====' % self.__class__.__name__)

    # NOTE: see https://github.com/pytorch/fairseq/blob/master/fairseq/modules/multihead_attention.py
    scaled_gain = 1 / math.sqrt(2)
    projections = (self.w_key, self.w_query)

    # Key first, then query: keeps the random draws in the original order.
    for proj in projections:
        nn.init.xavier_uniform_(proj.weight, gain=scaled_gain)

    if bias:
        for proj in projections:
            nn.init.constant_(proj.bias, 0.)

    if self.conv1d is None:
        return

    logger.info('===== Initialize %s with Xavier uniform distribution =====' % self.conv1d.__class__.__name__)
    for name, param in self.conv1d.named_parameters():
        init_with_xavier_uniform(name, param)