Example #1
def variance_scaling_(tensor, gain=1.):
    # type: (Tensor, float) -> Tensor
    r"""
    initializer for SeparableConv in Regressor/Classifier
    reference: https://keras.io/zh/initializers/  VarianceScaling
    """
    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    std = math.sqrt(gain / float(fan_in))

    return _no_grad_normal_(tensor, 0., std)
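A minimal usage sketch (not part of the original snippet), assuming variance_scaling_ is defined as above and that the private helpers it calls come from torch.nn.init:

# Sketch only: apply variance_scaling_ to a conv weight.
import math
import torch
import torch.nn as nn
from torch.nn.init import _calculate_fan_in_and_fan_out, _no_grad_normal_

conv = nn.Conv2d(32, 64, kernel_size=3, bias=False)
variance_scaling_(conv.weight)       # std = sqrt(gain / fan_in) = 1 / sqrt(32 * 3 * 3)
print(float(conv.weight.std()))      # roughly 0.059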
Example #2
    def __init__(self, in_channels, out_channels, kernel_size, D_mul=None, stride=1,
                 padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'):
        super(DOConv2d, self).__init__()

        kernel_size = _pair(kernel_size)
        stride = _pair(stride)
        padding = _pair(padding)
        dilation = _pair(dilation)

        if in_channels % groups != 0:
            raise ValueError('in_channels must be divisible by groups')
        if out_channels % groups != 0:
            raise ValueError('out_channels must be divisible by groups')
        valid_padding_modes = {'zeros', 'reflect', 'replicate', 'circular'}
        if padding_mode not in valid_padding_modes:
            raise ValueError("padding_mode must be one of {}, but got padding_mode='{}'".format(
                valid_padding_modes, padding_mode))
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.groups = groups
        self.padding_mode = padding_mode
        self._padding_repeated_twice = tuple(x for x in self.padding for _ in range(2))

        #################################### Initialization of D & W ###################################
        M = self.kernel_size[0]
        N = self.kernel_size[1]
        self.D_mul = M * N if D_mul is None or M * N <= 1 else D_mul
        self.W = Parameter(torch.Tensor(out_channels, in_channels // groups, self.D_mul))
        init.kaiming_uniform_(self.W, a=math.sqrt(5))

        if M * N > 1:
            self.D = Parameter(torch.Tensor(in_channels, M * N, self.D_mul))
            init_zero = np.zeros([in_channels, M * N, self.D_mul], dtype=np.float32)
            self.D.data = torch.from_numpy(init_zero)

            eye = torch.reshape(torch.eye(M * N, dtype=torch.float32), (1, M * N, M * N))
            D_diag = eye.repeat((in_channels, 1, self.D_mul // (M * N)))
            if self.D_mul % (M * N) != 0:  # the cases when D_mul > M * N
                zeros = torch.zeros([in_channels, M * N, self.D_mul % (M * N)])
                self.D_diag = Parameter(torch.cat([D_diag, zeros], dim=2), requires_grad=False)
            else:  # the case when D_mul = M * N
                self.D_diag = Parameter(D_diag, requires_grad=False)
        ##################################################################################################

        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.W)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)
        else:
            self.register_parameter('bias', None)
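The D_diag construction above is the only non-obvious piece of shape bookkeeping; a standalone sketch (sizes are hypothetical, not from the DO-Conv source) of what it produces for a 3x3 kernel with D_mul = 2 * M * N:

# Sketch only: identity blocks repeated along the last dimension, one per input channel.
import torch

M, N, in_channels, D_mul = 3, 3, 16, 18
eye = torch.reshape(torch.eye(M * N, dtype=torch.float32), (1, M * N, M * N))
D_diag = eye.repeat((in_channels, 1, D_mul // (M * N)))
print(D_diag.shape)   # torch.Size([16, 9, 18])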
Example #3
def ddpg_init(conv_layers, lin_layers):
    print("doing ddpg_init")
    for layer in [*conv_layers, *lin_layers[:-1]]:
        if isinstance(layer, nn.Conv2d) or isinstance(layer, nn.Linear):
            fan_in, _ = init._calculate_fan_in_and_fan_out(layer.weight)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(layer.weight, -bound, bound)
            init.uniform_(layer.bias, -bound, bound)

    init.uniform_(lin_layers[-1].weight, -3e-4, 3e-4)
    init.uniform_(lin_layers[-1].bias, -3e-4, 3e-4)
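A hypothetical call (layer sizes invented here), only to show the expected arguments: every layer except the final linear one gets the 1/sqrt(fan_in) bound, while the output layer gets the small ±3e-4 range used in the DDPG paper.

# Sketch only: ddpg_init as defined above, applied to small layer lists.
import math
import torch.nn as nn
from torch.nn import init

conv_layers = [nn.Conv2d(3, 32, kernel_size=3), nn.Conv2d(32, 32, kernel_size=3)]
lin_layers = [nn.Linear(512, 200), nn.Linear(200, 1)]
ddpg_init(conv_layers, lin_layers)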
Example #4
def proposed_weight_norm_g_init(wn_layer, gain=2., version=1):
    """
    Initialize WN's g to preserve the norm of the forward pass
    """
    if version == 1:
        fan_in, fan_out = _calculate_fan_in_and_fan_out(wn_layer.weight)
        wn_layer.weight_g = Parameter(torch.ones_like(wn_layer.weight_g) * math.sqrt(gain * fan_in / fan_out))
    elif version == 13:
        wn_layer.weight_g = Parameter(torch.ones_like(wn_layer.weight_g) * math.sqrt(gain))
    else:
        raise ValueError("proposed_weight_norm_g_init: version should be in {1, 13}")
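A hypothetical usage sketch: the layer is assumed to already be wrapped with torch.nn.utils.weight_norm, so it exposes weight_g (and a materialized weight) for the function above to read.

# Sketch only: for Linear(256, 128) and version=1, g is set to sqrt(2 * 256 / 128) = 2.
import math
import torch
import torch.nn as nn
from torch.nn import Parameter
from torch.nn.init import _calculate_fan_in_and_fan_out

layer = nn.utils.weight_norm(nn.Linear(256, 128))
proposed_weight_norm_g_init(layer, gain=2., version=1)
print(layer.weight_g[0].item())   # 2.0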
Example #5
 def reset_parameters(self):
     init.kaiming_uniform_(self.weight1_1, a=math.sqrt(5))
     init.kaiming_uniform_(self.weight1_2, a=math.sqrt(5))
     init.kaiming_uniform_(self.weight2_1, a=math.sqrt(5))
     init.kaiming_uniform_(self.weight2_2, a=math.sqrt(5))
     # init.kaiming_uniform_(self.distribution_1, a=math.sqrt(5))
     # init.kaiming_uniform_(self.distribution_2, a=math.sqrt(5))
     if self.bias is not None:
         fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight1_1)
         bound = 1 / math.sqrt(fan_in)
         init.uniform_(self.bias, -bound, bound)
Example #6
 def reset_parameters(self):
     # # init.kaiming_uniform_(self.weight, a=math.sqrt(0)) # kaiming init
     # if (reset_indv_bias is None) or (reset_indv_bias is False):
     #     init.xavier_uniform_(self.weight, gain=1.0)  # xavier init
     # if (reset_indv_bias is None) or ((self.bias is not None) and reset_indv_bias is True):
     #     init.constant_(self.bias, 0)
     init.kaiming_uniform_(self.weight, a=math.sqrt(5))
     if self.bias is not None:
         fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
         bound = 1 / math.sqrt(fan_in)
         init.uniform_(self.bias, -bound, bound)
Example #7
    def reset_parameters(self):
        init.kaiming_uniform_(self.U, a=math.sqrt(5))
        init.orthogonal_(self.V)

        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.U.t())
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)
            init.uniform_(self.alpha, -bound, bound)
            init.uniform_(self.beta1, -bound, bound)
            init.uniform_(self.beta2, -bound, bound)
Example #8
 def __init__(self, weight, bias = None):
     super().__init__()
     self.name = 'Linear2'
     self.weight = weight
     if bias is None:
         self.bias = Parameter(torch.Tensor(weight.size(0)))
         fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
         bound = 1 / math.sqrt(fan_in)
         init.uniform_(self.bias, -bound, bound)
     else:
         self.bias = bias
Example #9
def xavier_uniform_n_(w, gain=1., n=4):
    """
    Initialize for LSTM layer
    """
    with torch.no_grad():
        fan_in, fan_out = _calculate_fan_in_and_fan_out(w)
        assert fan_out % n == 0, "fan_out should be divisible by n"
        fan_out = fan_out // n
        std = gain * math.sqrt(2.0 / (fan_in + fan_out))
        a = math.sqrt(3.0) * std
        nn.init.uniform_(w, -a, a)
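A usage sketch (module names are hypothetical): the default n=4 matches the four gates that PyTorch stacks along the output dimension of each LSTM weight matrix, so the Xavier bound is computed from the per-gate fan-out.

# Sketch only: initialize the stacked LSTM weight matrices gate-aware; biases are left alone.
import math
import torch
import torch.nn as nn
from torch.nn.init import _calculate_fan_in_and_fan_out

lstm = nn.LSTM(input_size=128, hidden_size=256)
for name, param in lstm.named_parameters():
    if name.startswith("weight"):   # weight_ih_l0 / weight_hh_l0, shape (4 * 256, ...)
        xavier_uniform_n_(param, gain=1., n=4)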
Example #10
def he_init(tensor, dist='uniform'):
    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    n = fan_in

    if dist == "uniform":
        tensor.uniform_(-1.0, 1.0)
        # Scale so that the final variance of this layer is 3/input_features
        tensor.mul_(np.sqrt(3.0 / n) / tensor.std())
    else:
        tensor.normal_(0.0, np.sqrt(3.0 / n))
Example #11
    def reset_parameters(self):
        init.kaiming_uniform_(self.weight.mean, a=math.sqrt(5))
        init.normal_(self.weight.scale, -2.0, 0.15)

        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight.mean)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias.mean, -bound, bound)
            init.normal_(self.bias.scale, -2.0, 0.15)

        self.sample()
Example #12
    def reset_parameters(self, **kwargs):
        if len(kwargs.keys()) == 0:
            # default init, see https://pytorch.org/docs/stable/_modules/torch/nn/modules/linear.html#Linear
            init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        else:
            init.kaiming_uniform_(self.weight, **kwargs)

        if self.bias is not None:
            # default init, see https://pytorch.org/docs/stable/_modules/torch/nn/modules/linear.html#Linear
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)
Example #13
 def reset_parameters(self):
     """ Initialize the weights and bias.
     :return: None
     """
     init.kaiming_uniform_(self.general_weight, a=math.sqrt(5))
     init.kaiming_uniform_(self.response_weight, a=math.sqrt(5))
     if self.general_bias is not None:
         fan_in, _ = init._calculate_fan_in_and_fan_out(self.general_weight)
         bound = 1 / math.sqrt(fan_in)
         init.uniform_(self.general_bias, -bound, bound)
         init.uniform_(self.response_bias, -bound, bound)
     self.hyper_bottleneck.weight.data.fill_(0)
Example #14
 def reset_parameters(self, gain=1):
     # init.kaiming_uniform_(self.weight, a=math.sqrt(5))
     super(_ConvNdRF, self).reset_parameters()
     kaiming_uniform_mod(self.weight,
                         a=math.sqrt(5),
                         gain=gain,
                         mode='fan_in',
                         nonlinearity='leaky_relu')
     if self.bias is not None:
         fan_in, _ = _calculate_fan_in_and_fan_out(self.weight)
         bound = gain * (1 / math.sqrt(fan_in))
         init.uniform_(self.bias, -bound, bound)
Example #15
 def reset_parameters(self) -> None:
     # Setting a=sqrt(5) in kaiming_uniform is the same as initializing with
     # uniform(-1/sqrt(in_features), 1/sqrt(in_features)). For details, see
     # https://github.com/pytorch/pytorch/issues/57109
     for b in range(self.B):
         init.kaiming_uniform_(self.weight[b],
                               a=math.sqrt(5),
                               mode='fan_out')
         if self.bias is not None:
             _, fan_out = init._calculate_fan_in_and_fan_out(self.weight[b])
             bound = 1 / math.sqrt(fan_out) if fan_out > 0 else 0
             init.uniform_(self.bias[b], -bound, bound)
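The equivalence cited in the comment can be checked directly: for leaky_relu the gain is sqrt(2 / (1 + a^2)), so a = sqrt(5) gives gain = 1/sqrt(3), and the kaiming bound gain * sqrt(3 / fan) collapses to 1 / sqrt(fan). A quick numerical check (sketch only, fan value is arbitrary):

import math
import torch.nn as nn

fan = 120
gain = nn.init.calculate_gain('leaky_relu', math.sqrt(5))   # sqrt(2 / 6) = 1 / sqrt(3)
bound = gain * math.sqrt(3.0 / fan)                         # kaiming_uniform_ bound
print(abs(bound - 1.0 / math.sqrt(fan)) < 1e-12)            # True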
Example #16
    def reset_parameters(self):
        # init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        # if self.bias is not None:
        #     fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
        #     bound = 1 / math.sqrt(fan_in)
        #     init.uniform_(self.bias, -bound, bound)

        kaiming_uniform_multihead(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight[0])
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)
Example #17
def proposed_init_wn(layer):
    """
    He init which preserves the norm in the forward pass for weight-normalized ReLU networks:
        w ~ N(0, 1/fan_in)
    """
    w, b = layer.weight, layer.bias
    fan_in, _ = _calculate_fan_in_and_fan_out(w)
    gain = 1.
    std = gain / math.sqrt(fan_in)
    with torch.no_grad():
        w.normal_(0, std)
        b.zero_()
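A hypothetical usage sketch: initialize first, then wrap the layer with weight normalization, which is the setting the docstring refers to.

# Sketch only: std = 1 / sqrt(fan_in), bias zeroed, then the layer is weight-normalized.
import math
import torch
import torch.nn as nn
from torch.nn.init import _calculate_fan_in_and_fan_out

layer = nn.Linear(256, 128)
proposed_init_wn(layer)
layer = nn.utils.weight_norm(layer)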
Example #18
def ComplexIndependentFilters(tensor, init_criterion, seed):
    if isinstance(tensor, Variable):
        ComplexIndependentFilters(tensor.data, init_criterion, seed)
        return tensor
    filter_type = None
    if len(tensor.size()) == 3:
        filter_type = 'Conv1d'
        num_rows = int(tensor.size()[0] / 2) * tensor.size()[1]
        num_cols = tensor.size()[2]
        kernel_size = (tensor.size()[2], )
    elif len(tensor.size()) == 4:
        filter_type = 'Conv2d'
        num_rows = int(tensor.size()[0] / 2) * tensor.size()[1]
        num_cols = tensor.size()[2] * tensor.size()[3]
        kernel_size = (tensor.size()[2], tensor.size()[3])
    else:
        sys.exit('This convolution type is not supported.')
    flat_shape = (int(num_rows), int(num_cols))
    rng = RandomState(seed)
    r = rng.uniform(size=flat_shape)
    i = rng.uniform(size=flat_shape)
    z = r + 1j * i
    u, _, v = np.linalg.svd(z)
    unitary_z = np.dot(
        u, np.dot(np.eye(int(num_rows), int(num_cols)),
                  np.conjugate(v).T))
    real_unitary = unitary_z.real
    imag_unitary = unitary_z.imag

    indep_real = np.reshape(real_unitary, (num_rows, ) + kernel_size)
    indep_imag = np.reshape(imag_unitary, (num_rows, ) + kernel_size)
    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    if init_criterion == 'glorot':
        desired_var = 1. / (fan_in + fan_out)
    elif init_criterion == 'he':
        desired_var = 1. / fan_in
    else:
        raise ValueError('invalid init criterion', init_criterion)

    multip_real = np.sqrt(desired_var / np.var(indep_real))
    multip_imag = np.sqrt(desired_var / np.var(indep_imag))
    scaled_real = multip_real * indep_real
    scaled_imag = multip_imag * indep_imag

    kernel_shape = (int(tensor.size()[0] / 2), tensor.size()[1]) + kernel_size
    weight_real = np.reshape(scaled_real, kernel_shape)
    weight_imag = np.reshape(scaled_imag, kernel_shape)

    weight = np.concatenate([weight_real, weight_imag], axis=0)
    temp_weight = torch.from_numpy(weight).float()
    tensor.copy_(temp_weight)
    del temp_weight
    return tensor
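A hypothetical call, only to show the expected layout: the first half of the output channels holds the real part and the second half the imaginary part, so the out-channel dimension must be even.

# Sketch only: a complex 2-D conv weight with 8 real + 8 imaginary output channels.
import sys
import numpy as np
import torch
from numpy.random import RandomState
from torch.autograd import Variable
from torch.nn.init import _calculate_fan_in_and_fan_out

weight = torch.empty(16, 4, 3, 3)   # (2 * 8, in_channels, kH, kW)
ComplexIndependentFilters(weight, init_criterion='he', seed=1234)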
Example #19
    def reset_parameters(self):
        with torch.no_grad():
            init.kaiming_uniform_(self.weight_forward, a=math.sqrt(5))
            if self.backward_type == 'feedback_alignment':
                init.kaiming_uniform_(self.weight_backward.data, a=math.sqrt(5))
            elif self.backward_type == 'sign_symmetry':
                self.weight_backward = torch.sign(self.weight_forward.data)

            if self.bias is not None:
                fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight_forward)
                bound = 1 / math.sqrt(fan_in)
                init.uniform_(self.bias, -bound, bound)
Example #20
	def __init__(self, state_dim, action_dim, learning_rate, epsilon, seed, batch_size, tau, nhid = 300):
		self.state_dim = state_dim
		self.action_dim = action_dim
		self.learning_rate = learning_rate 
		self.epsilon = epsilon
		self.seed = seed
		self.batch_size = batch_size
		self.tau = tau
		self.duel_enable = False
		self.duel_type = False
		self.nhid = nhid

		super(CriticNetwork, self).__init__()
	
		self.layer1 = nn.Linear(self.state_dim,24)
		n = weight_init._calculate_fan_in_and_fan_out(self.layer1.weight)[0]
		torch.manual_seed(self.seed)		
		self.layer1.weight.data.uniform_(-math.sqrt(6./n), math.sqrt(6./n))		
		self.layer2 = nn.Linear(24,24)
		n = weight_init._calculate_fan_in_and_fan_out(self.layer2.weight)[0]
		torch.manual_seed(self.seed)		
		self.layer2.weight.data.uniform_(-math.sqrt(6./n), math.sqrt(6./n))

		# RL-LSTM
		self.layerLSTM = torch.nn.LSTMCell(24,nhid,bias=True)
		self.hiddenLSTM = self.init_hidden(self.batch_size)
		self.init_lstmCellWeights()
		self.layerLinearPostLSTM = nn.Linear(nhid,24)
		n = weight_init._calculate_fan_in_and_fan_out(self.layerLinearPostLSTM.weight)[0]
		torch.manual_seed(self.seed)
		self.layerLinearPostLSTM.weight.data.uniform_(-math.sqrt(6. / n), math.sqrt(6. / n))


		self.layer3 = nn.Linear(24,action_dim)
		n = weight_init._calculate_fan_in_and_fan_out(self.layer3.weight)[0]
		torch.manual_seed(self.seed)		
		self.layer3.weight.data.uniform_(-math.sqrt(6./n), math.sqrt(6./n))		

		self.loss_fn = torch.nn.MSELoss(size_average=True)
		self.optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate, weight_decay = 0.01)		
Example #21
 def reset_parameters(self) -> None:
     if get_setting("basic_torch"):
         init.kaiming_uniform_(self.weight, a=math.sqrt(5))
         if self.bias is not None:
             fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
             bound = 1 / math.sqrt(fan_in)
             init.uniform_(self.bias, -bound, bound)
     else:
         # init.kaiming_uniform_(self.weight, a=math.sqrt(5))
         init.kaiming_normal_(self.weight,
                              a=0,
                              mode='fan_in',
                              nonlinearity='relu')
Example #22
 def reset_parameters(self):
     """
     Resets the parameters of the layer
     """
     # ReLU activations are used, so init using Kaiming initialisation
     init.kaiming_uniform_(self.w_mu, a=math.sqrt(5))
     
     # Initialize the log variance uniformly, the exponent will be around 0
     init.uniform_(self.w_log_sigma, self.log_sigma_prior_init-0.1, self.log_sigma_prior_init)
     if self.bias is not None:
         fan_in, _ = init._calculate_fan_in_and_fan_out(self.w_mu)
         bound = 1 / math.sqrt(fan_in)
         init.uniform_(self.bias, -bound, bound)
Example #23
 def reset_parameters(self):
     if self.is_weight_value is not None and self.is_weight_value is False:
         if self.weight.dtype is torch.half:
             dtype = self.weight.dtype
             weight = self.weight.to(torch.float)
             init.kaiming_uniform_(weight, a=math.sqrt(5))
             self.weight = Parameter(weight.to(dtype))
         else:
             init.kaiming_uniform_(self.weight, a=math.sqrt(5))
     if self.bias is not None and self.is_bias_value is False:
         fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
         bound = 1 / math.sqrt(fan_in)
         init.uniform_(self.bias, -bound, bound)
Example #24
def kaiming_uniform_(tensor, gain=1.0):

    _scale = sqrt(3.0)
    if gain is not None and gain > 0.0 and gain != 1.0:
        _scale *= gain

    if tensor.requires_grad and (tensor.dim() > 1):
        with torch.no_grad():
            _fin, _ = _calculate_fan_in_and_fan_out(tensor)
            _bound = _scale / sqrt(float(_fin))
            tensor.uniform_(-_bound, _bound)

    return tensor
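A short usage note (not from the original source): unlike torch.nn.init.kaiming_uniform_, this variant silently skips tensors that are frozen or one-dimensional, so biases pass through untouched.

# Sketch only: only the 2-D trainable tensor is re-initialized.
import torch
from math import sqrt
from torch.nn.init import _calculate_fan_in_and_fan_out

w = torch.empty(64, 128, requires_grad=True)
b = torch.empty(64, requires_grad=True)
kaiming_uniform_(w, gain=1.0)   # bound = sqrt(3 / 128)
kaiming_uniform_(b, gain=1.0)   # no-op: b.dim() == 1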
Example #25
def init_model_params_kaiming(modin, gain=1.0):

    _scale = sqrt(3.0)
    if gain is not None and gain > 0.0 and gain != 1.0:
        _scale *= gain
    with torch.no_grad():
        for p in modin.parameters():
            if p.requires_grad and (p.dim() > 1):
                _fin, _ = _calculate_fan_in_and_fan_out(p)
                _bound = _scale / sqrt(float(_fin))
                p.uniform_(-_bound, _bound)

    return modin
Example #26
    def reset_parameters(self):
        ######################Initial#########################
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        #init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        ######################################################
        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)

        if self.eta is not None:
            self.eta.data.fill_(1)  # for initialization of eta
Example #27
 def reset_parameters(self):
     # Initialize primary weights
     init.kaiming_uniform_(self.primary_weight,
                           a=math.sqrt(5),
                           nonlinearity='relu')
     # Initialize adversarial weights - much smaller
     # TODO: This needs to be exactly the epsilon-inf-ball
     # TODO: Epsilon needs to be passed as a parameter
     init.zeros_(self.adversary_weight)
     # Initialize biases
     if self.bias is not None:
         fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
         bound = 1 / math.sqrt(fan_in)
         init.uniform_(self.bias, -bound, bound)
Example #28
    def __init__(self, in_features, out_features):
        super(SparseLinear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features

        self.weight = torch.empty(out_features, in_features)
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        self.weight = Parameter(self.weight.to_sparse())

        self.bias = torch.Tensor(out_features)
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias, -bound, bound)
        self.bias = Parameter(self.bias)
Example #29
def ME_weights_init_pytorch(m):
    classname = m.__class__.__name__
    if isinstance(m, ME.MinkowskiConvolution):
        kaiming_uniform_(m.kernel, a=math.sqrt(5))
        if m.bias is not None:
            fan_in, _ = _calculate_fan_in_and_fan_out(m.kernel)
            bound = 1 / math.sqrt(fan_in)
            uniform_(m.bias, -bound, bound)

    if isinstance(m, ME.MinkowskiLinear):
        m.linear.reset_parameters()

    if isinstance(m, ME.MinkowskiBatchNorm):
        m.bn.reset_parameters()
Example #30
 def init_weights(self, hidden_dim):
     """
     Ref:
         Linear: https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/linear.py#L58
         RNN: https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/rnn.py#L120
         Embedding: https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/sparse.py#L108
     """
     stdv = 1.0 / math.sqrt(hidden_dim)
     for weight in self.rnn.parameters():
         init.normal_(weight, 0, stdv)
     init.kaiming_normal_(self.linear.weight, a=math.sqrt(5))
     fan_in, _ = init._calculate_fan_in_and_fan_out(self.linear.weight)
     bound = 1 / math.sqrt(fan_in)
     init.normal_(self.linear.bias, -bound, bound)