def variance_scaling_(tensor, gain=1.):
    # type: (Tensor, float) -> Tensor
    r"""
    initializer for SeparableConv in Regressor/Classifier
    reference: https://keras.io/zh/initializers/ VarianceScaling
    """
    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    std = math.sqrt(gain / float(fan_in))
    return _no_grad_normal_(tensor, 0., std)

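# Usage sketch (added for illustration, not from the original source): applying variance_scaling_
# to a Conv2d weight. _calculate_fan_in_and_fan_out and _no_grad_normal_ are the private
# torch.nn.init helpers the snippet relies on; the layer shape below is illustrative only.
import torch.nn as nn

conv = nn.Conv2d(in_channels=64, out_channels=88, kernel_size=3, padding=1)
variance_scaling_(conv.weight, gain=1.)   # weight ~ N(0, gain / fan_in), i.e. std = sqrt(gain / fan_in)
nn.init.zeros_(conv.bias)                 # bias handled separately; zeroing is a common choice
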
def __init__(self, in_channels, out_channels, kernel_size, D_mul=None, stride=1,
             padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'):
    super(DOConv2d, self).__init__()

    kernel_size = _pair(kernel_size)
    stride = _pair(stride)
    padding = _pair(padding)
    dilation = _pair(dilation)

    if in_channels % groups != 0:
        raise ValueError('in_channels must be divisible by groups')
    if out_channels % groups != 0:
        raise ValueError('out_channels must be divisible by groups')
    valid_padding_modes = {'zeros', 'reflect', 'replicate', 'circular'}
    if padding_mode not in valid_padding_modes:
        raise ValueError("padding_mode must be one of {}, but got padding_mode='{}'".format(
            valid_padding_modes, padding_mode))
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    self.dilation = dilation
    self.groups = groups
    self.padding_mode = padding_mode
    self._padding_repeated_twice = tuple(x for x in self.padding for _ in range(2))

    #################################### Initialization of D & W ####################################
    M = self.kernel_size[0]
    N = self.kernel_size[1]
    self.D_mul = M * N if D_mul is None or M * N <= 1 else D_mul
    self.W = Parameter(torch.Tensor(out_channels, in_channels // groups, self.D_mul))
    init.kaiming_uniform_(self.W, a=math.sqrt(5))

    if M * N > 1:
        self.D = Parameter(torch.Tensor(in_channels, M * N, self.D_mul))
        init_zero = np.zeros([in_channels, M * N, self.D_mul], dtype=np.float32)
        self.D.data = torch.from_numpy(init_zero)

        eye = torch.reshape(torch.eye(M * N, dtype=torch.float32), (1, M * N, M * N))
        D_diag = eye.repeat((in_channels, 1, self.D_mul // (M * N)))
        if self.D_mul % (M * N) != 0:  # the cases when D_mul > M * N
            zeros = torch.zeros([in_channels, M * N, self.D_mul % (M * N)])
            self.D_diag = Parameter(torch.cat([D_diag, zeros], dim=2), requires_grad=False)
        else:  # the case when D_mul = M * N
            self.D_diag = Parameter(D_diag, requires_grad=False)
    ##################################################################################################

    if bias:
        self.bias = Parameter(torch.Tensor(out_channels))
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.W)
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias, -bound, bound)
    else:
        self.register_parameter('bias', None)

def ddpg_init(conv_layers, lin_layers):
    print("doing ddpg_init")
    for layer in [*conv_layers, *lin_layers[:-1]]:
        if isinstance(layer, nn.Conv2d) or isinstance(layer, nn.Linear):
            fan_in, _ = init._calculate_fan_in_and_fan_out(layer.weight)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(layer.weight, -bound, bound)
            init.uniform_(layer.bias, -bound, bound)
    init.uniform_(lin_layers[-1].weight, -3e-4, 3e-4)
    init.uniform_(lin_layers[-1].bias, -3e-4, 3e-4)

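# Usage sketch (added for illustration, not from the original source): the DDPG-style init above
# expects the hidden layers plus the final output layer; layer shapes below are illustrative only.
# Assumes torch.nn (nn), torch.nn.init (init) and math are in scope.
import torch.nn as nn

conv_layers = [nn.Conv2d(3, 32, 8, stride=4), nn.Conv2d(32, 64, 4, stride=2)]
lin_layers = [nn.Linear(64 * 9 * 9, 200), nn.Linear(200, 1)]
ddpg_init(conv_layers, lin_layers)   # hidden layers ~ U(-1/sqrt(fan_in), 1/sqrt(fan_in)), output ~ U(-3e-4, 3e-4)
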
def proposed_weight_norm_g_init(wn_layer, gain=2., version=1):
    """ Initialize WN's g to preserve the norm of the forward pass """
    if version == 1:
        fan_in, fan_out = _calculate_fan_in_and_fan_out(wn_layer.weight)
        wn_layer.weight_g = Parameter(torch.ones_like(wn_layer.weight_g) * math.sqrt(gain * fan_in / fan_out))
    elif version == 13:
        wn_layer.weight_g = Parameter(torch.ones_like(wn_layer.weight_g) * math.sqrt(gain))
    else:
        raise ValueError("proposed_weight_norm_g_init: version should be in {1, 13}")

def reset_parameters(self):
    init.kaiming_uniform_(self.weight1_1, a=math.sqrt(5))
    init.kaiming_uniform_(self.weight1_2, a=math.sqrt(5))
    init.kaiming_uniform_(self.weight2_1, a=math.sqrt(5))
    init.kaiming_uniform_(self.weight2_2, a=math.sqrt(5))
    # init.kaiming_uniform_(self.distribution_1, a=math.sqrt(5))
    # init.kaiming_uniform_(self.distribution_2, a=math.sqrt(5))
    if self.bias is not None:
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight1_1)
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias, -bound, bound)

def reset_parameters(self):
    # # init.kaiming_uniform_(self.weight, a=math.sqrt(0))  # kaiming init
    # if (reset_indv_bias is None) or (reset_indv_bias is False):
    #     init.xavier_uniform_(self.weight, gain=1.0)  # xavier init
    # if (reset_indv_bias is None) or ((self.bias is not None) and reset_indv_bias is True):
    #     init.constant_(self.bias, 0)
    init.kaiming_uniform_(self.weight, a=math.sqrt(5))
    if self.bias is not None:
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias, -bound, bound)

def reset_parameters(self):
    init.kaiming_uniform_(self.U, a=math.sqrt(5))
    init.orthogonal_(self.V)
    if self.bias is not None:
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.U.t())
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias, -bound, bound)
        init.uniform_(self.alpha, -bound, bound)
        init.uniform_(self.beta1, -bound, bound)
        init.uniform_(self.beta2, -bound, bound)

def __init__(self, weight, bias=None):
    super().__init__()
    self.name = 'Linear2'
    self.weight = weight
    if bias is None:
        self.bias = Parameter(torch.Tensor(weight.size(0)))
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias, -bound, bound)
    else:
        self.bias = bias

def xavier_uniform_n_(w, gain=1., n=4):
    """ Xavier-uniform init for a parameter that stacks n weight matrices along fan_out (e.g. the 4 LSTM gates) """
    with torch.no_grad():
        fan_in, fan_out = _calculate_fan_in_and_fan_out(w)
        assert fan_out % n == 0, "fan_out should be divisible by n"
        fan_out = fan_out // n
        std = gain * math.sqrt(2.0 / (fan_in + fan_out))
        a = math.sqrt(3.0) * std
        nn.init.uniform_(w, -a, a)

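# Usage sketch (added for illustration, not from the original source): nn.LSTM stacks the 4 gate
# matrices along fan_out, which is what the n=4 default above accounts for. Assumes torch, math,
# torch.nn (nn) and _calculate_fan_in_and_fan_out are in scope.
import torch.nn as nn

lstm = nn.LSTM(input_size=128, hidden_size=256, num_layers=1)
for name, param in lstm.named_parameters():
    if name.startswith("weight"):
        xavier_uniform_n_(param, gain=1., n=4)   # weight_ih_l0 / weight_hh_l0 have shape (4*hidden, *)
    elif name.startswith("bias"):
        nn.init.zeros_(param)
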
def he_init(tensor, dist='uniform'):
    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    n = fan_in
    if dist == "uniform":
        # tensor.uniform_(-1.0, 1.0)
        # Scale so that the final variance of this layer is 3/input_features
        tensor.mul_(np.sqrt(3.0 / n) / tensor.std())
    else:
        tensor.normal_(0.0, np.sqrt(3.0 / n))

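# Usage sketch (added for illustration, not from the original source): for dist='uniform' the
# tensor must already hold random values, since its current std is used as the normalizer
# (see the commented-out uniform_ call above). Assumes torch and numpy (np) are in scope.
import numpy as np
import torch

w = torch.empty(256, 128).uniform_(-1.0, 1.0)
he_init(w, dist='uniform')   # rescales in place so that var(w) ≈ 3 / fan_in
v = torch.empty(256, 128)
he_init(v, dist='normal')    # draws fresh values with var(v) = 3 / fan_in
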
def reset_parameters(self):
    init.kaiming_uniform_(self.weight.mean, a=math.sqrt(5))
    init.normal_(self.weight.scale, -2.0, 0.15)
    if self.bias is not None:
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight.mean)
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias.mean, -bound, bound)
        init.normal_(self.bias.scale, -2.0, 0.15)
    self.sample()

def reset_parameters(self, **kwargs):
    if len(kwargs.keys()) == 0:
        # default init, see https://pytorch.org/docs/stable/_modules/torch/nn/modules/linear.html#Linear
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
    else:
        init.kaiming_uniform_(self.weight, **kwargs)
    if self.bias is not None:
        # default init, see https://pytorch.org/docs/stable/_modules/torch/nn/modules/linear.html#Linear
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias, -bound, bound)

def reset_parameters(self): """ Initialize the weights and bias. :return: None """ init.kaiming_uniform_(self.general_weight, a=math.sqrt(5)) init.kaiming_uniform_(self.response_weight, a=math.sqrt(5)) if self.general_bias is not None: fan_in, _ = init._calculate_fan_in_and_fan_out(self.general_weight) bound = 1 / math.sqrt(fan_in) init.uniform_(self.general_bias, -bound, bound) init.uniform_(self.response_bias, -bound, bound) self.hyper_bottleneck.weight.data.fill_(0)
def reset_parameters(self, gain=1):
    # init.kaiming_uniform_(self.weight, a=math.sqrt(5))
    super(_ConvNdRF, self).reset_parameters()
    kaiming_uniform_mod(self.weight, a=math.sqrt(5), gain=gain, mode='fan_in', nonlinearity='leaky_relu')
    if self.bias is not None:
        fan_in, _ = _calculate_fan_in_and_fan_out(self.weight)
        bound = gain * (1 / math.sqrt(fan_in))
        init.uniform_(self.bias, -bound, bound)

def reset_parameters(self) -> None:
    # Setting a=sqrt(5) in kaiming_uniform is the same as initializing with
    # uniform(-1/sqrt(in_features), 1/sqrt(in_features)). For details, see
    # https://github.com/pytorch/pytorch/issues/57109
    for b in range(self.B):
        init.kaiming_uniform_(self.weight[b], a=math.sqrt(5), mode='fan_out')
        if self.bias is not None:
            _, fan_out = init._calculate_fan_in_and_fan_out(self.weight[b])
            bound = 1 / math.sqrt(fan_out) if fan_out > 0 else 0
            init.uniform_(self.bias[b], -bound, bound)

def reset_parameters(self):
    # init.kaiming_uniform_(self.weight, a=math.sqrt(5))
    # if self.bias is not None:
    #     fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
    #     bound = 1 / math.sqrt(fan_in)
    #     init.uniform_(self.bias, -bound, bound)
    kaiming_uniform_multihead(self.weight, a=math.sqrt(5))
    if self.bias is not None:
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight[0])
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias, -bound, bound)

def proposed_init_wn(layer):
    """
    He init which preserves the norm in the forward pass for weight-normalized ReLU networks:
    w ~ N(0, 1/fan_in)
    """
    w, b = layer.weight, layer.bias
    fan_in, _ = _calculate_fan_in_and_fan_out(w)
    gain = 1.
    std = gain / math.sqrt(fan_in)
    with torch.no_grad():
        w.normal_(0, std)
        b.zero_()

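# Usage sketch (added for illustration, not from the original source): proposed_init_wn is applied
# to a plain layer; wrapping it with weight normalization afterwards and calling
# proposed_weight_norm_g_init (defined earlier) is one plausible pairing, not a prescribed recipe.
# Assumes torch, math, Parameter and _calculate_fan_in_and_fan_out are in scope.
import torch.nn as nn

fc = nn.Linear(512, 512)
proposed_init_wn(fc)                                    # w ~ N(0, 1/fan_in), b = 0
fc_wn = nn.utils.weight_norm(fc)                        # adds weight_g / weight_v parameters
proposed_weight_norm_g_init(fc_wn, gain=2., version=1)  # g = sqrt(gain * fan_in / fan_out)
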
def ComplexIndependentFilters(tensor, init_criterion, seed):
    if isinstance(tensor, Variable):
        ComplexIndependentFilters(tensor.data, init_criterion, seed)
        return tensor

    filter_type = None
    if len(tensor.size()) == 3:
        filter_type = 'Conv1d'
        num_rows = int(tensor.size()[0] / 2) * tensor.size()[1]
        num_cols = tensor.size()[2]
        kernel_size = (tensor.size()[2], )
    elif len(tensor.size()) == 4:
        filter_type = 'Conv2d'
        num_rows = int(tensor.size()[0] / 2) * tensor.size()[1]
        num_cols = tensor.size()[2] * tensor.size()[3]
        kernel_size = (tensor.size()[2], tensor.size()[3])
    else:
        sys.exit('Unsupported convolution type.')

    flat_shape = (int(num_rows), int(num_cols))
    rng = RandomState(seed)
    r = rng.uniform(size=flat_shape)
    i = rng.uniform(size=flat_shape)
    z = r + 1j * i
    u, _, v = np.linalg.svd(z)
    unitary_z = np.dot(u, np.dot(np.eye(int(num_rows), int(num_cols)), np.conjugate(v).T))
    real_unitary = unitary_z.real
    imag_unitary = unitary_z.imag
    indep_real = np.reshape(real_unitary, (num_rows, ) + kernel_size)
    indep_imag = np.reshape(imag_unitary, (num_rows, ) + kernel_size)

    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
    if init_criterion == 'glorot':
        desired_var = 1. / (fan_in + fan_out)
    elif init_criterion == 'he':
        desired_var = 1. / fan_in
    else:
        raise ValueError('invalid init criterion', init_criterion)

    multip_real = np.sqrt(desired_var / np.var(indep_real))
    multip_imag = np.sqrt(desired_var / np.var(indep_imag))
    scaled_real = multip_real * indep_real
    scaled_imag = multip_imag * indep_imag

    kernel_shape = (int(tensor.size()[0] / 2), tensor.size()[1]) + kernel_size
    weight_real = np.reshape(scaled_real, kernel_shape)
    weight_imag = np.reshape(scaled_imag, kernel_shape)
    weight = np.concatenate([weight_real, weight_imag], axis=0)

    temp_weight = torch.from_numpy(weight).float()
    tensor.copy_(temp_weight)
    del temp_weight
    return tensor

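# Usage sketch (added for illustration, not from the original source): the initializer treats the
# first half of the output channels as the real part and the second half as the imaginary part.
# Assumes the snippet's own imports (sys, numpy as np, numpy.random.RandomState, torch,
# torch.autograd.Variable, _calculate_fan_in_and_fan_out) are in scope.
import torch

w = torch.empty(2 * 16, 8, 3, 3)   # 16 real + 16 imaginary output channels, 3x3 kernels
ComplexIndependentFilters(w, init_criterion='he', seed=1337)
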
def reset_parameters(self):
    with torch.no_grad():
        init.kaiming_uniform_(self.weight_forward, a=math.sqrt(5))
        if self.backward_type == 'feedback_alignment':
            init.kaiming_uniform_(self.weight_backward.data, a=math.sqrt(5))
        elif self.backward_type == 'sign_symmetry':
            self.weight_backward = torch.sign(self.weight_forward.data)
        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight_forward)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)

def __init__(self, state_dim, action_dim, learning_rate, epsilon, seed, batch_size, tau, nhid=300):
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.learning_rate = learning_rate
    self.epsilon = epsilon
    self.seed = seed
    self.batch_size = batch_size
    self.tau = tau
    self.duel_enable = False
    self.duel_type = False
    self.nhid = nhid

    super(CriticNetwork, self).__init__()

    self.layer1 = nn.Linear(self.state_dim, 24)
    n = weight_init._calculate_fan_in_and_fan_out(self.layer1.weight)[0]
    torch.manual_seed(self.seed)
    self.layer1.weight.data.uniform_(-math.sqrt(6. / n), math.sqrt(6. / n))

    self.layer2 = nn.Linear(24, 24)
    n = weight_init._calculate_fan_in_and_fan_out(self.layer2.weight)[0]
    torch.manual_seed(self.seed)
    self.layer2.weight.data.uniform_(-math.sqrt(6. / n), math.sqrt(6. / n))

    # RL-LSTM
    self.layerLSTM = torch.nn.LSTMCell(24, nhid, bias=True)
    self.hiddenLSTM = self.init_hidden(self.batch_size)
    self.init_lstmCellWeights()

    self.layerLinearPostLSTM = nn.Linear(nhid, 24)
    n = weight_init._calculate_fan_in_and_fan_out(self.layerLinearPostLSTM.weight)[0]
    torch.manual_seed(self.seed)
    self.layerLinearPostLSTM.weight.data.uniform_(-math.sqrt(6. / n), math.sqrt(6. / n))

    self.layer3 = nn.Linear(24, action_dim)
    n = weight_init._calculate_fan_in_and_fan_out(self.layer3.weight)[0]
    torch.manual_seed(self.seed)
    self.layer3.weight.data.uniform_(-math.sqrt(6. / n), math.sqrt(6. / n))

    self.loss_fn = torch.nn.MSELoss(size_average=True)
    self.optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate, weight_decay=0.01)

def reset_parameters(self) -> None:
    if get_setting("basic_torch"):
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)
    else:
        # init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        init.kaiming_normal_(self.weight, a=0, mode='fan_in', nonlinearity='relu')

def reset_parameters(self): """ Resets the parameters of the layer """ # ReLU activations are used, so init using Kaiming initialisation init.kaiming_uniform_(self.w_mu, a=math.sqrt(5)) # Initialize the log variance uniformly, the exponent will be around 0 init.uniform_(self.w_log_sigma, self.log_sigma_prior_init-0.1, self.log_sigma_prior_init) if self.bias is not None: fan_in, _ = init._calculate_fan_in_and_fan_out(self.w_mu) bound = 1 / math.sqrt(fan_in) init.uniform_(self.bias, -bound, bound)
def reset_parameters(self):
    if self.is_weight_value is not None and self.is_weight_value is False:
        if self.weight.dtype is torch.half:
            dtype = self.weight.dtype
            weight = self.weight.to(torch.float)
            init.kaiming_uniform_(weight, a=math.sqrt(5))
            self.weight = Parameter(weight.to(dtype))
        else:
            init.kaiming_uniform_(self.weight, a=math.sqrt(5))
    if self.bias is not None and self.is_bias_value is False:
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias, -bound, bound)

def kaiming_uniform_(tensor, gain=1.0):
    _scale = sqrt(3.0)
    if gain is not None and gain > 0.0 and gain != 1.0:
        _scale *= gain
    if tensor.requires_grad and (tensor.dim() > 1):
        with torch.no_grad():
            _fin, _ = _calculate_fan_in_and_fan_out(tensor)
            _bound = _scale / sqrt(float(_fin))
            tensor.uniform_(-_bound, _bound)
    return tensor

def init_model_params_kaiming(modin, gain=1.0):
    _scale = sqrt(3.0)
    if gain is not None and gain > 0.0 and gain != 1.0:
        _scale *= gain
    with torch.no_grad():
        for p in modin.parameters():
            if p.requires_grad and (p.dim() > 1):
                _fin, _ = _calculate_fan_in_and_fan_out(p)
                _bound = _scale / sqrt(float(_fin))
                p.uniform_(-_bound, _bound)
    return modin

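# Usage sketch (added for illustration, not from the original source): the module-level helper
# above only touches parameters with dim() > 1, so biases and norm scales keep their defaults.
# Assumes torch, math.sqrt and _calculate_fan_in_and_fan_out are in scope; the model is illustrative.
import torch.nn as nn

model = nn.Sequential(nn.Linear(784, 256), nn.ReLU(), nn.Linear(256, 10))
init_model_params_kaiming(model, gain=1.0)   # weights ~ U(-sqrt(3/fan_in), sqrt(3/fan_in))
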
def reset_parameters(self):
    ###################### Initial #########################
    stdv = 1. / math.sqrt(self.weight.size(1))
    self.weight.data.uniform_(-stdv, stdv)
    # init.kaiming_uniform_(self.weight, a=math.sqrt(5))
    ########################################################
    if self.bias is not None:
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias, -bound, bound)
    if self.eta is not None:
        self.eta.data.fill_(1)  # for initialization of eta

def reset_parameters(self):
    # Initialize primary weights
    init.kaiming_uniform_(self.primary_weight, a=math.sqrt(5), nonlinearity='relu')
    # Initialize adversarial weights - much smaller
    # TODO: This needs to be exactly the epsilon-inf-ball
    # TODO: Epsilon needs to be passed as a parameter
    init.zeros_(self.adversary_weight)
    # Initialize biases (fan-in taken from the primary weight)
    if self.bias is not None:
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.primary_weight)
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias, -bound, bound)

def __init__(self, in_features, out_features):
    super(SparseLinear, self).__init__()
    self.in_features = in_features
    self.out_features = out_features

    self.weight = torch.empty(out_features, in_features)
    init.kaiming_uniform_(self.weight, a=math.sqrt(5))
    self.weight = Parameter(self.weight.to_sparse())

    self.bias = torch.Tensor(out_features)
    fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
    bound = 1 / math.sqrt(fan_in)
    init.uniform_(self.bias, -bound, bound)
    self.bias = Parameter(self.bias)

def ME_weights_init_pytorch(m):
    classname = m.__class__.__name__
    if isinstance(m, ME.MinkowskiConvolution):
        kaiming_uniform_(m.kernel, a=math.sqrt(5))
        if m.bias is not None:
            fan_in, _ = _calculate_fan_in_and_fan_out(m.kernel)
            bound = 1 / math.sqrt(fan_in)
            uniform_(m.bias, -bound, bound)
    if isinstance(m, ME.MinkowskiLinear):
        m.linear.reset_parameters()
    if isinstance(m, ME.MinkowskiBatchNorm):
        m.bn.reset_parameters()

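# Usage sketch (added for illustration, not from the original source): like the standard PyTorch
# init helpers, the function above is meant to be mapped over submodules with Module.apply.
# The toy network assumes MinkowskiEngine is installed and imported as ME; layer arguments are
# illustrative only.
import torch.nn as nn

model = nn.Sequential(
    ME.MinkowskiConvolution(3, 32, kernel_size=3, dimension=3),
    ME.MinkowskiBatchNorm(32),
    ME.MinkowskiReLU(),
    ME.MinkowskiLinear(32, 10),
)
model.apply(ME_weights_init_pytorch)   # visits every submodule, including the container itself
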
def init_weights(self, hidden_dim):
    """
    Ref:
    Linear: https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/linear.py#L58
    RNN: https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/rnn.py#L120
    Embedding: https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/sparse.py#L108
    """
    stdv = 1.0 / math.sqrt(hidden_dim)
    for weight in self.rnn.parameters():
        init.normal_(weight, 0, stdv)
    init.kaiming_normal_(self.linear.weight, a=math.sqrt(5))
    fan_in, _ = init._calculate_fan_in_and_fan_out(self.linear.weight)
    bound = 1 / math.sqrt(fan_in)
    # bias init follows the referenced Linear default: uniform in [-bound, bound]
    init.uniform_(self.linear.bias, -bound, bound)
