def __init__(self, in_features: int, out_features: int, bias: bool = True) -> None:
    super(Linear, self).__init__()
    self.linear = nn.Linear(in_features, out_features, bias=bias)
    init.xavier_uniform_(self.linear.weight)
    if bias:
        init.zeros_(self.linear.bias)
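# A minimal check of the initialization above, assuming the enclosing class is
# `Linear(nn.Module)` (only this __init__ appears in the snippet):
import torch

layer = Linear(16, 4)
assert torch.allclose(layer.linear.bias, torch.zeros(4))  # bias zeroed
print(layer.linear.weight.std())  # roughly sqrt(2 / (16 + 4)) ~ 0.316 under Xavier uniform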
def _initialize(self):
    init.ones_(self.weights.weight)
    init.zeros_(self.biases.weight)
def __init__(self, inplanes, planes, stride=1, downsample=None,
             radix=1, cardinality=1, bottleneck_width=64,
             avd=False, avd_first=False, dilation=1, is_first=False,
             rectified_conv=False, rectify_avg=False,
             norm_layer=None, dropblock_prob=0.0, last_gamma=False):
    super(Bottleneck, self).__init__()
    group_width = int(planes * (bottleneck_width / 64.)) * cardinality
    self.conv1 = nn.Conv2d(inplanes, group_width, kernel_size=1, bias=False)
    self.bn1 = norm_layer(group_width)
    self.dropblock_prob = dropblock_prob
    self.radix = radix
    self.avd = avd and (stride > 1 or is_first)
    self.avd_first = avd_first

    if self.avd:
        self.avd_layer = nn.AvgPool2d(3, stride, padding=1)
        stride = 1

    if dropblock_prob > 0.0:
        self.dropblock1 = DropBlock2D(dropblock_prob, 3)
        if radix == 1:
            self.dropblock2 = DropBlock2D(dropblock_prob, 3)
        self.dropblock3 = DropBlock2D(dropblock_prob, 3)

    if radix >= 1:
        self.conv2 = SplAtConv2d(
            group_width, group_width, kernel_size=3,
            stride=stride, padding=dilation,
            dilation=dilation, groups=cardinality, bias=False,
            radix=radix, rectify=rectified_conv,
            rectify_avg=rectify_avg,
            norm_layer=norm_layer,
            dropblock_prob=dropblock_prob)
    elif rectified_conv:
        from rfconv import RFConv2d
        self.conv2 = RFConv2d(
            group_width, group_width, kernel_size=3,
            stride=stride, padding=dilation,
            dilation=dilation, groups=cardinality, bias=False,
            average_mode=rectify_avg)
        self.bn2 = norm_layer(group_width)
    else:
        self.conv2 = nn.Conv2d(
            group_width, group_width, kernel_size=3,
            stride=stride, padding=dilation,
            dilation=dilation, groups=cardinality, bias=False)
        self.bn2 = norm_layer(group_width)

    self.conv3 = nn.Conv2d(group_width, planes * 4, kernel_size=1, bias=False)
    self.bn3 = norm_layer(planes * 4)

    if last_gamma:
        from torch.nn.init import zeros_
        zeros_(self.bn3.weight)

    self.relu = nn.ReLU(inplace=True)
    self.downsample = downsample
    self.dilation = dilation
    self.stride = stride
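# Why `last_gamma` zero-initializes bn3: with gamma == 0 the final BatchNorm
# outputs zeros, so the residual branch contributes nothing at the start of
# training and each block begins as (roughly) an identity mapping. A
# self-contained illustration on a plain nn.BatchNorm2d:
import torch
import torch.nn as nn
from torch.nn.init import zeros_

bn3 = nn.BatchNorm2d(256)
zeros_(bn3.weight)                 # gamma <- 0, beta already 0
bn3.eval()                         # use the default running stats (mean 0, var 1)
x = torch.randn(1, 256, 8, 8)
assert torch.all(bn3(x) == 0)      # the residual branch is silenced at init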
def reset_parameters(self):
    gain = init.calculate_gain('relu')
    self.gru.reset_parameters()
    for linear in self.linears:
        init.xavier_normal_(linear.weight, gain=gain)
        init.zeros_(linear.bias)
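# For reference: calculate_gain('relu') is sqrt(2) (~1.414). Passing it as the
# gain to xavier_normal_ widens the weight distribution to compensate for ReLU
# zeroing out roughly half of the activations.
import math
import torch.nn.init as init

assert math.isclose(init.calculate_gain('relu'), math.sqrt(2))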
def init_weights(self):
    for m in self.modules():
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            xavier_uniform_(m.weight)
            if m.bias is not None:
                zeros_(m.bias)
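# A minimal sketch of the same pattern applied to a free-standing model;
# the small Sequential below is made up for illustration:
import torch.nn as nn
from torch.nn.init import xavier_uniform_, zeros_

net = nn.Sequential(
    nn.Conv2d(3, 16, 3, padding=1),
    nn.ReLU(),
    nn.ConvTranspose2d(16, 3, 3, padding=1),
)
for m in net.modules():  # modules() yields net itself plus every submodule
    if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
        xavier_uniform_(m.weight)
        if m.bias is not None:
            zeros_(m.bias)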
def build(self, *inputs):
    # One row per candidate edge in the cell: step i has (1 + i) predecessors.
    k = sum(1 for i in range(self.step) for n in range(1 + i))
    self.alphas_cell = Parameter(torch.Tensor(k, 8))
    self.alphas_net = Parameter(torch.Tensor(self.num_layer, 4, 3))
    init.zeros_(self.alphas_net)
    init.zeros_(self.alphas_cell)
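# Zero-initialized architecture parameters make the softmax over candidate
# operations uniform at the start of the search, so no operation is favored
# before any training signal arrives. A quick self-contained check on one row:
import torch

row = torch.zeros(8)              # one row of alphas_cell
print(torch.softmax(row, dim=0))  # eight equal weights of 0.125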
        )
        self.standerembed = nn.Embedding(6, 128)
        self.bert = BertModel.from_pretrained(model_path, config=Config)

    def forward(self, input_ids, attention_mask, token_type_ids, c):
        conditional = self.standerembed(c)
        x1 = self.bert(input_ids, attention_mask=attention_mask,
                       token_type_ids=token_type_ids, conditional=conditional)
        x2 = x1.last_hidden_state
        logits = self.linear_relu_stack(x2[:, 0])
        return logits

model = NeuralNetwork(model_path).to(device)
print(model)

# Zero the conditional LayerNorm projection weights so the conditioned model
# starts out equivalent to the plain pretrained BERT.
for i in model.state_dict():
    if 'LayerNorm.bias_dense' in i or 'LayerNorm.weight_dense' in i:
        init.zeros_(model.state_dict()[i])
print(torch.sum(model.state_dict()['bert.encoder.layer.11.output.LayerNorm.bias_dense.weight']))
print(torch.sum(model.state_dict()['bert.encoder.layer.11.output.LayerNorm.weight_dense.weight']))

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)

batch_size = 32
maxlen = 512
training_data = CustomImageDataset(train_data, tokenizer, maxlen)
testing_data = CustomImageDataset(valid_data, tokenizer, maxlen)
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(testing_data, batch_size=batch_size)

def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    correct = 0
def reset_parameters(self): """Reinitialize learnable parameters.""" init.xavier_uniform_(self.weight) if self.bias is not None: init.zeros_(self.bias)
def reset_bn_parameters(self):
    self.reset_running_stats()
    init.uniform_(self.gamma)
    init.zeros_(self.beta)
def _initialize_weights(self):
    # Input-to-hidden gate weights: normal initialization.
    init.normal_(self.W_i.weight)
    init.normal_(self.W_f.weight)
    init.normal_(self.W_c.weight)
    init.normal_(self.W_o.weight)
    # Hidden-to-hidden (recurrent) gate weights: orthogonal initialization.
    init.orthogonal_(self.U_i.weight)
    init.orthogonal_(self.U_f.weight)
    init.orthogonal_(self.U_c.weight)
    init.orthogonal_(self.U_o.weight)
    # Attention weights.
    init.normal_(self.att_w.weight)
    init.normal_(self.att_u.weight)
    init.zeros_(self.att_v.weight)
    # All biases start at zero.
    init.zeros_(self.W_i.bias)
    init.zeros_(self.W_f.bias)
    init.zeros_(self.W_c.bias)
    init.zeros_(self.W_o.bias)
    init.zeros_(self.U_i.bias)
    init.zeros_(self.U_f.bias)
    init.zeros_(self.U_c.bias)
    init.zeros_(self.U_o.bias)
    init.zeros_(self.att_w.bias)
    init.zeros_(self.att_u.bias)
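# Why orthogonal init suits the recurrent U_* matrices: an orthogonal matrix
# preserves the norm of the hidden state when applied repeatedly, which helps
# against vanishing/exploding gradients over time. A self-contained check:
import torch
import torch.nn.init as init

U = init.orthogonal_(torch.empty(64, 64))
assert torch.allclose(U @ U.T, torch.eye(64), atol=1e-5)                   # U is orthogonal
h = torch.randn(64)
assert torch.isclose(torch.norm(U @ h), torch.norm(h), atol=1e-4)          # norm preserved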
def reset_parameters(self):
    init.xavier_uniform_(self.W)
    init.xavier_uniform_(self.U)
    init.zeros_(self.bias)
def init(self, emb_init):
    INIT.uniform_(self.emb, -emb_init, emb_init)
    INIT.zeros_(self.state_sum)
def reset_running_stats(self):
    self.running_mean.zero_()
    self.running_var.fill_(1)
    init.ones_(self.weight)
    init.zeros_(self.bias)
def weights_init_classifier(m):
    classname = m.__class__.__name__
    if classname.find('Linear') != -1:
        init.normal_(m.weight, 0, 0.001)
        # `if m.bias:` would raise on a multi-element tensor; test against None.
        if m.bias is not None:
            init.zeros_(m.bias)
def weights_init(m):
    classname = m.__class__.__name__
    if classname in ('Conv1d', 'Linear'):
        kaiming_normal_(m.weight, nonlinearity='relu')
        if m.bias is not None:
            zeros_(m.bias)
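# Classname-based initializers like the two functions above are designed to be
# passed to Module.apply, which calls them on every submodule. A minimal usage
# sketch, assuming weights_init (above) is in scope; the small net is made up
# for illustration:
import torch.nn as nn

net = nn.Sequential(nn.Conv1d(8, 16, kernel_size=3), nn.ReLU(), nn.Linear(16, 4))
net.apply(weights_init)  # visits Conv1d, ReLU, Linear, and the Sequential itself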
def reset_parameters(self):
    init.uniform_(self.weight)
    init.zeros_(self.bias)
    self.mean.zero_()
    self.var.fill_(1)
def reset_parameters(self):
    self.weight_initializer(self.linearity.weight)
    I.zeros_(self.linearity.bias)
def reset_parameters(self):
    # init.xavier_uniform_(self.weight)
    if self.bias is not None:
        init.zeros_(self.bias)
    self.cached_result = None
    self.cached_num_edges = None
    else:
        model = nin_gc.Net()
    model.load_state_dict(checkpoint['state_dict'])
    best_acc = checkpoint['best_acc']
else:
    print('******Initializing model******')
    if args.model_type == 0:
        model = nin.Net()
    else:
        model = nin_gc.Net()
    best_acc = 0
    for m in model.modules():
        if isinstance(m, nn.Conv2d):
            init.xavier_uniform_(m.weight)
            if m.bias is not None:
                init.zeros_(m.bias)
        elif isinstance(m, nn.Linear):
            init.normal_(m.weight, 0, 0.01)
            if m.bias is not None:
                init.zeros_(m.bias)

print('***ori_model***\n', model)
quantize.prepare(model, inplace=True, a_bits=args.a_bits, w_bits=args.w_bits)
print('\n***quant_model***\n', model)

if not args.cpu:
    model.cuda()
    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))

base_lr = float(args.lr)
param_dict = dict(model.named_parameters())
params = []
def init(self):
    xavier_normal_(self.project.weight)
    zeros_(self.project.bias)
def reset_parameters(self):
    kaiming_uniform_(self.weight_node)
    kaiming_uniform_(self.weight_edge)
    kaiming_uniform_(self.weight_triplet_att)
    kaiming_uniform_(self.weight_scale)
    zeros_(self.bias)
def reset_parameters(self):
    if not self.affine:
        return
    self.weight.data.copy_(torch.eye(2, 2).unsqueeze(-1))
    init.zeros_(self.bias)
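# What the eye-copy above does: torch.eye(2, 2).unsqueeze(-1) has shape
# (2, 2, 1), and Tensor.copy_ broadcasts it across the trailing dimension, so a
# weight of shape (2, 2, C) receives the 2x2 identity in every channel (the
# typical layout for a complex-valued/whitening batch norm; the channel count
# below is made up for illustration):
import torch

weight = torch.empty(2, 2, 5)
weight.copy_(torch.eye(2, 2).unsqueeze(-1))
assert torch.all(weight[:, :, 3] == torch.eye(2))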
def init_lstm_weights(self):
    # Input-to-hidden weights: orthogonal for lstm1/lstm3, Xavier normal for lstm2/lstm4.
    orthogonal_(self.lstm1.all_weights[0][0])
    xavier_normal_(self.lstm2.all_weights[0][0])
    orthogonal_(self.lstm3.all_weights[0][0])
    xavier_normal_(self.lstm4.all_weights[0][0])
    # Hidden-to-hidden (recurrent) weights: same per-layer split.
    orthogonal_(self.lstm1.all_weights[0][1])
    xavier_normal_(self.lstm2.all_weights[0][1])
    orthogonal_(self.lstm3.all_weights[0][1])
    xavier_normal_(self.lstm4.all_weights[0][1])
    # Biases (b_ih and b_hh) start at zero.
    zeros_(self.lstm1.all_weights[0][2])
    zeros_(self.lstm1.all_weights[0][3])
    zeros_(self.lstm2.all_weights[0][2])
    zeros_(self.lstm2.all_weights[0][3])
    zeros_(self.lstm3.all_weights[0][2])
    zeros_(self.lstm3.all_weights[0][3])
    zeros_(self.lstm4.all_weights[0][2])
    zeros_(self.lstm4.all_weights[0][3])
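# What all_weights[0][k] indexes: for layer 0 of an nn.LSTM, the list holds
# [weight_ih_l0, weight_hh_l0, bias_ih_l0, bias_hh_l0] in that order. A
# name-based loop is less fragile than positional indexing; a minimal sketch:
import torch.nn as nn
from torch.nn.init import orthogonal_, zeros_

lstm = nn.LSTM(input_size=32, hidden_size=64)
for name, param in lstm.named_parameters():
    if name.startswith('weight'):
        orthogonal_(param)
    elif name.startswith('bias'):
        zeros_(param)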
def reset_parameters(self):
    self.reset_running_stats()
    if self.affine:
        init.ones_(self.weight)
        init.zeros_(self.bias)
def reset_parameters(self):
    # kaiming_uniform (no trailing underscore) is the deprecated alias;
    # use the in-place variant.
    init.kaiming_uniform_(self.weight)
    if self.use_bias:
        init.zeros_(self.bias)
def reset_parameters(self):
    init.zeros_(self.log_sigma_z1.weight)
    init.zeros_(self.log_sigma_z1.bias)
    init.zeros_(self.log_sigma_z2.weight)
    init.zeros_(self.log_sigma_z2.bias)
def reset_parameters(self):
    init.ones_(self.weight)
    init.zeros_(self.bias)
def weight_init(m):
    if isinstance(m, (nn.Linear, nn.Conv2d)):
        init.kaiming_normal_(m.weight)
        # Guard against layers constructed with bias=False.
        if m.bias is not None:
            init.zeros_(m.bias)
def reset_parameters(self):
    if self.elementwise_affine:
        init.ones_(self.weight)
        init.zeros_(self.bias)
def __init__(
    self,
    in_channels,
    out_channels,
    kernel_size,
    stride=1,
    padding=0,
    dilation=1,
    groups=1,
    bias=False,
    eps=1e-5,
    # Lowered from the usual 0.1 to 0.01 to damp the jitter that quantization
    # introduces: a smaller momentum reduces the weight of per-batch statistics.
    # Experimentally this improves quantized training accuracy by roughly 1%.
    momentum=0.01,
    a_bits=8,
    w_bits=8,
    q_type=0,
    bn=0,
    activate='leaky',
    steps=0,
    quantizer_output=False
):
    super().__init__(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=kernel_size,
        stride=stride,
        padding=padding,
        dilation=dilation,
        groups=groups,
        bias=bias
    )
    self.bn = bn
    self.activate = activate
    self.eps = eps
    self.momentum = momentum
    self.freeze_step = int(steps * 0.9)
    self.gamma = Parameter(torch.Tensor(out_channels))
    self.beta = Parameter(torch.Tensor(out_channels))
    self.register_buffer('running_mean', torch.zeros(out_channels))
    self.register_buffer('running_var', torch.zeros(out_channels))
    self.register_buffer('batch_mean', torch.zeros(out_channels))
    self.register_buffer('batch_var', torch.zeros(out_channels))
    self.register_buffer('first_bn', torch.zeros(1))
    self.register_buffer('step', torch.zeros(1))
    self.quantizer_output = quantizer_output
    init.normal_(self.gamma, 1, 0.5)
    init.zeros_(self.beta)

    # Instantiate the quantizers (activations: layer-level; weights: channel-level).
    if q_type == 0:
        self.activation_quantizer = SymmetricQuantizer(
            bits=a_bits,
            range_tracker=AveragedRangeTracker(q_level='L', out_channels=-1),
            out_channels=-1, FPGA=True)
        self.weight_quantizer = SymmetricQuantizer(
            bits=w_bits,
            range_tracker=GlobalRangeTracker(q_level='L', out_channels=-1),
            out_channels=-1, FPGA=True)
        self.bias_quantizer = SymmetricQuantizer(
            bits=w_bits,
            range_tracker=GlobalRangeTracker(q_level='L', out_channels=-1),
            out_channels=-1, FPGA=True)
    else:
        self.activation_quantizer = AsymmetricQuantizer(
            bits=a_bits,
            range_tracker=AveragedRangeTracker(q_level='L', out_channels=-1),
            out_channels=-1, FPGA=True, sign=False)
        self.weight_quantizer = AsymmetricQuantizer(
            bits=w_bits,
            range_tracker=GlobalRangeTracker(q_level='L', out_channels=-1),
            out_channels=-1, FPGA=True, sign=False)
        self.bias_quantizer = AsymmetricQuantizer(
            bits=w_bits,
            range_tracker=GlobalRangeTracker(q_level='L', out_channels=-1),
            out_channels=-1, FPGA=True, sign=False)
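# A minimal, self-contained sketch of the symmetric vs. asymmetric choice the
# q_type flag selects between. SymmetricQuantizer/AsymmetricQuantizer and the
# range trackers are external to this snippet; the helpers below are made up
# for illustration and are not that library's API:
import torch

def quantize_symmetric(x, bits=8):
    # Zero-point fixed at 0; the scale covers the max absolute value.
    qmax = 2 ** (bits - 1) - 1
    scale = x.abs().max() / qmax
    return torch.clamp((x / scale).round(), -qmax - 1, qmax) * scale

def quantize_asymmetric(x, bits=8):
    # Unsigned grid [0, 2^bits - 1]; a zero-point shifts it to cover [min, max].
    qmax = 2 ** bits - 1
    scale = (x.max() - x.min()) / qmax
    zero_point = (-x.min() / scale).round()
    q = torch.clamp((x / scale).round() + zero_point, 0, qmax)
    return (q - zero_point) * scale

x = torch.randn(1000)
print((x - quantize_symmetric(x)).abs().max())   # worst-case rounding error
print((x - quantize_asymmetric(x)).abs().max())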