Example #1
 def __init__(self, in_features: int, out_features: int, bias: bool = True) -> None:
     super(Linear, self).__init__()
     self.linear = nn.Linear(in_features, out_features, bias=bias)
     init.xavier_uniform_(self.linear.weight)
     if bias:
         init.zeros_(self.linear.bias)
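A minimal self-contained sketch of how a wrapper like this is typically used; the class name XavierLinear and the layer sizes are illustrative assumptions, not from the original project:

    import torch
    import torch.nn as nn
    import torch.nn.init as init

    class XavierLinear(nn.Module):
        # Hypothetical name for the wrapper above: nn.Linear with
        # Xavier-uniform weights and a zero bias.
        def __init__(self, in_features: int, out_features: int, bias: bool = True) -> None:
            super().__init__()
            self.linear = nn.Linear(in_features, out_features, bias=bias)
            init.xavier_uniform_(self.linear.weight)
            if bias:
                init.zeros_(self.linear.bias)

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return self.linear(x)

    layer = XavierLinear(16, 4)
    print(layer(torch.randn(2, 16)).shape)  # torch.Size([2, 4])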
Example #2
 def _initialize(self):
     init.ones_(self.weights.weight.data)
     init.zeros_(self.biases.weight.data)
Example #3
    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 downsample=None,
                 radix=1,
                 cardinality=1,
                 bottleneck_width=64,
                 avd=False,
                 avd_first=False,
                 dilation=1,
                 is_first=False,
                 rectified_conv=False,
                 rectify_avg=False,
                 norm_layer=None,
                 dropblock_prob=0.0,
                 last_gamma=False):
        super(Bottleneck, self).__init__()

        group_width = int(planes * (bottleneck_width / 64.)) * cardinality

        self.conv1 = nn.Conv2d(inplanes,
                               group_width,
                               kernel_size=1,
                               bias=False)
        self.bn1 = norm_layer(group_width)
        self.dropblock_prob = dropblock_prob
        self.radix = radix
        self.avd = avd and (stride > 1 or is_first)
        self.avd_first = avd_first

        if self.avd:
            self.avd_layer = nn.AvgPool2d(3, stride, padding=1)
            stride = 1

        if dropblock_prob > 0.0:
            self.dropblock1 = DropBlock2D(dropblock_prob, 3)
            if radix == 1:
                self.dropblock2 = DropBlock2D(dropblock_prob, 3)
            self.dropblock3 = DropBlock2D(dropblock_prob, 3)

        if radix >= 1:
            self.conv2 = SplAtConv2d(group_width,
                                     group_width,
                                     kernel_size=3,
                                     stride=stride,
                                     padding=dilation,
                                     dilation=dilation,
                                     groups=cardinality,
                                     bias=False,
                                     radix=radix,
                                     rectify=rectified_conv,
                                     rectify_avg=rectify_avg,
                                     norm_layer=norm_layer,
                                     dropblock_prob=dropblock_prob)
        elif rectified_conv:
            from rfconv import RFConv2d
            self.conv2 = RFConv2d(group_width,
                                  group_width,
                                  kernel_size=3,
                                  stride=stride,
                                  padding=dilation,
                                  dilation=dilation,
                                  groups=cardinality,
                                  bias=False,
                                  average_mode=rectify_avg)
            self.bn2 = norm_layer(group_width)
        else:
            self.conv2 = nn.Conv2d(group_width,
                                   group_width,
                                   kernel_size=3,
                                   stride=stride,
                                   padding=dilation,
                                   dilation=dilation,
                                   groups=cardinality,
                                   bias=False)
            self.bn2 = norm_layer(group_width)

        self.conv3 = nn.Conv2d(group_width,
                               planes * 4,
                               kernel_size=1,
                               bias=False)
        self.bn3 = norm_layer(planes * 4)

        if last_gamma:
            from torch.nn.init import zeros_
            zeros_(self.bn3.weight)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.dilation = dilation
        self.stride = stride
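A minimal construction sketch for the block above. Note that norm_layer defaults to None but is called immediately, so it must be passed explicitly; choosing radix=0 here (an illustrative value) selects the plain grouped nn.Conv2d branch, so no ResNeSt-specific helpers (SplAtConv2d, DropBlock2D) are needed and downsample can be omitted:

    import torch.nn as nn

    # Hypothetical usage; with the default norm_layer=None the constructor
    # would raise TypeError at self.bn1 = norm_layer(group_width).
    block = Bottleneck(inplanes=64, planes=64, stride=1, radix=0,
                       norm_layer=nn.BatchNorm2d)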
Example #4
 def reset_parameters(self):
     gain = init.calculate_gain('relu')
     self.gru.reset_parameters()
     for linear in self.linears:
         init.xavier_normal_(linear.weight, gain=gain)
         init.zeros_(linear.bias)
Example #5
 def init_weights(self):
     for m in self.modules():
         if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
             xavier_uniform_(m.weight.data)
             if m.bias is not None:
                 zeros_(m.bias)
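The same initialization is often written with Module.apply, which recurses over every submodule; a minimal sketch (the net object below is illustrative):

    import torch.nn as nn
    from torch.nn.init import xavier_uniform_, zeros_

    def init_conv(m: nn.Module) -> None:
        # Called once per submodule by Module.apply.
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            xavier_uniform_(m.weight)
            if m.bias is not None:
                zeros_(m.bias)

    net = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.ConvTranspose2d(8, 3, 3))
    net.apply(init_conv)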
Example #6
 def build(self, *inputs):
     k = sum(1 for i in range(self.step) for n in range(1 + i))
     self.alphas_cell = Parameter(torch.Tensor(k, 8))
     self.alphas_net = Parameter(torch.Tensor(self.num_layer, 4, 3))
     init.zeros_(self.alphas_net)
     init.zeros_(self.alphas_cell)
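torch.Tensor(k, 8) allocates uninitialized memory, so the init.zeros_ calls are what give the architecture parameters defined values; a minimal sketch of the same pattern (shapes are illustrative):

    import torch
    from torch.nn import Parameter, init

    alphas = Parameter(torch.Tensor(3, 8))  # uninitialized storage
    init.zeros_(alphas)                     # values are now well-defined
    print(alphas.sum())                     # tensor(0., grad_fn=<SumBackward0>)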
Example #7
        )
        self.standerembed = nn.Embedding(6, 128)
        self.bert = BertModel.from_pretrained(model_path, config=Config)

    def forward(self, input_ids, attention_mask, token_type_ids, c):
        conditional = self.standerembed(c)
        x1 = self.bert(input_ids, attention_mask=attention_mask,
                       token_type_ids=token_type_ids, conditional=conditional)
        x2 = x1.last_hidden_state
        logits = self.linear_relu_stack(x2[:, 0])
        return logits

model = NeuralNetwork(model_path).to(device)
print(model)
for i in model.state_dict():
    if 'LayerNorm.bias_dense' in i or 'LayerNorm.weight_dense' in i:
        init.zeros_(model.state_dict()[i])
print(torch.sum(model.state_dict()['bert.encoder.layer.11.output.LayerNorm.bias_dense.weight']))
print(torch.sum(model.state_dict()['bert.encoder.layer.11.output.LayerNorm.weight_dense.weight']))
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)

batch_size = 32
maxlen = 512
training_data = CustomImageDataset(train_data, tokenizer, maxlen)
testing_data = CustomImageDataset(valid_data, tokenizer, maxlen)
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(testing_data, batch_size=batch_size)

def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    correct = 0
Example #8
 def reset_parameters(self):
     """Reinitialize learnable parameters."""
     init.xavier_uniform_(self.weight)
     if self.bias is not None:
         init.zeros_(self.bias)
Example #9
 def reset_bn_parameters(self):
     self.reset_running_stats()
     init.uniform_(self.gamma)
     init.zeros_(self.beta)
Example #10
    def _initialize_weights(self):
        init.normal_(self.W_i.weight)
        init.normal_(self.W_f.weight)
        init.normal_(self.W_c.weight)
        init.normal_(self.W_o.weight)

        init.orthogonal_(self.U_i.weight)
        init.orthogonal_(self.U_f.weight)
        init.orthogonal_(self.U_c.weight)
        init.orthogonal_(self.U_o.weight)

        init.normal_(self.att_w.weight)
        init.normal_(self.att_u.weight)
        init.zeros_(self.att_v.weight)

        init.zeros_(self.W_i.bias)
        init.zeros_(self.W_f.bias)
        init.zeros_(self.W_c.bias)
        init.zeros_(self.W_o.bias)

        init.zeros_(self.U_i.bias)
        init.zeros_(self.U_f.bias)
        init.zeros_(self.U_c.bias)
        init.zeros_(self.U_o.bias)

        init.zeros_(self.att_w.bias)
        init.zeros_(self.att_u.bias)
Example #11
File: model.py Project: yuk12/dgl
 def reset_parameters(self):
     init.xavier_uniform_(self.W)
     init.xavier_uniform_(self.U)
     init.zeros_(self.bias)
Example #12
 def init(self, emb_init):
     INIT.uniform_(self.emb, -emb_init, emb_init)
     INIT.zeros_(self.state_sum)
Example #13
 def reset_running_stats(self):
     self.running_mean.zero_()
     self.running_var.fill_(1)
     init.ones_(self.weight)
     init.zeros_(self.bias)
Example #14
def weights_init_classifier(m):
    classname = m.__class__.__name__
    if classname.find('Linear') != -1:
        init.normal_(m.weight.data, 0, 0.001)
        if m.bias is not None:
            init.zeros_(m.bias.data)
Example #15
 def weights_init(m):
     classname = m.__class__.__name__
     if classname in ('Conv1d', 'Linear'):
         kaiming_normal_(m.weight, nonlinearity='relu')
         if m.bias is not None:
             zeros_(m.bias)
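As with Example #5, this function is meant to be passed to Module.apply; a short usage sketch (net is illustrative, and weights_init is the function defined just above, with kaiming_normal_ and zeros_ imported from torch.nn.init):

    import torch.nn as nn

    net = nn.Sequential(nn.Conv1d(8, 16, 3), nn.ReLU(), nn.Linear(16, 4))
    net.apply(weights_init)  # visits every submodule; names are matched against ('Conv1d', 'Linear')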
Example #16
 def reset_parameters(self):
     init.uniform_(self.weight)
     init.zeros_(self.bias)
     self.mean.zero_()
     self.var.fill_(1)
Example #17
 def reset_parameters(self):
     self.weight_initializer(self.linearity.weight)
     I.zeros_(self.linearity.bias)
Example #18
 def reset_parameters(self):
     #init.xavier_uniform_(self.weight)
     if self.bias is not None:
         init.zeros_(self.bias)
     self.cached_result = None
     self.cached_num_edges = None
Example #19
        else:
            model = nin_gc.Net()
        model.load_state_dict(checkpoint['state_dict'])
        best_acc = checkpoint['best_acc']
    else:
        print('******Initializing model******')
        if args.model_type == 0:
            model = nin.Net()
        else:
            model = nin_gc.Net()
        best_acc = 0
        for m in model.modules():
            if isinstance(m, nn.Conv2d):
                init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    init.zeros_(m.bias)
    print('***ori_model***\n', model)
    quantize.prepare(model, inplace=True, a_bits=args.a_bits, w_bits=args.w_bits)
    print('\n***quant_model***\n', model)

    if not args.cpu:
        model.cuda()
        model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))

    base_lr = float(args.lr)
    param_dict = dict(model.named_parameters())
    params = []
Example #20
 def init(self):
     xavier_normal_(self.project.weight)
     zeros_(self.project.bias)
Example #21
 def reset_parameters(self):
     kaiming_uniform_(self.weight_node)
     kaiming_uniform_(self.weight_edge)
     kaiming_uniform_(self.weight_triplet_att)
     kaiming_uniform_(self.weight_scale)
     zeros_(self.bias)
Example #22
    def reset_parameters(self):
        if not self.affine:
            return

        self.weight.data.copy_(torch.eye(2, 2).unsqueeze(-1))
        init.zeros_(self.bias)
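torch.eye(2, 2).unsqueeze(-1) yields a (2, 2, 1) identity kernel, so the layer starts out as an identity map over two channels (consistent with a kernel-size-1 convolution; the surrounding class is not shown, so this is an inference). A quick check:

    import torch

    w = torch.eye(2, 2).unsqueeze(-1)
    print(w.shape)      # torch.Size([2, 2, 1])
    print(w[:, :, 0])   # the 2x2 identity matrix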
Example #23
 def init_lstm_weights(self):
     # Input-to-hidden weights: orthogonal for lstm1/lstm3, Xavier normal for lstm2/lstm4
     orthogonal_(self.lstm1.all_weights[0][0])
     xavier_normal_(self.lstm2.all_weights[0][0])
     orthogonal_(self.lstm3.all_weights[0][0])
     xavier_normal_(self.lstm4.all_weights[0][0])
     # Hidden-to-hidden (recurrent) weights: the same alternating scheme
     orthogonal_(self.lstm1.all_weights[0][1])
     xavier_normal_(self.lstm2.all_weights[0][1])
     orthogonal_(self.lstm3.all_weights[0][1])
     xavier_normal_(self.lstm4.all_weights[0][1])
     # Zeros for biases
     zeros_(self.lstm1.all_weights[0][2])
     zeros_(self.lstm1.all_weights[0][3])
     zeros_(self.lstm2.all_weights[0][2])
     zeros_(self.lstm2.all_weights[0][3])
     zeros_(self.lstm3.all_weights[0][2])
     zeros_(self.lstm3.all_weights[0][3])
     zeros_(self.lstm4.all_weights[0][2])
     zeros_(self.lstm4.all_weights[0][3])
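all_weights[l] holds [weight_ih, weight_hh, bias_ih, bias_hh] for layer l, so the code above touches only layer 0 of each LSTM. A hedged sketch of covering every layer and direction by parameter name (the uniform scheme below, orthogonal recurrent weights everywhere, is a generic alternative rather than the original author's alternating one):

    import torch.nn as nn
    from torch.nn.init import xavier_normal_, orthogonal_, zeros_

    def init_lstm(lstm: nn.LSTM) -> None:
        for name, p in lstm.named_parameters():
            if name.startswith('weight_ih'):
                xavier_normal_(p)   # input-to-hidden weights
            elif name.startswith('weight_hh'):
                orthogonal_(p)      # recurrent weights
            elif name.startswith('bias'):
                zeros_(p)           # both bias vectors per layer

    init_lstm(nn.LSTM(input_size=10, hidden_size=20, num_layers=2, bidirectional=True))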
Example #24
 def reset_parameters(self):
     self.reset_running_stats()
     if self.affine:
         init.ones_(self.weight)
         init.zeros_(self.bias)
Example #25
 def reset_parameters(self):
     init.kaiming_uniform_(self.weight)
     if self.use_bias:
         init.zeros_(self.bias)
Example #26
 def reset_parameters(self):
     init.zeros_(self.log_sigma_z1.weight)
     init.zeros_(self.log_sigma_z1.bias)
     init.zeros_(self.log_sigma_z2.weight)
     init.zeros_(self.log_sigma_z2.bias)
Example #27
    def reset_parameters(self):

        init.ones_(self.weight)
        init.zeros_(self.bias)
Example #28
 def weight_init(m):
     if isinstance(m, (nn.Linear, nn.Conv2d)):
         init.kaiming_normal_(m.weight)
         if m.bias is not None:
             init.zeros_(m.bias)
Example #29
 def reset_parameters(self):
     if self.elementwise_affine:
         init.ones_(self.weight)
         init.zeros_(self.bias)
Example #30
    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            stride=1,
            padding=0,
            dilation=1,
            groups=1,
            bias=False,
            eps=1e-5,
            momentum=0.01,  # lowered from 0.1 to damp quantization-induced jitter: reducing the weight of per-batch statistics suppresses the oscillation, and experiments showed roughly 1% higher accuracy in quantization-aware training
            a_bits=8,
            w_bits=8,
            q_type=0,
            bn=0,
            activate='leaky',
            steps=0,
            quantizer_output=False
    ):
        super().__init__(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias
        )
        self.bn = bn
        self.activate = activate
        self.eps = eps
        self.momentum = momentum
        self.freeze_step = int(steps * 0.9)
        self.gamma = Parameter(torch.Tensor(out_channels))
        self.beta = Parameter(torch.Tensor(out_channels))
        self.register_buffer('running_mean', torch.zeros(out_channels))
        self.register_buffer('running_var', torch.zeros(out_channels))
        self.register_buffer('batch_mean', torch.zeros(out_channels))
        self.register_buffer('batch_var', torch.zeros(out_channels))
        self.register_buffer('first_bn', torch.zeros(1))
        self.register_buffer('step', torch.zeros(1))
        self.quantizer_output = quantizer_output

        init.normal_(self.gamma, 1, 0.5)
        init.zeros_(self.beta)

        # Instantiate the quantizers (activations: layer-level; weights: channel-level)
        if q_type == 0:
            self.activation_quantizer = SymmetricQuantizer(
                bits=a_bits,
                range_tracker=AveragedRangeTracker(q_level='L', out_channels=-1),
                out_channels=-1, FPGA=True)
            self.weight_quantizer = SymmetricQuantizer(
                bits=w_bits,
                range_tracker=GlobalRangeTracker(q_level='L', out_channels=-1),
                out_channels=-1, FPGA=True)
            self.bias_quantizer = SymmetricQuantizer(
                bits=w_bits,
                range_tracker=GlobalRangeTracker(q_level='L', out_channels=-1),
                out_channels=-1, FPGA=True)
        else:
            self.activation_quantizer = AsymmetricQuantizer(
                bits=a_bits,
                range_tracker=AveragedRangeTracker(q_level='L', out_channels=-1),
                out_channels=-1, FPGA=True, sign=False)
            self.weight_quantizer = AsymmetricQuantizer(
                bits=w_bits,
                range_tracker=GlobalRangeTracker(q_level='L', out_channels=-1),
                out_channels=-1, FPGA=True, sign=False)
            self.bias_quantizer = AsymmetricQuantizer(
                bits=w_bits,
                range_tracker=GlobalRangeTracker(q_level='L', out_channels=-1),
                out_channels=-1, FPGA=True, sign=False)