def _init(m):
    """Initialise a single module in place (suitable for ``Module.apply``).

    ``nn.Conv2d`` weights are filled with ``mynn.init.MSRAFill`` and
    ``nn.Linear`` weights with ``mynn.init.XavierFill``.  The bias of either
    kind of layer is set to zero.  Every other module type is left untouched.

    Args:
        m: The module to initialise.
    """
    if isinstance(m, nn.Conv2d):
        mynn.init.MSRAFill(m.weight)
    elif isinstance(m, nn.Linear):
        mynn.init.XavierFill(m.weight)
    else:
        return
    # Layers created with ``bias=False`` expose ``bias`` as None; there is
    # nothing to zero in that case.
    if m.bias is not None:
        init.constant_(m.bias, 0)
def __init__(
    self,
    layers,
    activations,
    use_batch_norm=False,
    use_noisy_linear_layers=False,
    min_std=0.0,
) -> None:
    """Build a stack of linear layers described by ``layers``.

    :param layers: Sequence of layer dimensions; consecutive entries give the
        input and output size of each linear layer (at least two entries).
    :param activations: Per-layer activations, stored on the instance and
        forwarded to ``gaussian_fill_w_gain`` for weight initialisation.
    :param use_batch_norm: When true, a ``BatchNorm1d`` over each layer's
        input dimension is registered alongside the layer.
    :param use_noisy_linear_layers: When true, ``NoisyLinear`` is used in
        place of ``nn.Linear``.
    :param min_std: Forwarded to ``gaussian_fill_w_gain``.
    """
    super(FullyConnectedNetwork, self).__init__()
    self.layers: nn.ModuleList = nn.ModuleList()
    self.batch_norm_ops: nn.ModuleList = nn.ModuleList()
    self.activations = activations
    self.use_batch_norm = use_batch_norm

    assert len(layers) >= 2, "Invalid layer schema {} for network".format(layers)

    # Walk over (input dim, output dim) pairs of neighbouring entries.
    for idx, (in_dim, out_dim) in enumerate(zip(layers, layers[1:])):
        if use_noisy_linear_layers:
            linear = NoisyLinear(in_dim, out_dim)
        else:
            linear = nn.Linear(in_dim, out_dim)
        self.layers.append(linear)

        if self.use_batch_norm:
            self.batch_norm_ops.append(nn.BatchNorm1d(in_dim))

        gaussian_fill_w_gain(linear.weight, self.activations[idx], in_dim, min_std)
        init.constant_(linear.bias, 0)
def _init_weights(self, m):
    """Initialise one ``nn.Conv2d`` module according to ``cfg.KRCNN.CONV_INIT``.

    Modules that are not ``nn.Conv2d`` are ignored, so the method can be
    passed to ``Module.apply``.

    Args:
        m: The module to initialise.

    Raises:
        ValueError: If ``cfg.KRCNN.CONV_INIT`` is neither ``'GaussianFill'``
            nor ``'MSRAFill'``.
    """
    if not isinstance(m, nn.Conv2d):
        return

    conv_init = cfg.KRCNN.CONV_INIT
    if conv_init == 'GaussianFill':
        init.normal_(m.weight, std=0.01)
    elif conv_init == 'MSRAFill':
        mynn.init.MSRAFill(m.weight)
    else:
        # Fail loudly: an unknown scheme would otherwise leave the weights at
        # their default initialisation without any indication.
        raise ValueError('Unexpected cfg.KRCNN.CONV_INIT: {}'.format(conv_init))
    init.constant_(m.bias, 0)
def _init_weights(self, m):
    """Initialise one conv / transposed-conv module per ``cfg.MRCNN.CONV_INIT``.

    Modules that are neither ``nn.Conv2d`` nor ``nn.ConvTranspose2d`` are
    ignored, so the method can be passed to ``Module.apply``.

    Args:
        m: The module to initialise.

    Raises:
        ValueError: If ``cfg.MRCNN.CONV_INIT`` is neither ``'GaussianFill'``
            nor ``'MSRAFill'``.
    """
    if not isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
        return

    conv_init = cfg.MRCNN.CONV_INIT
    if conv_init == 'GaussianFill':
        init.normal_(m.weight, std=0.001)
    elif conv_init == 'MSRAFill':
        mynn.init.MSRAFill(m.weight)
    else:
        # Name the offending value so a misconfiguration can be diagnosed.
        raise ValueError('Unexpected cfg.MRCNN.CONV_INIT: {}'.format(conv_init))
    init.constant_(m.bias, 0)
def _init_weights(self):
    """Initialise ``self.classify``: weight by config-selected fill, bias to 0.

    ``mynn.init.MSRAFill`` is used only when the output is convolutional
    (``USE_FC_OUTPUT`` off), masks are class specific and ``CONV_INIT`` is
    ``'MSRAFill'``.  In every other case a Gaussian with std 0.001 is used.
    """
    use_msra = (
        not cfg.MRCNN.USE_FC_OUTPUT
        and cfg.MRCNN.CLS_SPECIFIC_MASK
        and cfg.MRCNN.CONV_INIT == 'MSRAFill'
    )
    if use_msra:
        fill_weight = mynn.init.MSRAFill
    else:
        # Use GaussianFill for class-agnostic mask prediction; fills based on
        # fan-in can be too large in this case and cause divergence
        fill_weight = partial(init.normal_, std=0.001)

    fill_weight(self.classify.weight)
    init.constant_(self.classify.bias, 0)
def __init__(self, version=1.0, num_classes=1000):
    """Build the SqueezeNet feature extractor and classifier.

    Args:
        version: Architecture variant, either ``1.0`` or ``1.1``.
        num_classes: Number of output channels of the final convolution.

    Raises:
        ValueError: If ``version`` is not one of the supported variants.
    """
    super(SqueezeNet, self).__init__()
    if version not in [1.0, 1.1]:
        raise ValueError(
            "Unsupported SqueezeNet version {version}:1.0 or 1.1 expected".format(
                version=version
            )
        )
    self.num_classes = num_classes

    def _pool():
        # Every down-sampling stage uses the same pooling configuration.
        return nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

    if version == 1.0:
        stages = [
            nn.Conv2d(3, 96, kernel_size=7, stride=2),
            nn.ReLU(inplace=True),
            _pool(),
            Fire(96, 16, 64, 64),
            Fire(128, 16, 64, 64),
            Fire(128, 32, 128, 128),
            _pool(),
            Fire(256, 32, 128, 128),
            Fire(256, 48, 192, 192),
            Fire(384, 48, 192, 192),
            Fire(384, 64, 256, 256),
            _pool(),
            Fire(512, 64, 256, 256),
        ]
    else:
        stages = [
            nn.Conv2d(3, 64, kernel_size=3, stride=2),
            nn.ReLU(inplace=True),
            _pool(),
            Fire(64, 16, 64, 64),
            Fire(128, 16, 64, 64),
            _pool(),
            Fire(128, 32, 128, 128),
            Fire(256, 32, 128, 128),
            _pool(),
            Fire(256, 48, 192, 192),
            Fire(384, 48, 192, 192),
            Fire(384, 64, 256, 256),
            Fire(512, 64, 256, 256),
        ]
    self.features = nn.Sequential(*stages)

    # Final convolution is initialized differently from the rest
    final_conv = nn.Conv2d(512, self.num_classes, kernel_size=1)
    self.classifier = nn.Sequential(
        nn.Dropout(p=0.5),
        final_conv,
        nn.ReLU(inplace=True),
        nn.AdaptiveAvgPool2d((1, 1)),
    )

    for module in self.modules():
        if not isinstance(module, nn.Conv2d):
            continue
        if module is final_conv:
            init.normal_(module.weight, mean=0.0, std=0.01)
        else:
            init.kaiming_uniform_(module.weight)
        if module.bias is not None:
            init.constant_(module.bias, 0)
def _init_weights(self):
    """Initialise the optional deconv layer and the ``classify`` layer.

    Raises:
        ValueError: If ``cfg.KRCNN.CONV_INIT`` is neither ``'GaussianFill'``
            nor ``'MSRAFill'``.
    """
    if cfg.KRCNN.USE_DECONV:
        deconv = self.deconv
        init.normal_(deconv.weight, std=0.01)
        init.constant_(deconv.bias, 0)

    scheme = cfg.KRCNN.CONV_INIT
    classify = self.classify
    if scheme == 'GaussianFill':
        init.normal_(classify.weight, std=0.001)
    elif scheme == 'MSRAFill':
        mynn.init.MSRAFill(classify.weight)
    else:
        raise ValueError(scheme)
    init.constant_(classify.bias, 0)
def _init_weights(self):
    """Initialise the lateral convolution (zero or Xavier fill, zero bias).

    With ``cfg.FPN.USE_GN`` set, the convolution is taken from index 0 of
    ``self.conv_lateral``; otherwise ``self.conv_lateral`` is the convolution
    itself.
    """
    lateral = self.conv_lateral[0] if cfg.FPN.USE_GN else self.conv_lateral

    if cfg.FPN.ZERO_INIT_LATERAL:
        init.constant_(lateral.weight, 0)
    else:
        mynn.init.XavierFill(lateral.weight)

    bias = lateral.bias
    if bias is not None:
        init.constant_(bias, 0)
def init_func(m):
    """Initialise one module in place; selection is by class *name*.

    Modules whose class name contains ``Conv`` or ``Linear`` and that have a
    ``weight`` attribute are initialised according to the enclosing scope's
    ``init_type`` (using ``gain``), and their bias, if any, is zeroed.
    Modules whose class name contains ``BatchNorm2d`` get weights drawn from
    N(1, gain) and a zero bias.

    Raises:
        NotImplementedError: If ``init_type`` is not one of ``'normal'``,
            ``'xavier'``, ``'kaiming'`` or ``'orthogonal'``.
    """
    classname = m.__class__.__name__
    looks_like_conv_or_linear = 'Conv' in classname or 'Linear' in classname

    if hasattr(m, 'weight') and looks_like_conv_or_linear:
        weight = m.weight.data
        if init_type == 'normal':
            init.normal_(weight, 0.0, gain)
        elif init_type == 'xavier':
            init.xavier_normal_(weight, gain=gain)
        elif init_type == 'kaiming':
            init.kaiming_normal_(weight, a=0, mode='fan_in')
        elif init_type == 'orthogonal':
            init.orthogonal_(weight, gain=gain)
        else:
            raise NotImplementedError('initialization method [%s] is not implemented' % init_type)

        bias = getattr(m, 'bias', None)
        if bias is not None:
            init.constant_(bias.data, 0.0)
    elif 'BatchNorm2d' in classname:
        init.normal_(m.weight.data, 1.0, gain)
        init.constant_(m.bias.data, 0.0)
def test_l2_regularization(self):
    """L2 penalty over every parameter of a two-layer model filled with 0.5."""
    model = torch.nn.Sequential(torch.nn.Linear(5, 10), torch.nn.Linear(10, 5))

    fill_with_half = InitializerApplicator([(".*", lambda tensor: constant_(tensor, 0.5))])
    fill_with_half(model)

    regularize = RegularizerApplicator([("", L2Regularizer(1.0))])
    penalty = regularize(model)

    # 115 parameters (weights and biases), each contributing 0.5 ** 2.
    assert penalty.data.numpy() == 28.75
def test_regularizer_applicator_respects_regex_matching(self):
    """Weights and biases are penalised by the regularizer their name matches."""
    model = torch.nn.Sequential(torch.nn.Linear(5, 10), torch.nn.Linear(10, 5))

    fill_with_one = InitializerApplicator([(".*", lambda tensor: constant_(tensor, 1.))])
    fill_with_one(model)

    regularize = RegularizerApplicator([
        ("weight", L2Regularizer(0.5)),
        ("bias", L1Regularizer(1.0)),
    ])
    penalty = regularize(model)

    # 100 weights at 0.5 * 1 ** 2 plus 15 biases at 1.0 * |1|.
    assert penalty.data.numpy() == 65.0
def __init__(self, layers, activations, use_batch_norm=False, action_dim=0) -> None:
    """
    Dueling Q-Network Architecture: https://arxiv.org/abs/1511.06581

    :param layers: List of layer dimensions
    :param activations: List of layer activations
    :param use_batch_norm: bool indicating whether to apply batch normalization
    :param action_dim: if !=0 use parametric dueling DQN, else standard dueling DQN
    """
    super(DuelingQNetwork, self).__init__()
    self.layers: nn.ModuleList = nn.ModuleList()
    self.batch_norm_ops: nn.ModuleList = nn.ModuleList()
    self.activations = activations
    self.use_batch_norm = use_batch_norm

    assert len(layers) >= 3, "Invalid layer schema {} for network".format(layers)
    assert (
        len(layers) == len(activations) + 1
    ), "Invalid activation schema {} for network".format(activations)
    assert (
        layers[-2] % 2 == 0
    ), """Last shared layer in dueling architecture should be divisible by 2."""

    # Shared trunk: every layer except the output one.  A BatchNorm1d over the
    # layer input is registered for each trunk layer regardless of
    # ``use_batch_norm``.
    for idx, (in_dim, out_dim) in enumerate(zip(layers, layers[1:-1])):
        shared = nn.Linear(in_dim, out_dim)
        self.layers.append(shared)
        self.batch_norm_ops.append(nn.BatchNorm1d(in_dim))
        gaussian_fill_w_gain(shared.weight, self.activations[idx], in_dim)
        init.constant_(shared.bias, 0)

    self.parametric_action = action_dim > 0

    # Split last layer into a value & advantage stream
    stream_hidden = int(layers[-2] / 2)
    self.advantage = nn.Sequential(
        nn.Linear(int(layers[-2] + action_dim), stream_hidden),
        nn.ReLU(),
        nn.Linear(stream_hidden, layers[-1]),
    )
    self.value = nn.Sequential(
        nn.Linear(int(layers[-2]), stream_hidden),
        nn.ReLU(),
        nn.Linear(stream_hidden, 1),
    )
    self._name = "unnamed"
def test_l1_regularization(self):
    """L1 penalty over every parameter of a two-layer model filled with -1."""
    model = torch.nn.Sequential(torch.nn.Linear(5, 10), torch.nn.Linear(10, 5))

    fill_with_minus_one = InitializerApplicator([(".*", lambda tensor: constant_(tensor, -1))])
    fill_with_minus_one(model)

    regularize = RegularizerApplicator([("", L1Regularizer(1.0))])
    penalty = regularize(model)

    # 115 because of biases.
    assert penalty.data.numpy() == 115.0
def init_params(self):
    """Re-initialise every Conv2d, BatchNorm2d and Linear sub-module in place.

    Conv2d weights use Kaiming-normal (``fan_out``), Linear weights use a
    Gaussian with std 0.001, and BatchNorm2d is reset to weight 1 / bias 0.
    Conv and Linear biases are zeroed when present.
    """
    for module in self.modules():
        if isinstance(module, nn.BatchNorm2d):
            init.constant_(module.weight, 1)
            init.constant_(module.bias, 0)
            continue

        if isinstance(module, nn.Conv2d):
            init.kaiming_normal_(module.weight, mode='fan_out')
        elif isinstance(module, nn.Linear):
            init.normal_(module.weight, std=0.001)
        else:
            continue

        # Shared tail for conv and linear layers.
        if module.bias is not None:
            init.constant_(module.bias, 0)
def _init_weights(self):
    """Initialise the three RPN layers: N(0, 0.01) weights and zero biases."""
    rpn_layers = (
        self.FPN_RPN_conv,
        self.FPN_RPN_cls_score,
        self.FPN_RPN_bbox_pred,
    )
    for layer in rpn_layers:
        init.normal_(layer.weight, std=0.01)
        init.constant_(layer.bias, 0)
def __init__(self, input_num, output_num):
    """Create the K projection, its batch norm and a softmax.

    Args:
        input_num: Input feature size of the linear projection.
        output_num: Output feature size of the projection and batch norm.
    """
    super(CrossPoolingDir, self).__init__()
    self.input_num = input_num
    self.output_num = output_num

    projection = nn.Linear(self.input_num, self.output_num)
    projection_bn = nn.BatchNorm1d(self.output_num)
    self.featK = projection
    self.featK_bn = projection_bn

    # Softmax
    self.softmax = nn.Softmax()

    # Projection: Kaiming-uniform (fan_out) weight, zero bias.
    init.kaiming_uniform_(projection.weight, mode='fan_out')
    init.constant_(projection.bias, 0)
    # Batch norm starts as the identity transform.
    init.constant_(projection_bn.weight, 1)
    init.constant_(projection_bn.bias, 0)
def __init__(self, feat_num, class_num, drop=0):
    """Create a batch-norm + linear classifier with optional dropout.

    Args:
        feat_num: Size of the incoming feature vector.
        class_num: Number of output classes.
        drop: Dropout probability; a dropout layer is created only if > 0.
    """
    super(Classifier, self).__init__()
    self.feat_num = feat_num
    self.class_num = class_num
    self.drop = drop

    # BN layer
    batch_norm = nn.BatchNorm1d(self.feat_num)
    self.classifierBN = batch_norm
    # feature classifier
    linear = nn.Linear(self.feat_num, self.class_num)
    self.classifierlinear = linear
    # dropout layer
    if self.drop > 0:
        self.droplayer = nn.Dropout(drop)

    init.constant_(batch_norm.weight, 1)
    init.constant_(batch_norm.bias, 0)
    init.normal_(linear.weight, std=0.001)
    init.constant_(linear.bias, 0)
def __init__(self, depth, pretrained=True, cut_at_pooling=False, num_features=0, dropout=0):
    """Build a ResNet backbone with a 2-channel stem and optional embedding.

    Args:
        depth: Key into ``ResNet.__factory`` selecting the base network.
        pretrained: Passed to the base-network factory; when false,
            ``self.reset_params()`` is called at the end.
        cut_at_pooling: When true, no embedding / dropout layers are added.
        num_features: Embedding size; 0 disables the embedding layer.
        dropout: Dropout probability; a dropout layer is created only if > 0.

    Raises:
        KeyError: If ``depth`` is not a key of ``ResNet.__factory``.
    """
    super(ResNet, self).__init__()
    self.depth = depth
    self.pretrained = pretrained
    self.cut_at_pooling = cut_at_pooling

    # Construct base (pretrain) resnet
    if depth not in ResNet.__factory:
        raise KeyError("Unsupported depth:", depth)

    stem = nn.Conv2d(2, 64, kernel_size=7, stride=2, padding=3, bias=False)
    init.kaiming_uniform_(stem.weight, mode='fan_out')
    self.conv0 = stem
    self.base = ResNet.__factory[depth](pretrained=pretrained)

    if not self.cut_at_pooling:
        self.num_features = num_features
        self.dropout = dropout
        self.has_embedding = num_features > 0

        backbone_width = self.base.fc.in_features

        # Append new layers
        if self.has_embedding:
            embedding = nn.Linear(backbone_width, self.num_features)
            embedding_bn = nn.BatchNorm1d(self.num_features)
            self.feat = embedding
            self.feat_bn = embedding_bn
            init.kaiming_uniform_(embedding.weight, mode='fan_out')
            init.constant_(embedding.bias, 0)
            init.constant_(embedding_bn.weight, 1)
            init.constant_(embedding_bn.bias, 0)
        else:
            # Without an embedding the feature size is the backbone width.
            self.num_features = backbone_width

        if self.dropout > 0:
            self.drop = nn.Dropout(self.dropout)

    if not self.pretrained:
        self.reset_params()
def weight_init(m):
    """Initialise one module in place; intended for ``model.apply(weight_init)``.

    Scheme (adapted from
    https://gist.github.com/jeasinema/ed9236ce743c8efaf30fa2ff732749f5):

    * 1-D (transposed) convolutions: standard-normal weight.
    * 2-D / 3-D (transposed) convolutions and ``nn.Linear``: Kaiming-normal
      weight.
    * Batch norm (1d/2d/3d): weight ~ N(1, 0.02), bias 0.
    * LSTM / LSTMCell / GRU / GRUCell: orthogonal for parameters with two or
      more dimensions, standard normal otherwise.

    Conv and linear biases are drawn from a standard normal when present.
    Layers without a bias (``bias=False``) and batch-norm layers without
    affine parameters (``affine=False``) are handled without error.  Any other
    module type is left untouched.

    Args:
        m: The module to initialise.
    """

    def _normal_bias(module):
        # ``bias`` is None for layers constructed with ``bias=False``.
        if module.bias is not None:
            init.normal_(module.bias.data)

    if isinstance(m, (nn.Conv1d, nn.ConvTranspose1d)):
        init.normal_(m.weight.data)
        _normal_bias(m)
    elif isinstance(m, (nn.Conv2d, nn.Conv3d, nn.ConvTranspose2d, nn.ConvTranspose3d)):
        init.kaiming_normal_(m.weight.data)
        _normal_bias(m)
    elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
        # With ``affine=False`` both weight and bias are None.
        if m.weight is not None:
            init.normal_(m.weight.data, mean=1, std=0.02)
        if m.bias is not None:
            init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.Linear):
        init.kaiming_normal_(m.weight.data)
        _normal_bias(m)
    elif isinstance(m, (nn.LSTM, nn.LSTMCell, nn.GRU, nn.GRUCell)):
        for param in m.parameters():
            if param.dim() >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data)
def _init_weights(self):
    """Xavier-fill the weights of ``fc1`` and ``fc2`` and zero their biases."""
    for fc in (self.fc1, self.fc2):
        mynn.init.XavierFill(fc.weight)
        init.constant_(fc.bias, 0)
def reset_parameters(self):
    """Reset running statistics and, if affine, the weight and bias.

    When ``self.affine`` is set, the first two columns of ``self.weight``
    are filled with sqrt(2) (1.4142135623730951), the column at index 2 is
    zeroed, and ``self.bias`` is zeroed.
    """
    self.reset_running_stats()
    if not self.affine:
        return

    weight = self.weight
    init.constant_(weight[:, :2], 1.4142135623730951)
    init.zeros_(weight[:, 2])
    init.zeros_(self.bias)
def reset_parameters(self):
    """Xavier-uniform the weight with gain sqrt(2) and zero the bias."""
    gain = math.sqrt(2)
    init.xavier_uniform_(self.weight, gain=gain)
    init.constant_(self.bias, 0)
def init_weights(m):
    '''
    Initialise one module in place.

    Usage:
        model = Model()
        model.apply(init_weights)

    Scheme:
        * 1-D (transposed) convolutions: standard-normal weight.
        * 2-D / 3-D (transposed) convolutions and nn.Linear: Xavier-normal
          weight.
        * Batch norm (1d/2d/3d): weight ~ N(1, 0.02), bias 0.
        * LSTM / LSTMCell / GRU / GRUCell: orthogonal for parameters with two
          or more dimensions, standard normal otherwise.

    Conv and linear biases are drawn from a standard normal when present.
    Layers without a bias (bias=False) and batch-norm layers without affine
    parameters (affine=False) are handled without error.  Any other module
    type is left untouched.
    '''

    def _normal_bias(module):
        # ``bias`` is None for layers constructed with ``bias=False``.
        if module.bias is not None:
            init.normal_(module.bias.data)

    if isinstance(m, (nn.Conv1d, nn.ConvTranspose1d)):
        init.normal_(m.weight.data)
        _normal_bias(m)
    elif isinstance(m, (nn.Conv2d, nn.Conv3d, nn.ConvTranspose2d, nn.ConvTranspose3d)):
        init.xavier_normal_(m.weight.data)
        _normal_bias(m)
    elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)):
        # With ``affine=False`` both weight and bias are None.
        if m.weight is not None:
            init.normal_(m.weight.data, mean=1, std=0.02)
        if m.bias is not None:
            init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.Linear):
        init.xavier_normal_(m.weight.data)
        _normal_bias(m)
    elif isinstance(m, (nn.LSTM, nn.LSTMCell, nn.GRU, nn.GRUCell)):
        for param in m.parameters():
            if param.dim() >= 2:
                init.orthogonal_(param.data)
            else:
                init.normal_(param.data)
# Script fragment: builds a two-layer linear network and a one-hidden-layer
# ReLU network, then evaluates the MSE risk of several models on one dataset
# slice for a fixed number of steps.
# NOTE(review): `d`, `d_1`, `d_2`, `data` and `lin_model` are not defined in
# this fragment; they are presumably defined earlier in the file - confirm.

# Two stacked linear layers without biases (no non-linearity in between).
lin_nn_model = nn.Sequential(
    nn.Linear(d, d_1, bias=False),
    nn.Linear(d_1, d_2, bias=False)
)
# Same layer sizes, with biases and a ReLU between the layers.
ReLU_model = nn.Sequential(
    nn.Linear(d, d_1),
    nn.ReLU(),
    nn.Linear(d_1, d_2)
)
loss = nn.MSELoss()
# NOTE(review): `iter` shadows the builtin of the same name from here on.
iter = lin_nn_model.parameters()
w1 = next(iter)  # weight of the first linear layer
w2 = next(iter)  # weight of the second linear layer
init.uniform_(w1, a=0, b=0.01)
# Every entry of w2 is set to one tenth of the norm of w1.
init.constant_(w2, w1.norm() / 10)
# This is definitely true! Compute the gradient!
learning_rate = 0.01
time_range = range(2000)
for i in range(1):
    # All columns but the last are used as inputs, the last one as target.
    x = data[i, :, :-1]
    y = data[i, :, -1].unsqueeze(1)
    r1, r2, r3 = [], [], []
    for t in time_range:
        y_lin_pred = lin_model(x)
        lin_risk = loss(y_lin_pred, y)
        y_lin_nn_pred = lin_nn_model(x)
        lin_nn_risk = loss(y_lin_nn_pred, y)
        y_ReLU_pred = ReLU_model(x)
        ReLU_risk = loss(y_ReLU_pred, y)
        # Only the linear model's risk is recorded in this fragment; r2 and r3
        # are not appended to here.
        r1.append(lin_risk.item())
def weight_init(m):
    """Xavier-uniform the weight and zero the bias of Linear / Conv3d modules.

    Intended for ``model.apply(weight_init)``; other module types are left
    untouched.  Layers constructed with ``bias=False`` are supported.

    Args:
        m: The module to initialise.
    """
    if not isinstance(m, (nn.Linear, nn.Conv3d)):
        return
    init.xavier_uniform_(m.weight.data)
    # ``bias`` is None for layers constructed with ``bias=False``.
    if m.bias is not None:
        init.constant_(m.bias.data, 0)
def _init_weights(self):
    """Gaussian-initialise the two output layers and zero their biases.

    ``cls_score`` weights use std 0.01; ``bbox_pred`` weights use std 0.001.
    """
    heads = (
        (self.cls_score, 0.01),
        (self.bbox_pred, 0.001),
    )
    for layer, std in heads:
        init.normal_(layer.weight, std=std)
        init.constant_(layer.bias, 0)
def __init__(self, version='Squeezenet_1_0', num_bins=66):
    """Build a SqueezeNet feature extractor with yaw / pitch / roll heads.

    Args:
        version: ``'Squeezenet_1_0'`` or ``'Squeezenet_1_1'``.
        num_bins: Number of output channels of each head's final convolution.

    Raises:
        ValueError: If ``version`` is not one of the supported variants.
    """
    super(Hopeless_Squeezenet, self).__init__()
    self.num_bins = num_bins

    def _pool():
        # Every down-sampling stage uses the same pooling configuration.
        return nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)

    if version == 'Squeezenet_1_0':
        stages = [
            nn.Conv2d(3, 96, kernel_size=7, stride=2),
            nn.ReLU(inplace=True),
            _pool(),
            Fire(96, 16, 64, 64),
            Fire(128, 16, 64, 64),
            Fire(128, 32, 128, 128),
            _pool(),
            Fire(256, 32, 128, 128),
            Fire(256, 48, 192, 192),
            Fire(384, 48, 192, 192),
            Fire(384, 64, 256, 256),
            _pool(),
            Fire(512, 64, 256, 256),
        ]
    elif version == 'Squeezenet_1_1':
        stages = [
            nn.Conv2d(3, 64, kernel_size=3, stride=2),
            nn.ReLU(inplace=True),
            _pool(),
            Fire(64, 16, 64, 64),
            Fire(128, 16, 64, 64),
            _pool(),
            Fire(128, 32, 128, 128),
            Fire(256, 32, 128, 128),
            _pool(),
            Fire(256, 48, 192, 192),
            Fire(384, 48, 192, 192),
            Fire(384, 64, 256, 256),
            Fire(512, 64, 256, 256),
        ]
    else:
        # FIXME: Is this needed? SqueezeNet should only be called from the
        # FIXME: squeezenet1_x() functions
        # FIXME: This checking is not done for the other models
        raise ValueError(
            "Unsupported SqueezeNet version {version}:1_0 or 1_1 expected".format(
                version=version
            )
        )
    self.features = nn.Sequential(*stages)

    def _angle_head():
        # One classification head: dropout, 1x1 conv, ReLU, global average pool.
        conv = nn.Conv2d(512, self.num_bins, kernel_size=1)
        head = nn.Sequential(
            nn.Dropout(p=0.5),
            conv,
            nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d((1, 1)),
        )
        return conv, head

    # Final convolution is initialized differently from the rest
    final_conv_yaw, self.classifier_yaw = _angle_head()
    final_conv_pitch, self.classifier_pitch = _angle_head()
    final_conv_roll, self.classifier_roll = _angle_head()
    final_convs = (final_conv_pitch, final_conv_yaw, final_conv_roll)

    for module in self.modules():
        if not isinstance(module, nn.Conv2d):
            continue
        if any(module is conv for conv in final_convs):
            init.normal_(module.weight, mean=0.0, std=0.01)
        else:
            init.kaiming_uniform_(module.weight)
        if module.bias is not None:
            init.constant_(module.bias, 0)
def init_func(m):
    """Xavier-fill Conv2d weights and zero the bias when one exists.

    Modules that are not ``nn.Conv2d`` are left untouched.
    """
    if not isinstance(m, nn.Conv2d):
        return
    mynn.init.XavierFill(m.weight)
    bias = m.bias
    if bias is not None:
        init.constant_(bias, 0)
def train(opt):
    """Train a text-recognition model, validating and checkpointing as it goes.

    Steps, in order: dataset preparation, model configuration and weight
    initialisation, loss and optimizer setup, dumping the options to
    ``opt.txt``, then an endless training loop that validates every
    ``opt.valInterval`` iterations and terminates the process with
    ``sys.exit()`` once the iteration counter equals ``opt.num_iter``.

    Args:
        opt: Options object.  Its ``select_data``, ``batch_ratio``,
            ``num_class`` and (when ``opt.rgb`` is set) ``input_channel``
            attributes are overwritten by this function.
    """
    # --- dataset preparation ---
    opt.select_data = opt.select_data.split('-')
    opt.batch_ratio = opt.batch_ratio.split('-')
    train_dataset = Batch_Balanced_Dataset(opt)

    AlignCollate_valid = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD)
    valid_dataset = hierarchical_dataset(root=opt.valid_data, opt=opt)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=opt.batch_size,
        shuffle=True,  # 'True' to check training progress with validation function.
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_valid, pin_memory=True)
    print('-' * 80)

    """ model configuration """
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial, opt.input_channel, opt.output_channel,
          opt.hidden_size, opt.num_class, opt.batch_max_length, opt.Transformation, opt.FeatureExtraction,
          opt.SequenceModeling, opt.Prediction)

    # weight initialization
    for name, param in model.named_parameters():
        if 'localization_fc2' in name:
            print(f'Skip {name} as it is already initialized')
            continue
        try:
            if 'bias' in name:
                init.constant_(param, 0.0)
            elif 'weight' in name:
                init.kaiming_normal_(param)
        except Exception as e:  # for batchnorm.
            # The initializer failed on this parameter; fall back to filling
            # weights with 1.
            if 'weight' in name:
                param.data.fill_(1)
            continue

    # data parallel for multi-GPU
    model = torch.nn.DataParallel(model).to(device)
    model.train()
    if opt.saved_model != '':
        print(f'loading pretrained model from {opt.saved_model}')
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        if opt.FT:
            # strict=False: keys that do not match the model are tolerated.
            model.load_state_dict(torch.load(opt.saved_model), strict=False)
        else:
            model.load_state_dict(torch.load(opt.saved_model))
    print("Model:")
    print(model)

    """ setup loss """
    if 'CTC' in opt.Prediction:
        criterion = torch.nn.CTCLoss(zero_infinity=True).to(device)
    else:
        criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(device)  # ignore [GO] token = ignore index 0
    # loss averager
    loss_avg = Averager()

    # keep only the parameters that require gradients
    filtered_parameters = []
    params_num = []
    for p in filter(lambda p: p.requires_grad, model.parameters()):
        filtered_parameters.append(p)
        params_num.append(np.prod(p.size()))
    print('Trainable params num : ', sum(params_num))
    # [print(name, p.numel()) for name, p in filter(lambda p: p[1].requires_grad, model.named_parameters())]

    # setup optimizer
    if opt.adam:
        optimizer = optim.Adam(filtered_parameters, lr=opt.lr, betas=(opt.beta1, 0.999))
    else:
        optimizer = optim.Adadelta(filtered_parameters, lr=opt.lr, rho=opt.rho, eps=opt.eps)
    print("Optimizer:")
    print(optimizer)

    """ final options """
    # print(opt)
    # Append every option as "key: value" to the experiment's opt.txt.
    with open(f'./saved_models/{opt.experiment_name}/opt.txt', 'a') as opt_file:
        opt_log = '------------ Options -------------\n'
        args = vars(opt)
        for k, v in args.items():
            opt_log += f'{str(k)}: {str(v)}\n'
        opt_log += '---------------------------------------\n'
        print(opt_log)
        opt_file.write(opt_log)

    """ start training """
    start_iter = 0
    if opt.saved_model != '':
        # The start iteration is parsed from the checkpoint file name: the text
        # between the last '_' and the following '.' must be an integer.
        start_iter = int(opt.saved_model.split('_')[-1].split('.')[0])
        print(f'continue to train, start_iter: {start_iter}')

    start_time = time.time()
    best_accuracy = -1
    best_norm_ED = 1e+6  # lower is better in this variant (compared with '<' below)
    i = start_iter

    while(True):
        # train part
        image_tensors, labels = train_dataset.get_batch()
        image = image_tensors.to(device)
        text, length = converter.encode(labels, batch_max_length=opt.batch_max_length)
        batch_size = image.size(0)

        if 'CTC' in opt.Prediction:
            preds = model(image, text).log_softmax(2)
            preds_size = torch.IntTensor([preds.size(1)] * batch_size)
            preds = preds.permute(1, 0, 2)  # to use CTCLoss format

            # (ctc_a) To avoid ctc_loss issue, disabled cudnn for the computation of the ctc_loss
            # https://github.com/jpuigcerver/PyLaia/issues/16
            torch.backends.cudnn.enabled = False
            cost = criterion(preds, text.to(device), preds_size.to(device), length.to(device))
            torch.backends.cudnn.enabled = True

            # # (ctc_b) To reproduce our pretrained model / paper, use our previous code (below code) instead of (ctc_a).
            # # With PyTorch 1.2.0, the below code occurs NAN, so you may use PyTorch 1.1.0.
            # # Thus, the result of CTCLoss is different in PyTorch 1.1.0 and PyTorch 1.2.0.
            # # See https://github.com/clovaai/deep-text-recognition-benchmark/issues/56#issuecomment-526490707
            # cost = criterion(preds, text, preds_size, length)

        else:
            preds = model(image, text[:, :-1])  # align with Attention.forward
            target = text[:, 1:]  # without [GO] Symbol
            cost = criterion(preds.view(-1, preds.shape[-1]), target.contiguous().view(-1))

        model.zero_grad()
        cost.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip)  # gradient clipping with 5 (Default)
        optimizer.step()

        loss_avg.add(cost)

        # validation part
        if i % opt.valInterval == 0:
            elapsed_time = time.time() - start_time
            print(f'[{i}/{opt.num_iter}] Loss: {loss_avg.val():0.5f} elapsed_time: {elapsed_time:0.5f}')
            # for log
            with open(f'./saved_models/{opt.experiment_name}/log_train.txt', 'a') as log:
                log.write(f'[{i}/{opt.num_iter}] Loss: {loss_avg.val():0.5f} elapsed_time: {elapsed_time:0.5f}\n')
                loss_avg.reset()

                # Switch to eval mode for validation, then back to train mode.
                model.eval()
                with torch.no_grad():
                    valid_loss, current_accuracy, current_norm_ED, preds, labels, infer_time, length_of_data = validation(
                        model, criterion, valid_loader, converter, opt)
                model.train()

                # Show the first five predictions next to their ground truth.
                for pred, gt in zip(preds[:5], labels[:5]):
                    if 'Attn' in opt.Prediction:
                        # Cut both strings at the '[s]' marker.
                        pred = pred[:pred.find('[s]')]
                        gt = gt[:gt.find('[s]')]

                    print(f'{pred:20s}, gt: {gt:20s},   {str(pred == gt)}')
                    log.write(f'{pred:20s}, gt: {gt:20s},   {str(pred == gt)}\n')

                valid_log = f'[{i}/{opt.num_iter}] valid loss: {valid_loss:0.5f}'
                valid_log += f' accuracy: {current_accuracy:0.3f}, norm_ED: {current_norm_ED:0.2f}'
                print(valid_log)
                log.write(valid_log + '\n')

                # keep best accuracy model
                if current_accuracy > best_accuracy:
                    best_accuracy = current_accuracy
                    torch.save(model.state_dict(), f'./saved_models/{opt.experiment_name}/best_accuracy_iter_{i+1}.pth')
                if current_norm_ED < best_norm_ED:
                    best_norm_ED = current_norm_ED
                    torch.save(model.state_dict(), f'./saved_models/{opt.experiment_name}/best_norm_ED_iter_{i+1}.pth')
                best_model_log = f'best_accuracy: {best_accuracy:0.3f}, best_norm_ED: {best_norm_ED:0.2f}'
                print(best_model_log)
                log.write(best_model_log + '\n')

        # save a checkpoint every 1000 iterations.
        if (i + 1) % 1000 == 0:
            torch.save(
                model.state_dict(), f'./saved_models/{opt.experiment_name}/iter_{i+1}.pth')

        if i == opt.num_iter:
            print('end the training')
            # Terminates the whole process, not just this function.
            sys.exit()
        i = i + 1
def weights_init_classifier(m):
    """Initialise Linear-like modules: weight ~ N(0, 0.001), bias 0.

    Selection is by class *name*: any module whose class name contains
    ``Linear`` is initialised; every other module is left untouched.  Modules
    without a bias (``bias=False``) are supported.

    Args:
        m: The module to initialise.
    """
    classname = m.__class__.__name__
    if 'Linear' not in classname:
        return
    init.normal_(m.weight.data, std=0.001)
    # ``bias`` is None for layers constructed with ``bias=False``.
    if getattr(m, 'bias', None) is not None:
        init.constant_(m.bias.data, 0.0)
def weights_init(m):
    """Xavier-uniform the weight and zero the bias of ``nn.Linear`` modules.

    Other module types are left untouched; a missing weight or bias is skipped.
    """
    if not isinstance(m, nn.Linear):
        return

    weight, bias = m.weight, m.bias
    if weight is not None:
        init.xavier_uniform_(weight)
    if bias is not None:
        init.constant_(bias, 0.0)
def _init_weights(self):
    """
    initialize layers before ReLU activation with kaiming initialization

    The layers at indices 0, 2 and 4 of ``self.adversarial`` get either a
    Kaiming-uniform fill (when ``cfg.GAN.MODEL.KAIMING_INIT`` is set) or a
    Xavier fill; their biases are always zeroed.
    """
    use_kaiming = cfg.GAN.MODEL.KAIMING_INIT
    if cfg.DEBUG:
        if use_kaiming:
            print("\tInit Adversarial with KAIMING")
        else:
            print("\tInit ResidualBlock with XAVIER")

    for idx in (0, 2, 4):
        layer = self.adversarial[idx]
        if use_kaiming:
            init.kaiming_uniform_(layer.weight, a=0, mode='fan_in', nonlinearity='relu')
        else:
            mynn.init.XavierFill(layer.weight)
        init.constant_(layer.bias, 0.0)
def _init(m):
    """Initialise one module in place (suitable for ``Module.apply``).

    ``nn.Conv2d`` weights get ``mynn.init.MSRAFill`` (the conv bias is not
    touched); ``nn.Linear`` weights get ``mynn.init.XavierFill`` and a zero
    bias.  Other module types are left untouched.
    """
    if isinstance(m, nn.Conv2d):
        mynn.init.MSRAFill(m.weight)
        return
    if isinstance(m, nn.Linear):
        mynn.init.XavierFill(m.weight)
        init.constant_(m.bias, 0)
def __init__(self, input_num, output_num):
    """Create the LSTMs, the K / Q projections with batch norms, and a softmax.

    Args:
        input_num: Input feature size of the LSTMs and linear projections.
        output_num: Hidden size of the LSTMs and output size of the projections.
    """
    super(SelfPoolingDir, self).__init__()
    self.input_num = input_num
    self.output_num = output_num

    # todo: LSTM
    lstm_kwargs = dict(
        input_size=self.input_num,
        hidden_size=self.output_num,
        num_layers=1,
        batch_first=True,
        dropout=0,
    )
    self.lstm = nn.LSTM(**lstm_kwargs)
    self.bilstm = nn.LSTM(bidirectional=True, **lstm_kwargs)
    self.lstm_bn = nn.BatchNorm1d(self.output_num)

    ## Linear K
    self.featK = nn.Linear(self.input_num, self.output_num)
    self.featK_bn = nn.BatchNorm1d(self.output_num)

    ## Linear_Q
    self.featQ = nn.Linear(self.input_num, self.output_num)
    self.featQ_bn = nn.BatchNorm1d(self.output_num)

    ## Softmax
    self.softmax = nn.Softmax(dim=-1)

    # Projections: Kaiming-uniform (fan_out) weight, zero bias; their batch
    # norms start as the identity transform.
    for linear, batch_norm in ((self.featK, self.featK_bn), (self.featQ, self.featQ_bn)):
        init.kaiming_uniform_(linear.weight, mode='fan_out')
        init.constant_(linear.bias, 0)
        init.constant_(batch_norm.weight, 1)
        init.constant_(batch_norm.bias, 0)

    init.constant_(self.lstm_bn.weight, 1)
    init.constant_(self.lstm_bn.bias, 0)
def _init_weights(self):
    """Initialise ``self.upconv5`` according to ``cfg.MRCNN.CONV_INIT``.

    ``'GaussianFill'`` draws the weight from N(0, 0.001) and ``'MSRAFill'``
    uses ``mynn.init.MSRAFill``; any other value leaves the weight as it is.
    The bias is zeroed in every case.
    """
    scheme = cfg.MRCNN.CONV_INIT
    upconv = self.upconv5
    if scheme == 'GaussianFill':
        init.normal_(upconv.weight, std=0.001)
    elif scheme == 'MSRAFill':
        mynn.init.MSRAFill(upconv.weight)
    init.constant_(upconv.bias, 0)
def train(opt):
    """Train a text-recognition model, validating and checkpointing as it goes.

    Steps, in order: dataset preparation (logged to ``log_dataset.txt``),
    model configuration and weight initialisation, loss and optimizer setup,
    dumping the options to ``opt.txt``, then an endless training loop that
    validates on the first iteration and every ``opt.valInterval`` iterations
    and terminates the process with ``sys.exit()`` once ``opt.num_iter``
    iterations have been run.

    Args:
        opt: Options object.  Its ``select_data``, ``batch_ratio``,
            ``num_class`` and (when ``opt.rgb`` is set) ``input_channel``
            attributes are overwritten by this function.

    Raises:
        NotImplementedError: If CTC prediction is requested together with
            ``opt.baiduCTC``; the warp-ctc criterion is not wired up here.
    """
    # --- dataset preparation ---
    if not opt.data_filtering_off:
        print(
            'Filtering the images containing characters which are not in opt.character'
        )
        print(
            'Filtering the images whose label is longer than opt.batch_max_length'
        )
        # see https://github.com/clovaai/deep-text-recognition-benchmark/blob/6593928855fb7abb999a99f428b3e4477d4ae356/dataset.py#L130

    opt.select_data = opt.select_data.split('-')
    opt.batch_ratio = opt.batch_ratio.split('-')
    train_dataset = Batch_Balanced_Dataset(opt)

    # The context manager guarantees the dataset log is closed even if building
    # the validation dataset or loader raises.
    with open(
            f'/content/drive/MyDrive/saved_models/{opt.exp_name}/log_dataset.txt',
            'a') as log:
        AlignCollate_valid = AlignCollate(imgH=opt.imgH,
                                          imgW=opt.imgW,
                                          keep_ratio_with_pad=opt.PAD)
        valid_dataset, valid_dataset_log = hierarchical_dataset(
            root=opt.valid_data, opt=opt)
        valid_loader = torch.utils.data.DataLoader(
            valid_dataset,
            batch_size=opt.batch_size,
            shuffle=True,  # 'True' to check training progress with validation function.
            num_workers=int(opt.workers),
            collate_fn=AlignCollate_valid,
            pin_memory=True)
        log.write(valid_dataset_log)
        print('-' * 80)
        log.write('-' * 80 + '\n')

    # --- model configuration ---
    if 'CTC' in opt.Prediction:
        if opt.baiduCTC:
            converter = CTCLabelConverterForBaiduWarpctc(opt.character)
        else:
            converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)

    # weight initialization
    for name, param in model.named_parameters():
        if 'localization_fc2' in name:
            print(f'Skip {name} as it is already initialized')
            continue
        try:
            if 'bias' in name:
                init.constant_(param, 0.0)
            elif 'weight' in name:
                init.kaiming_normal_(param)
        except Exception:  # for batchnorm.
            # The initializer failed on this parameter; fall back to filling
            # weights with 1.
            if 'weight' in name:
                param.data.fill_(1)
            continue

    # data parallel for multi-GPU
    model = torch.nn.DataParallel(model).to(device)
    model.train()
    if opt.saved_model != '':
        print(f'loading pretrained model from {opt.saved_model}')
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        if opt.FT:
            # strict=False: keys that do not match the model are tolerated.
            model.load_state_dict(torch.load(opt.saved_model), strict=False)
        else:
            model.load_state_dict(torch.load(opt.saved_model))
    print("Model:")
    print(model)

    # --- setup loss ---
    if 'CTC' in opt.Prediction:
        if opt.baiduCTC:
            # need to install warpctc. see our guideline.
            # from warpctc_pytorch import CTCLoss
            # criterion = CTCLoss()
            # Without a criterion the first training step would fail with an
            # unbound local variable; fail here with an actionable message.
            raise NotImplementedError(
                'opt.baiduCTC requires warpctc_pytorch.CTCLoss, which is not '
                'enabled in this script')
        else:
            criterion = torch.nn.CTCLoss(zero_infinity=True).to(device)
    else:
        criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(
            device)  # ignore [GO] token = ignore index 0
    # loss averager
    loss_avg = Averager()

    # keep only the parameters that require gradients
    filtered_parameters = []
    params_num = []
    for p in filter(lambda p: p.requires_grad, model.parameters()):
        filtered_parameters.append(p)
        params_num.append(np.prod(p.size()))
    print('Trainable params num : ', sum(params_num))
    # [print(name, p.numel()) for name, p in filter(lambda p: p[1].requires_grad, model.named_parameters())]

    # setup optimizer
    if opt.adam:
        optimizer = optim.Adam(filtered_parameters,
                               lr=opt.lr,
                               betas=(opt.beta1, 0.999))
    else:
        optimizer = optim.Adadelta(filtered_parameters,
                                   lr=opt.lr,
                                   rho=opt.rho,
                                   eps=opt.eps)
    print("Optimizer:")
    print(optimizer)

    # --- final options ---
    # print(opt)
    # Append every option as "key: value" to the experiment's opt.txt.
    with open(f'/content/drive/MyDrive/saved_models/{opt.exp_name}/opt.txt',
              'a',
              encoding='utf-8') as opt_file:
        opt_log = '------------ Options -------------\n'
        args = vars(opt)
        for k, v in args.items():
            opt_log += f'{str(k)}: {str(v)}\n'
        opt_log += '---------------------------------------\n'
        print(opt_log)
        opt_file.write(opt_log)

    # --- start training ---
    start_iter = 0
    if opt.saved_model != '':
        # The start iteration is parsed from the checkpoint file name (text
        # between the last '_' and the following '.').  Names without a
        # trailing integer, e.g. 'best_accuracy.pth', start from 0.
        try:
            start_iter = int(opt.saved_model.split('_')[-1].split('.')[0])
            print(f'continue to train, start_iter: {start_iter}')
        except ValueError:
            pass

    start_time = time.time()
    best_accuracy = -1
    best_norm_ED = -1  # higher is better in this variant (compared with '>' below)
    iteration = start_iter

    while (True):
        # train part
        print(iteration)
        image_tensors, labels = train_dataset.get_batch()
        image = image_tensors.to(device)
        text, length = converter.encode(labels,
                                        batch_max_length=opt.batch_max_length)
        batch_size = image.size(0)

        if 'CTC' in opt.Prediction:
            preds = model(image, text)
            preds_size = torch.IntTensor([preds.size(1)] * batch_size)
            if opt.baiduCTC:
                preds = preds.permute(1, 0, 2)  # to use CTCLoss format
                cost = criterion(preds, text, preds_size, length) / batch_size
            else:
                preds = preds.log_softmax(2).permute(1, 0, 2)
                cost = criterion(preds, text, preds_size, length)

        else:
            preds = model(image, text[:, :-1])  # align with Attention.forward
            target = text[:, 1:]  # without [GO] Symbol
            cost = criterion(preds.view(-1, preds.shape[-1]),
                             target.contiguous().view(-1))

        model.zero_grad()
        cost.backward()
        torch.nn.utils.clip_grad_norm_(
            model.parameters(),
            opt.grad_clip)  # gradient clipping with 5 (Default)
        optimizer.step()

        loss_avg.add(cost)
        print(cost)

        # validation part
        # To see training progress, we also conduct validation when 'iteration == 0'
        if (iteration + 1) % opt.valInterval == 0 or iteration == 0:
            elapsed_time = time.time() - start_time
            # for log
            with open(
                    f'/content/drive/MyDrive/saved_models/{opt.exp_name}/log_train.txt',
                    'a',
                    encoding='utf-8') as log:
                # Switch to eval mode for validation, then back to train mode.
                model.eval()
                with torch.no_grad():
                    valid_loss, current_accuracy, current_norm_ED, preds, confidence_score, labels, infer_time, length_of_data = validation(
                        model, criterion, valid_loader, converter, opt)
                model.train()

                # training loss and validation loss
                loss_log = f'[{iteration + 1}/{opt.num_iter}] Train loss: {loss_avg.val():0.5f}, Valid loss: {valid_loss:0.5f}, Elapsed_time: {elapsed_time:0.5f}'
                loss_avg.reset()

                current_model_log = f'{"Current_accuracy":17s}: {current_accuracy:0.3f}, {"Current_norm_ED":17s}: {current_norm_ED:0.2f}'

                # keep best accuracy model (on valid dataset)
                if current_accuracy > best_accuracy:
                    best_accuracy = current_accuracy
                    torch.save(
                        model.state_dict(),
                        f'/content/drive/MyDrive/saved_models//{opt.exp_name}/best_accuracy.pth'
                    )
                if current_norm_ED > best_norm_ED:
                    best_norm_ED = current_norm_ED
                    torch.save(
                        model.state_dict(),
                        f'/content/drive/MyDrive/saved_models//{opt.exp_name}/best_norm_ED.pth'
                    )
                best_model_log = f'{"Best_accuracy":17s}: {best_accuracy:0.3f}, {"Best_norm_ED":17s}: {best_norm_ED:0.2f}'

                loss_model_log = f'{loss_log}\n{current_model_log}\n{best_model_log}'
                print(loss_model_log)
                log.write(loss_model_log + '\n')

                # show some predicted results
                dashed_line = '-' * 80
                head = f'{"Ground Truth":25s} | {"Prediction":25s} | Confidence Score & T/F'
                predicted_result_log = f'{dashed_line}\n{head}\n{dashed_line}\n'
                for gt, pred, confidence in zip(labels[:5], preds[:5],
                                                confidence_score[:5]):
                    if 'Attn' in opt.Prediction:
                        # Cut both strings at the '[s]' marker.
                        gt = gt[:gt.find('[s]')]
                        pred = pred[:pred.find('[s]')]

                    predicted_result_log += f'{gt:25s} | {pred:25s} | {confidence:0.4f}\t{str(pred == gt)}\n'
                predicted_result_log += f'{dashed_line}'
                print(predicted_result_log)
                log.write(predicted_result_log + '\n')

        # save model per 1e+5 iter.
        if (iteration + 1) % 1e+5 == 0:
            torch.save(
                model.state_dict(),
                f'/content/drive/MyDrive/saved_models//{opt.exp_name}/iter_{iteration + 1}.pth'
            )

        if (iteration + 1) == opt.num_iter:
            print('end the training')
            # Terminates the whole process, not just this function.
            sys.exit()
        iteration += 1
def reset_parameters(self):
    """Fill every element of ``self.weight`` with the scalar ``self.gamma``."""
    fill_value = self.gamma
    init.constant_(self.weight, fill_value)
def __init__(self,
             num_classes,
             block,
             layers,
             n_head=1,
             attention_type='concat',
             shot_mode='mean',
             num_way=2,
             num_shot=5,
             pos_encoding=True,
             pretrained=False):
    """Build the ResNet backbone, FPN, detection heads and attention layers.

    Args:
        num_classes: forwarded to ``ClassificationModel``.
        block: residual block class; must be ``BasicBlock`` or ``Bottleneck``.
        layers: number of blocks in each of the four residual stages.
        n_head: stored on the instance; not otherwise used here.
        attention_type: ``'product'`` builds a 256-channel FPN fed by the
            backbone widths; any other value builds a 512-channel FPN fed
            by twice those widths.
        shot_mode: stored on the instance; not otherwise used here.
        num_way: accepted for interface compatibility; unused here.
        num_shot: stored on the instance; not otherwise used here.
        pos_encoding: when truthy, create one ``PositionalEncoding`` per
            pyramid level.
        pretrained: when truthy, load backbone weights from
            ``self.model_path``.

    Raises:
        ValueError: if ``block`` is neither ``BasicBlock`` nor ``Bottleneck``.
    """
    super(aaa_retinanet, self).__init__()
    self.model_path = 'data/pretrained_model/resnet50_caffe.pth'
    self.pretrained = pretrained
    self.inplanes = 64
    self.n_head = n_head
    self.attention_type = attention_type
    self.shot_mode = shot_mode
    self.num_shot = num_shot
    self.pos_encoding = pos_encoding
    self.support_im_size = 320

    # ResNet stem and the four residual stages.
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    # Modules whose weights come from the checkpoint. The generic weight
    # initialisation further down must leave them alone, otherwise the
    # loaded weights are overwritten with random values.
    pretrained_modules = set()
    if self.pretrained:
        print("Loading pretrained weights from %s" % (self.model_path))
        state_dict = torch.load(self.model_path)
        # Only the backbone exists at this point, so the filter keeps just
        # the checkpoint entries that belong to it.
        own_keys = self.state_dict()
        self.load_state_dict({
            k: v
            for k, v in state_dict.items() if k in own_keys
        })
        pretrained_modules = set(self.modules())

    def set_bn_fix(m):
        # Stop gradient updates for the parameters of BatchNorm modules.
        classname = m.__class__.__name__
        if classname.find('BatchNorm') != -1:
            for p in m.parameters():
                p.requires_grad = False

    # Applied now, so it only reaches the backbone modules built so far.
    self.apply(set_bn_fix)

    # Output width of the last block in stages 2-4 feeds the FPN.
    if block == BasicBlock:
        fpn_sizes = [
            self.layer2[layers[1] - 1].conv2.out_channels,
            self.layer3[layers[2] - 1].conv2.out_channels,
            self.layer4[layers[3] - 1].conv2.out_channels
        ]
    elif block == Bottleneck:
        fpn_sizes = [
            self.layer2[layers[1] - 1].conv3.out_channels,
            self.layer3[layers[2] - 1].conv3.out_channels,
            self.layer4[layers[3] - 1].conv3.out_channels
        ]
    else:
        raise ValueError(f"Block type {block} not understood")

    attention_output_dim = 256 if self.attention_type == 'product' else 512
    if self.attention_type == 'product':
        self.fpn = PyramidFeatures(
            fpn_sizes[0],
            fpn_sizes[1],
            fpn_sizes[2],
            feature_size=attention_output_dim)  # [512, 1024, 2048]
    else:
        # Non-product attention hands the FPN twice the backbone width.
        self.fpn = PyramidFeatures(fpn_sizes[0] * 2,
                                   fpn_sizes[1] * 2,
                                   fpn_sizes[2] * 2,
                                   feature_size=attention_output_dim)

    self.regressionModel = RegressionModel(attention_output_dim)
    self.classificationModel = ClassificationModel(attention_output_dim,
                                                   num_classes=num_classes)
    self.anchors = Anchors([4, 5, 6, 7])
    self.regressBoxes = BBoxTransform()
    self.clipBoxes = ClipBoxes()
    self.focalLoss = losses.FocalLoss()

    # weights initialization (skips modules already filled from the
    # pretrained checkpoint)
    for m in self.modules():
        if m in pretrained_modules:
            continue
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()

    # Output layers: zero weights; the classification bias is set to
    # -log((1 - prior) / prior).
    prior = 0.01
    self.classificationModel.output.weight.data.fill_(0)
    self.classificationModel.output.bias.data.fill_(-math.log(
        (1.0 - prior) / prior))
    self.regressionModel.output.weight.data.fill_(0)
    self.regressionModel.output.bias.data.fill_(0)

    self.freeze_bn()

    self.resnet_base = nn.Sequential(self.conv1, self.bn1, self.relu,
                                     self.maxpool)

    # querys, keys: one pair of linear projections per pyramid level
    Q_list = []
    K_list = []
    self.d_k = 64
    self.fpn_dims = [512, 1024, 2048]
    for fpn_dim in self.fpn_dims:
        Q_weight = nn.Linear(fpn_dim, self.d_k)
        K_weight = nn.Linear(fpn_dim, self.d_k)
        init.normal_(Q_weight.weight, std=0.01)
        init.constant_(Q_weight.bias, 0)
        init.normal_(K_weight.weight, std=0.01)
        init.constant_(K_weight.bias, 0)
        Q_list.append(Q_weight)
        K_list.append(K_weight)
    self.pyramid_Q_layers = nn.ModuleList(Q_list)
    self.pyramid_K_layers = nn.ModuleList(K_list)

    if self.pos_encoding:
        pel_3 = PositionalEncoding(d_model=512, max_len=40 * 40)
        pel_4 = PositionalEncoding(d_model=1024, max_len=20 * 20)
        pel_5 = PositionalEncoding(d_model=2048, max_len=10 * 10)
        self.pos_encoding_layers = nn.ModuleList([pel_3, pel_4, pel_5])
def __init__(
    self,
    input_size,
    hidden_size,
    kernel_size,
    groups,
    reset_gate=True,
    min_reset=0.0,
    update_gate=True,
    min_update=0.0,
    out_bias=True,
    out_act=None,
):
    """Create the gate convolutions and the output weights of the cell.

    Args:
        input_size: number of input channels.
        hidden_size: number of hidden-state channels.
        kernel_size: spatial size of the square convolution kernels.
        groups: ``groups`` argument of the gate convolutions.
        reset_gate: build a reset-gate convolution when truthy, otherwise
            ``self.reset_gate`` is ``None``.
        min_reset: stored as ``self.min_reset`` (only when the reset gate
            is built).
        update_gate: build an update-gate convolution when truthy,
            otherwise ``self.update_gate`` is ``None``.
        min_update: stored as ``self.min_update`` (only when the update
            gate is built).
        out_bias: kept for interface compatibility. No output convolution
            is created here (its place is taken by ``self.out_weights``),
            so there is no bias to initialise and the flag has no effect.
        out_act: ``None``, ``"tanh"`` or ``"leaky_relu"``.

    Raises:
        NotImplementedError: for any other ``out_act`` value.
    """
    super().__init__()
    padding = kernel_size // 2
    self.padding = padding
    self.input_size = input_size
    self.hidden_size = hidden_size

    if reset_gate:
        self.min_reset = min_reset
        self.reset_gate = nn.Conv2d(
            input_size + hidden_size,
            hidden_size,
            kernel_size,
            groups=groups,
            padding=padding,
        )
    else:
        self.reset_gate = None

    if update_gate:
        self.min_update = min_update
        self.update_gate = nn.Conv2d(
            input_size + hidden_size,
            hidden_size,
            kernel_size,
            groups=groups,
            padding=padding,
        )
    else:
        self.update_gate = None

    # Constant-valued output weights, used instead of an output convolution.
    W = torch.ones(hidden_size, hidden_size, kernel_size, kernel_size)
    self.out_weights = nn.Parameter(0.75 * W / input_size / kernel_size**2)

    if self.reset_gate is not None:
        init.orthogonal_(self.reset_gate.weight)
        init.constant_(self.reset_gate.bias, 0.0)
    if self.update_gate is not None:
        init.orthogonal_(self.update_gate.weight)
        init.constant_(self.update_gate.bias, 0.0)

    # The previous code ran ``init.constant_(self.out_gate.bias, 0.0)`` when
    # ``out_bias`` was set, but ``self.out_gate`` is never assigned, so the
    # default arguments raised AttributeError. Nothing is initialised here.

    if out_act is None:
        self.out_act = None
    elif out_act == "tanh":
        self.out_act = torch.tanh
    elif out_act == "leaky_relu":
        self.out_act = torch.leaky_relu
    else:
        raise NotImplementedError(out_act)
def __init__(self, word_vec, class_num, pos_num, config):
    """Assemble the embeddings, convolution, attention and output layers.

    Args:
        word_vec: pretrained word-embedding matrix; used to build a
            trainable (``freeze=False``) embedding table.
        class_num: number of output features of the final dense layer.
        pos_num: number of rows in the tag embedding table.
        config: object providing ``max_len``, ``word_dim``, ``pos_dim``,
            ``pos_dis``, ``tag_dim``, ``dropout``, ``filter_num`` and
            ``window``.
    """
    super().__init__()
    self.word_vec = word_vec
    self.class_num = class_num
    self.pos_num = pos_num

    # Hyper-parameters copied from the config object.
    self.max_len = config.max_len
    self.word_dim = config.word_dim
    self.pos_dim = config.pos_dim
    self.pos_dis = config.pos_dis
    self.tag_dim = config.tag_dim
    self.dropout_value = config.dropout
    self.filter_num = config.filter_num
    self.window = config.window

    # Per-token feature width: word + two position embeddings + tag.
    self.dim = self.word_dim + 2 * self.pos_dim + self.tag_dim
    position_vocab = 2 * self.pos_dis + 3
    attention_width = self.dim * 2

    # Embedding tables.
    self.word_embedding = nn.Embedding.from_pretrained(
        embeddings=self.word_vec,
        freeze=False,
    )
    self.pos1_embedding = nn.Embedding(
        num_embeddings=position_vocab,
        embedding_dim=self.pos_dim,
    )
    self.pos2_embedding = nn.Embedding(
        num_embeddings=position_vocab,
        embedding_dim=self.pos_dim,
    )
    self.tag_embedding = nn.Embedding(
        num_embeddings=self.pos_num,
        embedding_dim=self.tag_dim,
    )

    # Convolution spanning the full feature width. With stride 1 the
    # (1, 0) padding keeps the sequence length unchanged only when
    # ``window == 3``.
    self.conv = nn.Conv2d(
        in_channels=1,
        out_channels=self.filter_num,
        kernel_size=(self.window, self.dim),
        stride=(1, 1),
        bias=True,
        padding=(1, 0),
        padding_mode='zeros',
    )
    self.maxpool = nn.MaxPool2d((self.max_len, 1))
    self.tanh = nn.Tanh()

    # Linear layers.
    self.we = nn.Linear(
        in_features=attention_width,
        out_features=attention_width,
        bias=True,
    )
    self.wa = nn.Linear(in_features=attention_width, out_features=1, bias=True)
    self.dense = nn.Linear(
        in_features=self.filter_num + attention_width,
        out_features=self.class_num,
        bias=True,
    )

    # Weight initialisation: uniform in [-0.1, 0.1] for weights, zero for
    # biases. The pretrained word embedding is left untouched.
    for table in (self.pos1_embedding, self.pos2_embedding, self.tag_embedding):
        init.uniform_(table.weight, a=-0.1, b=0.1)
    for layer in (self.conv, self.we, self.wa, self.dense):
        init.uniform_(layer.weight, a=-0.1, b=0.1)
        init.constant_(layer.bias, 0.)
def weights_init(m):
    """Initialise a module whose class is named exactly ``Linear``.

    The weight gets Xavier-uniform values with gain ``sqrt(2)`` and the
    bias, if present, is set to 0.1. Modules of any other class name are
    left untouched.
    """
    if type(m).__name__ != 'Linear':
        return
    init.xavier_uniform_(m.weight, gain=np.sqrt(2.0))
    if m.bias is None:
        return
    init.constant_(m.bias, 0.1)
def init_weights(self):
    """Initialise the target embedding and the ``fc`` layer.

    Both weight matrices are drawn from a normal distribution with
    standard deviation 0.01 (embedding first, then ``fc``); the ``fc``
    bias is set to zero.
    """
    for weight in (self.tgt_embedding.weight, self.fc.weight):
        init.normal_(weight, std=0.01)
    init.constant_(self.fc.bias, 0)
def _init_params(self):
    """Kaiming-uniform initialise every ``nn.Conv2d`` found in this module.

    Biases of those convolutions, when present, are set to zero. Modules
    of other types are not modified.
    """
    conv_layers = (
        layer for layer in self.modules() if isinstance(layer, nn.Conv2d)
    )
    for conv in conv_layers:
        init.kaiming_uniform_(conv.weight)
        if conv.bias is None:
            continue
        init.constant_(conv.bias, 0)
def _set_init(self, layer):
    """Initialise ``layer`` in place.

    The weight is drawn from a normal distribution (mean 0, std 0.1) and
    the bias is filled with the module-level constant ``B_INIT``.
    """
    init.normal_(tensor=layer.weight, mean=0.0, std=0.1)
    init.constant_(tensor=layer.bias, val=B_INIT)
def init_parameter(self, parameter):
    """Overwrite ``parameter`` in place with the constant ``self.val``."""
    fill_value = self.val
    init.constant_(parameter, val=fill_value)
y=self.linear(X) return y net=LinearNet(num_inputs) print(net) #net=nn.Sequential(nn.Linear(num_inputs,1)) #net=nn.Sequential() #net.add_module('linear',nn.Linear(num_inputs,1)) #from collections import OrderDict #net=nn.Sequential(OrderDict(['linear',nn.Linear(num_inputs,1)])) for param in net.parameters(): print(param) #初始化模型参数 from torch.nn import init init.normal_(net.linear.weight,mean=0.0,std=0.01) init.constant_(net.linear.bias,val=0.0) for param in net.parameters(): print(param) #损失函数 loss=nn.MSELoss() #优化算法 import torch.optim as optim optimizer=optim.SGD(net.parameters(),lr=0.03) print(optimizer) # 为不同子网络设置不同的学习率 # optimizer =optim.SGD([ # # 如果对某个参数不指定学习率,就使用最外层的默认学习率 # {'params': net.subnet1.parameters()}, # lr=0.03 # {'params': net.subnet2.parameters(), 'lr': 0.01} # ], lr=0.03)
def init_func(m):
    """Initialise an ``nn.Conv2d`` module; ignore every other module type.

    The weight is filled with ``mynn.init.XavierFill`` and the bias, when
    the layer has one, is set to zero.
    """
    if not isinstance(m, nn.Conv2d):
        return
    # mynn.init.MSRAFill(m.weight) is the alternative filler, left disabled.
    mynn.init.XavierFill(m.weight)
    if m.bias is None:
        return
    init.constant_(m.bias, 0)
# Build the model as an ordered, named container; more layers can be
# listed after 'linear'.
net = nn.Sequential(
    OrderedDict([
        ('linear', nn.Linear(num_inputs, 1)),
        # ......
    ]))
print(net)
print(net[0])

# Show the parameters before initialisation.
for param in net.parameters():
    print(param)

# Initialise the model parameters.
from torch.nn import init

init.normal_(net.linear.weight, mean=0.0, std=0.1)
init.constant_(net.linear.bias, val=0.0)

# Show the parameters after initialisation.
for param in net.parameters():
    print(param)

# Define the loss function.
loss = nn.MSELoss()

# Define the optimisation algorithm.
import torch.optim as optim

optimizer = optim.SGD(net.parameters(), lr=0.03)
print(optimizer)

# Set different learning rates for different sub-networks
# optimizer =optim.SGD([
#     # If no learning rate is given for a parameter group, the outermost default learning rate is used
def constant_init(m):
    """Fill an ``nn.Conv2d`` weight with 0.01 and zero its bias.

    Modules that are not ``nn.Conv2d`` are left untouched. A convolution
    built with ``bias=False`` has ``m.bias is None``; in that case only the
    weight is initialised instead of raising ``AttributeError``.
    """
    if not isinstance(m, nn.Conv2d):
        return
    init.constant_(m.weight, 0.01)
    if m.bias is not None:
        m.bias.data.zero_()
def initialize(m):
    """Xavier-normal initialise convolution and transposed-convolution weights.

    * ``nn.Conv2d``: weight is Xavier-normal; the bias is zeroed when the
      layer has one (layers built with ``bias=False`` no longer crash).
    * ``nn.ConvTranspose2d``: weight is Xavier-normal; the bias is left as
      it is.
    * Any other module is left untouched.
    """
    if isinstance(m, nn.Conv2d):
        init.xavier_normal_(m.weight)
        if m.bias is not None:
            init.constant_(m.bias, 0)
    elif isinstance(m, nn.ConvTranspose2d):
        init.xavier_normal_(m.weight)
def weight_init(m):
    """Xavier-normal initialise an ``nn.Conv2d`` and zero its bias.

    Modules that are not ``nn.Conv2d`` are left untouched. When the
    convolution was built with ``bias=False`` (``m.bias is None``) only the
    weight is initialised instead of raising on the missing bias.
    """
    if not isinstance(m, nn.Conv2d):
        return
    init.xavier_normal_(m.weight)
    if m.bias is not None:
        init.constant_(m.bias, 0)
def reset_params(self):
    """Re-initialise all sub-modules, then copy ResNet weights into ``self.base``.

    Initialisation per module type:
      * ``nn.Conv2d``: Kaiming-normal weight (``mode='fan_out'``), zero bias
        when a bias exists.
      * ``nn.BatchNorm2d`` / ``nn.BatchNorm1d``: weight 1, bias 0.
      * ``nn.Linear``: normal weight with std 0.001, zero bias when a bias
        exists.

    Afterwards a ResNet is built through ``ResNetIBN.__factory`` for
    ``self.depth`` (with ``pretrained=self.pretrained``) and the state of
    its stem and four stages is loaded into ``self.base[0]`` to
    ``self.base[7]``.
    """
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            init.kaiming_normal_(module.weight, mode='fan_out')
            if module.bias is not None:
                init.constant_(module.bias, 0)
        elif isinstance(module, (nn.BatchNorm2d, nn.BatchNorm1d)):
            init.constant_(module.weight, 1)
            init.constant_(module.bias, 0)
        elif isinstance(module, nn.Linear):
            init.normal_(module.weight, std=0.001)
            if module.bias is not None:
                init.constant_(module.bias, 0)

    resnet = ResNetIBN.__factory[self.depth](pretrained=self.pretrained)
    # Position i of ``self.base`` receives the state of the i-th stage below.
    source_stages = (
        resnet.conv1,
        resnet.bn1,
        resnet.relu,
        resnet.maxpool,
        resnet.layer1,
        resnet.layer2,
        resnet.layer3,
        resnet.layer4,
    )
    for position, stage in enumerate(source_stages):
        self.base[position].load_state_dict(stage.state_dict())