def init_params(self):
    """Re-initialise the parameters of every sub-module in place.

    * ``M.Conv2d``: Xavier-normal weight, zero bias (when a bias exists).
    * ``M.BatchNorm2d``: weight filled with 1, bias zeroed.
    * ``M.Linear``: normal weight with ``std=0.001``, zero bias (when a
      bias exists).

    Modules of any other type are left untouched.
    """
    for layer in self.modules():
        if isinstance(layer, M.Conv2d):
            init.xavier_normal_(layer.weight)
        elif isinstance(layer, M.BatchNorm2d):
            init.fill_(layer.weight, 1)
            init.zeros_(layer.bias)
            continue
        elif isinstance(layer, M.Linear):
            init.normal_(layer.weight, std=0.001)
        else:
            continue

        # Only conv and linear layers reach this point; their bias is optional.
        if layer.bias is not None:
            init.zeros_(layer.bias)
def __init__(self):
    """Create a three-layer fully connected network.

    Layer sizes are ``num_class -> mid_layers -> mid_layers -> num_class``
    with ``num_class = 2`` and ``mid_layers = 14``. Every layer's weight is
    drawn from a normal distribution with ``std = sqrt(1 / fan_in)`` and its
    bias is set to zero.
    """
    self.mid_layers = 14
    self.num_class = 2
    super().__init__()

    def build_fc(n_in, n_out):
        # Create one Linear layer and apply the fan-in scaled normal init.
        fc = Linear(n_in, n_out, bias=True)
        fan_in, _ = init.calculate_fan_in_and_fan_out(fc.weight)
        init.normal_(fc.weight, std=np.sqrt(1.0 / fan_in))
        init.zeros_(fc.bias)
        return fc

    # Layers are created and initialised one after another, in this order.
    self.fc0 = build_fc(self.num_class, self.mid_layers)
    self.fc1 = build_fc(self.mid_layers, self.mid_layers)
    self.fc2 = build_fc(self.mid_layers, self.num_class)
def maml_init_(module: M.Conv2d):
    """Initialise a convolution layer in place.

    The weight is re-drawn with ``minit.xavier_uniform_`` (``gain=1.0``) and
    the bias, when the layer has one, is set to zero.

    Args:
        module: the convolution layer whose parameters are re-initialised.
    """
    minit.xavier_uniform_(module.weight, gain=1.0)
    # A layer built without a bias term exposes ``bias`` as None; skip it
    # instead of failing, as the other initialisers in this file do.
    if module.bias is not None:
        minit.zeros_(module.bias)
def fc_init_(module: M.Linear):
    """Initialise a fully connected layer in place.

    The weight is passed to ``init.truncated_normal_`` with ``mean=0.0`` and
    ``std=0.01``; the bias is zeroed. Either step is skipped when the
    attribute is missing or is None.

    Args:
        module: the layer whose parameters are re-initialised.
    """
    weight = getattr(module, 'weight', None)
    if weight is not None:
        init.truncated_normal_(weight, mean=0.0, std=0.01)

    bias = getattr(module, 'bias', None)
    if bias is not None:
        minit.zeros_(bias)
def __init__(self,
             block,
             blocks,
             in_ch=3,
             num_classes=1000,
             first_stride=2,
             light_head=False,
             zero_init_residual=False,
             groups=1,
             width_per_group=64,
             strides=[1, 2, 2, 2],
             dilations=[1, 1, 1, 1],
             multi_grids=[1, 1, 1],
             norm_layer=None,
             se_module=None,
             reduction=16,
             radix=0,
             avd=False,
             avd_first=False,
             avg_layer=False,
             avg_down=False,
             stem_width=64):
    """Build a ResNet (or one of its variants) with a classification head.

    Modified ResNet according to
    https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.

    Args:
        block: BasicBlock or Bottleneck, the residual block class.
        blocks: sequence with one entry per stage (indices 0..3 are used),
            passed to the stage builders as the block count.
        in_ch: int, the number of channels of the input.
        num_classes: int, the number of classes to predict.
        first_stride: int, the stride of the first conv layer.
        light_head: bool, whether to replace the 7x7 stem conv with three
            3x3 convs.
        zero_init_residual: bool, whether to zero-initialise the weight of
            the last batch norm of every residual block (``bn3`` of
            Bottleneck, ``bn2`` of BasicBlock).
        groups: int, the number of groups for the convs in the net
            (stored as ``self.groups``).
        width_per_group: int, the width of the conv layers
            (stored as ``self.base_width``).
        strides: list of length 4, the stride of each stage.
        dilations: list of length 4, the dilation of each stage.
        multi_grids: list, the multi-grid setting from DeepLab v3. It must
            hold at least ``blocks[-1]`` entries; extra entries are dropped.
        norm_layer: the normalization layer class; ``M.BatchNorm2d`` when
            None.
        se_module: SEModule, the Squeeze-and-Excitation module.
        reduction: int, the reduction rate.
        radix: int, the radix index from ResNeSt.
        avd: bool, whether to use the avd layer.
        avd_first: bool, whether to use the avd layer before the
            bottleneck's conv2.
        avg_layer: stored as ``self.avg_layer``; not used directly in this
            constructor (presumably read by the stage builders - confirm).
        avg_down: stored as ``self.avg_down``; not used directly in this
            constructor (presumably read by the stage builders - confirm).
        stem_width: int, the channels of the 3x3 stem convs when
            ``light_head`` is set; the stem then outputs ``stem_width * 2``
            channels.

    Raises:
        ValueError: if ``dilations`` or ``strides`` does not have exactly
            four entries, or ``multi_grids`` has fewer than ``blocks[-1]``
            entries.

    References:
        "Deep Residual Learning for Image Recognition"
        <https://arxiv.org/pdf/1512.03385.pdf>
        "Aggregated Residual Transformation for Deep Neural Networks"
        <https://arxiv.org/pdf/1611.05431.pdf>
        https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch
        deeplab v3: https://arxiv.org/pdf/1706.05587.pdf
        deeplab v3+: https://arxiv.org/pdf/1802.02611.pdf
        "Squeeze-and-Excitation Networks"
        <https://arxiv.org/abs/1709.01507>
        "ResNeSt: Split-Attention Networks"
        <https://arxiv.org/pdf/2004.08955.pdf>
    """
    super(ResNet, self).__init__()

    if len(dilations) != 4:
        raise ValueError(
            "The length of dilations must be 4, but got {}".format(
                len(dilations)))
    if len(strides) != 4:
        # The message previously named "dilations" here by mistake.
        raise ValueError(
            "The length of strides must be 4, but got {}".format(
                len(strides)))
    if len(multi_grids) < blocks[-1]:
        raise ValueError(
            "The length of multi_grids must greater than or equal the number of blocks for last stage , but got {}/{}"
            .format(len(multi_grids), blocks[-1]))
    # Always slice: this trims surplus entries and also avoids storing the
    # shared default list object on the instance.
    multi_grids = multi_grids[:blocks[-1]]

    if norm_layer is None:
        norm_layer = M.BatchNorm2d

    self.base_width = width_per_group
    self.multi_grids = multi_grids
    self.inplanes = stem_width * 2 if light_head else 64
    self.groups = groups
    self.norm_layer = norm_layer
    self.avg_layer = avg_layer
    self.avg_down = avg_down

    # Stem: three 3x3 convs (light head) or a single 7x7 conv.
    if light_head:
        self.conv1 = M.Sequential(
            conv3x3(in_ch, stem_width, stride=first_stride),
            norm_layer(stem_width),
            M.ReLU(),
            conv3x3(stem_width, stem_width, stride=1),
            norm_layer(stem_width),
            M.ReLU(),
            conv3x3(stem_width, self.inplanes, stride=1),
        )
    else:
        self.conv1 = M.Conv2d(in_ch,
                              self.inplanes,
                              kernel_size=7,
                              stride=first_stride,
                              padding=3,
                              bias=False)
    self.bn1 = norm_layer(self.inplanes)
    self.relu = M.ReLU()
    self.maxpool = M.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Keyword arguments shared by all four stages.
    stage_kwargs = dict(se_module=se_module,
                        reduction=reduction,
                        radix=radix,
                        avd=avd,
                        avd_first=avd_first)

    # 4 stage
    self.layer1 = self._make_layer(block,
                                   64,
                                   blocks[0],
                                   stride=strides[0],
                                   dilation=dilations[0],
                                   **stage_kwargs)
    self.layer2 = self._make_layer(block,
                                   128,
                                   blocks[1],
                                   stride=strides[1],
                                   dilation=dilations[1],
                                   **stage_kwargs)
    self.layer3 = self._make_layer(block,
                                   256,
                                   blocks[2],
                                   stride=strides[2],
                                   dilation=dilations[2],
                                   **stage_kwargs)
    # The last stage is built by the multi-grid variant of the builder.
    self.layer4 = self._make_grid_layer(block,
                                        512,
                                        blocks[3],
                                        stride=strides[3],
                                        dilation=dilations[3],
                                        **stage_kwargs)

    # classification part
    # self.inplanes is read again here, after the stages have been built.
    self.avgpool = M.AdaptiveAvgPool2d(1)
    self.fc = M.Linear(self.inplanes, num_classes)

    # Explicit weight initialisation is currently disabled:
    # for m in self.modules():
    #     if isinstance(m, M.Conv2d):
    #         #init.msra_normal_(m.weight, mode="fan_out", nonlinearity="relu")
    #         init.xavier_normal_(m.weight)
    #     elif isinstance(m, M.BatchNorm2d):
    #         init.fill_(m.weight, 1)
    #         init.zeros_(m.bias)

    # Zero-initialize the last BN in each residual branch,
    # so that the residual branch starts with zeros, and each residual
    # block behaves like an identity.
    # This improves the model by 0.2~0.3% according to
    # https://arxiv.org/abs/1706.02677
    if zero_init_residual:
        for m in self.modules():
            if isinstance(m, Bottleneck):
                init.zeros_(m.bn3.weight)
            elif isinstance(m, BasicBlock):
                init.zeros_(m.bn2.weight)