Python examples of megengine.module.init.zeros_

Example #1

0

Show file

File: espnets.py Project: Qsingle/Megvision

 def init_params(self):
     """Re-initialize the parameters of every sub-module in place.

     Walks ``self.modules()`` and applies, per layer type:

     * ``M.Conv2d``: Xavier-normal weights; bias zeroed when present.
     * ``M.BatchNorm2d``: weight filled with 1 and bias zeroed.
     * ``M.Linear``: weights drawn from a normal distribution with
       ``std=0.001``; bias zeroed when present.

     Modules of any other type are left untouched.
     """
     for layer in self.modules():
         if isinstance(layer, M.Conv2d):
             init.xavier_normal_(layer.weight)
         elif isinstance(layer, M.BatchNorm2d):
             # Normalization layers get both parameters set unconditionally.
             init.fill_(layer.weight, 1)
             init.zeros_(layer.bias)
             continue
         elif isinstance(layer, M.Linear):
             init.normal_(layer.weight, std=0.001)
         else:
             continue

         # Conv2d and Linear share the same optional-bias handling.
         if layer.bias is not None:
             init.zeros_(layer.bias)

Example #2

0

Show file

    def __init__(self):
        """Build three fully connected layers ``fc0``, ``fc1`` and ``fc2``.

        The layers map ``num_class -> mid_layers -> mid_layers -> num_class``.
        Each weight is drawn from a normal distribution whose standard
        deviation is ``sqrt(1 / fan_in)``, and each bias is zeroed.
        """
        self.mid_layers = 14
        self.num_class = 2
        super().__init__()

        # (attribute name, input features, output features), in creation order.
        layer_specs = (
            ("fc0", self.num_class, self.mid_layers),
            ("fc1", self.mid_layers, self.mid_layers),
            ("fc2", self.mid_layers, self.num_class),
        )
        for attr_name, in_features, out_features in layer_specs:
            layer = Linear(in_features, out_features, bias=True)
            setattr(self, attr_name, layer)
            fan_in, _ = init.calculate_fan_in_and_fan_out(layer.weight)
            init.normal_(layer.weight, std=np.sqrt(float(1.0) / fan_in))
            init.zeros_(layer.bias)

Example #3

0

Show file

File: omniglot.py Project: zhen8838/maml-megengine

def maml_init_(module: M.Conv2d):
    """Initialize a convolution layer in place.

    The weight is re-initialized with Xavier-uniform (``gain=1.0``) and the
    bias, when the layer has one, is set to zero.

    Args:
        module: the layer to initialize; its ``weight`` must be set.
    """
    minit.xavier_uniform_(module.weight, gain=1.0)
    # A layer created with ``bias=False`` carries ``bias = None``; skip it
    # instead of failing, mirroring the guard used by ``fc_init_``.
    if getattr(module, 'bias', None) is not None:
        minit.zeros_(module.bias)

Example #4

0

Show file

File: omniglot.py Project: zhen8838/maml-megengine

def fc_init_(module: M.Linear):
    """Initialize a fully connected layer in place.

    The weight, if present, is drawn from a truncated normal distribution
    (``mean=0.0``, ``std=0.01``); the bias, if present, is set to zero.
    A missing or ``None`` attribute is skipped.

    Args:
        module: the layer to initialize.
    """
    weight = getattr(module, 'weight', None)
    if weight is not None:
        init.truncated_normal_(weight, mean=0.0, std=0.01)

    bias = getattr(module, 'bias', None)
    if bias is not None:
        minit.zeros_(bias)

Example #5

0

Show file

File: resnet.py Project: Qsingle/Megvision

    def __init__(self,
                 block,
                 blocks,
                 in_ch=3,
                 num_classes=1000,
                 first_stride=2,
                 light_head=False,
                 zero_init_residual=False,
                 groups=1,
                 width_per_group=64,
                 strides=[1, 2, 2, 2],
                 dilations=[1, 1, 1, 1],
                 multi_grids=[1, 1, 1],
                 norm_layer=None,
                 se_module=None,
                 reduction=16,
                 radix=0,
                 avd=False,
                 avd_first=False,
                 avg_layer=False,
                 avg_down=False,
                 stem_width=64):
        """Build a ResNet or one of its variants.

        Modified ResNet according to
        https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.

        Args:
            block: BasicBlock or Bottleneck, the residual block type.
            blocks: sequence of four ints, the number of blocks in each of
                the four stages.
            in_ch: int, the number of channels of the input.
            num_classes: int, the number of classes to predict.
            first_stride: int, the stride of the first conv layer.
            light_head: bool, whether to replace the 7x7 conv of the stem
                with three 3x3 convs.
            zero_init_residual: bool, whether to zero-initialize the weight
                of the last batch norm in every residual block.
            groups: int, the number of groups for the convs in the net.
            width_per_group: int, the width of the conv layers.
            strides: list of 4 ints, the stride of each stage.
            dilations: list of 4 ints, the dilation of each stage.
            multi_grids: list, the multi-grid rates of the last stage (as in
                DeepLab v3). It needs at least ``blocks[-1]`` entries; any
                extra entries are dropped.
            norm_layer: megengine.module.Module, the normalization layer;
                defaults to batch normalization.
            se_module: SEModule, the Squeeze-and-Excitation module.
            reduction: int, the reduction rate.
            radix: int, the radix index from ResNeSt.
            avd: bool, whether to use the avd layer.
            avd_first: bool, whether to use the avd layer before the
                bottleneck block's conv2.
            avg_layer: bool, stored as ``self.avg_layer``; it is not used
                directly in this method (presumably read by the layer
                builders - TODO confirm).
            avg_down: bool, stored as ``self.avg_down``; it is not used
                directly in this method (presumably read by the layer
                builders - TODO confirm).
            stem_width: int, the channels of the 3x3 convs used when
                ``light_head`` is enabled.

        Raises:
            ValueError: if ``dilations`` or ``strides`` does not have exactly
                4 entries, or if ``multi_grids`` has fewer entries than
                ``blocks[-1]``.

        References:
            "Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>
            "Aggregated Residual Transformation for Deep Neural Networks" <https://arxiv.org/pdf/1611.05431.pdf>
            https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch
            deeplab v3: https://arxiv.org/pdf/1706.05587.pdf
            deeplab v3+: https://arxiv.org/pdf/1802.02611.pdf
            "Squeeze-and-Excitation Networks" <https://arxiv.org/abs/1709.01507>
            "ResNeSt: Split-Attention Networks" <https://arxiv.org/pdf/2004.08955.pdf>
        """
        super(ResNet, self).__init__()

        if len(dilations) != 4:
            raise ValueError(
                "The length of dilations must be 4, but got {}".format(
                    len(dilations)))

        if len(strides) != 4:
            raise ValueError(
                "The length of strides must be 4, but got {}".format(
                    len(strides)))

        if len(multi_grids) < blocks[-1]:
            raise ValueError(
                "The length of multi_grids must greater than or equal the number of blocks for last stage , but got {}/{}"
                .format(len(multi_grids), blocks[-1]))
        # Keep one rate per block of the last stage. Slicing unconditionally
        # also means the shared default list is never stored on the instance.
        multi_grids = multi_grids[:blocks[-1]]

        if norm_layer is None:
            norm_layer = M.BatchNorm2d

        self.base_width = width_per_group
        self.multi_grids = multi_grids
        self.inplanes = stem_width * 2 if light_head else 64
        self.groups = groups
        self.norm_layer = norm_layer
        self.avg_layer = avg_layer
        self.avg_down = avg_down

        # Stem: either three stacked 3x3 convs or a single 7x7 conv.
        if light_head:
            self.conv1 = M.Sequential(
                conv3x3(in_ch, stem_width, stride=first_stride),
                norm_layer(stem_width),
                M.ReLU(),
                conv3x3(stem_width, stem_width, stride=1),
                norm_layer(stem_width),
                M.ReLU(),
                conv3x3(stem_width, self.inplanes, stride=1),
            )
        else:
            self.conv1 = M.Conv2d(in_ch,
                                  self.inplanes,
                                  kernel_size=7,
                                  stride=first_stride,
                                  padding=3,
                                  bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = M.ReLU()
        self.maxpool = M.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Keyword arguments shared by every stage builder call below.
        stage_kwargs = dict(se_module=se_module,
                            reduction=reduction,
                            radix=radix,
                            avd=avd,
                            avd_first=avd_first)

        # 4 stages; the last one goes through the multi-grid builder.
        self.layer1 = self._make_layer(block,
                                       64,
                                       blocks[0],
                                       stride=strides[0],
                                       dilation=dilations[0],
                                       **stage_kwargs)
        self.layer2 = self._make_layer(block,
                                       128,
                                       blocks[1],
                                       stride=strides[1],
                                       dilation=dilations[1],
                                       **stage_kwargs)
        self.layer3 = self._make_layer(block,
                                       256,
                                       blocks[2],
                                       stride=strides[2],
                                       dilation=dilations[2],
                                       **stage_kwargs)
        self.layer4 = self._make_grid_layer(block,
                                            512,
                                            blocks[3],
                                            stride=strides[3],
                                            dilation=dilations[3],
                                            **stage_kwargs)

        # classification part
        self.avgpool = M.AdaptiveAvgPool2d(1)
        # NOTE(review): self.inplanes is presumably updated by the stage
        # builders to the final feature width - confirm against _make_layer.
        self.fc = M.Linear(self.inplanes, num_classes)

        # NOTE: the generic conv / batch-norm initialization below is
        # currently disabled.
        # for m in self.modules():
        #     if isinstance(m, M.Conv2d):
        #         #init.msra_normal_(m.weight, mode="fan_out", nonlinearity="relu")
        #         init.xavier_normal_(m.weight)
        #     elif isinstance(m, M.BatchNorm2d):
        #         init.fill_(m.weight, 1)
        #         init.zeros_(m.bias)
        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    init.zeros_(m.bn3.weight)
                elif isinstance(m, BasicBlock):
                    init.zeros_(m.bn2.weight)