Example #1
import megengine.autodiff as ad
import megengine.distributed as dist
import megengine.functional as F
import megengine.optimizer as optimizer
from megengine.jit import trace
from megengine.module import Conv2d, Sequential, SyncBatchNorm


def run_syncbn(trace_mode):
    x = F.ones([2, 16, 4, 4], dtype="float32")

    net = Sequential(
        Conv2d(16, 16, 1), SyncBatchNorm(16), Conv2d(16, 16, 1), SyncBatchNorm(16),
    )

    # all-reduce gradients across workers (mean reduction) as they are computed
    gm = ad.GradManager().attach(
        net.parameters(), callbacks=dist.make_allreduce_cb("MEAN")
    )
    opt = optimizer.SGD(net.parameters(), 1e-3)

    def train_func(x):
        with gm:
            y = net(x)
            loss = y.mean()
            gm.backward(loss)
            opt.step().clear_grad()
        return loss

    if trace_mode is not None:
        train_func = trace(train_func, symbolic=trace_mode)

    for _ in range(3):
        loss = train_func(x)
        loss.numpy()  # .numpy() synchronizes, forcing the (possibly traced) step to run
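In MegEngine's test suite this kind of worker is launched once per GPU so that SyncBatchNorm has peers to synchronize with. A minimal driver sketch (the dist.launcher decorator and the GPU count are assumptions based on that usage, not part of the snippet above):

@dist.launcher(n_gpus=2)
def worker():
    # exercise eager mode plus both trace flavors
    for mode in (None, False, True):
        run_syncbn(mode)

worker()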
Example #2
from megengine.module import ConvBn2d, ConvBnRelu2d, Elemwise, Module, Sequential


class ShuffleV1Block(Module):
    def __init__(self, inp, oup, *, group, first_group, mid_channels, ksize, stride):
        super(ShuffleV1Block, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        self.mid_channels = mid_channels
        self.ksize = ksize
        pad = ksize // 2
        self.pad = pad
        self.inp = inp
        self.group = group

        branch_main_1 = [
            # pw
            ConvBnRelu2d(inp, mid_channels, 1, 1, 0, groups=1 if first_group else group, bias=False),
            # dw
            ConvBn2d(mid_channels, mid_channels, ksize, stride, pad, groups=mid_channels, bias=False)
        ]
        branch_main_2 = [
            # pw-linear
            ConvBn2d(mid_channels, oup, 1, 1, 0, groups=group, bias=False)
        ]
        self.branch_main_1 = Sequential(*branch_main_1)
        self.branch_main_2 = Sequential(*branch_main_2)
        self.add = Elemwise('FUSE_ADD_RELU')

        if stride == 2:
            self.branch_proj = ConvBn2d(inp, oup, 1, 2, 0, bias=False)
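The constructor above only wires up submodules; a forward consistent with them might look like the sketch below. The channel-shuffle helper and the exact wiring are assumptions (standard ShuffleNet-v1 structure), not part of the original snippet; F stands for megengine.functional.

    def forward(self, x):
        # shortcut: identity at stride 1, 1x1 projection at stride 2
        proj = x if self.stride == 1 else self.branch_proj(x)
        x = self.branch_main_1(x)
        x = self._channel_shuffle(x)  # hypothetical helper, defined below
        x = self.branch_main_2(x)
        return self.add(x, proj)      # FUSE_ADD_RELU computes relu(x + proj)

    def _channel_shuffle(self, x):
        # permute channels across the `group` groups
        n, c, h, w = x.shape
        x = F.reshape(x, (n, self.group, c // self.group, h, w))
        x = F.transpose(x, (0, 2, 1, 3, 4))
        return F.reshape(x, (n, c, h, w))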
Example #3
class ShuffleNetV1(Module):
    def __init__(self, num_classes=1000, model_size='2.0x', group=None):
        super(ShuffleNetV1, self).__init__()
        print('model size is ', model_size)

        assert group is not None

        self.stage_repeats = [4, 8, 4]
        self.model_size = model_size
        if group == 3:
            if model_size == '0.5x':
                self.stage_out_channels = [-1, 12, 120, 240, 480]
            elif model_size == '1.0x':
                self.stage_out_channels = [-1, 24, 240, 480, 960]
            elif model_size == '1.5x':
                self.stage_out_channels = [-1, 24, 360, 720, 1440]
            elif model_size == '2.0x':
                self.stage_out_channels = [-1, 48, 480, 960, 1920]
            else:
                raise NotImplementedError
        elif group == 8:
            if model_size == '0.5x':
                self.stage_out_channels = [-1, 16, 192, 384, 768]
            elif model_size == '1.0x':
                self.stage_out_channels = [-1, 24, 384, 768, 1536]
            elif model_size == '1.5x':
                self.stage_out_channels = [-1, 24, 576, 1152, 2304]
            elif model_size == '2.0x':
                self.stage_out_channels = [-1, 48, 768, 1536, 3072]
            else:
                raise NotImplementedError
        else:
            # guard unsupported group counts explicitly; otherwise
            # stage_out_channels would be left unset and fail later
            raise NotImplementedError

        # building first layer
        input_channel = self.stage_out_channels[1]
        self.first_conv = Sequential(
            ConvBnRelu2d(3, input_channel, 3, 2, 1, bias=False)
        )
        self.maxpool = MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.features = []
        for idxstage in range(len(self.stage_repeats)):
            numrepeat = self.stage_repeats[idxstage]
            output_channel = self.stage_out_channels[idxstage + 2]

            for i in range(numrepeat):
                stride = 2 if i == 0 else 1
                first_group = idxstage == 0 and i == 0
                self.features.append(ShuffleV1Block(input_channel, output_channel,
                                                    group=group, first_group=first_group,
                                                    mid_channels=output_channel // 4, ksize=3, stride=stride))
                input_channel = output_channel

        self.features = Sequential(*self.features)
        self.quant = QuantStub()
        self.dequant = DequantStub()
        self.classifier = Sequential(Linear(self.stage_out_channels[-1], num_classes, bias=False))
        self._initialize_weights()
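The if/elif ladder in this constructor only selects a channel table; the same lookup can be written as a single dict indexed by (group, model_size). A sketch of the equivalent code (same numbers as above), meant to replace the ladder inside __init__:

        _CHANNELS = {
            (3, '0.5x'): [-1, 12, 120, 240, 480],
            (3, '1.0x'): [-1, 24, 240, 480, 960],
            (3, '1.5x'): [-1, 24, 360, 720, 1440],
            (3, '2.0x'): [-1, 48, 480, 960, 1920],
            (8, '0.5x'): [-1, 16, 192, 384, 768],
            (8, '1.0x'): [-1, 24, 384, 768, 1536],
            (8, '1.5x'): [-1, 24, 576, 1152, 2304],
            (8, '2.0x'): [-1, 48, 768, 1536, 3072],
        }
        if (group, model_size) not in _CHANNELS:
            raise NotImplementedError
        self.stage_out_channels = _CHANNELS[(group, model_size)]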
Example #4
from collections import OrderedDict

from megengine.module import Linear, Sequential


def test_sequential_named_children():
    modules = OrderedDict()
    modules["name0"] = Linear(20, 10)
    modules["name1"] = Linear(10, 5)
    modules["name2"] = Linear(5, 1)
    m = Sequential(modules)
    l = list(m.named_children())
    assert l[0][0] == "name0"
    assert l[1][0] == "name1"
    assert l[2][0] == "name2"
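Passing modules positionally instead of via an OrderedDict should fall back to index-based names; a sketch of the same check under that assumption:

def test_sequential_positional_children():
    m = Sequential(Linear(20, 10), Linear(10, 5), Linear(5, 1))
    names = [name for name, _ in m.named_children()]
    assert names == ["0", "1", "2"]  # assumed naming scheme for positional modules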
Example #5
    def __init__(self):
        super().__init__()
        self.bn = BatchNorm2d(4)
        self.seq = Sequential(
            BatchNorm2d(4),
            self.InnerModule(),
        )
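The fragment references a nested InnerModule class that is not shown. A minimal self-contained completion (the enclosing class name and the InnerModule body are invented for illustration):

from megengine.module import BatchNorm2d, Linear, Module, Sequential


class Net(Module):  # hypothetical name
    class InnerModule(Module):  # assumed body, not in the original
        def __init__(self):
            super().__init__()
            self.fc = Linear(4, 4)

        def forward(self, x):
            return self.fc(x)

    def __init__(self):
        super().__init__()
        self.bn = BatchNorm2d(4)
        self.seq = Sequential(
            BatchNorm2d(4),
            self.InnerModule(),
        )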
Example #6
    def __init__(self):
        super().__init__()
        self.conv1 = Conv2d(3, 128, 3, padding=1, bias=False)
        self.conv2 = Conv2d(3, 128, 3, dilation=2, bias=False)
        self.bn1 = BatchNorm1d(128)
        self.bn2 = BatchNorm2d(128)
        self.pooling = MaxPool2d(kernel_size=2, padding=0)
        modules = OrderedDict()
        modules["depthwise"] = Conv2d(
            256,
            256,
            3,
            1,
            0,
            groups=256,
            bias=False,
        )
        modules["pointwise"] = Conv2d(
            256,
            256,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=True,
        )
        self.submodule1 = Sequential(modules)
        self.list1 = [Dropout(drop_prob=0.1), [Softmax(axis=100)]]
        self.tuple1 = (
            Dropout(drop_prob=0.1),
            (Softmax(axis=100), Dropout(drop_prob=0.2)),
        )
        self.dict1 = {"Dropout": Dropout(drop_prob=0.1)}
        self.fc1 = Linear(512, 1024)
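The "depthwise"/"pointwise" pair above forms a depthwise-separable convolution, which needs far fewer weights than a dense 3x3 convolution over the same 256 channels. The counts follow directly from the layer shapes:

dw = 256 * 1 * 3 * 3       # depthwise: one 3x3 filter per channel = 2,304
pw = 256 * 256 * 1 * 1     # pointwise 1x1 channel mixing = 65,536
dense = 256 * 256 * 3 * 3  # plain 3x3 convolution = 589,824
assert dw + pw < dense     # the separable form is roughly 8.7x smaller here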
Example #7
    def __init__(self):
        super().__init__()
        self.conv1 = Conv2d(3, 128, 3, stride=2, bias=False)
        self.conv2 = Conv2d(3, 128, 3, padding=1, bias=False)
        self.conv3 = Conv2d(3, 128, 3, dilation=2, bias=False)
        self.bn1 = BatchNorm2d(128)
        self.bn2 = BatchNorm1d(128)
        self.dropout = Dropout(drop_prob=0.1)
        self.softmax = Softmax(axis=100)
        self.pooling = MaxPool2d(kernel_size=2, padding=0)
        self.submodule1 = Sequential(Dropout(drop_prob=0.1), Softmax(axis=100))
        self.fc1 = Linear(512, 1024)
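Calling a Sequential simply chains its modules in order, so seq(x) equals softmax(dropout(x)). A quick check of that equivalence (drop_prob=0.0 and axis=1 are chosen here so the comparison is deterministic and the axis is valid):

import numpy as np

from megengine import Tensor
from megengine.module import Dropout, Sequential, Softmax

drop = Dropout(drop_prob=0.0)
soft = Softmax(axis=1)
seq = Sequential(drop, soft)

x = Tensor(np.random.rand(2, 4).astype("float32"))
np.testing.assert_allclose(seq(x).numpy(), soft(drop(x)).numpy())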
Example #8
class BertEncoder(Module):
    def __init__(self, config):
        super(BertEncoder, self).__init__()
        self.layer = Sequential(
            *[BertLayer(config) for _ in range(config.num_hidden_layers)])
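Each BertLayer typically takes hidden states plus an attention mask, so the Sequential here presumably serves as an ordered container that is iterated rather than called directly. A matching forward sketch under that assumption:

    def forward(self, hidden_states, attention_mask):
        # assumption: each BertLayer takes (hidden_states, attention_mask)
        for layer in self.layer:
            hidden_states = layer(hidden_states, attention_mask)
        return hidden_states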
Example #9
import megengine.functional as F
import megengine.module as M
from megengine.module import (
    ConvBnRelu2d,
    DequantStub,
    Linear,
    MaxPool2d,
    Module,
    QuantStub,
    Sequential,
)

# ShuffleV1Block is the block defined in Example #2 above.


class ShuffleNetV1(Module):
    def __init__(self, num_classes=1000, model_size="2.0x", group=None):
        # pylint: disable=too-many-branches
        super(ShuffleNetV1, self).__init__()
        print("model size is ", model_size)

        assert group is not None

        self.stage_repeats = [4, 8, 4]
        self.model_size = model_size
        if group == 3:
            if model_size == "0.5x":
                self.stage_out_channels = [-1, 12, 120, 240, 480]
            elif model_size == "1.0x":
                self.stage_out_channels = [-1, 24, 240, 480, 960]
            elif model_size == "1.5x":
                self.stage_out_channels = [-1, 24, 360, 720, 1440]
            elif model_size == "2.0x":
                self.stage_out_channels = [-1, 48, 480, 960, 1920]
            else:
                raise NotImplementedError
        elif group == 8:
            if model_size == "0.5x":
                self.stage_out_channels = [-1, 16, 192, 384, 768]
            elif model_size == "1.0x":
                self.stage_out_channels = [-1, 24, 384, 768, 1536]
            elif model_size == "1.5x":
                self.stage_out_channels = [-1, 24, 576, 1152, 2304]
            elif model_size == "2.0x":
                self.stage_out_channels = [-1, 48, 768, 1536, 3072]
            else:
                raise NotImplementedError
        else:
            # guard unsupported group counts explicitly; otherwise
            # stage_out_channels would be left unset and fail later
            raise NotImplementedError

        # building first layer
        input_channel = self.stage_out_channels[1]
        self.first_conv = Sequential(
            ConvBnRelu2d(3, input_channel, 3, 2, 1, bias=False))
        self.maxpool = MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.features = []
        for idxstage in range(len(self.stage_repeats)):
            numrepeat = self.stage_repeats[idxstage]
            output_channel = self.stage_out_channels[idxstage + 2]

            for i in range(numrepeat):
                stride = 2 if i == 0 else 1
                first_group = idxstage == 0 and i == 0
                self.features.append(
                    ShuffleV1Block(
                        input_channel,
                        output_channel,
                        group=group,
                        first_group=first_group,
                        mid_channels=output_channel // 4,
                        ksize=3,
                        stride=stride,
                    ))
                input_channel = output_channel

        self.features = Sequential(*self.features)
        self.quant = QuantStub()
        self.dequant = DequantStub()
        self.classifier = Sequential(
            Linear(self.stage_out_channels[-1], num_classes, bias=False))
        self.classifier.disable_quantize()
        self._initialize_weights()

    def forward(self, x):
        x = self.quant(x)
        x = self.first_conv(x)
        x = self.maxpool(x)

        x = self.features(x)

        x = F.avg_pool2d(x, 7)
        x = F.flatten(x, 1)
        x = self.dequant(x)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        for name, m in self.named_modules():
            if isinstance(m, M.Conv2d):
                if "first" in name:
                    M.init.normal_(m.weight, 0, 0.01)
                else:
                    M.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1])
                if m.bias is not None:
                    M.init.fill_(m.bias, 0)
            elif isinstance(m, M.BatchNorm2d):
                M.init.fill_(m.weight, 1)
                if m.bias is not None:
                    M.init.fill_(m.bias, 0.0001)
                M.init.fill_(m.running_mean, 0)
            elif isinstance(m, M.BatchNorm1d):
                M.init.fill_(m.weight, 1)
                if m.bias is not None:
                    M.init.fill_(m.bias, 0.0001)
                M.init.fill_(m.running_mean, 0)
            elif isinstance(m, M.Linear):
                M.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    M.init.fill_(m.bias, 0)
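A quick smoke test of the full model: a 224x224 input makes the final 7x7 average pool line up, and group=3 with "2.0x" selects one of the channel tables above.

import numpy as np

from megengine import Tensor

net = ShuffleNetV1(num_classes=1000, model_size="2.0x", group=3)
net.eval()  # use running statistics in the BatchNorm layers
x = Tensor(np.random.rand(1, 3, 224, 224).astype("float32"))
logits = net(x)
print(logits.shape)  # expect (1, 1000)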