def run_syncbn(trace_mode):
    x = F.ones([2, 16, 4, 4], dtype="float32")

    net = Sequential(
        Conv2d(16, 16, 1),
        SyncBatchNorm(16),
        Conv2d(16, 16, 1),
        SyncBatchNorm(16),
    )

    # all-reduce gradients across workers so parameters stay in sync
    gm = ad.GradManager().attach(
        net.parameters(), callbacks=dist.make_allreduce_cb("MEAN")
    )
    opt = optimizer.SGD(net.parameters(), 1e-3)

    def train_func(x):
        with gm:
            y = net(x)
            loss = y.mean()
            gm.backward(loss)
        opt.step().clear_grad()
        return loss

    # optionally compile the step function; trace_mode selects symbolic/imperative
    if trace_mode is not None:
        train_func = trace(train_func, symbolic=trace_mode)

    for _ in range(3):
        loss = train_func(x)
        loss.numpy()
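SyncBatchNorm exchanges per-batch statistics across devices, so a function like run_syncbn is normally spawned once per GPU inside a distributed group. A minimal sketch of such a driver, assuming MegEngine's dist.launcher decorator and a 2-GPU machine (the driver itself is not part of the source above):

import megengine.distributed as dist

# hypothetical driver: spawn one worker process per GPU
@dist.launcher(n_gpus=2)
def worker():
    # trace_mode=None runs eagerly; pass True/False to exercise trace()
    run_syncbn(trace_mode=None)

worker()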
def __init__(self, inp, oup, *, group, first_group, mid_channels, ksize, stride):
    super(ShuffleV1Block, self).__init__()
    self.stride = stride
    assert stride in [1, 2]

    self.mid_channels = mid_channels
    self.ksize = ksize
    pad = ksize // 2
    self.pad = pad
    self.inp = inp
    self.group = group

    branch_main_1 = [
        # pw
        ConvBnRelu2d(
            inp, mid_channels, 1, 1, 0,
            groups=1 if first_group else group,
            bias=False,
        ),
        # dw
        ConvBn2d(
            mid_channels, mid_channels, ksize, stride, pad,
            groups=mid_channels,
            bias=False,
        ),
    ]
    branch_main_2 = [
        # pw-linear
        ConvBn2d(mid_channels, oup, 1, 1, 0, groups=group, bias=False),
    ]
    self.branch_main_1 = Sequential(*branch_main_1)
    self.branch_main_2 = Sequential(*branch_main_2)
    self.add = Elemwise('FUSE_ADD_RELU')

    if stride == 2:
        self.branch_proj = ConvBn2d(inp, oup, 1, 2, 0, bias=False)
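A hypothetical instantiation (not from the source): a stride-1 block, with channel numbers matching the group=3 / '1.0x' configuration used below (input 240, mid_channels 240 // 4 = 60). With stride=1 and inp == oup the residual FUSE_ADD_RELU path presumably stays shape-compatible; stride=2 instead builds the branch_proj shortcut.

block = ShuffleV1Block(
    240, 240,
    group=3, first_group=False,
    mid_channels=60, ksize=3, stride=1,
)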
def __init__(self, num_classes=1000, model_size='2.0x', group=None):
    super(ShuffleNetV1, self).__init__()
    print('model size is ', model_size)
    assert group is not None

    self.stage_repeats = [4, 8, 4]
    self.model_size = model_size
    if group == 3:
        if model_size == '0.5x':
            self.stage_out_channels = [-1, 12, 120, 240, 480]
        elif model_size == '1.0x':
            self.stage_out_channels = [-1, 24, 240, 480, 960]
        elif model_size == '1.5x':
            self.stage_out_channels = [-1, 24, 360, 720, 1440]
        elif model_size == '2.0x':
            self.stage_out_channels = [-1, 48, 480, 960, 1920]
        else:
            raise NotImplementedError
    elif group == 8:
        if model_size == '0.5x':
            self.stage_out_channels = [-1, 16, 192, 384, 768]
        elif model_size == '1.0x':
            self.stage_out_channels = [-1, 24, 384, 768, 1536]
        elif model_size == '1.5x':
            self.stage_out_channels = [-1, 24, 576, 1152, 2304]
        elif model_size == '2.0x':
            self.stage_out_channels = [-1, 48, 768, 1536, 3072]
        else:
            raise NotImplementedError

    # building first layer
    input_channel = self.stage_out_channels[1]
    self.first_conv = Sequential(
        ConvBnRelu2d(3, input_channel, 3, 2, 1, bias=False)
    )
    self.maxpool = MaxPool2d(kernel_size=3, stride=2, padding=1)

    self.features = []
    for idxstage in range(len(self.stage_repeats)):
        numrepeat = self.stage_repeats[idxstage]
        output_channel = self.stage_out_channels[idxstage + 2]
        for i in range(numrepeat):
            stride = 2 if i == 0 else 1
            first_group = idxstage == 0 and i == 0
            self.features.append(
                ShuffleV1Block(
                    input_channel, output_channel,
                    group=group, first_group=first_group,
                    mid_channels=output_channel // 4, ksize=3, stride=stride,
                )
            )
            input_channel = output_channel

    self.features = Sequential(*self.features)
    self.quant = QuantStub()
    self.dequant = DequantStub()
    self.classifier = Sequential(
        Linear(self.stage_out_channels[-1], num_classes, bias=False)
    )
    self._initialize_weights()
def test_sequential_named_children():
    modules = OrderedDict()
    modules["name0"] = Linear(20, 10)
    modules["name1"] = Linear(10, 5)
    modules["name2"] = Linear(5, 1)
    m = Sequential(modules)
    l = list(m.named_children())
    assert l[0][0] == "name0"
    assert l[1][0] == "name1"
    assert l[2][0] == "name2"
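For contrast with the OrderedDict form above, a sketch of positional construction, under the assumption that Sequential follows the usual convention of naming positional children by index ("0", "1", ...):

# assumed index-based naming for positional arguments
m = Sequential(Linear(20, 10), Linear(10, 5), Linear(5, 1))
assert [name for name, _ in m.named_children()] == ["0", "1", "2"]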
def __init__(self):
    super().__init__()
    self.bn = BatchNorm2d(4)
    self.seq = Sequential(
        BatchNorm2d(4),
        self.InnerModule(),
    )
def __init__(self):
    super().__init__()
    self.conv1 = Conv2d(3, 128, 3, padding=1, bias=False)
    self.conv2 = Conv2d(3, 128, 3, dilation=2, bias=False)
    self.bn1 = BatchNorm1d(128)
    self.bn2 = BatchNorm2d(128)
    self.pooling = MaxPool2d(kernel_size=2, padding=0)
    modules = OrderedDict()
    modules["depthwise"] = Conv2d(
        256, 256, 3, 1, 0, groups=256, bias=False,
    )
    modules["pointwise"] = Conv2d(
        256, 256, kernel_size=1, stride=1, padding=0, bias=True,
    )
    self.submodule1 = Sequential(modules)
    self.list1 = [Dropout(drop_prob=0.1), [Softmax(axis=100)]]
    self.tuple1 = (
        Dropout(drop_prob=0.1),
        (Softmax(axis=100), Dropout(drop_prob=0.2)),
    )
    self.dict1 = {"Dropout": Dropout(drop_prob=0.1)}
    self.fc1 = Linear(512, 1024)
def __init__(self):
    super().__init__()
    self.conv1 = Conv2d(3, 128, 3, stride=2, bias=False)
    self.conv2 = Conv2d(3, 128, 3, padding=1, bias=False)
    self.conv3 = Conv2d(3, 128, 3, dilation=2, bias=False)
    self.bn1 = BatchNorm2d(128)
    self.bn2 = BatchNorm1d(128)
    self.dropout = Dropout(drop_prob=0.1)
    self.softmax = Softmax(axis=100)
    self.pooling = MaxPool2d(kernel_size=2, padding=0)
    self.submodule1 = Sequential(Dropout(drop_prob=0.1), Softmax(axis=100))
    self.fc1 = Linear(512, 1024)
def __init__(self, config):
    super(BertEncoder, self).__init__()
    self.layer = Sequential(
        *[BertLayer(config) for _ in range(config.num_hidden_layers)]
    )
class ShuffleNetV1(Module):
    def __init__(self, num_classes=1000, model_size="2.0x", group=None):
        # pylint: disable=too-many-branches
        super(ShuffleNetV1, self).__init__()
        print("model size is ", model_size)
        assert group is not None

        self.stage_repeats = [4, 8, 4]
        self.model_size = model_size
        if group == 3:
            if model_size == "0.5x":
                self.stage_out_channels = [-1, 12, 120, 240, 480]
            elif model_size == "1.0x":
                self.stage_out_channels = [-1, 24, 240, 480, 960]
            elif model_size == "1.5x":
                self.stage_out_channels = [-1, 24, 360, 720, 1440]
            elif model_size == "2.0x":
                self.stage_out_channels = [-1, 48, 480, 960, 1920]
            else:
                raise NotImplementedError
        elif group == 8:
            if model_size == "0.5x":
                self.stage_out_channels = [-1, 16, 192, 384, 768]
            elif model_size == "1.0x":
                self.stage_out_channels = [-1, 24, 384, 768, 1536]
            elif model_size == "1.5x":
                self.stage_out_channels = [-1, 24, 576, 1152, 2304]
            elif model_size == "2.0x":
                self.stage_out_channels = [-1, 48, 768, 1536, 3072]
            else:
                raise NotImplementedError

        # building first layer
        input_channel = self.stage_out_channels[1]
        self.first_conv = Sequential(
            ConvBnRelu2d(3, input_channel, 3, 2, 1, bias=False)
        )
        self.maxpool = MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.features = []
        for idxstage in range(len(self.stage_repeats)):
            numrepeat = self.stage_repeats[idxstage]
            output_channel = self.stage_out_channels[idxstage + 2]
            for i in range(numrepeat):
                stride = 2 if i == 0 else 1
                first_group = idxstage == 0 and i == 0
                self.features.append(
                    ShuffleV1Block(
                        input_channel,
                        output_channel,
                        group=group,
                        first_group=first_group,
                        mid_channels=output_channel // 4,
                        ksize=3,
                        stride=stride,
                    )
                )
                input_channel = output_channel

        self.features = Sequential(*self.features)
        self.quant = QuantStub()
        self.dequant = DequantStub()
        self.classifier = Sequential(
            Linear(self.stage_out_channels[-1], num_classes, bias=False)
        )
        self.classifier.disable_quantize()
        self._initialize_weights()

    def forward(self, x):
        x = self.quant(x)
        x = self.first_conv(x)
        x = self.maxpool(x)
        x = self.features(x)
        x = F.avg_pool2d(x, 7)
        x = F.flatten(x, 1)
        x = self.dequant(x)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        for name, m in self.named_modules():
            if isinstance(m, M.Conv2d):
                if "first" in name:
                    M.init.normal_(m.weight, 0, 0.01)
                else:
                    M.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1])
                if m.bias is not None:
                    M.init.fill_(m.bias, 0)
            elif isinstance(m, M.BatchNorm2d):
                M.init.fill_(m.weight, 1)
                if m.bias is not None:
                    M.init.fill_(m.bias, 0.0001)
                M.init.fill_(m.running_mean, 0)
            elif isinstance(m, M.BatchNorm1d):
                M.init.fill_(m.weight, 1)
                if m.bias is not None:
                    M.init.fill_(m.bias, 0.0001)
                M.init.fill_(m.running_mean, 0)
            elif isinstance(m, M.Linear):
                M.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    M.init.fill_(m.bias, 0)
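A hypothetical smoke test (not from the source): the F.avg_pool2d(x, 7) in forward() implies 7x7 feature maps before pooling, i.e. a standard 224x224 input (224 divided by the total stride of 32 gives 7). A minimal sketch:

import numpy as np
import megengine as mge

model = ShuffleNetV1(num_classes=1000, model_size="1.0x", group=3)
model.eval()
inp = mge.tensor(np.random.randn(1, 3, 224, 224).astype("float32"))
logits = model(inp)
print(logits.shape)  # expected: (1, 1000)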