def __init__(self, num_clothes, num_colors, ctx):
    """Two-branch classification head (clothes / colors) on a frozen MobileNetV2 backbone."""
    super(fashion_net_2_branches, self).__init__()

    # Pretrained feature extractor; gradients switched off so only the heads train.
    self._features = model_zoo.get_model('mobilenetv2_1.0', pretrained=True, ctx=ctx).features
    for _, param in self._features.collect_params().items():
        param.grad_req = 'null'

    # Shared utility layers.
    self._flatten = nn.Flatten()
    self._relu = nn.Activation(activation='relu')
    self._swish = nn.Swish()

    # Clothes branch: Dense(100) -> BatchNorm -> Dense(num_clothes).
    self._clothes_fc_1 = nn.Dense(100)
    self._clothes_bn = nn.BatchNorm(center=False, scale=True)
    self._clothes_out = nn.Dense(num_clothes)
    for dense_layer in (self._clothes_fc_1, self._clothes_out):
        dense_layer.initialize(init=init.Xavier(), ctx=ctx)
    self._clothes_bn.initialize(init=init.Zero(), ctx=ctx)

    # Color branch: Dense(100) -> BN -> Dense(50) -> BN -> Dense(num_colors).
    self._color_fc_1 = nn.Dense(100)
    self._color_bn_1 = nn.BatchNorm(center=False, scale=True)
    self._color_fc_2 = nn.Dense(50)
    self._color_bn_2 = nn.BatchNorm(center=False, scale=True)
    self._color_out = nn.Dense(num_colors)
    for dense_layer in (self._color_fc_1, self._color_fc_2, self._color_out):
        dense_layer.initialize(init=init.Xavier(), ctx=ctx)
    for bn_layer in (self._color_bn_1, self._color_bn_2):
        bn_layer.initialize(init=init.Zero(), ctx=ctx)
def __init__(self, depth, ctx, pretrained=True, num_features=0, num_classes=0, num_parts=1):
    """Part-based ResNet: `num_parts` local branches (global max-pool) plus one
    global branch (global avg-pool), each branch being a 1x1 conv embedding ->
    BatchNorm -> Flatten, paired with its own bias-free linear classifier.

    Parameters
    ----------
    depth : int
        Key into ResNet.__factory selecting the backbone variant.
    ctx : mxnet context (or list of contexts) to initialize parameters on.
    pretrained : bool
        Load ImageNet weights for the backbone.
    num_features : int
        Channel count of each branch's 1x1 conv embedding.
    num_classes : int
        Output size of every classifier.
    num_parts : int
        Number of local (max-pooled) branches.
    """
    super(ResNet, self).__init__()
    self.num_classes = num_classes
    self.num_parts = num_parts
    with self.name_scope():
        # Backbone without its final (pooling/dense) stage.
        model = ResNet.__factory[depth](pretrained=pretrained, ctx=ctx).features[:-1]
        # Force the last residual stage to stride 1 to keep a larger feature map.
        # NOTE(review): reaches into the private `_kwargs` of the first block's
        # conv and downsample ops — fragile across gluoncv versions.
        model[-1][0].body[0]._kwargs['stride'] = (1, 1)
        model[-1][0].downsample[0]._kwargs['stride'] = (1, 1)
        self.base = nn.HybridSequential()
        for m in model:
            self.base.add(m)
        # Local branches: max-pool -> 1x1 conv -> BN -> flatten, one classifier each.
        self.feat = nn.HybridSequential()
        self.classify = nn.HybridSequential()
        for _ in range(num_parts):
            tmp = nn.HybridSequential()
            tmp.add(nn.GlobalMaxPool2D())
            feat = nn.Conv2D(channels=num_features, kernel_size=1, use_bias=False)
            feat.initialize(init.MSRAPrelu('in', 0), ctx=ctx)
            tmp.add(feat)
            # init=init.Zero() only affects params without their own initializer;
            # BatchNorm's gamma/beta keep their built-in defaults.
            bn = nn.BatchNorm()
            bn.initialize(init=init.Zero(), ctx=ctx)
            tmp.add(bn)
            tmp.add(nn.Flatten())
            self.feat.add(tmp)
            classifier = nn.Dense(num_classes, use_bias=False)
            classifier.initialize(init=init.Normal(0.001), ctx=ctx)
            self.classify.add(classifier)
        # Global branch: same shape but avg-pooled and with a stat-only BN
        # (center=False, scale=False).
        self.g_feat = nn.HybridSequential()
        self.g_classify = nn.HybridSequential()
        for _ in range(1):
            tmp = nn.HybridSequential()
            tmp.add(nn.GlobalAvgPool2D())
            feat = nn.Conv2D(channels=num_features, kernel_size=1, use_bias=False)
            feat.initialize(init.MSRAPrelu('in', 0), ctx=ctx)
            tmp.add(feat)
            bn = nn.BatchNorm(center=False, scale=False)
            bn.initialize(init=init.Zero(), ctx=ctx)
            tmp.add(bn)
            tmp.add(nn.Flatten())
            self.g_feat.add(tmp)
            classifier = nn.Dense(num_classes, use_bias=False)
            classifier.initialize(init=init.Normal(0.001), ctx=ctx)
            self.g_classify.add(classifier)
def __init__(self, patch_size, depth, embed_dim, hidden_size, heads, img_size, classes,
             embed_drop=0., qkv_bias=True, att_drop=0., drop=0.1, activation='gelu',
             layer_norm_eps=1e-6, pool='cls', **kwargs):
    """Vision Transformer: patch embedding + cls token + learned position
    embedding, `depth` transformer encoder blocks, and a LayerNorm -> Dense head."""
    super(VisionTransformer, self).__init__()
    assert pool in ('cls', 'mean'), 'pool type must be either cls (cls token) or ' \
        'mean (mean pooling)'
    self.pool = pool
    self.embed_dim = embed_dim
    self.patch_size = patch_size
    with self.name_scope():
        self.patch_embed = _PatchEmbedding(img_size, patch_size, embed_dim)
        # Learnable class token; position table has one extra slot for it.
        self.cls_token = self.params.get(
            'cls_token', shape=(1, 1, embed_dim), init=init.Zero())
        self.pos_embed = self.params.get(
            'pos_embed',
            shape=(1, self.patch_embed.num_patches + 1, embed_dim),
            init=init.Zero())
        # Dropout only instantiated when a non-zero rate was requested.
        self.embed_dropout = nn.Dropout(embed_drop) if embed_drop else None
        encoders = nn.HybridSequential()
        for _ in range(depth):
            encoders.add(_TransformerEncoder(embed_dim, heads, hidden_size,
                                             qkv_bias, att_drop, drop, activation,
                                             layer_norm_eps=layer_norm_eps))
        self.blocks = encoders
        head = nn.HybridSequential()
        head.add(nn.LayerNorm(epsilon=layer_norm_eps, in_channels=embed_dim),
                 nn.Dense(classes))
        self.head = head
def __init__(self, in_channels):
    """Channel-attention module; owns a single learnable scalar `gamma`."""
    super(_CAModule, self).__init__()
    self.in_channels = in_channels
    with self.name_scope():
        # Scalar blend weight, zero-initialized (attention contribution starts at 0).
        self.gamma = self.params.get('gamma', shape=(1,), init=init.Zero())
def __init__(self, num_feature, num_dims, **kwargs):
    """Hand-written batch-norm layer: learnable gamma/beta plus running-stat buffers.

    num_dims == 2 means dense (N, C) input; anything else means conv (N, C, H, W).
    """
    # NOTE: the enclosing class name is (mis)spelled BtachNorm elsewhere in the file.
    super(BtachNorm, self).__init__(**kwargs)
    if num_dims == 2:
        shape = (1, num_feature)
    else:
        shape = (1, num_feature, 1, 1)
    # Trainable scale (init 1) and shift (init 0).
    self.gamma = self.params.get('gamma', shape=shape, init=init.One())
    self.beta = self.params.get('beta', shape=shape, init=init.Zero())
    # Running statistics: plain ndarrays, not trained; variance starts at 1.
    self.moving_mean = nd.zeros(shape)
    self.moving_var = nd.ones(shape)
def __init__(self, depth, ctx, pretrained=True, num_features=0, num_classes=0):
    """ResNet with a frozen stem, trainable tail, Dense+BN embedding head, and
    a bias-free linear classifier (re-ID style)."""
    super(ResNet, self).__init__()
    self.num_classes = num_classes
    with self.name_scope():
        model = ResNet.__factory[depth](pretrained=pretrained, ctx=ctx).features
        # Run the last residual stage at stride 1 to keep a larger feature map
        # (pokes the private _kwargs of the stage's first block).
        model[-2][0].body[0]._kwargs['stride'] = (1, 1)
        model[-2][0].downsample[0]._kwargs['stride'] = (1, 1)
        self.base = nn.HybridSequential()
        self.feat = nn.HybridSequential()
        # First five stages form the frozen base; the rest are trainable.
        for idx, stage in enumerate(model):
            (self.base if idx < 5 else self.feat).add(stage)
        self.base.collect_params().setattr('grad_req', 'null')
        # Bias-free linear embedding followed by a stat-only BatchNorm.
        embed = nn.Dense(num_features, use_bias=False)
        embed.initialize(init=init.Xavier(), ctx=ctx)
        self.feat.add(embed)
        bn = nn.BatchNorm(center=False, scale=False)
        bn.initialize(init=init.Zero(), ctx=ctx)
        self.feat.add(bn)
        self.classifier = nn.Dense(num_classes, use_bias=False)
        self.classifier.initialize(init=init.Normal(0.001), ctx=ctx)
def __init__(self, in_channels, reduction=8):
    """Self-attention over 2-D feature maps.

    Query/key are projected down to in_channels // reduction channels with 1x1
    convs; value keeps the full channel count; `gamma` is a learnable scalar.
    """
    super(SelfAttentionModule, self).__init__()
    self.in_channels = in_channels
    reduced = in_channels // reduction
    with self.name_scope():
        self.query_conv = nn.Conv2D(reduced, 1, in_channels=in_channels)
        self.key_conv = nn.Conv2D(reduced, 1, in_channels=in_channels)
        self.value_conv = nn.Conv2D(in_channels, 1, in_channels=in_channels)
        # Scalar mixing weight, zero-initialized.
        self.gamma = self.params.get('gamma', shape=(1,), init=init.Zero())
def __init__(self, num_features, num_dims, **kwargs):
    """Hand-written batch-normalization layer.

    Parameters
    ----------
    num_features : int
        Channel count (dense units for 2-D input, conv channels otherwise).
    num_dims : int
        2 for dense (N, C) inputs; anything else means conv (N, C, H, W).
    """
    super(BatchNorm, self).__init__(**kwargs)
    if num_dims == 2:
        shape = (1, num_features)
    else:
        shape = (1, num_features, 1, 1)
    # Trainable scale and shift, initialized to 1 and 0 respectively.
    self.gamma = self.params.get('gamma', shape=shape, init=init.One())
    self.beta = self.params.get('beta', shape=shape, init=init.Zero())
    # Running statistics (not trained), created on CPU. FIX: the running
    # variance now starts at 1 instead of 0 — standard BN initialization
    # (Gluon's own BatchNorm defaults running variance to ones); a zero start
    # would make an untrained inference pass divide by ~sqrt(eps).
    self.moving_mean = nd.zeros(shape)
    self.moving_var = nd.ones(shape)
def __init__(self, client_model, dataset, model_name, num_classes, ctx):
    """Federated-learning server: global model plus a zero-initialized twin
    network used as an accumulator when merging client updates."""
    self.dataset, self.model_name, self.ctx = dataset, model_name, ctx
    self.selected_clients = []
    self.total_weight = 0
    # Global model, synchronized from the supplied client model's weights.
    self.model = build_net(dataset, model_name, num_classes, self.ctx)
    self.model.set_params(client_model.get_params())
    # Accumulator network starts from all-zero parameters.
    self.merged_update = build_net(dataset, model_name, num_classes, self.ctx,
                                   init.Zero())
def __init__(self, num_fearures, num_dims, **kwargs):
    """Hand-written batch-normalization layer.

    Parameters
    ----------
    num_fearures : int
        Channel count (name's spelling kept for caller compatibility).
    num_dims : int
        2 for dense (N, C) inputs; anything else means conv (N, C, H, W).
    """
    super().__init__(**kwargs)
    if num_dims == 2:
        shape = (1, num_fearures)  # fully-connected layer
    else:
        shape = (1, num_fearures, 1, 1)  # 2-D convolutional layer
    # Trainable scale and shift, initialized to 1 and 0. FIX: beta was created
    # with grad_req='null', freezing the shift at 0 and contradicting the
    # original comment that both gamma and beta take part in gradient updates.
    self.gamma = self.params.get('gamma', shape=shape, init=init.One())
    self.beta = self.params.get('beta', shape=shape, init=init.Zero())
    # Running statistics (not trained), created in CPU memory and moved to the
    # compute device later. FIX: running variance starts at 1 (standard BN
    # initialization) rather than 0, avoiding division by ~sqrt(eps) on an
    # untrained inference pass.
    self.moving_mean = nd.zeros(shape)
    self.moving_var = nd.ones(shape)
def __init__(self, depth, ctx, pretrained=True, num_classes=0):
    """ResNet for re-ID: backbone with its last residual stage at stride 1,
    followed by Flatten + BatchNorm and a bias-free linear classifier."""
    super(ResNet, self).__init__()
    self.pretrained = pretrained
    with self.name_scope():
        self.base = ResNet.__factory[depth](pretrained=pretrained, ctx=ctx).features
        # Drop the spatial downsampling of the last residual stage
        # (pokes the private _kwargs of the stage's first block).
        last_stage = self.base[-2][0]
        last_stage.body[0]._kwargs['stride'] = (1, 1)
        last_stage.downsample[0]._kwargs['stride'] = (1, 1)
        self.base.add(nn.Flatten())
        bn = nn.BatchNorm(center=False, scale=True)
        bn.initialize(init=init.Zero(), ctx=ctx)
        self.base.add(bn)
        self.classifier = nn.Dense(num_classes, use_bias=False)
        self.classifier.initialize(init=init.Normal(0.001), ctx=ctx)
def __init__(self, depth, ctx, pretrained=True, num_classes=0):
    """ResNet for re-ID: truncated backbone (last stage at stride 1), then
    global avg-pool -> flatten -> BatchNorm -> bias-free classifier."""
    super(ResNet, self).__init__()
    self.pretrained = pretrained
    with self.name_scope():
        network = ResNet.__factory[depth](pretrained=pretrained, ctx=ctx).features[0:-1]
        # Run the last residual stage at stride 1 (private-_kwargs poke).
        first_block = network[-1][0]
        first_block.body[0]._kwargs['stride'] = (1, 1)
        first_block.downsample[0]._kwargs['stride'] = (1, 1)
        backbone = nn.HybridSequential()
        for stage in network:
            backbone.add(stage)
        self.base = backbone
        self.avgpool = nn.GlobalAvgPool2D()
        self.flatten = nn.Flatten()
        self.bn = nn.BatchNorm(center=False, scale=True)
        self.bn.initialize(init=init.Zero(), ctx=ctx)
        self.classifier = nn.Dense(num_classes, use_bias=False)
        self.classifier.initialize(init=init.Normal(0.001), ctx=ctx)
def __init__(self, in_channels, reduction=8):
    """Self-attention block: bias-free 1x1 projections for query/key (reduced
    channels) and value (full channels), plus a deferred-init scalar gamma."""
    super(SelfAttention, self).__init__()
    inter_channels = in_channels // reduction

    def _proj(out_channels):
        # Bias-free 1x1 convolution from the input channel count.
        return nn.Conv2D(channels=out_channels, kernel_size=1,
                         use_bias=False, in_channels=in_channels)

    with self.name_scope():
        self.query_proj = _proj(inter_channels)
        self.key_proj = _proj(inter_channels)
        self.value_proj = _proj(in_channels)
        # Scalar mixing weight, zero-initialized.
        self.gamma = self.params.get('gamma', shape=(1,), init=init.Zero(),
                                     allow_deferred_init=True)
def __init__(self, nclass, aux=True, backbone='vit_large_16', height=None, width=None,
             base_size=520, crop_size=480, pretrained_base=False, norm_layer=nn.BatchNorm,
             norm_kwargs=None, decoder='pup', layer_norm_eps=1e-6, **kwargs):
    """SETR segmentation model: ViT-Large/16 encoder with a selectable decoder head.

    Parameters
    ----------
    nclass : int
        Number of segmentation classes.
    aux : bool
        Passed through to the decoder head (auxiliary outputs).
    backbone : str
        Only 'vit_large_16' is supported.
    decoder : str
        One of 'naive', 'pup', 'mla'; selects the head via self._build_decoder.
    layer_norm_eps : float
        Epsilon for the decoder's layer norms.
    """
    super(SETR, self).__init__(nclass, aux, height, width, base_size, crop_size,
                               symbolize=False)
    assert backbone == 'vit_large_16', 'only support vit_large_16 for now'
    assert decoder in ('naive', 'pup', 'mla'), 'decoder must be any of (naive, pup, mla)'
    encoder = vit_large_16(pretrained_base, img_size=crop_size, classes=1000)
    # Patches per side at the given crop size.
    self.stride = crop_size // encoder.patch_size
    with self.name_scope():
        # Embedding: reuse the encoder's patch embedding, but own a fresh
        # position embedding sized WITHOUT the cls-token slot (num_patches,
        # not num_patches + 1, unlike the classification ViT).
        self.patch_embed = encoder.patch_embed
        self.pos_embed = self.params.get(
            'pos_embed', shape=(1, self.patch_embed.num_patches, encoder.embed_dim),
            init=init.Zero())
        self.embed_dropout = encoder.embed_dropout
        # Encoder: the pretrained transformer blocks are shared as-is.
        self.blocks = encoder.blocks
        # Decoder: _build_decoder returns (tap indices, per-tap layer norms,
        # head factory); the factory builds the actual prediction head.
        self.out_indices, self.layer_norms, head = self._build_decoder(
            decoder, layer_norm_eps)
        self.head = head(nclass, aux, norm_layer, norm_kwargs)
           # (tail of a layer-constructor call whose opening sits above this chunk)
           strides=4)
conv_trans.initialize()
conv_trans(x)
# Load a fixed bilinear-upsampling kernel into the transposed conv, then apply it.
conv_trans.weight.set_data(bilinear_kernel(3, 3, 8))
y = conv_trans(x)
# CHW -> HWC image in [0, 1] for display.
y = y[0].clip(0, 1).transpose((1, 2, 0))
#print('Output', y.shape)
#plt.imshow(y.asnumpy())
#plt.show()
from mxnet import init
# FCN head init: zero the final transposed conv, Xavier for the layer before it.
conv_trans = net[-1]
conv_trans.initialize(init=init.Zero())
net[-2].initialize(init=init.Xavier())
# Forward a dummy batch so deferred shapes are inferred, then install the
# bilinear kernel sized from the realized weight shape.
x = nd.zeros((batch_size, 3, 320, 480))
net(x)
shape = conv_trans.weight.data().shape
conv_trans.weight.set_data(bilinear_kernel(*shape[0:3]))
import sys
sys.path.append('..')
import utils
# Per-pixel softmax cross-entropy over the channel axis.
loss = gluon.loss.SoftmaxCrossEntropyLoss(axis=1)
ctx = utils.try_all_gpus()
def main():
    """Build an FCN (ResNet-18 backbone + 1x1 conv + single 32x bilinear-seeded
    transposed conv), optionally fine-tune it on VOC segmentation data, and
    visualize predictions on a few test images."""
    input_shape = (320, 480)
    voc_train = VOCSegDataset(True, input_shape)
    voc_test = VOCSegDataset(False, input_shape)
    batch_size = 4
    train_data = gluon.data.DataLoader(voc_train, batch_size, shuffle=True, last_batch='discard')
    test_data = gluon.data.DataLoader(voc_test, batch_size, last_batch='discard')
    # ctx = common.ChoiceGpu()
    ctx = common.ChoiceCpu()
    pretrained_net1 = models.resnet18_v2(pretrained=True)
    # pretrained_net2 = GetNet(10, ctx=ctx)
    # pretrained_net3 = models.vgg13(pretrained=True)
    # pretrained_net2.load_parameters("train.params")
    # Backbone: all ResNet feature stages except the final pool/dense pair.
    net = nn.HybridSequential()
    for layer in pretrained_net1.features[:-2]:
        # for layer in pretrained_net2.net[:-2]:
        net.add(layer)
    num_classes = len(classes)
    with net.name_scope():
        # Head: 1x1 conv to class scores, then one 32x upsampling transposed conv.
        net.add(
            nn.Conv2D(num_classes, kernel_size=1),
            # nn.Conv2DTranspose(num_classes, kernel_size=4, padding=1, strides=2),
            # nn.Conv2DTranspose(num_classes, kernel_size=4, padding=1, strides=2),
            # nn.Conv2DTranspose(num_classes, kernel_size=4, padding=1, strides=2),
            # nn.Conv2DTranspose(num_classes, kernel_size=4, padding=1, strides=2),
            # nn.Conv2DTranspose(num_classes, kernel_size=4, padding=1, strides=2)
            # nn.Conv2DTranspose(num_classes, kernel_size=32, padding=8, strides=16)
            nn.Conv2DTranspose(channels=num_classes, kernel_size=64, padding=16,
                               strides=32),
        )
    # Zero-init the transposed conv; its weight gets a bilinear kernel below.
    conv_trans1 = net[-1]
    conv_trans1.initialize(init=init.Zero())
    # conv_trans2 = net[-2]
    # conv_trans2.initialize(init=init.Zero())
    # conv_trans3 = net[-3]
    # conv_trans3.initialize(init=init.Zero())
    # conv_trans4 = net[-4]
    # conv_trans4.initialize(init=init.Zero())
    # conv_trans5 = net[-5]
    # conv_trans5.initialize(init=init.Zero())
    # net[-2].initialize(init=init.Xavier())
    # Forward a dummy batch so deferred shapes are inferred, then install the
    # bilinear upsampling kernel sized from the realized weight shape.
    x = nd.zeros((batch_size, 3, *input_shape))
    net(x)
    shape = conv_trans1.weight.data().shape
    conv_trans1.weight.set_data(bilinear_kernel(*shape[0:3]))
    conv_trans0 = net[1]
    print(conv_trans0.weight.data())
    # print(conv_trans0.weight.data())
    #
    # shape = conv_trans2.weight.data().shape
    # conv_trans2.weight.set_data(bilinear_kernel(*shape[0:3]))
    #
    # shape = conv_trans3.weight.data().shape
    # conv_trans3.weight.set_data(bilinear_kernel(*shape[0:3]))
    #
    # shape = conv_trans4.weight.data().shape
    # conv_trans4.weight.set_data(bilinear_kernel(*shape[0:3]))
    #
    # shape = conv_trans5.weight.data().shape
    # conv_trans5.weight.set_data(bilinear_kernel(*shape[0:3]))
    # Move to GPU and resume from a checkpoint before (re)training.
    ctx = common.ChoiceGpu()
    net.collect_params().reset_ctx(ctx)
    net.load_parameters("train.params", ctx=ctx)
    # Per-pixel softmax cross-entropy over the channel axis.
    loss = gluon.loss.SoftmaxCrossEntropyLoss(axis=1)
    if True:
        trainer = gluon.Trainer(net.collect_params(), 'sgd', {
            'learning_rate': 1 / batch_size,
            'wd': 1e-3
        })
        common.Train(train_data, 10, net, loss, trainer, batch_size, ctx)
    # Visualize n test images: input / prediction / ground truth per row.
    n = 6
    imgs = []
    data, label = ReadImage(train=False)
    for i in range(n):
        x = data[i]
        pred = label2image(predict(x, net, ctx))
        imgs += [x, pred, label[i]]
    show_images(imgs, nrows=n, ncols=3, figsize=(6, 10))
    print("ok")