def mnist_lenet_prediction(image, test=False):
    """
    Construct LeNet for MNIST.
    """
    image /= 255.0
    c1 = PF.convolution(image, 16, (5, 5), name='conv1')
    c1 = F.relu(F.max_pooling(c1, (2, 2)), inplace=True)
    c2 = PF.convolution(c1, 16, (5, 5), name='conv2')
    c2 = F.relu(F.max_pooling(c2, (2, 2)), inplace=True)
    c3 = F.relu(PF.affine(c2, 50, name='fc3'), inplace=True)
    c4 = PF.affine(c3, 10, name='fc4')
    return c4

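# A minimal usage sketch for mnist_lenet_prediction, assuming the imports
# these snippets rely on throughout:
#   import numpy as np
#   import nnabla as nn
#   import nnabla.functions as F
#   import nnabla.parametric_functions as PF
nn.clear_parameters()
x = nn.Variable([64, 1, 28, 28])
y = mnist_lenet_prediction(x, test=True)
x.d = np.random.uniform(0, 255, size=x.shape)  # raw pixels; scaled inside
y.forward()
print(y.d.shape)  # -> (64, 10): one logit vector per image
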
def test_save_load_parameters():
    v = nn.Variable([64, 1, 28, 28], need_grad=False)
    with nn.parameter_scope("param1"):
        with nn.parameter_scope("conv1"):
            h = PF.convolution(v, 32, (3, 3))
            b = PF.batch_normalization(h, batch_stat=True)
        with nn.parameter_scope("conv2"):
            h1 = PF.convolution(v, 32, (3, 3))
            b2 = PF.batch_normalization(h1, batch_stat=True)

    # Randomize all parameters (iteritems comes from six).
    for k, v in iteritems(nn.get_parameters(grad_only=False)):
        v.data.cast(np.float32)[...] = np.random.randn(*v.shape)

    with nn.parameter_scope("param1"):
        param1 = nn.get_parameters(grad_only=False)
        nn.save_parameters("tmp.h5")
        nn.save_parameters("tmp.protobuf")

    with nn.parameter_scope("param2"):
        nn.load_parameters('tmp.h5')
        param2 = nn.get_parameters(grad_only=False)

    with nn.parameter_scope("param3"):
        nn.load_parameters('tmp.protobuf')
        param3 = nn.get_parameters(grad_only=False)

    for par2 in [param2, param3]:
        assert param1.keys() == par2.keys()  # Check order
        for (n1, p1), (n2, p2) in zip(sorted(param1.items()),
                                      sorted(par2.items())):
            assert n1 == n2
            assert np.all(p1.d == p2.d)
            assert p1.data.dtype == p2.data.dtype
            assert p1.need_grad == p2.need_grad

def discriminator(x, maxh=256, test=False, output_hidden=False):
    """
    Builds a discriminator network that maps a (B, 1, 28, 28) input to
    a (B, 1) output.
    """
    # Define shortcut functions
    def bn(xx):
        # Batch normalization
        return PF.batch_normalization(xx, batch_stat=not test)

    def downsample2(xx, c):
        return PF.convolution(xx, c, (3, 3), pad=(1, 1), stride=(2, 2),
                              with_bias=False)

    assert maxh // 8 > 0
    with nn.parameter_scope("dis"):
        # (1, 28, 28) --> (32, 16, 16)
        with nn.parameter_scope("conv1"):
            c1 = F.elu(bn(PF.convolution(x, maxh // 8, (3, 3),
                                         pad=(3, 3), stride=(2, 2),
                                         with_bias=False)))
        # (32, 16, 16) --> (64, 8, 8)
        with nn.parameter_scope("conv2"):
            c2 = F.elu(bn(downsample2(c1, maxh // 4)))
        # (64, 8, 8) --> (128, 4, 4)
        with nn.parameter_scope("conv3"):
            c3 = F.elu(bn(downsample2(c2, maxh // 2)))
        # (128, 4, 4) --> (256, 4, 4)
        with nn.parameter_scope("conv4"):
            c4 = bn(PF.convolution(c3, maxh, (3, 3), pad=(1, 1),
                                   with_bias=False))
        # (256, 4, 4) --> (1,)
        with nn.parameter_scope("fc1"):
            f = PF.affine(c4, 1)
    if output_hidden:
        return f, [c1, c2, c3, c4]
    return f

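# A minimal usage sketch, assuming the same imports as above: checks the
# (B, 1, 28, 28) -> (B, 1) mapping documented in the docstring, plus the
# hidden activations returned when output_hidden=True.
nn.clear_parameters()
x = nn.Variable([8, 1, 28, 28])
f, hidden = discriminator(x, maxh=256, test=True, output_hidden=True)
print(f.shape)                    # -> (8, 1)
print([h.shape for h in hidden])  # feature maps from conv1..conv4
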
def res_unit(x, scope_name, dn=False, test=False):
    C = x.shape[1]
    with nn.parameter_scope(scope_name):
        # Conv -> BN -> Relu
        with nn.parameter_scope("conv1"):
            h = PF.convolution(x, C // 2, kernel=(1, 1), pad=(0, 0),
                               with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)
            h = F.relu(h)
        # Conv -> BN -> Relu
        with nn.parameter_scope("conv2"):
            h = PF.convolution(h, C // 2, kernel=(3, 3), pad=(1, 1),
                               with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)
            h = F.relu(h)
        # Conv -> BN
        with nn.parameter_scope("conv3"):
            h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                               with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)
        # Residual -> Relu
        h = F.relu(h + x)
        # Max pooling
        if dn:
            h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))
        return h

def res_unit(x, scope):
    C = x.shape[1]
    with nn.parameter_scope(scope):
        with nn.parameter_scope('conv1'):
            h = F.elu(bn(PF.convolution(x, C // 2, (1, 1), with_bias=False)))
        with nn.parameter_scope('conv2'):
            h = F.elu(
                bn(PF.convolution(h, C // 2, (3, 3), pad=(1, 1),
                                  with_bias=False)))
        with nn.parameter_scope('conv3'):
            h = bn(PF.convolution(h, C, (1, 1), with_bias=False))
    return F.elu(F.add2(h, x, inplace=True))

def mnist_lenet_feature(image, test=False):
    """
    Construct LeNet for MNIST.
    """
    c1 = F.elu(PF.convolution(image, 20, (5, 5), name='conv1'))
    c1 = F.average_pooling(c1, (2, 2))
    c2 = F.elu(PF.convolution(c1, 50, (5, 5), name='conv2'))
    c2 = F.average_pooling(c2, (2, 2))
    c3 = F.elu(PF.affine(c2, 500, name='fc3'))
    c4 = PF.affine(c3, 10, name='fc4')
    c5 = PF.affine(c4, 2, name='fc_embed')
    return c5

def cifar10_resnet23_prediction(ctx, image, test=False):
    """
    Construct ResNet 23.
    """
    # Residual Unit
    def res_unit(x, scope_name, dn=False, test=False):
        C = x.shape[1]
        with nn.parameter_scope(scope_name):
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv1"):
                h = PF.convolution(x, C // 2, kernel=(1, 1), pad=(0, 0),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv2"):
                h = PF.convolution(h, C // 2, kernel=(3, 3), pad=(1, 1),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
            # Residual -> Relu
            h = F.relu(h + x)
            # Max pooling
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))
            return h

    nmaps = 64
    ncls = 10

    # Conv -> BN -> Relu
    with nn.context_scope(ctx):
        with nn.parameter_scope("conv1"):
            h = PF.convolution(image, nmaps, kernel=(3, 3), pad=(1, 1),
                               with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)
            h = F.relu(h)
        h = res_unit(h, "conv2", False)   # -> 32x32
        h = res_unit(h, "conv3", True)    # -> 16x16
        h = bn_dropout(h, "bn_dropout1", test)
        h = res_unit(h, "conv4", False)   # -> 16x16
        h = res_unit(h, "conv5", True)    # -> 8x8
        h = bn_dropout(h, "bn_dropout2", test)
        h = res_unit(h, "conv6", False)   # -> 8x8
        h = res_unit(h, "conv7", True)    # -> 4x4
        h = bn_dropout(h, "bn_dropout3", test)
        h = res_unit(h, "conv8", False)   # -> 4x4
        h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1
        pred = PF.affine(h, ncls)
    return pred

def shortcut(x, ochannels, stride, shortcut_type, test):
    ichannels = x.shape[1]
    use_conv = shortcut_type.lower() == 'c'
    if ichannels != ochannels:
        assert (ichannels * 2 == ochannels) or (ichannels * 4 == ochannels)
        if shortcut_type.lower() == 'b':
            use_conv = True
    if use_conv:
        # Convolution does everything.
        # Matching channels, striding.
        with nn.parameter_scope("shortcut_conv"):
            x = PF.convolution(x, ochannels, (1, 1), stride=stride,
                               with_bias=False)
            x = PF.batch_normalization(x, batch_stat=not test)
    else:
        if stride != (1, 1):
            # Stride
            x = F.average_pooling(x, (1, 1), stride)
        if ichannels != ochannels:
            # Zero-padding to channel axis
            ishape = x.shape
            zeros = F.constant(
                0, (ishape[0], ochannels - ichannels) + ishape[-2:])
            x = F.concatenate(x, zeros, axis=1)
    return x

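# A minimal shape-check sketch. shortcut_type appears to follow the usual
# ResNet option naming: 'c' always uses a 1x1 projection, 'b' uses one only
# when the channel count changes, and anything else falls back to average
# pooling plus zero-padded channels.
nn.clear_parameters()
x = nn.Variable([4, 64, 16, 16])
s = shortcut(x, ochannels=128, stride=(2, 2), shortcut_type='b', test=True)
print(s.shape)  # -> (4, 128, 8, 8)
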
def resnet_model(ctx, x, inmaps=64, act=F.relu, test=False):
    # Conv -> BN -> Relu
    with nn.context_scope(ctx):
        with nn.parameter_scope("conv1"):
            h = PF.convolution(x, inmaps, kernel=(3, 3), pad=(1, 1),
                               with_bias=False)
            h = PF.batch_normalization(h, decay_rate=0.9,
                                       batch_stat=not test)
            h = act(h)

        h = res_unit(h, "conv2", act, False)  # -> 32x32
        h = res_unit(h, "conv3", act, True)   # -> 16x16
        with nn.parameter_scope("bn0"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)
        h = res_unit(h, "conv4", act, False)  # -> 16x16
        h = res_unit(h, "conv5", act, True)   # -> 8x8
        with nn.parameter_scope("bn1"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)
        h = res_unit(h, "conv6", act, False)  # -> 8x8
        h = res_unit(h, "conv7", act, True)   # -> 4x4
        with nn.parameter_scope("bn2"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)
        h = res_unit(h, "conv8", act, False)  # -> 4x4
        h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1
        pred = PF.affine(h, 10)
    return pred

def conv_block(x, in_planes, out_planes, test=True):
    residual = x
    out1 = PF.batch_normalization(x, batch_stat=not test, name='bn1')
    out1 = F.relu(out1, True)
    out1 = PF.convolution(out1, int(out_planes / 2), kernel=(3, 3),
                          stride=(1, 1), pad=(1, 1), name='conv1',
                          with_bias=False)
    out2 = PF.batch_normalization(out1, batch_stat=not test, name='bn2')
    out2 = F.relu(out2, True)
    out2 = PF.convolution(out2, int(out_planes / 4), kernel=(3, 3),
                          stride=(1, 1), pad=(1, 1), name='conv2',
                          with_bias=False)
    out3 = PF.batch_normalization(out2, batch_stat=not test, name='bn3')
    out3 = F.relu(out3, True)
    out3 = PF.convolution(out3, int(out_planes / 4), kernel=(3, 3),
                          stride=(1, 1), pad=(1, 1), name='conv3',
                          with_bias=False)
    out3 = F.concatenate(out1, out2, out3, axis=1)
    if in_planes != out_planes:
        residual = PF.batch_normalization(residual, batch_stat=not test,
                                          name='downsample/0')
        residual = F.relu(residual, True)
        residual = PF.convolution(residual, out_planes, kernel=(1, 1),
                                  stride=(1, 1), name='downsample/2',
                                  with_bias=False)
    out3 += residual
    return out3

def separable_conv_with_bn(x, f, stride=False, aspp=False, atrous_rate=1,
                           act_fn=True, last_block=False, end_point=False,
                           eps=1e-03, out=False, test=False,
                           fix_params=False):
    with nn.parameter_scope("depthwise"):
        if stride:
            h = PF.depthwise_convolution(x, (3, 3), stride=(2, 2),
                                         pad=(1, 1), with_bias=False,
                                         fix_parameters=fix_params)
        elif aspp:
            h = PF.depthwise_convolution(x, (3, 3),
                                         pad=(atrous_rate, atrous_rate),
                                         stride=(1, 1),
                                         dilation=(atrous_rate, atrous_rate),
                                         with_bias=False,
                                         fix_parameters=fix_params)
        else:
            h = PF.depthwise_convolution(x, (3, 3), pad=(1, 1),
                                         with_bias=False,
                                         fix_parameters=fix_params)
        h = PF.batch_normalization(h, batch_stat=not test, eps=eps,
                                   fix_parameters=fix_params)
        if last_block:
            h = F.relu(h)
    with nn.parameter_scope("pointwise"):
        h = PF.convolution(h, f, (1, 1), stride=(1, 1), with_bias=False,
                           fix_parameters=fix_params)
        h = PF.batch_normalization(h, batch_stat=not test, eps=eps,
                                   fix_parameters=fix_params)
        if end_point:
            global endpoints
            endpoints['Decoder End Point 1'] = h
        if act_fn:
            h = F.relu(h)
    return h

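# A minimal shape-check sketch: with aspp=True the depthwise stage keeps the
# spatial size while enlarging the receptive field, since pad and dilation
# are both set to atrous_rate.
nn.clear_parameters()
x = nn.Variable([1, 32, 65, 65])
y = separable_conv_with_bn(x, 64, aspp=True, atrous_rate=6, test=True)
print(y.shape)  # -> (1, 64, 65, 65)
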
def conv_block(x, out_ch, kernel, stride, pad, test, scope):
    with nn.parameter_scope(scope):
        h = PF.convolution(x, out_ch, (kernel, kernel),
                           stride=(stride, stride), pad=(pad, pad),
                           w_init=NormalInitializer(0.02), name='conv')
        h = PF.batch_normalization(h, batch_stat=not test, name='bn')
        h = F.leaky_relu(h, alpha=0.2)
    return h

def discriminator(x, num_layer, fs, min_fs, kernel, pad, scope, test=False):
    with nn.parameter_scope(scope):
        h = conv_block(x, fs, kernel, 1, pad, test, 'head')
        h = middle_blocks(h, num_layer, fs, min_fs, kernel, pad, test)
        h = PF.convolution(h, 1, (kernel, kernel), stride=(1, 1),
                           pad=(pad, pad), w_init=NormalInitializer(0.02),
                           name='tail')
    return h

def test_save_module_grad_only_false():
    x = nn.Variable.from_numpy_array(np.random.random([1, 2, 10, 10]))
    ref_y = PF.convolution(x, 4, kernel=(3, 3), stride=(1, 1))
    for v in nn.get_parameters().values():
        v.need_grad = False
    ref_g = nn.graph_def.create_graph_from_variable("te_module", ref_y)
    y = ref_g(x)
    forward_variable_and_check_equal(ref_y, y)

def conv_unit(x, scope, maps, k=4, s=2, p=1, act=F.relu, test=False):
    with nn.parameter_scope(scope):
        h = PF.convolution(x, maps, kernel=(k, k), stride=(s, s),
                           pad=(p, p))
        if act is None:
            return h
        h = PF.batch_normalization(h, batch_stat=not test)
        h = act(h)
        return h

def clbn_resblock(x, maps, kernel=(3, 3), pad=(1, 1), stride=(1, 1),
                  test=False, bias_w=False, name='clbn-convblock'):
    h = x
    with nn.parameter_scope(name):
        h = PF.convolution(x, maps, kernel=kernel, pad=pad, stride=stride,
                           channel_last=True, with_bias=bias_w)
        z = h
        h = PF.batch_normalization(h, axes=[3], batch_stat=not test)
    return F.relu(h + z)

def clbn_self_folding_resblock(x, i, maps, kernel=(3, 3), pad=(1, 1),
                               stride=(1, 1), name='convblock'):
    h = x
    with nn.parameter_scope(name):
        h = PF.convolution(h, maps, kernel=kernel, pad=pad, stride=stride,
                           channel_last=True, with_bias=False)
        a, b = create_scale_bias(i, h.shape[3], axes=[3])
        h = a * h + b
    return F.relu(h + x)

def multiple_inputs_outputs_resblock(x, maps, kernel=(3, 3), pad=(1, 1),
                                     stride=(1, 1), w_bias=False,
                                     test=False, name='mo-convblock'):
    h = x
    with nn.parameter_scope(name):
        h = PF.convolution(h, maps, kernel=kernel, pad=pad, stride=stride,
                           with_bias=w_bias)
        h = PF.batch_normalization(h, axes=[1], batch_stat=not test)
    return F.relu(h + x)

def Downsample(h, nmap_out, scope_name):
    with nn.parameter_scope(scope_name):
        def sn_w(w):
            return PF.spectral_norm(w, dim=0)
        h = PF.convolution(h, nmap_out, (4, 4), stride=(2, 2), pad=(1, 1),
                           apply_w=sn_w, with_bias=False)
        h = PF.batch_normalization(h)
        h = F.leaky_relu(h, 0.2, inplace=True)
    return h

def res_unit_default(x, scope, bn_idx, test):
    # BatchNorm is independent from parameter sharing
    C = x.shape[1]
    with nn.parameter_scope(scope):
        with nn.parameter_scope('conv1'):
            with nn.parameter_scope('bn_{}-a'.format(bn_idx)):
                h = PF.batch_normalization(x, batch_stat=not test)
            h = F.relu(h)
            h = PF.convolution(h, C, (3, 3), pad=(1, 1), with_bias=False)
        with nn.parameter_scope('bn_{}-b'.format(bn_idx)):
            h = PF.batch_normalization(h, batch_stat=not test)
        h = F.relu(h)
        if not test:
            h = F.dropout(h, 0.25)
        with nn.parameter_scope('conv2'):
            h = PF.convolution(h, C, (3, 3), pad=(1, 1), with_bias=False)
    return x + h

def pad_conv(self, x, fdim, stride):
    h = PF.convolution(x, fdim, (4, 4), stride=stride, pad=(2, 2),
                       **self.conv_opts)
    return h

def get_alex_feat(input_var):
    """
    Exactly the same architecture as the AlexNet used for LPIPS, in a
    slightly modified form (the nnabla models package cannot be used here).
    """
    assert input_var.shape[1] == 3
    act1 = F.relu(PF.convolution(input_var, outmaps=64, kernel=(11, 11),
                                 pad=(2, 2), stride=(4, 4), name="conv0"),
                  True)
    act2 = F.relu(PF.convolution(F.max_pooling(act1, kernel=(3, 3),
                                               stride=(2, 2)),
                                 outmaps=192, kernel=(5, 5), pad=(2, 2),
                                 name="conv3"), True)
    act3 = F.relu(PF.convolution(F.max_pooling(act2, kernel=(3, 3),
                                               stride=(2, 2)),
                                 outmaps=384, kernel=(3, 3), pad=(1, 1),
                                 name="conv6"), True)
    act4 = F.relu(PF.convolution(act3, outmaps=256, kernel=(3, 3),
                                 pad=(1, 1), name="conv8"), True)
    act5 = F.relu(PF.convolution(act4, outmaps=256, kernel=(3, 3),
                                 pad=(1, 1), name="conv10"), True)
    return [act1, act2, act3, act4, act5]

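# A minimal usage sketch: materializes the five activation sets for one RGB
# batch (LPIPS would compare these activations between two images).
nn.clear_parameters()
img = nn.Variable([1, 3, 64, 64])
feats = get_alex_feat(img)
print([f.shape for f in feats])  # five feature maps, increasingly coarse
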
def bsf_resblock(x, maps, kernel=(3, 3), pad=(1, 1), stride=(1, 1),
                 test=False, w_bias=False, name='convblock'):
    with nn.parameter_scope(name):
        h = PF.convolution(x, maps, kernel=kernel, pad=pad, stride=stride,
                           with_bias=w_bias)
        z = h
        h = PF.batch_normalization(h, batch_stat=False)
    return F.relu(h + z)

def convblock(x, n, f_size=9, name=''):
    r = PF.convolution(x, n, kernel=(f_size, f_size),
                       pad=(f_size // 2, f_size // 2), stride=(1, 1),
                       name=name)
    return F.relu(r)

def cifar10_resnet23_prediction(ctx, scope, image, test=False):
    """
    Construct ResNet 23.
    """
    # Residual Unit
    def res_unit(x, scope_name, dn=False, test=False):
        C = x.shape[1]
        with nn.parameter_scope(scope_name):
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv1"):
                h = PF.convolution(x, C // 2, kernel=(1, 1), pad=(0, 0),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv2"):
                h = PF.convolution(h, C // 2, kernel=(3, 3), pad=(1, 1),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
            # Residual -> Relu
            h = F.relu(h + x)
            # Max pooling
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))
            return h

    nmaps = 64
    ncls = 10

    # Conv -> BN -> Relu
    with nn.context_scope(ctx):
        with nn.parameter_scope(scope):
            with nn.parameter_scope("conv1"):
                h = PF.convolution(image, nmaps, kernel=(3, 3), pad=(1, 1),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            h = res_unit(h, "conv2", False)   # -> 32x32
            h = res_unit(h, "conv3", True)    # -> 16x16
            h = res_unit(h, "conv4", False)   # -> 16x16
            h = res_unit(h, "conv5", True)    # -> 8x8
            h = res_unit(h, "conv6", False)   # -> 8x8
            h = res_unit(h, "conv7", True)    # -> 4x4
            h = res_unit(h, "conv8", False)   # -> 4x4
            h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1
            pred = PF.affine(h, ncls)
    return pred

def __call__(self, z, m):
    # m has target image shape: (N, emb, H, W)
    # z: (N, z_dim)
    N = m.shape[0]
    H, W = self.image_shape
    sh = H // (2 ** self.num_upsample)
    sw = W // (2 ** self.num_upsample)

    with ps("spade_generator"):
        with ps("z_embedding"):
            x = PF.affine(z, 16 * self.nf * sh * sw,
                          w_init=w_init(z, 16 * self.nf * sh * sw))
            x = F.reshape(x, (N, 16 * self.nf, sh, sw))

        with ps("head"):
            x = self.head_0(x, m)

        with ps("middle0"):
            x = self.up(x)
            x = self.G_middle_0(x, m)

        # (sic) scope name kept as-is to preserve parameter names
        with ps("middel1"):
            if self.num_upsample > 5:
                x = self.up(x)
            x = self.G_middle_1(x, m)

        with ps("up0"):
            x = self.up(x)
            x = self.up_0(x, m)

        with ps("up1"):
            x = self.up(x)
            x = self.up_1(x, m)

        with ps("up2"):
            x = self.up(x)
            x = self.up_2(x, m)

        with ps("up3"):
            x = self.up(x)
            x = self.up_3(x, m)

        if self.num_upsample > 6:
            with ps("up4"):
                x = self.up(x)
                x = self.up_4(x, m)

        with ps("last_conv"):
            x = PF.convolution(F.leaky_relu(x, 2e-1), 3, kernel=(3, 3),
                               pad=(1, 1), w_init=w_init(x, 3))
            x = F.tanh(x)

    return x

def vectorizer(x, maxh=256, test=False, output_hidden=False):
    """
    Builds an encoder network that maps a (B, 1, 28, 28) input to
    a (B, 100) code.
    """
    # Define shortcut functions
    def bn(xx):
        # Batch normalization
        return PF.batch_normalization(xx, batch_stat=not test)

    def downsample2(xx, c):
        return PF.convolution(xx, c, (3, 3), pad=(1, 1), stride=(2, 2),
                              with_bias=False)

    assert maxh // 8 > 0
    with nn.parameter_scope("dis"):
        # (1, 28, 28) --> (32, 16, 16)
        if not test:
            # Light data augmentation at training time.
            x_ = F.image_augmentation(x, min_scale=0.9, max_scale=1.08)
            x2 = F.random_shift(x_, (2, 2))
            with nn.parameter_scope("conv1"):
                c1 = F.elu(bn(PF.convolution(x2, maxh // 8, (3, 3),
                                             pad=(3, 3), stride=(2, 2),
                                             with_bias=False)))
        else:
            with nn.parameter_scope("conv1"):
                c1 = F.elu(bn(PF.convolution(x, maxh // 8, (3, 3),
                                             pad=(3, 3), stride=(2, 2),
                                             with_bias=False)))
        # (32, 16, 16) --> (64, 8, 8)
        with nn.parameter_scope("conv2"):
            c2 = F.elu(bn(downsample2(c1, maxh // 4)))
        # (64, 8, 8) --> (128, 4, 4)
        with nn.parameter_scope("conv3"):
            c3 = F.elu(bn(downsample2(c2, maxh // 2)))
        # (128, 4, 4) --> (256, 4, 4)
        with nn.parameter_scope("conv4"):
            c4 = bn(PF.convolution(c3, maxh, (3, 3), pad=(1, 1),
                                   with_bias=False))
        # (256, 4, 4) --> (100, 1, 1)
        with nn.parameter_scope("fc1"):
            f = PF.convolution(c4, 100, (4, 4), pad=(0, 0), with_bias=False)
    if output_hidden:
        return f, [c1, c2, c3, c4]
    return f

def discriminator(x, maxh=256, test=False, output_hidden=False):
    """
    Builds a discriminator network that maps a (B, 1, 28, 28) input to
    a (B, 1) output.
    """
    # Define shortcut functions
    def bn(xx):
        # Batch normalization
        return PF.batch_normalization(xx, batch_stat=not test)

    def downsample2(xx, c):
        return PF.convolution(xx, c, (3, 3), pad=(1, 1), stride=(2, 2),
                              with_bias=False)

    assert maxh // 8 > 0
    with nn.parameter_scope("dis"):
        # (1, 28, 28) --> (32, 16, 16)
        with nn.parameter_scope("conv1"):
            c1 = F.elu(
                bn(PF.convolution(x, maxh // 8, (3, 3), pad=(3, 3),
                                  stride=(2, 2), with_bias=False)))
        # (32, 16, 16) --> (64, 8, 8)
        with nn.parameter_scope("conv2"):
            c2 = F.elu(bn(downsample2(c1, maxh // 4)))
        # (64, 8, 8) --> (128, 4, 4)
        with nn.parameter_scope("conv3"):
            c3 = F.elu(bn(downsample2(c2, maxh // 2)))
        # (128, 4, 4) --> (256, 4, 4)
        with nn.parameter_scope("conv4"):
            c4 = bn(
                PF.convolution(c3, maxh, (3, 3), pad=(1, 1),
                               with_bias=False))
        # (256, 4, 4) --> (1,)
        with nn.parameter_scope("fc1"):
            f = PF.affine(c4, 1)
    if output_hidden:
        return f, [c1, c2, c3, c4]
    return f

def network(self, x, test=False):
    # Input -> 1,28,28
    # Convolution -> 16,22,22
    with nn.parameter_scope('Convolution'):
        h = PF.convolution(x, 16, (7, 7), (0, 0))
    # ReLU
    h = F.relu(h, True)
    # BatchNormalization
    with nn.parameter_scope('BatchNormalization'):
        h = PF.batch_normalization(h, (1,), 0.9, 0.0001, not test)
    # MaxPooling -> 16,11,11
    h = F.max_pooling(h, (2, 2), (2, 2), True)
    # Convolution_2 -> 30,9,9
    with nn.parameter_scope('Convolution_2'):
        h = PF.convolution(h, 30, (3, 3), (0, 0))
    # ReLU_2
    h = F.relu(h, True)
    # BatchNormalization_2
    with nn.parameter_scope('BatchNormalization_2'):
        h = PF.batch_normalization(h, (1,), 0.9, 0.0001, not test)
    # MaxPooling_2 -> 30,4,4
    h = F.max_pooling(h, (2, 2), (2, 2), True)
    # Affine -> 160
    with nn.parameter_scope('Affine'):
        h = PF.affine(h, (160,))
    # ReLU_3
    h = F.relu(h, True)
    # BatchNormalization_3
    with nn.parameter_scope('BatchNormalization_3'):
        h = PF.batch_normalization(h, (1,), 0.9, 0.0001, not test)
    # Affine_2 -> 26
    with nn.parameter_scope('Affine_2'):
        h = PF.affine(h, (26,))
    return h

def residual_block_5C(x, num_output_channel=64, growth_channel=32):
    conv1 = F.leaky_relu(
        PF.convolution(x, growth_channel, kernel=(3, 3), stride=(1, 1),
                       pad=(1, 1), name='conv1'),
        alpha=0.2, inplace=True)
    conv2 = F.leaky_relu(
        PF.convolution(F.concatenate(x, conv1, axis=1), growth_channel,
                       kernel=(3, 3), stride=(1, 1), pad=(1, 1),
                       name='conv2'),
        alpha=0.2, inplace=True)
    conv3 = F.leaky_relu(
        PF.convolution(F.concatenate(x, conv1, conv2, axis=1),
                       growth_channel, kernel=(3, 3), stride=(1, 1),
                       pad=(1, 1), name='conv3'),
        alpha=0.2, inplace=True)
    conv4 = F.leaky_relu(
        PF.convolution(F.concatenate(x, conv1, conv2, conv3, axis=1),
                       growth_channel, kernel=(3, 3), stride=(1, 1),
                       pad=(1, 1), name='conv4'),
        alpha=0.2, inplace=True)
    conv5 = PF.convolution(
        F.concatenate(x, conv1, conv2, conv3, conv4, axis=1),
        num_output_channel, kernel=(3, 3), stride=(1, 1), pad=(1, 1),
        name='conv5')
    return (conv5 * 0.2) + x

def spade(x, m, hidden_dim=128, kernel=(3, 3), norm_type="in"):
    """
    Spatially-Adaptive Normalization proposed in
    Semantic Image Synthesis with Spatially-Adaptive Normalization
    (https://arxiv.org/pdf/1903.07291.pdf).

    Args:
        x (nn.Variable): Input variable for the spade layer.
        m (nn.Variable): Spatial condition variable, such as an object_id
            mask segmentation. It is used to generate the adaptive scale
            (gamma) and adaptive bias (beta) applied after normalization.
        hidden_dim (int): Hidden dims for the first convolution applied to m.
        kernel (list of int): Kernel shapes for the convolutions.
        norm_type (str): A type of normalization. ["in", "bn"] are supported
            now.
    """
    # x: (N, Cx, H, W), m: (N, Cm, H, W)
    assert len(x.shape) == 4 and len(m.shape) == 4

    pad = tuple(i // 2 for i in kernel)
    c_dim = x.shape[1]
    conv_args = dict(kernel=kernel, pad=pad)
    with ps("spatial_adaptive_normalization"):
        normalized = _normalize(x, norm_type)
        m = F.interpolate(m, output_size=x.shape[2:], mode="nearest")
        with ps("shared"):
            actv = F.relu(PF.convolution(m, hidden_dim,
                                         w_init=w_init(m, hidden_dim),
                                         **conv_args))
        with ps("gamma"):
            gamma = PF.convolution(actv, c_dim,
                                   w_init=w_init(actv, c_dim), **conv_args)
        with ps("beta"):
            beta = PF.convolution(actv, c_dim,
                                  w_init=w_init(actv, c_dim), **conv_args)
    return normalized * gamma + beta

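# A minimal usage sketch: SPADE normalizes x, then rescales and shifts it
# with gamma/beta predicted from the condition map m (resized to x's spatial
# size). Assumes the `ps` (parameter scope), `w_init` and `_normalize`
# helpers that this snippet relies on are defined.
nn.clear_parameters()
x = nn.Variable([2, 64, 32, 32])    # features to be normalized
m = nn.Variable([2, 3, 256, 256])   # e.g. a one-hot segmentation map
y = spade(x, m, hidden_dim=128, norm_type="in")
print(y.shape)  # -> (2, 64, 32, 32), same as x
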
def cf_resblock(x, maps, kernel=(3, 3), pad=(1, 1), stride=(1, 1),
                test=False, channel_last=False, name='cf-convblock'):
    axes = get_channel_axes(channel_last)
    h = x
    with nn.parameter_scope(name):
        h = PF.convolution(h, maps, kernel=kernel, pad=pad, stride=stride,
                           channel_last=channel_last, with_bias=False)
        h = PF.batch_normalization(h, axes=axes, batch_stat=not test)
    return F.relu(h + x)

def test_graph_model(model, seed):
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4], need_grad=True)
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    nn.set_default_context(nn.Context())

    # Forwardprop by definition
    nn.clear_parameters()
    if model == "mlp":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 3)
            z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
    elif model == "recurrent":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 3)
            z2 = F.relu(z, inplace=True)
        h = z2
        for _ in range(2):
            with nn.parameter_scope('fc2'):
                h = PF.affine(h, 3)
                h = F.relu(h, inplace=True)
        with nn.parameter_scope('fc3'):
            z3 = PF.affine(h, 5)
    elif model == "convolution":
        with nn.parameter_scope('conv1'):
            z = PF.convolution(x, 3, (2, 2))
            z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
    else:
        raise ValueError()
    l = F.softmax_cross_entropy(z3, t, 1)
    L = F.mean(l)

    # Forwardprop
    L.forward(clear_no_need_grad=True)

    # Backprop
    # Diff should be initialized since they are always accumulated
    x.grad.zero()
    L.backward(clear_buffer=True)
    x.g = rng.randn(*x.shape)
    parameters = nn.get_parameters()
    for param in parameters.values():
        param.grad.zero()
    inputs = [x] + list(parameters.values())
    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L, inputs, 1e-3)
    assert np.allclose(ngrad, agrad, atol=1.05e-2)

def conv_unit(x, scope, maps, k=4, s=2, p=1, act=F.prelu, test=False):
    with nn.parameter_scope(scope):
        h = PF.convolution(x, maps, kernel=(k, k), stride=(s, s),
                           pad=(p, p))
        h = PF.batch_normalization(h, batch_stat=not test)
        shape = h.shape
        # Fixed PReLU slope (scalar Variable, not a registered parameter).
        w = nn.Variable()
        w.d = 0.1
        h = act(h, w)
        return h

def conv_unit(x, scope, maps, k=4, s=2, p=1, act=F.prelu, test=False):
    with nn.parameter_scope(scope):
        h = PF.convolution(x, maps, kernel=(k, k), stride=(s, s),
                           pad=(p, p))
        h = PF.batch_normalization(h, batch_stat=not test)
        shape = h.shape
        # Fixed PReLU slope (scalar Variable, not a registered parameter).
        w = nn.Variable()
        w.d = 0.3
        h = act(h, w)
        return h

def front_end(self, x, channels):
    with nn.parameter_scope("first_layer"):
        pad_width = get_symmetric_padwidth(3,
                                           channel_last=self.channel_last)
        h = F.pad(x, pad_width=pad_width, mode=self.padding_type)
        h = PF.convolution(h, channels[0], (7, 7), **self.conv_opts)
        h = self.instance_norm_relu(h)

    for i, channel in enumerate(channels[1:]):
        with nn.parameter_scope("down_sample_layer_{}".format(i)):
            h = PF.convolution(h, channel, (3, 3), stride=(2, 2),
                               pad=(1, 1), **self.conv_opts)
            h = self.instance_norm_relu(h)

    return h

def __call__(self, x, m):
    # x: (N, C, H, W)
    s = self.shortcut(x, m)
    hidden_dim = min(x.shape[1], self.out_dim)
    with ps("res_layer1"):
        h = spade(x, m)
        h = self.act(h)
        h = PF.convolution(h, hidden_dim, kernel=(3, 3), pad=(1, 1),
                           w_init=w_init(h, hidden_dim), **self.conv_opts)
    with ps("res_layer2"):
        h = spade(h, m)
        h = self.act(h)
        h = PF.convolution(h, self.out_dim, kernel=(3, 3), pad=(1, 1),
                           w_init=w_init(h, self.out_dim),
                           **self.conv_opts)
    return s + h

def conv1x1(x, output_filter, scope, test):
    """
    1x1 convolution, works more like a constant multiplier.
    """
    with nn.parameter_scope("1x1conv"):
        h = PF.convolution(x, output_filter, (1, 1), with_bias=False)
        h = PF.batch_normalization(h, batch_stat=not test)
        h = F.relu(h)
    return h

def convolution(x, n, kernel, stride, pad, init_method=None):
    if init_method == "paper":
        init = nn.initializer.NormalInitializer(0.02)
    else:
        s = nn.initializer.calc_normal_std_glorot(x.shape[1], n,
                                                  kernel=kernel)
        init = nn.initializer.NormalInitializer(s)
    x = PF.convolution(x, n, kernel=kernel, stride=stride, pad=pad,
                       with_bias=True, w_init=init)
    return x

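# A minimal usage sketch: init_method="paper" uses a fixed N(0, 0.02)
# initializer (the DCGAN-style choice), while any other value derives the
# std from Glorot fan-in/fan-out.
nn.clear_parameters()
x = nn.Variable([1, 3, 64, 64])
h = convolution(x, 32, kernel=(4, 4), stride=(2, 2), pad=(1, 1),
                init_method="paper")
print(h.shape)  # -> (1, 32, 32, 32)
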
def test_graph_model(model, seed):
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4], need_grad=True)
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    nn.set_default_context(nn.Context())

    # Forwardprop by definition
    nn.clear_parameters()
    if model == "mlp":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 3)
            z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
    elif model == "recurrent":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 8)
            z2 = F.relu(z, inplace=True)
        h = z2
        for _ in range(2):
            with nn.parameter_scope('fc2'):
                h = PF.affine(h, 8)
                h = F.relu(h, inplace=True)
        with nn.parameter_scope('fc3'):
            z3 = PF.affine(h, 5)
    elif model == "convolution":
        with nn.parameter_scope('conv1'):
            z = PF.convolution(x, 3, (2, 2))
            z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
    else:
        raise ValueError()
    l = F.softmax_cross_entropy(z3, t, 1)
    L = F.mean(l)

    # Forwardprop
    L.forward(clear_no_need_grad=True)

    # Backprop
    # Diff should be initialized since they are always accumulated
    x.grad.zero()
    L.backward(clear_buffer=True)
    x.g = rng.randn(*x.shape)
    parameters = nn.get_parameters()
    for param in parameters.values():
        param.grad.zero()
    inputs = [x] + list(parameters.values())
    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L, inputs, 1e-3)
    assert_allclose(ngrad, agrad, atol=1.05e-2)

def res_block(x, scope_name, act=F.relu, dn=False, test=False):
    C = x.shape[1]
    with nn.parameter_scope(scope_name):
        # Conv -> BN -> Relu
        with nn.parameter_scope("conv1"):
            h = PF.convolution(x, C // 2, kernel=(1, 1), pad=(0, 0),
                               with_bias=False)
            h = PF.batch_normalization(h, decay_rate=0.9,
                                       batch_stat=not test)
            h = act(h)
        # Conv -> BN -> Relu
        with nn.parameter_scope("conv2"):
            h = PF.convolution(h, C // 2, kernel=(3, 3), pad=(1, 1),
                               with_bias=False)
            h = PF.batch_normalization(h, decay_rate=0.9,
                                       batch_stat=not test)
            h = act(h)
        # Conv -> BN
        with nn.parameter_scope("conv3"):
            h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                               with_bias=False)
            h = PF.batch_normalization(h, decay_rate=0.9,
                                       batch_stat=not test)
    return h

def test_graph_clear_buffer(seed):
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4])
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    # Network definition
    nn.set_default_context(nn.Context())
    nn.clear_parameters()
    x1 = x + 1
    x2 = x1 - 1
    with nn.parameter_scope('conv1'):
        z = PF.convolution(x2, 3, (2, 2))
        z2 = F.relu(z, inplace=True)
    with nn.parameter_scope('fc2'):
        z3 = PF.affine(z2, 5)
    l = F.softmax_cross_entropy(z3, t, 1)
    L = F.mean(l)

    # Forwardprop
    import tempfile
    import os
    tmpd = tempfile.mkdtemp()
    nn.save_parameters(os.path.join(tmpd, 'parameter.h5'))
    first = False
    for cnng in [False, True]:
        for cb in [False, True]:
            _ = nn.load_parameters(os.path.join(tmpd, 'parameter.h5'))
            for v in nn.get_parameters().values():
                v.grad.zero()
            L.forward(clear_no_need_grad=cnng)
            L.backward(clear_buffer=cb)
            if not first:
                first = True
                g = list(nn.get_parameters().values())[0].g.copy()
            else:
                g2 = list(nn.get_parameters().values())[0].g.copy()
                assert np.all(g == g2)

def mnist_resnet_prediction(image, test=False):
    """
    Construct ResNet for MNIST.
    """
    image /= 255.0

    def bn(x):
        return PF.batch_normalization(x, batch_stat=not test)

    def res_unit(x, scope):
        C = x.shape[1]
        with nn.parameter_scope(scope):
            with nn.parameter_scope('conv1'):
                h = F.elu(bn(PF.convolution(x, C // 2, (1, 1),
                                            with_bias=False)))
            with nn.parameter_scope('conv2'):
                h = F.elu(
                    bn(PF.convolution(h, C // 2, (3, 3), pad=(1, 1),
                                      with_bias=False)))
            with nn.parameter_scope('conv3'):
                h = bn(PF.convolution(h, C, (1, 1), with_bias=False))
        return F.elu(F.add2(h, x, inplace=True))

    # Conv1 --> 64 x 32 x 32
    with nn.parameter_scope("conv1"):
        c1 = F.elu(
            bn(PF.convolution(image, 64, (3, 3), pad=(3, 3),
                              with_bias=False)))
    # Conv2 --> 64 x 16 x 16
    c2 = F.max_pooling(res_unit(c1, "conv2"), (2, 2))
    # Conv3 --> 64 x 8 x 8
    c3 = F.max_pooling(res_unit(c2, "conv3"), (2, 2))
    # Conv4 --> 64 x 8 x 8
    c4 = res_unit(c3, "conv4")
    # Conv5 --> 64 x 4 x 4
    c5 = F.max_pooling(res_unit(c4, "conv5"), (2, 2))
    # Conv6 --> 64 x 4 x 4
    c6 = res_unit(c5, "conv6")
    pl = F.average_pooling(c6, (4, 4))
    with nn.parameter_scope("classifier"):
        y = PF.affine(pl, 10)
    return y

def generator(z, maxh=256, test=False, output_hidden=False):
    """
    Builds a generator network that takes (B, Z, 1, 1) inputs and
    generates (B, 1, 28, 28) outputs.
    """
    # Define shortcut functions
    def bn(x):
        # Batch normalization
        return PF.batch_normalization(x, batch_stat=not test)

    def upsample2(x, c):
        # Twofold upsampling with deconvolution.
        return PF.deconvolution(x, c, kernel=(4, 4), pad=(1, 1),
                                stride=(2, 2), with_bias=False)

    assert maxh // 4 > 0
    with nn.parameter_scope("gen"):
        # (Z, 1, 1) --> (256, 4, 4)
        with nn.parameter_scope("deconv1"):
            d1 = F.elu(bn(PF.deconvolution(z, maxh, (4, 4),
                                           with_bias=False)))
        # (256, 4, 4) --> (128, 8, 8)
        with nn.parameter_scope("deconv2"):
            d2 = F.elu(bn(upsample2(d1, maxh // 2)))
        # (128, 8, 8) --> (64, 16, 16)
        with nn.parameter_scope("deconv3"):
            d3 = F.elu(bn(upsample2(d2, maxh // 4)))
        # (64, 16, 16) --> (32, 28, 28)
        with nn.parameter_scope("deconv4"):
            # Convolution with kernel=4, pad=3 and stride=2 transforms a
            # 28 x 28 map to a 16 x 16 map. Deconvolution with those
            # parameters behaves like an inverse operation, i.e. it maps
            # 16 x 16 to 28 x 28.
            d4 = F.elu(bn(PF.deconvolution(d3, maxh // 8, (4, 4),
                                           pad=(3, 3), stride=(2, 2),
                                           with_bias=False)))
        # (32, 28, 28) --> (1, 28, 28)
        with nn.parameter_scope("conv5"):
            x = F.tanh(PF.convolution(d4, 1, (3, 3), pad=(1, 1)))
    if output_hidden:
        return x, [d1, d2, d3, d4]
    return x

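# A minimal usage sketch: verifies the shape bookkeeping in the comments
# above, in particular that the deconvolution with kernel=4, pad=3, stride=2
# maps 16x16 to 28x28.
nn.clear_parameters()
z = nn.Variable([16, 100, 1, 1])
x, hidden = generator(z, maxh=256, test=True, output_hidden=True)
print([h.shape[1:] for h in hidden])
# -> [(256, 4, 4), (128, 8, 8), (64, 16, 16), (32, 28, 28)]
print(x.shape)  # -> (16, 1, 28, 28)
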
def cifar100_resnet23_prediction(image, ctx, test=False):
    """
    Construct ResNet 23.
    """
    # Residual Unit
    def res_unit(x, scope_name, rng, dn=False, test=False):
        C = x.shape[1]
        with nn.parameter_scope(scope_name):
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv1"):
                w_init = UniformInitializer(
                    calc_uniform_lim_glorot(C, C // 2, kernel=(1, 1)),
                    rng=rng)
                h = PF.convolution(x, C // 2, kernel=(1, 1), pad=(0, 0),
                                   w_init=w_init, with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv2"):
                w_init = UniformInitializer(
                    calc_uniform_lim_glorot(C // 2, C // 2, kernel=(3, 3)),
                    rng=rng)
                h = PF.convolution(h, C // 2, kernel=(3, 3), pad=(1, 1),
                                   w_init=w_init, with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                w_init = UniformInitializer(
                    calc_uniform_lim_glorot(C // 2, C, kernel=(1, 1)),
                    rng=rng)
                h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                                   w_init=w_init, with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
            # Residual -> Relu
            h = F.relu(h + x)
            # Max pooling
            if dn:
                h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))
            return h

    # Random generator for using the same init parameters in all devices
    rng = np.random.RandomState(0)
    nmaps = 384
    ncls = 100

    # Conv -> BN -> Relu
    with nn.context_scope(ctx):
        with nn.parameter_scope("conv1"):
            # Preprocess
            if not test:
                image = F.image_augmentation(image, contrast=1.0,
                                             angle=0.25, flip_lr=True)
                image.need_grad = False
            w_init = UniformInitializer(
                calc_uniform_lim_glorot(3, nmaps, kernel=(3, 3)), rng=rng)
            h = PF.convolution(image, nmaps, kernel=(3, 3), pad=(1, 1),
                               w_init=w_init, with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)
            h = F.relu(h)

        h = res_unit(h, "conv2", rng, False)  # -> 32x32
        h = res_unit(h, "conv3", rng, True)   # -> 16x16
        h = res_unit(h, "conv4", rng, False)  # -> 16x16
        h = res_unit(h, "conv5", rng, True)   # -> 8x8
        h = res_unit(h, "conv6", rng, False)  # -> 8x8
        h = res_unit(h, "conv7", rng, True)   # -> 4x4
        h = res_unit(h, "conv8", rng, False)  # -> 4x4
        h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1
        w_init = UniformInitializer(
            calc_uniform_lim_glorot(int(np.prod(h.shape[1:])), ncls,
                                    kernel=(1, 1)), rng=rng)
        pred = PF.affine(h, ncls, w_init=w_init)
    return pred

def downsample2(xx, c):
    return PF.convolution(xx, c, (3, 3), pad=(1, 1), stride=(2, 2),
                          with_bias=False)

def one_by_one_conv(h, scope, k=1, s=1, p=1):
    with nn.parameter_scope(scope):
        maps = h.shape[1]
        h = PF.convolution(h, maps, kernel=(k, k), stride=(s, s),
                           pad=(p, p))
    return h

def conv_unit(x, scope, maps, k=4, s=2, p=1, act=F.relu, test=False, cnt=0):
    with nn.parameter_scope(scope):
        h = PF.convolution(x, maps, kernel=(k, k), stride=(s, s),
                           pad=(p, p))
        h = batch_normalization(h, test=test)
        h = act(h)
        return h
