def resnet_model(ctx, x, inmaps=64, act=F.relu, test=False):
    # Conv -> BN -> Relu
    with nn.context_scope(ctx):
        with nn.parameter_scope("conv1"):
            h = PF.convolution(x, inmaps, kernel=(3, 3), pad=(1, 1),
                               with_bias=False)
            h = PF.batch_normalization(h, decay_rate=0.9, batch_stat=not test)
            h = act(h)
        h = res_unit(h, "conv2", act, False)  # -> 32x32
        h = res_unit(h, "conv3", act, True)   # -> 16x16
        with nn.parameter_scope("bn0"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)
        h = res_unit(h, "conv4", act, False)  # -> 16x16
        h = res_unit(h, "conv5", act, True)   # -> 8x8
        with nn.parameter_scope("bn1"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)
        h = res_unit(h, "conv6", act, False)  # -> 8x8
        h = res_unit(h, "conv7", act, True)   # -> 4x4
        with nn.parameter_scope("bn2"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)
        h = res_unit(h, "conv8", act, False)  # -> 4x4
        h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1
        pred = PF.affine(h, 10)
    return pred

def cnn_model_003(ctx, x, act=F.relu, test=False):
    with nn.context_scope(ctx):
        # Convblock0
        h = conv_unit(x, "conv00", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 32 -> 16
        with nn.parameter_scope("bn0"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)
        # Convblock 1
        h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 16 -> 8
        with nn.parameter_scope("bn1"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test:
            h = F.dropout(h)
        # Convblock 2
        h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act, test=test)  # 8 -> 6
        h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv23", 10, k=1, s=1, p=0, act=act, test=test)
        # Convblock 3
        h = F.average_pooling(h, (6, 6))
        with nn.parameter_scope("bn2"):
            h = PF.batch_normalization(h, batch_stat=not test)
        h = F.reshape(h, (h.shape[0], np.prod(h.shape[1:])))
    return h

def res_unit(x, scope_name, dn=False, test=False):
    C = x.shape[1]  # integer division below keeps the map count an int
    with nn.parameter_scope(scope_name):
        # Conv -> BN -> Relu
        with nn.parameter_scope("conv1"):
            h = PF.convolution(x, C // 2, kernel=(1, 1), pad=(0, 0),
                               with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)
            h = F.relu(h)
        # Conv -> BN -> Relu
        with nn.parameter_scope("conv2"):
            h = PF.convolution(h, C // 2, kernel=(3, 3), pad=(1, 1),
                               with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)
            h = F.relu(h)
        # Conv -> BN
        with nn.parameter_scope("conv3"):
            h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                               with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)
    # Residual -> Relu
    h = F.relu(h + x)
    # Maxpooling
    if dn:
        h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))
    return h

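# A minimal usage sketch for the res_unit above (ours, not part of the
# original code). It assumes the usual imports: `import numpy as np`,
# `import nnabla as nn`, `import nnabla.functions as F`, and
# `import nnabla.parametric_functions as PF`.
def _demo_res_unit():
    x = nn.Variable((4, 64, 32, 32))    # (B, C, H, W); C must be even
    y = res_unit(x, "block1", dn=True)  # downsampling: 32x32 -> 16x16
    x.d = np.random.randn(*x.shape)
    y.forward()
    assert y.shape == (4, 64, 16, 16)
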
def cifar10_resnet23_prediction(ctx, scope, image, test=False):
    """
    Construct ResNet 23
    """
    # Residual Unit
    def res_unit(x, scope_name, dn=False, test=False):
        C = x.shape[1]
        with nn.parameter_scope(scope_name):
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv1"):
                h = PF.convolution(x, C // 2, kernel=(1, 1), pad=(0, 0),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv2"):
                h = PF.convolution(h, C // 2, kernel=(3, 3), pad=(1, 1),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
        # Residual -> Relu
        h = F.relu(h + x)
        # Maxpooling
        if dn:
            h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))
        return h

    nmaps = 64
    ncls = 10
    # Conv -> BN -> Relu
    with nn.context_scope(ctx):
        with nn.parameter_scope(scope):
            with nn.parameter_scope("conv1"):
                h = PF.convolution(image, nmaps, kernel=(3, 3), pad=(1, 1),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            h = res_unit(h, "conv2", False)  # -> 32x32
            h = res_unit(h, "conv3", True)   # -> 16x16
            h = res_unit(h, "conv4", False)  # -> 16x16
            h = res_unit(h, "conv5", True)   # -> 8x8
            h = res_unit(h, "conv6", False)  # -> 8x8
            h = res_unit(h, "conv7", True)   # -> 4x4
            h = res_unit(h, "conv8", False)  # -> 4x4
            h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1
            pred = PF.affine(h, ncls)
    return pred

def res_unit(x, scope):
    C = x.shape[1]
    with nn.parameter_scope(scope):
        with nn.parameter_scope('conv1'):
            h = F.elu(bn(PF.convolution(x, C // 2, (1, 1), with_bias=False)))
        with nn.parameter_scope('conv2'):
            h = F.elu(
                bn(PF.convolution(h, C // 2, (3, 3), pad=(1, 1),
                                  with_bias=False)))
        with nn.parameter_scope('conv3'):
            h = bn(PF.convolution(h, C, (1, 1), with_bias=False))
    return F.elu(F.add2(h, x, inplace=True))

def res_unit(x, scope):
    C = x.shape[1]
    with nn.parameter_scope(scope):
        with nn.parameter_scope('conv1'):
            h = F.elu(bn(PF.binary_connect_convolution(
                x, C // 2, (1, 1), with_bias=False)))
        with nn.parameter_scope('conv2'):
            h = F.elu(
                bn(PF.binary_connect_convolution(
                    h, C // 2, (3, 3), pad=(1, 1), with_bias=False)))
        with nn.parameter_scope('conv3'):
            h = bn(PF.binary_connect_convolution(
                h, C, (1, 1), with_bias=False))
    return F.elu(x + h)

def cnn_model_003(ctx, x, act=F.elu, do=True, test=False):
    with nn.context_scope(ctx):
        # Convblock0
        h = conv_unit(x, "conv00", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 32 -> 16
        with nn.parameter_scope("bn0"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)
        # Convblock 1
        h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 16 -> 8
        with nn.parameter_scope("bn1"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)
        # Convblock 2
        h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act, test=test)  # 8 -> 6
        h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test)
        h_branch = h
        # Convblock 3
        h = conv_unit(h_branch, "conv23", 10, k=1, s=1, p=0, act=act,
                      test=test)
        h = F.average_pooling(h, (6, 6))
        with nn.parameter_scope("bn2"):
            h = PF.batch_normalization(h, batch_stat=not test)
        pred = F.reshape(h, (h.shape[0], np.prod(h.shape[1:])))
        # Uncertainty
        u0 = conv_unit(h_branch, "u0", 10, k=1, s=1, p=0, act=act, test=test)
        u0 = F.average_pooling(u0, (6, 6))
        with nn.parameter_scope("u0bn"):
            u0 = PF.batch_normalization(u0, batch_stat=not test)
        log_var = F.reshape(u0, (u0.shape[0], np.prod(u0.shape[1:])))
        # Uncertainty for uncertainty
        u1 = conv_unit(h_branch, "u1", 10, k=1, s=1, p=0, act=act, test=test)
        u1 = F.average_pooling(u1, (6, 6))
        with nn.parameter_scope("u1bn"):
            u1 = PF.batch_normalization(u1, batch_stat=not test)
        log_s = F.reshape(u1, (u1.shape[0], np.prod(u1.shape[1:])))
    return pred, log_var, log_s

def cnn_model_003(ctx, h, act=F.elu, do=True, test=False):
    with nn.context_scope(ctx):
        if not test:
            b, c, s, s = h.shape  # square input assumed: H == W == s
            h = F.image_augmentation(h, (c, s, s),
                                     min_scale=1.0, max_scale=1.5,
                                     angle=0.5, aspect_ratio=1.3,
                                     distortion=0.2, flip_lr=True)
        # Convblock0
        h = conv_unit(h, "conv00", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 32 -> 16
        with nn.parameter_scope("bn0"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)
        # Convblock 1
        h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 16 -> 8
        with nn.parameter_scope("bn1"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)
        # Convblock 2
        h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act, test=test)  # 8 -> 6
        h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test)
        u = h
        # Convblock 3
        h = conv_unit(h, "conv23", 10, k=1, s=1, p=0, act=act, test=test)
        h = F.average_pooling(h, (6, 6))
        with nn.parameter_scope("bn2"):
            h = PF.batch_normalization(h, batch_stat=not test)
        pred = F.reshape(h, (h.shape[0], np.prod(h.shape[1:])))
        # Uncertainty
        u = conv_unit(u, "u0", 10, k=1, s=1, p=0, act=act, test=test)
        u = F.average_pooling(u, (6, 6))
        with nn.parameter_scope("u0bn"):
            u = PF.batch_normalization(u, batch_stat=not test)
        log_var = F.reshape(u, (u.shape[0], np.prod(u.shape[1:])))
    return pred, log_var

def mnist_binary_weight_lenet_prediction(image, test=False):
    """
    Construct LeNet for MNIST (Binary Weight Network version).
    """
    with nn.parameter_scope("conv1"):
        c1 = PF.binary_weight_convolution(image, 16, (5, 5))
        c1 = F.elu(F.average_pooling(c1, (2, 2)))
    with nn.parameter_scope("conv2"):
        c2 = PF.binary_weight_convolution(c1, 16, (5, 5))
        c2 = F.elu(F.average_pooling(c2, (2, 2)))
    with nn.parameter_scope("fc3"):
        c3 = F.elu(PF.binary_weight_affine(c2, 50))
    with nn.parameter_scope("fc4"):
        c4 = PF.binary_weight_affine(c3, 10)
    return c4

def test_parameter_scope_slash():
    """Testing if parameter_scope('aaa/bbb') works.
    """
    import nnabla as nn
    from nnabla.parameter import get_parameter_or_create
    nn.clear_parameters()
    with nn.parameter_scope('aaa/bbb'):
        param = get_parameter_or_create('ccc', (2, 3, 4, 5))
        ref = np.random.randn(*param.shape).astype(np.float32)
        param.d = ref
    with nn.parameter_scope('aaa'):
        with nn.parameter_scope('bbb'):
            param = get_parameter_or_create('ccc', (2, 3, 4, 5))
    assert np.all(param.d == ref)
    nn.clear_parameters()

def test_parametric_function_api():
    """
    Testing :func:`nnabla.parametric_functions.parametric_function_api`.
    """
    import nnabla as nn
    import inspect
    nn.clear_parameters()
    shape = (2, 3, 4)
    # Signature check
    spec = inspect.getargspec(dummy_parametric_function)
    assert spec.args == ['shape', 'f', 'i', 's', 'name']
    assert spec.defaults == (10, 1, 'dummy', None)
    assert dummy_parametric_function.__doc__.splitlines()[0] == 'Doc'
    # Verify that the two different ways do the same thing.
    # Using name argument
    v = dummy_parametric_function(shape, name='group1')
    # Using parameter_scope
    with nn.parameter_scope('group1'):
        v = dummy_parametric_function(shape)
    params = nn.get_parameters()
    assert len(params) == 2
    assert list(iterkeys(params)) == ['group1/dummy/p1', 'group1/dummy/p2']
    # No scope
    v = dummy_parametric_function(shape)
    params = nn.get_parameters()
    assert len(params) == 4
    assert list(iterkeys(params)) == ['group1/dummy/p1', 'group1/dummy/p2',
                                      'dummy/p1', 'dummy/p2']
    nn.clear_parameters()

def cnn_ae_model_000(ctx, x, act=F.relu, test=False):
    with nn.parameter_scope("ae"):
        with nn.context_scope(ctx):
            # Convblock0
            h = conv_unit(x, "conv00", 32, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv01", 32, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv02", 32, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv03", 32, k=4, s=2, p=1, act=act, test=test)  # 32 -> 16
            if not test:
                h = F.dropout(h)
            # Convblock 1
            h = conv_unit(h, "conv10", 64, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv11", 64, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv12", 64, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv13", 64, k=4, s=2, p=1, act=act, test=test)  # 16 -> 8
            if not test:
                h = F.dropout(h)
            # Deconvblock0
            h = deconv_unit(h, "deconv00", 64, k=4, s=2, p=1, act=act, test=test)  # 8 -> 16
            h = deconv_unit(h, "deconv01", 64, k=3, s=1, p=1, act=act, test=test)
            h = deconv_unit(h, "deconv02", 64, k=3, s=1, p=1, act=act, test=test)
            h = deconv_unit(h, "deconv03", 64, k=3, s=1, p=1, act=act, test=test)
            # Deconvblock 1
            h = deconv_unit(h, "deconv10", 32, k=4, s=2, p=1, act=act, test=test)  # 16 -> 32
            h = deconv_unit(h, "deconv11", 32, k=3, s=1, p=1, act=act, test=test)
            h = deconv_unit(h, "deconv12", 32, k=3, s=1, p=1, act=act, test=test)
            h = deconv_unit(h, "deconv13", 3, k=3, s=1, p=1, act=None, test=test)
    return h

def conv_unit(x, scope, maps, k=4, s=2, p=1, act=F.relu, test=False):
    with nn.parameter_scope(scope):
        h = PF.convolution(x, maps, kernel=(k, k), stride=(s, s), pad=(p, p))
        if act is None:
            return h
        h = PF.batch_normalization(h, batch_stat=not test)
        h = act(h)
        return h

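# A small shape-check sketch for conv_unit (ours, not part of the original
# code): with the default k=4, s=2, p=1, the Conv -> BN -> act block halves
# the spatial resolution. Assumes `import nnabla as nn` is in scope.
def _demo_conv_unit():
    x = nn.Variable((4, 3, 32, 32))
    h = conv_unit(x, "c0", 32)  # 32x32 -> 16x16
    assert h.shape == (4, 32, 16, 16)
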
def res_block(x, scope_name, act=F.relu, dn=False, test=False):
    C = x.shape[1]
    with nn.parameter_scope(scope_name):
        # Conv -> BN -> Relu
        with nn.parameter_scope("conv1"):
            h = PF.convolution(x, C // 2, kernel=(1, 1), pad=(0, 0),
                               with_bias=False)
            h = PF.batch_normalization(h, decay_rate=0.9, batch_stat=not test)
            h = act(h)
        # Conv -> BN -> Relu
        with nn.parameter_scope("conv2"):
            h = PF.convolution(h, C // 2, kernel=(3, 3), pad=(1, 1),
                               with_bias=False)
            h = PF.batch_normalization(h, decay_rate=0.9, batch_stat=not test)
            h = act(h)
        # Conv -> BN
        with nn.parameter_scope("conv3"):
            h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                               with_bias=False)
            h = PF.batch_normalization(h, decay_rate=0.9, batch_stat=not test)
    return h

def conv_unit(x, scope, maps, k=4, s=2, p=1, act=F.prelu, test=False):
    with nn.parameter_scope(scope):
        h = PF.convolution(x, maps, kernel=(k, k), stride=(s, s), pad=(p, p))
        h = PF.batch_normalization(h, batch_stat=not test)
        # Fixed slope of 0.3 shared across channels for the PReLU-style act
        w = nn.Variable((1,))
        w.d = 0.3
        h = act(h, w)
        return h

def cnn_model_003(ctx, x, act=F.elu, do=True, test=False):
    with nn.context_scope(ctx):
        # Convblock0
        h = conv_unit(x, "conv00", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 28 -> 14
        with nn.parameter_scope("bn0"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)
        # Convblock 1
        h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test)
        h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test)
        h = F.max_pooling(h, (2, 2))  # 14 -> 7
        with nn.parameter_scope("bn1"):
            h = PF.batch_normalization(h, batch_stat=not test)
        if not test and do:
            h = F.dropout(h)
        # Convblock 2
        h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act, test=test)  # 7 -> 5
        h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test)
        h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test)
        u = h
        # Convblock 3
        h = conv_unit(h, "conv23", 10, k=1, s=1, p=0, act=act, test=test)
        h = F.average_pooling(h, (5, 5))
        with nn.parameter_scope("bn2"):
            h = PF.batch_normalization(h, batch_stat=not test)
        pred = F.reshape(h, (h.shape[0], np.prod(h.shape[1:])))
        # Uncertainty
        u = conv_unit(u, "u0", 10, k=1, s=1, p=0, act=act, test=test)
        u = F.average_pooling(u, (5, 5))
        with nn.parameter_scope("u0bn"):
            u = PF.batch_normalization(u, batch_stat=not test)
        log_var = F.reshape(u, (u.shape[0], np.prod(u.shape[1:])))
    return pred, log_var

def test_graph_model(model, seed):
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4], need_grad=True)
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    nn.set_default_context(nn.Context())

    # Forwardprop by definition
    nn.clear_parameters()
    if model == "mlp":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 3)
            z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
    elif model == "recurrent":
        with nn.parameter_scope('fc1'):
            z = PF.affine(x, 3)
            z2 = F.relu(z, inplace=True)
        h = z2
        for _ in range(2):
            with nn.parameter_scope('fc2'):
                h = PF.affine(h, 3)
                h = F.relu(h, inplace=True)
        with nn.parameter_scope('fc3'):
            z3 = PF.affine(h, 5)
    elif model == "convolution":
        with nn.parameter_scope('conv1'):
            z = PF.convolution(x, 3, (2, 2))
            z2 = F.relu(z, inplace=True)
        with nn.parameter_scope('fc2'):
            z3 = PF.affine(z2, 5)
    else:
        raise ValueError()
    l = F.softmax_cross_entropy(z3, t, 1)
    L = F.mean(l)

    # Forwardprop
    L.forward(clear_no_need_grad=True)

    # Backprop
    # Diff should be initialized since they are always accumulated
    x.grad.zero()
    L.backward(clear_buffer=True)
    x.g = rng.randn(*x.shape)
    parameters = nn.get_parameters()
    for param in parameters.values():
        param.grad.zero()
    inputs = [x] + list(parameters.values())
    from nbla_test_utils import \
        compute_analytical_and_numerical_grad_graph as grads
    agrad, ngrad = grads(L, inputs, 1e-3)
    assert np.allclose(ngrad, agrad, atol=1.05e-2)

def test_graph_unlink_backward(seed):
    rng = np.random.RandomState(seed)
    x0 = nn.Variable([2, 4], need_grad=True)
    x1 = nn.Variable([2, 4], need_grad=True)
    x0.d = rng.randn(*x0.shape)
    x1.d = rng.randn(*x1.shape)
    x0.grad.zero()
    x1.grad.zero()
    with nn.auto_forward():
        with nn.parameter_scope("fc0"):
            h0 = PF.affine(x0, 2)
        with nn.parameter_scope("fc1"):
            h1 = PF.affine(x1, 2)
        h0.need_grad = False
        h = h0 + h1
        with nn.parameter_scope("fc"):
            y = PF.affine(h, 1)
    y.backward(clear_buffer=True)
    assert np.all(x0.g == 0)
    assert not np.all(x1.g == 0)

def mnist_binary_connect_lenet_prediction(image, test=False):
    """
    Construct LeNet for MNIST (BinaryConnect version).
    """
    with nn.parameter_scope("conv1"):
        c1 = PF.binary_connect_convolution(image, 16, (5, 5))
        c1 = PF.batch_normalization(c1, batch_stat=not test)
        c1 = F.elu(F.average_pooling(c1, (2, 2)))
    with nn.parameter_scope("conv2"):
        c2 = PF.binary_connect_convolution(c1, 16, (5, 5))
        c2 = PF.batch_normalization(c2, batch_stat=not test)
        c2 = F.elu(F.average_pooling(c2, (2, 2)))
    with nn.parameter_scope("fc3"):
        c3 = PF.binary_connect_affine(c2, 50)
        c3 = PF.batch_normalization(c3, batch_stat=not test)
        c3 = F.elu(c3)
    with nn.parameter_scope("fc4"):
        c4 = PF.binary_connect_affine(c3, 10)
        c4 = PF.batch_normalization(c4, batch_stat=not test)
    return c4

def discriminator(x, maxh=256, test=False, output_hidden=False):
    """
    Building discriminator network which maps a (B, 1, 28, 28) input to
    a (B, 1) output.
    """
    # Define shortcut functions
    def bn(xx):
        # Batch normalization
        return PF.batch_normalization(xx, batch_stat=not test)

    def downsample2(xx, c):
        return PF.convolution(xx, c, (3, 3), pad=(1, 1), stride=(2, 2),
                              with_bias=False)

    assert maxh // 8 > 0
    with nn.parameter_scope("dis"):
        # (1, 28, 28) --> (32, 16, 16)
        with nn.parameter_scope("conv1"):
            c1 = F.elu(bn(PF.convolution(x, maxh // 8, (3, 3), pad=(3, 3),
                                         stride=(2, 2), with_bias=False)))
        # (32, 16, 16) --> (64, 8, 8)
        with nn.parameter_scope("conv2"):
            c2 = F.elu(bn(downsample2(c1, maxh // 4)))
        # (64, 8, 8) --> (128, 4, 4)
        with nn.parameter_scope("conv3"):
            c3 = F.elu(bn(downsample2(c2, maxh // 2)))
        # (128, 4, 4) --> (256, 4, 4)
        with nn.parameter_scope("conv4"):
            c4 = bn(PF.convolution(c3, maxh, (3, 3), pad=(1, 1),
                                   with_bias=False))
        # (256, 4, 4) --> (1,)
        with nn.parameter_scope("fc1"):
            f = PF.affine(c4, 1)
    if output_hidden:
        return f, [c1, c2, c3, c4]
    return f

def test_save_load_parameters():
    v = nn.Variable([64, 1, 28, 28], need_grad=False)
    with nn.parameter_scope("param1"):
        with nn.parameter_scope("conv1"):
            h = PF.convolution(v, 32, (3, 3))
            b = PF.batch_normalization(h, batch_stat=True)
        with nn.parameter_scope("conv2"):
            h1 = PF.convolution(v, 32, (3, 3))
            b2 = PF.batch_normalization(h1, batch_stat=True)
    for k, v in iteritems(nn.get_parameters(grad_only=False)):
        v.data.cast(np.float32)[...] = np.random.randn(*v.shape)

    with nn.parameter_scope("param1"):
        param1 = nn.get_parameters(grad_only=False)
        nn.save_parameters("tmp.h5")
        nn.save_parameters("tmp.protobuf")

    with nn.parameter_scope("param2"):
        nn.load_parameters('tmp.h5')
        param2 = nn.get_parameters(grad_only=False)

    with nn.parameter_scope("param3"):
        nn.load_parameters('tmp.protobuf')
        param3 = nn.get_parameters(grad_only=False)

    for par2 in [param2, param3]:
        assert param1.keys() == par2.keys()  # Check order
        for (n1, p1), (n2, p2) in zip(sorted(param1.items()),
                                      sorted(par2.items())):
            assert n1 == n2
            assert np.all(p1.d == p2.d)
            assert p1.data.dtype == p2.data.dtype
            assert p1.need_grad == p2.need_grad

def test_graph_clear_buffer(seed):
    np.random.seed(313)
    rng = np.random.RandomState(seed)
    x = nn.Variable([2, 3, 4, 4])
    t = nn.Variable([2, 1])
    x.d = rng.randn(*x.shape)
    t.d = rng.randint(0, 5, size=t.shape)

    # Network definition
    nn.set_default_context(nn.Context())
    nn.clear_parameters()
    x1 = x + 1
    x2 = x1 - 1
    with nn.parameter_scope('conv1'):
        z = PF.convolution(x2, 3, (2, 2))
        z2 = F.relu(z, inplace=True)
    with nn.parameter_scope('fc2'):
        z3 = PF.affine(z2, 5)
    l = F.softmax_cross_entropy(z3, t, 1)
    L = F.mean(l)

    # Forwardprop
    import tempfile
    import os
    tmpd = tempfile.mkdtemp()
    nn.save_parameters(os.path.join(tmpd, 'parameter.h5'))
    first = False
    for cnng in [False, True]:
        for cb in [False, True]:
            _ = nn.load_parameters(os.path.join(tmpd, 'parameter.h5'))
            for v in nn.get_parameters().values():
                v.grad.zero()
            L.forward(clear_no_need_grad=cnng)
            L.backward(clear_buffer=cb)
            if not first:
                first = True
                g = list(nn.get_parameters().values())[0].g.copy()
            else:
                g2 = list(nn.get_parameters().values())[0].g.copy()
                assert np.all(g == g2)

def mnist_resnet_prediction(image, test=False):
    """
    Construct ResNet for MNIST.
    """
    image /= 255.0

    def bn(x):
        return PF.batch_normalization(x, batch_stat=not test)

    def res_unit(x, scope):
        C = x.shape[1]
        with nn.parameter_scope(scope):
            with nn.parameter_scope('conv1'):
                h = F.elu(bn(PF.convolution(x, C // 2, (1, 1),
                                            with_bias=False)))
            with nn.parameter_scope('conv2'):
                h = F.elu(
                    bn(PF.convolution(h, C // 2, (3, 3), pad=(1, 1),
                                      with_bias=False)))
            with nn.parameter_scope('conv3'):
                h = bn(PF.convolution(h, C, (1, 1), with_bias=False))
        return F.elu(F.add2(h, x, inplace=True))

    # Conv1 --> 64 x 32 x 32
    with nn.parameter_scope("conv1"):
        c1 = F.elu(
            bn(PF.convolution(image, 64, (3, 3), pad=(3, 3),
                              with_bias=False)))
    # Conv2 --> 64 x 16 x 16
    c2 = F.max_pooling(res_unit(c1, "conv2"), (2, 2))
    # Conv3 --> 64 x 8 x 8
    c3 = F.max_pooling(res_unit(c2, "conv3"), (2, 2))
    # Conv4 --> 64 x 8 x 8
    c4 = res_unit(c3, "conv4")
    # Conv5 --> 64 x 4 x 4
    c5 = F.max_pooling(res_unit(c4, "conv5"), (2, 2))
    # Conv6 --> 64 x 4 x 4
    c6 = res_unit(c5, "conv6")
    pl = F.average_pooling(c6, (4, 4))
    with nn.parameter_scope("classifier"):
        y = PF.affine(pl, 10)
    return y

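# A hedged usage sketch (ours, not from the original code): wire the MNIST
# ResNet into a standard cross-entropy training loss and run one forward
# pass on random data. Assumes the usual `nn`, `F`, `np` imports.
def _demo_mnist_resnet():
    nn.clear_parameters()
    image = nn.Variable((8, 1, 28, 28))
    label = nn.Variable((8, 1))
    pred = mnist_resnet_prediction(image)  # (8, 10) logits
    loss = F.mean(F.softmax_cross_entropy(pred, label))
    image.d = np.random.rand(*image.shape) * 255
    label.d = np.random.randint(0, 10, size=label.shape)
    loss.forward()
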
def mlp_net(x, n_h, n_y, test=False):
    """
    Function for building multi-layer-perceptron with batch_normalization

    Args:
        x(`~nnabla.Variable`): N-D array
        n_h(int): number of units in an intermediate layer
        n_y(int): number of classes
        test: operation type train=True, test=False

    Returns:
        ~nnabla.Variable: log(p(y|x))
    """
    h = x
    with nn.parameter_scope("fc1"):
        h = F.relu(PF.batch_normalization(
            PF.affine(h, n_h), batch_stat=not test), inplace=True)
    with nn.parameter_scope("fc2"):
        h = F.relu(PF.batch_normalization(
            PF.affine(h, n_h), batch_stat=not test), inplace=True)
    with nn.parameter_scope("fc3"):
        h = PF.affine(h, n_y)
    return h

def bn_opp_resblock(x, maps, kernel=(3, 3), pad=(1, 1), stride=(1, 1),
                    test=False, channel_last=False, name='convblock'):
    axes = get_channel_axes(channel_last)
    with nn.parameter_scope(name):
        h = PF.batch_normalization(x, axes=axes, batch_stat=not test)
        z = h
        h = PF.convolution(h, maps, kernel=kernel, pad=pad, stride=stride,
                           channel_last=channel_last, with_bias=True)
    return F.relu(z + h)

def create_ema_op(params, ema_decay=0.9999):
    """
    Define exponential moving average update for trainable params.
    """
    def ema_update(p_ema, p_train):
        return F.assign(p_ema, ema_decay * p_ema + (1. - ema_decay) * p_train)

    ops = []
    with nn.parameter_scope("ema"):
        for name, p_train in params.items():
            p_ema = get_parameter_or_create(name, shape=p_train.shape,
                                            need_grad=False)
            p_ema.data.copy_from(p_train.data,
                                 use_current_context=False)  # initialize
            ops.append(ema_update(p_ema, p_train))
        ema_params = nn.get_parameters(grad_only=False)
    return F.sink(*ops), ema_params

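# Hedged usage sketch for create_ema_op (ours): build the EMA graph once,
# then forward the returned sink after every solver update so the "ema/..."
# copies track the trainable parameters. `solver` is a hypothetical
# nnabla solver instance.
#
#   ema_op, ema_params = create_ema_op(nn.get_parameters())
#   ...
#   solver.update()
#   ema_op.forward()   # refresh the moving averages
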
def mnist_lenet_prediction_slim(image, scope="slim", rrate=0.75, test=False):
    """
    Construct LeNet for MNIST.
    """
    with nn.parameter_scope(scope):
        image /= 255.0
        c1 = PF.convolution(image, 16, (5, 5), name='conv1')
        c1 = F.relu(F.max_pooling(c1, (2, 2)), inplace=True)
        c2 = PF.convolution(c1, 16, (5, 5), name='conv2')
        c2 = F.relu(F.max_pooling(c2, (2, 2)), inplace=True)

        # SVD applied
        inmaps = np.prod(c2.shape[1:])  # c * h * w
        outmaps0 = 50                   # original outmaps
        outmaps1 = reduce_maps(inmaps, outmaps0, rrate)
        d0 = F.relu(PF.affine(c2, outmaps1, name='fc-d0'), inplace=True)
        d1 = F.relu(PF.affine(d0, outmaps0, name='fc-d1'), inplace=True)

        c4 = PF.affine(d1, 10, name='fc4')
    return c4

def fpq_relu_resblock(x, maps, kernel=(3, 3), pad=(1, 1), stride=(1, 1),
                      test=False, sign=False, n=8, delta=2e-4,
                      name="convblock"):
    h = x
    with nn.parameter_scope(name):
        h = PF.convolution(h, maps, kernel=kernel, pad=pad, stride=stride,
                           with_bias=False)
        h = PF.batch_normalization(h, batch_stat=not test)
    return F.fixed_point_quantize(h + x, n=n, sign=sign, delta=delta)

def get_fnet_output(conf, rnn_length, frame_t_pre, frame_t, scope_name):
    """
    Return the flow estimations for LR and HR from flow-estimator network
    """
    fnet_input = F.concatenate(frame_t_pre, frame_t)
    fnet_input = F.reshape(fnet_input,
                           (conf.train.batch_size * (rnn_length - 1),
                            conf.train.crop_size, conf.train.crop_size,
                            2 * 3))
    with nn.parameter_scope(scope_name + "fnet"):
        flow_lr = flow_estimator(fnet_input)
        flow_hr = upscale_four(flow_lr * 4.0)  # a linear up-sampling
    flow_hr = F.reshape(flow_hr,
                        (conf.train.batch_size, (rnn_length - 1),
                         conf.train.crop_size * 4, conf.train.crop_size * 4,
                         2), inplace=False)
    fnet_output = collections.namedtuple('fnet_output', 'flow_lr, flow_hr')
    return fnet_output(
        flow_lr=flow_lr,
        flow_hr=flow_hr,
    )

def network(x, y_index, test=False):
    # Input -> 3,64,64
    # Convolution -> 16,31,31
    with nn.parameter_scope('Convolution'):
        h = PF.convolution(x, 16, (3, 3), (0, 0), (2, 2))
    # Tanh
    h = F.tanh(h)
    # MaxPooling -> 16,16,11
    h = F.max_pooling(h, (2, 3), (2, 3))
    # Dropout
    if not test:
        h = F.dropout(h)
    # Convolution_2 -> 32,6,5
    with nn.parameter_scope('Convolution_2'):
        h = PF.convolution(h, 32, (5, 3), (0, 0), (2, 2))
    # ReLU_4
    h = F.relu(h, True)
    # MaxPooling_2 -> 32,3,3
    h = F.max_pooling(h, (2, 2), (2, 2))
    # Dropout_2
    if not test:
        h = F.dropout(h)
    # Convolution_3 -> 64,1,1
    with nn.parameter_scope('Convolution_3'):
        h = PF.convolution(h, 64, (3, 3), (0, 0), (2, 2))
    # Tanh_2
    h = F.tanh(h)
    # Dropout_3
    if not test:
        h = F.dropout(h)
    # Affine -> 50
    with nn.parameter_scope('Affine'):
        h = PF.affine(h, (50,))
    # ReLU_2
    h = F.relu(h, True)
    # Dropout_4
    if not test:
        h = F.dropout(h)
    # Affine_2 -> 5
    with nn.parameter_scope('Affine_2'):
        h = PF.affine(h, (5,))
    # ELU
    h = F.elu(h)
    # Affine_3 -> 1
    with nn.parameter_scope('Affine_3'):
        h = PF.affine(h, (1,))
    # SquaredError
    # h = F.squared_error(h, y_index)
    return h

def clbn_resblock(x, maps, kernel=(3, 3), pad=(1, 1), stride=(1, 1),
                  test=False, bias_w=False, name='clbn-convblock'):
    h = x
    with nn.parameter_scope(name):
        h = PF.convolution(x, maps, kernel=kernel, pad=pad, stride=stride,
                           channel_last=True, with_bias=bias_w)
        z = h
        h = PF.batch_normalization(h, axes=[3], batch_stat=not test)
    return F.relu(h + z)

def generator(z, maxh=256, test=False, output_hidden=False):
    """
    Building generator network which takes (B, Z, 1, 1) inputs and generates
    (B, 1, 28, 28) outputs.
    """
    # Define shortcut functions
    def bn(x):
        # Batch normalization
        return PF.batch_normalization(x, batch_stat=not test)

    def upsample2(x, c):
        # Twofold upsampling with deconvolution.
        return PF.deconvolution(x, c, kernel=(4, 4), pad=(1, 1),
                                stride=(2, 2), with_bias=False)

    assert maxh // 4 > 0
    with nn.parameter_scope("gen"):
        # (Z, 1, 1) --> (256, 4, 4)
        with nn.parameter_scope("deconv1"):
            d1 = F.elu(bn(PF.deconvolution(z, maxh, (4, 4), with_bias=False)))
        # (256, 4, 4) --> (128, 8, 8)
        with nn.parameter_scope("deconv2"):
            d2 = F.elu(bn(upsample2(d1, maxh // 2)))
        # (128, 8, 8) --> (64, 16, 16)
        with nn.parameter_scope("deconv3"):
            d3 = F.elu(bn(upsample2(d2, maxh // 4)))
        # (64, 16, 16) --> (32, 28, 28)
        with nn.parameter_scope("deconv4"):
            # Convolution with kernel=4, pad=3 and stride=2 transforms a
            # 28 x 28 map to a 16 x 16 map. Deconvolution with those
            # parameters behaves like an inverse operation, i.e. maps
            # 16 x 16 to 28 x 28.
            d4 = F.elu(
                bn(PF.deconvolution(d3, maxh // 8, (4, 4), pad=(3, 3),
                                    stride=(2, 2), with_bias=False)))
        # (32, 28, 28) --> (1, 28, 28)
        with nn.parameter_scope("conv5"):
            x = F.tanh(PF.convolution(d4, 1, (3, 3), pad=(1, 1)))
    if output_hidden:
        return x, [d1, d2, d3, d4]
    return x

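# Hedged usage sketch (ours): sample latent vectors and run the generator
# once; with the default maxh=256 the output is a (B, 1, 28, 28) batch of
# tanh-scaled images in [-1, 1]. Assumes the usual `nn`, `np` imports.
def _demo_generator():
    nn.clear_parameters()
    z = nn.Variable((16, 100, 1, 1))
    fake = generator(z)
    z.d = np.random.randn(*z.shape)
    fake.forward()
    assert fake.shape == (16, 1, 28, 28)
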
def __init__(self, embedding_dim, num_embedding, commitment_cost, rng,
             scope_name='vector_quantizer'):
    self.embedding_dim = embedding_dim
    self.num_embedding = num_embedding
    self.commitment_cost = commitment_cost
    self.rng = rng
    self.scope_name = scope_name

    with nn.parameter_scope(scope_name):
        self.embedding_weight = nn.parameter.get_parameter_or_create(
            'W', shape=(self.num_embedding, self.embedding_dim),
            initializer=I.UniformInitializer(
                (-1. / self.num_embedding, 1. / self.num_embedding),
                rng=self.rng),
            need_grad=True)

def res_block(self, x, scope_name='res_block', test=False):
    with nn.parameter_scope(scope_name):
        out = F.relu(x, inplace=True)
        out = PF.convolution(out, self.num_hidden, (3, 3),
                             stride=(1, 1), pad=(1, 1), with_bias=False,
                             name='conv_1', rng=self.rng)
        out = PF.batch_normalization(out, name='bn_1', batch_stat=not test)
        out = F.relu(out, inplace=True)
        out = PF.convolution(out, self.num_hidden, (1, 1), stride=(1, 1),
                             with_bias=False, name='conv_2', rng=self.rng)
        out = PF.batch_normalization(out, name='bn_2', batch_stat=not test)
    return x + out

def sameblock(x, out_features, kernel_size=3, padding=1, groups=1,
              test=False, comm=None):
    if comm:
        batchnorm = functools.partial(PF.sync_batch_normalization,
                                      comm=comm, group='world', axes=[1],
                                      decay_rate=0.9, eps=1e-05,
                                      batch_stat=not test)
    else:
        # 1 GPU
        batchnorm = functools.partial(PF.batch_normalization, axes=[1],
                                      decay_rate=0.9, eps=1e-05,
                                      batch_stat=not test)

    inmaps, outmaps = x.shape[1], out_features
    k_w = I.calc_normal_std_he_forward(
        inmaps, outmaps, kernel=(kernel_size, kernel_size)) / np.sqrt(2.)
    k_b = I.calc_normal_std_he_forward(inmaps, outmaps) / np.sqrt(2.)
    w_init = I.UniformInitializer((-k_w, k_w))
    b_init = I.UniformInitializer((-k_b, k_b))

    with nn.parameter_scope("downblock"):
        out = PF.convolution(x, outmaps=out_features,
                             kernel=(kernel_size, kernel_size),
                             pad=(padding, padding), group=groups,
                             w_init=w_init, b_init=b_init)
        out = batchnorm(out)
        out = F.relu(out, inplace=True)
    return out

def requantize_bias(self, f, inputs, scope, rm, nr, dt, skip_bias=False):
    def with_bias():
        return len(inputs) == 3

    functions_with_bias = [
        'Affine',
        'Convolution',
        'Deconvolution',
        'DepthwiseConvolution',
        'DepthwiseDeconvolution'
    ]
    fn = f.info.type_name
    # Handle bias: recalculate scale and zero point of bias
    if fn in functions_with_bias and with_bias():
        x, w, b = inputs
        sx, zpx, sw = None, None, None
        # Get scale and zero_point of x
        sx, zpx = self.get_quantization_params(x)
        # Get scale and zero_point of w
        sw, _ = self.get_quantization_params(w)
        if sx is not None and sw is not None:
            sbd = np.reshape(sx.copy(), (1,)) * np.reshape(sw.copy(), (1,))
            with nn.parameter_scope(scope):
                sb = nn.parameter.get_parameter_or_create(
                    'scale-b', (1,), sbd, False)
                zpbd = np.reshape(zpx.copy(), (1,))
                zpb = nn.parameter.get_parameter_or_create(
                    'zeropoint-b', (1,), zpbd, False)
            if b.parent is not None \
                    and b.parent.info.type_name == 'QuantizeLinear':
                b = b.parent.inputs[0]
            # Quantize/Dequantize bias only when skip_bias is False
            b = F.quantize_linear(b, sb, zpb, rm, nr, dt) \
                if not skip_bias else b
            inputs[2] = b
    return inputs

def modify(self, f, inputs):
    outputs = f.outputs[0]
    # Not end
    if len(outputs.function_references) == 0:
        return
    # Function is BN whose previous function is an inner-prod layer
    if f.info.type_name == 'BatchNormalization' \
            and inputs[0].parent.info.type_name in self._fct_set:
        return inputs[0]
    # Next func is not BatchNorm
    next_func = outputs.function_references[0]
    if next_func.info.type_name != 'BatchNormalization':
        return
    # Variable is not forked
    if len(f.outputs[0].function_references) != 1:
        return
    # Function is inner-product layer
    if f.info.type_name not in self._fct_set:
        return
    ip_func = f
    bn_func = next_func
    w_data, b_data = self._compute_folded_parameters(ip_func, bn_func)
    x = inputs[0]
    scope = self.get_parameter_scope(inputs[1])
    with nn.parameter_scope(scope):
        w = get_parameter_or_create('w-folded', inputs[1].shape,
                                    w_data, inputs[1].need_grad)
        b = get_parameter_or_create('b-folded', inputs[2].shape,
                                    b_data, inputs[2].need_grad)
    h = self.connect(f, x, w, b)
    return h

def test_graph_representation(self):
    h = nn.Variable((2, 1))
    h.data.data[0] = 1
    h.data.data[1] = 2
    x = nn.Variable((2, 1))
    x.data.data[0] = 2
    x.data.data[1] = 4
    with nn.parameter_scope("test_graph_representation"):
        r = L.graph_representation(h, x, 1,
                                   w_init=I.ConstantInitializer(1),
                                   b_init=I.ConstantInitializer(0))
    self.assertEqual((1, 1), r.shape)
    r.forward()
    actual = 0
    actual += 1 / (1 + math.exp(-3)) * math.tanh(3)
    actual += 1 / (1 + math.exp(-6)) * math.tanh(6)
    self.assertTrue(np.allclose(actual, r.data.data[0, 0]))

def inverted_residual(self, x, maps=32, kernel=(3, 3), stride=(1, 1), ef=6,
                      act="relu", se=False, name="inv-resblock"):
    h = x
    c = h.shape[get_channel_axis(self.channel_last)]
    hmaps = round(c * ef)
    omaps = maps
    with nn.parameter_scope(name):
        h = self.conv_bn_act(h, hmaps, (1, 1), (1, 1),
                             act=act, name="conv-pw") if ef != 1 else h
        h = self.conv_bn_act(h, hmaps, kernel, stride, group=hmaps,
                             act=act, name="conv-dw")
        h = self.squeeze_and_excite(h, name="squeeze-and-excite") if se else h
        h = self.conv_bn(h, omaps, (1, 1), stride=(1, 1),
                         name="conv-pw-linear")
        use_res_connect = (stride == (1, 1) and c == omaps)
        if use_res_connect:
            h = x + h
    return h

def discriminator(x, scopename, maps=64, init_method=None):
    with nn.parameter_scope('discriminator'):
        with nn.parameter_scope(scopename):
            with nn.parameter_scope('conv1'):
                x = convolution(x, maps, kernel=(4, 4), pad=(1, 1),
                                stride=(2, 2), init_method=init_method)
                x = F.leaky_relu(x, alpha=0.2)
            with nn.parameter_scope('conv2'):
                x = convblock(x, n=maps * 2, k=(4, 4), s=(2, 2), p=(1, 1),
                              leaky=True, init_method=init_method)
            with nn.parameter_scope('conv3'):
                x = convblock(x, n=maps * 4, k=(4, 4), s=(2, 2), p=(1, 1),
                              leaky=True, init_method=init_method)
            with nn.parameter_scope('conv4'):
                x = convblock(x, n=maps * 8, k=(4, 4), s=(1, 1), p=(1, 1),
                              leaky=True, init_method=init_method)
            with nn.parameter_scope('conv5'):
                x = convolution(x, 1, kernel=(4, 4), pad=(1, 1),
                                stride=(1, 1), init_method=init_method)
    return x

def decoder(self, x, test):
    with nn.parameter_scope('decoder'):
        out = self.decoder_res_stack(x, test=test)
        out = F.relu(out)
        out = PF.deconvolution(out, self.num_hidden, (4, 4),
                               stride=(2, 2), pad=(1, 1),
                               name='deconv_1', rng=self.rng)
        out = PF.batch_normalization(out, batch_stat=not test)
        out = F.relu(out)
        out = PF.deconvolution(out, self.in_channels, (4, 4),
                               stride=(2, 2), pad=(1, 1),
                               name='deconv_2', rng=self.rng)
        out = F.tanh(out)
    return out

def bn_self_folding_resblock(x, i, maps, kernel=(3, 3), pad=(1, 1),
                             stride=(1, 1), channel_last=False,
                             name='convblock'):
    h = x
    with nn.parameter_scope(name):
        h = PF.convolution(h, maps, kernel=kernel, pad=pad, stride=stride,
                           channel_last=channel_last, with_bias=False)
        axes = get_channel_axes(channel_last)
        a, b = create_scale_bias(1, h.shape, axes=axes)
        h = a * h + b
    return F.relu(h + x)

def res_block(x, out_ch, name):
    with nn.parameter_scope(name):
        residual = x
        out = F.pad(x, (1, 1, 1, 1), 'reflect')
        out = PF.convolution(out, out_ch, kernel=(3, 3), stride=(1, 1),
                             name='conv1')
        out = F.instance_normalization(out, gamma=None, beta=None,
                                       channel_axis=1)
        out = PF.prelu(out)
        out = F.pad(out, (1, 1, 1, 1), 'reflect')
        out = PF.convolution(out, out_ch, kernel=(3, 3), stride=(1, 1),
                             name='conv2')
        out = F.instance_normalization(out, gamma=None, beta=None,
                                       channel_axis=1)
        out += residual
        out = PF.prelu(out)
    return out

def _make_conv_level(x, ochannels, convs, test, stride=1, dilation=1,
                     channel_last=False):
    axes = [3 if channel_last else 1]
    for i in range(convs):
        with nn.parameter_scope("conv{}".format(i + 1)):
            s = (stride, stride) if i == 0 else (1, 1)
            x = pf_convolution(x, ochannels, (3, 3), stride=s,
                               pad=(dilation, dilation),
                               dilation=(dilation, dilation),
                               with_bias=False, channel_last=channel_last)
            x = F.relu(
                PF.batch_normalization(x, axes=axes, batch_stat=not test))
    return x

def upsample(x, ochannels, test, kernel_size=4, channel_last=False):
    rng = np.random.RandomState(313)
    axes = 3 if channel_last else 1
    with nn.parameter_scope("up"):
        x = pf_convolution(x, ochannels, (1, 1), stride=(1, 1),
                           with_bias=False,
                           w_init=he_initializer(ochannels, kernel_size, rng),
                           channel_last=channel_last)
        x = F.relu(PF.batch_normalization(x, axes=[axes],
                                          batch_stat=not test))
        ichannels = x.shape[axes]
        x = pf_depthwise_deconvolution(
            x, (kernel_size, kernel_size), pad=(1, 1), stride=(2, 2),
            dilation=(1, 1), with_bias=False,
            w_init=bilinear_depthwise_initializer(ichannels, kernel_size),
            channel_last=channel_last)
    return x

def res_block_2(x, c, k, d):
    r"""Residual block of type 2.

    Args:
        x (nn.Variable): Input variable of shape (B, C, L).
        c (int): Number of channels.
        k (int): Kernel size.
        d (tuple of `int`): Dilations.

    Returns:
        nn.Variable: Output variable.
    """
    for i in range(len(d)):
        with nn.parameter_scope(f"conv_{i}"):
            out = F.leaky_relu(x, 0.1)
            out = wn_conv(out, c, (k,), dilation=(d[i],),
                          pad=((k * d[i] - d[i]) // 2,))
            x = x + out
    return x

def node_annotation(self):
    h = nn.Variable((1, 2))
    h.data.data[0, 0] = 1
    h.data.data[0, 1] = 0
    x = nn.Variable((1, 1))
    x.data.data[0] = 2
    with nn.parameter_scope("test_node_annotation"):
        h, x = L.node_annotation(h, x, 1,
                                 w_init=I.ConstantInitializer(1),
                                 b_init=I.ConstantInitializer(0))
    self.assertEqual((1, 2), h.shape)
    self.assertEqual((1, 1), x.shape)
    F.sink(h, x).forward()
    actual = 1 / (1 + math.exp(-3))
    self.assertTrue(np.allclose(actual, h.data.data[0, 0]))
    self.assertEqual(0, h.data.data[0, 1])
    self.assertTrue(np.allclose(actual, x.data.data[0, 0]))

def mapping_network(noise, outmaps=512, num_layers=8,
                    net_scope='G_mapping/Dense'):
    """
    A mapping network which embeds input noise into a vector in latent space.
    The activation layer contains a multiplication by np.sqrt(2).
    """
    lrmul = 0.01
    runtime_coef = 0.00044194172
    out = noise
    for i in range(num_layers):
        with nn.parameter_scope(f'{net_scope}{i}'):
            W, bias = weight_init_fn(shape=(out.shape[1], outmaps),
                                     lrmul=lrmul)
            out = F.affine(out, W * runtime_coef, bias * lrmul)
            out = F.mul_scalar(F.leaky_relu(out, alpha=0.2, inplace=True),
                               np.sqrt(2), inplace=True)
    return out

def node_representation(h, x, n_outmaps, w_init=None, b_init=None):
    """
    Outputs the node selection/representation model.

    Arguments:
    h -- the input vertex representations (nnabla.Variable with shape (|V|, H))
    x -- the input vertex annotation (nnabla.Variable with shape (|V|, X))
    n_outmaps -- the size of node representation
    w_init -- (optional)
    b_init -- (optional)

    Return value
    - Returns a variable with shape (|V|, n_outmaps)
    """
    with nn.parameter_scope("node_representation"):
        return PF.affine(F.concatenate(h, x), n_outmaps,
                         w_init=w_init, b_init=b_init)

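# Hedged shape check for node_representation (ours): concatenating per-node
# states (|V|, H) with annotations (|V|, X) and applying one affine layer
# yields a (|V|, n_outmaps) representation. Assumes `import nnabla as nn`.
def _demo_node_representation():
    h = nn.Variable((5, 16))  # |V| = 5 nodes, hidden size H = 16
    x = nn.Variable((5, 4))   # annotation size X = 4
    r = node_representation(h, x, 8)
    assert r.shape == (5, 8)
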
def conv_bn_act(self, x, maps=32, kernel=(3, 3), stride=(1, 1), group=1,
                act="linear", name="conv-bn"):
    conv_opts = dict(stride=stride, group=group,
                     channel_last=self.channel_last, with_bias=False)
    axes = [get_channel_axis(self.channel_last)]
    with nn.parameter_scope(name):
        h = pf_convolution(x, maps, kernel, **conv_opts)
        h = PF.batch_normalization(h, axes, batch_stat=not self.test,
                                   decay_rate=0.99)
        h = self.act_map[act](h)
    return h

def set_parameter(self, key, param, raise_if_missing=False):
    if key.startswith('@'):
        # Recursively set parameters
        pos = key.find('/')
        if pos < 0 or pos == len(key) - 1:
            raise ValueError(
                f'Invalid parameter key {key}.'
                ' A module parameter scope represented'
                ' as `@name` must be followed by `/`.')
        module_name, subkey = key[1:pos], key[pos + 1:]
        if module_name in self.submodules.keys():
            self.submodules[module_name].set_parameter(subkey, param)
        elif raise_if_missing:
            raise ValueError(
                f'A child module {module_name} cannot be found in {self}. '
                'This error is raised because `raise_if_missing` is '
                'specified as True. Turn it off to ignore missing modules.')
        return
    # Set parameters
    with nn.parameter_scope('', self.parameter_scope):
        nn.parameter.set_parameter(key, param)

def upscale_four(inputs, scope='upscale_four'):
    """
    Mimic the tensorflow bilinear upscaling for a fixed ratio of 4.
    """
    with nn.parameter_scope(scope):
        b, h, w, c = inputs.shape
        p_inputs = F.concatenate(
            inputs, inputs[:, -1:, :, :], axis=1)  # pad bottom
        p_inputs = F.concatenate(
            p_inputs, p_inputs[:, :, -1:, :], axis=2)  # pad right
        hi_res_bin = [
            [
                inputs,                   # top-left
                p_inputs[:, :-1, 1:, :]   # top-right
            ],
            [
                p_inputs[:, 1:, :-1, :],  # bottom-left
                p_inputs[:, 1:, 1:, :]    # bottom-right
            ]
        ]
        hi_res_array = []
        for hi in range(4):
            for wj in range(4):
                hi_res_array.append(
                    hi_res_bin[0][0] * (1.0 - 0.25 * hi) * (1.0 - 0.25 * wj)
                    + hi_res_bin[0][1] * (1.0 - 0.25 * hi) * (0.25 * wj)
                    + hi_res_bin[1][0] * (0.25 * hi) * (1.0 - 0.25 * wj)
                    + hi_res_bin[1][1] * (0.25 * hi) * (0.25 * wj)
                )
        hi_res = F.stack(*hi_res_array, axis=3)  # shape (b, h, w, 16, c)
        hi_res_reshape = F.reshape(hi_res, (b, h, w, 4, 4, c))
        hi_res_reshape = F.transpose(hi_res_reshape, (0, 1, 3, 2, 4, 5))
        hi_res_reshape = F.reshape(hi_res_reshape, (b, h * 4, w * 4, c))
    return hi_res_reshape

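# Hedged shape check for upscale_four (ours): the function is purely
# functional (it creates no parameters), so the 4x upscaling is visible
# directly in the NHWC output shape at graph-construction time.
def _demo_upscale_four():
    x = nn.Variable((1, 8, 8, 3))
    y = upscale_four(x)
    assert y.shape == (1, 32, 32, 3)
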
def discriminator(x, y, scopename="discriminator", maps=64, n_classes=1000,
                  s=4, test=False, sn=True):
    with nn.parameter_scope(scopename):
        # Resblocks
        h = optblock_d(x, y, "block-1", n_classes, maps * 1, test=test, sn=sn)
        h = resblock_d(h, y, "block-2", n_classes, maps * 2, test=test, sn=sn)
        h = attnblock(h, sn=sn, test=test)
        h = resblock_d(h, y, "block-3", n_classes, maps * 4, test=test, sn=sn)
        h = resblock_d(h, y, "block-4", n_classes, maps * 8, test=test, sn=sn)
        h = resblock_d(h, y, "block-5", n_classes, maps * 16,
                       test=test, sn=sn)
        h = resblock_d(h, y, "block-6", n_classes, maps * 16,
                       downsample=False, test=test, sn=sn)
        # Last affine
        # h = F.leaky_relu(h, 0.2)
        h = F.relu(h)
        h = F.sum(h, axis=(2, 3))
        o0 = affine(h, 1, sn=sn, test=test)
        # Projection discriminator
        l, u = calc_uniform_lim_glorot(n_classes, maps * 16)
        e = embed(y, n_classes, maps * 16,
                  initializer=UniformInitializer((l, u)),
                  name="projection", sn=sn, test=test)
        o1 = F.sum(h * e, axis=1, keepdims=True)
    return o0 + o1

def load_files(ctx, file_loaders, filenames, extension=None):
    '''load_files

    Load files. If `filenames` is not a list, it is converted to a list.
    Each filename is then handled one by one, with the loader tied to its
    extension.

    Args:
        ctx: An object that represents the context of sharing states with
            loaders.
        file_loaders (OrderedDict): List of handlers, tied with extension
            name.
        filenames (list): List of filenames.
        extension (str): File extension name, used to identify the file type.

    Returns:
        None
    '''
    def _load_files():
        for filename in filenames:
            if isinstance(filename, str):
                _, ext = os.path.splitext(filename)
            else:
                ext = extension
            handled = False
            for supported_extensions, file_loader in file_loaders.items():
                if ext in supported_extensions:
                    file_loader(ctx, file_loaders, None, filename, ext)
                    handled = True
            if not handled:
                logger.warn('{} is omitted.'.format(filename))

    if isinstance(filenames, list) or isinstance(filenames, tuple):
        pass
    elif isinstance(filenames, str) or hasattr(filenames, 'read'):
        filenames = [filenames]

    if hasattr(ctx, 'parameter_scope'):
        with nn.parameter_scope('', ctx.parameter_scope):
            _load_files()
    else:
        _load_files()

def batch_normalization(h, cnt=0, test=False):
    with nn.parameter_scope("{}".format(cnt)):
        h = PF.batch_normalization(h, batch_stat=not test)
    return h

def compare_with_cpu_command(args):
    configure_progress(os.path.join(args.outdir, 'progress.txt'))

    class TrainConfig:
        pass

    class OptConfig:
        pass

    class MonConfig:
        pass

    # Load config with current context
    files = []
    files.append(args.config)
    with nn.parameter_scope('current'):
        info = load.load(files)
        parameters = get_parameters(grad_only=False)

    config = TrainConfig()
    config.global_config = info.global_config
    config.training_config = info.training_config

    config.optimizers = OrderedDict()
    for name, opt in info.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config.optimizers[name] = o

    config.monitors = OrderedDict()
    for name, mon in info.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config.monitors[name] = m

    # Load config with cpu context
    files = []
    files.append(args.config2)
    with nn.parameter_scope('cpu'):
        info_cpu = load.load(files)
        cpu_parameters = get_parameters(grad_only=False)

    config_cpu = TrainConfig()
    config_cpu.global_config = info_cpu.global_config
    config_cpu.training_config = info_cpu.training_config

    config_cpu.optimizers = OrderedDict()
    for name, opt in info_cpu.optimizers.items():
        o = OptConfig()
        o.optimizer = opt
        o.data_iterator = None
        config_cpu.optimizers[name] = o

    config_cpu.monitors = OrderedDict()
    for name, mon in info_cpu.monitors.items():
        m = MonConfig()
        m.monitor = mon
        m.data_iterator = None
        config_cpu.monitors[name] = m

    result_array = [['1-Correl']]

    # Profile Optimizer
    with ExitStack() as stack:
        for name, o in config.optimizers.items():
            o.data_iterator = stack.enter_context(
                o.optimizer.data_iterator())
        for name, o in config_cpu.optimizers.items():
            o.data_iterator = stack.enter_context(
                o.optimizer.data_iterator())
        result_array = compare_optimizer(config, parameters, config_cpu,
                                         cpu_parameters, result_array)

    # Write profiling result
    import csv
    with open(args.outdir + os.sep + 'compare_with_cpu.csv', 'w') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerows(result_array)

    logger.log(99, 'Compare with CPU Completed.')
    progress(None)

def one_by_one_conv(h, scope, k=1, s=1, p=1):
    with nn.parameter_scope(scope):
        maps = h.shape[1]
        # The pad follows the p argument (it matches the k and s arguments,
        # rather than being hard-coded).
        h = PF.convolution(h, maps, kernel=(k, k), stride=(s, s), pad=(p, p))
    return h

def cnn_model_003_with_cross_attention(ctx, x_list, act=F.relu, test=False):
    """With attention before pooling
    """
    with nn.context_scope(ctx):
        # Convblock0
        h0_list = []
        for x in x_list:
            h = conv_unit(x, "conv00", 128, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test)
            h0_list.append(h)
        # Cross attention
        ca0 = attention(h0_list[0], h0_list[1], h0_list[1],
                        div_dim=True, softmax=True)
        ca1 = attention(h0_list[1], h0_list[0], h0_list[0],
                        div_dim=True, softmax=True)
        # Maxpooling, Batchnorm, Dropout
        h0_list = []
        for h in [ca0, ca1]:
            h = F.max_pooling(h, (2, 2))  # 32 -> 16
            with nn.parameter_scope("bn0"):
                h = PF.batch_normalization(h, batch_stat=not test)
            if not test:
                h = F.dropout(h)
            h0_list.append(h)
        # Convblock 1
        h1_list = []
        for h in h0_list:
            h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test)
            h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test)
            h1_list.append(h)
        # Cross attention
        ca0 = attention(h1_list[0], h1_list[1], h1_list[1],
                        div_dim=True, softmax=True)
        ca1 = attention(h1_list[1], h1_list[0], h1_list[0],
                        div_dim=True, softmax=True)
        # Maxpooling, Batchnorm, Dropout
        h1_list = []
        for h in [ca0, ca1]:
            h = F.max_pooling(h, (2, 2))  # 16 -> 8
            with nn.parameter_scope("bn1"):
                h = PF.batch_normalization(h, batch_stat=not test)
            if not test:
                h = F.dropout(h)
            h1_list.append(h)
        # Convblock 2
        h2_list = []
        for h in h1_list:
            h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act, test=test)  # 8 -> 6
            h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test)
            h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test)
            h = conv_unit(h, "conv23", 10, k=1, s=1, p=0, act=act, test=test)
            h2_list.append(h)
        # Cross attention
        ca0 = attention(h2_list[0], h2_list[1], h2_list[1],
                        div_dim=True, softmax=True)
        ca1 = attention(h2_list[1], h2_list[0], h2_list[0],
                        div_dim=True, softmax=True)
        # Convblock 3
        h3_list = []
        for h in [ca0, ca1]:
            h = F.average_pooling(h, (6, 6))
            with nn.parameter_scope("bn2"):
                h = PF.batch_normalization(h, batch_stat=not test)
            h = F.reshape(h, (h.shape[0], np.prod(h.shape[1:])))
            h3_list.append(h)
    return h3_list

def train(args):
    """
    Main script.
    """
    # Get context.
    from nnabla.contrib.context import extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
    ctx = extension_context(extension_module, device_id=args.device_id)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
    # TRAIN
    # Fake path
    z = nn.Variable([args.batch_size, 100, 1, 1])
    fake = generator(z)
    fake.persistent = True  # Not to clear at backward
    pred_fake = discriminator(fake)
    loss_gen = F.mean(F.sigmoid_cross_entropy(
        pred_fake, F.constant(1, pred_fake.shape)))
    fake_dis = fake.unlinked()
    pred_fake_dis = discriminator(fake_dis)
    loss_dis = F.mean(F.sigmoid_cross_entropy(
        pred_fake_dis, F.constant(0, pred_fake_dis.shape)))

    # Real path
    x = nn.Variable([args.batch_size, 1, 28, 28])
    pred_real = discriminator(x)
    loss_dis += F.mean(F.sigmoid_cross_entropy(
        pred_real, F.constant(1, pred_real.shape)))

    # Create Solver.
    solver_gen = S.Adam(args.learning_rate, beta1=0.5)
    solver_dis = S.Adam(args.learning_rate, beta1=0.5)
    with nn.parameter_scope("gen"):
        solver_gen.set_parameters(nn.get_parameters())
    with nn.parameter_scope("dis"):
        solver_dis.set_parameters(nn.get_parameters())

    # Create monitor.
    import nnabla.monitor as M
    monitor = M.Monitor(args.monitor_path)
    monitor_loss_gen = M.MonitorSeries("Generator loss", monitor, interval=10)
    monitor_loss_dis = M.MonitorSeries(
        "Discriminator loss", monitor, interval=10)
    monitor_time = M.MonitorTimeElapsed("Time", monitor, interval=100)
    monitor_fake = M.MonitorImageTile(
        "Fake images", monitor, normalize_method=lambda x: (x + 1) / 2.)

    data = data_iterator_mnist(args.batch_size, True)

    # Training loop.
    for i in range(args.max_iter):
        if i % args.model_save_interval == 0:
            with nn.parameter_scope("gen"):
                nn.save_parameters(os.path.join(
                    args.model_save_path, "generator_param_%06d.h5" % i))
            with nn.parameter_scope("dis"):
                nn.save_parameters(os.path.join(
                    args.model_save_path, "discriminator_param_%06d.h5" % i))

        # Training forward
        image, _ = data.next()
        x.d = image / 255. - 0.5  # [0, 255] to [-0.5, 0.5]
        z.d = np.random.randn(*z.shape)

        # Generator update.
        solver_gen.zero_grad()
        loss_gen.forward(clear_no_need_grad=True)
        loss_gen.backward(clear_buffer=True)
        solver_gen.weight_decay(args.weight_decay)
        solver_gen.update()
        monitor_fake.add(i, fake)
        monitor_loss_gen.add(i, loss_gen.d.copy())

        # Discriminator update.
        solver_dis.zero_grad()
        loss_dis.forward(clear_no_need_grad=True)
        loss_dis.backward(clear_buffer=True)
        solver_dis.weight_decay(args.weight_decay)
        solver_dis.update()
        monitor_loss_dis.add(i, loss_dis.d.copy())
        monitor_time.add(i)

    nnp = os.path.join(args.model_save_path, 'dcgan_%06d.nnp' % args.max_iter)
    runtime_contents = {
        'networks': [
            {'name': 'Generator',
             'batch_size': args.batch_size,
             'outputs': {'G': fake},
             'names': {'z': z}},
            {'name': 'Discriminator',
             'batch_size': args.batch_size,
             'outputs': {'D': pred_real},
             'names': {'x': x}}],
        'executors': [
            {'name': 'Generator',
             'network': 'Generator',
             'data': ['z'],
             'output': ['G']},
            {'name': 'Discriminator',
             'network': 'Discriminator',
             'data': ['x'],
             'output': ['D']}]}
    save.save(nnp, runtime_contents)

    from cpp_forward_check import check_cpp_forward
    check_cpp_forward(args.model_save_path, [z.d], [z], fake, nnp,
                      "Generator")