def ce_loss_with_uncertainty(ctx, pred, y_l, log_var):
    r = F.randn(0., 1., log_var.shape)
    r = F.pow_scalar(F.exp(log_var), 0.5) * r
    h = pred + r
    with nn.context_scope(ctx):
        loss_ce = F.mean(F.softmax_cross_entropy(h, y_l))
    return loss_ce
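# Usage sketch for ce_loss_with_uncertainty above (an assumption, not part of
# the original code): `mlp` is a hypothetical two-head model that returns class
# scores together with a per-class log-variance, and the loss is built under a
# plain CPU context.
import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF


def mlp(x, n_cls=10):
    h = F.relu(PF.affine(x, 128, name="fc1"))
    pred = PF.affine(h, n_cls, name="fc_pred")
    log_var = PF.affine(h, n_cls, name="fc_logvar")
    return pred, log_var


ctx = nn.Context()  # swap in an extension context (e.g. cudnn) if available
x = nn.Variable([8, 64])
y = nn.Variable([8, 1])
pred, log_var = mlp(x)
loss = ce_loss_with_uncertainty(ctx, pred, y, log_var)
loss.forward()
loss.backward()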
def resnet_model(ctx, x, inmaps=64, act=F.relu, test=False): # Conv -> BN -> Relu with nn.context_scope(ctx): with nn.parameter_scope("conv1"): h = PF.convolution(x, inmaps, kernel=(3, 3), pad=(1, 1), with_bias=False) h = PF.batch_normalization(h, decay_rate=0.9, batch_stat=not test) h = act(h) h = res_unit(h, "conv2", act, False) # -> 32x32 h = res_unit(h, "conv3", act, True) # -> 16x16 with nn.parameter_scope("bn0"): h = PF.batch_normalization(h, batch_stat=not test) if not test: h = F.dropout(h) h = res_unit(h, "conv4", act, False) # -> 16x16 h = res_unit(h, "conv5", act, True) # -> 8x8 with nn.parameter_scope("bn1"): h = PF.batch_normalization(h, batch_stat=not test) if not test: h = F.dropout(h) h = res_unit(h, "conv6", act, False) # -> 8x8 h = res_unit(h, "conv7", act, True) # -> 4x4 with nn.parameter_scope("bn2"): h = PF.batch_normalization(h, batch_stat=not test) if not test: h = F.dropout(h) h = res_unit(h, "conv8", act, False) # -> 4x4 h = F.average_pooling(h, kernel=(4, 4)) # -> 1x1 pred = PF.affine(h, 10) return pred
def inplace_function_test_helper(inputs, func, func_args=[], func_kwargs={}, ctx=None, rng=None): if rng is None: rng = np.random.RandomState(313) if ctx is None: ctx = nn.Context() with nn.context_scope(ctx): a_s = [inp * 1.0 for inp in inputs] y = func(*(a_s + list(func_args)), inplace=False, **func_kwargs) l = F.sum(y) a_s_i = [inp * 1.0 for inp in inputs] y_i = func(*(a_s_i + list(func_args)), inplace=True, **func_kwargs) l_i = F.sum(y_i) data = [(rng.randn(*inp.shape), rng.randn(*inp.shape)) for inp in inputs] for i in range(len(data)): inputs[i].d = data[i][0] inputs[i].g = data[i][1] l.forward() l.backward() grads = [inp.g.copy() for inp in inputs] for i in range(len(data)): inputs[i].d = data[i][0] inputs[i].g = data[i][1] l_i.forward() l_i.backward() grads_i = [inp.g.copy() for inp in inputs] for g, g_i in zip(grads, grads_i): assert np.allclose(g, g_i)
def sigma_regularization(ctx, log_var, one):
    with nn.context_scope(ctx):
        h = F.exp(log_var)
        h = F.pow_scalar(h, 0.5)
        h = F.mean(h, axis=1)
        r = F.mean(F.squared_error(h, one))
    return r
def cnn_ae_model_000(ctx, x, act=F.relu, test=False): with nn.parameter_scope("ae"): with nn.context_scope(ctx): # Convblock0 h = conv_unit(x, "conv00", 32, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv01", 32, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv02", 32, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv03", 32, k=4, s=2, p=1, act=act, test=test) # 32 -> 16 if not test: h = F.dropout(h) # Convblock 1 h = conv_unit(h, "conv10", 64, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv11", 64, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv12", 64, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv13", 64, k=4, s=2, p=1, act=act, test=test) # 16 -> 8 if not test: h = F.dropout(h) # Deconvblock0 h = deconv_unit(h, "deconv00", 64, k=4, s=2, p=1, act=act, test=test) # 8 -> 16 h = deconv_unit(h, "deconv01", 64, k=3, s=1, p=1, act=act, test=test) h = deconv_unit(h, "deconv02", 64, k=3, s=1, p=1, act=act, test=test) h = deconv_unit(h, "deconv03", 64, k=3, s=1, p=1, act=act, test=test) # Deconvblock 1 h = deconv_unit(h, "deconv10", 32, k=4, s=2, p=1, act=act, test=test) # 16 -> 32 h = deconv_unit(h, "deconv11", 32, k=3, s=1, p=1, act=act, test=test) h = deconv_unit(h, "deconv12", 32, k=3, s=1, p=1, act=act, test=test) h = deconv_unit(h, "deconv13", 3, k=3, s=1, p=1, act=None, test=test) return h
def cnn_model_003(ctx, x, act=F.relu, test=False): with nn.context_scope(ctx): # Convblock0 h = conv_unit(x, "conv00", 128, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test) h = F.max_pooling(h, (2, 2)) # 32 -> 16 with nn.parameter_scope("bn0"): h = PF.batch_normalization(h, batch_stat=not test) if not test: h = F.dropout(h) # Convblock 1 h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test) h = F.max_pooling(h, (2, 2)) # 16 -> 8 with nn.parameter_scope("bn1"): h = PF.batch_normalization(h, batch_stat=not test) if not test: h = F.dropout(h) # Convblock 2 h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act, test=test) # 8 -> 6 h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test) h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test) h = conv_unit(h, "conv23", 10, k=1, s=1, p=0, act=act, test=test) # Convblock 3 h = F.average_pooling(h, (6, 6)) with nn.parameter_scope("bn2"): h = PF.batch_normalization(h, batch_stat=not test) h = F.reshape(h, (h.shape[0], np.prod(h.shape[1:]))) return h
def sigma_regularization(ctx, log_var, one):
    with nn.context_scope(ctx):
        h = F.exp(log_var)
        h = F.pow_scalar(h, 0.5)
        b = log_var.shape[0]
        r = F.sum(F.squared_error(h, one)) / b
    return r
def cifar10_resnet23_prediction(ctx, image, test=False):
    """
    Construct ResNet 23
    """
    # Residual Unit
    def res_unit(x, scope_name, dn=False, test=False):
        C = x.shape[1]
        with nn.parameter_scope(scope_name):
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv1"):
                h = PF.convolution(x, C // 2, kernel=(1, 1), pad=(0, 0),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv2"):
                h = PF.convolution(h, C // 2, kernel=(3, 3), pad=(1, 1),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
        # Residual -> Relu
        h = F.relu(h + x)
        # Maxpooling
        if dn:
            h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))
        return h

    nmaps = 64
    ncls = 10

    # Conv -> BN -> Relu
    with nn.context_scope(ctx):
        with nn.parameter_scope("conv1"):
            h = PF.convolution(image, nmaps, kernel=(3, 3), pad=(1, 1),
                               with_bias=False)
            h = PF.batch_normalization(h, batch_stat=not test)
            h = F.relu(h)
        h = res_unit(h, "conv2", False)    # -> 32x32
        h = res_unit(h, "conv3", True)     # -> 16x16
        h = bn_dropout(h, "bn_dropout1", test)
        h = res_unit(h, "conv4", False)    # -> 16x16
        h = res_unit(h, "conv5", True)     # -> 8x8
        h = bn_dropout(h, "bn_dropout2", test)
        h = res_unit(h, "conv6", False)    # -> 8x8
        h = res_unit(h, "conv7", True)     # -> 4x4
        h = bn_dropout(h, "bn_dropout3", test)
        h = res_unit(h, "conv8", False)    # -> 4x4
        h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1
        pred = PF.affine(h, ncls)
    return pred
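# Sketch of driving cifar10_resnet23_prediction in a training step (assumed
# setup, not taken from the original code). It relies on the bn_dropout helper
# referenced above being defined in the same module.
import numpy as np
import nnabla as nn
import nnabla.functions as F
import nnabla.solvers as S

ctx = nn.Context()  # or an extension context such as cudnn, if installed
image = nn.Variable([32, 3, 32, 32])
label = nn.Variable([32, 1])
pred = cifar10_resnet23_prediction(ctx, image, test=False)
with nn.context_scope(ctx):
    loss = F.mean(F.softmax_cross_entropy(pred, label))

solver = S.Adam(1e-3)
solver.set_parameters(nn.get_parameters())

# One update step on dummy data.
image.d = np.random.randn(*image.shape)
label.d = np.random.randint(0, 10, size=label.shape)
loss.forward()
solver.zero_grad()
loss.backward()
solver.update()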
def kl_divergence(ctx, pred, label, log_var):
    with nn.context_scope(ctx):
        s = F.pow_scalar(F.exp(log_var), 0.5)
        elms = softmax_with_temperature(ctx, label, s) \
            * F.log(F.softmax(pred, axis=1))
        loss = -F.mean(F.sum(elms, axis=1))
    return loss
def ref_fused_convolution(x, weight, bias, beta, gamma, rmean, rvar, z,
                          base_axis, pad, stride, dilation, group,
                          channel_last, decay_rate, eps, batch_stat,
                          nonlinearity, nonlinearity_args):
    with nn.context_scope(cpu_context):
        graph = RefFusedConvolutionGraph(**locals())
    return graph.get_output()
def sigmas_regularization(ctx, log_var0, log_var1):
    with nn.context_scope(ctx):
        h0 = F.exp(log_var0)
        h0 = F.pow_scalar(h0, 0.5)
        h1 = F.exp(log_var1)
        h1 = F.pow_scalar(h1, 0.5)
        r = F.mean(F.squared_error(h0, h1))
    return r
def test_rand_forward(seed, ctx, func_name, low, high, shape):
    with nn.context_scope(ctx):
        o = F.rand(low, high, shape, seed=seed)
    assert o.shape == tuple(shape)
    assert o.parent.name == func_name
    o.forward()
    assert np.all(o.d < high)
    assert np.all(o.d >= low)
def sr_loss_with_uncertainty(ctx, pred0, pred1, log_var0, log_var1):
    # TODO: squared error/absolute error
    s0 = F.exp(log_var0)
    s1 = F.exp(log_var1)
    squared_error = F.squared_error(pred0, pred1)
    with nn.context_scope(ctx):
        loss_sr = F.mean(
            squared_error * (1 / s0 + 1 / s1) + (s0 / s1 + s1 / s0)) * 0.5
    return loss_sr
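# Quick numeric sanity check for the definition directly above (illustrative
# only, not part of the original code): with scalar inputs the graph value
# should equal 0.5 * (se * (1/s0 + 1/s1) + s0/s1 + s1/s0) computed in NumPy.
import numpy as np
import nnabla as nn

pred0 = nn.Variable.from_numpy_array(np.array([[1.0]], dtype=np.float32))
pred1 = nn.Variable.from_numpy_array(np.array([[0.5]], dtype=np.float32))
log_var0 = nn.Variable.from_numpy_array(np.array([[0.2]], dtype=np.float32))
log_var1 = nn.Variable.from_numpy_array(np.array([[-0.3]], dtype=np.float32))

loss = sr_loss_with_uncertainty(nn.Context(), pred0, pred1, log_var0, log_var1)
loss.forward()

s0, s1 = np.exp(0.2), np.exp(-0.3)
se = (1.0 - 0.5) ** 2
expected = 0.5 * (se * (1 / s0 + 1 / s1) + s0 / s1 + s1 / s0)
assert np.isclose(loss.d, expected, atol=1e-5)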
def test_randint_forward(seed, ctx, func_name, low, high, shape):
    with nn.context_scope(ctx):
        o = F.randint(low, high, shape, seed=seed)
    assert o.shape == tuple(shape)
    assert o.parent.name == func_name
    o.forward()
    assert np.all(o.d < high)
    assert np.all(o.d >= low)
def test_gru(seed, num_layers, dropout, bidirectional, training, seq_len, batch_size, input_size, hidden_size, with_bias, ctx, func_name): from nbla_test_utils import function_tester if func_name == "GRU": pytest.skip("Not implemented in CPU.") with nn.context_scope(ctx): rng = np.random.RandomState(seed) num_directions = 1 if bidirectional: num_directions = 2 inputs = [ rng.randn(seq_len, batch_size, input_size).astype(np.float32) ] inputs += [ rng.randn(num_layers, num_directions, batch_size, hidden_size).astype(np.float32) ] inputs += [ rng.randn(num_directions, 3, hidden_size, input_size + hidden_size) ] if num_layers > 1: inputs += [ rng.randn(max(1, num_layers - 1), num_directions, 3, hidden_size, num_directions * hidden_size + hidden_size).astype(np.float32) ] else: inputs += [None] if with_bias: inputs += [ rng.randn(num_layers, num_directions, 4, hidden_size).astype(np.float32) ] else: inputs += [None] backward = [False for _ in inputs] if training: backward = [True for _ in inputs] function_tester(rng, F.gru, execute_fixed_length_gru, inputs, func_kwargs=dict(num_layers=num_layers, dropout=dropout, bidirectional=bidirectional, training=training), atol_f=1e-6, atol_b=1e-2, dstep=1e-3, backward=backward, ctx=ctx, func_name=func_name, ref_grad=get_gru_grad, disable_half_test=True)
def cnn_model_003(ctx, x, act=F.elu, do=True, test=False): with nn.context_scope(ctx): # Convblock0 h = conv_unit(x, "conv00", 128, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test) h = F.max_pooling(h, (2, 2)) # 32 -> 16 with nn.parameter_scope("bn0"): h = PF.batch_normalization(h, batch_stat=not test) if not test and do: h = F.dropout(h) # Convblock 1 h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test) h = F.max_pooling(h, (2, 2)) # 16 -> 8 with nn.parameter_scope("bn1"): h = PF.batch_normalization(h, batch_stat=not test) if not test and do: h = F.dropout(h) # Convblock 2 h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act, test=test) # 8 -> 6 h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test) h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test) h_branch = h # Convblock 3 h = conv_unit(h_branch, "conv23", 10, k=1, s=1, p=0, act=act, test=test) h = F.average_pooling(h, (6, 6)) with nn.parameter_scope("bn2"): h = PF.batch_normalization(h, batch_stat=not test) pred = F.reshape(h, (h.shape[0], np.prod(h.shape[1:]))) # Uncertainty u0 = conv_unit(h_branch, "u0", 10, k=1, s=1, p=0, act=act, test=test) u0 = F.average_pooling(u0, (6, 6)) with nn.parameter_scope("u0bn"): u0 = PF.batch_normalization(u0, batch_stat=not test) log_var = F.reshape(u0, (u0.shape[0], np.prod(u0.shape[1:]))) # Uncertainty for uncertainty u1 = conv_unit(h_branch, "u1", 10, k=1, s=1, p=0, act=act, test=test) u1 = F.average_pooling(u1, (6, 6)) with nn.parameter_scope("u1bn"): u1 = PF.batch_normalization(u1, batch_stat=not test) log_s = F.reshape(u1, (u1.shape[0], np.prod(u1.shape[1:]))) return pred, log_var, log_s
def cifar10_resnet23_prediction(ctx, scope, image, test=False):
    """
    Construct ResNet 23
    """
    # Residual Unit
    def res_unit(x, scope_name, dn=False, test=False):
        C = x.shape[1]
        with nn.parameter_scope(scope_name):
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv1"):
                h = PF.convolution(x, C // 2, kernel=(1, 1), pad=(0, 0),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN -> Relu
            with nn.parameter_scope("conv2"):
                h = PF.convolution(h, C // 2, kernel=(3, 3), pad=(1, 1),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            # Conv -> BN
            with nn.parameter_scope("conv3"):
                h = PF.convolution(h, C, kernel=(1, 1), pad=(0, 0),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
        # Residual -> Relu
        h = F.relu(h + x)
        # Maxpooling
        if dn:
            h = F.max_pooling(h, kernel=(2, 2), stride=(2, 2))
        return h

    nmaps = 64
    ncls = 10

    # Conv -> BN -> Relu
    with nn.context_scope(ctx):
        with nn.parameter_scope(scope):
            with nn.parameter_scope("conv1"):
                h = PF.convolution(image, nmaps, kernel=(3, 3), pad=(1, 1),
                                   with_bias=False)
                h = PF.batch_normalization(h, batch_stat=not test)
                h = F.relu(h)
            h = res_unit(h, "conv2", False)    # -> 32x32
            h = res_unit(h, "conv3", True)     # -> 16x16
            h = res_unit(h, "conv4", False)    # -> 16x16
            h = res_unit(h, "conv5", True)     # -> 8x8
            h = res_unit(h, "conv6", False)    # -> 8x8
            h = res_unit(h, "conv7", True)     # -> 4x4
            h = res_unit(h, "conv8", False)    # -> 4x4
            h = F.average_pooling(h, kernel=(4, 4))  # -> 1x1
            pred = PF.affine(h, ncls)
    return pred
def ref_fused_convolution(ctx, x, weight, bias, beta, gamma, rmean, rvar, z,
                          base_axis, pad, stride, dilation, group,
                          channel_last, decay_rate, eps, batch_stat,
                          nonlinearity, nonlinearity_args, pad_mode,
                          constant_value):
    args = locals().copy()
    del args['ctx']
    with nn.context_scope(ctx):
        graph = RefFusedConvolutionGraph(**args)
    return graph.get_output()
def test_image_augmentation_forward(seed, shape, ctx, func_name):
    rng = np.random.RandomState(seed)
    inputs = [rng.randn(16, 3, 8, 8).astype(np.float32)]
    i = nn.Variable(inputs[0].shape)
    # NNabla forward
    with nn.context_scope(ctx), nn.auto_forward():
        o = F.image_augmentation(i)
    assert o.d.shape == inputs[0].shape

    with nn.context_scope(ctx), nn.auto_forward():
        o = F.image_augmentation(i, shape=shape, pad=(2, 2),
                                 min_scale=0.8, max_scale=1.2, angle=0.2,
                                 aspect_ratio=1.1, distortion=0.1,
                                 flip_lr=True, flip_ud=False,
                                 brightness=0.1, brightness_each=True,
                                 contrast=1.1, contrast_center=0.5,
                                 contrast_each=True, noise=0.1, seed=0)
    assert o.d.shape == (inputs[0].shape[0],) + shape
def er_loss(ctx, pred):
    with nn.context_scope(ctx):
        bs = pred.shape[0]
        d = np.prod(pred.shape[1:])
        denominator = bs * d
        pred_normalized = F.softmax(pred)
        pred_log_normalized = F.log(F.softmax(pred))
        loss_er = -F.sum(pred_normalized * pred_log_normalized) / denominator
    return loss_er
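# Illustrative check of er_loss above (added example, not from the original
# code): the loss equals H(softmax(pred)) / (bs * d), so it is largest for a
# uniform prediction and shrinks as the prediction becomes peaked.
import numpy as np
import nnabla as nn

uniform = nn.Variable.from_numpy_array(np.zeros((1, 4), dtype=np.float32))
peaked = nn.Variable.from_numpy_array(
    np.array([[10.0, 0.0, 0.0, 0.0]], dtype=np.float32))

l_uniform = er_loss(nn.Context(), uniform)
l_peaked = er_loss(nn.Context(), peaked)
l_uniform.forward()
l_peaked.forward()

# Entropy of a uniform distribution over 4 classes is log(4); bs * d = 4.
assert np.isclose(l_uniform.d, np.log(4) / 4, atol=1e-5)
assert l_peaked.d < l_uniform.d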
def test_random_crop_forward_backward(seed, inshape, shape, ctx, func_name): from nbla_test_utils import function_tester rng = np.random.RandomState(seed) inputs = [rng.randn(*inshape).astype(np.float32)] i = nn.Variable(inputs[0].shape, need_grad=True) i.d = inputs[0] # NNabla forward with nn.context_scope(ctx), nn.auto_forward(): o = F.random_crop(i, shape, 0, seed) if shape is not None: max_correl = 0 possible_crop_range = [ input - output for output, input in zip(shape, inshape) ] for crop_pos in itertools.product(*map( tuple, map(lambda x: range(*x), [(0, r + 1) for r in possible_crop_range]))): r = inputs[0][crop_pos[0]:crop_pos[0] + shape[0], crop_pos[1]:crop_pos[1] + shape[1], crop_pos[2]:crop_pos[2] + shape[2]] assert (o.d.shape == r.shape) correl_and_p = pearsonr(o.d.flatten(), r.flatten()) if correl_and_p[0] > max_correl: max_correl = correl_and_p[0] else: max_correl = pearsonr(o.d.flatten(), inputs[0].flatten())[0] assert (max_correl == 1.0) assert o.parent.name == func_name # Skipping Backward check g = np.random.randn(*i.shape) i.g = g o_grad = np.random.randn(*o.shape) o.g = o_grad o.parent.backward([i], [o]) ref_grad = i.g.copy() - g # Check accum=False with NaN gradient i.g = np.float32('nan') o.parent.backward([i], [o], [False]) assert not np.any(np.isnan(i.g)) # Check if accum option works i.g[...] = 1 o.g = o_grad o.parent.backward([i], [o], [False]) assert np.allclose(i.g, ref_grad, atol=1e-6) # Check if need_grad works i.g[...] = 0 i.need_grad = False o_diff = rng.randn(*o.shape).astype(i.d.dtype) o.backward(o_diff) assert np.all(i.g == 0)
def test_batch_normalization_forward_backward(seed, axis, decay_rate, eps, output_stat, ctx, func_name): from nbla_test_utils import function_tester rng = np.random.RandomState(seed) inputs = list(create_inputs(rng, axis)) axes = [axis] batch_stat = True function_tester(rng, F.batch_normalization, ref_batch_normalization, inputs, func_args=[axes, decay_rate, eps, batch_stat, output_stat], backward=[True, True, True, False, False], ctx=ctx, func_name=func_name, dstep=1e-2, atol_b=1e-2) # Check if running mean and var works. vinputs = [] for i in inputs: vinputs.append(nn.Variable(i.shape, True)) vinputs[-1].d = i for i in range(5): inputs[0] = rng.randn(*inputs[0].shape) vinputs[0].d[...] = inputs[0] ref_y = ref_batch_normalization( *(inputs + [axes, decay_rate, eps, batch_stat, output_stat])) with nn.context_scope(ctx), nn.auto_forward(): y = F.batch_normalization( *(vinputs + [axes, decay_rate, eps, batch_stat, output_stat])) assert np.allclose(vinputs[3].d, inputs[3]) assert np.allclose(vinputs[4].d, inputs[4], atol=1e-3) # Check if global stat mode works batch_stat = False if output_stat: return ref_y = ref_batch_normalization( *(inputs + [axes, decay_rate, eps, batch_stat, output_stat])) with nn.context_scope(ctx), nn.auto_forward(): y = F.batch_normalization( *(vinputs + [axes, decay_rate, eps, batch_stat, output_stat])) assert np.allclose(ref_y, y.d, atol=1e-6)
def get_model(args, num_classes, test=False, channel_last=False, mixup=None, channels=4, spatial_size=224, label_smoothing=0, ctx_for_loss=None): """ Create computation graph and variables. """ from models import build_network from utils.loss import softmax_cross_entropy_with_label_smoothing if hasattr(spatial_size, '__len__'): assert len(spatial_size) == 2, \ f'Spatial size must be a scalar or a tuple of two ints. Given {spatial_size}' spatial_shape = tuple(spatial_size) else: spatial_shape = (spatial_size, spatial_size) if channel_last: image = nn.Variable( (args.batch_size, spatial_shape[0], spatial_shape[1], channels)) else: image = nn.Variable((args.batch_size, channels) + spatial_shape) label = nn.Variable([args.batch_size, 1]) in_image = image in_label = label if mixup is not None: image, label = mixup.mix_data(image, label) pred, hidden = build_network(image, num_classes, args.arch, test=test, channel_last=channel_last) pred.persistent = True def define_loss(pred, in_label, label, label_smoothing): loss = F.mean( softmax_cross_entropy_with_label_smoothing(pred, label, label_smoothing)) error = F.sum(F.top_n_error(pred, in_label, n=1)) return loss, error # Use specified context if possible. # We use it when we pass float32 context to avoid nan issue if ctx_for_loss is not None: with nn.context_scope(ctx_for_loss): loss, error = define_loss(pred, in_label, label, label_smoothing) else: loss, error = define_loss(pred, in_label, label, label_smoothing) Model = namedtuple('Model', ['image', 'label', 'pred', 'loss', 'error', 'hidden']) return Model(in_image, in_label, pred, loss, error, hidden)
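# The label-smoothing loss imported above lives in utils.loss; the sketch below
# is an assumed, typical formulation (it may differ from the project's actual
# implementation): blend the one-hot cross entropy with a uniform-target term
# weighted by label_smoothing.
import nnabla.functions as F


def softmax_cross_entropy_with_label_smoothing_sketch(pred, label,
                                                      label_smoothing=0.1):
    ce = F.softmax_cross_entropy(pred, label)
    if label_smoothing <= 0:
        return ce
    # Cross entropy against a uniform target is the mean of -log_softmax.
    uniform_ce = F.mean(-F.log_softmax(pred, axis=1), axis=1, keepdims=True)
    return (1 - label_smoothing) * ce + label_smoothing * uniform_ce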
def test_dropout_forward_backward(p, seed, ctx, func_name): from nbla_test_utils import cap_ignore_region # Note: each backward execution requires a forward execution in NNabla. with nn.context_scope(ctx): # Create inputs rng = np.random.RandomState(seed) inputs = [ cap_ignore_region( rng.randn(2, 3, 4).astype(np.float32) * 2, (-1e-3, 1e-3)) ] # Ensure there is no zero. x = nn.Variable(inputs[0].shape, need_grad=True) x.d = inputs[0] init_dx = rng.randn(*x.shape).astype(x.data.dtype) init_dy = rng.randn(*x.shape).astype(x.data.dtype) # Construct graph y = F.dropout(x, p) # Reference parameter scale = 1. / (1. - p) # Test forward y.forward(clear_buffer=True) mask = (y.d != 0) ref_y = x.d * mask * scale assert_allclose(y.d, ref_y) assert y.parent.name == func_name # Test backward x.g[...] = init_dx y.backward(init_dy, clear_buffer=True) ref_dx = init_dy * mask * scale assert_allclose(x.g, init_dx + ref_dx) # Test accumulation y.forward(clear_no_need_grad=True) mask = (y.d != 0) x.g[...] = 1 y.g = init_dy y.parent.backward([x], [y], [False]) ref_dx = init_dy * mask * scale assert_allclose(x.g, ref_dx) # Test accum=False with NaN gradient y.forward(clear_no_need_grad=True) x.g = np.float32('nan') y.parent.backward([x], [y], [False]) assert not np.any(np.isnan(x.g)) # Test need_grad y.forward(clear_no_need_grad=True) x.g[...] = 0 x.need_grad = False y.backward(init_dy) assert np.all(x.g == 0)
def ref_grad_fused_convolution(x, weight, bias, beta, gamma, rmean, rvar, z,
                               dy, base_axis, pad, stride, dilation, group,
                               channel_last, decay_rate, eps, batch_stat,
                               nonlinearity, nonlinearity_args,
                               need_grad_flags):
    args = locals().copy()
    del args['dy']
    del args['need_grad_flags']
    with nn.context_scope(cpu_context):
        graph = RefFusedConvolutionGraph(**args)
    return graph.get_grads(dy, need_grad_flags=need_grad_flags)
def test_function_context(seed):
    rng = np.random.RandomState(313)
    xd = rng.randn(2, 3)
    x = nn.Variable.from_numpy_array(xd)
    ctx1 = nn.Context(backend=['cpu:float'],
                      array_class='CpuCachedArray', device_id='1')
    with nn.context_scope(ctx1):
        y = F.relu(x)
    ctx0 = nn.Context(backend=['cpu:float'],
                      array_class='CpuCachedArray', device_id='0')
    # TODO: use id or hash if we determine the spec
    assert str(ctx0) != str(ctx1)
    assert str(ctx1) == str(y.parent.context)
    with nn.context_scope(y.parent.context):
        z = F.relu(x)
    assert str(y.parent.context) == str(z.parent.context)
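# Related sketch (assumed, not part of the original test): contexts are more
# commonly obtained via nnabla.ext_utils.get_extension_context, and a function
# created inside nn.context_scope records that context on its parent.
import nnabla as nn
import nnabla.functions as F
from nnabla.ext_utils import get_extension_context

cpu_ctx = get_extension_context('cpu', device_id='0')
x = nn.Variable([2, 3])
with nn.context_scope(cpu_ctx):
    y = F.relu(x)
assert str(y.parent.context) == str(cpu_ctx)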
def test_randint_forward(seed, ctx, func_name, low, high, shape):
    with nn.context_scope(ctx):
        o = F.randint(low, high, shape, seed=seed)
    assert o.shape == tuple(shape)
    assert o.parent.name == func_name
    o.forward()
    # NOTE: The following should be < high,
    # but use <= high because std::uniform_random contains a bug.
    assert np.all(o.d <= high)
    assert np.all(o.d >= low)
def sr_loss_with_uncertainty(ctx, pred0, pred1, log_var0, log_var1):
    var0 = F.exp(log_var0)
    var1 = F.exp(log_var1)
    s0 = F.pow_scalar(var0, 0.5)
    s1 = F.pow_scalar(var1, 0.5)
    squared_error = F.squared_error(pred0, pred1)
    with nn.context_scope(ctx):
        loss = F.log(s1 / s0) + (var0 / var1 + squared_error / var1) * 0.5
        loss_sr = F.mean(loss)
    return loss_sr
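# Added remark on the definition directly above: the per-element term is the
# closed-form KL divergence between N(pred0, var0) and N(pred1, var1),
#   KL = log(s1 / s0) + (var0 + (pred0 - pred1)^2) / (2 * var1) - 1/2,
# up to the constant -1/2, which carries no gradient. A scalar sanity check:
import numpy as np
import nnabla as nn

p0 = nn.Variable.from_numpy_array(np.array([[1.0]], dtype=np.float32))
p1 = nn.Variable.from_numpy_array(np.array([[0.0]], dtype=np.float32))
lv0 = nn.Variable.from_numpy_array(np.array([[0.4]], dtype=np.float32))
lv1 = nn.Variable.from_numpy_array(np.array([[-0.2]], dtype=np.float32))

loss = sr_loss_with_uncertainty(nn.Context(), p0, p1, lv0, lv1)
loss.forward()

v0, v1 = np.exp(0.4), np.exp(-0.2)
kl = np.log(np.sqrt(v1 / v0)) + (v0 + 1.0) / (2 * v1) - 0.5
assert np.isclose(loss.d, kl + 0.5, atol=1e-5)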
def test_image_augmentation_forward(seed, ctx, func_name):
    rng = np.random.RandomState(seed)
    inputs = [rng.randn(16, 3, 8, 8).astype(np.float32)]
    i = nn.Variable(inputs[0].shape)
    # NNabla forward
    with nn.context_scope(ctx), nn.auto_forward():
        o = F.image_augmentation(i)
    assert o.d.shape == inputs[0].shape

    shape = (3, 5, 8)
    with nn.context_scope(ctx), nn.auto_forward():
        o = F.image_augmentation(i, shape=shape, pad=(2, 2),
                                 min_scale=0.8, max_scale=1.2, angle=0.2,
                                 aspect_ratio=1.1, distortion=0.1,
                                 flip_lr=True, flip_ud=False,
                                 brightness=0.1, brightness_each=True,
                                 contrast=1.1, contrast_center=0.5,
                                 contrast_each=True, noise=0.1, seed=0)
    assert o.d.shape == (inputs[0].shape[0],) + shape
def test_pack_padded_long_sequence_forward_backward(total_length, padding_value, batch_first, shapes, seed, ctx, func_name): if not func_name.endswith("Cuda"): pytest.skip( "PackPaddedSequence tests except for Cuda for very long sequence skips.") from nbla_test_utils import function_tester rng = np.random.RandomState(seed) sequences = [rng.randn(*shape).astype(np.float32) for shape in shapes] padded_sequence = pad_sequence(sequences, batch_first) lengths = np.array([seq.shape[0] for seq in sequences]) inputs = [padded_sequence, lengths] func_args0 = [batch_first] func_args1 = [batch_first, padding_value, total_length] insert_identity = [True, False] # Forward function_tester(rng, F.pack_padded_sequence, ref_pack_padded_sequence, inputs, ctx=ctx, func_name=func_name, func_args=func_args0, backward=[False, False], atol_f=1e-3, atol_b=1e-2, insert_identity=insert_identity) # Backward import nnabla as nn padded_sequence0 = nn.Variable.from_numpy_array( inputs[0]).apply(need_grad=True) lengths = nn.Variable.from_numpy_array(inputs[1]) with nn.context_scope(ctx), nn.auto_forward(): # Pack backward padded_sequence0.g = rng.randn(*padded_sequence0.shape) packed_sequence0, batch_sizes = F.pack_padded_sequence( padded_sequence0, lengths, *func_args0) g = rng.randn(*packed_sequence0.shape) packed_sequence0.g = g packed_sequence0.parent.backward([padded_sequence0, lengths], [packed_sequence0, batch_sizes], [False, False]) # Unpack packed_sequence1 = nn.Variable.from_numpy_array(g) padded_sequence1, lengths = F.pad_packed_sequence( packed_sequence1, batch_sizes, *func_args1) # Compare w/o accum np.testing.assert_allclose(padded_sequence0.g.flatten(), padded_sequence1.d.flatten( )[:np.prod(padded_sequence0.shape)], atol=1e-4, err_msg="{} test (w/o accum) with long sequence failed.".format(func_name)) # Compare w/ accum packed_sequence0.parent.backward([padded_sequence0, lengths], [packed_sequence0, batch_sizes], [True, False]) np.testing.assert_allclose(padded_sequence0.g.flatten() / 2, padded_sequence1.d.flatten( )[:np.prod(padded_sequence0.shape)], atol=1e-4, err_msg="{} test (w/ accum) with long sequence failed.".format(func_name))
def test_one_hot_forward(seed, inshape, shape, ctx, func_name):
    rng = np.random.RandomState(seed)
    # Input
    input = rng.randint(0, shape[0], size=inshape)
    vinput = nn.Variable(input.shape, need_grad=False)
    vinput.d = input
    with nn.context_scope(ctx), nn.auto_forward():
        o = F.one_hot(vinput, shape)
    r = ref_one_hot(input, shape)
    assert np.allclose(o.d, r)
    assert func_name == o.parent.name
def test_large_transform_binary(fname, ctx, func_name):
    if not func_name.endswith('Cuda'):
        pytest.skip('Grid-strided loop is tested only for CUDA backend')

    with nn.context_scope(ctx), nn.auto_forward(True):
        a = nn.Variable.from_numpy_array(
            np.random.randn(1024, 64, 1)).apply(need_grad=True)
        b = nn.Variable.from_numpy_array(
            np.random.randn(1024, 64, 3)).apply(need_grad=True)
        c = F.mul2(a, b)
        c.backward()
def test_random_shift_forward_backward(seed, inshape, shifts, border_mode, ctx, func_name): from nbla_test_utils import function_tester rng = np.random.RandomState(seed) inputs = [rng.randn(*inshape).astype(np.float32)] i = nn.Variable(inputs[0].shape, need_grad=True) i.d = inputs[0] # NNabla forward with nn.context_scope(ctx), nn.auto_forward(): o = F.random_shift(i, shifts, border_mode, 0, seed) result_shifts = (0, 0, 0) max_correl = 0 for shift_amount in itertools.product(*map( tuple, map(lambda x: range(*x), [(-2, 3) for _ in range(len(inshape))]))): r = scipy_shift(inputs[0], shift_amount, mode=border_mode) correl_and_p = pearsonr(o.d.flatten(), r.flatten()) if correl_and_p[0] > max_correl: result_shifts = shift_amount max_correl = correl_and_p[0] ref = scipy_shift(inputs[0], result_shifts, mode=border_mode) if shifts is None: shifts = (0, ) * len(inputs[0].shape) for result, shift_range in zip(result_shifts, shifts): assert abs(result) <= shift_range assert np.allclose(o.d, ref) assert o.parent.name == func_name # Skipping Backward check g = np.random.randn(*i.shape) i.g = g o_grad = np.random.randn(*o.shape) o.g = o_grad o.parent.backward([i], [o]) ref_grad = i.g.copy() - g # Check accum=False with NaN gradient i.g = np.float32('nan') o.parent.backward([i], [o], [False]) assert not np.any(np.isnan(i.g)) # Check if accum option works i.g[...] = 1 o.g = o_grad o.parent.backward([i], [o], [False]) assert np.allclose(i.g, ref_grad, atol=1e-6) # Check if need_grad works i.g[...] = 0 i.need_grad = False o_grad = rng.randn(*i.shape).astype(i.data.dtype) o.backward(o_grad) assert np.all(i.g == 0)
def sr_loss_with_uncertainty(ctx, pred0, pred1, log_v0, log_v1, log_s0, log_s1):
    v0 = F.exp(log_v0)
    v1 = F.exp(log_v1)
    squared_error = F.squared_error(pred0, pred1)
    s0 = F.exp(log_s0)
    s1 = F.exp(log_s1)
    with nn.context_scope(ctx):
        error = squared_error * (1 / v0 + 1 / v1) \
            + (v0 / v1 + v1 / v0) + (s0 / s1 + s1 / s0)
        loss_sr = F.mean(error) * 0.5
    return loss_sr
def test_image_augmentation_forward(seed, shape, ctx, func_name): rng = np.random.RandomState(seed) inputs = [rng.randn(16, 3, 8, 8).astype(np.float32)] i = nn.Variable(inputs[0].shape) # NNabla forward with nn.context_scope(ctx), nn.auto_forward(): o = F.image_augmentation(i) assert o.d.shape == inputs[0].shape func_kargs = { 'shape': shape, 'pad': (2, 2), 'min_scale': 0.8, 'max_scale': 1.2, 'angle': 0.2, 'aspect_ratio': 1.1, 'distortion': 0.1, 'flip_lr': True, 'flip_ud': False, 'brightness': 0.1, 'brightness_each': True, 'contrast': 1.1, 'contrast_center': 0.5, 'contrast_each': True, 'noise': 0.1, 'seed': 0} with nn.context_scope(ctx), nn.auto_forward(): o = F.image_augmentation(i, **func_kargs) assert o.d.shape == (inputs[0].shape[0],) + shape # Checking recomputation from nbla_test_utils import recomputation_test recomputation_test(rng=rng, func=F.image_augmentation, vinputs=[i], func_args=[], func_kwargs=func_kargs, ctx=ctx) func_kargs['seed'] = -1 recomputation_test(rng=rng, func=F.image_augmentation, vinputs=[i], func_args=[], func_kwargs=func_kargs, ctx=ctx)
def solver_tester(rng, solver, ref_solver, solver_args=[], solver_kwargs={},
                  num_itr=5, decay=1e-4, atol=1e-6,
                  ctx=None, solver_name=None):
    if ctx is None:
        ctx = nn.Context()

    # Create params
    p1 = nn.Variable([2, 3, 4])
    p2 = nn.Variable([3, 4, 1, 2])
    p3 = nn.Variable([])
    params = OrderedDict([('zZzZ', p1), ('bbb', p2), ('asdfadfdasd', p3)])
    for p in params.values():
        p.d = rng.randn(*p.shape)
        p.g = rng.randn(*p.shape)

    with nn.context_scope(ctx):
        s = solver(*solver_args, **solver_kwargs)
    s.set_parameters(params)
    if solver_name is not None:
        assert s.name == solver_name

    ref_s = ref_solver(*solver_args, **solver_kwargs)
    ref_s.set_parameters(params)

    # Check weight decay.
    grad_copy = OrderedDict([(k, p.g.copy()) for k, p in iteritems(params)])
    s.weight_decay(decay)
    ref_s.weight_decay(grad_copy, decay)
    for p, ref_p in zip(params.values(), grad_copy.values()):
        assert np.allclose(ref_p, p.g, atol=atol)

    # Check solver update.
    for i in range(num_itr):
        grads = OrderedDict([(k, rng.randn(*p.shape))
                             for k, p in iteritems(params)])
        for k, g in iteritems(grads):
            params[k].g = g
        s.update()
        ref_s.update(grads)
        for p, ref_p in zip(params.values(), ref_s.params.values()):
            assert np.allclose(ref_p, p.d, atol=atol)

    # Check if remove_state_impl works correctly.
    s.clear_parameters()
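# Compact sketch of the solver API exercised by solver_tester (assumed usage,
# not part of the test): create a solver inside a context, register parameters,
# then apply weight decay and an update.
import numpy as np
import nnabla as nn
import nnabla.solvers as S

ctx = nn.Context()
p = nn.Variable([2, 2])
p.d = np.random.randn(*p.shape)
p.g = np.random.randn(*p.shape)

with nn.context_scope(ctx):
    solver = S.Sgd(lr=0.1)
solver.set_parameters({'p': p})
solver.weight_decay(1e-4)  # adds decay * p.d to p.g in place
solver.update()            # SGD: p.d -= lr * p.g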
def test_gru_double_backward(seed, num_layers, dropout, bidirectional, training, seq_len, batch_size, input_size, hidden_size, with_bias, ctx, func_name): from nbla_test_utils import backward_function_tester with nn.context_scope(ctx): rng = np.random.RandomState(seed) num_directions = 1 if bidirectional: num_directions = 2 inputs = [ rng.randn(seq_len, batch_size, input_size).astype(np.float32) * 0.1 ] inputs += [ rng.randn(num_layers, num_directions, batch_size, hidden_size).astype(np.float32) ] inputs += [ rng.randn(num_directions, 3, hidden_size, input_size + hidden_size) ] if num_layers > 1: inputs += [ rng.randn(max(1, num_layers - 1), num_directions, 3, hidden_size, num_directions * hidden_size + hidden_size).astype(np.float32) ] else: inputs += [None] if with_bias: inputs += [ rng.randn(num_layers, num_directions, 4, hidden_size).astype(np.float32) ] else: inputs += [None] backward = [False for _ in inputs] if training: backward = [True for _ in inputs] backward_function_tester(rng, F.gru, inputs, func_kwargs=dict(num_layers=num_layers, dropout=dropout, bidirectional=bidirectional, training=training), atol_f=1e-6, dstep=1e-3, backward=backward, ctx=ctx, skip_backward_check=True)
def test_random_choice_without_replacement(ctx, func_name, seed):
    x = nn.Variable.from_numpy_array(np.array([0, 1, 2]).astype(np.int32))
    w = nn.Variable.from_numpy_array(np.array([5, 5, 90]).astype(np.int32))
    x.need_grad = True
    w.need_grad = True
    repeats = 1000
    with nn.context_scope(ctx):
        y = F.random_choice(x, w, shape=[w.size], replace=False, seed=seed)
    r = np.zeros((repeats, w.size)).astype(np.int32)
    for i in range(repeats):
        y.forward()
        r[i] = y.d
    assert np.all(np.bincount(r.flatten()) == x.size * [repeats])
def test_copy_from():
    shape = [2, 3, 4]
    src = nn.NdArray(shape)
    dst = nn.NdArray(shape)

    src.data = 0
    src.cast(dtype=np.uint8)
    dst.copy_from(src, use_current_context=False)
    assert dst.dtype == np.uint8

    from nnabla.ext_utils import get_extension_context
    with nn.context_scope(get_extension_context('cpu', dtype='float')):
        dst.copy_from(src, use_current_context=True)
    assert dst.dtype == np.float32
def cnn_model_003(ctx, h, act=F.elu, do=True, test=False): with nn.context_scope(ctx): if not test: b, c, s, s = h.shape h = F.image_augmentation(h, (c, s, s), min_scale=1.0, max_scale=1.5, angle=0.5, aspect_ratio=1.3, distortion=0.2, flip_lr=True) # Convblock0 h = conv_unit(h, "conv00", 128, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test) h = F.max_pooling(h, (2, 2)) # 32 -> 16 with nn.parameter_scope("bn0"): h = PF.batch_normalization(h, batch_stat=not test) if not test and do: h = F.dropout(h) # Convblock 1 h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test) h = F.max_pooling(h, (2, 2)) # 16 -> 8 with nn.parameter_scope("bn1"): h = PF.batch_normalization(h, batch_stat=not test) if not test and do: h = F.dropout(h) # Convblock 2 h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act, test=test) # 8 -> 6 h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test) h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test) u = h # Convblock 3 h = conv_unit(h, "conv23", 10, k=1, s=1, p=0, act=act, test=test) h = F.average_pooling(h, (6, 6)) with nn.parameter_scope("bn2"): h = PF.batch_normalization(h, batch_stat=not test) pred = F.reshape(h, (h.shape[0], np.prod(h.shape[1:]))) # Uncertainty u = conv_unit(u, "u0", 10, k=1, s=1, p=0, act=act, test=test) u = F.average_pooling(u, (6, 6)) with nn.parameter_scope("u0bn"): u = PF.batch_normalization(u, batch_stat=not test) log_var = F.reshape(u, (u.shape[0], np.prod(u.shape[1:]))) return pred, log_var
def test_unlinked():
    v = nn.Variable([2, 3, 4], need_grad=True)
    grad = np.random.randn(*v.shape).astype(np.float32)
    v.g = grad
    v.d = np.random.randn(*v.shape)
    import nnabla.functions as F
    with nn.context_scope(nn.Context()), nn.auto_forward():
        v2 = F.identity(v)
        v2_u = v2.unlinked()
        v3 = F.identity(v2_u)
    v2_u.grad.zero()
    v2_g = v2_u.g.copy()
    v3.backward(clear_buffer=False)
    assert type(v2_u) == type(v2)
    assert np.all(v.g == grad)
    assert np.all(v2_u.g == v2.g)
    assert np.all(v2_u.g == v2_g + 1)
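# Short illustration of what unlinked() gives you (added sketch, not part of
# the original test): the returned variable shares the same array buffers but
# forgets its parent, so the computation graph is cut at that point.
import numpy as np
import nnabla as nn
import nnabla.functions as F

x = nn.Variable([2, 2], need_grad=True)
x.d = 1
with nn.auto_forward():
    y = F.identity(x)
y_u = y.unlinked()
assert y_u.parent is None      # the graph edge is forgotten
assert y.parent is not None    # the original variable still has one
assert np.all(y_u.d == y.d)    # while the data buffer is shared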
def test_reshape():
    v = nn.Variable([2, 3, 4], need_grad=True)
    grad = np.random.randn(*v.shape).astype(np.float32)
    v.g = grad
    v.d = np.random.randn(*v.shape)
    import nnabla.functions as F
    with nn.context_scope(nn.Context()), nn.auto_forward():
        v2 = F.identity(v)
        v2_s = v2.reshape((3, 4, 2))
        v3 = F.identity(v2_s)
    v3.backward(clear_buffer=False)
    assert np.all(v2_s.g.flat == v2.g.flat)
    assert np.all(v2_s.g == 1)
    v2.d = 1
    assert np.all(v2_s.d == 1)
    v2.g = 1.5
    assert np.all(v2_s.g == 1.5)
def test_forward_backward():
    batch_size, m, h, w = 4, 3, 32, 32
    extension_module = "cpu"
    device_id = 0
    ctx = extension_context(extension_module, device_id=device_id)

    x_l_data = np.random.randn(batch_size, m, h, w)
    y_l_data = (np.random.rand(batch_size, 1) * 10).astype(np.int32)
    x_l = nn.Variable(x_l_data.shape)
    y_l = nn.Variable(y_l_data.shape)
    x_l.d = x_l_data
    y_l.d = y_l_data

    pred = cnn_model_003(ctx, x_l)
    with nn.context_scope(ctx):
        loss = F.mean(F.softmax_cross_entropy(pred, y_l))
    loss.forward()
    loss.backward()
def sr_loss_with_uncertainty_and_coef(ctx, pred0, pred1, log_var0, log_var1):
    c0 = srwu_learned_coef(ctx, log_var0)
    c1 = srwu_learned_coef(ctx, log_var1)
    sc0 = sigmas_learned_coef(ctx, log_var0, log_var1)
    sc1 = sigmas_learned_coef(ctx, log_var1, log_var0)
    c0.need_grad = False
    c1.need_grad = False
    sc0.need_grad = False
    sc1.need_grad = False

    # TODO: squared error/absolute error
    s0 = F.exp(log_var0)
    s1 = F.exp(log_var1)
    squared_error = F.squared_error(pred0, pred1)
    with nn.context_scope(ctx):
        loss_sr = F.mean(
            squared_error * (c0 / s0 + c1 / s1)
            + (sc0 * s0 / s1 + sc1 * s1 / s0)) * 0.5
    return loss_sr
def _setup(self, delete=True):
    """Create a function instance and execute setup.

    Args:
        delete (bool): Delete buffered variables.
    """
    if delete:
        self.clear()
    with nn.context_scope(self.ctx):
        outputs = self.func(
            *(self.inputs_f + self.func_args), **self.func_kwargs)
        if not hasattr(outputs, '__iter__'):
            self.outputs = [outputs]
        else:
            self.outputs = outputs
    self.func_ins = self.outputs[0].parent
    self.inputs = self.func_ins.inputs
def test_random_flip_forward_backward(seed, axes, ctx, func_name): from nbla_test_utils import cap_ignore_region, function_tester rng = np.random.RandomState(seed) inputs = [rng.randn(2, 3, 4).astype(np.float32)] i = nn.Variable(inputs[0].shape, need_grad=True) i.d = inputs[0] # NNabla forward with nn.context_scope(ctx), nn.auto_forward(): o = F.random_flip(i, axes, 0, seed) flip_close = np.allclose(o.d, ref_flip(inputs[0], axes)) assert flip_close or (not flip_close and np.allclose(o.d, i.d)) assert o.parent.name == func_name # NNabla backward orig_grad = rng.randn(*i.shape).astype(i.data.dtype) i.g[...] = orig_grad o_grad = rng.randn(*i.shape).astype(i.data.dtype) o.g = o_grad o.parent.backward([i], [o]) # Verify if flip_close: ref_grad = ref_flip(o_grad, axes) else: ref_grad = o_grad assert np.allclose(i.g, orig_grad + ref_grad) # Check if accum option works i.g[...] = 1 o.g = o_grad o.parent.backward([i], [o], [False]) assert np.allclose(i.g, ref_grad) # Check accum=False with NaN gradient i.g = np.float32('nan') o.parent.backward([i], [o], [False]) assert not np.any(np.isnan(i.g)) # Check if need_grad works i.g[...] = 0 i.need_grad = False o.backward(o_grad) assert np.all(i.g == 0)
def cnn_model_003(ctx, x, act=F.elu, do=True, test=False): with nn.context_scope(ctx): # Convblock0 h = conv_unit(x, "conv00", 128, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv01", 128, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv02", 128, k=3, s=1, p=1, act=act, test=test) h = F.max_pooling(h, (2, 2)) # 28 -> 14 with nn.parameter_scope("bn0"): h = PF.batch_normalization(h, batch_stat=not test) if not test and do: h = F.dropout(h) # Convblock 1 h = conv_unit(h, "conv10", 256, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv11", 256, k=3, s=1, p=1, act=act, test=test) h = conv_unit(h, "conv12", 256, k=3, s=1, p=1, act=act, test=test) h = F.max_pooling(h, (2, 2)) # 14 -> 7 with nn.parameter_scope("bn1"): h = PF.batch_normalization(h, batch_stat=not test) if not test and do: h = F.dropout(h) # Convblock 2 h = conv_unit(h, "conv20", 512, k=3, s=1, p=0, act=act, test=test) # 7 -> 5 h = conv_unit(h, "conv21", 256, k=1, s=1, p=0, act=act, test=test) h = conv_unit(h, "conv22", 128, k=1, s=1, p=0, act=act, test=test) u = h # Convblock 3 h = conv_unit(h, "conv23", 10, k=1, s=1, p=0, act=act, test=test) h = F.average_pooling(h, (5, 5)) with nn.parameter_scope("bn2"): h = PF.batch_normalization(h, batch_stat=not test) pred = F.reshape(h, (h.shape[0], np.prod(h.shape[1:]))) # Uncertainty u = conv_unit(u, "u0", 10, k=1, s=1, p=0, act=act, test=test) u = F.average_pooling(u, (5, 5)) with nn.parameter_scope("u0bn"): u = PF.batch_normalization(u, batch_stat=not test) log_var = F.reshape(u, (u.shape[0], np.prod(u.shape[1:]))) return pred, log_var
def test_dropout_forward_backward(p, seed, ctx, func_name):
    from nbla_test_utils import cap_ignore_region, function_tester
    rng = np.random.RandomState(seed)
    inputs = [
        cap_ignore_region(
            rng.randn(2, 3, 4).astype(np.float32) * 2,
            (-1e-3, 1e-3))]  # Ensure there is no zero.
    i = nn.Variable(inputs[0].shape, need_grad=True)
    i.d = inputs[0]
    # NNabla forward
    with nn.context_scope(ctx), nn.auto_forward():
        o = F.dropout(i, p)
    scale = 1. / (1. - p)
    mask = o.d != 0
    assert np.allclose(o.d, i.d * mask * scale)
    assert o.parent.name == func_name

    # NNabla backward
    orig_grad = rng.randn(*i.shape).astype(i.data.dtype)
    i.g[...] = orig_grad
    o_grad = rng.randn(*i.shape).astype(i.data.dtype)
    o.backward(o_grad)
    ref_grad = o_grad * mask * scale

    # Verify
    assert np.allclose(i.g, orig_grad + ref_grad)

    # Check if accum option works
    i.g[...] = 1
    o.g = o_grad
    o.parent.backward([i], [o], [False])
    assert np.allclose(i.g, ref_grad)

    # Check accum=False with NaN gradient
    i.g = np.float32('nan')
    o.parent.backward([i], [o], [False])
    assert not np.any(np.isnan(i.g))

    # Check if need_grad works
    i.g[...] = 0
    i.need_grad = False
    o.backward(o_grad)
    assert np.all(i.g == 0)
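# Added note on the scaling verified above: with inverted dropout, surviving
# activations are multiplied by 1 / (1 - p), so the expected output matches the
# input. A quick Monte Carlo illustration (not part of the test):
import numpy as np
import nnabla as nn
import nnabla.functions as F

p = 0.3
x = nn.Variable.from_numpy_array(np.ones((1, 1000), dtype=np.float32))
with nn.auto_forward():
    y = F.dropout(x, p)
assert np.isclose(y.d[y.d != 0].mean(), 1.0 / (1.0 - p), atol=1e-5)
assert np.isclose(y.d.mean(), 1.0, atol=0.1)  # E[y] ~= x for large widths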