def __init__(self, nfm, first=False, strides=1, batch_norm=False):
    self.trunk = None
    self.side_path = None
    main_path = [
        Convolution(**conv_params(1, nfm, strides=strides, batch_norm=batch_norm)),
        Convolution(**conv_params(3, nfm, batch_norm=batch_norm)),
        Convolution(**conv_params(1, nfm * 4, relu=False, batch_norm=False))
    ]

    if first or strides == 2:
        self.side_path = Convolution(**conv_params(
            1, nfm * 4, strides=strides, relu=False, batch_norm=False))
    else:
        if batch_norm:
            main_path = [BatchNorm(), Activation(Rectlin())] + main_path
        else:
            main_path = [Activation(Rectlin())] + main_path

    if strides == 2:
        if batch_norm:
            self.trunk = Sequential([BatchNorm(), Activation(Rectlin())])
        else:
            self.trunk = Sequential([Activation(Rectlin())])

    self.main_path = Sequential(main_path)
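A usage sketch of the residual block defined above (names follow this section; the feature-map counts are illustrative only):

# Illustrative only; which sub-paths get built depends on first and strides.
block_first = f_module(16, first=True)   # 1x1 side path projects the input; no trunk
block_plain = f_module(16)               # pre-activation (Rectlin, plus BatchNorm if enabled) prepended to the main path
block_down = f_module(32, strides=2)     # strided side path and a shared pre-activation trunk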
def __init__(self, inputs, stage_depth,
             batch_norm=True, activation=True, preprocess=True):
    nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * stage_depth)]
    strides = [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]

    layers = []
    if preprocess:
        layers = [Preprocess(functor=cifar_mean_subtract)]

    parallel_axis = inputs['image'].axes.batch_axes()
    with ng.metadata(device_id=('1', '2'), parallel=parallel_axis[0]):
        layers.append(Convolution(**conv_params(3, 16, batch_norm=batch_norm)))
        layers.append(f_module(nfms[0], first=True))
        for nfm, stride in zip(nfms[1:], strides):
            layers.append(f_module(nfm, strides=stride))

    if batch_norm:
        layers.append(BatchNorm())
    if activation:
        layers.append(Activation(Rectlin()))
    layers.append(Pool2D(8, strides=2, op='avg'))
    layers.append(Affine(axes=ax.Y, weight_init=KaimingInit(),
                         batch_norm=batch_norm, activation=Softmax()))
    self.layers = layers
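For reference, the stage bookkeeping above works out as follows (pure arithmetic from the two list comprehensions):

# stage_depth = 2:
# sorted(list(range(3)) * 2) -> [0, 0, 1, 1, 2, 2]
# nfms                       -> [16, 16, 32, 32, 64, 64]   # 2 ** (stage + 4)
# strides                    -> [1, 2, 1, 2, 1]            # 2 whenever the feature-map count grows
# f_module is applied to nfms[1:] with these strides, so the network
# downsamples exactly where it widens.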
def test_batchnorm_bprop(input_placeholder, bn_params, transformer_factory):
    layer = BatchNorm(**bn_params)
    fprop = layer(input_placeholder)

    # Derivatives to check
    bprop_vars = [input_placeholder, layer.gamma, layer.beta]

    delta_placeholder = ng.placeholder(fprop.axes)
    bprops = [ng.deriv(fprop, var, delta_placeholder) for var in bprop_vars]

    with ExecutorFactory() as ex:
        # Create derivative executor
        bprop_function = ex.executor(bprops, input_placeholder, delta_placeholder)

        # Generate data
        x = rng.uniform(0, 1, input_placeholder.axes)
        delta = rng.uniform(-.1, .1, delta_placeholder.axes)

        # Compute reference bprop
        dx_ref, dgamma_ref, dbeta_ref = BatchNormReference(x, **bn_params).bprop(delta)

        # Compute ngraph bprop
        dx, dgamma, dbeta = bprop_function(x, delta)

        assert ng.testing.allclose(dx, dx_ref, rtol=rtol, atol=atol)
        assert ng.testing.allclose(dgamma, dgamma_ref, rtol=rtol, atol=atol)
        assert ng.testing.allclose(dbeta, dbeta_ref, rtol=rtol, atol=atol)
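BatchNormReference is not shown in this section. A minimal NumPy sketch of the backward pass such a reference could implement, assuming a (features, batch) layout, biased variance, and the standard batch norm gradient formulas (the function name and signature are assumptions, not the actual test fixture):

import numpy as np

def batchnorm_bprop_reference(x, delta, gamma, eps):
    # Forward-pass statistics over the batch axis (biased variance)
    xmean = x.mean(axis=1, keepdims=True)
    xvar = x.var(axis=1, keepdims=True)
    xhat = (x - xmean) / np.sqrt(xvar + eps)

    # Parameter gradients: one entry per feature
    dbeta = delta.sum(axis=1)
    dgamma = (delta * xhat).sum(axis=1)

    # Input gradient via the standard batch norm backward formula
    dxhat = delta * gamma.reshape(-1, 1)
    dx = (dxhat - dxhat.mean(axis=1, keepdims=True)
          - xhat * (dxhat * xhat).mean(axis=1, keepdims=True)) / np.sqrt(xvar + eps)
    return dx, dgamma, dbeta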
def __init__(self, inputs, dataset, stage_depth,
             batch_norm=False, activation=False, preprocess=False):
    nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * stage_depth)]
    strides = [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]

    layers = []
    if preprocess and dataset == 'cifar10':
        layers = [Preprocess(functor=cifar_mean_subtract)]
    layers.append(Convolution(**conv_params(3, 16, batch_norm=batch_norm)))
    layers.append(f_module(nfms[0], first=True, batch_norm=batch_norm))

    for nfm, stride in zip(nfms[1:], strides):
        layers.append(f_module(nfm, strides=stride, batch_norm=batch_norm))

    if batch_norm:
        layers.append(BatchNorm())
    if activation:
        layers.append(Activation(Rectlin()))
    layers.append(Pool2D(8, strides=2, op='avg'))

    if dataset == 'cifar10':
        ax.Y.length = 10
        layers.append(Affine(axes=ax.Y, weight_init=KaimingInit(),
                             batch_norm=batch_norm, activation=Softmax()))
    elif dataset == 'i1k':
        ax.Y.length = 1000
        layers.append(Affine(axes=ax.Y, weight_init=KaimingInit(),
                             batch_norm=batch_norm, activation=Softmax()))
    else:
        raise ValueError("Incorrect dataset provided")
    super(mini_residual_network, self).__init__(layers=layers)
def test_conv_batchnorm_fprop(conv_input_placeholder, bn_params):
    """Check that batch norm is computed across multiple axes and that the
    side-effect variables (running mean and variance) are tracked properly.
    """
    layer = BatchNorm(**bn_params)
    fprop = layer(conv_input_placeholder)

    with ExecutorFactory() as ex:
        # Compute executors
        fprop_function = ex.executor(fprop, conv_input_placeholder)
        stats_function = ex.executor([ng.value_of(layer.gmean),
                                      ng.value_of(layer.gvar)])

        # Initial conditions for tracked variables
        bn_params['gmean'] = 0.0
        bn_params['gvar'] = 1.0
        bn_params['axis'] = (1, 2, 3, )

        # Test over 2 iterations to make sure values update properly
        for i in range(2):
            # Generate data
            x = rng.uniform(0, 1, conv_input_placeholder.axes)

            # Compute reference fprop and stats
            batch_norm_reference = BatchNormReference(x, **bn_params)
            out_ref, bn_params['gmean'], bn_params['gvar'] = batch_norm_reference.fprop

            # Compute ngraph fprop and stats
            out = fprop_function(x)
            gm, gv = stats_function()

            ng.testing.assert_allclose(out, out_ref, rtol=rtol, atol=atol)
            ng.testing.assert_allclose(gm, bn_params['gmean'], rtol=rtol, atol=atol)
            ng.testing.assert_allclose(gv, bn_params['gvar'], rtol=rtol, atol=atol)
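The forward reference is also not shown. A minimal NumPy sketch of what a convolutional batch norm reference could compute, assuming reduction over every axis except the channel axis and the same exponential update of the running statistics used in the fully-connected test below (the function name and signature are assumptions):

import numpy as np

def batchnorm_fprop_reference(x, gamma, beta, gmean, gvar, rho, eps, axis=(1, 2, 3)):
    # Per-channel statistics: reduce over every axis except the channel axis
    xmean = x.mean(axis=axis, keepdims=True)
    xvar = x.var(axis=axis, keepdims=True)
    out = gamma * (x - xmean) / np.sqrt(xvar + eps) + beta

    # Exponential update of the tracked running mean and variance
    gmean = xmean.squeeze() * (1.0 - rho) + gmean * rho
    gvar = xvar.squeeze() * (1.0 - rho) + gvar * rho
    return out, gmean, gvar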
def test_batchnorm_bprop(input_placeholder, bn_params, transformer_factory):
    if input_placeholder._axes.lengths == (32, 32):
        pytest.config.flex_skip_now("Results mismatch - too strict tolerance (rtol, atol)")
    layer = BatchNorm(**bn_params)
    fprop = layer(input_placeholder)

    # Derivatives to check
    bprop_vars = [input_placeholder, layer.gamma, layer.beta]

    delta_placeholder = ng.placeholder(fprop.axes)
    bprops = [ng.deriv(fprop, var, delta_placeholder) for var in bprop_vars]

    with ExecutorFactory() as ex:
        # Create derivative executor
        bprop_function = ex.executor(bprops, input_placeholder, delta_placeholder)

        # Generate data
        x = rng.uniform(0, 1, input_placeholder.axes)
        delta = rng.uniform(-.1, .1, delta_placeholder.axes)

        # Compute reference bprop
        dx_ref, dgamma_ref, dbeta_ref = BatchNormReference(x, **bn_params).bprop(delta)

        # Compute ngraph bprop
        dx, dgamma, dbeta = bprop_function(x, delta)

        ng.testing.assert_allclose(dx, dx_ref, rtol=rtol, atol=atol)
        ng.testing.assert_allclose(dgamma, dgamma_ref, rtol=rtol, atol=atol)
        ng.testing.assert_allclose(dbeta, dbeta_ref, rtol=rtol, atol=atol)
def test_batchnorm_fprop(batch_size, input_size, rho, epsilon, transformer_factory):
    # Check that batch norm is computed across the feature axis and that the
    # side-effect variables (running mean and variance) are tracked properly.
    np.random.seed(0)

    # Set up inputs
    N = ng.make_axis(batch_size, name='N')
    F = ng.make_axis(input_size)
    input_placeholder = ng.placeholder([F, N])
    layer = BatchNorm(rho, epsilon)
    fprop = layer.train_outputs(input_placeholder)

    with ExecutorFactory() as ex:
        fprop_function = ex.transformer.computation(fprop, input_placeholder)
        stats_function = ex.transformer.computation(
            [ng.value_of(layer.gmean), ng.value_of(layer.gvar)])

        # Initial conditions for tracked variables
        gmean_ref, gvar_ref = 0.0, 1.0

        # Run 2 iterations so the running statistics actually update
        for i in range(2):
            x = np.random.random((input_size, batch_size)).astype(np.float32)
            out = fprop_function(x)
            gm, gv = stats_function()

            # Reference normalization and running statistics
            xmean = x.mean(axis=1, keepdims=True)
            xvar = x.var(axis=1, keepdims=True)
            out_ref = (x - xmean) / np.sqrt(xvar + epsilon)
            gmean_ref = xmean.ravel() * (1.0 - rho) + gmean_ref * rho
            gvar_ref = xvar.ravel() * (1.0 - rho) + gvar_ref * rho

            assert ng.testing.allclose(out, out_ref, atol=1e-6), \
                '%e' % np.max(np.abs(out - out_ref))
            assert ng.testing.allclose(gm, gmean_ref, atol=1e-6), \
                '%e' % np.max(np.abs(gm - gmean_ref))
            assert ng.testing.allclose(gv, gvar_ref, atol=1e-6), \
                '%e' % np.max(np.abs(gv - gvar_ref))
def test_inference_reuse_batch_norm(input_placeholder):
    layer = BatchNorm()
    layer(input_placeholder)
    train_params = (layer.gamma, layer.beta)

    with Layer.inference_mode_on():
        layer(input_placeholder)
    inference_params = (layer.gamma, layer.beta)

    # The same gamma/beta variables must be reused when the layer is
    # reapplied in inference mode
    for train_param, inference_param in zip(train_params, inference_params):
        assert train_param is inference_param
def conv_params(filter_shape, strides=1, batch_norm=None, activation=Rectlin(),
                bias_init=None, filter_init=UniformInit(), padding=0):
    # Pass batch_norm=False to disable batch norm entirely.
    # Leaving batch_norm as None (the default) creates a BatchNorm layer with rho=0.999.
    if batch_norm is None:
        batch_norm = BatchNorm(rho=0.999)
    return dict(filter_shape=filter_shape,
                strides=strides,
                padding=padding,
                batch_norm=batch_norm,
                activation=activation,
                filter_init=filter_init,
                bias_init=bias_init)
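A usage sketch for this conv_params. Note that other snippets in this section use a different conv_params that takes a filter size and feature-map count as separate arguments; the tuple passed here is an assumption about the filter_shape convention:

# Default: Rectlin activation plus BatchNorm(rho=0.999)
conv = Convolution(**conv_params((3, 3, 16)))

# Same shape with batch norm disabled explicitly
conv_plain = Convolution(**conv_params((3, 3, 16), batch_norm=False))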
def __init__(self, stage_depth):
    nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * stage_depth)]
    print(nfms)
    strides = [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]

    layers = [Preprocess(functor=cifar_mean_subtract),
              Convolution(**conv_params(3, 16)),
              f_module(nfms[0], first=True)]

    for nfm, stride in zip(nfms[1:], strides):
        layers.append(f_module(nfm, strides=stride))

    layers.append(BatchNorm())
    layers.append(Activation(Rectlin()))
    layers.append(Pooling((8, 8), pool_type='avg'))
    layers.append(Affine(axes=ax.Y, weight_init=KaimingInit(), activation=Softmax()))
    super(residual_network, self).__init__(layers=layers)
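A usage sketch for the CIFAR-10 network above (training-loop details omitted):

# One residual block per stage, with bottleneck widths of 16, 32, and 64;
# larger stage_depth values produce deeper networks.
model = residual_network(stage_depth=1)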