def modify(self, f, inputs):
    outputs = f.outputs[0]
    # Skip the graph terminal (no downstream function consumes this output)
    if len(outputs.function_references) == 0:
        return
    # Check whether a fused-BN block starts here
    if not self._block and self._is_fused_bn_block(f, inputs):
        self._block = True
    # Remove BatchNormalization, keeping its args for the fused call
    if self._block and f.info.type_name == 'BatchNormalization':
        self._bn_args = f.info.args
        self._name = self.get_parameter_scope(inputs[0])
        return inputs[0]
    # Remove Add2, keeping the residual branch
    if self._block and f.info.type_name == 'Add2':
        self._add2_input1 = inputs[1]
        return inputs[0]
    # Remove the nonlinearity, then connect the fused BN in its place
    if self._block and f.info.type_name in self._fct_set:
        f_non_linear = self._fct_set[f.info.type_name]
        h = PF.fused_batch_normalization(
            inputs[0], self._add2_input1,
            nonlinearity=f_non_linear,
            **self._bn_args,
            name='fused{}-{}'.format(self._name, self._cnt))
        self._cnt += 1
        self._block = False
        return h

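# Usage sketch (not from the original source). Assuming `modify` above belongs
# to a FunctionModifier subclass, here given the hypothetical name
# FusedBNModifier, it can be driven by nnabla's experimental GraphConverter,
# which walks a graph and calls `modify` on every function node:
from nnabla.experimental.graph_converters import GraphConverter

# y = ...  # output Variable of a graph containing BN -> Add2 -> ReLU chains
# y_fused = GraphConverter([FusedBNModifier()]).convert(y)
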
def test_FLOPsEstimator():
    x = nn.Variable((1, 3, 12, 12))
    y = PF.depthwise_convolution(x, kernel=(5, 5), with_bias=True)
    t = PF.fused_batch_normalization(y)
    z = F.relu6(F.sigmoid(PF.affine(t, (3, 3), base_axis=2) + 3))
    z = F.global_average_pooling(z)

    est = FLOPsEstimator()
    assert est.predict(z) == 17644

def __call__(self, x):
    '''
    Defines a ResNet-like network according to the configuration specified.

    Args:
        x: An input Variable with `NCHW` layout if `channel_last=False`,
            else `NHWC`.

    Returns:
        * An output `Variable` of the classification layer
        * Intermediate `Variable` outputs from the input and the output of
          each cell

    '''
    logger.debug(x.shape)

    # First convolution
    axes = [get_channel_axis(self.channel_last)]
    with nn.parameter_scope("conv1"):
        r = pf_convolution(x, 64, (7, 7), stride=(2, 2),
                           channel_last=self.channel_last)
        r = PF.fused_batch_normalization(r, axes=axes,
                                         batch_stat=not self.test)
        mp_opts = (dict(ignore_border=False) if self.max_pooling_ceil_border
                   else dict(pad=(1, 1)))
        r = F.max_pooling(r, (3, 3), (2, 2),
                          channel_last=self.channel_last, **mp_opts)
    hidden = {}
    hidden['r0'] = r
    logger.debug(r.shape)

    # Create cells, each of which consists of blocks applied repeatedly
    cell_configs = self.get_cell_configurations(self.num_layers)
    for i, (counts, ochannels, strides) in enumerate(zip(*cell_configs)):
        with nn.parameter_scope("res{}".format(i + 1)):
            r = self.cell(r, ochannels, counts, (strides,) * 2)
        hidden['r{}'.format(i + 1)] = r
        logger.debug(r.shape)

    # Global average pooling
    pool_shape = get_spatial_shape(r.shape, self.channel_last)
    r = F.average_pooling(r, pool_shape, channel_last=self.channel_last)

    # Final classification layer
    with nn.parameter_scope("fc"):
        r = pf_affine(r, self.num_classes, channel_last=self.channel_last)
    return r, hidden

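# Usage sketch (not from the original source). __call__ above is a method of
# a network class whose constructor is assumed to set num_layers, num_classes,
# test, channel_last and the pooling option; ResNetBase and its constructor
# signature below are hypothetical names for illustration:
import nnabla as nn

net = ResNetBase(num_classes=1000, num_layers=50, test=True,
                 channel_last=False)
x = nn.Variable((1, 3, 224, 224))  # NCHW input
y, hidden = net(x)                 # logits and per-cell activations
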
def bn(self, h, z=None, no_relu=False):
    axes = [get_channel_axis(self.channel_last)]
    if no_relu:
        # Plain BN, optionally followed by a residual add, with no activation
        h = PF.batch_normalization(h, axes=axes, batch_stat=not self.test)
        if z is None:
            return h
        return F.add2(z, h)
    # BN + (optional residual add) + ReLU as a single fused function
    return PF.fused_batch_normalization(h, z, axes=axes,
                                        batch_stat=not self.test)

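# Minimal sketch (not from the original source): the fused call used by `bn`
# computes ReLU(BN(h) + z) as a single FusedBatchNormalization node, whereas
# the no_relu branch assembles the BN + add composition from separate
# functions and skips the activation.
import nnabla as nn
import nnabla.parametric_functions as PF

h = nn.Variable((4, 64, 8, 8))
z = nn.Variable((4, 64, 8, 8))
with nn.parameter_scope('bn_demo'):
    y = PF.fused_batch_normalization(h, z, axes=[1], batch_stat=True)
print(y.parent.info.type_name)  # FusedBatchNormalization
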
def fbn_resblock(x, maps, kernel=(3, 3), pad=(1, 1), stride=(1, 1),
                 test=False, name='fbn-convblock'):
    with nn.parameter_scope(name):
        h = PF.convolution(x, maps, kernel=kernel, pad=pad, stride=stride,
                           with_bias=False)
        # x doubles as the residual input to the fused BN (BN + add + ReLU)
        h = PF.fused_batch_normalization(h, x, batch_stat=not test)
    return h

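# Usage sketch (not from the original source). Because `x` is reused as the
# residual input, `maps` must equal x's channel count and the convolution must
# preserve the spatial shape (the defaults satisfy both):
import nnabla as nn

x = nn.Variable((4, 16, 32, 32))
h = fbn_resblock(x, maps=16, test=True, name='block1')  # -> (4, 16, 32, 32)
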
def test_pf_fused_batch_normalization_execution(
        g_rng, inshape, axes, decay_rate, eps, batch_stat, nonlinearity,
        output_stat, param_init, fix_parameters, with_z, rng):
    p_shape = _get_bn_parameter_shape(inshape, axes)
    if param_init:
        beta_init = np.ones(p_shape)
        gamma_init = np.ones(p_shape) * 2
        mean_init = np.ones(p_shape) * 0.5
        var_init = np.ones(p_shape) * 1.5
        param_init = dict(
            beta=beta_init,
            gamma=gamma_init,
            mean=mean_init,
            var=var_init)
    rng = process_rng(rng)

    x = nn.Variable.from_numpy_array(g_rng.randn(*inshape))
    z = None
    if with_z:
        z = nn.Variable.from_numpy_array(g_rng.randn(*inshape))

    kw = {}
    insert_if_not_none(kw, 'z', z)
    insert_if_not_default(kw, 'axes', axes, [1])
    insert_if_not_default(kw, 'decay_rate', decay_rate, 0.9)
    insert_if_not_default(kw, 'eps', eps, 1e-5)
    insert_if_not_default(kw, 'batch_stat', batch_stat, True)
    insert_if_not_default(kw, 'nonlinearity', nonlinearity, 'relu')
    insert_if_not_default(kw, 'output_stat', output_stat, False)
    insert_if_not_default(kw, 'fix_parameters', fix_parameters, False)
    insert_if_not_none(kw, 'param_init', param_init)

    # Check creation
    y = PF.fused_batch_normalization(x, **kw)

    # Check parameter values before execution
    h = y[0] if output_stat else y
    if with_z:
        _, b, g, m, v, _ = h.parent.inputs
    else:
        _, b, g, m, v = h.parent.inputs
    if param_init:
        assert np.allclose(b.d, beta_init)
        assert np.allclose(g.d, gamma_init)
        assert np.allclose(m.d, mean_init)
        assert np.allclose(v.d, var_init)
    else:
        assert np.allclose(b.d, 0)
        assert np.allclose(g.d, 1)
        assert np.allclose(m.d, 0)
        assert np.allclose(v.d, 1)

    # Check execution
    if output_stat:
        forward_backward_all(*y)
    else:
        y.forward()
        # TODO: Enable when implemented
        if batch_stat:
            y.backward()

    # Check values
    # TODO

    # Check args
    assert h.parent.info.type_name == 'FusedBatchNormalization'
    args = h.parent.info.args
    assert args['axes'] == axes
    assert np.isclose(args['decay_rate'], decay_rate)
    assert np.isclose(args['eps'], eps)
    assert args['batch_stat'] == batch_stat
    assert args['nonlinearity'] == nonlinearity

    # Check created parameters
    assert h.parent.inputs[0] == x
    num_inputs = 5
    if with_z:
        num_inputs = 6
        assert h.parent.inputs[5] == z
    assert len(h.parent.inputs) == num_inputs
    assert len(nn.get_parameters()) == 2
    assert len(nn.get_parameters(grad_only=False)) == 4
    beta, gamma, mean, var = [
        nn.get_parameters(grad_only=False)['bn/' + name]
        for name in ['beta', 'gamma', 'mean', 'var']]
    assert beta.shape == p_shape
    assert gamma.shape == p_shape
    assert mean.shape == p_shape
    assert var.shape == p_shape
    assert beta.need_grad
    assert gamma.need_grad
    assert not mean.need_grad
    assert not var.need_grad
    _, b, g, m, v = h.parent.inputs[:5]
    assert b.need_grad == (not fix_parameters)
    assert g.need_grad == (not fix_parameters)
    assert not m.need_grad
    assert not v.need_grad

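# Minimal sketch (not from the original source) of the graph layout the test
# above asserts: PF.fused_batch_normalization creates one
# FusedBatchNormalization node whose inputs are (x, beta, gamma, mean, var),
# with z appended when a residual input is given.
import numpy as np
import nnabla as nn
import nnabla.parametric_functions as PF

nn.clear_parameters()
x = nn.Variable.from_numpy_array(np.random.randn(8, 16, 4, 4))
y = PF.fused_batch_normalization(x)  # defaults: axes=[1], nonlinearity='relu'
print(y.parent.info.type_name)       # FusedBatchNormalization
print(len(y.parent.inputs))          # 5 (no residual input z)
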
def bn(h, z=None):
    # `channel_last` and `test` are free variables resolved in an outer scope
    axes = [3 if channel_last else 1]
    return PF.fused_batch_normalization(h, z, axes=axes, batch_stat=not test)

def resnet_imagenet(x, num_classes, num_layers, shortcut_type, test,
                    tiny=False, channel_last=False):
    """
    Args:
        x : Variable
        num_classes : Number of output classes
        num_layers : Number of layers of ResNet, chosen from
            (18, 34, 50, 101, 152)
        shortcut_type : 'c', 'b', ''
            'c' : Always use Convolution.
            'b' : Use Convolution if the numbers of input and output
                  channels mismatch.
            '' : Use identity mapping if the channels match, otherwise
                  zero padding.
        test : Construct the net for testing.
        tiny (bool): Tiny ImageNet mode. The input image must be (3, 56, 56).
        channel_last (bool): The channel dimension comes last in an input
            image, a.k.a. NHWC order.
    """
    layers = {
        18: ((2, 2, 2, 2), basicblock, 1),
        34: ((3, 4, 6, 3), basicblock, 1),
        50: ((3, 4, 6, 3), bottleneck, 4),
        101: ((3, 4, 23, 3), bottleneck, 4),
        152: ((3, 8, 36, 3), bottleneck, 4)}
    counts, block, ocoef = layers[num_layers]
    logger.debug(x.shape)

    axes = [3 if channel_last else 1]
    with nn.parameter_scope("conv1"):
        stride = (1, 1) if tiny else (2, 2)
        r = pf_convolution(x, 64, (7, 7), stride=stride,
                           channel_last=channel_last)
        r = PF.fused_batch_normalization(r, axes=axes, batch_stat=not test)
        r = F.max_pooling(r, (3, 3), stride, pad=(1, 1),
                          channel_last=channel_last)
    hidden = {}
    hidden['r0'] = r
    ochannels = [64, 128, 256, 512]
    strides = [1, 2, 2, 2]
    logger.debug(r.shape)
    for i in range(4):
        with nn.parameter_scope("res{}".format(i + 1)):
            r = layer(r, block, ochannels[i] * ocoef, counts[i],
                      (strides[i], strides[i]), shortcut_type, test,
                      channel_last=channel_last)
        hidden['r{}'.format(i + 1)] = r
        logger.debug(r.shape)
    pool_shape = r.shape[-2:]
    if channel_last:
        pool_shape = r.shape[1:3]
    r = F.average_pooling(r, pool_shape, channel_last=channel_last)
    with nn.parameter_scope("fc"):
        r = pf_affine(r, num_classes, channel_last=channel_last)
    return r, hidden

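# Usage sketch (not from the original source). A standard 224x224 ImageNet
# input is assumed here for the non-tiny setting:
import nnabla as nn

x = nn.Variable((1, 3, 224, 224))
y, hidden = resnet_imagenet(x, num_classes=1000, num_layers=50,
                            shortcut_type='b', test=True)
# y: classification logits; hidden: per-stage activations r0..r4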