    def modify(self, f, inputs):
        outputs = f.outputs[0]

        # Terminal output (no downstream functions consume it): nothing to fuse
        if len(outputs.function_references) == 0:
            return

        # Check fused bn block start
        if not self._block and self._is_fused_bn_block(f, inputs):
            self._block = True

        # Remove BatchNormalization
        if self._block and f.info.type_name == 'BatchNormalization':
            self._bn_args = f.info.args
            self._name = self.get_parameter_scope(inputs[0])
            return inputs[0]

        # Remove Add2
        if self._block and f.info.type_name == 'Add2':
            self._add2_input1 = inputs[1]
            return inputs[0]

        # Remove the nonlinearity and reconnect the block as a single fused BN
        if self._block and f.info.type_name in self._fct_set:
            f_non_linear = self._fct_set[f.info.type_name]
            h = PF.fused_batch_normalization(inputs[0],
                                             self._add2_input1,
                                             nonlinearity=f_non_linear,
                                             **self._bn_args,
                                             name='fused{}-{}'.format(
                                                 self._name, self._cnt))
            self._cnt += 1
            self._block = False
            return h
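
For context, a minimal sketch (not part of the original source; shapes, scope names and the relu nonlinearity are illustrative) of the unfused pattern this modifier collapses and the single fused call it is rewritten into:

import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF

x = nn.Variable((8, 64, 32, 32))
skip = nn.Variable((8, 64, 32, 32))

# Unfused pattern: BatchNormalization -> Add2 -> ReLU as three separate functions.
h = PF.convolution(x, 64, (3, 3), pad=(1, 1), with_bias=False, name='conv')
h = PF.batch_normalization(h, batch_stat=True, name='bn')
h = F.relu(F.add2(h, skip))

# Equivalent fused form: one FusedBatchNormalization carrying the add and the ReLU.
g = PF.convolution(x, 64, (3, 3), pad=(1, 1), with_bias=False, name='conv2')
g = PF.fused_batch_normalization(g, skip, nonlinearity='relu',
                                 batch_stat=True, name='fused-bn')
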
Example #2
def test_FLOPsEstimator():
    x = nn.Variable((1, 3, 12, 12))
    y = PF.depthwise_convolution(x, kernel=(5, 5), with_bias=True)
    t = PF.fused_batch_normalization(y)
    z = F.relu6(F.sigmoid(PF.affine(t, (3, 3), base_axis=2) + 3))
    z = F.global_average_pooling(z)

    est = FLOPsEstimator()
    assert est.predict(z) == 17644
Example #3
    def __call__(self, x):
        '''
        Defines a ResNet-like network according to the configuration specified.

        Args:
            x:
                A Variable object which has a shape with a format
                `NCHW` if `channel_last=False` else `NHWC`.

        Returns:
            * An output `Variable` of classification layer
            * Intermediate `Variable` outputs from input and output of each
              cell

        '''

        logger.debug(x.shape)

        # First convolution
        axes = [get_channel_axis(self.channel_last)]
        with nn.parameter_scope("conv1"):
            r = pf_convolution(x,
                               64, (7, 7),
                               stride=(2, 2),
                               channel_last=self.channel_last)
            r = PF.fused_batch_normalization(r,
                                             axes=axes,
                                             batch_stat=not self.test)
            mp_opts = dict(
                ignore_border=False) if self.max_pooling_ceil_border else dict(
                    pad=(1, 1))
            r = F.max_pooling(r, (3, 3), (2, 2),
                              channel_last=self.channel_last,
                              **mp_opts)
        hidden = {}
        hidden['r0'] = r
        logger.debug(r.shape)

        # Create cells, each consisting of repeatedly applied blocks
        cell_configs = self.get_cell_configurations(self.num_layers)
        for i, (counts, ochannels, strides) in enumerate(zip(*cell_configs)):
            with nn.parameter_scope("res{}".format(i + 1)):
                r = self.cell(r, ochannels, counts, (strides, ) * 2)
            hidden['r{}'.format(i + 1)] = r
            logger.debug(r.shape)

        # Global average pooling
        pool_shape = get_spatial_shape(r.shape, self.channel_last)
        r = F.average_pooling(r, pool_shape, channel_last=self.channel_last)

        # Final classification layer
        with nn.parameter_scope("fc"):
            r = pf_affine(r, self.num_classes, channel_last=self.channel_last)
        return r, hidden
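
The helpers get_channel_axis and get_spatial_shape are not shown in this excerpt; judging from the inline equivalents in Example #8, they presumably behave roughly like this sketch (the real implementations may differ in detail):

def get_channel_axis(channel_last):
    # NHWC puts channels on axis 3, NCHW on axis 1.
    return 3 if channel_last else 1

def get_spatial_shape(shape, channel_last):
    # Spatial (H, W) dimensions of a 4-D feature map.
    return shape[1:3] if channel_last else shape[-2:]
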
Example #4
    def bn(self, h, z=None, no_relu=False):
        # BN helper: with no_relu, apply plain batch normalization plus an
        # optional residual add; otherwise use the fused BN + add + ReLU op.
        axes = [get_channel_axis(self.channel_last)]
        if no_relu:
            h = PF.batch_normalization(h, axes=axes, batch_stat=not self.test)
            if z is None:
                return h
            return F.add2(z, h)
        return PF.fused_batch_normalization(h,
                                            z,
                                            axes=axes,
                                            batch_stat=not self.test)
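
A standalone sketch of how a helper like this is typically used inside a residual-style block; the free-function variant, channel counts and scope names below are illustrative assumptions, not part of the original class:

import nnabla as nn
import nnabla.functions as F
import nnabla.parametric_functions as PF

def bn(h, z=None, no_relu=False, channel_last=False, test=False):
    axes = [3 if channel_last else 1]
    if no_relu:
        h = PF.batch_normalization(h, axes=axes, batch_stat=not test)
        return h if z is None else F.add2(z, h)
    # BN + residual add + ReLU in a single fused function.
    return PF.fused_batch_normalization(h, z, axes=axes, batch_stat=not test)

x = nn.Variable((4, 64, 16, 16))
with nn.parameter_scope('res1'):
    h = bn(PF.convolution(x, 64, (3, 3), pad=(1, 1), with_bias=False))
with nn.parameter_scope('res2'):
    h = bn(PF.convolution(h, 64, (3, 3), pad=(1, 1), with_bias=False), z=x)
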
Example #5
def fbn_resblock(x,
                 maps,
                 kernel=(3, 3),
                 pad=(1, 1),
                 stride=(1, 1),
                 test=False,
                 name='fbn-convblock'):
    with nn.parameter_scope(name):
        h = PF.convolution(x,
                           maps,
                           kernel=kernel,
                           pad=pad,
                           stride=stride,
                           with_bias=False)
        h = PF.fused_batch_normalization(h, x, batch_stat=not test)
    return h
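
A hypothetical usage of fbn_resblock above; it assumes the usual nnabla imports (nn, F, PF) and, because x is fed in as the residual input of the fused BN, maps must match the channel count of x:

import nnabla as nn

x = nn.Variable((4, 32, 28, 28))
h = fbn_resblock(x, maps=32, test=False, name='fbn-convblock-0')
print(h.shape)  # (4, 32, 28, 28): 3x3 conv with pad 1 and stride 1 keeps the shape
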
Example #6
def test_pf_fused_batch_normalization_execution(g_rng, inshape, axes,
                                                decay_rate, eps, batch_stat,
                                                nonlinearity, output_stat,
                                                param_init, fix_parameters,
                                                with_z, rng):

    p_shape = _get_bn_parameter_shape(inshape, axes)

    if param_init:
        beta_init = np.ones(p_shape)
        gamma_init = np.ones(p_shape) * 2
        mean_init = np.ones(p_shape) * 0.5
        var_init = np.ones(p_shape) * 1.5
        param_init = dict(beta=beta_init,
                          gamma=gamma_init,
                          mean=mean_init,
                          var=var_init)
    rng = process_rng(rng)

    x = nn.Variable.from_numpy_array(g_rng.randn(*inshape))
    z = None
    if with_z:
        z = nn.Variable.from_numpy_array(g_rng.randn(*inshape))

    kw = {}
    insert_if_not_none(kw, 'z', z)
    insert_if_not_default(kw, 'axes', axes, [1])
    insert_if_not_default(kw, 'decay_rate', decay_rate, 0.9)
    insert_if_not_default(kw, 'eps', eps, 1e-5)
    insert_if_not_default(kw, 'batch_stat', batch_stat, True)
    insert_if_not_default(kw, 'nonlinearity', nonlinearity, 'relu')
    insert_if_not_default(kw, 'output_stat', output_stat, False)
    insert_if_not_default(kw, 'fix_parameters', fix_parameters, False)
    insert_if_not_none(kw, 'param_init', param_init)

    # Check creation
    y = PF.fused_batch_normalization(x, **kw)

    # Check parameter values before execution
    h = y[0] if output_stat else y
    if with_z:
        _, b, g, m, v, _ = h.parent.inputs
    else:
        _, b, g, m, v = h.parent.inputs
    if param_init:
        assert np.allclose(b.d, beta_init)
        assert np.allclose(g.d, gamma_init)
        assert np.allclose(m.d, mean_init)
        assert np.allclose(v.d, var_init)
    else:
        assert np.allclose(b.d, 0)
        assert np.allclose(g.d, 1)
        assert np.allclose(m.d, 0)
        assert np.allclose(v.d, 1)

    # Check execution
    if output_stat:
        forward_backward_all(*y)
    else:
        y.forward()
        # TODO: Enable when implemented
        if batch_stat:
            y.backward()

    # Check values
    # TODO

    # Check args
    assert h.parent.info.type_name == 'FusedBatchNormalization'
    args = h.parent.info.args
    assert args['axes'] == axes
    assert np.isclose(args['decay_rate'], decay_rate)
    assert np.isclose(args['eps'], eps)
    assert args['batch_stat'] == batch_stat
    assert args['nonlinearity'] == nonlinearity

    # Check created parameters
    assert h.parent.inputs[0] == x
    num_inputs = 5
    if with_z:
        num_inputs = 6
        assert h.parent.inputs[5] == z
    assert len(h.parent.inputs) == num_inputs
    assert len(nn.get_parameters()) == 2
    assert len(nn.get_parameters(grad_only=False)) == 4
    beta, gamma, mean, var = [
        nn.get_parameters(grad_only=False)['bn/' + name]
        for name in ['beta', 'gamma', 'mean', 'var']
    ]
    assert beta.shape == p_shape
    assert gamma.shape == p_shape
    assert mean.shape == p_shape
    assert var.shape == p_shape

    assert beta.need_grad
    assert gamma.need_grad
    assert not mean.need_grad
    assert not var.need_grad

    _, b, g, m, v = h.parent.inputs[:5]
    assert b.need_grad == (not fix_parameters)
    assert g.need_grad == (not fix_parameters)
    assert not m.need_grad
    assert not v.need_grad
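
The helpers insert_if_not_none and insert_if_not_default are not shown in this excerpt; from the way they are called above, they presumably look roughly like this (the actual test utilities may differ):

def insert_if_not_none(kw, key, value):
    # Pass the argument explicitly only when a value was provided.
    if value is not None:
        kw[key] = value

def insert_if_not_default(kw, key, value, default):
    # Pass the argument explicitly only when it differs from the default.
    if value != default:
        kw[key] = value
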
Example #7
    def bn(h, z=None):
        axes = [3 if channel_last else 1]
        return PF.fused_batch_normalization(h,
                                            z,
                                            axes=axes,
                                            batch_stat=not test)
Example #8
def resnet_imagenet(x,
                    num_classes,
                    num_layers,
                    shortcut_type,
                    test,
                    tiny=False,
                    channel_last=False):
    """
    Args:
        x : Variable
        num_classes : Number of output classes
        num_layers : Number of ResNet layers, chosen from (18, 34, 50, 101, 152)
        shortcut_type : 'c', 'b', ''
            'c' : Always use a convolution for the shortcut.
            'b' : Use a convolution only when the numbers of input and
                  output channels mismatch.
            '' : Use an identity mapping if the channels match, otherwise zero padding.
        test : Construct the network for testing (inference).
        tiny (bool): Tiny ImageNet mode. Input image must be (3, 56, 56).
        channel_last (bool):
            The channel dimension comes last in the input image, a.k.a. NHWC order.
    """
    layers = {
        18: ((2, 2, 2, 2), basicblock, 1),
        34: ((3, 4, 6, 3), basicblock, 1),
        50: ((3, 4, 6, 3), bottleneck, 4),
        101: ((3, 4, 23, 3), bottleneck, 4),
        152: ((3, 8, 36, 3), bottleneck, 4)
    }

    counts, block, ocoef = layers[num_layers]
    logger.debug(x.shape)
    axes = [3 if channel_last else 1]
    with nn.parameter_scope("conv1"):
        stride = (1, 1) if tiny else (2, 2)
        r = pf_convolution(x,
                           64, (7, 7),
                           stride=stride,
                           channel_last=channel_last)
        r = PF.fused_batch_normalization(r, axes=axes, batch_stat=not test)
        r = F.max_pooling(r, (3, 3),
                          stride,
                          pad=(1, 1),
                          channel_last=channel_last)
    hidden = {}
    hidden['r0'] = r
    ochannels = [64, 128, 256, 512]
    strides = [1, 2, 2, 2]
    logger.debug(r.shape)
    for i in range(4):
        with nn.parameter_scope("res{}".format(i + 1)):
            r = layer(r,
                      block,
                      ochannels[i] * ocoef,
                      counts[i], (strides[i], strides[i]),
                      shortcut_type,
                      test,
                      channel_last=channel_last)
        hidden['r{}'.format(i + 1)] = r
        logger.debug(r.shape)
    pool_shape = r.shape[-2:]
    if channel_last:
        pool_shape = r.shape[1:3]
    r = F.average_pooling(r, pool_shape, channel_last=channel_last)
    with nn.parameter_scope("fc"):
        r = pf_affine(r, num_classes, channel_last=channel_last)
    return r, hidden
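
A hypothetical usage of resnet_imagenet, assuming the usual nnabla imports and that pf_convolution, pf_affine, basicblock, bottleneck and layer are defined as in the surrounding module:

import nnabla as nn

x = nn.Variable((8, 3, 224, 224))  # NCHW input batch
y, hidden = resnet_imagenet(x, num_classes=1000, num_layers=50,
                            shortcut_type='b', test=True)
print(y.shape)         # expected to be (8, 1000)
print(sorted(hidden))  # ['r0', 'r1', 'r2', 'r3', 'r4']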