def mpusim_fully_connected(inputs,
                            units,
                            activation=None,
                            use_bias=True,
                            kernel_initializer=None,
                            bias_initializer=tf.zeros_initializer(),
                            kernel_regularizer=None,
                            bias_regularizer=None,
                            activity_regularizer=None,
                            activations_datatype_size_byte=1,
                            weights_datatype_size_byte=1,
                            results_datatype_size_byte=4,
                            systolic_array_height=256,
                            systolic_array_width=256,
                            activation_fifo_depth=8,
                            accumulator_array_height=4096,
                            log_file_output_dir='.',
                            model_name='unnamed'):
    """
    Fully-connected layer built on top of `mpusim_fc`.

    The input is flattened with `batch_flatten`, an `mpusim_fc` layer is
    constructed with the given arguments and applied in the current variable
    scope, and the result is passed through an identity op named ``output``.

    One difference kept for backward-compatibility: when
    `kernel_initializer` is None, the weights default to
    variance_scaling_initializer(2.0).

    Variable Names:
    * ``W``: weights of shape [in_dim, out_dim]
    * ``b``: bias

    Returns:
        The ``output`` tensor, with a ``variables`` attribute holding ``W``
        (and ``b`` when `use_bias` is true).
    """
    if kernel_initializer is None:
        # The contrib initializer is deprecated; only use it on old TF.
        use_contrib = get_tf_version_tuple() <= (1, 12)
        kernel_initializer = (
            tf.contrib.layers.variance_scaling_initializer(2.0)
            if use_contrib else
            tf.keras.initializers.VarianceScaling(
                2.0, distribution='untruncated_normal'))

    flat_inputs = batch_flatten(inputs)

    layer_kwargs = dict(
        units=units,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        activations_datatype_size_byte=activations_datatype_size_byte,
        weights_datatype_size_byte=weights_datatype_size_byte,
        results_datatype_size_byte=results_datatype_size_byte,
        systolic_array_height=systolic_array_height,
        systolic_array_width=systolic_array_width,
        activation_fifo_depth=activation_fifo_depth,
        accumulator_array_height=accumulator_array_height,
        log_file_output_dir=log_file_output_dir,
        model_name=model_name)

    # The layer asks for 'kernel'/'bias'; they are requested as 'W'/'b'.
    with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
        layer = mpusim_fc(_reuse=tf.get_variable_scope().reuse,
                          **layer_kwargs)
        applied = layer.apply(flat_inputs, scope=tf.get_variable_scope())
        output = tf.identity(applied, name='output')

    holder = VariableHolder(W=layer.kernel)
    if use_bias:
        holder.b = layer.bias
    output.variables = holder
    return output
def FullyConnectedWithTrackedMults(x,
                                   out_dim,
                                   network_complexity=None,
                                   W_init=None,
                                   b_init=None,
                                   nl=tf.identity,
                                   use_bias=True):
    """
    Fully-Connected layer, takes a N>1D tensor and returns a 2D tensor.
    It is an equivalent of `tf.layers.dense` except for naming conventions,
    and additionally accumulates parameter/multiplication counts.

    Args:
        x (tf.Tensor): a tensor to be flattened except for the first dimension.
        out_dim (int): output dimension
        network_complexity (dict or None): if given, its ``'weights'`` and
            ``'mults'`` entries are incremented in place by this layer's
            parameter count and multiplication count. The counters are
            updated on every call. When None, nothing is tracked.
        W_init: initializer for W. Defaults to `variance_scaling_initializer`.
        b_init: initializer for b. Defaults to zero.
        nl: a nonlinearity function
        use_bias (bool): whether to use bias.

    Returns:
        tf.Tensor: a NC tensor named ``output`` with attribute `variables`.

    Variable Names:

    * ``W``: weights of shape [in_dim, out_dim]
    * ``b``: bias
    """
    x = symbf.batch_flatten(x)
    in_dim = x.get_shape().as_list()[1]

    if W_init is None:
        W_init = tf.contrib.layers.variance_scaling_initializer()
    if b_init is None:
        b_init = tf.constant_initializer()

    if network_complexity is not None:
        # One weight and one multiplication per (input, output) pair.
        n_connections = out_dim * in_dim
        network_complexity['weights'] += n_connections
        network_complexity['mults'] += n_connections
        if use_bias:
            network_complexity['weights'] += out_dim

    W = tf.get_variable('W', (in_dim, out_dim), initializer=W_init)
    if use_bias:
        # The bias must use its own initializer, not the weight initializer.
        b = tf.get_variable('b', [out_dim], initializer=b_init)

    product = tf.matmul(x, W)

    ret = nl(tf.nn.bias_add(product, b) if use_bias else product,
             name='output')
    ret.variables = VariableHolder(W=W)
    if use_bias:
        ret.variables.b = b

    return ret
Exemple #3
0
def Conv3D(
        inputs,
        filters,
        kernel_size,
        strides=(1, 1, 1),
        padding='same',
        data_format='channels_last',
        dilation_rate=(1, 1, 1),
        activation=None,
        use_bias=True,
        kernel_initializer=tf.contrib.layers.variance_scaling_initializer(2.0),
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        split=1):
    """
    A wrapper around `tf.layers.Conv3D`.
    Some differences to maintain backward-compatibility:
    1. Default kernel initializer is variance_scaling_initializer(2.0).
    2. Default padding is 'same'.
    3. Accepts a 'split' argument for group conv, but only ``split=1`` is
       implemented here.

    Variable Names:
    * ``W``: weights
    * ``b``: bias

    Returns:
        A tensor named ``output`` with a ``variables`` attribute holding
        ``W`` (and ``b`` when `use_bias` is true).

    Raises:
        NotImplementedError: if ``split != 1``.
    """
    if split != 1:
        # The group-conv branch was never written; fail with a clear message
        # instead of hitting an unbound `ret` at the return statement.
        raise NotImplementedError(
            "[Conv3D] Group convolution (split={}) is not implemented.".format(split))

    with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
        layer = tf.layers.Conv3D(
            filters,
            kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            _reuse=tf.get_variable_scope().reuse)
        ret = layer.apply(inputs, scope=tf.get_variable_scope())
        ret = tf.identity(ret, name='output')

    ret.variables = VariableHolder(W=layer.kernel)
    if use_bias:
        ret.variables.b = layer.bias
    return ret
Exemple #4
0
def GrConv2D(x,
             out_channel,
             kernel_shape,
             padding='SAME',
             stride=1,
             dilation_rate=1,
             W_init=None,
             b_init=None,
             nl=tf.identity,
             split=1,
             use_bias=True,
             data_format='channels_last'):
    """
    2D convolution on a 4D input, implemented with `tf.layers.Conv2D`.

    Args:
        x (tf.Tensor): a 4D tensor with a statically known channel dimension.
        out_channel (int): number of output channels.
        kernel_shape: passed through `shape2d`.
        padding (str): padding mode; upper-cased before use.
        stride: passed through `shape2d`.
        dilation_rate: forwarded to `tf.layers.Conv2D`.
        W_init: initializer for W. Defaults to `variance_scaling_initializer`.
        b_init: initializer for b. Defaults to `tf.constant_initializer()`.
        nl: nonlinearity, called as ``nl(tensor, name='output')``.
        split (int): must divide both the input and output channel counts.
            NOTE: it is only validated; the layer built below receives no
            group/split argument.
        use_bias (bool): whether to use bias.
        data_format (str): 'NHWC'/'channels_last' or 'NCHW'/'channels_first'.

    Returns:
        The layer output with a ``variables`` attribute holding ``W`` (and
        ``b`` when `use_bias` is true).

    Raises:
        ValueError: if `data_format` is not one of the accepted values.
    """
    if data_format in ('NHWC', 'channels_last'):
        data_format = 'channels_last'
    elif data_format in ('NCHW', 'channels_first'):
        data_format = 'channels_first'
    else:
        raise ValueError(
            "[GrConv2D] Unknown data format: {!r}".format(data_format))

    in_shape = x.get_shape().as_list()
    # data_format has been normalised above, so compare against the
    # normalised spelling (comparing against 'NHWC' would never match).
    channel_axis = 3 if data_format == 'channels_last' else 1
    in_channel = in_shape[channel_axis]
    assert in_channel is not None, "[GrConv2D] Input cannot have unknown channel!"
    assert in_channel % split == 0
    assert out_channel % split == 0

    kernel_shape = shape2d(kernel_shape)
    padding = padding.upper()
    stride = shape2d(stride)

    if W_init is None:
        W_init = tf.contrib.layers.variance_scaling_initializer()
    if b_init is None:
        b_init = tf.constant_initializer()

    with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
        layer = tf.layers.Conv2D(filters=out_channel,
                                 kernel_size=kernel_shape,
                                 strides=stride,
                                 padding=padding,
                                 data_format=data_format,
                                 dilation_rate=dilation_rate,
                                 activation=lambda t: nl(t, name='output'),
                                 use_bias=use_bias,
                                 kernel_initializer=W_init,
                                 bias_initializer=b_init,
                                 trainable=True)
        ret = layer.apply(x, scope=tf.get_variable_scope())

    ret.variables = VariableHolder(W=layer.kernel)
    if use_bias:
        ret.variables.b = layer.bias
    return ret
Exemple #5
0
def Conv3DTranspose(
        inputs,
        filters,
        kernel_size,
        strides=(1, 1, 1),
        padding='same',
        data_format='channels_last',
        activation=None,
        use_bias=False,
        kernel_initializer=tf.contrib.layers.variance_scaling_initializer(2.0),
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None):
    """
    A wrapper around `tf.layers.Conv3DTranspose`.
    Some differences to maintain backward-compatibility:
    1. Default kernel initializer is variance_scaling_initializer(2.0).
    2. Default padding is 'same'.

    Variable Names:
    * ``W``: weights
    * ``b``: bias

    Returns:
        A tensor named ``output`` with a ``variables`` attribute holding
        ``W`` (and ``b`` when `use_bias` is true).
    """
    with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
        layer = tf.layers.Conv3DTranspose(
            filters,
            kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer)
        ret = layer.apply(inputs, scope=tf.get_variable_scope())

    # Create the named output first and attach `variables` to *that* tensor;
    # attaching to the pre-identity tensor would lose the attribute because
    # tf.identity returns a new tensor object.
    ret = tf.identity(ret, name='output')
    ret.variables = VariableHolder(W=layer.kernel)
    if use_bias:
        ret.variables.b = layer.bias
    return ret
def RescaleActivationLayer(inputs, decay=0.9, bit_a=8):
    """
    Centre and scale activations using tracked max/min variables.

    Two non-trainable variables sized by the last input dimension
    (``activation_max/EMA`` and ``activation_min/EMA``) are fetched, and the
    input is mapped to ``(x - (min + max) / 2) / (max - min)``.

    In the main training tower the rescaled tensor is handed to
    `update_ema` together with both variables and `decay`; otherwise it is
    returned through an identity op named ``output``.

    `bit_a` is not used by the current implementation.

    Returns:
        The resulting tensor with a ``variables`` attribute whose ``mean``
        field holds the max variable and ``variance`` holds the min variable.
    """
    n_channels = inputs.get_shape().as_list()[-1]
    ema_max = tf.get_variable('activation_max/EMA', [n_channels],
                              initializer=tf.constant_initializer(),
                              trainable=False)
    ema_min = tf.get_variable('activation_min/EMA', [n_channels],
                              initializer=tf.constant_initializer(),
                              trainable=False)

    tagged_in = tf.identity(inputs, name='rescaling_input_activation')
    midpoint = (ema_min + ema_max) / 2.0
    centred = tagged_in - midpoint
    span = ema_max - ema_min
    xn = centred / span

    # Named/printing copies are added to the graph; the value returned below
    # is still derived from `xn` itself.
    tagged_out = tf.identity(xn, name='rescaled_activation')
    tagged_out = tf.Print(tagged_out, [tagged_out])

    tower = get_current_tower_context()
    if tower.is_main_training_tower:
        ret = update_ema(xn, ema_max, ema_min, decay)
    else:
        ret = tf.identity(xn, name='output')

    ret.variables = VariableHolder(mean=ema_max, variance=ema_min)
    return ret
def Conv2DWithTrackedMults(x,
                           out_channel,
                           kernel_shape,
                           network_complexity=None,
                           padding='SAME',
                           stride=1,
                           W_init=None,
                           b_init=None,
                           nl=tf.identity,
                           split=1,
                           use_bias=True,
                           data_format='NHWC'):
    """
    2D convolution on 4D inputs that also accumulates complexity counters.

    Args:
        x (tf.Tensor): a 4D tensor.
            Must have known number of channels. When `network_complexity`
            is given, the spatial dimensions must be statically known too,
            because they enter the multiplication count.
        out_channel (int): number of output channel.
        kernel_shape: (h, w) tuple or a int.
        network_complexity (dict or None): if given, its ``'weights'`` and
            ``'mults'`` entries are incremented in place. When None, nothing
            is tracked.
        stride: (h, w) tuple or a int.
        padding (str): 'valid' or 'same'. Case insensitive.
        split (int): Split channels as used in Alexnet. Only ``1`` (no split)
            is supported by this implementation.
        W_init: initializer for W. Defaults to `variance_scaling_initializer`.
        b_init: initializer for b. Defaults to zero.
        nl: a nonlinearity function.
        use_bias (bool): whether to use bias.
        data_format (str): 'NHWC' or 'NCHW'.
    Returns:
        tf.Tensor named ``output`` with attribute `variables`.
    Variable Names:
    * ``W``: weights
    * ``b``: bias
    """
    in_shape = x.get_shape().as_list()
    channels_last = data_format == 'NHWC'
    channel_axis = 3 if channels_last else 1
    in_channel = in_shape[channel_axis]
    assert in_channel is not None, "[Conv2D] Input cannot have unknown channel!"
    assert in_channel % split == 0
    assert out_channel % split == 0
    # Checked before any variable is created so a bad call leaves no state.
    assert split == 1, "[Conv2D] Only split == 1 is supported."

    kernel_shape = shape2d(kernel_shape)
    padding = padding.upper()
    # Floor division keeps the shape entry an int under Python 3 as well.
    filter_shape = kernel_shape + [in_channel // split, out_channel]
    stride = shape4d(stride, data_format=data_format)

    if W_init is None:
        W_init = tf.contrib.layers.variance_scaling_initializer()
    if b_init is None:
        b_init = tf.constant_initializer()

    W = tf.get_variable('W', filter_shape, initializer=W_init)
    if use_bias:
        b = tf.get_variable('b', [out_channel], initializer=b_init)

    if network_complexity is not None:
        n_weights = (filter_shape[0] * filter_shape[1] *
                     filter_shape[2] * filter_shape[3])
        network_complexity['weights'] += n_weights
        if use_bias:
            network_complexity['weights'] += out_channel

        # Spatial axes depend on the layout; `stride` is assumed to be the
        # 4-element list in the same layout that tf.nn.conv2d expects.
        h_axis, w_axis = (1, 2) if channels_last else (2, 3)
        in_h, in_w = in_shape[h_axis], in_shape[w_axis]
        stride_h, stride_w = stride[h_axis], stride[w_axis]
        network_complexity['mults'] += (
            in_h * in_w * n_weights / (stride_h * stride_w))

    conv = tf.nn.conv2d(x, W, stride, padding, data_format=data_format)

    ret = nl(
        tf.nn.bias_add(conv, b, data_format=data_format) if use_bias else conv,
        name='output')
    ret.variables = VariableHolder(W=W)
    if use_bias:
        ret.variables.b = b

    return ret
def Conv3D(
        inputs,
        filters,
        kernel_size,
        strides=(1, 1, 1),
        padding='same',
        data_format='channels_last',
        dilation_rate=(1, 1, 1),
        activation=None,
        use_bias=True,
        kernel_initializer=tf.contrib.layers.variance_scaling_initializer(2.0),
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        split=1):
    """
    A wrapper around `tf.layers.Conv3D`.
    Some differences to maintain backward-compatibility:
    1. Default kernel initializer is variance_scaling_initializer(2.0).
    2. Default padding is 'same'.
    3. Support 'split' argument to do group conv.

    With ``split == 1`` the call is delegated to `tf.layers.Conv3D`.
    Otherwise the input and the kernel are split into `split` groups along
    the channel axis, each group is convolved with `tf.nn.conv3d`, and the
    results are concatenated. Regularizers are not supported in that mode.

    Variable Names:
    * ``W``: weights
    * ``b``: bias

    Returns:
        A tensor named ``output`` with a ``variables`` attribute holding
        ``W`` (and ``b`` when `use_bias` is true).
    """
    if split == 1:
        with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
            layer = tf.layers.Conv3D(filters,
                                     kernel_size,
                                     strides=strides,
                                     padding=padding,
                                     data_format=data_format,
                                     dilation_rate=dilation_rate,
                                     activation=activation,
                                     use_bias=use_bias,
                                     kernel_initializer=kernel_initializer,
                                     bias_initializer=bias_initializer,
                                     kernel_regularizer=kernel_regularizer,
                                     bias_regularizer=bias_regularizer,
                                     activity_regularizer=activity_regularizer)
            ret = layer.apply(inputs, scope=tf.get_variable_scope())
            ret = tf.identity(ret, name='output')

        ret.variables = VariableHolder(W=layer.kernel)
        if use_bias:
            ret.variables.b = layer.bias
        return ret

    # ---- group conv implementation (5-D tensors) ----
    def _triple(value):
        """Expand an int to three entries, or validate a length-3 sequence."""
        if isinstance(value, int):
            return [value] * 3
        value = list(value)
        assert len(value) == 3, \
            "[Conv3D] Expected an int or 3 values, got {}".format(value)
        return value

    # The helper is assumed to yield 'NHWC' for channels-last input, as the
    # 2-D code this branch was derived from relies on.
    channels_last = get_data_format(data_format, tfmode=False) == 'NHWC'
    conv_format = 'NDHWC' if channels_last else 'NCDHW'
    channel_axis = 4 if channels_last else 1

    def _shape5d(spatial):
        """Place three spatial values into a 5-element list for conv_format."""
        return [1] + spatial + [1] if channels_last else [1, 1] + spatial

    in_shape = inputs.get_shape().as_list()
    in_channel = in_shape[channel_axis]
    assert in_channel is not None, "[Conv3D] Input cannot have unknown channel!"
    assert in_channel % split == 0

    assert kernel_regularizer is None and bias_regularizer is None and activity_regularizer is None, \
        "Not supported by group conv now!"

    out_channel = filters
    assert out_channel % split == 0

    dilation = _triple(dilation_rate)
    dilated = any(d != 1 for d in dilation)
    assert not dilated or get_tf_version_number() >= 1.5, \
        'TF>=1.5 required for group dilated conv'

    # Floor division keeps the shape entry an int under Python 3 as well.
    filter_shape = _triple(kernel_size) + [in_channel // split, out_channel]
    stride = _shape5d(_triple(strides))

    kwargs = dict(data_format=conv_format)
    if dilated:
        # Only forwarded when needed so that undilated convs do not depend
        # on the `dilations` argument being available.
        kwargs['dilations'] = _shape5d(dilation)

    W = tf.get_variable('W', filter_shape, initializer=kernel_initializer)

    if use_bias:
        b = tf.get_variable('b', [out_channel],
                            initializer=bias_initializer)

    input_groups = tf.split(inputs, split, channel_axis)
    # Output channels are the last axis of the 5-D kernel.
    kernel_groups = tf.split(W, split, 4)
    outputs = [
        tf.nn.conv3d(i, k, stride, padding.upper(), **kwargs)
        for i, k in zip(input_groups, kernel_groups)
    ]
    conv = tf.concat(outputs, channel_axis)

    if use_bias:
        if channels_last:
            conv = tf.nn.bias_add(conv, b)
        else:
            # Broadcast over (N, C, D, H, W) explicitly instead of relying
            # on 5-D channels-first support in bias_add.
            conv = conv + tf.reshape(b, [1, -1, 1, 1, 1])

    if activation is None:
        activation = tf.identity
    ret = activation(conv, name='output')

    ret.variables = VariableHolder(W=W)
    if use_bias:
        ret.variables.b = b
    return ret
Exemple #9
0
def mod_conv2d(x,
               y,
               fmaps,
               kernel,
               demodulate=True,
               gain=1,
               use_wscale=True,
               lrmul=1,
               fused_modconv=True,
               eps=1e-8,
               padding='SAME',
               name="mod_conv2d"):
    """
    Modulated 2D convolution on an input indexed as [n, h, w, c].

    A weight of shape [kernel, kernel, cin, fmaps] is scaled per sample and
    per input channel by a style vector (``bs`` bias + 1, plus the output of
    `dense(y, ...)` when `y` is given) and, when `demodulate` is set,
    renormalised with ``rsqrt(sum(w^2) + eps)`` over the kernel and input
    channel axes.

    With `fused_modconv` the batch is folded into the channel axis and a
    single NCHW convolution is run with the per-sample weights. Otherwise
    the input is scaled by the style, convolved with the unmodulated weight
    in NHWC, and the demodulation factor is applied to the output.

    Returns:
        An identity of the result with a ``variables`` attribute holding
        ``W``, ``bs`` and, when `y` is given, ``Ws``.
    """
    in_shape = x.get_shape().as_list()  # [n, h, w, c]
    in_ch = in_shape[-1]
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        # Base weight, plus a copy with a leading minibatch axis.
        base_w = get_weight([kernel, kernel, in_ch, fmaps],
                            gain=gain,
                            use_wscale=use_wscale,
                            lrmul=lrmul,
                            name='W')
        batched_w = base_w[tf.newaxis]

        # Style (modulation) vector.
        style = get_bias(in_ch,
                         base_std=0,
                         use_wscale=use_wscale,
                         lrmul=lrmul,
                         name='bs') + 1
        holder = VariableHolder(W=base_w, bs=style)
        style_row = style[tf.newaxis]
        batch_size = tf.shape(x)[0]
        style = tf.tile(style_row, [batch_size, 1])
        if y is not None:
            y_style, w_style = dense(y,
                                     in_ch,
                                     gain=gain,
                                     use_wscale=use_wscale,
                                     lrmul=lrmul)
            style = style + y_style
            holder.Ws = w_style

        # Scale input feature maps of the weight.
        style_5d = style[:, tf.newaxis, tf.newaxis, :, tf.newaxis]
        batched_w = batched_w * tf.cast(style_5d, base_w.dtype)

        # Demodulation factor.
        if demodulate:
            energy = tf.reduce_sum(tf.square(batched_w),
                                   axis=[1, 2, 3],
                                   keepdims=True)
            demod = tf.rsqrt(energy + eps)
            batched_w = batched_w * demod

        if fused_modconv:
            # Fold the batch into channels: [1, n*cin, h, w].
            x_nchw = tf.transpose(x, [0, 3, 1, 2])
            x = tf.reshape(x_nchw, [1, -1, in_shape[1], in_shape[2]])
            # Per-sample kernels side by side: [k, k, cin, n*cout].
            w_moved = tf.transpose(batched_w, [1, 2, 3, 0, 4])
            fused_w = tf.reshape(w_moved, [kernel, kernel, in_ch, -1])
            x = tf.nn.conv2d(x,
                             tf.cast(fused_w, x.dtype),
                             data_format='NCHW',
                             strides=[1, 1, 1, 1],
                             padding=padding)
            conv_shape = x.get_shape().as_list()
            unfolded = tf.reshape(
                x, [-1, fmaps, conv_shape[2], conv_shape[3]])
            x = tf.transpose(unfolded, [0, 2, 3, 1])
        else:
            x = x * tf.cast(style[:, tf.newaxis, tf.newaxis, :], x.dtype)
            x = tf.nn.conv2d(x,
                             tf.cast(base_w, x.dtype),
                             data_format='NHWC',
                             strides=[1, 1, 1, 1],
                             padding=padding)
            if demodulate:
                demod_nhwc = tf.reshape(demod, [-1, 1, 1, fmaps])
                x = x * tf.cast(demod_nhwc, x.dtype)

        ret = tf.identity(x)
        ret.variables = holder
        return ret
def BatchNorm_SplitGPU(x, use_local_stat=None, decay=0.9, epsilon=1e-5,
              use_scale=True, use_bias=True,
              gamma_init=tf.constant_initializer(1.0), data_format='NHWC',
              internal_update=False, split_num = 1):
    """
    Batch normalization that normalizes `split_num` slices of the batch
    independently.

    When local statistics are used, the input is split into `split_num`
    pieces along the batch axis and the pieces are concatenated along the
    channel axis, so a single `tf.nn.fused_batch_norm` call computes a
    separate mean/variance per piece. The result is then split back along
    channels and re-concatenated along the batch axis. Only the statistics
    of the first piece (``[:n_out]``) are passed to `update_bn_ema`.

    Args:
        x: a 2D or 4D tensor with a statically known channel dimension.
        use_local_stat (bool or None): whether to normalize with batch
            statistics. Defaults to the tower context's ``is_training``.
        decay (float): forwarded to `update_bn_ema`.
        epsilon (float): epsilon for the normalization.
        use_scale, use_bias (bool): forwarded to `get_bn_variables`; also
            control whether ``gamma``/``beta`` are exposed on ``variables``.
        gamma_init: initializer forwarded to `get_bn_variables`.
        data_format (str): only 'NHWC' (or 'channels_last') is accepted.
        internal_update (bool): forwarded to `update_bn_ema`.
        split_num (int): number of batch slices normalized independently.

    Returns:
        A tensor with a ``variables`` attribute holding ``mean``,
        ``variance`` and optionally ``gamma``/``beta``.
    """
    print(split_num)
    if data_format == 'channels_last':
        data_format = 'NHWC'
    assert data_format == 'NHWC'
    shape = x.get_shape().as_list()
    ndims = len(shape)
    assert ndims in [2, 4]
    if ndims == 2:
        data_format = 'NHWC'
    if data_format == 'NCHW':
        n_out = shape[1]
    else:
        n_out = shape[-1]  # channel
    assert n_out is not None, "Input to BatchNorm cannot have unknown channels!"
    beta, gamma, moving_mean, moving_var = get_bn_variables(n_out, use_scale, use_bias, gamma_init)

    ctx = get_current_tower_context()
    if use_local_stat is None:
        use_local_stat = ctx.is_training
    use_local_stat = bool(use_local_stat)

    if use_local_stat:
        if ndims == 2:
            x = tf.reshape(x, [-1, 1, 1, n_out])    # fused_bn only takes 4D input

        # Move the batch slices onto the channel axis:
        # N x H x W x C  ->  N/S x H x W x C*S
        inputs = tf.concat(tf.split(x, split_num, 0), -1)
        # Repeat the affine parameters once per slice to match C*S channels.
        beta_ = tf.reshape([beta] * split_num, [-1])
        gamma_ = tf.reshape([gamma] * split_num, [-1])
        xn, batch_mean, batch_var = tf.nn.fused_batch_norm(
            inputs, gamma_, beta_, epsilon=epsilon,
            is_training=True, data_format=data_format)
        # Undo the rearrangement: back to N x H x W x C.
        xn = tf.concat(tf.split(xn, split_num, 3), 0)

        if ndims == 2:
            xn = tf.squeeze(xn, [1, 2])
    elif ctx.is_training:
        # The fixed-statistics training path needs the newer TF; the check
        # must therefore require >= 1.4, as the message states.
        assert get_tf_version_number() >= 1.4, \
            "Fine tuning a BatchNorm model with fixed statistics is only " \
            "supported after https://github.com/tensorflow/tensorflow/pull/12580 "
        if ctx.is_main_training_tower:  # only warn in first tower
            logger.warn("[BatchNorm] Using moving_mean/moving_variance in training.")
        # Using moving_mean/moving_variance in training, which means we
        # loaded a pre-trained BN and only fine-tuning the affine part.
        xn, batch_mean, batch_var = tf.nn.fused_batch_norm(
            x, gamma, beta,
            mean=moving_mean, variance=moving_var, epsilon=epsilon,
            data_format=data_format, is_training=False)
    else:
        if ndims == 4 and data_format == 'NCHW':
            [g, b, mm, mv] = [reshape_for_bn(_, ndims, n_out, data_format)
                              for _ in [gamma, beta, moving_mean, moving_var]]
            xn = tf.nn.batch_normalization(x, mm, mv, b, g, epsilon)
        else:
            # avoid the reshape if possible (when channel is the last dimension)
            xn = tf.nn.batch_normalization(
                x, moving_mean, moving_var, beta, gamma, epsilon)
        batch_mean = tf.concat([moving_mean] * split_num, 0)
        batch_var = tf.concat([moving_var] * split_num, 0)

    # maintain EMA only on one GPU is OK, even in replicated mode.
    # because training time doesn't use EMA
    if ctx.is_main_training_tower:
        add_model_variable(moving_mean)
        add_model_variable(moving_var)
    if ctx.is_main_training_tower and use_local_stat:
        # Keep the tensor returned by update_bn_ema: overwriting it with a
        # plain identity of `xn` would discard whatever it attached.
        ret = update_bn_ema(xn, batch_mean[:n_out], batch_var[:n_out], moving_mean, moving_var, decay, internal_update)
    else:
        ret = tf.identity(xn, name='output')

    vh = ret.variables = VariableHolder(mean=moving_mean, variance=moving_var)
    if use_scale:
        vh.gamma = gamma
    if use_bias:
        vh.beta = beta
    assert batch_mean is not None, 'batch_mean outputs is None'
    return ret
Exemple #11
0
def Conv(inputs,
         filters,
         kernel_size,
         strides=(1, 1),
         padding='same',
         data_format='channels_last',
         dilation_rate=(1, 1),
         activation=None,
         use_bias=True,
         kernel_initializer=None,
         bias_initializer=tf.zeros_initializer(),
         kernel_regularizer=None,
         bias_regularizer=None,
         activity_regularizer=None,
         split=1,
         norm=False):
    """
    Similar to `tf.layers.Conv2D`, but with some differences:
    1. Default kernel initializer is variance_scaling_initializer(2.0).
    2. Default padding is 'same'.
    3. Support 'split' argument to do group convolution.
    4. Always built directly on `tf.nn.conv2d`; regularizers are not
       supported and must be None.
    5. With ``norm=True`` the bias is disabled and the kernel's input-channel
       axis is reshaped into 4 groups with a softmax taken across the group
       axis before the convolution.

    Variable Names (as created by this function):
    * ``parseweigth``: weights
    * ``parsebias``: bias

    Returns:
        A tensor named ``output`` with a ``variables`` attribute holding
        ``W`` (and ``b`` when a bias is used).
    """
    if kernel_initializer is None:
        if get_tf_version_tuple() <= (1, 12):
            kernel_initializer = tf.contrib.layers.variance_scaling_initializer(
                2.0)  # deprecated
        else:
            kernel_initializer = tf.keras.initializers.VarianceScaling(
                2.0, distribution='untruncated_normal')
    dilation_rate = shape2d(dilation_rate)

    data_format = get_data_format(data_format, keras_mode=False)
    in_shape = inputs.get_shape().as_list()
    channel_axis = 3 if data_format == 'NHWC' else 1
    in_channel = in_shape[channel_axis]
    assert in_channel is not None, "[Conv2D] Input cannot have unknown channel!"
    assert in_channel % split == 0

    assert kernel_regularizer is None and bias_regularizer is None and activity_regularizer is None, \
        "Not supported by group conv or dilated conv!"

    out_channel = filters
    assert out_channel % split == 0
    assert dilation_rate == [1, 1] or get_tf_version_tuple() >= (
        1, 5), 'TF>=1.5 required for dilated conv.'

    kernel_shape = shape2d(kernel_size)
    filter_shape = kernel_shape + [in_channel // split, out_channel]
    stride = shape4d(strides, data_format=data_format)

    kwargs = {"data_format": data_format}
    if get_tf_version_tuple() >= (1, 5):
        kwargs['dilations'] = shape4d(dilation_rate,
                                      data_format=data_format)

    # Match the input dtype (e.g. tf.float16), since variables default to
    # tf.float32.
    inputs_dtype = inputs.dtype
    W = tf.get_variable('parseweigth',
                        filter_shape,
                        dtype=inputs_dtype,
                        initializer=kernel_initializer)
    if norm:
        use_bias = False
        W = tf.reshape(W, kernel_shape + [4, in_channel // 4, out_channel])
        W = tf.nn.softmax(W, 2)
        W = tf.reshape(W, filter_shape)

    if use_bias:
        b = tf.get_variable('parsebias', [out_channel],
                            dtype=inputs_dtype,
                            initializer=bias_initializer)

    if split == 1:
        conv = tf.nn.conv2d(inputs, W, stride, padding.upper(), **kwargs)
    else:
        try:
            conv = tf.nn.conv2d(inputs, W, stride, padding.upper(),
                                **kwargs)
        except ValueError:
            log_once(
                "CUDNN group convolution support is only available with "
                "https://github.com/tensorflow/tensorflow/pull/25818 . "
                "Will fall back to a loop-based slow implementation instead!",
                'warn')
            # Loop-based fallback: convolve each channel group with its own
            # slice of the kernel and stitch the results back together.
            input_groups = tf.split(inputs, split, channel_axis)
            kernel_groups = tf.split(W, split, 3)
            group_outputs = [
                tf.nn.conv2d(i, k, stride, padding.upper(), **kwargs)
                for i, k in zip(input_groups, kernel_groups)
            ]
            conv = tf.concat(group_outputs, channel_axis)

    ret = tf.nn.bias_add(conv, b,
                         data_format=data_format) if use_bias else conv
    if activation is not None:
        ret = activation(ret)
    ret = tf.identity(ret, name='output')

    ret.variables = VariableHolder(W=W)
    if use_bias:
        ret.variables.b = b
    return ret
Exemple #12
0
def mpusim_depthwise_convolution2d(inputs,
                                   kernel_size,
                                   strides=(1, 1),
                                   padding='valid',
                                   depth_multiplier=1,
                                   data_format='channels_last',
                                   activation=None,
                                   use_bias=True,
                                   depthwise_initializer='glorot_uniform',
                                   bias_initializer='zeros',
                                   depthwise_regularizer=None,
                                   bias_regularizer=None,
                                   depthwise_constraint=None,
                                   bias_constraint=None,
                                   activations_datatype_size_byte=1,
                                   weights_datatype_size_byte=1,
                                   results_datatype_size_byte=4,
                                   systolic_array_height=256,
                                   systolic_array_width=256,
                                   activation_fifo_depth=8,
                                   accumulator_array_height=4096,
                                   log_file_output_dir='.',
                                   model_name='unnamed'):
    """
    Depthwise 2D convolution layer built on `mpusim_depthwise_conv2d`.

    Creates the depthwise kernel (and optionally a bias), runs
    `mpusim_depthwise_conv2d` on the inputs, then applies bias and activation.

    Args:
        inputs (tf.Tensor): input tensor of rank >= 4 with a statically known
            channel dimension on axis 3 (only NHWC is supported).
        kernel_size: indexable pair ``(h, w)``.
        strides: passed through `shape4d` before being forwarded.
        padding (str): forwarded unchanged to `mpusim_depthwise_conv2d`.
        depth_multiplier (int): last dimension of the depthwise kernel; the
            bias has ``in_channels * depth_multiplier`` entries.
        data_format (str): normalized with `get_data_format(keras_mode=False)`.
            A result of ``'NCHW'`` is rejected.
        activation: optional callable applied to the biased result.
        use_bias (bool): whether to create and add the bias variable ``b``.
        depthwise_initializer, bias_initializer: initializer callables, or
            Keras string identifiers (resolved with
            `tf.keras.initializers.get`).
        depthwise_regularizer, bias_regularizer: forwarded to `tf.get_variable`.
        depthwise_constraint, bias_constraint: forwarded to `tf.get_variable`.
        activations_datatype_size_byte, weights_datatype_size_byte,
        results_datatype_size_byte, systolic_array_height,
        systolic_array_width, activation_fifo_depth, accumulator_array_height,
        log_file_output_dir, model_name: forwarded unchanged to
            `mpusim_depthwise_conv2d` (presumably simulator configuration;
            see that op for their meaning).

    Returns:
        tf.Tensor named ``output`` with attribute `variables`.

    Raises:
        ValueError: if the input rank is below 4, the data format is NCHW,
            or the channel dimension is unknown.

    Variable Names:

    * ``W``: depthwise kernel of shape [kh, kw, in_channels, depth_multiplier]
    * ``b``: bias
    """
    data_format = get_data_format(data_format, keras_mode=False)
    input_shape = inputs.get_shape().as_list()

    strides = shape4d(strides, data_format=data_format)

    if len(input_shape) < 4:
        # Build a single message; passing two arguments to ValueError would
        # render the error as a tuple.
        raise ValueError(
            'Inputs to `mpusim_depthwise_conv2d` should have rank 4. '
            'Received input shape: ' + str(input_shape))

    if data_format == 'NCHW':
        raise ValueError('mpusim_depthwise_convolution2d '
                         'only supports NHWC data format')
    channel_axis = 3

    if input_shape[channel_axis] is None:
        raise ValueError('The channel dimension of the inputs to '
                         '`mpusim_depthwise_convolution2d` '
                         'should be defined. Found `None`.')

    input_dim = int(input_shape[channel_axis])

    # tf.get_variable rejects a non-callable initializer when a shape is
    # given, so the string defaults must be resolved to initializer objects.
    if isinstance(depthwise_initializer, str):
        depthwise_initializer = tf.keras.initializers.get(depthwise_initializer)
    if isinstance(bias_initializer, str):
        bias_initializer = tf.keras.initializers.get(bias_initializer)

    depthwise_kernel_shape = (kernel_size[0], kernel_size[1], input_dim,
                              depth_multiplier)

    depthwise_kernel = tf.get_variable('W',
                                       shape=depthwise_kernel_shape,
                                       initializer=depthwise_initializer,
                                       regularizer=depthwise_regularizer,
                                       constraint=depthwise_constraint)

    biases = None
    if use_bias:
        biases = tf.get_variable('b',
                                 shape=(input_dim * depth_multiplier, ),
                                 initializer=bias_initializer,
                                 regularizer=bias_regularizer,
                                 constraint=bias_constraint)

    result = mpusim_depthwise_conv2d(
        inputs,
        depthwise_kernel,
        strides=strides,
        padding=padding,
        data_format=data_format,
        activations_datatype_size_byte=activations_datatype_size_byte,
        weights_datatype_size_byte=weights_datatype_size_byte,
        results_datatype_size_byte=results_datatype_size_byte,
        systolic_array_height=systolic_array_height,
        systolic_array_width=systolic_array_width,
        activation_fifo_depth=activation_fifo_depth,
        accumulator_array_height=accumulator_array_height,
        log_file_output_dir=log_file_output_dir,
        model_name=model_name)

    if use_bias:
        result = tf.nn.bias_add(result, biases, data_format=data_format)

    if activation is not None:
        result = activation(result)

    result = tf.identity(result, name='output')

    result.variables = VariableHolder(W=depthwise_kernel)

    if use_bias:
        result.variables.b = biases

    return result
def MaskedConv2D(
        inputs,
        filters,
        kernel_size,
        strides=(1, 1),
        padding='same',
        data_format='channels_last',
        dilation_rate=(1, 1),
        activation=None,
        use_bias=True,
        kernel_initializer=None,
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        split=1,
        masking=False):
    """
    A wrapper around `tf.layers.Conv2D`.
    Some differences to maintain backward-compatibility:

    1. Default kernel initializer is variance_scaling_initializer(2.0).
    2. Default padding is 'same'.
    3. Support 'split' argument to do group conv.
    4. Support 'masking' argument: when true, the kernel is passed through
       `pruning.apply_mask` before the convolution (only with ``split == 1``).

    `tf.layers.Conv2D` is used only when there is no masking, no grouping and
    no dilation; every other case goes through an explicit `tf.nn.conv2d`
    implementation, which does not support regularizers.

    Args:
        split (int): number of groups. Must divide both the input channels
            and `filters`.
        masking (bool): whether to apply the pruning mask to the kernel.

    Returns:
        tf.Tensor named ``output`` with attribute `variables`.

    Variable Names:

    * ``W``: weights
    * ``b``: bias
    """
    if kernel_initializer is None:
        if get_tf_version_tuple() <= (1, 12):
            kernel_initializer = tf.contrib.layers.variance_scaling_initializer(2.0)
        else:
            kernel_initializer = tf.keras.initializers.VarianceScaling(2.0, distribution='untruncated_normal')
    dilation_rate = shape2d(dilation_rate)

    if not masking and split == 1 and dilation_rate == [1, 1]:
        # tf.layers.Conv2D has bugs with dilations (https://github.com/tensorflow/tensorflow/issues/26797),
        # so it is only used for the plain, undilated case.
        with rename_get_variable({'kernel': 'W', 'bias': 'b'}):
            layer = tf.layers.Conv2D(
                filters,
                kernel_size,
                strides=strides,
                padding=padding,
                data_format=data_format,
                dilation_rate=dilation_rate,
                activation=activation,
                use_bias=use_bias,
                kernel_initializer=kernel_initializer,
                bias_initializer=bias_initializer,
                kernel_regularizer=kernel_regularizer,
                bias_regularizer=bias_regularizer,
                activity_regularizer=activity_regularizer,
                _reuse=tf.get_variable_scope().reuse)
            ret = layer.apply(inputs, scope=tf.get_variable_scope())
            ret = tf.identity(ret, name='output')

        ret.variables = VariableHolder(W=layer.kernel)
        if use_bias:
            ret.variables.b = layer.bias

    else:
        if masking:
            assert split == 1, "Pruining group conv is not supported yet"

        # masked / group / dilated conv implementation
        data_format = get_data_format(data_format, keras_mode=False)
        in_shape = inputs.get_shape().as_list()
        channel_axis = 3 if data_format == 'NHWC' else 1
        in_channel = in_shape[channel_axis]
        assert in_channel is not None, "[Conv2D] Input cannot have unknown channel!"
        assert in_channel % split == 0

        assert kernel_regularizer is None and bias_regularizer is None and activity_regularizer is None, \
            "Not supported by group conv or dilated conv!"

        out_channel = filters
        assert out_channel % split == 0
        assert dilation_rate == [1, 1] or get_tf_version_tuple() >= (1, 5), 'TF>=1.5 required for dilated conv.'

        kernel_shape = shape2d(kernel_size)
        # Floor division keeps the dimension an int; true division would put
        # a float into the variable shape under Python 3. Divisibility is
        # asserted above, so the value is unchanged.
        filter_shape = kernel_shape + [in_channel // split, out_channel]
        stride = shape4d(strides, data_format=data_format)

        kwargs = dict(data_format=data_format)
        if get_tf_version_tuple() >= (1, 5):
            kwargs['dilations'] = shape4d(dilation_rate, data_format=data_format)

        W = tf.get_variable(
            'W', filter_shape, initializer=kernel_initializer)

        if use_bias:
            b = tf.get_variable('b', [out_channel], initializer=bias_initializer)

        if split == 1:
            if masking:
                W = pruning.apply_mask(W)
            conv = tf.nn.conv2d(inputs, W, stride, padding.upper(), **kwargs)
        else:
            conv = None
            if get_tf_version_tuple() >= (1, 13):
                # Try a single grouped conv2d first; it raises ValueError
                # when group convolution is not supported by this TF build.
                try:
                    conv = tf.nn.conv2d(inputs, W, stride, padding.upper(), **kwargs)
                except ValueError:
                    log_once("CUDNN group convolution support is only available with "
                             "https://github.com/tensorflow/tensorflow/pull/25818 . "
                             "Will fall back to a loop-based slow implementation instead!", 'warn')
            if conv is None:
                # Fallback: convolve each group separately and concatenate.
                input_groups = tf.split(inputs, split, channel_axis)
                kernel_groups = tf.split(W, split, 3)
                outputs = [tf.nn.conv2d(i, k, stride, padding.upper(), **kwargs)
                           for i, k in zip(input_groups, kernel_groups)]
                conv = tf.concat(outputs, channel_axis)

        ret = tf.nn.bias_add(conv, b, data_format=data_format) if use_bias else conv
        if activation is not None:
            ret = activation(ret)
        ret = tf.identity(ret, name='output')

        ret.variables = VariableHolder(W=W)
        if use_bias:
            ret.variables.b = b
    return ret
Exemple #14
0
def mpusim_conv2d(
        inputs,
        filters,
        kernel_size,
        strides=(1, 1),
        padding='same',
        data_format='channels_last',
        dilation_rate=(1, 1),
        activation=None,
        use_bias=True,
        kernel_initializer=None,
        bias_initializer=tf.zeros_initializer(),
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        split=1,
        activations_datatype_size_byte=1,
        weights_datatype_size_byte=1,
        results_datatype_size_byte=4,
        systolic_array_height=256,
        systolic_array_width=256,
        activation_fifo_depth=8,
        accumulator_array_height=4096,
        log_file_output_dir='.',
        model_name='unnamed'):
    """
    Similar to `tf.layers.Conv2D`, but with some differences:

    1. Default kernel initializer is variance_scaling_initializer(2.0).
    2. Default padding is 'same'.
    3. Support 'split' argument to do group convolution.
    4. The convolution itself is computed by
       `mpu_sim_conv2d_lib.mpu_sim_conv2d` instead of `tf.nn.conv2d`.
    5. Regularizers are not supported and must be None.

    Args:
        split (int): number of groups. Must divide both the input channels
            and `filters`. With ``split > 1`` each group is convolved
            separately and the results are concatenated on the channel axis.
        activations_datatype_size_byte, weights_datatype_size_byte,
        results_datatype_size_byte, systolic_array_height,
        systolic_array_width, activation_fifo_depth, accumulator_array_height,
        log_file_output_dir, model_name: forwarded unchanged to
            `mpu_sim_conv2d_lib.mpu_sim_conv2d` (presumably simulator
            configuration; see that op for their meaning).

    Returns:
        tf.Tensor named ``output`` with attribute `variables`.

    Variable Names:

    * ``W``: weights
    * ``b``: bias
    """
    if kernel_initializer is None:
        if get_tf_version_tuple() <= (1, 12):
            kernel_initializer = tf.contrib.layers.variance_scaling_initializer(2.0)
        else:
            kernel_initializer = tf.keras.initializers.VarianceScaling(2.0, distribution='untruncated_normal')
    dilation_rate = shape2d(dilation_rate)

    # group conv implementation
    data_format = get_data_format(data_format, keras_mode=False)
    in_shape = inputs.get_shape().as_list()
    channel_axis = 3 if data_format == 'NHWC' else 1
    in_channel = in_shape[channel_axis]
    assert in_channel is not None, "[mpusim_conv2d] Input cannot have unknown channel!"
    assert in_channel % split == 0

    assert kernel_regularizer is None and bias_regularizer is None and activity_regularizer is None, \
        "Not supported by group conv or dilated conv!"

    out_channel = filters
    assert out_channel % split == 0
    assert dilation_rate == [1, 1] or get_tf_version_tuple() >= (1, 5), 'TF>=1.5 required for dilated conv.'

    kernel_shape = shape2d(kernel_size)
    # Floor division keeps the dimension an int; true division would put a
    # float into the variable shape under Python 3. Divisibility is asserted
    # above, so the value is unchanged.
    filter_shape = kernel_shape + [in_channel // split, out_channel]
    stride = shape4d(strides, data_format=data_format)

    kwargs = dict(data_format=data_format)
    if get_tf_version_tuple() >= (1, 5):
        kwargs['dilations'] = shape4d(dilation_rate, data_format=data_format)

    W = tf.get_variable(
            'W', filter_shape, initializer=kernel_initializer)

    if use_bias:
        b = tf.get_variable('b', [out_channel], initializer=bias_initializer)

    def _simulated_conv(input_block, kernel_block):
        # One call into the simulator op with the shared configuration.
        return mpu_sim_conv2d_lib.mpu_sim_conv2d(input_block,
                                                 kernel_block,
                                                 activations_datatype_size_byte,
                                                 weights_datatype_size_byte,
                                                 results_datatype_size_byte,
                                                 systolic_array_height,
                                                 systolic_array_width,
                                                 activation_fifo_depth,
                                                 accumulator_array_height,
                                                 log_file_output_dir,
                                                 model_name,
                                                 stride,
                                                 padding.upper(),
                                                 **kwargs)

    if split == 1:
        conv = _simulated_conv(inputs, W)
    else:
        # Convolve each channel group with its own kernel slice, then
        # stitch the per-group outputs back together.
        input_groups = tf.split(inputs, split, channel_axis)
        kernel_groups = tf.split(W, split, 3)
        outputs = [_simulated_conv(input_block, kernel_block)
                   for input_block, kernel_block in zip(input_groups, kernel_groups)]
        conv = tf.concat(outputs, channel_axis)

    ret = tf.nn.bias_add(conv, b, data_format=data_format) if use_bias else conv
    if activation is not None:
        ret = activation(ret)
    ret = tf.identity(ret, name='output')

    ret.variables = VariableHolder(W=W)
    if use_bias:
        ret.variables.b = b
    return ret
Exemple #15
0
def BatchNorm(inputs,
              axis=None,
              training=None,
              momentum=0.9,
              epsilon=1e-5,
              center=True,
              scale=True,
              beta_initializer=tf.zeros_initializer(),
              gamma_initializer=tf.ones_initializer(),
              virtual_batch_size=None,
              internal_update=False):
    """
    Mostly equivalent to `tf.layers.batch_normalization`, but different in
    the following:

    1. `axis` must be given explicitly (it defaults to None only as a
       placeholder; passing None fails an assertion).
    2. Default value for `momentum` and `epsilon` is different.
    3. Default value for `training` is automatically obtained from `TowerContext`.
    4. Support the `internal_update` option.
    5. The layer is always created with ``fused=True``.

    Args:
        inputs (tf.Tensor): the tensor to normalize.
        axis: the axis to normalize, forwarded to
            `tf.layers.BatchNormalization`. Required.
        training (bool): whether to use batch statistics. Defaults to
            ``ctx.is_training`` of the current tower context.
        momentum, epsilon, center, scale, beta_initializer,
        gamma_initializer: forwarded to `tf.layers.BatchNormalization`.
        virtual_batch_size: forwarded to `tf.layers.BatchNormalization`.
            Requires TF >= 1.5.
        internal_update (bool): if False, add EMA update ops to
            `tf.GraphKeys.UPDATE_OPS`. If True, update EMA inside the layer
            by control dependencies.

    Returns:
        tf.Tensor named ``output`` with attribute `variables`.

    Variable Names:

    * ``beta``: the bias term. Will be zero-inited by default.
    * ``gamma``: the scale term. Will be one-inited by default. Input will be transformed by ``x * gamma + beta``.
    * ``mean/EMA``: the moving average of mean.
    * ``variance/EMA``: the moving average of variance.

    Note:
        1. About multi-GPU training: moving averages across GPUs are not aggregated.
           Batch statistics are computed independently.  This is consistent with most frameworks.
        2. Combinations of ``training`` and ``ctx.is_training``:
            * ``training == ctx.is_training``: standard BN, EMA are
                maintained during training and used during inference. This is
                the default.
            * ``training and not ctx.is_training``: still use batch statistics in inference.
            * ``not training and ctx.is_training``: use EMA to normalize in
                training. This is useful when you load a pre-trained BN and
                don't want to fine tune the EMA. EMA will not be updated in
                this case.
    """
    assert axis is not None, "[BatchNorm] `axis` must be specified!"

    # parse training/ctx
    ctx = get_current_tower_context()
    if training is None:
        training = ctx.is_training
    training = bool(training)
    # Compare versions as tuples: a float version number turns "1.10" into
    # 1.1, which would wrongly compare as older than 1.4 / 1.5.
    tf_version = get_tf_version_tuple()
    if not training and ctx.is_training:
        assert tf_version >= (1, 4), \
            "Fine tuning a BatchNorm model with fixed statistics is only " \
            "supported after https://github.com/tensorflow/tensorflow/pull/12580 "
        if ctx.is_main_training_tower:  # only warn in first tower
            logger.warn(
                "[BatchNorm] Using moving_mean/moving_variance in training.")
        # Using moving_mean/moving_variance in training, which means we
        # loaded a pre-trained BN and only fine-tuning the affine part.

    # Snapshot UPDATE_OPS so the ops added by the layer can be discarded below.
    coll_bk = backup_collection([tf.GraphKeys.UPDATE_OPS])
    with rename_get_variable({
            'moving_mean': 'mean/EMA',
            'moving_variance': 'variance/EMA'
    }):
        layer_kwargs = dict(
            axis=axis,
            momentum=momentum,
            epsilon=epsilon,
            center=center,
            scale=scale,
            beta_initializer=beta_initializer,
            gamma_initializer=gamma_initializer,
            fused=True,
            _reuse=tf.get_variable_scope().reuse)
        if tf_version >= (1, 5):
            layer_kwargs['virtual_batch_size'] = virtual_batch_size
        else:
            # Older layers do not take the argument at all.
            assert virtual_batch_size is None, "Feature not supported in this version of TF!"
        layer = tf.layers.BatchNormalization(**layer_kwargs)
        xn = layer.apply(inputs,
                         training=training,
                         scope=tf.get_variable_scope())

    # maintain EMA only on one GPU is OK, even in replicated mode.
    # because training time doesn't use EMA
    if ctx.is_main_training_tower:
        for v in layer.non_trainable_variables:
            add_model_variable(v)
    if not ctx.is_main_training_tower or internal_update:
        # Drop the update ops the layer registered: either this is not the
        # main tower, or they are attached via control dependencies below.
        restore_collection(coll_bk)

    if training and internal_update:
        assert layer.updates
        with tf.control_dependencies(layer.updates):
            ret = tf.identity(xn, name='output')
    else:
        ret = tf.identity(xn, name='output')

    vh = ret.variables = VariableHolder(
        moving_mean=layer.moving_mean,
        mean=layer.moving_mean,  # for backward-compatibility
        moving_variance=layer.moving_variance,
        variance=layer.moving_variance)  # for backward-compatibility
    if scale:
        vh.gamma = layer.gamma
    if center:
        vh.beta = layer.beta
    return ret
Exemple #16
0
def AtrousConv2D(x,
                 out_channel,
                 kernel_shape,
                 padding='SAME',
                 rate=1,
                 W_init=None,
                 b_init=None,
                 nl=tf.identity,
                 use_bias=False,
                 data_format='NHWC'):
    """
    2D AtrousConvolution on 4D inputs.

    Args:
        x (tf.Tensor): a 4D tensor.
            Must have known number of channels, but can have other unknown dimensions.
        out_channel (int): number of output channel.
        kernel_shape: (h, w) tuple or a int.
        padding (str): 'valid' or 'same'. Case insensitive.
        rate: A positive int32, In the literature, the same parameter is sometimes called input stride or dilation.
        W_init: initializer for W. Defaults to `variance_scaling_initializer`.
        b_init: initializer for b. Defaults to zero.
        nl: a nonlinearity function. Called as ``nl(tensor, name='output')``.
        use_bias (bool): whether to use bias.
        data_format (str): must be 'NHWC', the only layout supported by
            `tf.nn.atrous_conv2d`.

    Returns:
        tf.Tensor named ``output`` with attribute `variables`.

    Raises:
        ValueError: if `data_format` is not 'NHWC'.

    Variable Names:

    * ``W``: weights
    * ``b``: bias
    """
    if data_format != 'NHWC':
        # tf.nn.atrous_conv2d has no data_format argument and always treats
        # its input as NHWC; any other layout would read the wrong axis as
        # channels.
        raise ValueError(
            "[AtrousConv2D] Only data_format='NHWC' is supported, got {!r}".format(data_format))

    in_shape = x.get_shape().as_list()
    channel_axis = 3
    in_channel = in_shape[channel_axis]
    assert in_channel is not None, "[AtrousConv2D] Input cannot have unknown channel!"

    kernel_shape = shape2d(kernel_shape)
    padding = padding.upper()
    filter_shape = kernel_shape + [in_channel, out_channel]

    if W_init is None:
        W_init = tf.contrib.layers.variance_scaling_initializer()
    if b_init is None:
        b_init = tf.constant_initializer()

    W = tf.get_variable('W', filter_shape, initializer=W_init)

    if use_bias:
        b = tf.get_variable('b', [out_channel], initializer=b_init)

    conv = tf.nn.atrous_conv2d(x, W, rate, padding)

    pre_activation = tf.nn.bias_add(conv, b, data_format=data_format) if use_bias else conv
    ret = nl(pre_activation, name='output')
    ret.variables = VariableHolder(W=W)
    if use_bias:
        ret.variables.b = b
    return ret