Example #1
def preact_residual_dmixconv_block(data, channels, channels_operating, name, kernels=None, act_type='relu', use_se=True):
    """
    Returns a residual block without any max pooling operation
    :param data: Input data
    :param channels: Number of filters for all CNN-layers
    :param name: Name for the residual block
    :param act_type: Activation function to use
    :return: symbol
    """
    bn1 = mx.sym.BatchNorm(data=data, name=name + '_bn1')
    conv1 = mx.sym.Convolution(data=bn1, num_filter=channels_operating, kernel=(1, 1), pad=(0, 0), no_bias=True,
                               name=name + '_conv1')
    bn2 = mx.sym.BatchNorm(data=conv1, name=name + '_bn2')
    act1 = get_act(data=bn2, act_type=act_type, name=name + '_act1')
    conv2 = mix_conv(data=act1, channels=channels_operating, kernels=kernels, name=name + 'conv2')
    bn3 = mx.sym.BatchNorm(data=conv2, name=name + '_bn3')
    act2 = get_act(data=bn3, act_type=act_type, name=name + '_act2')
    out = mx.sym.Convolution(data=act2, num_filter=channels, kernel=(1, 1),
                               pad=(0, 0), no_bias=True, name=name + '_conv3')
    # out = mx.sym.BatchNorm(data=conv3, name=name + '_bn4')
    if use_se:
        out = channel_squeeze_excitation(out, channels, name=name + '_se', ratio=4, act_type=act_type,
                                         use_hard_sigmoid=True)
    out_sum = mx.sym.broadcast_add(data, out, name=name + '_add')

    return out_sum
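
A minimal usage sketch for this block (assuming mxnet is available as mx and the helpers used above, such as get_act, mix_conv and channel_squeeze_excitation, are defined in the same module; the input shape is illustrative). The residual broadcast_add only works if the block preserves the input shape, which can be checked with infer_shape:

import mxnet as mx

data = mx.sym.Variable(name='data')
block = preact_residual_dmixconv_block(data, channels=256, channels_operating=128,
                                       name='dconv_0', kernels=[3, 5])
# the skip connection requires the output shape to match the input shape
_, out_shapes, _ = block.infer_shape(data=(1, 256, 8, 8))
print(out_shapes)  # expected: [(1, 256, 8, 8)]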
Example #2
def bottleneck_residual_block_v2(data,
                                 channels,
                                 channels_operating,
                                 name,
                                 kernel,
                                 act_type='relu',
                                 norm_type="bn",
                                 se_type=None):
    """
    Returns a residual block without any max pooling operation
    :param data: Input data
    :param channels: Number of filters for all CNN-layers
    :param name: Name for the residual block
    :param act_type: Activation function to use
    :param se_ratio: Squeeze excitation ratio
    :param use_se: Boolean if a squeeze excitation module will be used
    :param se_type: Squeeze excitation module type. Available [None, "se", "cbam", "ca_se", "cm_se", "sa_se", "sm_se"]
    :return: symbol
    """
    if se_type:
        next_input = get_se_layer(data,
                                  channels,
                                  se_type,
                                  name=name + '_se',
                                  use_hard_sigmoid=True)
    else:
        next_input = data
    conv1 = mx.sym.Convolution(data=next_input,
                               num_filter=channels_operating,
                               kernel=(1, 1),
                               pad=(0, 0),
                               no_bias=True,
                               name=name + '_conv1')
    bn1 = get_norm_layer(data=conv1, norm_type=norm_type, name=name + '_bn1')
    act1 = get_act(data=bn1, act_type=act_type, name=name + '_act1')
    conv2 = mx.sym.Convolution(data=act1,
                               num_filter=channels_operating,
                               kernel=(kernel, kernel),
                               stride=(1, 1),
                               num_group=channels_operating,
                               pad=(kernel // 2, kernel // 2),
                               no_bias=True,
                               name=name + '_conv2')
    bn2 = get_norm_layer(data=conv2, norm_type=norm_type, name=name + '_bn2')
    act2 = get_act(data=bn2, act_type=act_type, name=name + '_act2')
    conv3 = mx.sym.Convolution(data=act2,
                               num_filter=channels,
                               kernel=(1, 1),
                               pad=(0, 0),
                               no_bias=True,
                               name=name + '_conv3')
    bn3 = get_norm_layer(data=conv3, norm_type=norm_type, name=name + '_bn3')
    sum_out = mx.sym.broadcast_add(bn3, data, name=name + '_add')
    return sum_out
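
The middle convolution is depthwise (num_group=channels_operating), so most parameters sit in the two 1x1 convolutions. A rough, purely illustrative comparison against a plain kxk convolution, ignoring normalization and squeeze-excitation parameters:

def bottleneck_conv_params(channels, channels_operating, kernel):
    conv1 = channels * channels_operating           # 1x1 expansion
    conv2 = channels_operating * kernel * kernel    # kxk depthwise (num_group == channels_operating)
    conv3 = channels_operating * channels           # 1x1 projection
    return conv1 + conv2 + conv3

def plain_conv_params(channels, kernel):
    return channels * channels * kernel * kernel    # standard kxk convolution

print(bottleneck_conv_params(256, 128, 3))  # 66688
print(plain_conv_params(256, 3))            # 589824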
Example #3
def sandglass_block(data,
                    channels,
                    channels_reduced,
                    name,
                    kernel,
                    act_type='relu',
                    norm_type="bn",
                    se_type="eca_se"):
    """
    Rethinking Bottleneck Structure for EfficientMobile Network Design, D. Zhou and Q. Hou et al.
    """
    first_kernel = kernel
    conv1 = mx.sym.Convolution(data=data,
                               num_filter=channels,
                               kernel=(first_kernel, first_kernel),
                               pad=(first_kernel // 2, first_kernel // 2),
                               num_group=channels,
                               no_bias=True,
                               name=name + '_conv1')
    bn1 = get_norm_layer(data=conv1, norm_type=norm_type, name=name + '_bn1')
    act1 = get_act(data=bn1, act_type=act_type, name=name + '_act1')
    if se_type:
        next_input = get_se_layer(act1,
                                  channels,
                                  se_type,
                                  name=name + '_se',
                                  use_hard_sigmoid=True)
    else:
        next_input = act1
    conv2 = mx.sym.Convolution(data=next_input,
                               num_filter=channels_reduced,
                               kernel=(1, 1),
                               pad=(0, 0),
                               no_bias=False,
                               name=name + '_conv2')
    conv3 = mx.sym.Convolution(data=conv2,
                               num_filter=channels,
                               kernel=(1, 1),
                               pad=(0, 0),
                               no_bias=True,
                               name=name + '_conv3')
    bn2 = get_norm_layer(data=conv3, norm_type=norm_type, name=name + '_bn2')
    act2 = get_act(data=bn2, act_type=act_type, name=name + '_act2')
    last_kernel = 3
    conv4 = mx.sym.Convolution(data=act2,
                               num_filter=channels,
                               kernel=(last_kernel, last_kernel),
                               pad=(last_kernel // 2, last_kernel // 2),
                               num_group=channels,
                               no_bias=False,
                               name=name + '_conv4')
    sum_out = mx.sym.broadcast_add(conv4, data, name=name + '_add')
    return sum_out
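
A hedged usage sketch (assuming mxnet as mx and the get_norm_layer, get_act and get_se_layer helpers from this module; the channel counts and input shape are illustrative). As with the other blocks, the residual add requires the output shape to equal the input shape:

import mxnet as mx

data = mx.sym.Variable(name='data')
block = sandglass_block(data, channels=256, channels_reduced=64,
                        name='sandglass_0', kernel=5)
_, out_shapes, _ = block.infer_shape(data=(1, 256, 8, 8))
print(out_shapes)  # expected: [(1, 256, 8, 8)]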
Example #4
def preact_residual_block(data, channels, name, kernel=3, act_type='relu'):
    """
    Returns a residual block without any max pooling operation
    :param data: Input data
    :param channels: Number of filters for all CNN-layers
    :param name: Name for the residual block
    :param act_type: Activation function to use
    :return:
    """

    bn1 = mx.sym.BatchNorm(data=data, name=name + '_bn1')
    conv1 = mx.sym.Convolution(data=bn1,
                               num_filter=channels,
                               kernel=(kernel, kernel),
                               pad=(kernel // 2, kernel // 2),
                               num_group=1,
                               no_bias=True,
                               name=name + '_conv1')
    bn2 = mx.sym.BatchNorm(data=conv1, name=name + '_bn2')
    act1 = get_act(data=bn2, act_type=act_type, name=name + '_act1')
    conv2 = mx.sym.Convolution(data=act1,
                               num_filter=channels,
                               kernel=(kernel, kernel),
                               stride=(1, 1),
                               pad=(kernel // 2, kernel // 2),
                               no_bias=True,
                               name=name + '_conv2')
    sum_out = mx.sym.broadcast_add(data, conv2, name=name + '_add')

    return sum_out
Example #5
def residual_block(data, channels, name, kernel=3, act_type='relu', use_se=False):
    """
    Returns a residual block without any max pooling operation
    :param data: Input data
    :param channels: Number of filters for all CNN-layers
    :param name: Name for the residual block
    :param act_type: Activation function to use
    :param use_se: If true, a squeeze excitation will be used
    :return:
    """

    if use_se:
        se = channel_squeeze_excitation(data, channels, name=name + '_se', ratio=2, act_type=act_type)
        conv1 = mx.sym.Convolution(data=se, num_filter=channels, kernel=(kernel, kernel),
                                   pad=(kernel // 2, kernel // 2), num_group=1,
                                   no_bias=True, name=name + '_conv1')
    else:
        conv1 = mx.sym.Convolution(data=data, num_filter=channels, kernel=(kernel, kernel),
                                   pad=(kernel // 2, kernel // 2), num_group=1,
                                   no_bias=True, name=name + '_conv1')
    act1 = get_act(data=conv1, act_type=act_type, name=name + '_act1')
    bn1 = mx.sym.BatchNorm(data=act1, name=name + '_bn1')

    # kernel = 3
    conv2 = mx.sym.Convolution(data=bn1, num_filter=channels, kernel=(kernel, kernel), stride=(1, 1),
                               num_group=1, pad=(kernel // 2, kernel // 2), no_bias=True,
                               name=name + '_conv2')
    bn2 = mx.sym.BatchNorm(data=conv2, name=name + '_bn2')

    sum_out = mx.sym.broadcast_add(data, bn2, name=name + '_add')

    return sum_out
Example #6
def bottleneck_residual_block(data, channels, channels_operating, name, kernel=3, act_type='relu', use_se=False,
                              data_variant=None):
    """
    Returns a residual block without any max pooling operation
    :param data: Input data
    :param channels: Number of filters for all CNN-layers
    :param channels_operating: Number of filters used for 3x3, 5x5, 7x7,.. convolution
    :param name: Name for the residual block
    :param kernel: Kernel size of the depthwise convolution
    :param act_type: Activation function to use
    :param use_se: If true, a squeeze excitation will be used
    :param data_variant: Data input which holds the current active variant information
    :return: symbol
    """

    if data_variant is not None:
        first_input = mx.sym.Concat(*[data, data_variant], name=name + '_concat')
        add_channels = NB_CHANNELS_VARIANTS
    else:
        first_input = data
        add_channels = 0

    if use_se:
        se = channel_squeeze_excitation(first_input, channels+add_channels, name=name + '_se', ratio=2)
        conv1 = mx.sym.Convolution(data=se, num_filter=channels_operating, kernel=(1, 1), pad=(0, 0),
                                   no_bias=True, name=name + '_conv1')
    else:
        conv1 = mx.sym.Convolution(data=first_input, num_filter=channels_operating, kernel=(1, 1), pad=(0, 0),
                                   no_bias=True, name=name + '_conv1')
    bn1 = mx.sym.BatchNorm(data=conv1, name=name + '_bn1')
    act1 = get_act(data=bn1, act_type=act_type, name=name + '_act1')
    conv2 = mx.sym.Convolution(data=act1, num_filter=channels_operating, kernel=(kernel, kernel), stride=(1, 1),
                               num_group=channels_operating, pad=(kernel // 2, kernel // 2), no_bias=True,
                               name=name + '_conv2')
    bn2 = mx.sym.BatchNorm(data=conv2, name=name + '_bn2')
    act2 = get_act(data=bn2, act_type=act_type, name=name + '_act2')
    conv3 = mx.sym.Convolution(data=act2, num_filter=channels, kernel=(1, 1), pad=(0, 0),
                               no_bias=True, name=name + '_conv3')
    bn3 = mx.sym.BatchNorm(data=conv3, name=name + '_bn3')
    sum_out = mx.sym.broadcast_add(bn3, data, name=name+'_add')

    return sum_out
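
When data_variant is given, it is concatenated channel-wise with the input before the squeeze excitation and the first 1x1 convolution, so the module constant NB_CHANNELS_VARIANTS determines how many extra input planes are expected. A small composition sketch (assuming mxnet as mx and that NB_CHANNELS_VARIANTS and the helpers above are defined in this module; all shapes are illustrative):

import mxnet as mx

data = mx.sym.Variable(name='data')        # e.g. (N, 256, 8, 8)
variant = mx.sym.Variable(name='variant')  # e.g. (N, NB_CHANNELS_VARIANTS, 8, 8) variant planes
block = bottleneck_residual_block(data, channels=256, channels_operating=128,
                                  name='res_0', kernel=3, use_se=True,
                                  data_variant=variant)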
Example #7
def preact_resnet_symbol(channels=256, channels_value_head=8,
                         channels_policy_head=81, value_fc_size=256, value_kernelsize=7, res_blocks=19, act_type='relu',
                         n_labels=4992, grad_scale_value=0.01, grad_scale_policy=0.99, select_policy_from_plane=True):
    """
    Creates the alpha zero model symbol based on the given parameters.

    :param channels: Used for all convolution operations. (Except the last 2)
    :param workspace: Parameter for convolution
    :param value_fc_size: Fully Connected layer size. Used for the value output
    :param num_res_blocks: Number of residual blocks to stack. In the paper they used 19 or 39 residual blocks
    :param bn_mom: batch normalization momentum
    :param act_type: Activation function which will be used for all intermediate layers
    :param n_labels: Number of labels the for the policy
    :param grad_scale_value: Constant scalar which the gradient for the value outputs are being scaled width.
                            (They used 1.0 for default and 0.01 in the supervised setting)
    :param grad_scale_policy: Constant scalar which the gradient for the policy outputs are being scaled width.
                            (They used 1.0 for default and 0.99 in the supervised setting)
    :return: mxnet symbol of the model
    """
    # get the input data
    data = mx.sym.Variable(name='data')

    body = get_stem(data=data, channels=channels, act_type=act_type)

    for idx in range(res_blocks):
        body = preact_residual_block(body, channels, name='res_block%d' % idx, kernel=3,
                                     act_type=act_type)

    body = mx.sym.BatchNorm(data=body, name='stem_bn1')
    body = get_act(data=body, act_type=act_type, name='stem_act1')

    # for policy output
    policy_out = policy_head(data=body, channels=channels, act_type=act_type, channels_policy_head=channels_policy_head,
                             select_policy_from_plane=select_policy_from_plane, n_labels=n_labels,
                             grad_scale_policy=grad_scale_policy, use_se=False, no_bias=True)

    # for value output
    value_out = value_head(data=body, channels_value_head=channels_value_head, value_kernelsize=1, act_type=act_type,
                           value_fc_size=value_fc_size, grad_scale_value=grad_scale_value, use_se=False,
                           use_mix_conv=False)

    # group value_out and policy_out together
    sym = mx.symbol.Group([value_out, policy_out])

    return sym
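
A minimal sketch for building and inspecting the symbol (assuming mxnet as mx and that get_stem, preact_residual_block, policy_head and value_head are available in this module; the argument values simply repeat the defaults):

sym = preact_resnet_symbol(channels=256, res_blocks=19,
                           channels_policy_head=81, select_policy_from_plane=True)
print(sym.list_outputs())          # grouped outputs: value head first, then policy head
print(len(sym.list_arguments()))   # number of argument symbols (weights, biases and the 'data' input)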
Example #8
def rise_mobile_v3_symbol(channels=256, channels_operating_init=128, channel_expansion=64, act_type='relu',
                          channels_value_head=32, channels_policy_head=81, value_fc_size=128, dropout_rate=0.15,
                          select_policy_from_plane=True, use_se=True, res_blocks=13, n_labels=4992):
    """
    RISEv3 architecture
    :param channels: Main number of channels
    :param channels_operating_init: Initial number of channels at the start of the net for the depthwise convolution
    :param channel_expansion: Number of channels to add after each residual block
    :param act_type: Activation type to use
    :param channels_value_head: Number of channels for the value head
    :param value_fc_size: Number of units in the fully connected layer of the value head
    :param channels_policy_head: Number of channels for the policy head
    :param dropout_rate: Dropout rate to use. If 0, no dropout will be applied. Value must be in [0,1]
    :param select_policy_from_plane: True, if policy head type shall be used
    :param use_se: Indicates if a squeeze excitation layer shall be used
    :param res_blocks: Number of residual blocks
    :param n_labels: Number of policy target labels (used for select_policy_from_plane=False)
    :return: symbol
    """
    # get the input data
    data = mx.sym.Variable(name='data')

    data = get_stem(data=data, channels=channels, act_type=act_type)

    cur_channels = channels_operating_init

    kernels = [
        [3],  # 0
        [3],  # 1
        [3, 5],  # 2
        [3, 5],  # 3
        [3, 5, 7, 9],  # 4
        [3, 5],  # 5
        [3, 5],  # 6
        [3, 5],  # 7
        [3, 5],  # 8
        [3, 5],  # 9
        [3, 5],  # 10
        [3, 5],  # 11
        [3, 5],  # 12
    ]
    for idx in range(res_blocks):

        cur_kernels = kernels[idx]
        if idx == 4 or idx >= 9:
            use_se = True
        else:
            use_se = False
        data = preact_residual_dmixconv_block(data=data, channels=channels, channels_operating=cur_channels,
                                              kernels=cur_kernels, name='dconv_%d' % idx, use_se=use_se)
        cur_channels += channel_expansion
    # return data
    data = mx.sym.BatchNorm(data=data, name='stem_bn1')
    data = get_act(data=data, act_type=act_type, name='stem_act1')

    if dropout_rate != 0:
        data = mx.sym.Dropout(data, p=dropout_rate)

    value_out = value_head(data=data, act_type=act_type, use_se=use_se, channels_value_head=channels_value_head,
                           value_fc_size=value_fc_size, use_mix_conv=True)
    policy_out = policy_head(data=data, act_type=act_type, channels_policy_head=channels_policy_head, n_labels=n_labels,
                             select_policy_from_plane=select_policy_from_plane, use_se=False, channels=channels)
    # group value_out and policy_out together
    sym = mx.symbol.Group([value_out, policy_out])

    return sym
Example #9
def preact_residual_dmixconv_block(data,
                                   channels,
                                   channels_operating,
                                   name,
                                   kernels=None,
                                   act_type='relu',
                                   se_ratio=4,
                                   se_type="se"):
    """
    Returns a residual block without any max pooling operation
    :param data: Input data
    :param channels: Number of filters for all CNN-layers
    :param name: Name for the residual block
    :param act_type: Activation function to use
    :param se_ratio: Squeeze excitation ratio
    :param use_se: Boolean if a squeeze excitation module will be used
    :param se_type: Squeeze excitation module type. Available [None, "se", "cbam", "ca_se", "cm_se", "sa_se", "sm_se"]
    :return: symbol
    """
    bn1 = mx.sym.BatchNorm(data=data, name=name + '_bn1')
    conv1 = mx.sym.Convolution(data=bn1,
                               num_filter=channels_operating,
                               kernel=(1, 1),
                               pad=(0, 0),
                               no_bias=True,
                               name=name + '_conv1')
    bn2 = mx.sym.BatchNorm(data=conv1, name=name + '_bn2')
    act1 = get_act(data=bn2, act_type=act_type, name=name + '_act1')
    conv2 = mix_conv(data=act1,
                     channels=channels_operating,
                     kernels=kernels,
                     name=name + 'conv2')
    bn3 = mx.sym.BatchNorm(data=conv2, name=name + '_bn3')
    out = get_act(data=bn3, act_type=act_type, name=name + '_act2')
    out = mx.sym.Convolution(data=out,
                             num_filter=channels,
                             kernel=(1, 1),
                             pad=(0, 0),
                             no_bias=True,
                             name=name + '_conv3')
    if se_type is not None:
        if se_type == "se":
            out = channel_squeeze_excitation(out,
                                             channels,
                                             name=name + '_se',
                                             ratio=se_ratio,
                                             act_type=act_type,
                                             use_hard_sigmoid=True)
        elif se_type == "cbam":
            out = convolution_block_attention_module(out,
                                                     channels,
                                                     name=name + '_se',
                                                     ratio=se_ratio,
                                                     act_type=act_type,
                                                     use_hard_sigmoid=True)
        elif se_type == "ca_se":
            out = ca_se(out,
                        channels,
                        name=name + '_ca_se',
                        ratio=se_ratio,
                        act_type=act_type,
                        use_hard_sigmoid=True)
        elif se_type == "cm_se":
            out = cm_se(out,
                        channels,
                        name=name + '_cm_se',
                        ratio=se_ratio,
                        act_type=act_type,
                        use_hard_sigmoid=True)
        elif se_type == "sa_se":
            out = sa_se(out, name=name + 'sa_se', use_hard_sigmoid=True)
        elif se_type == "sm_se":
            out = sm_se(out, name=name + 'sm_se', use_hard_sigmoid=True)
        else:
            raise Exception(f'Unsupported se_type "{se_type}"')
    out_sum = mx.sym.broadcast_add(data, out, name=name + '_add')

    return out_sum
Example #10
def rise_mobile_v3_symbol(channels=256,
                          channels_operating_init=128,
                          channel_expansion=64,
                          act_type='relu',
                          channels_value_head=8,
                          channels_policy_head=81,
                          value_fc_size=256,
                          dropout_rate=0.15,
                          grad_scale_value=0.01,
                          grad_scale_policy=0.99,
                          select_policy_from_plane=True,
                          kernels=None,
                          n_labels=4992,
                          se_ratio=4,
                          se_types="se"):
    """
    RISEv3 architecture
    :param channels: Main number of channels
    :param channels_operating_init: Initial number of channels at the start of the net for the depthwise convolution
    :param channel_expansion: Number of channels to add after each residual block
    :param act_type: Activation type to use
    :param channels_value_head: Number of channels for the value head
    :param value_fc_size: Number of units in the fully connected layer of the value head
    :param channels_policy_head: Number of channels for the policy head
    :param dropout_rate: Dropout rate to use. If 0, no dropout will be applied. Value must be in [0,1]
    :param grad_scale_value: Constant scalar with which the gradient of the value output is scaled
                            (0.01 is recommended for supervised learning with little data)
    :param grad_scale_policy: Constant scalar with which the gradient of the policy output is scaled
    :param select_policy_from_plane: True, if policy head type shall be used
    :param kernels: List of kernel sizes used for the residual blocks. The length of the list corresponds to the number
    of residual blocks.
    :param n_labels: Number of policy target labels (used for select_policy_from_plane=False)
    :param se_ratio: Reduction ratio used in the squeeze excitation module
    :param se_types: List of squeeze excitation modules to use for each residual layer.
     The length of this list must be the same as len(kernels). Available types:
    - "se": Squeeze excitation block - Hu et al. - https://arxiv.org/abs/1709.01507
    - "cbam": Convolutional Block Attention Module (CBAM) - Woo et al. - https://arxiv.org/pdf/1807.06521.pdf
    - "ca_se": Same as "se"
    - "cm_se": Squeeze excitation with max operator
    - "sa_se": Spatial excitation with average operator
    - "sm_se": Spatial excitation with max operator
    :return: symbol
    """
    if kernels is None:
        kernels = [3] * 13

    if len(kernels) != len(se_types):
        raise Exception(
            f'The length of "kernels": {len(kernels)} must be the same as'
            f' the length of "se_types": {len(se_types)}')

    valid_se_types = [None, "se", "cbam", "ca_se", "cm_se", "sa_se", "sm_se"]
    for se_type in se_types:
        if se_type not in valid_se_types:
            raise Exception(
                f"Unavailable se_type: {se_type}. Available se_types include {se_types}"
            )

    # get the input data
    data = mx.sym.Variable(name='data')

    data = get_stem(data=data, channels=channels, act_type=act_type)

    cur_channels = channels_operating_init

    for idx, cur_kernels in enumerate(kernels):

        data = preact_residual_dmixconv_block(data=data,
                                              channels=channels,
                                              channels_operating=cur_channels,
                                              kernels=cur_kernels,
                                              name='dconv_%d' % idx,
                                              se_ratio=se_ratio,
                                              se_type=se_types[idx])
        cur_channels += channel_expansion

    data = mx.sym.BatchNorm(data=data, name='stem_bn1')
    data = get_act(data=data, act_type=act_type, name='stem_act1')

    if dropout_rate != 0:
        data = mx.sym.Dropout(data, p=dropout_rate)

    value_out = value_head(data=data,
                           act_type=act_type,
                           use_se=False,
                           channels_value_head=channels_value_head,
                           value_fc_size=value_fc_size,
                           use_mix_conv=False,
                           grad_scale_value=grad_scale_value)
    policy_out = policy_head(data=data,
                             act_type=act_type,
                             channels_policy_head=channels_policy_head,
                             n_labels=n_labels,
                             select_policy_from_plane=select_policy_from_plane,
                             use_se=False,
                             channels=channels,
                             grad_scale_policy=grad_scale_policy)
    # group value_out and policy_out together
    sym = mx.symbol.Group([value_out, policy_out])

    return sym
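
A hedged usage sketch: kernels and se_types must have the same length, with one entry per residual block, and each kernels entry is itself a list because the blocks use mixed depthwise convolutions (see preact_residual_dmixconv_block above). The values below are illustrative, not a recommended configuration:

kernels = [[3], [3], [3, 5], [3, 5], [3, 5, 7, 9]]
se_types = [None, None, "se", "se", "ca_se"]
sym = rise_mobile_v3_symbol(channels=256, channels_operating_init=128,
                            channel_expansion=64, kernels=kernels, se_types=se_types,
                            channels_policy_head=81, select_policy_from_plane=True)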