Beispiel #1
0
    def __init__(self,
                 channels_init=64,
                 growth_rate=32,
                 n_layers=10,
                 bottleneck_factor=4,
                 dropout=0,
                 n_labels=1000,
                 channels_value_head=8,
                 channels_policy_head=16,
                 value_fc_size=256,
                 **kwargs):
        """
        Build the DenseNet feature extractor together with its value and policy head.

        :param channels_init: Number of output channels of the initial 3x3 convolution
        :param growth_rate: Growth rate forwarded to every dense block
        :param n_layers: Number of dense blocks stacked after the initial convolution;
                         the same value is also forwarded to _make_dense_block() as its first argument
        :param bottleneck_factor: Bottleneck factor forwarded to every dense block
        :param dropout: Dropout value forwarded to every dense block
        :param n_labels: Number of labels handed to the policy head
        :param channels_value_head: Number of channels handed to the value head
        :param channels_policy_head: Number of channels handed to the policy head
        :param value_fc_size: Fully connected layer size handed to the value head
        :param kwargs: Additional keyword arguments passed on to the parent constructor
        """

        super(DenseNet, self).__init__(**kwargs)

        with self.name_scope():
            feature_stack = nn.HybridSequential(prefix="")
            self.features = feature_stack

            # stem: a single bias-free 3x3 convolution
            stem_conv = nn.Conv2D(channels_init,
                                  kernel_size=3,
                                  padding=1,
                                  use_bias=False)
            feature_stack.add(stem_conv)

            # stack the dense blocks, each one receives its own position index
            for block_idx in range(n_layers):
                dense_block = _make_dense_block(n_layers, bottleneck_factor,
                                                growth_rate, dropout, block_idx)
                feature_stack.add(dense_block)

            # close the stack with batch-norm + activation; the original author
            # notes this is required because _make_dense_block() starts with them
            for closing_layer in (nn.BatchNorm(), nn.Activation("relu")):
                feature_stack.add(closing_layer)

        # the two heads used in the hybrid forward pass; batch-norm momentum and
        # activation type are fixed for this architecture
        head_bn_mom = 0.9
        head_act_type = "relu"
        self.value_head = _ValueHeadAlphaZero("value", channels_value_head,
                                              value_fc_size, head_bn_mom,
                                              head_act_type)
        self.policy_head = _PolicyHeadAlphaZero("policy", channels_policy_head,
                                                n_labels, head_bn_mom,
                                                head_act_type)
    def __init__(self,
                 n_labels=2272,
                 channels=256,
                 channels_value_head=8,
                 channels_policy_head=16,
                 nb_res_blocks_x=7,
                 nb_shuffle_blocks=19,
                 nb_shuffle_blocks_neck=19,
                 value_fc_size=256,
                 bn_mom=0.9,
                 act_type="relu",
                 squeeze_excitation_type=None,
                 select_policy_from_plane=True,
                 use_rise_stem=False,
                 **kwargs):  # Too many local variables (22/15)
        """
        Creates the ShuffleRise gluon net: a stem, residual blocks, shuffle blocks, shuffle neck blocks
        and the value and policy head.

        :param n_labels: Number of labels for the policy
        :param channels: Number of channels of the stem and the residual blocks. It is also the input channel
                         count of the first shuffle block and is increased by 28 after every shuffle block;
                         the neck blocks receive the final, increased value
        :param channels_value_head: Number of channels handed to the value head
        :param channels_policy_head: Number of channels handed to the policy head
        :param nb_res_blocks_x: Number of residual blocks to stack. In the paper they used 19 or 39 residual blocks
        :param nb_shuffle_blocks: Number of shuffle blocks to stack after the residual blocks
        :param nb_shuffle_blocks_neck: Number of shuffle neck blocks to stack after the shuffle blocks
        :param value_fc_size: Fully Connected layer size. Used for the value output
        :param bn_mom: Batch normalization momentum, used by the AlphaZero stem, the residual blocks and both heads
        :param act_type: Activation type forwarded to the AlphaZero stem, all blocks and both heads
        :param squeeze_excitation_type: Available types: [None, "cSE", "sSE", "scSE", "mixed"]
                                        cSE: Channel-wise-squeeze-excitation
                                        sSE: Spatial-wise-squeeze-excitation
                                        scSE: Channel-spatial-wise-squeeze-excitation
                                        mixed: Use cSE and sSE interchangeably
                                        Forwarded to the rise stem, the residual blocks and the shuffle blocks;
                                        the AlphaZero stem and both heads are always built with None
        :param select_policy_from_plane: Forwarded as last argument to the policy head
        :param use_rise_stem: If True a _StemRise is used as the first block, otherwise a _StemAlphaZero
        :param kwargs: Additional keyword arguments passed on to the parent constructor
                       (must not contain "prefix", which is always set to "")
        """

        super(ShuffleRise, self).__init__(**kwargs, prefix="")
        self.body = HybridSequential(prefix="")

        # the AlphaZero stem and both heads never use squeeze excitation,
        # regardless of squeeze_excitation_type
        se_type = None

        with self.name_scope():
            if use_rise_stem:
                self.body.add(
                    _StemRise(name="stem",
                              channels=channels,
                              se_type=squeeze_excitation_type))
            else:
                self.body.add(
                    _StemAlphaZero(name="stem",
                                   channels=channels,
                                   bn_mom=bn_mom,
                                   act_type=act_type,
                                   se_type=se_type))

        for i in range(nb_res_blocks_x):
            unit_name = "res_unit%d" % i
            self.body.add(
                ResidualBlockX(unit_name,
                               channels=channels,
                               # honour the configured momentum instead of a hard-coded 0.9
                               bn_mom=bn_mom,
                               act_type=act_type,
                               se_type=squeeze_excitation_type))

        for i in range(nb_shuffle_blocks):
            unit_name = "shuffle_unit%d" % i
            self.body.add(
                _ShuffleBlock(unit_name,
                              in_channels=channels,
                              se_type=squeeze_excitation_type,
                              act_type=act_type,
                              id=i))
            # the input channel count handed to the next block grows by 28 per shuffle block
            channels += 28

        for i in range(nb_shuffle_blocks_neck):
            unit_name = "shuffle_unit_neck_%d" % i
            self.body.add(
                _ShuffleBlockNeck(unit_name,
                                  nb_in_channels=channels,
                                  se_type=squeeze_excitation_type,
                                  act_type=act_type))

        # create the two heads which will be used in the hybrid fwd pass
        self.value_head = _ValueHeadAlphaZero("value", channels_value_head,
                                              value_fc_size, bn_mom, act_type,
                                              se_type)
        self.policy_head = _PolicyHeadAlphaZero("policy", channels_policy_head,
                                                n_labels, bn_mom, act_type,
                                                se_type,
                                                select_policy_from_plane)
Beispiel #3
0
    def __init__(self,
                 nb_input_channels=34,
                 n_labels=2272,
                 channels=512,
                 channels_value_head=4,
                 channels_policy_head=8,
                 nb_res_blocks=6,
                 value_fc_size=512,
                 bn_mom=0.9,
                 act_type="relu",
                 use_se=True,
                 **kwargs):  # Too many local variables (22/15)
        """
        Creates the wide residual gluon net: a stem, residual blocks and the value and policy head.

        :param nb_input_channels: Number of input channels of the board representation, forwarded to the stem
                                  (only needed for the first SE)
        :param n_labels: Number of labels for the policy
        :param channels: Number of channels of the stem and the residual blocks
        :param channels_value_head: Number of channels handed to the value head
        :param channels_policy_head: Number of channels handed to the policy head
        :param nb_res_blocks: Number of residual blocks to stack. In the paper they used 19 or 39 residual blocks
        :param value_fc_size: Fully Connected layer size. Used for the value output
        :param bn_mom: Batch normalization momentum, used by the stem, the residual blocks and both heads
        :param act_type: Activation type forwarded to the stem, the residual blocks and both heads
        :param use_se: If True the stem and both heads are built with the squeeze excitation type "csSE",
                       otherwise without squeeze excitation. The residual blocks never use it
        :param kwargs: Additional keyword arguments passed on to the parent constructor
                       (must not contain "prefix", which is always set to "")
        """

        super(WideResnetSE, self).__init__(**kwargs, prefix="")
        self.body = HybridSequential(prefix="")

        with self.name_scope():

            # activate squeeze excitation layers if needed:
            # use the combination of channel and spatial excitation because it's almost for free
            # with a low amount of channels
            se_type = "csSE" if use_se else None

            # add the initial convolutional layer
            self.body.add(
                _StemAlphaZero(
                    name="stem",
                    channels=channels,
                    bn_mom=bn_mom,
                    act_type=act_type,
                    se_type=se_type,
                    nb_input_channels=nb_input_channels,
                ))

            # add all the residual blocks
            for i in range(nb_res_blocks):
                unit_name = "unit%d" % i
                self.body.add(
                    ResidualBlockX(
                        unit_name,
                        channels=channels,
                        # honour the configured momentum instead of a hard-coded 0.9
                        bn_mom=bn_mom,
                        act_type=act_type,
                        # deactivate SE for all middle layers to reduce computation cost
                        se_type=None,
                    ))

            # create the two heads which will be used in the hybrid fwd pass
            self.value_head = _ValueHeadAlphaZero("value", channels_value_head,
                                                  value_fc_size, bn_mom,
                                                  act_type, se_type)
            self.policy_head = _PolicyHeadAlphaZero("policy",
                                                    channels_policy_head,
                                                    n_labels, bn_mom, act_type,
                                                    se_type)