def __init__(self):
        """Build the DenseNet feature extractor: conv stem, three dense
        blocks separated by transition layers, and a BN-ReLU-conv head."""
        super(DenseNet, self).__init__()
        with self.name_scope():
            self.features = nn.HybridSequential()
            feats = self.features

            # Stem: 3x3 conv -> BN -> ReLU, then a strided 2x2 conv
            # (downsampling; an nn.MaxPool2D(pool_size=2, strides=2)
            # alternative was left disabled in the original).
            feats.add(nn.Conv2D(64, 3, padding=1, use_bias=False))
            feats.add(nn.BatchNorm())
            feats.add(nn.Activation('relu'))
            feats.add(nn.Conv2D(64, 2, strides=2, use_bias=False))

            # Dense block 1 -> transition -> dense block 2 -> transition
            # -> dense block 3.
            feats.add(_make_dense_block(8, 4, 8, 0, 1))
            feats.add(_make_transition(128, 2, 0, 0.2))
            feats.add(_make_dense_block(8, 4, 8, 0, 2))
            feats.add(_make_transition(192, (2, 1), (0, 1), 0.2))
            feats.add(_make_dense_block(8, 4, 8, 0, 3))

            # Head: two BN-ReLU-conv stages widening to 512 then 1024
            # channels, followed by a final BN-ReLU.
            feats.add(nn.BatchNorm())
            feats.add(nn.Activation('relu'))
            feats.add(nn.Conv2D(512, 3, padding=0, use_bias=False))
            feats.add(nn.BatchNorm())
            feats.add(nn.Activation('relu'))
            feats.add(nn.Conv2D(1024, 2, padding=(0, 1), use_bias=False))
            feats.add(nn.BatchNorm())
            feats.add(nn.Activation('relu'))
# Example #2
# 0
    def __init__(self):
        """Build the DenseNet trunk: a single strided 5x5 conv stem,
        three dense blocks with two transitions, then a final BN-ReLU."""
        super(DenseNet, self).__init__()
        with self.name_scope():
            self.features = nn.HybridSequential()
            feats = self.features

            # Stem: 5x5 strided convolution (halves spatial resolution).
            feats.add(nn.Conv2D(64, 5, padding=2, strides=2,
                                use_bias=False))

            # Alternate dense blocks and transition layers.
            feats.add(_make_dense_block(8, 4, 8, 0, 1))
            feats.add(_make_transition(128, 2, 0, 0.2))
            feats.add(_make_dense_block(8, 4, 8, 0, 2))
            feats.add(_make_transition(128, (2, 1), 0, 0.2))
            feats.add(_make_dense_block(8, 4, 8, 0, 3))

            # Trailing BN-ReLU, since dense blocks start (not end) with them.
            feats.add(nn.BatchNorm())
            feats.add(nn.Activation('relu'))
    def __init__(self,
                 channels_init=64,
                 growth_rate=32,
                 n_layers=10,
                 bottleneck_factor=4,
                 dropout=0,
                 n_labels=1000,
                 channels_value_head=8,
                 channels_policy_head=16,
                 value_fc_size=256,
                 **kwargs):
        """
        Build a DenseNet trunk plus AlphaZero-style value and policy heads.

        :param channels_init: Number of channels for the first convolutional layer
        :param growth_rate: Number of channels which increase per layer
        :param n_layers: Number of layers
        :param bottleneck_factor: Bottleneck factor which determines how much more layers used for the 1x1 convolution
        :param dropout: Dropout factor, if 0% then no dropout will be used
        :param n_labels: Number of final labels to predict, here moves
        :param channels_value_head: Number of channels in the final value head
        :param channels_policy_head: Number of channels in the final policy head
        :param value_fc_size: Size of the fully connected layer in the value head
        :param kwargs: Optional additional arguments
        """
        super(DenseNet, self).__init__(**kwargs)

        with self.name_scope():
            self.features = nn.HybridSequential(prefix="")

            # Initial 3x3 convolution (no bias; BN follows inside blocks).
            self.features.add(
                nn.Conv2D(channels_init, kernel_size=3, padding=1,
                          use_bias=False))

            # Stack the dense blocks.
            for idx in range(n_layers):
                block = _make_dense_block(n_layers, bottleneck_factor,
                                          growth_rate, dropout, idx)
                self.features.add(block)

            # _make_dense_block() begins with BN + ReLU, so close the trunk
            # with a matching BN + ReLU pair.
            self.features.add(nn.BatchNorm())
            self.features.add(nn.Activation("relu"))

        # The two output heads used in the hybrid forward pass.
        self.value_head = _ValueHeadAlphaZero("value", channels_value_head,
                                              value_fc_size, 0.9, "relu")
        self.policy_head = _PolicyHeadAlphaZero("policy", channels_policy_head,
                                                n_labels, 0.9, "relu")