Example #1
def conv(x, name):
    # `prefix`, `filter_size`, `kernel_size` and `klayers` (keras.layers)
    # are captured from the enclosing scope.
    name = prefix + name
    return klayers.Conv2D(filter_size,
                          kernel_size,
                          name=name,
                          padding="same",
                          use_bias=False)(x)
Example #2
def conv(x, step):
    name = prefix + "conv%s" % step
    x = klayers.Conv2D(filter_size,
                       kernel_size,
                       name=name,
                       padding="same",
                       use_bias=False)(x)
    return x
Example #3
def block(x):
    # `args`, `kwds`, `name`, `do_bn` and the `bn`/`act` helpers are captured
    # from the enclosing block-factory scope.
    x = klayers.Conv2D(*args,
                       name=name + '_conv2d',
                       use_bias=False,
                       **kwds)(x)

    if do_bn:
        x = bn(x, name + '_bn')

    return act(x, activation, name + '_' + activation)
Example #4
def conv(x, name, kernel_size=None):
    name = prefix + name
    if kernel_size is None:
        kernel_size = cnn_kernel_size
    x = klayers.Conv2D(cnn_filter_size,
                       kernel_size,
                       name=name,
                       padding="same",
                       use_bias=False,
                       **kwds)(x)
    return x
Example #5
def block(x):
    x = klayers.Conv2D(*args, name=conv_name, **kwds)(x)

    if do_bn:
        x = klayers.BatchNormalization(axis=get_bn_axis(), name=bn_name)(x)

    if activation == "leakyrelu":
        x = klayers.LeakyReLU(alpha=0.03, name=act_name)(x)
    else:
        x = klayers.Activation(activation, name=act_name)(x)
    return x
Example #6
def block(tensor):
    x = klayers.Conv2D(*args, name=name_conv0, padding="same",
                       **kwds)(tensor)

    x = klayers.BatchNormalization(axis=get_bn_axis(), name=name_bn0)(x)

    if activation == "leakyrelu":
        x = klayers.LeakyReLU(alpha=0.03, name=name_act0)(x)
    else:
        x = klayers.Activation(activation, name=name_act0)(x)

    x = klayers.Conv2D(*args, name=name_conv1, padding="same", **kwds)(x)

    x = klayers.BatchNormalization(axis=get_bn_axis(), name=name_bn1)(x)

    # residual connection: add the block input back in before the final activation
    x = klayers.add([tensor, x], name=name_add)
    if activation == "leakyrelu":
        x = klayers.LeakyReLU(alpha=0.03, name=name_act_after)(x)
    else:
        x = klayers.Activation(activation, name=name_act_after)(x)

    return x
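
Note: Examples #3 to #6 are inner closures; the factory function that creates them is not shown in these snippets. The sketch below is a rough, assumed reconstruction of such a wrapper, modelled on the residual_block_v1(filter_size, kernel_size, prefix=..., activation=...) call seen in Example #7; the imports, the get_bn_axis helper and the captured args/kwds values are guesses, not the original code.

from keras import backend as K
from keras import layers as klayers


def get_bn_axis():
    # assumed helper: channels_first -> axis 1, otherwise the last axis
    return 1 if K.image_data_format() == "channels_first" else -1


def residual_block_v1(filter_size, kernel_size, prefix="", activation="relu"):
    # hypothetical wrapper: capture the parameters and return a `block`
    # closure in the spirit of Example #6
    args = (filter_size, kernel_size)
    kwds = dict(use_bias=False)

    def block(tensor):
        x = klayers.Conv2D(*args, name=prefix + "conv0", padding="same",
                           **kwds)(tensor)
        x = klayers.BatchNormalization(axis=get_bn_axis(), name=prefix + "bn0")(x)
        if activation == "leakyrelu":
            x = klayers.LeakyReLU(alpha=0.03, name=prefix + "act0")(x)
        else:
            x = klayers.Activation(activation, name=prefix + "act0")(x)

        x = klayers.Conv2D(*args, name=prefix + "conv1", padding="same", **kwds)(x)
        x = klayers.BatchNormalization(axis=get_bn_axis(), name=prefix + "bn1")(x)

        # residual connection, then the final activation
        x = klayers.add([tensor, x], name=prefix + "add")
        if activation == "leakyrelu":
            x = klayers.LeakyReLU(alpha=0.03, name=prefix + "act_after")(x)
        else:
            x = klayers.Activation(activation, name=prefix + "act_after")(x)
        return x

    return block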
Example #7
def get_network_model(conf, generation_descr):
    assert isinstance(conf, confs.NNModelConfig)

    activation = 'leakyrelu' if conf.leaky_relu else 'relu'

    # inputs:
    if is_channels_first():
        inputs_board = klayers.Input(shape=(conf.input_channels,
                                            conf.input_columns,
                                            conf.input_rows),
                                     name="inputs_board")
    else:
        inputs_board = klayers.Input(shape=(conf.input_columns,
                                            conf.input_rows,
                                            conf.input_channels),
                                     name="inputs_board")

    # XXX config abuse:
    v2 = conf.residual_layers <= 0
    if v2:
        layer = klayers.Conv2D(conf.cnn_filter_size,
                               1,
                               padding="same",
                               use_bias=False,
                               name='initial-conv')(inputs_board)

        # XXX hard coding dropout
        # XXX hard coding layers
        #for convs in [1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 2, 2, 2, 1, 1, 1, 1, 1, 1]:
        for i, c in enumerate(
            [1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1]):
            layer = residual_block_v2(conf.cnn_filter_size,
                                      conf.cnn_kernel_size,
                                      c,
                                      prefix="ResLayer_%s_" % i,
                                      dropout=0.3,
                                      activation=activation)(layer)

    else:
        # initial conv2d/Resnet on cords
        layer = conv2d_block(conf.cnn_filter_size,
                             conf.cnn_kernel_size,
                             activation=activation,
                             padding="same",
                             name='initial-conv')(inputs_board)

        # AG0 way:
        for i in range(conf.residual_layers):
            layer = residual_block_v1(conf.cnn_filter_size,
                                      conf.cnn_kernel_size,
                                      prefix="ResLayer_%s_" % i,
                                      activation=activation)(layer)

    # policy
    ########
    # similar to AG0, but with multiple policy heads
    assert conf.multiple_policies
    number_of_policies = conf.role_count
    assert number_of_policies == len(conf.policy_dist_count)

    policy_heads = []
    for idx, count in enumerate(conf.policy_dist_count):
        # residual net -> flattened for policy head
        # XXX 2, should be based on size of policy...
        to_flatten = conv2d_block(2,
                                  1,
                                  name='to_flatten_policy_head_%s' % idx,
                                  activation=activation,
                                  padding='valid')(layer)

        flat = klayers.Flatten()(to_flatten)

        # output: policy head(s)
        if conf.dropout_rate_policy > 0:
            flat = klayers.Dropout(conf.dropout_rate_policy)(flat)

        head = klayers.Dense(count,
                             name="policy_%d" % idx,
                             activation="softmax")(flat)

        policy_heads.append(head)

    # value
    #######
    # XXX config abuse:

    if generation_descr.draw_head:
        num_value_heads = 3
    else:
        num_value_heads = 2

    value_v3 = conf.value_hidden_size == 0
    value_v2 = conf.value_hidden_size < 0
    if value_v3:
        assert conf.input_columns == conf.input_rows
        average_layer = layer
        dims = conf.input_columns
        while dims >= 5:
            if dims % 2 == 1:
                average_layer = klayers.AveragePooling2D(4, 1)(average_layer)
                dims -= 3
            else:
                average_layer = klayers.AveragePooling2D(2, 2)(average_layer)
                dims /= 2

        assert dims < conf.input_columns

        to_flatten1 = conv2d_block(32,
                                   1,
                                   name='reward_flatten1',
                                   activation=activation,
                                   do_bn=False,
                                   padding='valid')(average_layer)

        to_flatten2 = conv2d_block(1,
                                   1,
                                   name='reward_flatten2',
                                   activation=activation,
                                   do_bn=False,
                                   padding='valid')(layer)

        flat = klayers.concatenate(
            [klayers.Flatten()(to_flatten1),
             klayers.Flatten()(to_flatten2)])

        if conf.dropout_rate_value > 0:
            flat = klayers.Dropout(conf.dropout_rate_value)(flat)

        hidden = klayers.Dense(256, name="value_hidden")(flat)
        hidden = act(hidden, 'crelu', name="value_hidden_act")

        value_head = klayers.Dense(num_value_heads,
                                   activation="sigmoid",
                                   name="value")(hidden)

    elif value_v2:
        assert conf.input_columns == conf.input_rows
        output_layer = layer
        dims = conf.input_columns
        while dims > 5:
            if dims % 2 == 1:
                output_layer = klayers.AveragePooling2D(4, 1)(output_layer)
                dims -= 3
            else:
                output_layer = klayers.AveragePooling2D(2, 2)(output_layer)
                dims /= 2

        # XXX 16 - hardcoded
        to_flatten = klayers.Conv2D(16,
                                    1,
                                    name='to_flatten_value_head',
                                    padding='valid',
                                    activation=activation)(output_layer)

        if conf.dropout_rate_value > 0:
            to_flatten = klayers.Dropout(conf.dropout_rate_value)(to_flatten)

        flat = klayers.Flatten()(to_flatten)

        value_head = klayers.Dense(num_value_heads,
                                   activation="sigmoid",
                                   name="value")(flat)

    else:
        # old way, as per AG0
        to_flatten = conv2d_block(1,
                                  1,
                                  name='to_flatten_value_head',
                                  padding='valid',
                                  activation=activation)(layer)
        flat = klayers.Flatten()(to_flatten)

        hidden = klayers.Dense(conf.value_hidden_size,
                               name="value_hidden_layer",
                               activation="relu")(flat)

        if conf.dropout_rate_value > 0:
            hidden = klayers.Dropout(conf.dropout_rate_value)(hidden)

        value_head = klayers.Dense(num_value_heads,
                                   activation="sigmoid",
                                   name="value")(hidden)

    # model:
    outputs = policy_heads + [value_head]

    model = keras_models.Model(inputs=[inputs_board], outputs=outputs)

    # add in weight decay?  XXX rename conf to reflect it is weight decay and use +ve value instead
    # of hard coded value.
    # XXX this hasn't been tested

    if conf.l2_regularisation:
        for layer in model.layers:
            # XXX To get global weight decay in keras regularizers have to be added to every layer
            # in the model. In my models these layers are batch normalization (beta/gamma
            # regularizer) and dense/convolutions (W_regularizer/b_regularizer) layers.

            if hasattr(layer, 'kernel_regularizer'):
                # XXX too much?  Is it doubled from paper?  XXX 5e-3 ?
                layer.kernel_regularizer = keras_regularizers.l2(1e-4)

    return model
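
Note: a hedged usage sketch for Example #7. It assumes `conf` is a populated confs.NNModelConfig (the function asserts this type) and that `generation_descr` is supplied by the surrounding project; the optimizer and loss choices below are illustrative guesses, not the project's actual training setup.

# `conf` and `generation_descr` are assumed to come from the caller.
model = get_network_model(conf, generation_descr)

# The model has conf.role_count softmax policy outputs plus one sigmoid value
# output, so compile() takes one loss per output (loss choices illustrative).
model.compile(optimizer="adam",
              loss=(["categorical_crossentropy"] * conf.role_count
                    + ["mean_squared_error"]))
model.summary()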