Example 1
    def build_network(self, scope_name):
        """Build the policy/value network graph inside variable scope `scope_name`.

        Graph structure:
          input placeholder -> conv "tower" (first conv + `num_blocks`
          residual blocks) -> two heads:
            * policy head: conv -> flatten -> linear logits
              (`self.policy_out`) -> `p_activation` (`self.policy_out_prob`)
            * value head: conv -> flatten -> hidden dense -> 1-unit dense
              (`self.value_out`)

        Side effects: registers the input placeholder in
        `self.network_inputs[scope_name]` and sets `self.is_train`,
        `self.global_step`, `self.policy_out`, `self.policy_out_prob`
        and `self.value_out` on the instance.

        NOTE(review): the hyperparameters (`filters`, `kernel_size`, `stride`,
        `activation`, `useBatchNorm`, `drop_out`, `num_blocks`, `head_drop_out`,
        `weight_initializer` and the `p_*` / `v_*` head variants) are free
        names resolved from an enclosing scope that is not visible here —
        confirm where they are defined.
        """
        # Leading None = variable batch size; remaining dims from input_shape.
        net_shape = [None] + list(self.input_shape)
        with tf.variable_scope(scope_name):
            self.is_train = tf.placeholder(tf.bool, name="is_train")
            self.global_step = tf.placeholder(tf.int32, name="global_step")

            # Input Layer
            self.network_inputs[scope_name] = tf.placeholder(tf.float32, shape=net_shape, name="inputs")

            # Conv Layers "Tower": one plain conv followed by residual blocks.
            conv = ops.conv_layer(self.network_inputs[scope_name], filters, kernel_size, stride, activation,
                                  "conv_first", useBatchNorm, drop_out, self.is_train, weight_initializer)
            for i in range(num_blocks):
                conv = ops.residual_conv_block(conv, filters, kernel_size, stride, activation, "conv" + str(i),
                                               useBatchNorm, drop_out, self.is_train, weight_initializer)

            # Policy and value heads
            # - Compute flattened conv output sizes feeding the dense layers.
            tower_conv_out_size = ops.conv_out_size(self.input_size, kernel_size, 1, stride)
            # TODO - manage correctly padding (if stride and/or filter size change)
            value_conv_out_size = ops.conv_out_size(tower_conv_out_size, v_kernel_size, 0, v_stride) * v_filters
            policy_conv_out_size = ops.conv_out_size(tower_conv_out_size, p_kernel_size, 0, p_stride) * p_filters

            # - Declare head weights and biases (biases start at zero).
            weights = {
                'policy': tf.get_variable('w_policy', shape=[policy_conv_out_size, self.policy_size],
                                          initializer=tf.contrib.layers.xavier_initializer()),
                'value': tf.get_variable('w_value', shape=[value_conv_out_size, v_dense_size],
                                         initializer=weight_initializer),
                'value_out': tf.get_variable('w_value_out', shape=[v_dense_size, 1],
                                             initializer=tf.contrib.layers.xavier_initializer())
            }
            biases = {
                'policy': tf.get_variable('b_policy', shape=[self.policy_size],
                                          initializer=tf.constant_initializer(0.0)),
                'value': tf.get_variable('b_value', shape=[v_dense_size],
                                         initializer=tf.constant_initializer(0.0)),
                'value_out': tf.get_variable('b_value_out', shape=[1],
                                             initializer=tf.constant_initializer(0.0))
            }

            # - Policy head: linear logits (tf.identity), probabilities via p_activation.
            policy_conv = ops.conv_layer(conv, p_filters, p_kernel_size, p_stride, activation, "policy_conv",
                                         useBatchNorm, drop_out, self.is_train, weight_initializer)
            policy_conv = tf.contrib.layers.flatten(policy_conv)
            self.policy_out = ops.basic_layer(policy_conv, weights["policy"], biases["policy"], tf.identity, False, 0.0,
                                              self.is_train)
            self.policy_out_prob = p_activation(self.policy_out)

            # - Value head: hidden dense with dropout, then a 1-unit output layer.
            value_conv = ops.conv_layer(conv, v_filters, v_kernel_size, v_stride, activation, "value_conv",
                                        useBatchNorm, drop_out, self.is_train, weight_initializer)
            value_conv = tf.contrib.layers.flatten(value_conv)
            value_out = ops.basic_layer(value_conv, weights["value"], biases["value"], activation, False, head_drop_out,
                                        self.is_train)
            self.value_out = ops.basic_layer(value_out, weights["value_out"], biases["value_out"], v_activation, False,
                                             0.0, self.is_train)
Example 2
    def build_network(self, scope_name):
        """Build the policy/value network graph inside variable scope `scope_name`.

        Variant with two stacked policy-head conv layers and `ops.dense_layer`
        (name-based variable creation) instead of explicit weight/bias dicts.

        Graph structure:
          input placeholder -> conv "tower" (first conv + `num_blocks`
          residual blocks) -> two heads:
            * policy head: conv -> conv -> flatten -> linear dense
              (`self.policy_out`) -> `p_activation` (`self.policy_out_prob`)
            * value head: conv -> flatten -> hidden dense -> 1-unit dense
              (`self.value_out`)

        Side effects: registers the input placeholder in
        `self.network_inputs[scope_name]` and sets `self.is_train`,
        `self.global_step`, `self.policy_out`, `self.policy_out_prob`
        and `self.value_out` on the instance.

        NOTE(review): the hyperparameters (`filters`, `kernel_size`, `stride`,
        `activation`, `useBatchNorm`, `drop_out`, `num_blocks`, `head_drop_out`,
        `weight_initializer` and the `p_*` / `v_*` head variants) are free
        names resolved from an enclosing scope that is not visible here —
        confirm where they are defined.
        """
        # Leading None = variable batch size; remaining dims from input_shape.
        net_shape = [None] + list(self.input_shape)
        with tf.variable_scope(scope_name):
            self.is_train = tf.placeholder(tf.bool, name="is_train")
            self.global_step = tf.placeholder(tf.int32, name="global_step")

            # Input Layer
            self.network_inputs[scope_name] = tf.placeholder(tf.float32, shape=net_shape, name="inputs")

            # Conv Layers "Tower": one plain conv followed by residual blocks.
            conv = ops.conv_layer(self.network_inputs[scope_name], filters, kernel_size, stride, activation,
                                  "conv_first", useBatchNorm, drop_out, self.is_train, weight_initializer)
            for i in range(num_blocks):
                conv = ops.residual_conv_block(conv, filters, kernel_size, stride, activation, "conv" + str(i),
                                               useBatchNorm, drop_out, self.is_train, weight_initializer)

            # Policy and value heads
            # - Compute flattened conv output sizes feeding the dense layers.
            tower_conv_out_size = ops.conv_out_size(self.input_size, kernel_size, 1, stride)
            # TODO - manage correctly padding (if stride and/or filter size change)
            value_conv_out_size = ops.conv_out_size(tower_conv_out_size, v_kernel_size, 0, v_stride) * v_filters
            policy_conv_out_size = ops.conv_out_size(tower_conv_out_size, p_kernel_size, 0, p_stride) * p_filters

            # - Declare dense-layer [in, out] shapes.
            policy_shape = [policy_conv_out_size, self.policy_size]
            value_shape = [value_conv_out_size, v_dense_size]
            value_out_shape = [v_dense_size, 1]

            # - Policy head: two convs, flatten, linear dense (tf.identity),
            #   then p_activation for the probability output.
            policy_conv = ops.conv_layer(conv, p_filters, p_kernel_size, p_stride, activation, "policy_conv",
                                         useBatchNorm, drop_out, self.is_train, weight_initializer)
            policy_conv = ops.conv_layer(policy_conv, p_filters, p_kernel_size, p_stride, activation, "policy_conv2",
                                         useBatchNorm, drop_out, self.is_train, weight_initializer)
            policy_conv = tf.contrib.layers.flatten(policy_conv)
            self.policy_out = ops.dense_layer(policy_conv, policy_shape, tf.identity, "policy", False, 0.0, self.is_train)
            self.policy_out_prob = p_activation(self.policy_out)

            # - Value head: hidden dense with dropout, then a 1-unit output layer.
            value_conv = ops.conv_layer(conv, v_filters, v_kernel_size, v_stride, activation, "value_conv",
                                        useBatchNorm, drop_out, self.is_train, weight_initializer)
            value_conv = tf.contrib.layers.flatten(value_conv)
            value_out = ops.dense_layer(value_conv, value_shape, activation, "value", False, head_drop_out, self.is_train,
                                        weight_initializer=weight_initializer)
            self.value_out = ops.dense_layer(value_out, value_out_shape, v_activation, "value_out", False, 0.0, self.is_train)