def build_network(self, scope_name):
    """Build the TF graph for this network under `scope_name`.

    Creates input/is_train/global_step placeholders, a residual conv
    "tower", and two heads: a policy head (logits in `self.policy_out`,
    probabilities in `self.policy_out_prob`) and a scalar value head
    (`self.value_out`). Dense-head weights/biases are declared explicitly
    and fed through `ops.basic_layer`.

    NOTE(review): hyperparameters such as `filters`, `kernel_size`,
    `num_blocks`, the `p_*`/`v_*` head settings, `weight_initializer`,
    etc. are free names here — presumably module/outer-scope config;
    confirm where they are bound. This definition also appears to be
    shadowed by a later `build_network` in the same file.
    """
    batch_shape = [None] + [dim for dim in self.input_shape]
    print(batch_shape)  # debug trace of the placeholder shape

    with tf.variable_scope(scope_name):
        self.is_train = tf.placeholder(tf.bool, name="is_train")
        self.global_step = tf.placeholder(tf.int32, name="global_step")

        # Input placeholder, keyed per scope so several networks can coexist.
        self.network_inputs[scope_name] = tf.placeholder(
            tf.float32, shape=batch_shape, name="inputs")

        # Convolutional tower: one plain conv followed by residual blocks.
        tower = ops.conv_layer(
            self.network_inputs[scope_name], filters, kernel_size, stride,
            activation, "conv_first", useBatchNorm, drop_out,
            self.is_train, weight_initializer)
        for block_idx in range(num_blocks):
            tower = ops.residual_conv_block(
                tower, filters, kernel_size, stride, activation,
                "conv" + str(block_idx), useBatchNorm, drop_out,
                self.is_train, weight_initializer)

        # Flattened sizes feeding the dense heads.
        tower_out = ops.conv_out_size(self.input_size, kernel_size, 1, stride)
        # TODO - manage correctly padding (if stride and/or filter size change)
        value_flat = ops.conv_out_size(
            tower_out, v_kernel_size, 0, v_stride) * v_filters
        policy_flat = ops.conv_out_size(
            tower_out, p_kernel_size, 0, p_stride) * p_filters

        # Explicit head parameters (dense weights and biases).
        head_weights = {
            'policy': tf.get_variable(
                'w_policy', shape=[policy_flat, self.policy_size],
                initializer=tf.contrib.layers.xavier_initializer()),
            'value': tf.get_variable(
                'w_value', shape=[value_flat, v_dense_size],
                initializer=weight_initializer),
            'value_out': tf.get_variable(
                'w_value_out', shape=[v_dense_size, 1],
                initializer=tf.contrib.layers.xavier_initializer()),
        }
        head_biases = {
            'policy': tf.get_variable(
                'b_policy', shape=[self.policy_size],
                initializer=tf.constant_initializer(0.0)),
            'value': tf.get_variable(
                'b_value', shape=[v_dense_size],
                initializer=tf.constant_initializer(0.0)),
            'value_out': tf.get_variable(
                'b_value_out', shape=[1],
                initializer=tf.constant_initializer(0.0)),
        }

        # Policy head: conv -> flatten -> linear logits -> p_activation.
        p_feat = ops.conv_layer(
            tower, p_filters, p_kernel_size, p_stride, activation,
            "policy_conv", useBatchNorm, drop_out, self.is_train,
            weight_initializer)
        p_feat = tf.contrib.layers.flatten(p_feat)
        self.policy_out = ops.basic_layer(
            p_feat, head_weights["policy"], head_biases["policy"],
            tf.identity, False, 0.0, self.is_train)
        self.policy_out_prob = p_activation(self.policy_out)

        # Value head: conv -> flatten -> hidden dense -> scalar output.
        v_feat = ops.conv_layer(
            tower, v_filters, v_kernel_size, v_stride, activation,
            "value_conv", useBatchNorm, drop_out, self.is_train,
            weight_initializer)
        v_feat = tf.contrib.layers.flatten(v_feat)
        v_hidden = ops.basic_layer(
            v_feat, head_weights["value"], head_biases["value"],
            activation, False, head_drop_out, self.is_train)
        self.value_out = ops.basic_layer(
            v_hidden, head_weights["value_out"], head_biases["value_out"],
            v_activation, False, 0.0, self.is_train)
def build_network(self, scope_name):
    """Build the TF graph for this network under `scope_name`.

    Variant that routes the policy/value heads through `ops.dense_layer`
    (shape lists instead of explicitly declared weight/bias variables)
    and stacks a second conv layer on the policy head. Populates
    `self.policy_out`, `self.policy_out_prob`, and `self.value_out`.

    NOTE(review): this redefines `build_network` and therefore shadows
    the earlier definition of the same name. Also, `policy_flat` is
    computed for a single head conv, but TWO policy convs are applied
    below ("policy_conv" / "policy_conv2") — if `ops.conv_layer` shrinks
    its input (valid padding / stride > 1), the flatten size will not
    match `policy_shape`; confirm against `ops.conv_layer`'s padding.
    Hyperparameters (`filters`, `num_blocks`, `p_*`/`v_*`, ...) are free
    names resolved outside this method — verify where they are bound.
    """
    batch_shape = [None] + [dim for dim in self.input_shape]
    print(batch_shape)  # debug trace of the placeholder shape

    with tf.variable_scope(scope_name):
        self.is_train = tf.placeholder(tf.bool, name="is_train")
        self.global_step = tf.placeholder(tf.int32, name="global_step")

        # Input placeholder, keyed per scope so several networks can coexist.
        self.network_inputs[scope_name] = tf.placeholder(
            tf.float32, shape=batch_shape, name="inputs")

        # Convolutional tower: one plain conv followed by residual blocks.
        tower = ops.conv_layer(
            self.network_inputs[scope_name], filters, kernel_size, stride,
            activation, "conv_first", useBatchNorm, drop_out,
            self.is_train, weight_initializer)
        for block_idx in range(num_blocks):
            tower = ops.residual_conv_block(
                tower, filters, kernel_size, stride, activation,
                "conv" + str(block_idx), useBatchNorm, drop_out,
                self.is_train, weight_initializer)

        # Flattened sizes feeding the dense heads.
        tower_out = ops.conv_out_size(self.input_size, kernel_size, 1, stride)
        # TODO - manage correctly padding (if stride and/or filter size change)
        value_flat = ops.conv_out_size(
            tower_out, v_kernel_size, 0, v_stride) * v_filters
        policy_flat = ops.conv_out_size(
            tower_out, p_kernel_size, 0, p_stride) * p_filters

        # Dense-layer [in, out] shapes for the heads.
        policy_shape = [policy_flat, self.policy_size]
        value_shape = [value_flat, v_dense_size]
        value_out_shape = [v_dense_size, 1]

        # Policy head: two convs -> flatten -> linear logits -> p_activation.
        p_feat = ops.conv_layer(
            tower, p_filters, p_kernel_size, p_stride, activation,
            "policy_conv", useBatchNorm, drop_out, self.is_train,
            weight_initializer)
        p_feat = ops.conv_layer(
            p_feat, p_filters, p_kernel_size, p_stride, activation,
            "policy_conv2", useBatchNorm, drop_out, self.is_train,
            weight_initializer)
        p_feat = tf.contrib.layers.flatten(p_feat)
        self.policy_out = ops.dense_layer(
            p_feat, policy_shape, tf.identity, "policy", False, 0.0,
            self.is_train)
        self.policy_out_prob = p_activation(self.policy_out)

        # Value head: conv -> flatten -> hidden dense -> scalar output.
        v_feat = ops.conv_layer(
            tower, v_filters, v_kernel_size, v_stride, activation,
            "value_conv", useBatchNorm, drop_out, self.is_train,
            weight_initializer)
        v_feat = tf.contrib.layers.flatten(v_feat)
        v_hidden = ops.dense_layer(
            v_feat, value_shape, activation, "value", False,
            head_drop_out, self.is_train,
            weight_initializer=weight_initializer)
        self.value_out = ops.dense_layer(
            v_hidden, value_out_shape, v_activation, "value_out", False,
            0.0, self.is_train)