def build_arch_baseline(input, is_train: bool, num_classes: int):

    bias_initializer = tf.truncated_normal_initializer(
        mean=0.0, stddev=0.01)  # tf.constant_initializer(0.0)
    # The paper did not mention any regularization; a common l2 regularizer on weights is added here.
    weights_regularizer = tf.contrib.layers.l2_regularizer(5e-04)

    tf.logging.info('input shape: {}'.format(input.get_shape()))

    # weights_initializer=initializer,
    with slim.arg_scope([slim.conv2d, slim.fully_connected], trainable=is_train,
                        biases_initializer=bias_initializer,
                        weights_regularizer=weights_regularizer):
        with tf.variable_scope('relu_conv1') as scope:
            output = slim.conv2d(input, num_outputs=32, kernel_size=[5, 5], stride=1,
                                 padding='SAME', scope=scope, activation_fn=tf.nn.relu)
            output = slim.max_pool2d(output, [2, 2], scope='max_2d_layer1')

            tf.logging.info('output shape: {}'.format(output.get_shape()))

        with tf.variable_scope('relu_conv2') as scope:
            output = slim.conv2d(output, num_outputs=64, kernel_size=[5, 5], stride=1,
                                 padding='SAME', scope=scope, activation_fn=tf.nn.relu)
            output = slim.max_pool2d(output, [2, 2], scope='max_2d_layer2')

            tf.logging.info('output shape: {}'.format(output.get_shape()))

        output = slim.flatten(output)
        output = slim.fully_connected(output, 1024, scope='relu_fc3', activation_fn=tf.nn.relu)
        tf.logging.info('output shape: {}'.format(output.get_shape()))
        output = slim.dropout(output, 0.5, scope='dp')
        output = slim.fully_connected(output, num_classes, scope='final_layer', activation_fn=None)
        tf.logging.info('output shape: {}'.format(output.get_shape()))
        return output
def create_inner_block(
        incoming, scope, nonlinearity=tf.nn.elu,
        # stddev keyword added: the bare positional 1e-3 would have set the mean.
        weights_initializer=tf.truncated_normal_initializer(stddev=1e-3),
        bias_initializer=tf.zeros_initializer(), regularizer=None,
        increase_dim=False, summarize_activations=True):
    n = incoming.get_shape().as_list()[-1]
    stride = 1
    if increase_dim:
        n *= 2
        stride = 2

    incoming = slim.conv2d(
        incoming, n, [3, 3], stride, activation_fn=nonlinearity, padding="SAME",
        normalizer_fn=_batch_norm_fn, weights_initializer=weights_initializer,
        biases_initializer=bias_initializer, weights_regularizer=regularizer,
        scope=scope + "/1")
    if summarize_activations:
        tf.summary.histogram(incoming.name + "/activations", incoming)

    incoming = slim.dropout(incoming, keep_prob=0.6)

    incoming = slim.conv2d(
        incoming, n, [3, 3], 1, activation_fn=None, padding="SAME",
        normalizer_fn=None, weights_initializer=weights_initializer,
        biases_initializer=bias_initializer, weights_regularizer=regularizer,
        scope=scope + "/2")
    return incoming
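# `_batch_norm_fn` is referenced above as the conv normalizer but is not shown in
# this excerpt. A minimal sketch, assuming it is a thin wrapper over
# slim.batch_norm with a derived scope name (an assumption, not necessarily the
# original definition):
def _batch_norm_fn(x, scope=None):
    if scope is None:
        scope = tf.get_variable_scope().name + "/bn"
    return slim.batch_norm(x, scope=scope)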
def localization_squeezenet(self, inputs):
    with tf.variable_scope('localization_network'):
        with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu,
                            padding='SAME',
                            weights_initializer=tf.constant_initializer(0.0)):
            conv1 = slim.conv2d(inputs, 64, [3, 3], 2, padding='VALID', scope='conv1')
            pool1 = slim.max_pool2d(conv1, [2, 2], 2, scope='pool1')
            fire2 = self.fire_module(pool1, 16, 64, scope='fire2')
            fire3 = self.fire_module(fire2, 16, 64, scope='fire3', res_connection=True)
            fire4 = self.fire_module(fire3, 32, 128, scope='fire4')
            pool4 = slim.max_pool2d(fire4, [2, 2], 2, scope='pool4')
            fire5 = self.fire_module(pool4, 32, 128, scope='fire5', res_connection=True)
            fire6 = self.fire_module(fire5, 48, 192, scope='fire6')
            fire7 = self.fire_module(fire6, 48, 192, scope='fire7', res_connection=True)
            fire8 = self.fire_module(fire7, 64, 256, scope='fire8')
            pool8 = slim.max_pool2d(fire8, [2, 2], 2, scope='pool8')
            fire9 = self.fire_module(pool8, 64, 256, scope='fire9', res_connection=True)
            conv10 = slim.conv2d(fire9, 128, [1, 1], 1, scope='conv10')
            shape = int(np.prod(conv10.get_shape()[1:]))
            # The fc biases start at the identity affine transform so the spatial
            # transformer initially leaves the image untransformed.
            identity = np.array([[1., 0., 0.], [0., 1., 0.]])
            identity = identity.flatten()
            fc11 = slim.fully_connected(tf.reshape(conv10, [-1, shape]), 6,
                                        biases_initializer=tf.constant_initializer(identity),
                                        scope='fc11')
    return fc11
def content_extractor(self, images, reuse=False):
    # images: (batch, 32, 32, 3) or (batch, 32, 32, 1)

    if images.get_shape()[3] == 1:
        # For mnist dataset, replicate the gray scale image 3 times.
        images = tf.image.grayscale_to_rgb(images)

    with tf.variable_scope('content_extractor', reuse=reuse):
        with slim.arg_scope([slim.conv2d], padding='SAME', activation_fn=None,
                            stride=2, weights_initializer=tf.contrib.layers.xavier_initializer()):
            with slim.arg_scope([slim.batch_norm], decay=0.95, center=True, scale=True,
                                activation_fn=tf.nn.relu,
                                is_training=(self.mode == 'train' or self.mode == 'pretrain')):
                net = slim.conv2d(images, 64, [3, 3], scope='conv1')   # (batch_size, 16, 16, 64)
                net = slim.batch_norm(net, scope='bn1')
                net = slim.conv2d(net, 128, [3, 3], scope='conv2')     # (batch_size, 8, 8, 128)
                net = slim.batch_norm(net, scope='bn2')
                net = slim.conv2d(net, 256, [3, 3], scope='conv3')     # (batch_size, 4, 4, 256)
                net = slim.batch_norm(net, scope='bn3')
                net = slim.conv2d(net, 128, [4, 4], padding='VALID', scope='conv4')  # (batch_size, 1, 1, 128)
                net = slim.batch_norm(net, activation_fn=tf.nn.tanh, scope='bn4')
                if self.mode == 'pretrain':
                    net = slim.conv2d(net, 10, [1, 1], padding='VALID', scope='out')
                    net = slim.flatten(net)
    return net
def _build_layers_v2(self, input_dict, num_outputs, options):
    inputs = input_dict["obs"]
    filters = options.get("conv_filters")
    if not filters:
        filters = _get_filter_config(inputs.shape.as_list()[1:])

    activation = get_activation_fn(options.get("conv_activation"))

    with tf.name_scope("vision_net"):
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            inputs = slim.conv2d(
                inputs,
                out_size,
                kernel,
                stride,
                activation_fn=activation,
                scope="conv{}".format(i))
        out_size, kernel, stride = filters[-1]
        fc1 = slim.conv2d(
            inputs,
            out_size,
            kernel,
            stride,
            activation_fn=activation,
            padding="VALID",
            scope="fc1")
        fc2 = slim.conv2d(
            fc1,
            num_outputs, [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            scope="fc2")
        return flatten(fc2), flatten(fc1)
def iter_func(self, state):
    sc = predictron_arg_scope()

    with tf.variable_scope('value'):
        value_net = slim.fully_connected(slim.flatten(state), 32, scope='fc0')
        value_net = layers.batch_norm(value_net, activation_fn=tf.nn.relu, scope='fc0/preact')
        value_net = slim.fully_connected(value_net, self.maze_size, activation_fn=None, scope='fc1')

    with slim.arg_scope(sc):
        net = slim.conv2d(state, 32, [3, 3], scope='conv1')
        net = layers.batch_norm(net, activation_fn=tf.nn.relu, scope='conv1/preact')
        net_flatten = slim.flatten(net, scope='conv1/flatten')

        with tf.variable_scope('reward'):
            reward_net = slim.fully_connected(net_flatten, 32, scope='fc0')
            reward_net = layers.batch_norm(reward_net, activation_fn=tf.nn.relu, scope='fc0/preact')
            reward_net = slim.fully_connected(reward_net, self.maze_size, activation_fn=None, scope='fc1')

        with tf.variable_scope('gamma'):
            gamma_net = slim.fully_connected(net_flatten, 32, scope='fc0')
            gamma_net = layers.batch_norm(gamma_net, activation_fn=tf.nn.relu, scope='fc0/preact')
            gamma_net = slim.fully_connected(gamma_net, self.maze_size, activation_fn=tf.nn.sigmoid, scope='fc1')

        with tf.variable_scope('lambda'):
            lambda_net = slim.fully_connected(net_flatten, 32, scope='fc0')
            lambda_net = layers.batch_norm(lambda_net, activation_fn=tf.nn.relu, scope='fc0/preact')
            lambda_net = slim.fully_connected(lambda_net, self.maze_size, activation_fn=tf.nn.sigmoid, scope='fc1')

        net = slim.conv2d(net, 32, [3, 3], scope='conv2')
        net = layers.batch_norm(net, activation_fn=tf.nn.relu, scope='conv2/preact')
        net = slim.conv2d(net, 32, [3, 3], scope='conv3')
        net = layers.batch_norm(net, activation_fn=tf.nn.relu, scope='conv3/preact')
    return net, reward_net, gamma_net, lambda_net, value_net
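# predictron_arg_scope() above comes from the original Predictron code and is
# not shown in this excerpt. A plausible sketch, assuming it only standardizes
# slim conv/fc defaults (the real definition may differ, e.g. by carrying
# batch-norm parameters):
def predictron_arg_scope(weight_decay=1e-4):
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=None,
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        biases_initializer=tf.zeros_initializer()) as sc:
        return sc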
def _build_graph(self):
    normalized_input = tf.div(self._input, 255.0)

    # d = tf.divide(1.0, tf.sqrt(8. * 8. * 4.))
    conv1 = slim.conv2d(normalized_input, 16, [8, 8], activation_fn=tf.nn.relu,
                        padding='VALID', stride=4, biases_initializer=None)
    # weights_initializer=tf.random_uniform_initializer(minval=-d, maxval=d))

    # d = tf.divide(1.0, tf.sqrt(4. * 4. * 16.))
    conv2 = slim.conv2d(conv1, 32, [4, 4], activation_fn=tf.nn.relu,
                        padding='VALID', stride=2, biases_initializer=None)
    # weights_initializer=tf.random_uniform_initializer(minval=-d, maxval=d))

    flattened = slim.flatten(conv2)

    # d = tf.divide(1.0, tf.sqrt(2592.))
    fc1 = slim.fully_connected(flattened, 256, activation_fn=tf.nn.relu, biases_initializer=None)
    # weights_initializer=tf.random_uniform_initializer(minval=-d, maxval=d))

    # d = tf.divide(1.0, tf.sqrt(256.))
    # estimate of the value function
    self.value_func_prediction = slim.fully_connected(fc1, 1, activation_fn=None, biases_initializer=None)
    # weights_initializer=tf.random_uniform_initializer(minval=-d, maxval=d))

    # softmax output with one entry per action representing the probability of taking an action
    self.policy_predictions = slim.fully_connected(fc1, self.output_size,
                                                   activation_fn=tf.nn.softmax,
                                                   biases_initializer=None)
def create_test_network_7():
    """Aligned network for test, with a control dependency.

    The graph is similar to create_test_network_1(), except that it includes an
    assert operation on the left branch.

    Returns:
        g: Tensorflow graph object (Graph proto).
    """
    g = ops.Graph()
    with g.as_default():
        # An 8x8 test image.
        x = array_ops.placeholder(dtypes.float32, (1, 8, 8, 1), name='input_image')
        # Left branch.
        l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID')
        l1_shape = array_ops.shape(l1)
        assert_op = control_flow_ops.Assert(
            gen_math_ops.equal(l1_shape[1], 2), [l1_shape], summarize=4)
        # Right branch.
        l2_pad = array_ops.pad(x, [[0, 0], [1, 0], [1, 0], [0, 0]])
        l2 = slim.conv2d(l2_pad, 1, [3, 3], stride=2, scope='L2', padding='VALID')
        l3 = slim.conv2d(l2, 1, [1, 1], stride=2, scope='L3', padding='VALID')
        # Addition.
        with ops.control_dependencies([assert_op]):
            nn.relu(l1 + l3, name='output')
    return g
def create_test_network_4():
    """Misaligned network for test.

    The graph corresponds to a variation from the example from the second figure
    in go/cnn-rf-computation#arbitrary-computation-graphs. Layer 2 uses 'SAME'
    padding, which makes its padding dependent on the input image dimensionality.
    In this case, the effective padding will be undetermined, and the utility is
    not able to check the network alignment.

    Returns:
        g: Tensorflow graph object (Graph proto).
    """
    g = ops.Graph()
    with g.as_default():
        # An input test image with unknown spatial resolution.
        x = array_ops.placeholder(
            dtypes.float32, (None, None, None, 1), name='input_image')
        # Left branch.
        l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID')
        # Right branch.
        l2 = slim.conv2d(x, 1, [3, 3], stride=2, scope='L2', padding='SAME')
        l3 = slim.conv2d(l2, 1, [1, 1], stride=2, scope='L3', padding='VALID')
        # Addition.
        nn.relu(l1 + l3, name='output')
    return g
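# The create_test_network_*() graphs above are inputs to the receptive-field
# analysis utility in TF 1.x contrib. A usage sketch, assuming the
# tf.contrib.receptive_field API; for the misaligned network the computation is
# expected to raise rather than return:
def check_alignment_example():
    from tensorflow.contrib import receptive_field
    g = create_test_network_4()
    try:
        rf = receptive_field.compute_receptive_field_from_graph_def(
            g.as_graph_def(), 'input_image', 'output')
        print('receptive field parameters:', rf)
    except ValueError as e:
        print('alignment could not be verified:', e)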
def create_test_network_9():
    """Aligned network for test, including an intermediate addition.

    The graph is the same as create_test_network_8(), except that VALID padding
    is changed to SAME.

    Returns:
        g: Tensorflow graph object (Graph proto).
    """
    g = ops.Graph()
    with g.as_default():
        # An input test image with unknown spatial resolution.
        x = array_ops.placeholder(
            dtypes.float32, (None, None, None, 1), name='input_image')
        # Left branch before first addition.
        l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='SAME')
        # Right branch before first addition.
        l2 = slim.conv2d(x, 1, [3, 3], stride=2, scope='L2', padding='SAME')
        l3 = slim.conv2d(l2, 1, [1, 1], stride=2, scope='L3', padding='SAME')
        # First addition.
        l4 = nn.relu(l1 + l3)
        # Left branch after first addition.
        l5 = slim.conv2d(l4, 1, [1, 1], stride=2, scope='L5', padding='SAME')
        # Right branch after first addition.
        l6 = slim.conv2d(l4, 1, [3, 3], stride=2, scope='L6', padding='SAME')
        # Final addition.
        nn.relu(l5 + l6, name='output')
    return g
def build_arch(input, is_train, num_classes):
    data_size = int(input.get_shape()[1])
    # initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
    # bias_initializer = tf.constant_initializer(0.0)
    # weights_regularizer = tf.contrib.layers.l2_regularizer(5e-04)

    # , activation_fn=None, biases_initializer=bias_initializer, weights_regularizer=weights_regularizer
    with slim.arg_scope([slim.conv2d], trainable=is_train):
        with tf.variable_scope('conv1') as scope:
            output = slim.conv2d(input, num_outputs=256, kernel_size=[9, 9], stride=1,
                                 padding='VALID', scope=scope)
            data_size = data_size - 8
            assert output.get_shape() == [cfg.batch_size, data_size, data_size, 256]
            tf.logging.info('conv1 output shape: {}'.format(output.get_shape()))

        with tf.variable_scope('primary_caps_layer') as scope:
            output = slim.conv2d(output, num_outputs=32 * 8, kernel_size=[9, 9], stride=2,
                                 padding='VALID', scope=scope)  # , activation_fn=None
            output = tf.reshape(output, [cfg.batch_size, -1, 8])
            output = squash(output)
            data_size = int(np.floor((data_size - 8) / 2))
            assert output.get_shape() == [cfg.batch_size, data_size * data_size * 32, 8]
            tf.logging.info('primary capsule output shape: {}'.format(output.get_shape()))

        with tf.variable_scope('digit_caps_layer') as scope:
            with tf.variable_scope('u') as scope:
                u_hats = vec_transform(output, num_classes, 16)
                assert u_hats.get_shape() == [cfg.batch_size, num_classes, data_size * data_size * 32, 16]
                tf.logging.info('digit_caps_layer u_hats shape: {}'.format(u_hats.get_shape()))

            with tf.variable_scope('routing') as scope:
                output = dynamic_routing(u_hats)
                assert output.get_shape() == [cfg.batch_size, num_classes, 16]
                tf.logging.info('the output capsule has shape: {}'.format(output.get_shape()))

    output_len = tf.norm(output, axis=-1)

    return output, output_len
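# squash() used in the primary capsule layer above is not shown in this excerpt.
# A minimal sketch of the standard squashing nonlinearity from Sabour et al.
# (2017), squash(s) = (||s||^2 / (1 + ||s||^2)) * (s / ||s||), applied along the
# last (capsule) axis; the epsilon guards the norm at zero:
def squash(s, axis=-1, epsilon=1e-9):
    squared_norm = tf.reduce_sum(tf.square(s), axis=axis, keep_dims=True)
    scale = squared_norm / (1. + squared_norm)
    return scale * s / tf.sqrt(squared_norm + epsilon)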
def network_det(self, inputs, reuse=False):
    if reuse:
        tf.get_variable_scope().reuse_variables()
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01)):
        conv1 = slim.conv2d(inputs, 96, [11, 11], 4, padding='VALID', scope='conv1')
        max1 = slim.max_pool2d(conv1, [3, 3], 2, padding='VALID', scope='max1')

        conv2 = slim.conv2d(max1, 256, [5, 5], 1, scope='conv2')
        max2 = slim.max_pool2d(conv2, [3, 3], 2, padding='VALID', scope='max2')
        conv3 = slim.conv2d(max2, 384, [3, 3], 1, scope='conv3')

        conv4 = slim.conv2d(conv3, 384, [3, 3], 1, scope='conv4')
        conv5 = slim.conv2d(conv4, 256, [3, 3], 1, scope='conv5')
        pool5 = slim.max_pool2d(conv5, [3, 3], 2, padding='VALID', scope='pool5')

        shape = int(np.prod(pool5.get_shape()[1:]))
        fc6 = slim.fully_connected(tf.reshape(pool5, [-1, shape]), 4096, scope='fc6')

        fc_detection = slim.fully_connected(fc6, 512, scope='fc_det1')
        out_detection = slim.fully_connected(fc_detection, 2, scope='fc_det2', activation_fn=None)
    return out_detection
def create_test_network():
    """Convolutional neural network for test.

    Returns:
        g: Tensorflow graph object (Graph proto).
    """
    g = ops.Graph()
    with g.as_default():
        # An input test image with unknown spatial resolution.
        x = array_ops.placeholder(
            dtypes.float32, (None, None, None, 1), name='input_image')
        # Left branch before first addition.
        l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID')
        # Right branch before first addition.
        l2_pad = array_ops.pad(x, [[0, 0], [1, 0], [1, 0], [0, 0]], name='L2_pad')
        l2 = slim.conv2d(l2_pad, 1, [3, 3], stride=2, scope='L2', padding='VALID')
        l3 = slim.max_pool2d(l2, [3, 3], stride=2, scope='L3', padding='SAME')
        # First addition.
        l4 = nn.relu(l1 + l3, name='L4_relu')
        # Left branch after first addition.
        l5 = slim.conv2d(l4, 1, [1, 1], stride=2, scope='L5', padding='SAME')
        # Right branch after first addition.
        l6 = slim.conv2d(l4, 1, [3, 3], stride=2, scope='L6', padding='SAME')
        # Final addition.
        gen_math_ops.add(l5, l6, name='L7_add')
    return g
def decoder(self, latent_var, is_training):
    activation_fn = leaky_relu  # tf.nn.relu
    weight_decay = 0.0
    with tf.variable_scope('decoder'):
        with slim.arg_scope([slim.batch_norm], is_training=is_training):
            with slim.arg_scope([slim.conv2d, slim.fully_connected],
                                weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                weights_regularizer=slim.l2_regularizer(weight_decay),
                                normalizer_fn=slim.batch_norm,
                                normalizer_params=self.batch_norm_params):
                net = slim.fully_connected(latent_var, 4096, activation_fn=None,
                                           normalizer_fn=None, scope='Fc_1')
                net = tf.reshape(net, [-1, 4, 4, 256], name='Reshape')

                net = tf.image.resize_nearest_neighbor(net, size=(8, 8), name='Upsample_1')
                net = slim.conv2d(net, 128, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_1a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 128, [3, 3], 1,
                                  activation_fn=activation_fn, scope='Conv2d_1b')

                net = tf.image.resize_nearest_neighbor(net, size=(16, 16), name='Upsample_2')
                net = slim.conv2d(net, 64, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_2a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 64, [3, 3], 1,
                                  activation_fn=activation_fn, scope='Conv2d_2b')

                net = tf.image.resize_nearest_neighbor(net, size=(32, 32), name='Upsample_3')
                net = slim.conv2d(net, 32, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_3a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 32, [3, 3], 1,
                                  activation_fn=activation_fn, scope='Conv2d_3b')

                net = tf.image.resize_nearest_neighbor(net, size=(64, 64), name='Upsample_4')
                net = slim.conv2d(net, 3, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_4a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 3, [3, 3], 1,
                                  activation_fn=activation_fn, scope='Conv2d_4b')
                net = slim.conv2d(net, 3, [3, 3], 1, activation_fn=None, scope='Conv2d_4c')

    return net
def encoder(self, images, is_training):
    activation_fn = leaky_relu  # tf.nn.relu
    weight_decay = 0.0
    with tf.variable_scope('encoder'):
        with slim.arg_scope([slim.batch_norm], is_training=is_training):
            with slim.arg_scope([slim.conv2d, slim.fully_connected],
                                weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                weights_regularizer=slim.l2_regularizer(weight_decay),
                                normalizer_fn=slim.batch_norm,
                                normalizer_params=self.batch_norm_params):
                net = images

                net = slim.conv2d(net, 32, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_1a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 32, [4, 4], 1,
                                  activation_fn=activation_fn, scope='Conv2d_1b')

                net = slim.conv2d(net, 64, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_2a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 64, [4, 4], 1,
                                  activation_fn=activation_fn, scope='Conv2d_2b')

                net = slim.conv2d(net, 128, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_3a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 128, [4, 4], 1,
                                  activation_fn=activation_fn, scope='Conv2d_3b')

                net = slim.conv2d(net, 256, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_4a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 256, [4, 4], 1,
                                  activation_fn=activation_fn, scope='Conv2d_4b')

                net = slim.flatten(net)
                fc1 = slim.fully_connected(net, self.latent_variable_dim,
                                           activation_fn=None, normalizer_fn=None, scope='Fc_1')
                fc2 = slim.fully_connected(net, self.latent_variable_dim,
                                           activation_fn=None, normalizer_fn=None, scope='Fc_2')
    return fc1, fc2
def create_test_network():
    """Convolutional neural network for test.

    Returns:
        name_to_node: Dict keyed by node name, each entry containing the node's
            NodeDef.
    """
    g = ops.Graph()
    with g.as_default():
        # An input test image with unknown spatial resolution.
        x = array_ops.placeholder(
            dtypes.float32, (None, None, None, 1), name='input_image')
        # Left branch before first addition.
        l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID')
        # Right branch before first addition.
        l2_pad = array_ops.pad(x, [[0, 0], [1, 0], [1, 0], [0, 0]], name='L2_pad')
        l2 = slim.conv2d(l2_pad, 1, [3, 3], stride=2, scope='L2', padding='VALID')
        l3 = slim.max_pool2d(l2, [3, 3], stride=2, scope='L3', padding='SAME')
        # First addition.
        l4 = nn.relu(l1 + l3, name='L4_relu')
        # Left branch after first addition.
        l5 = slim.conv2d(l4, 1, [1, 1], stride=2, scope='L5', padding='SAME')
        # Right branch after first addition.
        l6 = slim.conv2d(l4, 1, [3, 3], stride=2, scope='L6', padding='SAME')
        # Final addition.
        gen_math_ops.add(l5, l6, name='L7_add')
    name_to_node = graph_compute_order.parse_graph_nodes(g.as_graph_def())
    return name_to_node
def build_feature_pyramid(self):
    '''
    reference: https://github.com/CharlesShang/FastMaskRCNN
    build P2, P3, P4, P5, P6
    :return: multi-scale feature map
    '''

    feature_pyramid = {}
    with tf.variable_scope('feature_pyramid'):
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(self.rpn_weight_decay)):
            feature_pyramid['P5'] = slim.conv2d(self.feature_maps_dict['C5'],
                                                num_outputs=256,
                                                kernel_size=[1, 1],
                                                stride=1,
                                                scope='build_P5')

            # P6 is a stride-2 downsampling of P5
            feature_pyramid['P6'] = slim.max_pool2d(feature_pyramid['P5'],
                                                    kernel_size=[2, 2],
                                                    stride=2,
                                                    scope='build_P6')

            for layer in range(4, 1, -1):
                p, c = feature_pyramid['P' + str(layer + 1)], self.feature_maps_dict['C' + str(layer)]
                up_sample_shape = tf.shape(c)
                up_sample = tf.image.resize_nearest_neighbor(
                    p, [up_sample_shape[1], up_sample_shape[2]],
                    name='build_P%d/up_sample_nearest_neighbor' % layer)

                c = slim.conv2d(c, num_outputs=256, kernel_size=[1, 1], stride=1,
                                scope='build_P%d/reduce_dimension' % layer)
                p = up_sample + c
                p = slim.conv2d(p, 256, kernel_size=[3, 3], stride=1,
                                padding='SAME', scope='build_P%d/avoid_aliasing' % layer)
                feature_pyramid['P' + str(layer)] = p

    return feature_pyramid
def conv_net_kelz(inputs):
    """Builds the ConvNet from Kelz 2016."""
    with slim.arg_scope(
            [slim.conv2d, slim.fully_connected],
            activation_fn=tf.nn.relu,
            weights_initializer=tf.contrib.layers.variance_scaling_initializer(
                factor=2.0, mode='FAN_AVG', uniform=True)):
        net = slim.conv2d(inputs, 32, [3, 3], scope='conv1', normalizer_fn=slim.batch_norm)

        net = slim.conv2d(net, 32, [3, 3], scope='conv2', normalizer_fn=slim.batch_norm)
        net = slim.max_pool2d(net, [1, 2], stride=[1, 2], scope='pool2')
        net = slim.dropout(net, 0.25, scope='dropout2')

        net = slim.conv2d(net, 64, [3, 3], scope='conv3', normalizer_fn=slim.batch_norm)
        net = slim.max_pool2d(net, [1, 2], stride=[1, 2], scope='pool3')
        net = slim.dropout(net, 0.25, scope='dropout3')

        # Flatten while preserving batch and time dimensions.
        dims = tf.shape(net)
        net = tf.reshape(net, (dims[0], dims[1],
                               net.shape[2].value * net.shape[3].value), 'flatten4')

        net = slim.fully_connected(net, 512, scope='fc5')
        net = slim.dropout(net, 0.5, scope='dropout5')

        return net
def create_test_network_8():
    """Aligned network for test, including an intermediate addition.

    The graph is similar to create_test_network_1(), except that it includes a
    few more layers on top. The added layers compose two different branches
    whose receptive fields are different. This makes this test case more
    challenging; in particular, this test fails if a naive DFS-like algorithm
    is used for RF computation.

    Returns:
        g: Tensorflow graph object (Graph proto).
    """
    g = ops.Graph()
    with g.as_default():
        # An input test image with unknown spatial resolution.
        x = array_ops.placeholder(
            dtypes.float32, (None, None, None, 1), name='input_image')
        # Left branch before first addition.
        l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID')
        # Right branch before first addition.
        l2_pad = array_ops.pad(x, [[0, 0], [1, 0], [1, 0], [0, 0]])
        l2 = slim.conv2d(l2_pad, 1, [3, 3], stride=2, scope='L2', padding='VALID')
        l3 = slim.conv2d(l2, 1, [1, 1], stride=2, scope='L3', padding='VALID')
        # First addition.
        l4 = nn.relu(l1 + l3)
        # Left branch after first addition.
        l5 = slim.conv2d(l4, 1, [1, 1], stride=2, scope='L5', padding='VALID')
        # Right branch after first addition.
        l6_pad = array_ops.pad(l4, [[0, 0], [1, 0], [1, 0], [0, 0]])
        l6 = slim.conv2d(l6_pad, 1, [3, 3], stride=2, scope='L6', padding='VALID')
        # Final addition.
        nn.relu(l5 + l6, name='output')
    return g
def make_tower(net):
    net = slim.conv2d(net, 20, [5, 5], padding='VALID', scope='conv1')
    net = slim.max_pool2d(net, [2, 2], padding='VALID', scope='pool1')
    net = slim.conv2d(net, 50, [5, 5], padding='VALID', scope='conv2')
    net = slim.max_pool2d(net, [2, 2], padding='VALID', scope='pool2')
    net = slim.flatten(net)
    net = slim.fully_connected(net, 500, scope='fc1')
    net = slim.fully_connected(net, 2, activation_fn=None, scope='fc2')
    return net
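# make_tower() defines a single LeNet-style branch; in a siamese setup two
# branches typically share weights by reusing one variable scope. A minimal
# sketch of that pattern (the wrapper name is hypothetical):
def make_siamese_towers(left, right):
    with tf.variable_scope('tower') as scope:
        out_left = make_tower(left)
        scope.reuse_variables()  # second branch reuses the same variables
        out_right = make_tower(right)
    return out_left, out_right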
def build_graph(top_k):
    keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
    images = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 1], name='image_batch')
    labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')
    is_training = tf.placeholder(dtype=tf.bool, shape=[], name='train_flag')
    with tf.device('/gpu:0'):
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params={'is_training': is_training}):
            conv3_1 = slim.conv2d(images, 64, [3, 3], 1, padding='SAME', scope='conv3_1')
            max_pool_1 = slim.max_pool2d(conv3_1, [2, 2], [2, 2], padding='SAME', scope='pool1')
            conv3_2 = slim.conv2d(max_pool_1, 128, [3, 3], padding='SAME', scope='conv3_2')
            max_pool_2 = slim.max_pool2d(conv3_2, [2, 2], [2, 2], padding='SAME', scope='pool2')
            conv3_3 = slim.conv2d(max_pool_2, 256, [3, 3], padding='SAME', scope='conv3_3')
            max_pool_3 = slim.max_pool2d(conv3_3, [2, 2], [2, 2], padding='SAME', scope='pool3')
            conv3_4 = slim.conv2d(max_pool_3, 512, [3, 3], padding='SAME', scope='conv3_4')
            conv3_5 = slim.conv2d(conv3_4, 512, [3, 3], padding='SAME', scope='conv3_5')
            max_pool_4 = slim.max_pool2d(conv3_5, [2, 2], [2, 2], padding='SAME', scope='pool4')

            flatten = slim.flatten(max_pool_4)
            fc1 = slim.fully_connected(slim.dropout(flatten, keep_prob), 1024,
                                       activation_fn=tf.nn.relu, scope='fc1')
            logits = slim.fully_connected(slim.dropout(fc1, keep_prob), FLAGS.charset_size,
                                          activation_fn=None, scope='fc2')
        loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
        accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops:
            updates = tf.group(*update_ops)
            loss = control_flow_ops.with_dependencies([updates], loss)

        global_step = tf.get_variable("step", [], initializer=tf.constant_initializer(0.0), trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
        train_op = slim.learning.create_train_op(loss, optimizer, global_step=global_step)
        probabilities = tf.nn.softmax(logits)

        tf.summary.scalar('loss', loss)
        tf.summary.scalar('accuracy', accuracy)
        merged_summary_op = tf.summary.merge_all()
        predicted_val_top_k, predicted_index_top_k = tf.nn.top_k(probabilities, k=top_k)
        accuracy_in_top_k = tf.reduce_mean(tf.cast(tf.nn.in_top_k(probabilities, labels, top_k), tf.float32))

    return {'images': images,
            'labels': labels,
            'keep_prob': keep_prob,
            'top_k': top_k,
            'global_step': global_step,
            'train_op': train_op,
            'loss': loss,
            'is_training': is_training,
            'accuracy': accuracy,
            'accuracy_top_k': accuracy_in_top_k,
            'merged_summary_op': merged_summary_op,
            'predicted_distribution': probabilities,
            'predicted_index_top_k': predicted_index_top_k,
            'predicted_val_top_k': predicted_val_top_k}
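# A sketch of how the dict returned by build_graph() might be driven in a
# session; next_batch() is a hypothetical data-feeding helper, not part of the
# code above:
def train_one_step(graph, sess, next_batch):
    batch_images, batch_labels = next_batch()
    _, loss_val, step = sess.run(
        [graph['train_op'], graph['loss'], graph['global_step']],
        feed_dict={graph['images']: batch_images,
                   graph['labels']: batch_labels,
                   graph['keep_prob']: 0.8,
                   graph['is_training']: True})
    return loss_val, step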
def build_model(self):
    sc = predictron_arg_scope()

    with tf.variable_scope('state'):
        with slim.arg_scope(sc):
            state = slim.conv2d(self.inputs, 32, [3, 3], scope='conv1')
            state = layers.batch_norm(state, activation_fn=tf.nn.relu, scope='conv1/preact')
            state = slim.conv2d(state, 32, [3, 3], scope='conv2')
            state = layers.batch_norm(state, activation_fn=tf.nn.relu, scope='conv2/preact')

    iter_template = tf.make_template('iter', self.iter_func, unique_name_='iter')

    rewards_arr = []
    gammas_arr = []
    lambdas_arr = []
    values_arr = []

    for k in range(self.max_depth):
        state, reward, gamma, lambda_, value = iter_template(state)
        rewards_arr.append(reward)
        gammas_arr.append(gamma)
        lambdas_arr.append(lambda_)
        values_arr.append(value)

    _, _, _, _, value = iter_template(state)
    # K + 1 elements
    values_arr.append(value)

    bs = tf.shape(self.inputs)[0]
    # Note: the original pre-1.0 code used tf.pack and tf.concat_v2, which are
    # tf.stack and tf.concat in TF 1.x.
    # [batch_size, K, maze_size]
    self.rewards = tf.stack(rewards_arr, axis=1)
    self.rewards = tf.reshape(self.rewards, [bs, self.max_depth, self.maze_size])
    # [batch_size, K + 1, maze_size]
    self.rewards = tf.concat(values=[tf.zeros(shape=[bs, 1, self.maze_size], dtype=tf.float32),
                                     self.rewards],
                             axis=1, name='rewards')

    # [batch_size, K, maze_size]
    self.gammas = tf.stack(gammas_arr, axis=1)
    self.gammas = tf.reshape(self.gammas, [bs, self.max_depth, self.maze_size])
    # [batch_size, K + 1, maze_size]
    self.gammas = tf.concat(values=[tf.ones(shape=[bs, 1, self.maze_size], dtype=tf.float32),
                                    self.gammas],
                            axis=1, name='gammas')

    # [batch_size, K, maze_size]
    self.lambdas = tf.stack(lambdas_arr, axis=1)
    self.lambdas = tf.reshape(self.lambdas, [-1, self.max_depth, self.maze_size])

    # [batch_size, K + 1, maze_size]
    self.values = tf.stack(values_arr, axis=1)
    self.values = tf.reshape(self.values, [-1, (self.max_depth + 1), self.maze_size])

    self.build_preturns()
    self.build_lambda_preturns()
def expand(self, inputs, channels, scope):
    with slim.arg_scope([slim.conv2d], activation_fn=None,
                        padding='SAME',
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01)):
        with tf.variable_scope(scope):
            e1x1 = slim.conv2d(inputs, channels, [1, 1], 1, scope='e1x1')
            e3x3 = slim.conv2d(inputs, channels, [3, 3], 1, scope='e3x3')
            # tf.concat takes (values, axis) in TF 1.x; the original pre-1.0
            # argument order (axis first) is updated here.
            expand = tf.concat([e1x1, e3x3], 3)
    return expand
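# expand() is one half of a SqueezeNet fire module. The fire_module() called by
# localization_squeezenet() above is not shown; a minimal sketch, assuming the
# usual squeeze(1x1) -> expand structure with an optional residual connection
# (the original implementation may differ in details):
def fire_module(self, inputs, squeeze_ch, expand_ch, scope, res_connection=False):
    with tf.variable_scope(scope):
        squeezed = slim.conv2d(inputs, squeeze_ch, [1, 1], 1,
                               activation_fn=tf.nn.relu, padding='SAME', scope='squeeze')
        # expand() concatenates 1x1 and 3x3 branches of expand_ch each,
        # giving 2 * expand_ch output channels.
        expanded = self.expand(squeezed, expand_ch, 'expand')
        if res_connection:
            expanded = expanded + inputs  # shapes must match for the shortcut
        return tf.nn.relu(expanded)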
def _contruct_network(self, inputs):
    # Actor network and critic network share all shallow layers
    conv1 = slim.conv2d(inputs=inputs,
                        num_outputs=16,
                        activation_fn=tf.nn.relu,
                        kernel_size=[8, 8],
                        stride=[4, 4],
                        padding='VALID')
    conv2 = slim.conv2d(inputs=conv1,
                        num_outputs=32,
                        activation_fn=tf.nn.relu,
                        kernel_size=[4, 4],
                        stride=[2, 2],
                        padding='VALID')
    hidden = slim.fully_connected(inputs=slim.flatten(conv2),
                                  num_outputs=256,
                                  activation_fn=tf.nn.relu)

    # Recurrent network for temporal dependencies
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=256)
    c_init = np.zeros((1, lstm_cell.state_size.c), np.float32)
    h_init = np.zeros((1, lstm_cell.state_size.h), np.float32)
    self.state_init = [c_init, h_init]
    c_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.c])
    h_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.h])
    self.state_in = (c_in, h_in)

    rnn_in = tf.expand_dims(hidden, [0])
    step_size = tf.shape(inputs)[:1]
    state_in = tf.contrib.rnn.LSTMStateTuple(c_in, h_in)
    lstm_out, lstm_state = tf.nn.dynamic_rnn(cell=lstm_cell,
                                             inputs=rnn_in,
                                             initial_state=state_in,
                                             sequence_length=step_size,
                                             time_major=False)
    lstm_c, lstm_h = lstm_state
    self.state_out = (lstm_c[:1, :], lstm_h[:1, :])
    rnn_out = tf.reshape(lstm_out, [-1, 256])

    # output for policy and value estimations
    self.policy = slim.fully_connected(inputs=rnn_out,
                                       num_outputs=self.a_dim,
                                       activation_fn=tf.nn.softmax,
                                       weights_initializer=normalized_columns_initializer(0.01),
                                       biases_initializer=None)
    self.value = slim.fully_connected(inputs=rnn_out,
                                      num_outputs=1,
                                      activation_fn=None,
                                      weights_initializer=normalized_columns_initializer(1.0),
                                      biases_initializer=None)
def build(input):
    net = slim.conv2d(input, 32, [3, 3], rate=1, activation_fn=lrelu, normalizer_fn=nm,
                      weights_initializer=identity_initializer(), scope='g_conv1')
    net = slim.conv2d(net, 32, [3, 3], rate=2, activation_fn=lrelu, normalizer_fn=nm,
                      weights_initializer=identity_initializer(), scope='g_conv2')
    net = slim.conv2d(net, 32, [3, 3], rate=4, activation_fn=lrelu, normalizer_fn=nm,
                      weights_initializer=identity_initializer(), scope='g_conv3')
    net = slim.conv2d(net, 32, [3, 3], rate=8, activation_fn=lrelu, normalizer_fn=nm,
                      weights_initializer=identity_initializer(), scope='g_conv4')
    net = slim.conv2d(net, 32, [3, 3], rate=16, activation_fn=lrelu, normalizer_fn=nm,
                      weights_initializer=identity_initializer(), scope='g_conv5')
    net = slim.conv2d(net, 32, [3, 3], rate=32, activation_fn=lrelu, normalizer_fn=nm,
                      weights_initializer=identity_initializer(), scope='g_conv6')
    net = slim.conv2d(net, 32, [3, 3], rate=64, activation_fn=lrelu, normalizer_fn=nm,
                      weights_initializer=identity_initializer(), scope='g_conv7')
    net = slim.conv2d(net, 32, [3, 3], rate=128, activation_fn=lrelu, normalizer_fn=nm,
                      weights_initializer=identity_initializer(), scope='g_conv8')
    net = slim.conv2d(net, 32, [3, 3], rate=1, activation_fn=lrelu, normalizer_fn=nm,
                      weights_initializer=identity_initializer(), scope='g_conv9')
    net = slim.conv2d(net, 3, [1, 1], rate=1, activation_fn=None, scope='g_conv_last')
    return net
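# build() assumes three helpers defined elsewhere in the original code (this is
# the dilated context aggregation network of Chen & Koltun, "Fast Image
# Processing with Fully-Convolutional Networks"). Minimal sketches under that
# assumption; the real definitions may differ:
def lrelu(x):
    # leaky ReLU with slope 0.2
    return tf.maximum(0.2 * x, x)

def nm(x):
    # adaptive normalization: a learned blend of the identity and batch norm
    w0 = tf.Variable(1.0, name='w0')
    w1 = tf.Variable(0.0, name='w1')
    return w0 * x + w1 * slim.batch_norm(x)

def identity_initializer():
    # start each kernel as an identity mapping between matching channels
    def _initializer(shape, dtype=tf.float32, partition_info=None):
        array = np.zeros(shape, dtype=float)
        cx, cy = shape[0] // 2, shape[1] // 2
        for i in range(min(shape[2], shape[3])):
            array[cx, cy, i, i] = 1
        return tf.constant(array, dtype=dtype)
    return _initializer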
def inference(self):
    _x = tf.reshape(self.x, shape=[-1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])

    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.05)):
        # 1*H*W -> 32*H*W
        model = slim.conv2d(_x, 32, [3, 3], padding='SAME', scope='conv1')
        # 32*H*W -> 1024*H/16*W/16
        model = slim.conv2d(model, 1024, [16, 16], padding='VALID', scope='conv2', stride=16)
        model = slim.conv2d_transpose(model, self.input_shape[2], [16, 16],
                                      stride=16, padding='VALID',
                                      activation_fn=None, scope='deconv_1')
        return model
def SSDHook(feature_map, hook_id):
    """
    Takes an input feature map, outputs the predictions tensor.
    hook_id is a unique string ID for the variable_scope.
    """
    with tf.variable_scope('ssd_hook_' + hook_id):
        # Note we have linear activation (i.e. no activation function)
        net_conf = slim.conv2d(feature_map, NUM_PRED_CONF, [3, 3], activation_fn=None, scope='conv_conf')
        net_conf = tf.contrib.layers.flatten(net_conf)

        net_loc = slim.conv2d(feature_map, NUM_PRED_LOC, [3, 3], activation_fn=None, scope='conv_loc')
        net_loc = tf.contrib.layers.flatten(net_loc)

    return net_conf, net_loc
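# SSDHook() is normally attached to several backbone feature maps, with the
# per-hook outputs concatenated into one confidence tensor and one localization
# tensor. A sketch of that wiring (the wrapper name and argument layout are
# hypothetical):
def gather_ssd_predictions(feature_maps):
    # feature_maps: list of (hook_id, 4-D feature map tensor) pairs
    confs, locs = [], []
    for hook_id, fmap in feature_maps:
        conf, loc = SSDHook(fmap, hook_id)
        confs.append(conf)
        locs.append(loc)
    return tf.concat(confs, 1), tf.concat(locs, 1)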
def build_rpn(self):
    rpn_head = slim.conv2d(self.vgg_head, 512, [3, 3], scope='rpn_layer_head')

    rpn_anchor_scores = slim.conv2d(rpn_head, 18, [1, 1], scope='rpn_layer_scores')
    rpn_anchor_scores_shape = self._get_shape(rpn_anchor_scores)
    rpn_anchor_scores = tf.reshape(rpn_anchor_scores,
                                   shape=[rpn_anchor_scores_shape[0] * rpn_anchor_scores_shape[1] *
                                          rpn_anchor_scores_shape[2] * 9, 2])
    rpn_anchor_scores_pred = tf.nn.softmax(rpn_anchor_scores)

    rpn_anchor_bboxes = slim.conv2d(self.vgg_head, 36, [1, 1], scope='rpn_layer_bboxes')
    rpn_anchor_bboxes_shape = self._get_shape(rpn_anchor_bboxes)
    rpn_anchor_bboxes = tf.reshape(rpn_anchor_bboxes,
                                   shape=[rpn_anchor_bboxes_shape[0] * rpn_anchor_bboxes_shape[1] *
                                          rpn_anchor_bboxes_shape[2] * 9, -1])

    self.rpn_scores = rpn_anchor_scores
    self.rpn_bboxes = rpn_anchor_bboxes
    self.rpn_pred = rpn_anchor_scores_pred
def resface_block(lower_input, output_channels, stride, dim_match=True, scope=None):
    # Note: the two plain slim.conv2d calls below pass no kernel_size, so an
    # enclosing slim.arg_scope is expected to supply it (see the usage sketch
    # following this function).
    with tf.variable_scope(scope):
        net = slim.batch_norm(lower_input, activation_fn=None, scope='bn1')
        net = slim.conv2d(net, output_channels)
        net = slim.batch_norm(net, scope='bn2')
        net = slim.conv2d(net, output_channels, stride=stride)
        net = slim.batch_norm(net, activation_fn=None, scope='bn3')

        if dim_match == True:
            short_cut = lower_input
        else:
            short_cut = slim.conv2d(lower_input, output_channels, stride=2, kernel_size=1)
            short_cut = slim.batch_norm(short_cut, activation_fn=None, scope='shortcut_bn')
        return short_cut + net
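# Usage sketch for resface_block(): callers supply the kernel size through an
# enclosing arg_scope. The 3x3 kernel and relu defaults here are assumptions
# about the original resface arg scope:
def resface_trunk(inputs):
    with slim.arg_scope([slim.conv2d], kernel_size=3, padding='SAME',
                        activation_fn=tf.nn.relu):
        net = resface_block(inputs, 64, stride=2, dim_match=False, scope='Conv1_1')
        net = resface_block(net, 64, stride=1, dim_match=True, scope='Conv1_2')
    return net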
def __init__(self, s_size, a_size, scope, trainer):
    with tf.variable_scope(scope):
        # quantile regression dqn
        self.quantile = 1.0 / N
        self.cumulative_probabilities = (2.0 * np.arange(N) + 1) / (2.0 * N)

        # network
        self.inputs = tf.placeholder(shape=[None, s_size], dtype=tf.float32)
        self.imageIn = tf.reshape(self.inputs, shape=[-1, 84, 84, 1])
        self.conv1 = slim.conv2d(activation_fn=tf.nn.relu,
                                 inputs=self.imageIn, num_outputs=32,
                                 kernel_size=[8, 8], stride=[4, 4], padding='VALID')
        self.conv2 = slim.conv2d(activation_fn=tf.nn.relu,
                                 inputs=self.conv1, num_outputs=64,
                                 kernel_size=[4, 4], stride=[2, 2], padding='VALID')
        self.conv3 = slim.conv2d(activation_fn=tf.nn.relu,
                                 inputs=self.conv2, num_outputs=64,
                                 kernel_size=[3, 3], stride=[1, 1], padding='VALID')
        hidden = slim.fully_connected(slim.flatten(self.conv3), 512, activation_fn=tf.nn.relu)
        self.out = slim.fully_connected(hidden,
                                        a_size * N,
                                        activation_fn=None,
                                        weights_initializer=normalized_columns_initializer(0.1),
                                        biases_initializer=None)
        self.out = tf.reshape(self.out, [-1, a_size, N])
        self.Q = tf.reduce_sum(self.out * self.quantile, axis=2)

        # Only the worker network need ops for loss functions and gradient updating.
        if scope != 'global':
            self.actions_q = tf.placeholder(shape=[None, a_size, N], dtype=tf.float32)
            self.q_target = tf.placeholder(shape=[None, N], dtype=tf.float32)

            self.q_actiona = tf.multiply(self.out, self.actions_q)
            self.q_action = tf.reduce_sum(self.q_actiona, axis=1)
            self.u = self.q_target - self.q_action
            self.loss = tf.reduce_mean(tf.reduce_sum(tf.square(self.u), axis=1))

            self.delta = tf.to_float(self.u < 0.0)
            self.loss1 = tf.abs(self.cumulative_probabilities - self.delta)
            self.loss2 = self.huber(self.u, k)
            # self.loss = tf.reduce_mean(tf.reduce_mean(self.loss1 * self.loss2, axis=1))

            # Get gradients from local network using local losses
            local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
            self.gradients = tf.gradients(self.loss, local_vars)
            self.var_norms = tf.global_norm(local_vars)
            grads, self.grad_norms = tf.clip_by_global_norm(self.gradients, 40.0)

            # Apply local gradients to global network
            global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'global')
            self.apply_grads = trainer.apply_gradients(zip(grads, global_vars))
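# self.huber() and the constants N and k are defined outside this excerpt. A
# minimal sketch of the Huber loss commonly used in quantile-regression DQN
# (Dabney et al., 2017) with threshold k; this is an assumption about the
# missing helper, not the original implementation:
def huber(self, u, k=1.0):
    return tf.where(tf.abs(u) <= k,
                    0.5 * tf.square(u),
                    k * (tf.abs(u) - 0.5 * k))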
def inception_resnet_v2(inputs, is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV2'):
    """Creates the Inception Resnet V2 model.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      bottleneck_layer_size: size of the bottleneck (embedding) layer.
      reuse: whether or not the network and its variables should be reused. To be
        able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      logits: the logits outputs of the model.
      end_points: the set of end_points from the inception model.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                                stride=1, padding='SAME'):
                # 149 x 149 x 32
                net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',
                                  scope='Conv2d_1a_3x3')
                end_points['Conv2d_1a_3x3'] = net
                # 147 x 147 x 32
                net = slim.conv2d(net, 32, 3, padding='VALID',
                                  scope='Conv2d_2a_3x3')
                end_points['Conv2d_2a_3x3'] = net
                # 147 x 147 x 64
                net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
                end_points['Conv2d_2b_3x3'] = net
                # 73 x 73 x 64
                net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                      scope='MaxPool_3a_3x3')
                end_points['MaxPool_3a_3x3'] = net
                # 73 x 73 x 80
                net = slim.conv2d(net, 80, 1, padding='VALID',
                                  scope='Conv2d_3b_1x1')
                end_points['Conv2d_3b_1x1'] = net
                # 71 x 71 x 192
                net = slim.conv2d(net, 192, 3, padding='VALID',
                                  scope='Conv2d_4a_3x3')
                end_points['Conv2d_4a_3x3'] = net
                # 35 x 35 x 192
                net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                      scope='MaxPool_5a_3x3')
                end_points['MaxPool_5a_3x3'] = net

                # 35 x 35 x 320
                with tf.variable_scope('Mixed_5b'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5,
                                                    scope='Conv2d_0b_5x5')
                    with tf.variable_scope('Branch_2'):
                        tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1')
                        tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3,
                                                    scope='Conv2d_0c_3x3')
                    with tf.variable_scope('Branch_3'):
                        tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME',
                                                     scope='AvgPool_0a_3x3')
                        tower_pool_1 = slim.conv2d(tower_pool, 64, 1,
                                                   scope='Conv2d_0b_1x1')
                    net = tf.concat([tower_conv, tower_conv1_1,
                                     tower_conv2_2, tower_pool_1], 3)

                end_points['Mixed_5b'] = net
                net = slim.repeat(net, 10, block35, scale=0.17)

                # 17 x 17 x 1024
                with tf.variable_scope('Mixed_6a'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net, 384, 3, stride=2, padding='VALID',
                                                 scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3,
                                                    stride=2, padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                                     scope='MaxPool_1a_3x3')
                    net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)

                end_points['Mixed_6a'] = net
                net = slim.repeat(net, 20, block17, scale=0.10)

                with tf.variable_scope('Mixed_7a'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                        tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,
                                                   padding='VALID', scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2,
                                                    padding='VALID', scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                        tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2,
                                                    padding='VALID', scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_3'):
                        tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                                     scope='MaxPool_1a_3x3')
                    net = tf.concat([tower_conv_1, tower_conv1_1,
                                     tower_conv2_2, tower_pool], 3)

                end_points['Mixed_7a'] = net

                net = slim.repeat(net, 9, block8, scale=0.20)
                net = block8(net, activation_fn=None)

                net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
                end_points['Conv2d_7b_1x1'] = net

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    # pylint: disable=no-member
                    net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',
                                          scope='AvgPool_1a_8x8')
                    net = slim.flatten(net)

                    net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                                       scope='Dropout')

                    end_points['PreLogitsFlatten'] = net

                net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None,
                                           scope='Bottleneck', reuse=False)

    return net, end_points
def build_arch(input, coord_add, is_train: bool, num_classes: int):
    test1 = []
    data_size = int(input.get_shape()[1])
    # xavier initialization is necessary here to provide higher stability
    # initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
    # instead of initializing bias with constant 0, a truncated normal initializer is exploited here for higher stability
    bias_initializer = tf.truncated_normal_initializer(
        mean=0.0, stddev=0.01)  # tf.constant_initializer(0.0)
    # The paper did not mention any regularization; a common l2 regularizer on weights is added here
    weights_regularizer = tf.contrib.layers.l2_regularizer(5e-04)

    tf.logging.info('input shape: {}'.format(input.get_shape()))

    # weights_initializer=initializer,
    with slim.arg_scope([slim.conv2d], trainable=is_train,
                        biases_initializer=bias_initializer,
                        weights_regularizer=weights_regularizer):
        with tf.variable_scope('relu_conv1') as scope:
            output = slim.conv2d(input, num_outputs=cfg.A, kernel_size=[5, 5], stride=2,
                                 padding='VALID', scope=scope, activation_fn=tf.nn.relu)
            data_size = int(np.floor((data_size - 4) / 2))

            assert output.get_shape() == [cfg.batch_size, data_size, data_size, cfg.A]
            tf.logging.info('conv1 output shape: {}'.format(output.get_shape()))

        with tf.variable_scope('primary_caps') as scope:
            pose = slim.conv2d(output, num_outputs=cfg.B * 16, kernel_size=[1, 1],
                               stride=1, padding='VALID', scope=scope, activation_fn=None)
            activation = slim.conv2d(output, num_outputs=cfg.B, kernel_size=[1, 1],
                                     stride=1, padding='VALID',
                                     scope='primary_caps/activation',
                                     activation_fn=tf.nn.sigmoid)
            pose = tf.reshape(pose, shape=[cfg.batch_size, data_size, data_size, cfg.B, 16])
            activation = tf.reshape(activation,
                                    shape=[cfg.batch_size, data_size, data_size, cfg.B, 1])
            output = tf.concat([pose, activation], axis=4)
            output = tf.reshape(output, shape=[cfg.batch_size, data_size, data_size, -1])
            assert output.get_shape() == [cfg.batch_size, data_size, data_size, cfg.B * 17]
            tf.logging.info('primary capsule output shape: {}'.format(output.get_shape()))

        with tf.variable_scope('conv_caps1') as scope:
            output = kernel_tile(output, 3, 2)
            data_size = int(np.floor((data_size - 2) / 2))
            output = tf.reshape(output,
                                shape=[cfg.batch_size * data_size * data_size, 3 * 3 * cfg.B, 17])
            activation = tf.reshape(output[:, :, 16],
                                    shape=[cfg.batch_size * data_size * data_size, 3 * 3 * cfg.B, 1])

            with tf.variable_scope('v') as scope:
                votes = mat_transform(output[:, :, :16], cfg.C, weights_regularizer, tag=True)
                tf.logging.info('conv cap 1 votes shape: {}'.format(votes.get_shape()))

            with tf.variable_scope('routing') as scope:
                miu, activation, _ = em_routing(votes, activation, cfg.C, weights_regularizer)
                tf.logging.info('conv cap 1 miu shape: {}'.format(miu.get_shape()))
                tf.logging.info('conv cap 1 activation before reshape: {}'.format(activation.get_shape()))

            pose = tf.reshape(miu, shape=[cfg.batch_size, data_size, data_size, cfg.C, 16])
            tf.logging.info('conv cap 1 pose shape: {}'.format(pose.get_shape()))
            activation = tf.reshape(activation,
                                    shape=[cfg.batch_size, data_size, data_size, cfg.C, 1])
            tf.logging.info('conv cap 1 activation after reshape: {}'.format(activation.get_shape()))
            output = tf.reshape(tf.concat([pose, activation], axis=4),
                                [cfg.batch_size, data_size, data_size, -1])
            tf.logging.info('conv cap 1 output shape: {}'.format(output.get_shape()))

        with tf.variable_scope('conv_caps2') as scope:
            output = kernel_tile(output, 3, 1)
            data_size = int(np.floor((data_size - 2) / 1))
            output = tf.reshape(output,
                                shape=[cfg.batch_size * data_size * data_size, 3 * 3 * cfg.C, 17])
            activation = tf.reshape(output[:, :, 16],
                                    shape=[cfg.batch_size * data_size * data_size, 3 * 3 * cfg.C, 1])

            with tf.variable_scope('v') as scope:
                votes = mat_transform(output[:, :, :16], cfg.D, weights_regularizer)
                tf.logging.info('conv cap 2 votes shape: {}'.format(votes.get_shape()))

            with tf.variable_scope('routing') as scope:
                miu, activation, _ = em_routing(votes, activation, cfg.D, weights_regularizer)

            pose = tf.reshape(miu, shape=[cfg.batch_size * data_size * data_size, cfg.D, 16])
            tf.logging.info('conv cap 2 pose shape: {}'.format(votes.get_shape()))
            activation = tf.reshape(activation,
                                    shape=[cfg.batch_size * data_size * data_size, cfg.D, 1])
            tf.logging.info('conv cap 2 activation shape: {}'.format(activation.get_shape()))

        # It is not clear from the paper whether ConvCaps2 is fully connected to
        # the Class Capsules, or conv connected with a kernel size of 1x1 plus a
        # global average pooling. From the description in Figure 1 of the paper
        # and the number of parameters (310k in the paper and 316,853 in fact),
        # I assume a conv cap plus a global average pooling is the design.
        with tf.variable_scope('class_caps') as scope:
            with tf.variable_scope('v') as scope:
                votes = mat_transform(pose, num_classes, weights_regularizer)

                assert votes.get_shape() == [cfg.batch_size * data_size * data_size,
                                             cfg.D, num_classes, 16]
                tf.logging.info('class cap votes original shape: {}'.format(votes.get_shape()))

                coord_add = np.reshape(coord_add, newshape=[data_size * data_size, 1, 1, 2])
                coord_add = np.tile(coord_add, [cfg.batch_size, cfg.D, num_classes, 1])
                coord_add_op = tf.constant(coord_add, dtype=tf.float32)

                votes = tf.concat([coord_add_op, votes], axis=3)
                tf.logging.info('class cap votes coord add shape: {}'.format(votes.get_shape()))

            with tf.variable_scope('routing') as scope:
                miu, activation, test2 = em_routing(votes, activation, num_classes, weights_regularizer)
                tf.logging.info('class cap activation shape: {}'.format(activation.get_shape()))
                tf.summary.histogram(name="class_cap_routing_hist", values=test2)

            output = tf.reshape(activation, shape=[cfg.batch_size, data_size, data_size, num_classes])

        output = tf.reshape(tf.nn.avg_pool(output,
                                           ksize=[1, data_size, data_size, 1],
                                           strides=[1, 1, 1, 1],
                                           padding='VALID'),
                            shape=[cfg.batch_size, num_classes])
        tf.logging.info('class cap output shape: {}'.format(output.get_shape()))

        pose = tf.nn.avg_pool(tf.reshape(miu, shape=[cfg.batch_size, data_size, data_size, -1]),
                              ksize=[1, data_size, data_size, 1],
                              strides=[1, 1, 1, 1],
                              padding='VALID')
        pose_out = tf.reshape(pose, shape=[cfg.batch_size, num_classes, 18])

    return output, pose_out
def ConvAvgPool(inputs, num_outputs, kernel_size, stride=1, padding='SAME',
                rate=1, name='ConvMeanPool', **kwargs):
    with tf.variable_scope(name):
        output = slim.conv2d(inputs, num_outputs, kernel_size,
                             stride=stride, padding=padding, rate=rate, **kwargs)
        output = slim.avg_pool2d(output, kernel_size=2, stride=2, padding='VALID')
    return output
def residual_gradient_conv(input, out_dim, is_training, name, gradient_type='type1',
                           is_bn='BN', activation_fn=tf.nn.relu):
    '''
    gradient_type: 'type0', 'type1', 'type2'
    '''
    # pw = slim.conv2d(input, out_dim, [1, 1], stride=[1, 1], activation_fn=None, scope=name + '/rgc_pw_1', padding='SAME')
    net = slim.conv2d(input, out_dim, [3, 3], stride=[1, 1], activation_fn=None,
                      scope=name + '/conv', padding='SAME')

    gradient_x = spatial_gradient_x(input, name)
    gradient_y = spatial_gradient_y(input, name)

    if gradient_type == 'type0':
        gradient_gabor = tf.pow(gradient_x, 2) + tf.pow(gradient_y, 2)
        gradient_gabor_pw = slim.conv2d(gradient_gabor, out_dim, [1, 1], stride=[1, 1],
                                        activation_fn=None, scope=name + '/rgc_pw_gabor',
                                        padding='SAME')
        gradient_gabor_pw = slim.batch_norm(gradient_gabor_pw, is_training=is_training,
                                            activation_fn=None, scope=name + '/gabor_bn')
        net = net + gradient_gabor_pw
    elif gradient_type == 'type1':
        gradient_gabor = tf.sqrt(tf.pow(gradient_x, 2) + tf.pow(gradient_y, 2) + 1e-8)
        gradient_gabor_pw = slim.conv2d(gradient_gabor, out_dim, [1, 1], stride=[1, 1],
                                        activation_fn=None, scope=name + '/rgc_pw_gabor',
                                        padding='SAME')
        gradient_gabor_pw = slim.batch_norm(gradient_gabor_pw, is_training=is_training,
                                            activation_fn=None, scope=name + '/gabor_bn')
        net = net + gradient_gabor_pw
    elif gradient_type == 'type2':
        net = net
    else:
        print('Unknown gradient_type for "residual_gradient_conv"')
        exit(1)

    if is_bn is None:
        pass
    elif is_bn == 'BN':
        net = slim.batch_norm(net, is_training=is_training, activation_fn=None, scope=name + '/gn')
    elif is_bn == 'GN':
        net = GroupNorm(net, is_training=is_training, activation_fn=None, scope=name + '/gn')
    else:
        print('Unknown Normalization Type!')
        exit(1)

    if activation_fn is not None:
        net = activation_fn(net)
    return net
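# spatial_gradient_x/y used above are not shown in this excerpt; they presumably
# apply fixed horizontal/vertical gradient filters per channel. A sketch using
# Sobel kernels applied depthwise (an assumption about the original helpers):
def spatial_gradient_x(input, name):
    channels = input.get_shape().as_list()[-1]
    sobel_x = tf.constant([[-1., 0., 1.], [-2., 0., 2.], [-1., 0., 1.]], tf.float32)
    kernel = tf.tile(tf.reshape(sobel_x, [3, 3, 1, 1]), [1, 1, channels, 1])
    return tf.nn.depthwise_conv2d(input, kernel, strides=[1, 1, 1, 1],
                                  padding='SAME', name=name + '/grad_x')

def spatial_gradient_y(input, name):
    # same filter transposed for the vertical direction
    channels = input.get_shape().as_list()[-1]
    sobel_y = tf.constant([[-1., -2., -1.], [0., 0., 0.], [1., 2., 1.]], tf.float32)
    kernel = tf.tile(tf.reshape(sobel_y, [3, 3, 1, 1]), [1, 1, channels, 1])
    return tf.nn.depthwise_conv2d(input, kernel, strides=[1, 1, 1, 1],
                                  padding='SAME', name=name + '/grad_y')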
def frame_detector_fn(spec_batch, is_training):
    assert tf.get_variable_scope().name == ''
    spec_batch.set_shape([None, None, 804])
    assert isinstance(is_training, bool)

    with tf.variable_scope('frame_detector', reuse=tf.AUTO_REUSE):
        outputs = MiscFns.stft_to_hvqt_tf_fn(spec_batch)
        outputs.set_shape([None, None, 264])
        outputs = outputs[..., None]
        outputs = slim.conv2d(
            scope='C_0',
            inputs=outputs,
            kernel_size=3,
            num_outputs=32,
            normalizer_fn=slim.batch_norm,
            normalizer_params=dict(is_training=is_training))
        outputs = slim.conv2d(
            scope='C_1',
            inputs=outputs,
            kernel_size=3,
            num_outputs=32,
            normalizer_fn=slim.batch_norm,
            normalizer_params=dict(is_training=is_training))
        outputs = slim.max_pool2d(scope='MP_1', inputs=outputs, stride=[1, 2],
                                  kernel_size=[1, 2], padding='VALID')
        outputs = slim.dropout(scope='DO_1', is_training=is_training,
                               inputs=outputs, keep_prob=.75)
        outputs = slim.conv2d(
            scope='C_2',
            inputs=outputs,
            kernel_size=3,
            num_outputs=64,
            normalizer_fn=slim.batch_norm,
            normalizer_params=dict(is_training=is_training))
        outputs = slim.max_pool2d(scope='MP_2', inputs=outputs, stride=[1, 2],
                                  kernel_size=[1, 2], padding='VALID')
        outputs = slim.dropout(scope='DO_2', is_training=is_training,
                               inputs=outputs, keep_prob=.75)

        f = outputs.shape[2].value
        c = outputs.shape[3].value
        assert f is not None and c is not None
        b, t, _, _ = tf.unstack(tf.shape(outputs))
        outputs = tf.reshape(outputs, [b, t, f * c])

        outputs = slim.fully_connected(
            scope='FC_3',
            inputs=outputs,
            num_outputs=512,
            normalizer_fn=slim.batch_norm,
            normalizer_params=dict(is_training=is_training))
        outputs.set_shape([None, None, 512])
        outputs = slim.dropout(scope='DO_3', is_training=is_training,
                               inputs=outputs, keep_prob=.5)
        outputs = slim.fully_connected(scope='FC_4', inputs=outputs,
                                       num_outputs=88, activation_fn=None)
        outputs.set_shape([None, None, 88])

    return outputs
def c_b_r(self, inputs, numOut, ksize=1, stride=1, name='cbr'):
    with tf.name_scope(name):
        x = slim.conv2d(inputs, num_outputs=numOut, kernel_size=ksize,
                        stride=stride, activation_fn=None)
        x = slim.batch_norm(inputs=x, decay=0.9, is_training=self.training,
                            activation_fn=tf.nn.relu)
        return x
def build(self):
    '''
    Build the graph defining the resnet. The resnet consists of two conv
    layers followed by six residual layers. The output tensor from the
    residual layers is then processed by a fully connected layer. Finally,
    batch normalization and l2 normalization ops are applied to obtain the
    feature vector projected onto the unit hypersphere.

    Output: appearance feature map/descriptor, a numpy array of dimension 128.
    '''
    # build the resnet operations
    with self.resnet_graph.as_default():
        with tf.variable_scope('input'):
            # dummy nodes that provide entry points for input image to the graph
            img_input = tf.placeholder(tf.float32, self.ph_shape, "roi")

        with slim.arg_scope(self.resnet_arg_scope()):
            # conv and max pooling layers
            net = slim.conv2d(img_input, num_outputs=32, scope='conv1_1')
            net = slim.conv2d(net, num_outputs=32, scope='conv1_2')
            net = slim.max_pool2d(net, [3, 3], scope="pool1")

            # residual layers that learn the residual mappings
            net = self.residual_layer(net, 'conv2_1', increase_dim=False, is_first=True)
            net = self.residual_layer(net, 'conv2_3', increase_dim=False)
            net = self.residual_layer(net, 'conv3_1', increase_dim=True)
            net = self.residual_layer(net, 'conv3_3', increase_dim=False)
            net = self.residual_layer(net, 'conv4_1', increase_dim=True)
            net = self.residual_layer(net, 'conv4_3', increase_dim=False)

            # the number of feature maps obtained from the last residual layer
            feat_num = net.get_shape().as_list()[-1]
            # flattens the last feature maps into 1D for each batch
            net = slim.flatten(net, scope='feat_1d')
            # add dropout op to the input to the fc layer
            net = slim.dropout(net, keep_prob=0.6, scope='dropout')
            features = slim.fully_connected(net, feat_num, scope='fc1')

            # remove the feature distribution change before normalization
            features = slim.batch_norm(features, scope='ball')
            # add euclidean norm op
            euc_norm = tf.norm(features, axis=[-2, -1], keep_dims=True)
            # add a small constant to prevent division by zero
            euc_norm += tf.constant(1e-8, tf.float32)
            # project the features to unit hypersphere
            features = tf.div(features, euc_norm, 'norm_feat')
def forward(self, inputs, is_training=False, reuse=False):
    # the input img_size, form: [height, width]
    self.img_size = tf.shape(inputs)[1:3]
    # set batch norm params
    batch_norm_params = {
        'decay': self.batch_norm_decay,
        'epsilon': 1e-05,
        'scale': True,
        'is_training': is_training,
        'fused': None,  # Use fused batch norm if possible.
    }

    with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse):
        with slim.arg_scope(
                [slim.conv2d],
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                biases_initializer=None,
                activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1)):
            with tf.variable_scope('darknet53_body'):
                route_1, route_2, route_3 = darknet53_body(inputs)

            with tf.variable_scope('yolov3_head'):
                inter1, net = yolo_block(route_3, 512)
                feature_map_1 = slim.conv2d(
                    net, 3 * (5 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    weights_regularizer=slim.l2_regularizer(0.001),
                    weights_initializer=tf.contrib.layers.xavier_initializer(),
                    biases_initializer=tf.zeros_initializer())
                feature_map_1 = tf.identity(feature_map_1, name='feature_map_1')

                inter1 = conv2d(inter1, 256, 1)
                inter1 = upsample_layer(inter1, route_2.get_shape().as_list())
                concat1 = tf.concat([inter1, route_2], axis=3)

                inter2, net = yolo_block(concat1, 256)
                feature_map_2 = slim.conv2d(
                    net, 3 * (5 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    weights_regularizer=slim.l2_regularizer(0.001),
                    weights_initializer=tf.contrib.layers.xavier_initializer(),
                    biases_initializer=tf.zeros_initializer())
                feature_map_2 = tf.identity(feature_map_2, name='feature_map_2')

                inter2 = conv2d(inter2, 128, 1)
                inter2 = upsample_layer(inter2, route_1.get_shape().as_list())
                concat2 = tf.concat([inter2, route_1], axis=3)

                _, feature_map_3 = yolo_block(concat2, 128)
                feature_map_3 = slim.conv2d(
                    feature_map_3, 3 * (5 + self.class_num), 1,
                    stride=1, normalizer_fn=None, activation_fn=None,
                    weights_regularizer=slim.l2_regularizer(0.001),
                    weights_initializer=tf.contrib.layers.xavier_initializer(),
                    biases_initializer=tf.zeros_initializer())
                feature_map_3 = tf.identity(feature_map_3, name='feature_map_3')

    return feature_map_1, feature_map_2, feature_map_3
def _conv_1x1(input_layer, n_classes):
    return slim.conv2d(input_layer, n_classes, [1, 1], activation_fn=None)
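# _conv_1x1() is the usual FCN-style per-pixel classifier: a 1x1 convolution
# maps feature channels to class scores without mixing spatial positions.
# Hypothetical usage (the shapes and the 21-class count are assumptions):
features = tf.placeholder(tf.float32, [None, 16, 16, 256])
logits = _conv_1x1(features, 21)  # -> shape [batch, 16, 16, 21]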
def __init__(self, num_classes, input_tensor, is_training=False, data_format='NHWC', priors_rule='object_detection_api', priors=[], mobilenet_version='v2', depth_multiplier=1.0, min_depth=16, weight_regularization=4e-5): """ Args: num_classes: Number of classes including a background class. input_tensor: Input 4D tensor. is_training: Is training or inference stage. data_format: 'NHWC' or 'NCHW'. priors_rule: 'caffe', 'object_detection_api', 'custom'. priors: List of list of prior sizes (relative sizes). Only for priors_rule='custom'. mobilenet_version: 'v1' or 'v2'. depth_multiplier: MobileNet depth multiplier. min_depth: Minimum channels count in MobileNet. weight_regularization: l2 weight regularization scale. """ assert data_format in ['NHWC', 'NCHW'] assert priors_rule in ['caffe', 'object_detection_api', 'custom'] self.data_format = data_format if self.data_format == 'NCHW': input_tensor = tf.transpose(input_tensor, [0, 3, 1, 2]) self.input_shape = input_tensor.get_shape().as_list() self.input_tensor = input_tensor if self.data_format == 'NCHW': spatial_dim_axis = [2, 3] elif self.data_format == 'NHWC': spatial_dim_axis = [1, 2] self.version = mobilenet_version super(MobileNetSSD, self).__init__(num_classes=num_classes, input_shape=self.input_shape, data_format=data_format) self.is_training = is_training if mobilenet_version == 'v2': mobilenet_base = mobilenet_v2_base base_scope = mobilenet_v2.training_scope base_layers = [ 'layer_7/output', 'layer_15/expansion_output', 'layer_19' ] elif mobilenet_version == 'v1': mobilenet_base = mobilenet_v1_base base_scope = mobilenet.training_scope base_layers = [ 'Conv2d_5_pointwise', 'Conv2d_11_pointwise', 'Conv2d_13_pointwise' ] else: tf.logging.error( 'Wrong MobileNet version = {}'.format(mobilenet_version)) exit(0) def scope_fn(): batch_norm_params = { 'is_training': self.is_training, 'center': True, 'scale': True, 'decay': 0.9997, 'epsilon': 0.001, 'fused': True, 'data_format': data_format } affected_ops = [ slim.conv2d, slim.separable_conv2d, slim.conv2d_transpose ] with slim.arg_scope([slim.batch_norm], **batch_norm_params): with slim.arg_scope( affected_ops, weights_regularizer=slim.l2_regularizer( scale=float(weight_regularization)), weights_initializer=tf.truncated_normal_initializer( mean=0, stddev=0.03), activation_fn=tf.nn.relu6, normalizer_fn=slim.batch_norm) as scope: return scope with slim.arg_scope(base_scope(is_training=None)): with slim.arg_scope(scope_fn()): _, image_features = mobilenet_base( self.input_tensor, final_endpoint=base_layers[-1], depth_multiplier=depth_multiplier, min_depth=min_depth, use_explicit_padding=False, is_training=self.is_training) head_feature_map_names = base_layers[-2:] head_feature_map_tensors = [ image_features[name] for name in head_feature_map_names ] feature_map = image_features[base_layers[-1]] depths = [512, 256, 256, 128] depths = [int(d * depth_multiplier) for d in depths] with tf.variable_scope('extra_features'): with slim.arg_scope(scope_fn()): for i, depth in enumerate(depths): intermediate_layer = slim.conv2d( feature_map, int(depth / 2), [1, 1], stride=1, scope='intermediate_{0}'.format(i + 1)) feature_map = slim.separable_conv2d( intermediate_layer, None, [3, 3], depth_multiplier=1, padding='SAME', stride=2, scope='feature_map_dw_{0}'.format(i + 1)) output_feature_name = 'feature_map_{0}'.format(i + 1) feature_map = slim.conv2d(feature_map, int(depth), [1, 1], padding='SAME', stride=1, scope=output_feature_name) head_feature_map_names.append(output_feature_name) 
head_feature_map_tensors.append(feature_map) variances = [0.1, 0.1, 0.2, 0.2] if priors_rule == 'caffe': scale = [0.2, 0.35, 0.5, 0.65, 0.8, 0.95] dicts = self._create_caffe_priors(self.input_shape, spatial_dim_axis, scale, variances, head_feature_map_tensors, head_feature_map_names) elif priors_rule == 'object_detection_api': scale = [0.2, 0.35, 0.5, 0.65, 0.8, 0.95, 1.] dicts = self._create_obj_det_priors(self.input_shape, spatial_dim_axis, scale, variances, head_feature_map_tensors, head_feature_map_names) elif priors_rule == 'custom': assert len(priors) == len(head_feature_map_tensors) dicts = self._create_custom_priors(self.input_shape, spatial_dim_axis, priors, variances, head_feature_map_tensors, head_feature_map_names) with slim.arg_scope(scope_fn()): self.create_heads(head_feature_map_tensors, dicts)
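# A minimal construction sketch for the MobileNetSSD class defined above. The input
# resolution and class count are hypothetical, and the class plus its MobileNet
# dependencies are assumed to be importable from this project:
#   images = tf.placeholder(tf.float32, [None, 300, 300, 3])
#   ssd = MobileNetSSD(num_classes=21, input_tensor=images, is_training=True,
#                      priors_rule='object_detection_api', mobilenet_version='v2',
#                      depth_multiplier=1.0)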
def model(): is_training = tf.placeholder(tf.bool, []) train_images, train_label = data.get_train_data(batch_size) test_images, test_label = data.get_test_data(batch_size) x = tf.cond(is_training, lambda: train_images, lambda: test_images) y_ = tf.cond(is_training, lambda: train_label, lambda: test_label) y_ = tf.cast(y_, tf.int64) with slim.arg_scope([slim.conv2d, slim.fully_connected], activation_fn=tf.nn.crelu, normalizer_fn=slim.batch_norm, weights_regularizer=slim.l2_regularizer(0.005), normalizer_params={ 'is_training': is_training, 'decay': 0.95 }): conv1 = slim.conv2d( x, 48, [9, 9], weights_initializer=tf.truncated_normal_initializer(mean=-0.08, stddev=0.63)) pool1 = slim.max_pool2d(conv1, [4, 4], stride=4, padding='SAME') conv2 = slim.conv2d( pool1, 43, [7, 7], weights_initializer=tf.truncated_normal_initializer(mean=-0.23, stddev=0.22)) pool2 = slim.max_pool2d(conv2, [4, 4], stride=4, padding='SAME') pool3 = slim.avg_pool2d(pool2, [3, 3], stride=3, padding='SAME') flatten = slim.flatten(pool3) logits = slim.fully_connected( flatten, 2, activation_fn=None, weights_initializer=tf.truncated_normal_initializer( mean=0.726, stddev=0.397992), biases_initializer=tf.constant_initializer(0.1, dtype=tf.float32)) correct_prediction = tf.equal(tf.argmax(logits, 1), y_) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) regularization_loss = tf.add_n(slim.losses.get_regularization_losses()) cross_entropy = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( labels=y_, logits=logits)) + regularization_loss step = tf.get_variable("step", [], initializer=tf.constant_initializer(0.0), trainable=False) # lr = tf.train.exponential_decay(0.1, # step, # 550*30, # 0.9, # staircase=True) # # # optimizer = tf.train.GradientDescentOptimizer(lr) optimizer = tf.train.AdamOptimizer(0.001) # lr_summary = tf.summary.scalar('lr', lr) train_step = slim.learning.create_train_op(cross_entropy, optimizer, global_step=step) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) if update_ops: updates = tf.group(*update_ops) cross_entropy = control_flow_ops.with_dependencies([updates], cross_entropy) loss_summary = tf.summary.scalar('loss', cross_entropy) accuracy_summary = tf.summary.scalar('accuracy', accuracy) merge_summary = tf.summary.merge([loss_summary, accuracy_summary]) return is_training, train_step, step, accuracy, cross_entropy, merge_summary
def build_network(self, is_training): initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01) head = self.head # directly extract head features of backbone #head = self.image_to_head(is_training) sp = self.sp_to_head() pool_H = self.crop_pool_layer(head, self.H_boxes, 'Crop_H', cfg.POOLING_SIZE) pool_O = self.crop_pool_layer(head, self.O_boxes, 'Crop_O', cfg.POOLING_SIZE) fc_H = self.res5(pool_H, is_training, False) # 2048 fc_O = self.res5(pool_O, is_training, True) # 2048 fc1_H = slim.fully_connected(fc_H, self.num_fc, scope='fc1_H') fc1_H = slim.dropout(fc1_H, keep_prob=self.keep_prob, scope='drop1_H') fc2_H = slim.fully_connected(fc1_H, self.num_fc2, scope='fc2_H') fc2_H = slim.dropout(fc2_H, keep_prob=self.keep_prob, scope='drop2_H') fc1_O = slim.fully_connected(fc_O, self.num_fc, scope='fc1_O') fc1_O = slim.dropout(fc1_O, keep_prob=self.keep_prob, scope='drop1_O') fc2_O = slim.fully_connected(fc1_O, self.num_fc2, scope='fc2_O') fc2_O = slim.dropout(fc2_O, keep_prob=self.keep_prob, scope='drop2_O') self.predictions['H_solo'] = fc2_H # Network Component # whether use union box if self.use_u: pool_U = self.crop_pool_layer(head, self.U_boxes, 'Crop_U', cfg.POOLING_SIZE) fc_U = self.res5(pool_U, is_training, True) fc1_U = slim.fully_connected(fc_U, self.num_fc, scope='fc1_U') fc1_U = slim.dropout(fc1_U, keep_prob=self.keep_prob, scope='drop1_U') fc2_U = slim.fully_connected(fc1_U, self.num_fc2, scope='fc2_U') fc2_U = slim.dropout(fc2_U, keep_prob=self.keep_prob, scope='drop2_U') self.predictions['fc_U'] = fc2_U # whether use bodypart if self.use_bp: bodyparts = tf.reshape(self.bodyparts, [-1, 5]) # N, 6, 5 -> N*6, 5 pool_bp = self.crop_pool_layer(head, bodyparts, 'Crop_part', 5) fc1_bp = slim.fully_connected(pool_bp, 256, scope='fc1_bp') fc1_bp = slim.dropout(fc1_bp, keep_prob=self.keep_prob, scope='dropout1_bp') fc2_bp = tf.reshape(fc1_bp, [-1, 6 * 256]) fc2_bp = slim.fully_connected(fc2_bp, 1024, scope='fc2_bp') fc2_bp = slim.dropout(fc2_bp, keep_prob=self.keep_prob, scope='dropout2_bp') self.predictions['bp_out'] = fc2_bp # whether use skeleton box if self.use_skebox: # skeleton input skeboxes = tf.reshape(self.skeboxes, [-1, 5]) # N*17, 5 pool5_skbox = self.crop_pool_layer(head, skeboxes, 'Crop_SK', 5) fc_skbox = self.res5(pool5_skbox, is_training, True) # N*17, channel fc1_sk = slim.fully_connected(fc_skbox, 256, scope='fc1_sk') fc1_sk = slim.dropout(fc1_sk, keep_prob=self.keep_prob, scope='dropout1_sk') fc1_sk = tf.reshape(fc1_sk, [-1, 17 * 256]) fc2_sk = slim.fully_connected(fc1_sk, 1024, scope='fc2_sk') fc2_sk = slim.dropout(fc2_sk, keep_prob=self.keep_prob, scope='dropout2_sk') fc3_sk = slim.fully_connected(fc2_sk, 1024, scope='fc3_sk') fc3_sk = slim.dropout(fc3_sk, keep_prob=self.keep_prob, scope='dropout3_sk') self.predictions['skbox_out'] = fc3_sk # whether use pose map if self.use_pm or self.use_bi_pm: conv1_pm = slim.conv2d(self.spatial[:, :, :, 2:], 32, [5, 5], padding='VALID', scope='conv1_pm') pool1_pm = slim.max_pool2d(conv1_pm, [2, 2], scope='pool1_pm') conv2_pm = slim.conv2d(pool1_pm, 16, [5, 5], padding='VALID', scope='conv2_pm') pool2_pm = slim.max_pool2d(conv2_pm, [2, 2], scope='pool2_pm') pool2_flat_pm = slim.flatten(pool2_pm) pm_fc = slim.fully_connected(pool2_flat_pm, 1024, scope='pm_fc') pm_fc = slim.dropout(pm_fc, keep_prob=self.keep_prob, is_training=is_training, scope='dropout_pm') self.predictions['pm_out'] = pm_fc # whether use spatial GCN if self.use_sg: x = self.spatial_GCN(self.SGinput, self.use_sg_att) # input N, C, T, V, M 
self.predictions['sgcn_out'] = x # whether use appearance GCN if self.use_ag: # body node input bodyparts = tf.reshape(self.bodyparts, [-1, 5]) # N, 6, 5 -> N*6, 5 pool_bp = self.crop_pool_layer(head, bodyparts, 'Crop_part', 5) fc1_bp = slim.fully_connected(pool_bp, 256, scope='fc1_bp') # skeleton input skeboxes = tf.reshape(self.skeboxes[:self.H_num], [-1, 5]) # N*17, 5 pool5_skbox = self.crop_pool_layer(head, skeboxes, 'Crop_SK', 5) fc_skbox = self.res5(pool5_skbox, is_training, True) # N*17, channel fc1_sk = slim.fully_connected(fc_skbox, 256, scope='fc1_sk') bp_in = tf.reshape(fc1_bp, [-1, 6, 256]) ske_in = tf.reshape(fc1_sk, [-1, 17, 256]) o_in = tf.reshape(slim.fully_connected(fc_O, 256, scope='ag_o_fc'), [-1, 1, 256]) agraph_x = tf.concat([ske_in, bp_in, o_in], axis=1) x = self.appearance_GCN(agraph_x, self.use_ag_att) self.predictions['agcn_out'] = x fc_HOsp = tf.concat([fc2_H, fc2_O, sp], 1) if self.use_u: fc_HOsp = tf.concat([fc_HOsp, self.predictions['fc_U']], axis=1) if self.use_sg: fc_HOsp = tf.concat([fc_HOsp, self.predictions['sgcn_out']], axis=1) # + 512 if self.use_ag: fc_HOsp = tf.concat([fc_HOsp, self.predictions['agcn_out']], axis=1) # + 1024 if self.use_pm: fc_HOsp = tf.concat([fc_HOsp, self.predictions['pm_out']], axis=1) # + 1024 if self.use_skebox: fc_HOsp = tf.concat([fc_HOsp, self.predictions['skbox_out']], axis=1) # + 1024 if self.use_bp: fc_HOsp = tf.concat([fc_HOsp, self.predictions['bp_out']], axis=1) # + 1024 self.region_classification(fc_HOsp, is_training, initializer, 'classification') if self.use_binary: if self.binary_type == 0: # Remain iCAN components for TIN binary head_phi = slim.conv2d(head, 512, [1, 1], scope='head_phi') head_g = slim.conv2d(head, 512, [1, 1], scope='head_g') Att_H = self.attention_pool_layer_H(head_phi, fc_H, is_training, 'Att_H') Att_H = self.attention_norm_H(Att_H, 'Norm_Att_H') att_head_H = tf.multiply(head_g, Att_H) Att_O = self.attention_pool_layer_O(head_phi, fc_O, is_training, 'Att_O') Att_O = self.attention_norm_O(Att_O, 'Norm_Att_O') att_head_O = tf.multiply(head_g, Att_O) pool5_SH = self.bottleneck(att_head_H, 'bottleneck', False) pool5_SO = self.bottleneck(att_head_O, 'bottleneck', True) fc7_SH, fc7_SO = self.head_to_tail(fc_H, fc_O, pool5_SH, pool5_SO, is_training) fc9_binary = self.binary_discriminator(fc_H, fc_O, fc7_SH, fc7_SO, sp, is_training, 'fc_binary') elif self.binary_type == 1: fc_bi = tf.concat([fc_H, fc_O, sp], axis=1) fc1_bi = slim.fully_connected(fc_bi, self.num_fc, scope='fc1_bi') fc1_bi = slim.dropout(fc1_bi, keep_prob=self.keep_prob, scope='drop1_bi') fc2_bi = slim.fully_connected(fc1_bi, self.num_fc, scope='fc2_bi') fc9_binary = slim.dropout(fc2_bi, keep_prob=self.keep_prob, scope='drop2_bi') elif self.binary_type == 2: fc_bi = tf.concat([fc_H, sp, self.predictions['pm_out']], axis=1) fc1_bi = slim.fully_connected(fc_bi, self.num_fc, scope='fc1_bi') fc1_bi = slim.dropout(fc1_bi, keep_prob=self.keep_prob, scope='drop1_bi') fc2_bi = slim.fully_connected(fc1_bi, self.num_fc, scope='fc2_bi') fc9_binary = slim.dropout(fc2_bi, keep_prob=self.keep_prob, scope='drop2_bi') else: raise NotImplementedError self.binary_classification(fc9_binary, is_training, initializer, 'binary_classification')
def teacher(input_images, keep_prob, lambda_decay=FLAGS.lambda_decay, is_training=True, weight_decay=0.00004, batch_norm_decay=0.99, batch_norm_epsilon=0.001): with tf.variable_scope("Teacher_model"): net, endpoints = resnet.resnet_v2(inputs=input_images, lambda_decay=lambda_decay, num_classes=FLAGS.num_class, is_training=True, scope='resnet_v2_50') # co-trained layers var_scope = 'Teacher_model/resnet_v2_50/' co_list_0 = slim.get_model_variables(var_scope + 'Conv2d_0') # co_list_1 = slim.get_model_variables(var_scope + 'InvertedResidual_16_') # co_list_2 = slim.get_model_variables(var_scope + 'InvertedResidual_24_') t_co_list = co_list_0 base_var_list = slim.get_variables() # for _ in range(2): # base_var_list.pop() lambda_c_list = slim.get_variables_by_name('lambda_c') lambda_b_list = slim.get_variables_by_name('lambda_b') t_lambda_list = lambda_c_list + lambda_b_list # print(lambda_b_list) # exit() t_net_var_list = [] for v in base_var_list: if v not in t_co_list and v not in t_lambda_list: t_net_var_list.append(v) # feature & attention t_g0 = endpoints["InvertedResidual_{}_{}".format(256, 2)] t_at0 = tf.nn.l2_normalize(tf.reduce_sum(tf.square(t_g0), -1), axis=0, name='t_at0') t_g1 = endpoints["InvertedResidual_{}_{}".format(512, 3)] t_at1 = tf.nn.l2_normalize(tf.reduce_sum(tf.square(t_g1), -1), axis=0, name='t_at1') part_feature = endpoints["InvertedResidual_{}_{}".format(1024, 5)] t_at2 = tf.nn.l2_normalize(tf.reduce_sum(tf.square(part_feature), -1), axis=0, name='t_at2') object_feature = endpoints["InvertedResidual_{}_{}".format(2048, 2)] t_at3 = tf.nn.l2_normalize(tf.reduce_sum(tf.square(object_feature), -1), axis=0, name='t_at3') # print(t_at1.get_shape().as_list()) # exit() t_g = (t_g0, t_g1, part_feature, object_feature) t_at = (t_at0, t_at1, t_at2, t_at3) fc_obj = slim.max_pool2d(object_feature, (6, 8), scope="GMP1") batch_norm_params = { 'center': True, 'scale': True, 'decay': batch_norm_decay, 'epsilon': batch_norm_epsilon, } fc_obj = slim.conv2d(fc_obj, M, [1, 1], activation_fn=None, weights_regularizer=tf.contrib.layers.l2_regularizer(weight_decay), biases_regularizer=tf.contrib.layers.l2_regularizer(weight_decay), scope='fc_obj') fc_obj = tf.nn.dropout(fc_obj, keep_prob=keep_prob) fc_obj = slim.flatten(fc_obj) fc_part = slim.conv2d(part_feature, M * k, # number of kernels [1, 1], # kernel height and width activation_fn=tf.nn.relu, normalizer_fn=slim.batch_norm, # use batch norm as the normalizer normalizer_params=batch_norm_params, weights_regularizer=tf.contrib.layers.l2_regularizer(weight_decay), biases_regularizer=tf.contrib.layers.l2_regularizer(weight_decay) ) # print('part', fc_part.get_shape()) fc_part = slim.max_pool2d(fc_part, (12, 16), scope="GMP2") ft_list = tf.split(fc_part, num_or_size_splits=FLAGS.num_class, axis=-1) # split along the last dimension (C) cls_list = [] for i in range(M): ft = tf.transpose(ft_list[i], [0, 1, 3, 2]) cls = layers_lib.pool(ft, [1, 10], "AVG") cls = layers.flatten(cls) cls_list.append(cls) fc_ccp = tf.concat(cls_list, axis=-1) # cross-channel pooling -> (N, M) fc_part = slim.conv2d(fc_part, FLAGS.num_class, [1, 1], activation_fn=None, weights_regularizer=tf.contrib.layers.l2_regularizer(weight_decay), biases_regularizer=tf.contrib.layers.l2_regularizer(weight_decay), scope="fc_part") fc_part = tf.nn.dropout(fc_part, keep_prob=keep_prob) fc_part = slim.flatten(fc_part) t_var_list = slim.get_model_variables() t_fc_var_list = [] for var in t_var_list: if var not in base_var_list: t_fc_var_list.append(var) return t_g, t_at, fc_obj, fc_part, fc_ccp, t_co_list, t_net_var_list, t_fc_var_list, t_lambda_list, t_var_list
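# The four attention maps t_at0..t_at3 in teacher() repeat one pattern: a channel-wise
# energy map followed by l2-normalization over the batch axis. A small helper capturing
# that pattern (a sketch, not part of the original code):
def _attention_map(feature, name):
    # sum of squares over the channel axis, then l2-normalize along axis 0,
    # mirroring the t_at* computations above
    return tf.nn.l2_normalize(tf.reduce_sum(tf.square(feature), -1), axis=0, name=name)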
def residual_conv_unit(input, depth=256): net = tf.nn.relu(input) net = slim.conv2d(net, depth, [3, 3]) net = tf.nn.relu(net) net = slim.conv2d(net, depth, [3, 3]) return input + net
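# residual_conv_unit() is typically applied in stacks of two before fusion in
# RefineNet-style decoders; a hedged convenience wrapper built on the function above:
def rcu_stack(features, n_units=2, depth=256):
    # chain several residual conv units on the same feature map
    for _ in range(n_units):
        features = residual_conv_unit(features, depth=depth)
    return features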
def FCN_Seg(self, is_training=True): # Set training hyper-parameters self.is_training = is_training self.normalizer = tc.layers.batch_norm self.bn_params = {'is_training': self.is_training} print("input", self.tgt_image) with tf.variable_scope('First_conv'): conv1 = tc.layers.conv2d(self.tgt_image, 32, 3, 1, normalizer_fn=self.normalizer, normalizer_params=self.bn_params) print("Conv1 shape") print(conv1.get_shape()) x = inverted_bottleneck(conv1, 1, 16, 0, self.normalizer, self.bn_params, 1) #print("Conv 1") #print(x.get_shape()) #180x180x24 x = inverted_bottleneck(x, 6, 24, 1, self.normalizer, self.bn_params, 2) x = inverted_bottleneck(x, 6, 24, 0, self.normalizer, self.bn_params, 3) print("Block One dim ") print(x) DB2_skip_connection = x #90x90x32 x = inverted_bottleneck(x, 6, 32, 1, self.normalizer, self.bn_params, 4) x = inverted_bottleneck(x, 6, 32, 0, self.normalizer, self.bn_params, 5) print("Block Two dim ") print(x) DB3_skip_connection = x #45x45x96 x = inverted_bottleneck(x, 6, 64, 1, self.normalizer, self.bn_params, 6) x = inverted_bottleneck(x, 6, 64, 0, self.normalizer, self.bn_params, 7) x = inverted_bottleneck(x, 6, 64, 0, self.normalizer, self.bn_params, 8) x = inverted_bottleneck(x, 6, 64, 0, self.normalizer, self.bn_params, 9) x = inverted_bottleneck(x, 6, 96, 0, self.normalizer, self.bn_params, 10) x = inverted_bottleneck(x, 6, 96, 0, self.normalizer, self.bn_params, 11) x = inverted_bottleneck(x, 6, 96, 0, self.normalizer, self.bn_params, 12) print("Block Three dim ") print(x) DB4_skip_connection = x #23x23x160 x = inverted_bottleneck(x, 6, 160, 1, self.normalizer, self.bn_params, 13) x = inverted_bottleneck(x, 6, 160, 0, self.normalizer, self.bn_params, 14) x = inverted_bottleneck(x, 6, 160, 0, self.normalizer, self.bn_params, 15) print("Block Four dim ") print(x) #23x23x320 x = inverted_bottleneck(x, 6, 320, 0, self.normalizer, self.bn_params, 16) print("Block Four dim ") print(x) # Configuration 1 - single upsampling layer if self.configuration == 1: #input is features named 'x' # TODO(1.1) - incorporate an upsample function which takes the features of x # and produces 120 output feature maps, which are 16x bigger in resolution than # x.
Remember if dim(upsampled_features) > dim(input image) you must crop # upsampled_features to the same resolution as the input image # output feature name should match the next convolution layer, for instance # current_up5 ######################################################################## # current_up5 -- 16x ######################################################################## #--- Use the slim.conv2d_transpose function provided in the presentation #--- Tricky part is to calculate the kernel, as the stride is just the upscale factor # MAGIC https://cv-tricks.com/image-segmentation/transpose-convolution-in-tensorflow/ #--- Upscale 16x using the stride and rectify the output #current_up5 = slim.conv2d_transpose(x, n_channels, [kernel_size, kernel_size], stride=upscale_factor, scope="config1_16x_current_up5") #current_up5 = tf.nn.relu(current_up5) current_up5 = TransitionUp_elu(x, 120, 16, "config1_16x_current_up5") #--- crop if bigger current_up5 = crop(current_up5, self.tgt_image) ######################################################################## End_maps_decoder1 = slim.conv2d(current_up5, self.N_classes, [1, 1], scope='Final_decoder') # (batchsize, width, height, N_classes) Reshaped_map = tf.reshape(End_maps_decoder1, (-1, self.N_classes)) print("End map size Decoder: ") print(Reshaped_map) # Configuration 2 - single upsampling layer plus skip connection if self.configuration == 2: #input is features named 'x' # TODO (2.1) - implement the refinement block which upsamples the data 2x like in configuration 1 # but also fuses the upsampled features with the corresponding skip connection (DB4_skip_connection) # through concatenation. After that use a convolution with kernel 3x3 to produce 256 output feature maps ######################################################################## # DB4_skip_connection -- 2x ######################################################################## #--- Complying with Figure 1. Transition up elu #--- current_up5 = slim.conv2d_transpose(x, n_channels, [kernel_size, kernel_size], stride=upscale_factor, scope="current_up5") current_up5 = TransitionUp_elu(x, 256, 2, "config2_2x_current_up5") #--- crop the bigger current_up5 = crop(current_up5, DB4_skip_connection) DB4_skip_connection = crop(DB4_skip_connection, current_up5) #--- Complying with Figure 1. Concatenation current_up5 = Concat_layers(current_up5, DB4_skip_connection) #--- Complying with Figure 1. Convolution current_up5 = tc.layers.conv2d(current_up5, 256, [3, 3], scope='config2_2x_conv') ######################################################################## # TODO (2.2) - incorporate an upsample function which takes the features from TODO (2.1) # and produces 120 output feature maps, which are 8x bigger in resolution than # TODO (2.1).
Remember if dim(upsampled_features) > dim(input image) you must crop # upsampled_features to the same resolution as the input image # output feature name should match the next convolution layer, for instance # current_up3 ######################################################################## # current_up3 -- 8x ######################################################################## #current_up3 = slim.conv2d_transpose(current_up5, n_channels, [kernel_size, kernel_size], stride=upscale_factor, scope="config2_8x_current_up3") #current_up3 = tf.nn.relu(current_up3) current_up3 = TransitionUp_elu(current_up5, 120, 8, "config2_8x_current_up3") #--- crop if bigger current_up3 = crop(current_up3, self.tgt_image) ######################################################################## End_maps_decoder1 = slim.conv2d(current_up3, self.N_classes, [1, 1], scope='Final_decoder') # (batchsize, width, height, N_classes) Reshaped_map = tf.reshape(End_maps_decoder1, (-1, self.N_classes)) print("End map size Decoder: ") print(Reshaped_map) # Configuration 3 - Two upsampling layers plus skip connection if self.configuration == 3: #input is features named 'x' # TODO (3.1) - implement the refinement block which upsamples the data 2x like in configuration 1 # but also fuses the upsampled features with the corresponding skip connection (DB4_skip_connection) # through concatenation. After that use a convolution with kernel 3x3 to produce 256 output feature maps ######################################################################## # DB4_skip_connection -- 2x ######################################################################## #--- Complying with Figure 1. Transition up elu #--- current_up5 = slim.conv2d_transpose(x, n_channels, [kernel_size, kernel_size], stride=upscale_factor, scope="current_up5") current_up5 = TransitionUp_elu(x, 256, 2, "config3_2x_current_up5") #--- crop the bigger current_up5 = crop(current_up5, DB4_skip_connection) DB4_skip_connection = crop(DB4_skip_connection, current_up5) #--- Complying with Figure 1. Concatenation current_up5 = Concat_layers(current_up5, DB4_skip_connection) #--- Complying with Figure 1. Convolution current_up5 = slim.conv2d(current_up5, 256, [3, 3], scope='config3_2x_conv') ######################################################################## # TODO (3.2) - Repeat TODO(3.1) now producing 160 output feature maps and fusing the upsampled features # with the corresponding skip connection (DB3_skip_connection) through concatenation. ######################################################################## # DB3_skip_connection -- 4x ######################################################################## #--- Complying with Figure 1. Transition up elu #--- current_up3 = slim.conv2d_transpose(current_up5, n_channels, [kernel_size, kernel_size], stride=upscale_factor, scope="current_up3") current_up3 = TransitionUp_elu(current_up5, 160, 2, "config3_4x_current_up3") #--- crop the bigger current_up3 = crop(current_up3, DB3_skip_connection) DB3_skip_connection = crop(DB3_skip_connection, current_up3) #--- Complying with Figure 1. Concatenation current_up3 = Concat_layers(current_up3, DB3_skip_connection) #--- Complying with Figure 1. Convolution current_up3 = slim.conv2d(current_up3, 160, [3, 3], scope='config3_4x_conv') ######################################################################## # TODO (3.3) - incorporate an upsample function which takes the features from TODO (3.2) # and produces 120 output feature maps which are 4x bigger in resolution than # TODO (3.2).
Remember if dim(upsampled_features) > dim(input image) you must crop # upsampled_features to the same resolution as the input image # output feature name should match the next convolution layer, for instance # current_up4 ######################################################################## # current_up4 -- 16x ######################################################################## #--- Upscale the remaining 4x #current_up4 = slim.conv2d_transpose(current_up3, n_channels, [kernel_size, kernel_size], stride=upscale_factor, scope="config3_16x_current_up4") #current_up4 = tf.nn.relu(current_up4) current_up4 = TransitionUp_elu(current_up3, 120, 4, "config3_16x_current_up4") #--- crop if bigger current_up4 = crop(current_up4, self.tgt_image) ######################################################################## End_maps_decoder1 = slim.conv2d(current_up4, self.N_classes, [1, 1], scope='Final_decoder') # (batchsize, width, height, N_classes) Reshaped_map = tf.reshape(End_maps_decoder1, (-1, self.N_classes)) print("End map size Decoder: ") print(Reshaped_map) # Full configuration if self.configuration == 4: ###################################################################################### ######################################### DECODER Full ############################################# #--- Commenting this line per feedback of Tutor. Use x directly #current_up2 = TransitionUp_elu(x, 96, 2, name='Upconv2') # TODO (4.1) - implement the refinement block which upsamples the data 2x like in configuration 1 # but also fuses the upsampled features with the corresponding skip connection (DB4_skip_connection) # through concatenation. After that use a convolution with kernel 3x3 to produce 256 output feature maps ######################################################################## # DB4_skip_connection -- 2x ######################################################################## #--- Complying with Figure 1. Transition up elu #--- current_up5 = slim.conv2d_transpose(x, n_channels, [kernel_size, kernel_size], stride=upscale_factor, scope="current_up5") current_up5 = TransitionUp_elu(x, 256, 2, "config4_2x_current_up5") #--- crop the bigger current_up5 = crop(current_up5, DB4_skip_connection) DB4_skip_connection = crop(DB4_skip_connection, current_up5) #--- Complying with Figure 1. Concatenation current_up5 = Concat_layers(current_up5, DB4_skip_connection) #--- Complying with Figure 1. Convolution current_up5 = slim.conv2d(current_up5, 256, [3, 3], scope='config4_2x_conv') ######################################################################## # TODO (4.2) - Repeat TODO(4.1) now producing 160 output feature maps and fusing the upsampled features # with the corresponding skip connection (DB3_skip_connection) through concatenation. ######################################################################## # DB3_skip_connection -- 4x ######################################################################## #--- Complying with Figure 1. Transition up elu #--- current_up3 = slim.conv2d_transpose(current_up5, n_channels, [kernel_size, kernel_size], stride=upscale_factor, scope="current_up3") current_up3 = TransitionUp_elu(current_up5, 160, 2, "config4_4x_current_up3") #--- crop the bigger current_up3 = crop(current_up3, DB3_skip_connection) DB3_skip_connection = crop(DB3_skip_connection, current_up3) #--- Complying with Figure 1. Concatenation current_up3 = Concat_layers(current_up3, DB3_skip_connection) #--- Complying with Figure 1.
Convolution current_up3 = slim.conv2d(current_up3, 160, [3, 3], scope='config4_4x_conv') ######################################################################## # TODO (4.3) - Repeat TODO(4.2) now producing 96 output feature maps and fusing the upsampled features # with the corresponding skip connection (DB2_skip_connection) through concatenation. ######################################################################## # current_up2 -- 8x ######################################################################## #--- Complying with Figure 1. Transition up elu #--- current_up2 = slim.conv2d_transpose(current_up3, n_channels, [kernel_size, kernel_size], stride=upscale_factor, scope="current_up2") current_up2 = TransitionUp_elu(current_up3, 96, 2, "config4_8x_current_up2") #--- crop the bigger current_up2 = crop(current_up2, DB2_skip_connection) DB2_skip_connection = crop(DB2_skip_connection, current_up2) #--- Complying with Figure 1. Concatenation current_up2 = Concat_layers(current_up2, DB2_skip_connection) #--- Complying with Figure 1. Convolution current_up2 = slim.conv2d(current_up2, 96, [3, 3], scope='config4_8x_conv') ######################################################################## # TODO (4.4) - incorporate an upsample function which takes the features from TODO(4.3) # and produces 120 output feature maps which are 2x bigger in resolution than # TODO(4.3). Remember if dim(upsampled_features) > dim(input image) you must crop # upsampled_features to the same resolution as the input image # output feature name should match the next convolution layer, for instance # current_up4 ######################################################################## # current_up4 -- 16x ######################################################################## #current_up4 = slim.conv2d_transpose(current_up2, n_channels, [kernel_size, kernel_size], stride=upscale_factor, scope="current_up4") #current_up4 = tf.nn.relu(current_up4) current_up4 = TransitionUp_elu(current_up2, 120, 2, "config4_16x_current_up4") #--- crop if bigger current_up4 = crop(current_up4, self.tgt_image) ######################################################################## End_maps_decoder1 = slim.conv2d(current_up4, self.N_classes, [1, 1], scope='Final_decoder') # (batchsize, width, height, N_classes) Reshaped_map = tf.reshape(End_maps_decoder1, (-1, self.N_classes)) print("End map size Decoder: ") print(Reshaped_map) return Reshaped_map
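# crop() and TransitionUp_elu() are used throughout FCN_Seg but are not defined in
# this snippet. A minimal sketch of crop(), assuming NHWC tensors and that `reference`
# is never larger than `tensor` along the spatial axes:
def crop(tensor, reference):
    ref_shape = tf.shape(reference)
    # keep the top-left ref_h x ref_w window so the spatial dims match the reference
    return tensor[:, :ref_shape[1], :ref_shape[2], :]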
def model(): x = tf.placeholder(dtype=tf.float32, shape=[batch_size, 32, 32, 3], name='Input') y = tf.placeholder(dtype=tf.float32, shape=[batch_size], name='True_Y') y = tf.cast(y, tf.int64) keep_prob = tf.placeholder(dtype=tf.float32, shape=(), name='dropout') is_training = tf.placeholder(tf.bool, shape=()) with slim.arg_scope([slim.conv2d, slim.fully_connected], activation_fn=tf.nn.crelu, normalizer_fn=slim.batch_norm, normalizer_params={ 'is_training': is_training, 'decay': 0.95 }): h = slim.conv2d(inputs=x, num_outputs=24, kernel_size=2, weights_regularizer=slim.l2_regularizer(0.0016)) h = slim.conv2d(inputs=h, num_outputs=57, kernel_size=3, weights_regularizer=slim.l2_regularizer(0.0001)) h = slim.conv2d(inputs=h, num_outputs=63, kernel_size=5, weights_regularizer=slim.l2_regularizer(0.0096)) h = slim.conv2d(inputs=h, num_outputs=35, kernel_size=5, weights_regularizer=slim.l2_regularizer(0.0071)) h = slim.conv2d(inputs=h, num_outputs=76, kernel_size=3, weights_regularizer=slim.l2_regularizer(0.0015)) h = slim.max_pool2d(h, kernel_size=2, stride=2) flatten = slim.flatten(h) full = slim.fully_connected(flatten, 512) drop_full = slim.dropout(full, keep_prob) with tf.name_scope('accuracy'): logits = slim.fully_connected(drop_full, 10, activation_fn=None) correct_prediction = tf.equal(tf.argmax(logits, 1), y) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) with tf.name_scope('loss'): loss = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( labels=y, logits=logits)) + tf.add_n( tf.losses.get_regularization_losses()) with tf.name_scope('train'): optimizer = tf.train.AdamOptimizer() step = tf.get_variable("step", [], initializer=tf.constant_initializer(0.0), trainable=False) train_op = slim.learning.create_train_op(loss, optimizer, global_step=step) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) if update_ops: updates = tf.group(*update_ops) loss = control_flow_ops.with_dependencies([updates], loss) with tf.Session() as sess: sess.run(tf.global_variables_initializer()) train_data, train_label = get_data.get_train_data(True) validate_data, validate_label = get_data.get_test_data(True) epochs = total_epochs for current_epoch in range(epochs): train_loss_list = [] train_accu_list = [] total_length = train_data.shape[0] idx = np.arange(total_length) np.random.shuffle(idx) train_data = train_data[idx] train_label = train_label[idx] total_steps = total_length // batch_size for step in range(total_steps): batch_train_data = train_data[step * batch_size:(step + 1) * batch_size] batch_train_label = train_label[step * batch_size:(step + 1) * batch_size] _, loss_v, accuracy_str = sess.run( [train_op, loss, accuracy], { x: batch_train_data, y: batch_train_label, keep_prob: 0.5, is_training: True }) train_loss_list.append(loss_v) train_accu_list.append(accuracy_str) #test test_length = validate_data.shape[0] test_steps = test_length // batch_size test_loss_list = [] test_accu_list = [] for step in range(test_steps): batch_test_data = validate_data[step * batch_size:(step + 1) * batch_size] batch_test_label = validate_label[step * batch_size:(step + 1) * batch_size] loss_v, accuracy_str = sess.run( [loss, accuracy], { x: batch_test_data, y: batch_test_label, keep_prob: 1.0, is_training: False }) test_loss_list.append(loss_v) test_accu_list.append(accuracy_str) print( '{}, epoch:{}/{}, step:{}/{}, loss:{:.6f}, accu:{:.4f}, test loss:{:.6f}, accu:{:.4f}' .format(datetime.now(), current_epoch, total_epochs, total_steps * current_epoch + step, total_steps * epochs, 
np.mean(train_loss_list), np.mean(train_accu_list), np.mean(test_loss_list), np.mean(test_accu_list)))
def build_whole_detection_network(self, input_img_batch, gtboxes_r_batch, gtboxes_h_batch): if self.is_training: # ensure shape is [M, 5] and [M, 6] gtboxes_r_batch = tf.reshape(gtboxes_r_batch, [-1, 6]) gtboxes_h_batch = tf.reshape(gtboxes_h_batch, [-1, 5]) gtboxes_r_batch = tf.cast(gtboxes_r_batch, tf.float32) gtboxes_h_batch = tf.cast(gtboxes_h_batch, tf.float32) img_shape = tf.shape(input_img_batch) # 1. build base network feature_to_cropped = self.build_base_network(input_img_batch) # 2. build rpn with tf.variable_scope('build_rpn', regularizer=slim.l2_regularizer( cfgs.WEIGHT_DECAY)): rpn_conv3x3 = slim.conv2d(feature_to_cropped, 512, [3, 3], trainable=self.is_training, weights_initializer=cfgs.INITIALIZER, activation_fn=tf.nn.relu, scope='rpn_conv/3x3') rpn_cls_score = slim.conv2d(rpn_conv3x3, self.num_anchors_per_location * 2, [1, 1], stride=1, trainable=self.is_training, weights_initializer=cfgs.INITIALIZER, activation_fn=None, scope='rpn_cls_score') rpn_box_pred = slim.conv2d( rpn_conv3x3, self.num_anchors_per_location * 4, [1, 1], stride=1, trainable=self.is_training, weights_initializer=cfgs.BBOX_INITIALIZER, activation_fn=None, scope='rpn_bbox_pred') rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4]) rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2]) rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob') # 3. generate_anchors featuremap_height, featuremap_width = tf.shape( feature_to_cropped)[1], tf.shape(feature_to_cropped)[2] featuremap_height = tf.cast(featuremap_height, tf.float32) featuremap_width = tf.cast(featuremap_width, tf.float32) anchors = anchor_utils.make_anchors( base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0], anchor_scales=cfgs.ANCHOR_SCALES, anchor_ratios=cfgs.ANCHOR_RATIOS, featuremap_height=featuremap_height, featuremap_width=featuremap_width, stride=cfgs.ANCHOR_STRIDE, name="make_anchors_forRPN") # with tf.variable_scope('make_anchors'): # anchors = anchor_utils.make_anchors(height=featuremap_height, # width=featuremap_width, # feat_stride=cfgs.ANCHOR_STRIDE[0], # anchor_scales=cfgs.ANCHOR_SCALES, # anchor_ratios=cfgs.ANCHOR_RATIOS, base_size=16 # ) # 4. postprocess rpn proposals. 
such as decode, clip and NMS with tf.variable_scope('postprocess_RPN'): # rpn_cls_prob = tf.reshape(rpn_cls_score, [-1, 2]) # rpn_cls_prob = slim.softmax(rpn_cls_prob, scope='rpn_cls_prob') # rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4]) rois, roi_scores = postprocess_rpn_proposals( rpn_bbox_pred=rpn_box_pred, rpn_cls_prob=rpn_cls_prob, img_shape=img_shape, anchors=anchors, is_training=self.is_training) # rois shape [-1, 4] # +++++++++++++++++++++++++++++++++++++add img smry+++++++++++++++++++++++++++++++++++++++++++++++++++++++ if self.is_training: rois_in_img = show_box_in_tensor.draw_boxes_with_categories( img_batch=input_img_batch, boxes=rois, scores=roi_scores) tf.summary.image('all_rpn_rois', rois_in_img) score_gre_05 = tf.reshape( tf.where(tf.greater_equal(roi_scores, 0.5)), [-1]) score_gre_05_rois = tf.gather(rois, score_gre_05) score_gre_05_score = tf.gather(roi_scores, score_gre_05) score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_categories( img_batch=input_img_batch, boxes=score_gre_05_rois, scores=score_gre_05_score) tf.summary.image('score_greater_05_rois', score_gre_05_in_img) # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ if self.is_training: with tf.variable_scope('sample_anchors_minibatch'): rpn_labels, rpn_bbox_targets = \ tf.py_func( anchor_target_layer, [gtboxes_h_batch, img_shape, anchors], [tf.float32, tf.float32]) rpn_bbox_targets = tf.reshape(rpn_bbox_targets, [-1, 4]) rpn_labels = tf.to_int32(rpn_labels, name="to_int32") rpn_labels = tf.reshape(rpn_labels, [-1]) self.add_anchor_img_smry(input_img_batch, anchors, rpn_labels) # --------------------------------------add smry----------------------------------------------------------- rpn_cls_category = tf.argmax(rpn_cls_prob, axis=1) kept_rpn = tf.reshape(tf.where(tf.not_equal(rpn_labels, -1)), [-1]) rpn_cls_category = tf.gather(rpn_cls_category, kept_rpn) acc = tf.reduce_mean( tf.to_float( tf.equal(rpn_cls_category, tf.to_int64(tf.gather(rpn_labels, kept_rpn))))) tf.summary.scalar('ACC/rpn_accuracy', acc) with tf.control_dependencies([rpn_labels]): with tf.variable_scope('sample_RCNN_minibatch'): rois, labels, bbox_targets_h, bbox_targets_r = \ tf.py_func(proposal_target_layer, [rois, gtboxes_h_batch, gtboxes_r_batch], [tf.float32, tf.float32, tf.float32, tf.float32]) rois = tf.reshape(rois, [-1, 4]) labels = tf.to_int32(labels) labels = tf.reshape(labels, [-1]) bbox_targets_h = tf.reshape(bbox_targets_h, [-1, 4 * (cfgs.CLASS_NUM + 1)]) bbox_targets_r = tf.reshape(bbox_targets_r, [-1, 5 * (cfgs.CLASS_NUM + 1)]) self.add_roi_batch_img_smry(input_img_batch, rois, labels) # -------------------------------------------------------------------------------------------------------------# # Fast-RCNN # # -------------------------------------------------------------------------------------------------------------# # 5. build Fast-RCNN # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10) bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r = self.build_fastrcnn( feature_to_cropped=feature_to_cropped, rois=rois, img_shape=img_shape) # bbox_pred shape: [-1, 4*(cls_num+1)].
# cls_score shape: [-1, cls_num+1] cls_prob_h = slim.softmax(cls_score_h, 'cls_prob_h') cls_prob_r = slim.softmax(cls_score_r, 'cls_prob_r') # ----------------------------------------------add smry------------------------------------------------------- if self.is_training: cls_category_h = tf.argmax(cls_prob_h, axis=1) fast_acc_h = tf.reduce_mean( tf.to_float(tf.equal(cls_category_h, tf.to_int64(labels)))) tf.summary.scalar('ACC/fast_acc_h', fast_acc_h) cls_category_r = tf.argmax(cls_prob_r, axis=1) fast_acc_r = tf.reduce_mean( tf.to_float(tf.equal(cls_category_r, tf.to_int64(labels)))) tf.summary.scalar('ACC/fast_acc_r', fast_acc_r) # 6. postprocess_fastrcnn if not self.is_training: final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h( rois=rois, bbox_ppred=bbox_pred_h, scores=cls_prob_h, img_shape=img_shape) final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r( rois=rois, bbox_ppred=bbox_pred_r, scores=cls_prob_r, img_shape=img_shape) return final_boxes_h, final_scores_h, final_category_h, final_boxes_r, final_scores_r, final_category_r else: ''' when training, we need to build the loss ''' loss_dict = self.build_loss(rpn_box_pred=rpn_box_pred, rpn_bbox_targets=rpn_bbox_targets, rpn_cls_score=rpn_cls_score, rpn_labels=rpn_labels, bbox_pred_h=bbox_pred_h, bbox_targets_h=bbox_targets_h, cls_score_h=cls_score_h, bbox_pred_r=bbox_pred_r, bbox_targets_r=bbox_targets_r, cls_score_r=cls_score_r, labels=labels) final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h( rois=rois, bbox_ppred=bbox_pred_h, scores=cls_prob_h, img_shape=img_shape) final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r( rois=rois, bbox_ppred=bbox_pred_r, scores=cls_prob_r, img_shape=img_shape) return final_boxes_h, final_scores_h, final_category_h, \ final_boxes_r, final_scores_r, final_category_r, loss_dict
def _network(inputs, image_shape, gt_bboxes, cls_names): if 'backbones' not in sys.path: sys.path.append('backbones') cnn = import_module(frc.BACKBONE, package='backbones') # CNN feature_map = cnn.inference(inputs) features = slim.conv2d(feature_map, 512, [3, 3], normalizer_fn=slim.batch_norm, normalizer_params={ 'decay': 0.995, 'epsilon': 0.0001 }, weights_regularizer=slim.l2_regularizer( frc.L2_WEIGHT), scope='rpn_feature') # RPN image_shape = tf.cast(tf.reshape(image_shape, [-1]), dtype=tf.int32) gt_bboxes = tf.cast(tf.reshape(gt_bboxes, [-1, 5]), dtype=tf.int32) rpn_cls_loss, rpn_cls_acc, rpn_bbox_loss, rois, labels, bbox_targets = rpn( features, image_shape, gt_bboxes) # Image summary for RPN rois class_names = frc.CLS_NAMES + cls_names display_rois_img = inputs[0] display_bg_indices = tf.reshape(tf.where(tf.equal(labels, 0)), [-1]) display_fg_indices = tf.reshape(tf.where(tf.not_equal(labels, 0)), [-1]) display_bg_rois = tf.gather(rois, display_bg_indices) display_fg_rois = tf.gather(rois, display_fg_indices) display_bg_img = tf.py_func(draw_rectangle, [display_rois_img, display_bg_rois], [tf.uint8]) display_fg_img = tf.py_func(draw_rectangle, [display_rois_img, display_fg_rois], [tf.uint8]) rpn_image_bg_summary = tf.summary.image('class_rois/background', display_bg_img) rpn_image_fg_summary = tf.summary.image('class_rois/foreground', display_fg_img) # RCNN cls_score, bbox_pred = faster_rcnn(features, rois, image_shape) cls_prob = slim.softmax(cls_score) cls_categories = tf.cast(tf.argmax(cls_prob, axis=1), dtype=tf.int32) rcnn_cls_acc = tf.reduce_mean( tf.cast(tf.equal(cls_categories, tf.cast(labels, tf.int32)), tf.float32)) final_bbox, final_score, final_categories = process_faster_rcnn( rois, bbox_pred, cls_prob, image_shape) rcnn_bbox_loss, rcnn_cls_loss = build_faster_rcnn_losses( bbox_pred, bbox_targets, cls_prob, labels, frc.NUM_CLS + 1) # ------------------------------BEGIN SUMMARY-------------------------------- # Add predicted bbox with confidence 0.25, 0.5, 0.75 and ground truth in image summary. 
with tf.name_scope('rcnn_image_summary'): # display_indices_25 = tf.reshape(tf.where(tf.greater_equal(final_score, 0.25) & # tf.less(final_score, 0.5) & # tf.not_equal(final_categories, 0)), [-1]) # display_indices_50 = tf.reshape(tf.where(tf.greater_equal(final_score, 0.5) & # tf.less(final_score, 0.75) & # tf.not_equal(final_categories, 0)), [-1]) display_indices_75 = tf.reshape( tf.where( tf.greater_equal(final_score, 0.75) & tf.not_equal(final_categories, 0)), [-1]) # display_bboxes_25 = tf.gather(final_bbox, display_indices_25) # display_bboxes_50 = tf.gather(final_bbox, display_indices_50) display_bboxes_75 = tf.gather(final_bbox, display_indices_75) # display_categories_25 = tf.gather(final_categories, display_indices_25) # display_categories_50 = tf.gather(final_categories, display_indices_50) display_categories_75 = tf.gather(final_categories, display_indices_75) # display_image_25 = tf.py_func(draw_rectangle_with_name, # [inputs[0], display_bboxes_25, display_categories_25, class_names], # [tf.uint8]) # display_image_50 = tf.py_func(draw_rectangle_with_name, # [inputs[0], display_bboxes_50, display_categories_50, class_names], # [tf.uint8]) display_image_75 = tf.py_func( draw_rectangle_with_name, [inputs[0], display_bboxes_75, display_categories_75, class_names], [tf.uint8]) display_image_gt = tf.py_func( draw_rectangle_with_name, [inputs[0], gt_bboxes[:, :-1], gt_bboxes[:, -1], class_names], [tf.uint8]) rcnn_gt_image_summary = tf.summary.image('detection/gt', display_image_gt) # tf.summary.image('detection/25', display_image_25) # tf.summary.image('detection/50', display_image_50) rcnn_75_image_summary = tf.summary.image('detection/75', display_image_75) image_summary = tf.summary.merge([ rpn_image_bg_summary, rpn_image_fg_summary, rcnn_75_image_summary, rcnn_gt_image_summary ]) # -------------------------------END SUMMARY--------------------------------- loss_dict = { 'rpn_cls_loss': rpn_cls_loss, 'rpn_bbox_loss': rpn_bbox_loss, 'rcnn_cls_loss': rcnn_cls_loss, 'rcnn_bbox_loss': rcnn_bbox_loss } acc_dict = {'rpn_cls_acc': rpn_cls_acc, 'rcnn_cls_acc': rcnn_cls_acc} return final_bbox, final_score, final_categories, loss_dict, acc_dict, image_summary
def CNN_1(stock_name, x_batch, y_batch): t1 = time.time() LR = .001 epsilonADAM = 1e-8 time_lenght = 80 num_nodes = 1 stock_num = 0 num_levels = 10 num_inputs = num_levels * 2 + 4 num_stocks = 1 batches = 500 num_classes = 3 cnn_filter_size = 3 pooling_filter_size = 2 num_filters_per_size = (64, 128, 256, 512) num_rep_block = (1, 1, 1, 1) epoch_limit = 40 keep_prob_train = 0.95 T = 1000 * batches T_eval = 100 * batches levels = 10 folder = '/home/leifan/Data/1Y/20Stocks_LoadRNNdata_1/' stock_file_name = stock_name + '_loadRNN_1.hdf5' model_identifier = stock_name HDF5_file = h5py.File(folder + stock_file_name, 'r') X_np_train = HDF5_file['X_train'] Y_np_train = HDF5_file['y_train'] X_np_eval = HDF5_file['X_test'] Y_np_eval = HDF5_file['y_test'] training_case = [ LR, num_rep_block, time_lenght, pooling_filter_size, cnn_filter_size, num_filters_per_size, num_levels, epsilonADAM, keep_prob_train, T, T_eval, batches, stock_name ] HP = '_' + 'num_rep_block' + str(num_rep_block) + '_' + 'batches' + str( batches) + '_' + 'time_lenght' + str( time_lenght) + '_' + 'Dropout' + str(keep_prob_train) def resUnit(input_layer, num_filters_per_size_i, cnn_filter_size, i, j): with tf.variable_scope("res_unit_" + str(i) + "_" + str(j)): part1 = slim.conv2d(input_layer, num_filters_per_size_i, [1, cnn_filter_size], activation_fn=None) part2 = slim.batch_norm(part1, activation_fn=None) part3 = tf.nn.relu(part2) part4 = slim.conv2d(part3, num_filters_per_size_i, [1, cnn_filter_size], activation_fn=None) part5 = slim.batch_norm(part4, activation_fn=None) part6 = tf.nn.relu(part5) output = part6 return output input_x1 = tf.placeholder(tf.float32, shape=[None, num_inputs, time_lenght], name="input_x") input_x = tf.reshape(input_x1, [-1, num_inputs, time_lenght, 1]) input_y = tf.placeholder(tf.int64, shape=[None], name="input_y") actual_y = input_y[:] dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob") h = slim.conv2d(input_x, num_filters_per_size[0], [num_inputs, cnn_filter_size], normalizer_fn=slim.batch_norm, scope='conv0', padding='VALID') for i in range(0, len(num_filters_per_size)): for j in range(0, num_rep_block[i]): h = resUnit(h, num_filters_per_size[i], cnn_filter_size, i, j) h = slim.max_pool2d(h, [1, pooling_filter_size], scope='pool_%s' % i) # ================ Layer FC ================ # Global avg max pooling h = math_ops.reduce_mean(h, [1, 2], name='pool5', keep_dims=True) # Conv h = slim.conv2d(h, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='output') # FC & Dropout scores = slim.flatten(h) u_prob = tf.nn.softmax(scores) pred1D = tf.argmax(scores, 1, name="predictions") with tf.name_scope("evaluate"): losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=scores, labels=input_y) loss = tf.reduce_mean(losses) correct_predictions = tf.equal(pred1D, input_y) accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy") session = tf.Session() header = ' ' path = header + stock_name + '_1' save_path = path + '/' + 'save' path = path + '/' save_path = save_path + '/' path_saver = save_path + "Test" + model_identifier + HP + ".ckpt" #save model saver = tf.train.Saver() saver.restore(session, path_saver) actual_out, probs = session.run([actual_y, u_prob], feed_dict={ input_x1: x_batch, input_y: y_batch, dropout_keep_prob: np.float32(1.0) }) return actual_out, probs
def inference(self, mode, inputs): is_training = mode == 'TRAIN' ###decode your inputs [image, im_info, gt_boxes] = inputs image.set_shape([None, None, None, 3]) im_info.set_shape([None, cfg.nr_info_dim]) if mode == 'TRAIN': gt_boxes.set_shape([None, None, 5]) ##end of decode num_anchors = len(cfg.anchor_scales) * len(cfg.anchor_ratios) bottleneck = resnet_v1.bottleneck blocks = [ resnet_utils.Block('block1', bottleneck, [(256, 64, 1, 1)] * 2 + [(256, 64, 1, 1)]), resnet_utils.Block('block2', bottleneck, [(512, 128, 2, 1)] + [(512, 128, 1, 1)] * 3), resnet_utils.Block('block3', bottleneck, [(1024, 256, 2, 1)] + [(1024, 256, 1, 1)] * 22), resnet_utils.Block('block4', bottleneck, [(2048, 512, 1, 2)] + [(2048, 512, 1, 2)] * 2) ] with slim.arg_scope(resnet_arg_scope(is_training=False)): with tf.variable_scope('resnet_v1_101', 'resnet_v1_101'): net = resnet_utils.conv2d_same( image, 64, 7, stride=2, scope='conv1') net = slim.max_pool2d( net, [3, 3], stride=2, padding='SAME', scope='pool1') net, _ = resnet_v1.resnet_v1( net, blocks[0:1], global_pool=False, include_root_block=False, scope='resnet_v1_101') with slim.arg_scope(resnet_arg_scope(is_training=is_training)): net_conv3, _ = resnet_v1.resnet_v1( net, blocks[1:2], global_pool=False, include_root_block=False, scope='resnet_v1_101') with slim.arg_scope(resnet_arg_scope(is_training=is_training)): net_conv4, _ = resnet_v1.resnet_v1( net_conv3, blocks[2:3], global_pool=False, include_root_block=False, scope='resnet_v1_101') with slim.arg_scope(resnet_arg_scope(is_training=is_training)): net_conv5, _ = resnet_v1.resnet_v1( net_conv4, blocks[-1:], global_pool=False, include_root_block=False, scope='resnet_v1_101') initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01) initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001) with tf.variable_scope( 'resnet_v1_101', 'resnet_v1_101', regularizer=tf.contrib.layers.l2_regularizer( cfg.weight_decay)): # rpn rpn = slim.conv2d( net_conv4, 512, [3, 3], trainable=is_training, weights_initializer=initializer, activation_fn=nn_ops.relu, scope="rpn_conv/3x3") rpn_cls_score = slim.conv2d( rpn, num_anchors * 2, [1, 1], trainable=is_training, weights_initializer=initializer, padding='VALID', activation_fn=None, scope='rpn_cls_score') rpn_bbox_pred = slim.conv2d( rpn, num_anchors * 4, [1, 1], trainable=is_training, weights_initializer=initializer, padding='VALID', activation_fn=None, scope='rpn_bbox_pred') # generate anchor height = tf.cast(tf.shape(rpn)[1], tf.float32) width = tf.cast(tf.shape(rpn)[2], tf.float32) anchors = generate_anchors_opr( height, width, cfg.stride[0], cfg.anchor_scales, cfg.anchor_ratios) # change it so that the score has 2 as its channel size rpn_cls_prob = tf.reshape(rpn_cls_score, [-1, 2]) rpn_cls_prob = tf.nn.softmax(rpn_cls_prob, name='rpn_cls_prob') rpn_cls_prob = tf.reshape(rpn_cls_prob, tf.shape(rpn_cls_score)) rois, roi_scores = proposal_opr( rpn_cls_prob, rpn_bbox_pred, im_info, mode, cfg.stride, anchors, num_anchors, is_tfchannel=True, is_tfnms=False) if is_training: with tf.variable_scope('anchor') as scope: rpn_labels, rpn_bbox_targets = \ tf.py_func( anchor_target_layer, [gt_boxes, im_info, cfg.stride, anchors, num_anchors], [tf.float32, tf.float32]) rpn_labels = tf.to_int32(rpn_labels, name="to_int32") with tf.control_dependencies([rpn_labels]): with tf.variable_scope('rpn_rois') as scope: rois, labels, bbox_targets = \ tf.py_func( proposal_target_layer, [rois, gt_boxes, im_info], [tf.float32, tf.float32, tf.float32]) labels = tf.to_int32(labels, 
name="to_int32") with tf.variable_scope( 'resnet_v1_101', 'resnet_v1_101', regularizer=tf.contrib.layers.l2_regularizer( cfg.weight_decay)): ps_chl = 7 * 7 * 10 ps_fm = rfcn_plus_plus_opr.global_context_module( net_conv5, prefix='conv_new_1', ks=15, chl_mid=256, chl_out=ps_chl) ps_fm = nn_ops.relu(ps_fm) [psroipooled_rois, _, _] = psalign_pooling_op.psalign_pool( ps_fm, rois, group_size=7, sample_height=2, sample_width=2, spatial_scale=1.0/16.0) #[psroipooled_rois, _] = psroi_pooling_op.psroi_pool( # ps_fm, rois, group_size=7, spatial_scale=1.0 / 16.0) psroipooled_rois = slim.flatten(psroipooled_rois) ps_fc_1 = slim.fully_connected( psroipooled_rois, 2048, weights_initializer=initializer, activation_fn=nn_ops.relu, trainable=is_training, scope='ps_fc_1') cls_score = slim.fully_connected( ps_fc_1, cfg.num_classes, weights_initializer=initializer, activation_fn=None, trainable=is_training, scope='cls_fc') bbox_pred = slim.fully_connected( ps_fc_1, 4 * cfg.num_classes, weights_initializer=initializer_bbox, activation_fn=None, trainable=is_training, scope='bbox_fc') cls_prob = loss_opr.softmax_layer(cls_score, "cls_prob") #conv_new_1 = slim.conv2d( # net_conv5, 1024, [1, 1], trainable=is_training, # weights_initializer=initializer, activation_fn=nn_ops.relu, # scope="conv_new_1") #rfcn_cls = slim.conv2d( # conv_new_1, 7 * 7 * cfg.num_classes, [1, 1], # trainable=is_training, weights_initializer=initializer, # activation_fn=None, scope="rfcn_cls") #rfcn_bbox = slim.conv2d( # conv_new_1, 7 * 7 * 4, [1, 1], trainable=is_training, # weights_initializer=initializer, # activation_fn=None, scope="rfcn_bbox") #[psroipooled_cls_rois, _] = psroi_pooling_op.psroi_pool( # rfcn_cls, rois, group_size=7, spatial_scale=1.0 / 16.0) #[psroipooled_loc_rois, _] = psroi_pooling_op.psroi_pool( # rfcn_bbox, rois, group_size=7, spatial_scale=1.0 / 16.0) #cls_score = tf.reduce_mean(psroipooled_cls_rois, axis=[1, 2]) #bbox_pred = tf.reduce_mean(psroipooled_loc_rois, axis=[1, 2]) #cls_prob = loss_opr.softmax_layer(cls_score, "cls_prob") # cls_prob = tf.nn.softmax(cls_score, name="cls_prob") #bbox_pred = tf.tile(bbox_pred, [1, cfg.num_classes]) if not is_training: stds = np.tile( np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (cfg.num_classes)) means = np.tile( np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (cfg.num_classes)) bbox_pred *= stds bbox_pred += means ##############add prediction##################### tf.add_to_collection("rpn_cls_score", rpn_cls_score) tf.add_to_collection("rpn_cls_prob", rpn_cls_prob) tf.add_to_collection("rpn_bbox_pred", rpn_bbox_pred) tf.add_to_collection("cls_score", cls_score) tf.add_to_collection("cls_prob", cls_prob) tf.add_to_collection("bbox_pred", bbox_pred) tf.add_to_collection("rois", rois) else: #-------------------- rpn loss ---------------------------------# from detection_opr.utils import loss_opr_without_box_weight rpn_loss_box = loss_opr_without_box_weight.smooth_l1_loss_rpn( tf.reshape(rpn_bbox_pred, [-1, 4]), tf.reshape(rpn_bbox_targets, [-1, 4]), tf.reshape(rpn_labels, [-1]), sigma=cfg.simga_rpn) rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2]) rpn_label = tf.reshape(rpn_labels, [-1]) rpn_select = tf.where(tf.not_equal(rpn_label, -1)) rpn_cls_score = tf.reshape( tf.gather(rpn_cls_score, rpn_select), [-1, 2]) rpn_label = tf.reshape(tf.gather(rpn_label, rpn_select), [-1]) rpn_cross_entropy = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( logits=rpn_cls_score, labels=rpn_label)) #-------------------- rcnn loss --------------------------------# label = 
tf.reshape(labels, [-1]) cross_entropy, loss_box = loss_opr_without_box_weight.sum_ohem_loss( tf.reshape(cls_score, [-1, cfg.num_classes]), label, bbox_pred, bbox_targets, cfg.TRAIN.nr_ohem_sampling, cfg.num_classes) loss_box *= 2 #--------------------add to colloection ------------------------# tf.add_to_collection('loss_cls', cross_entropy) tf.add_to_collection('loss_box', loss_box) tf.add_to_collection('rpn_loss_cls', rpn_cross_entropy) tf.add_to_collection('rpn_loss_box', rpn_loss_box) loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box tf.add_to_collection('losses', loss) return loss
def discriminator(x, f_dim, output_size, c_dim, is_training=True): # Network net = x argscope_conv2d = slim.arg_scope([slim.conv2d], kernel_size=[4, 4], stride=[2, 2], activation_fn=tf.nn.relu) with argscope_conv2d: net = slim.conv2d(net, f_dim) dnet = slim.conv2d(net, f_dim, kernel_size=[3, 3], stride=[1, 1]) net += 1e-1 * slim.conv2d(dnet, f_dim, kernel_size=[3, 3], stride=[1, 1]) net = slim.conv2d(net, f_dim) dnet = slim.conv2d(net, f_dim // 2, kernel_size=[3, 3], stride=[1, 1]) net += 1e-1 * slim.conv2d(dnet, f_dim, kernel_size=[3, 3], stride=[1, 1]) net = slim.conv2d(net, f_dim) dnet = slim.conv2d(net, f_dim // 2, kernel_size=[3, 3], stride=[1, 1]) net += 1e-1 * slim.conv2d(dnet, f_dim, kernel_size=[3, 3], stride=[1, 1]) net = slim.conv2d(net, f_dim) dnet = slim.conv2d(net, f_dim // 2, kernel_size=[3, 3], stride=[1, 1]) net += 1e-1 * slim.conv2d(dnet, f_dim, kernel_size=[3, 3], stride=[1, 1]) net = tf.reshape(net, [-1, output_size // 16 * output_size // 16 * f_dim]) logits = slim.fully_connected(net, 1, activation_fn=None) logits = tf.squeeze(logits, -1) return logits
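# discriminator() repeats one scaled-residual motif four times; factoring it out makes
# the intent explicit. A sketch only: the first block above uses f_dim rather than
# f_dim // 2 for the bottleneck, so the bottleneck width is left as a parameter here.
def scaled_residual(net, f_dim, bottleneck_dim, scale=1e-1):
    dnet = slim.conv2d(net, bottleneck_dim, kernel_size=[3, 3], stride=[1, 1])
    # add a small (0.1x) learned correction on top of the identity path
    return net + scale * slim.conv2d(dnet, f_dim, kernel_size=[3, 3], stride=[1, 1])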
def UpsampleConv(inputs, num_outputs, kernel_size, stride=1, padding='SAME', rate=1, name='UpsampleConv', **kwargs): with tf.variable_scope(name): h,w = inputs.shape.as_list()[1:3] output = tf.image.resize_nearest_neighbor(inputs, size=[2*h, 2*w]) output = slim.conv2d(output, num_outputs, kernel_size, stride=stride, padding=padding, rate=rate, **kwargs) return output
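# Usage sketch for UpsampleConv (illustrative shapes): nearest-neighbor resizing
# followed by a convolution is a common alternative to slim.conv2d_transpose that
# avoids checkerboard artifacts.
#   x = tf.placeholder(tf.float32, [None, 16, 16, 64])
#   y = UpsampleConv(x, num_outputs=32, kernel_size=[3, 3], name='up1')
#   # y has shape [None, 32, 32, 32]: spatial dims doubled, 32 output channels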
def DiscriminatorCNN(x, input_channel, z_num, hidden_num, data_format): with tf.variable_scope("D") as vs: # Encoder x = slim.conv2d(x, hidden_num, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) x = slim.conv2d(x, 128, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) x = slim.conv2d(x, 128, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) x = slim.conv2d(x, 128, 3, 2, activation_fn=tf.nn.elu, data_format=data_format) x = slim.conv2d(x, 256, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) x = slim.conv2d(x, 256, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) x = slim.conv2d(x, 256, 3, 2, activation_fn=tf.nn.elu, data_format=data_format) x = slim.conv2d(x, 384, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) x = slim.conv2d(x, 384, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) x = slim.conv2d(x, 384, 3, 2, activation_fn=tf.nn.elu, data_format=data_format) x = slim.conv2d(x, 512, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) x = slim.conv2d(x, 512, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) x = tf.reshape(x, [-1, np.prod([8, 8, 512])]) z_out = slim.fully_connected(x, z_num, activation_fn=None) x = slim.fully_connected(x, z_num, activation_fn=None) # Decoder num_output = np.prod([8, 8, hidden_num]) x = slim.fully_connected(x, num_output, activation_fn=None) x = reshape(x, 8, 8, hidden_num, data_format) # 1 x = slim.conv2d(x, hidden_num, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) x = slim.conv2d(x, hidden_num, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) x = upscale(x, 2, data_format) # 2 x = slim.conv2d(x, hidden_num, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) x = slim.conv2d(x, hidden_num, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) x = upscale(x, 2, data_format) # 3 x = slim.conv2d(x, hidden_num, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) x = slim.conv2d(x, hidden_num, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) x = upscale(x, 2, data_format) # 4 x = slim.conv2d(x, hidden_num, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) x = slim.conv2d(x, hidden_num, 3, 1, activation_fn=tf.nn.elu, data_format=data_format) out = slim.conv2d(x, input_channel, 3, 1, activation_fn=None, data_format=data_format) variables = tf.contrib.framework.get_variables(vs) return out, z_out, variables
def network(input):
    # U-Net: a five-level encoder with stride-2 max pooling, mirrored by a
    # decoder that upsamples and concatenates matching encoder features.
    conv1 = slim.conv2d(input, 32, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv1_1')
    conv1 = slim.conv2d(conv1, 32, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv1_2')
    pool1 = slim.max_pool2d(conv1, [2, 2], padding='SAME')

    conv2 = slim.conv2d(pool1, 64, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv2_1')
    conv2 = slim.conv2d(conv2, 64, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv2_2')
    pool2 = slim.max_pool2d(conv2, [2, 2], padding='SAME')

    conv3 = slim.conv2d(pool2, 128, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv3_1')
    conv3 = slim.conv2d(conv3, 128, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv3_2')
    pool3 = slim.max_pool2d(conv3, [2, 2], padding='SAME')

    conv4 = slim.conv2d(pool3, 256, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv4_1')
    conv4 = slim.conv2d(conv4, 256, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv4_2')
    pool4 = slim.max_pool2d(conv4, [2, 2], padding='SAME')

    conv5 = slim.conv2d(pool4, 512, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv5_1')
    conv5 = slim.conv2d(conv5, 512, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv5_2')

    up6 = upsample_and_concat(conv5, conv4, 256, 512)
    conv6 = slim.conv2d(up6, 256, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv6_1')
    conv6 = slim.conv2d(conv6, 256, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv6_2')

    up7 = upsample_and_concat(conv6, conv3, 128, 256)
    conv7 = slim.conv2d(up7, 128, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv7_1')
    conv7 = slim.conv2d(conv7, 128, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv7_2')

    up8 = upsample_and_concat(conv7, conv2, 64, 128)
    conv8 = slim.conv2d(up8, 64, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv8_1')
    conv8 = slim.conv2d(conv8, 64, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv8_2')

    up9 = upsample_and_concat(conv8, conv1, 32, 64)
    conv9 = slim.conv2d(up9, 32, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv9_1')
    conv9 = slim.conv2d(conv9, 32, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv9_2')

    # 12 channels at input resolution become 3 channels at twice the resolution.
    conv10 = slim.conv2d(conv9, 12, [1, 1], rate=1, activation_fn=None, scope='g_conv10')
    out = tf.depth_to_space(conv10, 2)
    return out
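# Sketch (assumed input): with a packed 4-channel raw input, the final
# depth_to_space turns the 12-channel map into a 3-channel image at twice the
# input resolution. Requires the external lrelu/upsample_and_concat helpers.
def _network_shape_example():
    raw = tf.placeholder(tf.float32, [None, 512, 512, 4])
    rgb = network(raw)  # -> [None, 1024, 1024, 3]
    return rgb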
def build_adaptnet(inputs, num_classes):
    """
    Builds the AdaptNet model.

    Arguments:
      inputs: The input tensor
      num_classes: Number of classes

    Returns:
      AdaptNet model
    """
    net = ConvBlock(inputs, n_filters=64, kernel_size=[3, 3])
    net = ConvBlock(net, n_filters=64, kernel_size=[7, 7], stride=2)
    net = slim.pool(net, [2, 2], stride=[2, 2], pooling_type='MAX')

    net = ResNetBlock_2(net, filters_1=64, filters_2=256, s=1)
    net = ResNetBlock_1(net, filters_1=64, filters_2=256)
    net = ResNetBlock_1(net, filters_1=64, filters_2=256)

    net = ResNetBlock_2(net, filters_1=128, filters_2=512, s=2)
    net = ResNetBlock_1(net, filters_1=128, filters_2=512)
    net = ResNetBlock_1(net, filters_1=128, filters_2=512)

    # Low-level skip branch, taken at 1/8 resolution.
    skip_connection = ConvBlock(net, n_filters=12, kernel_size=[1, 1])

    net = MultiscaleBlock_1(net, filters_1=128, filters_2=512, filters_3=64, p=1, d=2)

    net = ResNetBlock_2(net, filters_1=256, filters_2=1024, s=2)
    net = ResNetBlock_1(net, filters_1=256, filters_2=1024)
    net = MultiscaleBlock_1(net, filters_1=256, filters_2=1024, filters_3=64, p=1, d=2)
    net = MultiscaleBlock_1(net, filters_1=256, filters_2=1024, filters_3=64, p=1, d=4)
    net = MultiscaleBlock_1(net, filters_1=256, filters_2=1024, filters_3=64, p=1, d=8)
    net = MultiscaleBlock_1(net, filters_1=256, filters_2=1024, filters_3=64, p=1, d=16)

    net = MultiscaleBlock_2(net, filters_1=512, filters_2=2048, filters_3=512, p=2, d=4)
    net = MultiscaleBlock_1(net, filters_1=512, filters_2=2048, filters_3=512, p=2, d=8)
    net = MultiscaleBlock_1(net, filters_1=512, filters_2=2048, filters_3=512, p=2, d=16)

    net = ConvBlock(net, n_filters=12, kernel_size=[1, 1])
    net = Upsampling(net, scale=2)
    net = tf.add(skip_connection, net)
    net = Upsampling(net, scale=8)

    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')
    return net
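# Sketch: the backbone downsamples by 16x overall, and the 2x then 8x
# Upsampling calls restore full resolution, so the logits match the input
# spatial size. The 256x256 placeholder is an illustrative assumption; the
# ConvBlock/ResNetBlock/MultiscaleBlock/Upsampling helpers must be defined.
def _adaptnet_usage_example():
    images = tf.placeholder(tf.float32, [None, 256, 256, 3])
    logits = build_adaptnet(images, num_classes=12)  # [None, 256, 256, 12]
    return logits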
def build_graph(self, onehot_cards):
    scope = 'AutoEncoder'
    with tf.variable_scope(scope):
        with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                            weights_regularizer=slim.l2_regularizer(1e-3)):
            input_conv = tf.reshape(onehot_cards, [-1, 1, INPUT_DIM, 1])
            # Parallel convolutions with kernel widths 1-4 pick up singles,
            # pairs, triples and quads before being concatenated.
            single_conv = slim.conv2d(activation_fn=None, inputs=input_conv, num_outputs=32,
                                      kernel_size=[1, 1], stride=[1, 4], padding='SAME')
            pair_conv = slim.conv2d(activation_fn=None, inputs=input_conv, num_outputs=32,
                                    kernel_size=[1, 2], stride=[1, 4], padding='SAME')
            triple_conv = slim.conv2d(activation_fn=None, inputs=input_conv, num_outputs=32,
                                      kernel_size=[1, 3], stride=[1, 4], padding='SAME')
            quadric_conv = slim.conv2d(activation_fn=None, inputs=input_conv, num_outputs=32,
                                       kernel_size=[1, 4], stride=[1, 4], padding='SAME')
            conv = tf.concat([single_conv, pair_conv, triple_conv, quadric_conv], -1)

            encoding_params = [[128, 3, 'identity'], [128, 3, 'identity'],
                               [128, 3, 'downsampling'], [128, 3, 'identity'],
                               [128, 3, 'identity'], [256, 3, 'downsampling'],
                               [256, 3, 'identity'], [256, 3, 'identity']]
            for param in encoding_params:
                if param[-1] == 'identity':
                    conv = identity_block(conv, param[0], param[1])
                elif param[-1] == 'upsampling':
                    conv = upsample_block(conv, param[0], param[1])
                elif param[-1] == 'downsampling':
                    conv = downsample_block(conv, param[0], param[1])
                else:
                    raise Exception('unsupported layer type')
            conv = tf.reduce_mean(conv, [1, 2], True)
            encoding = tf.identity(conv, name='encoding')

            # is_training = get_current_tower_context().is_training
            # if not is_training:
            #     return

            decoding_params = [[256, 4, 'upsampling'], [256, 3, 'identity'],
                               [256, 3, 'identity'], [256, 4, 'upsampling'],
                               [128, 3, 'identity'], [128, 3, 'identity'],
                               [128, 4, 'upsampling'], [128, 3, 'identity'],
                               [1, 3, 'identity']]
            for param in decoding_params:
                if param[-1] == 'identity':
                    conv = identity_block(conv, param[0], param[1])
                elif param[-1] == 'upsampling':
                    conv = upsample_block(conv, param[0], param[1])
                elif param[-1] == 'downsampling':
                    conv = downsample_block(conv, param[0], param[1])
                else:
                    raise Exception('unsupported layer type')
            print(conv.shape)

            decoded = tf.reshape(conv, [-1, conv.shape[1] * conv.shape[2] * conv.shape[3]])
            # The target is padded by 4 so its width matches the decoder output.
            reconstruct_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.pad(onehot_cards, [[0, 0], [0, 4]]), logits=decoded)
            reconstruct_loss = tf.reduce_mean(tf.reduce_sum(reconstruct_loss, -1),
                                              name='reconstruct_loss')
            l2_loss = tf.truediv(regularize_cost_from_collection(),
                                 tf.cast(tf.shape(onehot_cards)[0], tf.float32),
                                 name='l2_loss')
            add_moving_summary(reconstruct_loss, decay=0)
            add_moving_summary(l2_loss, decay=0)
            loss = reconstruct_loss + l2_loss
            return loss
def neural_networks():
    # Input: [batch size, image width, image height], i.e. [-1, -1, 16]
    inputs = tf.placeholder(tf.float32, [None, None, image_height], name="inputs")
    # ctc_loss takes the labels as a sparse tensor
    labels = tf.sparse_placeholder(tf.int32, name="labels")
    # 1-D vector of size [batch_size], equal to np.ones(batch_size) * image_width
    seq_len = tf.placeholder(tf.int32, [None], name="seq_len")
    keep_prob = tf.placeholder(tf.float32, name="keep_prob")
    drop_prob = 1 - keep_prob

    shape = tf.shape(inputs)
    batch_size, image_width = shape[0], shape[1]

    layer = tf.reshape(inputs, [batch_size, image_width, image_height, 1])
    layer = slim.conv2d(layer, 64, [3, 3], normalizer_fn=slim.batch_norm)
    for i in range(POOL_COUNT):
        for j in range(10):
            layer = addResLayer(layer)
        layer = slim.conv2d(layer, 64, [3, 3], stride=[2, 2], normalizer_fn=slim.batch_norm)

    # [batch_size, image_width*image_height//POOL_SIZE//POOL_SIZE, 64]
    layer = tf.reshape(layer, [batch_size, -1, 64])

    num_hidden = 128
    with tf.variable_scope('RNN1'):
        cell_fw = tf.contrib.rnn.GRUCell(num_hidden // 2)
        cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, input_keep_prob=keep_prob,
                                                output_keep_prob=keep_prob)
        cell_bw = tf.contrib.rnn.GRUCell(num_hidden // 2)
        cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, input_keep_prob=keep_prob,
                                                output_keep_prob=keep_prob)
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, layer, seq_len,
                                                     dtype=tf.float32)
    # [batch_size, image_width*image_height//POOL_SIZE//POOL_SIZE, num_hidden]
    outputs = tf.concat(outputs, axis=2)

    layer = tf.reshape(outputs, [-1, num_hidden])
    layer = tf.layers.dense(layer, num_hidden)
    layer = tf.reshape(layer, [batch_size, -1, num_hidden])

    with tf.variable_scope('RNN2'):
        cell_fw = tf.contrib.rnn.GRUCell(num_hidden // 2)
        cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, input_keep_prob=keep_prob,
                                                output_keep_prob=keep_prob)
        cell_bw = tf.contrib.rnn.GRUCell(num_hidden // 2)
        cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, input_keep_prob=keep_prob,
                                                output_keep_prob=keep_prob)
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, layer, seq_len,
                                                     dtype=tf.float32)
    # [batch_size, image_width*image_height//POOL_SIZE//POOL_SIZE, num_hidden]
    outputs = tf.concat(outputs, axis=2)

    layer = tf.reshape(outputs, [-1, num_hidden])
    # layer = tf.layers.dense(layer, 512, activation=tf.nn.relu)
    # layer = tf.layers.dropout(layer, drop_prob)

    # No extra tf.nn.softmax layer is needed here: ctc_loss applies it internally
    layer = tf.layers.dense(layer, num_classes)
    # Output logits: [batch_size, max_time, num_classes]
    layer = tf.reshape(layer, [batch_size, -1, num_classes])
    # Transpose to time-major == True: [max_time, batch_size, num_classes]
    logits = tf.transpose(layer, (1, 0, 2), name="logits")

    return logits, inputs, labels, seq_len, keep_prob
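# Hedged sketch of the usual companion ops for the time-major logits above:
# CTC loss for training and a greedy decoder for inference.
def _ctc_training_ops_example():
    logits, inputs, labels, seq_len, keep_prob = neural_networks()
    loss = tf.reduce_mean(
        tf.nn.ctc_loss(labels=labels, inputs=logits, sequence_length=seq_len))
    decoded, _ = tf.nn.ctc_greedy_decoder(logits, seq_len)
    return loss, decoded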
def generator(self, inputs, reuse=False, scope='g_net'):
    n, h, w, c = inputs.get_shape().as_list()

    if self.args.model == 'lstm':
        with tf.variable_scope('LSTM'):
            # Integer division keeps the cell's spatial size valid on Python 3.
            cell = BasicConvLSTMCell([h // 4, w // 4], [3, 3], 128)
            rnn_state = cell.zero_state(batch_size=self.batch_size, dtype=tf.float32)

    x_unwrap = []
    with tf.variable_scope(scope, reuse=reuse):
        with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                            activation_fn=tf.nn.relu,
                            padding='SAME',
                            normalizer_fn=None,
                            weights_initializer=tf.contrib.layers.xavier_initializer(uniform=True),
                            biases_initializer=tf.constant_initializer(0.0)):

            inp_pred = inputs
            # Coarse-to-fine: process the pyramid from the smallest scale up,
            # feeding each level's prediction into the next.
            for i in range(self.n_levels):
                scale = self.scale ** (self.n_levels - i - 1)
                hi = int(round(h * scale))
                wi = int(round(w * scale))
                inp_blur = tf.image.resize_images(inputs, [hi, wi], method=0)
                inp_pred = tf.stop_gradient(tf.image.resize_images(inp_pred, [hi, wi], method=0))
                inp_all = tf.concat([inp_blur, inp_pred], axis=3, name='inp')
                if self.args.model == 'lstm':
                    rnn_state = tf.image.resize_images(rnn_state, [hi // 4, wi // 4], method=0)

                # encoder
                conv1_1 = slim.conv2d(inp_all, 32, [5, 5], scope='enc1_1')
                conv1_2 = ResnetBlock(conv1_1, 32, 5, scope='enc1_2')
                conv1_3 = ResnetBlock(conv1_2, 32, 5, scope='enc1_3')
                conv1_4 = ResnetBlock(conv1_3, 32, 5, scope='enc1_4')
                conv2_1 = slim.conv2d(conv1_4, 64, [5, 5], stride=2, scope='enc2_1')
                conv2_2 = ResnetBlock(conv2_1, 64, 5, scope='enc2_2')
                conv2_3 = ResnetBlock(conv2_2, 64, 5, scope='enc2_3')
                conv2_4 = ResnetBlock(conv2_3, 64, 5, scope='enc2_4')
                conv3_1 = slim.conv2d(conv2_4, 128, [5, 5], stride=2, scope='enc3_1')
                conv3_2 = ResnetBlock(conv3_1, 128, 5, scope='enc3_2')
                conv3_3 = ResnetBlock(conv3_2, 128, 5, scope='enc3_3')
                conv3_4 = ResnetBlock(conv3_3, 128, 5, scope='enc3_4')

                if self.args.model == 'lstm':
                    deconv3_4, rnn_state = cell(conv3_4, rnn_state)
                else:
                    deconv3_4 = conv3_4

                # decoder with additive skip connections from the encoder
                deconv3_3 = ResnetBlock(deconv3_4, 128, 5, scope='dec3_3')
                deconv3_2 = ResnetBlock(deconv3_3, 128, 5, scope='dec3_2')
                deconv3_1 = ResnetBlock(deconv3_2, 128, 5, scope='dec3_1')
                deconv2_4 = slim.conv2d_transpose(deconv3_1, 64, [4, 4], stride=2, scope='dec2_4')
                cat2 = deconv2_4 + conv2_4
                deconv2_3 = ResnetBlock(cat2, 64, 5, scope='dec2_3')
                deconv2_2 = ResnetBlock(deconv2_3, 64, 5, scope='dec2_2')
                deconv2_1 = ResnetBlock(deconv2_2, 64, 5, scope='dec2_1')
                deconv1_4 = slim.conv2d_transpose(deconv2_1, 32, [4, 4], stride=2, scope='dec1_4')
                cat1 = deconv1_4 + conv1_4
                deconv1_3 = ResnetBlock(cat1, 32, 5, scope='dec1_3')
                deconv1_2 = ResnetBlock(deconv1_3, 32, 5, scope='dec1_2')
                deconv1_1 = ResnetBlock(deconv1_2, 32, 5, scope='dec1_1')
                inp_pred = slim.conv2d(deconv1_1, self.chns, [5, 5], activation_fn=None, scope='dec1_0')

                if i >= 0:
                    x_unwrap.append(inp_pred)
                # Share weights across pyramid levels after the first pass.
                if i == 0:
                    tf.get_variable_scope().reuse_variables()

    return x_unwrap
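# Hedged sketch: multi-scale training for a generator like the one above
# typically sums a per-level L2 loss against the ground truth resized to each
# pyramid level. `model`, `blurry`, and `sharp` are illustrative assumptions.
def _multiscale_loss_example(model, blurry, sharp):
    preds = model.generator(blurry)
    loss = 0.0
    for pred in preds:
        hi, wi = pred.get_shape().as_list()[1:3]
        gt = tf.image.resize_images(sharp, [hi, wi], method=0)
        loss += tf.reduce_mean(tf.square(pred - gt))
    return loss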
def bottleneck(self, bottom, name, reuse=False):
    # 1x1 bottleneck convolution to 1024 channels.
    with tf.variable_scope(name) as scope:
        if reuse:
            scope.reuse_variables()
        head_bottleneck = slim.conv2d(bottom, 1024, [1, 1], scope=name)
    return head_bottleneck
def conv2d(input_, output_dim, ks=3, s=1, padding='SAME', name='conv2d'):
    # Thin wrapper around slim.conv2d with Xavier weight initialization.
    with tf.variable_scope(name):
        return slim.conv2d(input_, output_dim, ks, s, padding=padding,
                           weights_initializer=tf.contrib.layers.xavier_initializer())
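# Example (assumed shapes): the wrapper defaults to a 3x3, stride-1, SAME
# convolution, so spatial dimensions are preserved.
def _conv2d_wrapper_example():
    x = tf.zeros([1, 32, 32, 3])
    y = conv2d(x, 64, name='head_conv')  # -> [1, 32, 32, 64]
    return y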