def create_test_network_2():
    """Aligned network for test.

    The graph corresponds to a variation of the example from the second figure
    in go/cnn-rf-computation#arbitrary-computation-graphs. Layers 2 and 3 are
    changed to max-pooling operations. Since the functionality is the same as
    convolution, the network is aligned and the receptive field size is the
    same as from the network created using create_test_network_1().

    Returns:
      g: Tensorflow graph object (Graph proto).
    """
    g = ops.Graph()
    with g.as_default():
        # An input test image with unknown spatial resolution.
        x = array_ops.placeholder(
            dtypes.float32, (None, None, None, 1), name='input_image')
        # Left branch.
        l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID')
        # Right branch.
        l2_pad = array_ops.pad(x, [[0, 0], [1, 0], [1, 0], [0, 0]])
        l2 = slim.max_pool2d(l2_pad, [3, 3], stride=2, scope='L2', padding='VALID')
        l3 = slim.max_pool2d(l2, [1, 1], stride=2, scope='L3', padding='VALID')
        # Addition.
        nn.relu(l1 + l3, name='output')
    return g
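# Hedged usage sketch (not from the original test file): build the graph above
# and run its 'output' node once. The tensor names 'input_image:0'/'output:0'
# follow from the name= arguments above; the 32x32 input size is an assumption
# chosen so that both branches produce matching 8x8 spatial shapes.
def run_test_network_2_example():
    import numpy as np
    import tensorflow as tf
    g = create_test_network_2()
    with g.as_default():
        init = tf.global_variables_initializer()
    with tf.Session(graph=g) as sess:
        sess.run(init)
        image = np.zeros((1, 32, 32, 1), dtype=np.float32)
        out = sess.run('output:0', feed_dict={'input_image:0': image})
    return out.shape  # (1, 8, 8, 1)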
def localization_squeezenet(self, inputs):
    with tf.variable_scope('localization_network'):
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            padding='SAME',
                            weights_initializer=tf.constant_initializer(0.0)):
            conv1 = slim.conv2d(inputs, 64, [3, 3], 2, padding='VALID', scope='conv1')
            pool1 = slim.max_pool2d(conv1, [2, 2], 2, scope='pool1')
            fire2 = self.fire_module(pool1, 16, 64, scope='fire2')
            fire3 = self.fire_module(fire2, 16, 64, scope='fire3', res_connection=True)
            fire4 = self.fire_module(fire3, 32, 128, scope='fire4')
            pool4 = slim.max_pool2d(fire4, [2, 2], 2, scope='pool4')
            fire5 = self.fire_module(pool4, 32, 128, scope='fire5', res_connection=True)
            fire6 = self.fire_module(fire5, 48, 192, scope='fire6')
            fire7 = self.fire_module(fire6, 48, 192, scope='fire7', res_connection=True)
            fire8 = self.fire_module(fire7, 64, 256, scope='fire8')
            pool8 = slim.max_pool2d(fire8, [2, 2], 2, scope='pool8')
            fire9 = self.fire_module(pool8, 64, 256, scope='fire9', res_connection=True)
            conv10 = slim.conv2d(fire9, 128, [1, 1], 1, scope='conv10')
            shape = int(np.prod(conv10.get_shape()[1:]))
            identity = np.array([[1., 0., 0.], [0., 1., 0.]])
            identity = identity.flatten()
            fc11 = slim.fully_connected(tf.reshape(conv10, [-1, shape]), 6,
                                        biases_initializer=tf.constant_initializer(identity),
                                        scope='fc11')
    return fc11
def conv_net_kelz(inputs):
    """Builds the ConvNet from Kelz 2016."""
    with slim.arg_scope(
            [slim.conv2d, slim.fully_connected],
            activation_fn=tf.nn.relu,
            weights_initializer=tf.contrib.layers.variance_scaling_initializer(
                factor=2.0, mode='FAN_AVG', uniform=True)):
        net = slim.conv2d(
            inputs, 32, [3, 3], scope='conv1', normalizer_fn=slim.batch_norm)
        net = slim.conv2d(
            net, 32, [3, 3], scope='conv2', normalizer_fn=slim.batch_norm)
        net = slim.max_pool2d(net, [1, 2], stride=[1, 2], scope='pool2')
        net = slim.dropout(net, 0.25, scope='dropout2')
        net = slim.conv2d(
            net, 64, [3, 3], scope='conv3', normalizer_fn=slim.batch_norm)
        net = slim.max_pool2d(net, [1, 2], stride=[1, 2], scope='pool3')
        net = slim.dropout(net, 0.25, scope='dropout3')

        # Flatten while preserving batch and time dimensions.
        dims = tf.shape(net)
        net = tf.reshape(
            net, (dims[0], dims[1], net.shape[2].value * net.shape[3].value),
            'flatten4')

        net = slim.fully_connected(net, 512, scope='fc5')
        net = slim.dropout(net, 0.5, scope='dropout5')
        return net
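# Hedged shape note (not from the original source): both pools above use a
# [1, 2] window, so only the frequency axis is halved while the time axis is
# preserved, and the reshape folds frequency and channels into one feature
# axis. A minimal check, assuming TF 1.x and an arbitrary 64-bin input:
def conv_net_kelz_shape_check():
    import tensorflow as tf
    spec = tf.placeholder(tf.float32, [None, None, 64, 1])
    out = conv_net_kelz(spec)
    return out.get_shape().as_list()  # [None, None, 512]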
def localization_VGG16(self, inputs):
    with tf.variable_scope('localization_network'):
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            activation_fn=tf.nn.relu,
                            weights_initializer=tf.constant_initializer(0.0)):
            net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
            net = slim.max_pool2d(net, [2, 2], scope='pool5')
            shape = int(np.prod(net.get_shape()[1:]))
            net = slim.fully_connected(tf.reshape(net, [-1, shape]), 4096, scope='fc6')
            net = slim.fully_connected(net, 1024, scope='fc7')
            identity = np.array([[1., 0., 0.], [0., 1., 0.]])
            identity = identity.flatten()
            net = slim.fully_connected(net, 6,
                                       biases_initializer=tf.constant_initializer(identity),
                                       scope='fc8')
    return net
def build_arch_baseline(input, is_train: bool, num_classes: int):
    bias_initializer = tf.truncated_normal_initializer(
        mean=0.0, stddev=0.01)  # tf.constant_initializer(0.0)
    # The paper did not mention any regularization, so a common L2 regularizer
    # on the weights is added here.
    weights_regularizer = tf.contrib.layers.l2_regularizer(5e-04)
    tf.logging.info('input shape: {}'.format(input.get_shape()))
    # weights_initializer=initializer,
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        trainable=is_train,
                        biases_initializer=bias_initializer,
                        weights_regularizer=weights_regularizer):
        with tf.variable_scope('relu_conv1') as scope:
            output = slim.conv2d(input, num_outputs=32, kernel_size=[5, 5],
                                 stride=1, padding='SAME', scope=scope,
                                 activation_fn=tf.nn.relu)
            output = slim.max_pool2d(output, [2, 2], scope='max_2d_layer1')
            tf.logging.info('output shape: {}'.format(output.get_shape()))
        with tf.variable_scope('relu_conv2') as scope:
            output = slim.conv2d(output, num_outputs=64, kernel_size=[5, 5],
                                 stride=1, padding='SAME', scope=scope,
                                 activation_fn=tf.nn.relu)
            output = slim.max_pool2d(output, [2, 2], scope='max_2d_layer2')
            tf.logging.info('output shape: {}'.format(output.get_shape()))
        output = slim.flatten(output)
        output = slim.fully_connected(output, 1024, scope='relu_fc3',
                                      activation_fn=tf.nn.relu)
        tf.logging.info('output shape: {}'.format(output.get_shape()))
        output = slim.dropout(output, 0.5, scope='dp')
        output = slim.fully_connected(output, num_classes, scope='final_layer',
                                      activation_fn=None)
        tf.logging.info('output shape: {}'.format(output.get_shape()))
    return output
def network_det(self, inputs, reuse=False):
    if reuse:
        tf.get_variable_scope().reuse_variables()
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01)):
        conv1 = slim.conv2d(inputs, 96, [11, 11], 4, padding='VALID', scope='conv1')
        max1 = slim.max_pool2d(conv1, [3, 3], 2, padding='VALID', scope='max1')
        conv2 = slim.conv2d(max1, 256, [5, 5], 1, scope='conv2')
        max2 = slim.max_pool2d(conv2, [3, 3], 2, padding='VALID', scope='max2')
        conv3 = slim.conv2d(max2, 384, [3, 3], 1, scope='conv3')
        conv4 = slim.conv2d(conv3, 384, [3, 3], 1, scope='conv4')
        conv5 = slim.conv2d(conv4, 256, [3, 3], 1, scope='conv5')
        pool5 = slim.max_pool2d(conv5, [3, 3], 2, padding='VALID', scope='pool5')
        shape = int(np.prod(pool5.get_shape()[1:]))
        fc6 = slim.fully_connected(tf.reshape(pool5, [-1, shape]), 4096, scope='fc6')
        fc_detection = slim.fully_connected(fc6, 512, scope='fc_det1')
        out_detection = slim.fully_connected(fc_detection, 2, scope='fc_det2',
                                             activation_fn=None)
    return out_detection
def make_tower(net):
    net = slim.conv2d(net, 20, [5, 5], padding='VALID', scope='conv1')
    net = slim.max_pool2d(net, [2, 2], padding='VALID', scope='pool1')
    net = slim.conv2d(net, 50, [5, 5], padding='VALID', scope='conv2')
    net = slim.max_pool2d(net, [2, 2], padding='VALID', scope='pool2')
    net = slim.flatten(net)
    net = slim.fully_connected(net, 500, scope='fc1')
    net = slim.fully_connected(net, 2, activation_fn=None, scope='fc2')
    return net
def build_graph(top_k):
    keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
    images = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 1], name='image_batch')
    labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')
    is_training = tf.placeholder(dtype=tf.bool, shape=[], name='train_flag')
    with tf.device('/gpu:0'):
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params={'is_training': is_training}):
            conv3_1 = slim.conv2d(images, 64, [3, 3], 1, padding='SAME', scope='conv3_1')
            max_pool_1 = slim.max_pool2d(conv3_1, [2, 2], [2, 2], padding='SAME', scope='pool1')
            conv3_2 = slim.conv2d(max_pool_1, 128, [3, 3], padding='SAME', scope='conv3_2')
            max_pool_2 = slim.max_pool2d(conv3_2, [2, 2], [2, 2], padding='SAME', scope='pool2')
            conv3_3 = slim.conv2d(max_pool_2, 256, [3, 3], padding='SAME', scope='conv3_3')
            max_pool_3 = slim.max_pool2d(conv3_3, [2, 2], [2, 2], padding='SAME', scope='pool3')
            conv3_4 = slim.conv2d(max_pool_3, 512, [3, 3], padding='SAME', scope='conv3_4')
            conv3_5 = slim.conv2d(conv3_4, 512, [3, 3], padding='SAME', scope='conv3_5')
            max_pool_4 = slim.max_pool2d(conv3_5, [2, 2], [2, 2], padding='SAME', scope='pool4')

            flatten = slim.flatten(max_pool_4)
            fc1 = slim.fully_connected(slim.dropout(flatten, keep_prob), 1024,
                                       activation_fn=tf.nn.relu, scope='fc1')
            logits = slim.fully_connected(slim.dropout(fc1, keep_prob), FLAGS.charset_size,
                                          activation_fn=None, scope='fc2')
        loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
        accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops:
            updates = tf.group(*update_ops)
            loss = control_flow_ops.with_dependencies([updates], loss)

        global_step = tf.get_variable("step", [], initializer=tf.constant_initializer(0.0),
                                      trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
        train_op = slim.learning.create_train_op(loss, optimizer, global_step=global_step)
        probabilities = tf.nn.softmax(logits)

        tf.summary.scalar('loss', loss)
        tf.summary.scalar('accuracy', accuracy)
        merged_summary_op = tf.summary.merge_all()
        predicted_val_top_k, predicted_index_top_k = tf.nn.top_k(probabilities, k=top_k)
        accuracy_in_top_k = tf.reduce_mean(tf.cast(tf.nn.in_top_k(probabilities, labels, top_k),
                                                   tf.float32))

    return {'images': images,
            'labels': labels,
            'keep_prob': keep_prob,
            'top_k': top_k,
            'global_step': global_step,
            'train_op': train_op,
            'loss': loss,
            'is_training': is_training,
            'accuracy': accuracy,
            'accuracy_top_k': accuracy_in_top_k,
            'merged_summary_op': merged_summary_op,
            'predicted_distribution': probabilities,
            'predicted_index_top_k': predicted_index_top_k,
            'predicted_val_top_k': predicted_val_top_k}
def row_column_max_pooling(bottom, prefix='', window=(7, 7)):
    column_mx = slim.max_pool2d(bottom, [window[0], 1], stride=[window[0], 1],
                                scope=prefix + '_column_max')
    row_mx = slim.max_pool2d(bottom, [1, window[1]], stride=[1, window[1]],
                             scope=prefix + '_row_max')
    column_mean = slim.avg_pool2d(column_mx, [1, window[1]], stride=[1, window[1]],
                                  scope=prefix + '_column_mean')
    row_mean = slim.avg_pool2d(row_mx, [window[0], 1], stride=[window[0], 1],
                               scope=prefix + '_row_mean')
    return row_mean + column_mean
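# Hedged shape sketch (not from the original source): with the default 7x7
# window and a [N, 7, 7, C] input, the four pools above reduce as follows.
#   column_mx:   max over each column  -> [N, 1, 7, C]
#   row_mx:      max over each row     -> [N, 7, 1, C]
#   column_mean: mean of column maxima -> [N, 1, 1, C]
#   row_mean:    mean of row maxima    -> [N, 1, 1, C]
# so the returned sum combines a row-wise and a column-wise max-then-average
# decomposition of the window. A minimal check, assuming TF 1.x:
def row_column_max_pooling_shape_check():
    import tensorflow as tf
    bottom = tf.placeholder(tf.float32, [None, 7, 7, 256])
    out = row_column_max_pooling(bottom, prefix='check')
    return out.get_shape().as_list()  # [None, 1, 1, 256]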
def _build_network(self, sess, is_training=True):
    with tf.variable_scope('vgg_16', 'vgg_16'):
        # select initializers
        if cfg.TRAIN.TRUNCATED:
            initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
            initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
        else:
            initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
            initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)

        net = slim.repeat(self._image, 2, slim.conv2d, 64, [3, 3],
                          trainable=False, scope='conv1')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                          trainable=False, scope='conv2')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3],
                          trainable=is_training, scope='conv3')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                          trainable=is_training, scope='conv4')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool4')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                          trainable=is_training, scope='conv5')
        self._act_summaries.append(net)
        self._layers['head'] = net

        # build the anchors for the image
        self._anchor_component()
        # region proposal network
        rois = self._region_proposal(net, is_training, initializer)
        # region of interest pooling
        if cfg.POOLING_MODE == 'crop':
            pool5 = self._crop_pool_layer(net, rois, "pool5")
        else:
            raise NotImplementedError

        pool5_flat = slim.flatten(pool5, scope='flatten')
        fc6 = slim.fully_connected(pool5_flat, 4096, scope='fc6')
        if is_training:
            fc6 = slim.dropout(fc6, keep_prob=0.5, is_training=True, scope='dropout6')
        fc7 = slim.fully_connected(fc6, 4096, scope='fc7')
        if is_training:
            fc7 = slim.dropout(fc7, keep_prob=0.5, is_training=True, scope='dropout7')

        # region classification
        cls_prob, bbox_pred = self._region_classification(fc7, is_training,
                                                          initializer, initializer_bbox)
        self._score_summaries.update(self._predictions)

    return rois, cls_prob, bbox_pred
def construct_net(self, is_trained=True):
    with slim.arg_scope([slim.conv2d], padding='VALID',
                        weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        net = slim.conv2d(self.input_images, 6, [5, 5], 1, padding='SAME', scope='conv1')
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        net = slim.conv2d(net, 16, [5, 5], 1, scope='conv3')
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        net = slim.conv2d(net, 120, [5, 5], 1, scope='conv5')
        net = slim.flatten(net, scope='flat6')
        net = slim.fully_connected(net, 84, scope='fc7')
        net = slim.dropout(net, self.dropout, is_training=is_trained, scope='dropout8')
        digits = slim.fully_connected(net, 10, scope='fc9')
    return digits
def network(inputs):
    '''Define the network'''
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        net = tf.reshape(inputs, [-1, FLAGS.im_size, FLAGS.im_size, 3])
        net = slim.conv2d(net, 32, [3, 3], scope='conv1')
        net = slim.max_pool2d(net, [4, 4], scope='pool1')  # scope was mislabeled 'conv1'
        net = slim.conv2d(net, 128, [3, 3], scope='conv2')
        net = slim.max_pool2d(net, [4, 4], scope='pool2')
        net = slim.flatten(net)
        net = slim.fully_connected(net, 64, scope='fc')
        net = slim.fully_connected(net, n_classes, activation_fn=None, scope='output')
    return net
def AddMaxPool(self, prev_layer, index):
    """Add a maxpool layer.

    Args:
      prev_layer: Input tensor.
      index: Position in model_str to start parsing

    Returns:
      Output tensor, end index in model_str.
    """
    pattern = re.compile(R'(Mp)({\w+})?(\d+),(\d+)(?:,(\d+),(\d+))?')
    m = pattern.match(self.model_str, index)
    if m is None:
        return None, None
    name = self._GetLayerName(m.group(0), index, m.group(2))
    height = int(m.group(3))
    width = int(m.group(4))
    # Strides default to the pool size; explicit strides are regex string
    # captures, so cast them to int before use.
    y_stride = height if m.group(5) is None else int(m.group(5))
    x_stride = width if m.group(6) is None else int(m.group(6))
    self.reduction_factors[1] *= y_stride
    self.reduction_factors[2] *= x_stride
    return slim.max_pool2d(prev_layer, [height, width], [y_stride, x_stride],
                           padding='SAME', scope=name), m.end()
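# Hedged examples (not from the original file) of spec strings that the Mp
# pattern above accepts, derived directly from the regex: pool size, optional
# {name}, and optional explicit strides.
import re
_MP_PATTERN = re.compile(R'(Mp)({\w+})?(\d+),(\d+)(?:,(\d+),(\d+))?')
assert _MP_PATTERN.match('Mp2,2').groups() == ('Mp', None, '2', '2', None, None)
assert _MP_PATTERN.match('Mp{pool1}3,3,1,1').groups() == ('Mp', '{pool1}', '3', '3', '1', '1')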
def inference(self):
    x = tf.reshape(self.x, shape=[-1, self.input_shape[0], self.input_shape[1],
                                  self.input_shape[2]])
    # scale (divide by MNIST std)
    x = x * 0.0125
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        model = slim.conv2d(x, 20, [5, 5], padding='VALID', scope='conv1')
        model = slim.max_pool2d(model, [2, 2], padding='VALID', scope='pool1')
        model = slim.conv2d(model, 50, [5, 5], padding='VALID', scope='conv2')
        model = slim.max_pool2d(model, [2, 2], padding='VALID', scope='pool2')
        model = slim.flatten(model)
        model = slim.fully_connected(model, 500, scope='fc1')
        model = slim.dropout(model, 0.5, is_training=self.is_training, scope='do1')
        model = slim.fully_connected(model, self.nclasses, activation_fn=None, scope='fc2')
    return model
def _build_base(self):
    with tf.variable_scope(self._scope, self._scope):
        net = resnet_utils.conv2d_same(self._image, 64, 7, stride=2, scope='conv1')
        net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]])
        net = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='pool1')
    return net
def build_feature_pyramid(self):
    '''
    reference: https://github.com/CharlesShang/FastMaskRCNN
    build P2, P3, P4, P5, P6
    :return: multi-scale feature map
    '''
    feature_pyramid = {}
    with tf.variable_scope('feature_pyramid'):
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(self.rpn_weight_decay)):
            feature_pyramid['P5'] = slim.conv2d(self.feature_maps_dict['C5'],
                                                num_outputs=256,
                                                kernel_size=[1, 1],
                                                stride=1,
                                                scope='build_P5')
            # P6 is a downsampling of P5.
            feature_pyramid['P6'] = slim.max_pool2d(feature_pyramid['P5'],
                                                    kernel_size=[2, 2],
                                                    stride=2,
                                                    scope='build_P6')
            for layer in range(4, 1, -1):
                p, c = feature_pyramid['P' + str(layer + 1)], self.feature_maps_dict['C' + str(layer)]
                up_sample_shape = tf.shape(c)
                up_sample = tf.image.resize_nearest_neighbor(
                    p, [up_sample_shape[1], up_sample_shape[2]],
                    name='build_P%d/up_sample_nearest_neighbor' % layer)
                c = slim.conv2d(c, num_outputs=256, kernel_size=[1, 1], stride=1,
                                scope='build_P%d/reduce_dimension' % layer)
                p = up_sample + c
                p = slim.conv2d(p, 256, kernel_size=[3, 3], stride=1,
                                padding='SAME', scope='build_P%d/avoid_aliasing' % layer)
                feature_pyramid['P' + str(layer)] = p
    return feature_pyramid
def create_test_network():
    """Convolutional neural network for test.

    Returns:
      name_to_node: Dict keyed by node name, each entry containing the node's
        NodeDef.
    """
    g = ops.Graph()
    with g.as_default():
        # An input test image with unknown spatial resolution.
        x = array_ops.placeholder(
            dtypes.float32, (None, None, None, 1), name='input_image')
        # Left branch before first addition.
        l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID')
        # Right branch before first addition.
        l2_pad = array_ops.pad(x, [[0, 0], [1, 0], [1, 0], [0, 0]], name='L2_pad')
        l2 = slim.conv2d(l2_pad, 1, [3, 3], stride=2, scope='L2', padding='VALID')
        l3 = slim.max_pool2d(l2, [3, 3], stride=2, scope='L3', padding='SAME')
        # First addition.
        l4 = nn.relu(l1 + l3, name='L4_relu')
        # Left branch after first addition.
        l5 = slim.conv2d(l4, 1, [1, 1], stride=2, scope='L5', padding='SAME')
        # Right branch after first addition.
        l6 = slim.conv2d(l4, 1, [3, 3], stride=2, scope='L6', padding='SAME')
        # Final addition.
        gen_math_ops.add(l5, l6, name='L7_add')

    name_to_node = graph_compute_order.parse_graph_nodes(g.as_graph_def())
    return name_to_node
def create_test_network():
    """Convolutional neural network for test.

    Returns:
      g: Tensorflow graph object (Graph proto).
    """
    g = ops.Graph()
    with g.as_default():
        # An input test image with unknown spatial resolution.
        x = array_ops.placeholder(
            dtypes.float32, (None, None, None, 1), name='input_image')
        # Left branch before first addition.
        l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID')
        # Right branch before first addition.
        l2_pad = array_ops.pad(x, [[0, 0], [1, 0], [1, 0], [0, 0]], name='L2_pad')
        l2 = slim.conv2d(l2_pad, 1, [3, 3], stride=2, scope='L2', padding='VALID')
        l3 = slim.max_pool2d(l2, [3, 3], stride=2, scope='L3', padding='SAME')
        # First addition.
        l4 = nn.relu(l1 + l3, name='L4_relu')
        # Left branch after first addition.
        l5 = slim.conv2d(l4, 1, [1, 1], stride=2, scope='L5', padding='SAME')
        # Right branch after first addition.
        l6 = slim.conv2d(l4, 1, [3, 3], stride=2, scope='L6', padding='SAME')
        # Final addition.
        gen_math_ops.add(l5, l6, name='L7_add')
    return g
def build_backbones(self):
    inputs = self.inputs
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        padding='SAME',
                        weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        activation_fn=tf.nn.relu):
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        net = slim.max_pool2d(net, [2, 2], scope='pool5')
    self.vgg_head = net
def build_model(self, input_image, center_map, batch_size):
    self.batch_size = batch_size
    self.input_image = input_image
    self.center_map = center_map
    with tf.variable_scope('pooled_center_map'):
        # The center map is a Gaussian template that gathers the response.
        self.center_map = slim.avg_pool2d(self.center_map, [9, 9], stride=8,
                                          padding='SAME', scope='center_map')
    with slim.arg_scope([slim.conv2d],
                        padding='SAME',
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.contrib.layers.xavier_initializer()):
        with tf.variable_scope('sub_stages'):
            net = slim.conv2d(input_image, 64, [3, 3], scope='sub_conv1')
            net = slim.conv2d(net, 64, [3, 3], scope='sub_conv2')
            net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='sub_pool1')
            net = slim.conv2d(net, 128, [3, 3], scope='sub_conv3')
            net = slim.conv2d(net, 128, [3, 3], scope='sub_conv4')
            net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='sub_pool2')
            net = slim.conv2d(net, 256, [3, 3], scope='sub_conv5')
            net = slim.conv2d(net, 256, [3, 3], scope='sub_conv6')
            net = slim.conv2d(net, 256, [3, 3], scope='sub_conv7')
            net = slim.conv2d(net, 256, [3, 3], scope='sub_conv8')
            net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='sub_pool3')
            net = slim.conv2d(net, 512, [3, 3], scope='sub_conv9')
            net = slim.conv2d(net, 512, [3, 3], scope='sub_conv10')
            net = slim.conv2d(net, 512, [3, 3], scope='sub_conv11')
            net = slim.conv2d(net, 512, [3, 3], scope='sub_conv12')
            net = slim.conv2d(net, 512, [3, 3], scope='sub_conv13')
            net = slim.conv2d(net, 512, [3, 3], scope='sub_conv14')
            self.sub_stage_img_feature = slim.conv2d(net, 128, [3, 3],
                                                     scope='sub_stage_img_feature')
        with tf.variable_scope('stage_1'):
            conv1 = slim.conv2d(self.sub_stage_img_feature, 512, [1, 1], scope='conv1')
            self.stage_heatmap.append(slim.conv2d(conv1, self.joints, [1, 1],
                                                  scope='stage_heatmap'))
        for stage in range(2, self.stages + 1):
            self._middle_conv(stage)
def build_graph(top_k):
    # with tf.device('/cpu:0'):
    keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
    images = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 1], name='image_batch')
    labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')

    conv_1 = slim.conv2d(images, 64, [3, 3], 1, padding='SAME', scope='conv1')
    max_pool_1 = slim.max_pool2d(conv_1, [2, 2], [2, 2], padding='SAME')
    conv_2 = slim.conv2d(max_pool_1, 128, [3, 3], padding='SAME', scope='conv2')
    max_pool_2 = slim.max_pool2d(conv_2, [2, 2], [2, 2], padding='SAME')
    conv_3 = slim.conv2d(max_pool_2, 256, [3, 3], padding='SAME', scope='conv3')
    max_pool_3 = slim.max_pool2d(conv_3, [2, 2], [2, 2], padding='SAME')

    flatten = slim.flatten(max_pool_3)
    fc1 = slim.fully_connected(slim.dropout(flatten, keep_prob), 1024,
                               activation_fn=tf.nn.tanh, scope='fc1')
    logits = slim.fully_connected(slim.dropout(fc1, keep_prob), FLAGS.charset_size,
                                  activation_fn=None, scope='fc2')
    # logits = slim.fully_connected(flatten, FLAGS.charset_size, activation_fn=None,
    #                               reuse=reuse, scope='fc')
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))

    global_step = tf.get_variable("step", [], initializer=tf.constant_initializer(0.0),
                                  trainable=False)
    rate = tf.train.exponential_decay(2e-4, global_step, decay_steps=2000,
                                      decay_rate=0.97, staircase=True)
    train_op = tf.train.AdamOptimizer(learning_rate=rate).minimize(loss, global_step=global_step)
    probabilities = tf.nn.softmax(logits)

    tf.summary.scalar('loss', loss)
    tf.summary.scalar('accuracy', accuracy)
    merged_summary_op = tf.summary.merge_all()
    predicted_val_top_k, predicted_index_top_k = tf.nn.top_k(probabilities, k=top_k)
    accuracy_in_top_k = tf.reduce_mean(tf.cast(tf.nn.in_top_k(probabilities, labels, top_k),
                                               tf.float32))

    return {'images': images,
            'labels': labels,
            'keep_prob': keep_prob,
            'top_k': top_k,
            'global_step': global_step,
            'train_op': train_op,
            'loss': loss,
            'accuracy': accuracy,
            'accuracy_top_k': accuracy_in_top_k,
            'merged_summary_op': merged_summary_op,
            'predicted_distribution': probabilities,
            'predicted_index_top_k': predicted_index_top_k,
            'predicted_val_top_k': predicted_val_top_k}
def network(self, inputs, reuse=False):
    if reuse:
        tf.get_variable_scope().reuse_variables()
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01)):
        conv1 = slim.conv2d(inputs, 96, [11, 11], 4, padding='VALID', scope='conv1')
        max1 = slim.max_pool2d(conv1, [3, 3], 2, padding='VALID', scope='max1')

        conv1a = slim.conv2d(max1, 256, [4, 4], 4, padding='VALID', scope='conv1a')

        conv2 = slim.conv2d(max1, 256, [5, 5], 1, scope='conv2')
        max2 = slim.max_pool2d(conv2, [3, 3], 2, padding='VALID', scope='max2')
        conv3 = slim.conv2d(max2, 384, [3, 3], 1, scope='conv3')

        conv3a = slim.conv2d(conv3, 256, [2, 2], 2, padding='VALID', scope='conv3a')

        conv4 = slim.conv2d(conv3, 384, [3, 3], 1, scope='conv4')
        conv5 = slim.conv2d(conv4, 256, [3, 3], 1, scope='conv5')
        pool5 = slim.max_pool2d(conv5, [3, 3], 2, padding='VALID', scope='pool5')

        concat_feat = tf.concat([conv1a, conv3a, pool5], 3)
        conv_all = slim.conv2d(concat_feat, 192, [1, 1], 1, padding='VALID', scope='conv_all')

        shape = int(np.prod(conv_all.get_shape()[1:]))
        fc_full = slim.fully_connected(
            tf.reshape(tf.transpose(conv_all, [0, 3, 1, 2]), [-1, shape]),
            3072, scope='fc_full')

        fc_detection = slim.fully_connected(fc_full, 512, scope='fc_detection1')
        fc_landmarks = slim.fully_connected(fc_full, 512, scope='fc_landmarks1')
        fc_visibility = slim.fully_connected(fc_full, 512, scope='fc_visibility1')
        fc_pose = slim.fully_connected(fc_full, 512, scope='fc_pose1')
        fc_gender = slim.fully_connected(fc_full, 512, scope='fc_gender1')

        out_detection = slim.fully_connected(fc_detection, 2, scope='fc_detection2',
                                             activation_fn=None)
        out_landmarks = slim.fully_connected(fc_landmarks, 42, scope='fc_landmarks2',
                                             activation_fn=None)
        out_visibility = slim.fully_connected(fc_visibility, 21, scope='fc_visibility2',
                                              activation_fn=None)
        out_pose = slim.fully_connected(fc_pose, 3, scope='fc_pose2', activation_fn=None)
        out_gender = slim.fully_connected(fc_gender, 2, scope='fc_gender2', activation_fn=None)

    return [out_detection, out_landmarks, out_visibility, out_pose, out_gender]
def inference(self):
    x = tf.reshape(self.x, shape=[-1, self.input_shape[0], self.input_shape[1],
                                  self.input_shape[2]])
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        weights_regularizer=slim.l2_regularizer(1e-6)):
        model = slim.conv2d(x, 96, [11, 11], 4, padding='VALID', scope='conv1')
        model = slim.max_pool2d(model, [3, 3], 2, scope='pool1')
        model = slim.conv2d(model, 256, [5, 5], 1, scope='conv2')
        model = slim.max_pool2d(model, [3, 3], 2, scope='pool2')
        model = slim.conv2d(model, 384, [3, 3], 1, scope='conv3')
        model = slim.conv2d(model, 384, [3, 3], 1, scope='conv4')
        model = slim.conv2d(model, 256, [3, 3], 1, scope='conv5')
        model = slim.max_pool2d(model, [3, 3], 2, scope='pool5')
        model = slim.flatten(model)
        model = slim.fully_connected(model, 4096, activation_fn=None, scope='fc1')
        model = slim.dropout(model, 0.5, is_training=self.is_training, scope='do1')
        model = slim.fully_connected(model, 4096, activation_fn=None, scope='fc2')
        model = slim.dropout(model, 0.5, is_training=self.is_training, scope='do2')
        model = slim.fully_connected(model, self.nclasses, activation_fn=None, scope='fc3')
    return model
def conv_net(x, is_training):
    # "updates_collections": None is very important; without it the accuracy
    # stays around 0.10.
    batch_norm_params = {"is_training": is_training, "decay": 0.9,
                         "updates_collections": None}
    # , 'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES]
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        with tf.variable_scope("ConvNet", reuse=tf.AUTO_REUSE):
            x = tf.reshape(x, [-1, 28, 28, 1])
            net = slim.conv2d(x, 6, [5, 5], scope="conv_1")
            net = slim.max_pool2d(net, [2, 2], scope="pool_1")
            net = slim.conv2d(net, 12, [5, 5], scope="conv_2")
            net = slim.max_pool2d(net, [2, 2], scope="pool_2")
            net = slim.flatten(net, scope="flatten")
            net = slim.fully_connected(net, 100, scope="fc")
            net = slim.dropout(net, is_training=is_training)
            net = slim.fully_connected(net, num_classes, scope="prob",
                                       activation_fn=None, normalizer_fn=None)
            return net
def sketch_a_net_slim(inputs):
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.1),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        trainable=True):
        with slim.arg_scope([slim.conv2d], padding='VALID'):
            # x = tf.reshape(inputs, shape=[-1, 225, 225, 1])
            conv1 = slim.conv2d(inputs, 64, [15, 15], 3, scope='conv1_s1')
            conv1 = slim.max_pool2d(conv1, [3, 3], scope='pool1')
            conv2 = slim.conv2d(conv1, 128, [5, 5], scope='conv2_s1')
            conv2 = slim.max_pool2d(conv2, [3, 3], scope='pool2')
            conv3 = slim.conv2d(conv2, 256, [3, 3], padding='SAME', scope='conv3_s1')
            conv4 = slim.conv2d(conv3, 256, [3, 3], padding='SAME', scope='conv4_s1')
            conv5 = slim.conv2d(conv4, 256, [3, 3], padding='SAME', scope='conv5_s1')
            conv5 = slim.max_pool2d(conv5, [3, 3], scope='pool3')
            att_f = slim.flatten(conv5)
            fc6 = slim.fully_connected(att_f, 512, scope='fc6_s1')
            fc7 = slim.fully_connected(fc6, 256, activation_fn=None, scope='fc7_sketch')
    return fc7
def overfeat(inputs):
    '''Define the network'''
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        net = tf.reshape(inputs, [-1, FLAGS.im_size, FLAGS.im_size, 3])
        net = slim.conv2d(net, 64, [11, 11], 4, padding='VALID', scope='conv1')
        net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
        net = slim.conv2d(net, 256, [5, 5], scope='conv2')
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        net = slim.conv2d(net, 512, [3, 3], scope='conv3')
        net = slim.conv2d(net, 1024, [3, 3], scope='conv4')
        net = slim.conv2d(net, 1024, [3, 3], scope='conv5')
        net = slim.max_pool2d(net, [2, 2], 2, scope='pool5')
        net = slim.flatten(net)
        net = slim.fully_connected(net, 128, scope='fc')
        net = slim.fully_connected(net, n_classes, activation_fn=None, scope='output')
    return net
def model(x_image):
    with ExitStack() as stack:
        c1 = stack.enter_context(
            slim.arg_scope([slim.conv2d, slim.fully_connected],
                           activation_fn=tf.nn.relu,
                           weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                           biases_initializer=tf.constant_initializer(0.1)))
        c2 = stack.enter_context(slim.arg_scope([slim.max_pool2d], padding="SAME"))
        h_conv1 = slim.conv2d(x_image, 32, [5, 5])
        h_pool1 = slim.max_pool2d(h_conv1, [2, 2])
        h_conv2 = slim.conv2d(h_pool1, 64, [5, 5])
        h_pool2 = slim.max_pool2d(h_conv2, [2, 2])
        h_pool2_flat = slim.flatten(h_pool2)
        h_fc1 = slim.fully_connected(h_pool2_flat, 1024)
        y_conv = slim.fully_connected(h_fc1, 10, activation_fn=None)
        return y_conv
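# Hedged usage sketch (not from the original source): the ExitStack above just
# nests the two arg_scopes; with a MNIST-shaped input the two SAME-padded 2x2
# pools reduce 28x28 -> 14x14 -> 7x7 before the flatten.
def model_usage_example():
    import tensorflow as tf
    x_image = tf.placeholder(tf.float32, [None, 28, 28, 1])
    y_conv = model(x_image)
    return y_conv.get_shape().as_list()  # [None, 10]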
def inference(inputs):
    x = tf.reshape(inputs, [-1, 28, 28, 1])
    conv_1 = tf.nn.relu(slim.conv2d(x, 32, [3, 3]))  # 28 * 28 * 32
    pool_1 = slim.max_pool2d(conv_1, [2, 2])  # 14 * 14 * 32
    block_1 = res_identity(pool_1, 32, [3, 3], 'layer_2')
    block_2 = res_change(block_1, 64, [3, 3], 'layer_3')
    block_3 = res_identity(block_2, 64, [3, 3], 'layer_4')
    block_4 = res_change(block_3, 32, [3, 3], 'layer_5')
    net_flatten = slim.flatten(block_4, scope='flatten')
    fc_1 = slim.fully_connected(slim.dropout(net_flatten, 0.8), 200,
                                activation_fn=tf.nn.tanh, scope='fc_1')
    output = slim.fully_connected(slim.dropout(fc_1, 0.8), 10,
                                  activation_fn=None, scope='output_layer')
    return output
def _image_to_head(self, is_training, reuse=False):
    with tf.variable_scope(self._scope, self._scope, reuse=reuse):
        net = slim.repeat(self._image, 2, slim.conv2d, 64, [3, 3],
                          trainable=False, scope='conv1')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                          trainable=False, scope='conv2')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3],
                          trainable=is_training, scope='conv3')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                          trainable=is_training, scope='conv4')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool4')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                          trainable=is_training, scope='conv5')
    self._act_summaries.append(net)
    self._layers['head'] = net
    return net
def _image_to_head(self, is_training, reuse=None):
    with slim.arg_scope(self._arg_scope(is_training, reuse)):
        net = slim.conv2d(self._image, 96, [3, 3], stride=1, scope='conv1')
        net = slim.max_pool2d(net, [2, 2], stride=2, scope='maxpool1')
        net = self.fire_module(net, 16, 64, scope='fire2')
        net = self.fire_module(net, 16, 64, scope='fire3')
        net = self.fire_module(net, 32, 128, scope='fire4')
        net = slim.max_pool2d(net, [2, 2], stride=2, scope='maxpool4')
        net = self.fire_module(net, 32, 128, scope='fire5')
        net = self.fire_module(net, 48, 192, scope='fire6')
        net = self.fire_module(net, 48, 192, scope='fire7')
        net = self.fire_module(net, 64, 256, scope='fire8')
        net = slim.max_pool2d(net, [2, 2], stride=2, scope='maxpool8', padding='SAME')
        net = self.fire_module(net, 64, 256, scope='fire9')
        net = slim.max_pool2d(net, [2, 2], stride=2, scope='maxpool9', padding='SAME')
        net = self.fire_module(net, 64, 512, scope='fire10')
    self._act_summaries.append(net)
    self._layers['head'] = net
    return net
def pool(input_, ks=2, s=1, name='max_pool'):
    with tf.variable_scope(name):
        return slim.max_pool2d(input_, ks, s)
def maxpool2x2(input, name):
    output = slim.max_pool2d(input, kernel_size=[2, 2], stride=2, scope=name)
    if PRINT_LAYER_LOG:
        print(name, output.get_shape())
    return output
def inference(input_tensor, regularizer=None):
    with slim.arg_scope([slim.conv2d, slim.max_pool2d], stride=1, padding='SAME'):
        with tf.variable_scope("layer1-initconv"):
            data = slim.conv2d(input_tensor, CONV_DEEP, [7, 7])
            data = slim.max_pool2d(data, [2, 2], stride=2)
        with tf.variable_scope("resnet_layer"):
            data = res_block(input_tensor=data, kshape=[CONV_SIZE, CONV_SIZE],
                             deph=CONV_DEEP, layer=6, half=False,
                             name="layer4-9-conv")
            data = res_block(input_tensor=data, kshape=[CONV_SIZE, CONV_SIZE],
                             deph=CONV_DEEP * 2, layer=8, half=True,
                             name="layer10-15-conv")
            data = res_block(input_tensor=data, kshape=[CONV_SIZE, CONV_SIZE],
                             deph=CONV_DEEP * 4, layer=12, half=True,
                             name="layer16-27-conv")
            data = res_block(input_tensor=data, kshape=[CONV_SIZE, CONV_SIZE],
                             deph=CONV_DEEP * 8, layer=6, half=True,
                             name="layer28-33-conv")
            data = slim.avg_pool2d(data, [2, 2], stride=2)

            # Get the output dimensions, used as the input size of the fully
            # connected layer.
            data_shape = data.get_shape().as_list()
            nodes = data_shape[1] * data_shape[2] * data_shape[3]
            reshaped = tf.reshape(data, [data_shape[0], nodes])

        # Final fully connected layer.
        with tf.variable_scope('layer34-fc'):
            fc_weights = tf.get_variable(
                "weight", [nodes, NUM_LABELS],
                initializer=tf.truncated_normal_initializer(stddev=0.1))
            # if regularizer is not None:
            #     tf.add_to_collection('losses', regularizer(fc_weights))
            fc_biases = tf.get_variable(
                "bias", [NUM_LABELS], initializer=tf.constant_initializer(0.1))
            fc = tf.nn.relu(tf.matmul(reshaped, fc_weights) + fc_biases)
            # if train:
            #     fc = tf.nn.dropout(fc, 0.5)
            # return fc
        return fc
def inception_resnet_v2(inputs, is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV2'):
    """Creates the Inception Resnet V2 model.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      bottleneck_layer_size: size of the bottleneck (embedding) layer.
      reuse: whether or not the network and its variables should be reused.
        To be able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      net: the bottleneck layer of the model.
      end_points: the set of end_points from the inception model.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                                stride=1, padding='SAME'):
                # 149 x 149 x 32
                net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',
                                  scope='Conv2d_1a_3x3')
                end_points['Conv2d_1a_3x3'] = net
                # 147 x 147 x 32
                net = slim.conv2d(net, 32, 3, padding='VALID',
                                  scope='Conv2d_2a_3x3')
                end_points['Conv2d_2a_3x3'] = net
                # 147 x 147 x 64
                net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
                end_points['Conv2d_2b_3x3'] = net
                # 73 x 73 x 64
                net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                      scope='MaxPool_3a_3x3')
                end_points['MaxPool_3a_3x3'] = net
                # 73 x 73 x 80
                net = slim.conv2d(net, 80, 1, padding='VALID',
                                  scope='Conv2d_3b_1x1')
                end_points['Conv2d_3b_1x1'] = net
                # 71 x 71 x 192
                net = slim.conv2d(net, 192, 3, padding='VALID',
                                  scope='Conv2d_4a_3x3')
                end_points['Conv2d_4a_3x3'] = net
                # 35 x 35 x 192
                net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                      scope='MaxPool_5a_3x3')
                end_points['MaxPool_5a_3x3'] = net

                # 35 x 35 x 320
                with tf.variable_scope('Mixed_5b'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5,
                                                    scope='Conv2d_0b_5x5')
                    with tf.variable_scope('Branch_2'):
                        tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1')
                        tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3,
                                                    scope='Conv2d_0c_3x3')
                    with tf.variable_scope('Branch_3'):
                        tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME',
                                                     scope='AvgPool_0a_3x3')
                        tower_pool_1 = slim.conv2d(tower_pool, 64, 1,
                                                   scope='Conv2d_0b_1x1')
                    net = tf.concat([tower_conv, tower_conv1_1,
                                     tower_conv2_2, tower_pool_1], 3)
                end_points['Mixed_5b'] = net
                # 35 x 35 x 320
                net = slim.repeat(net, 10, block35, scale=0.17)

                # 17 x 17 x 1024 (computing it by hand gives 17 x 17 x 1088)
                with tf.variable_scope('Mixed_6a'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net, 384, 3, stride=2,
                                                 padding='VALID',
                                                 scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3,
                                                    stride=2, padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                                     scope='MaxPool_1a_3x3')
                    net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)
                end_points['Mixed_6a'] = net
                net = slim.repeat(net, 20, block17, scale=0.10)

                # 8 x 8 x 2080 (computing it by hand gives 17 x 17 x 1088)
                with tf.variable_scope('Mixed_7a'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                        tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,
                                                   padding='VALID',
                                                   scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2,
                                                    padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                        tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2,
                                                    padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_3'):
                        tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                                     scope='MaxPool_1a_3x3')
                    net = tf.concat([tower_conv_1, tower_conv1_1,
                                     tower_conv2_2, tower_pool], 3)
                end_points['Mixed_7a'] = net

                net = slim.repeat(net, 9, block8, scale=0.20)
                net = block8(net, activation_fn=None)

                net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
                end_points['Conv2d_7b_1x1'] = net

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    # pylint: disable=no-member
                    net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',
                                          scope='AvgPool_1a_8x8')
                    net = slim.flatten(net)
                    net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                                       scope='Dropout')
                    end_points['PreLogitsFlatten'] = net
                net = slim.fully_connected(net, bottleneck_layer_size,
                                           activation_fn=None, scope='Bottleneck',
                                           reuse=False)

    return net, end_points
def vgg_19(inputs,
           num_classes=1000,
           is_training=False,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope='vgg_19',
           reuse=False,
           fc_conv_padding='VALID'):
    """Oxford Net VGG 19-Layers version E Example.

    Note: All the fully_connected layers have been transformed to conv2d
    layers. To use in classification mode, resize input to 224x224.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the
        dropout layers during training.
      spatial_squeeze: whether or not should squeeze the spatial dimensions of
        the outputs. Useful to remove unnecessary dimensions for classification.
      scope: Optional scope for the variables.
      fc_conv_padding: the type of padding to use for the fully connected
        layer that is implemented as a convolutional layer. Use 'SAME' padding
        if you are applying the network in a fully convolutional manner and
        want to get a prediction map downsampled by a factor of 32 as an
        output. Otherwise, the output prediction map will be (input / 32) - 6
        in case of 'VALID' padding.

    Returns:
      the last op containing the log predictions and end_points dict.
    """
    with tf.variable_scope(scope, 'vgg_19', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.name + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope(
                [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                outputs_collections=end_points_collection):
            net = slim.repeat(inputs, 2, slim.conv2d, 64, 3, scope='conv1', reuse=reuse)
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, 3, scope='conv2', reuse=reuse)
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 4, slim.conv2d, 256, 3, scope='conv3', reuse=reuse)
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 4, slim.conv2d, 512, 3, scope='conv4', reuse=reuse)
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(net, 4, slim.conv2d, 512, 3, scope='conv5', reuse=reuse)
            net = slim.max_pool2d(net, [2, 2], scope='pool5')
            # Convert end_points_collection into an end_point dict.
            end_points = slim.utils.convert_collection_to_dict(
                end_points_collection)
            return net, end_points
def sc(inputs, input_shape=(8, 8), batch_size=1, modify=[]):
    activations = OrderedDict()
    padding = 'SAME'
    initializer = tf.truncated_normal_initializer(stddev=0.01)
    regularizer = slim.l2_regularizer(0.0005)
    modifys = {}

    name = 'conv1'
    net = slim.conv2d(inputs, 3, [3, 3], padding=padding,
                      weights_initializer=initializer,
                      weights_regularizer=regularizer, scope=name)
    c1 = tf.get_variable('scale1', shape=(3), initializer=initializer,
                         regularizer=regularizer)
    c1 = c1 + 10
    net = tf.multiply(net, c1)
    activations[name] = net
    if name in modify:
        modifys[name] = tf.placeholder(tf.float32,
                                       shape=(batch_size, None, None, None))
        net = tf.multiply(net, modifys[name])
    net = slim.max_pool2d(net, [2, 2], scope='pool1')

    name = 'conv2'
    net = slim.conv2d(net, 2, [3, 3], padding=padding,
                      weights_initializer=initializer,
                      weights_regularizer=regularizer, scope=name)
    c1 = tf.get_variable('scale2', shape=(2), initializer=initializer,
                         regularizer=regularizer)
    c1 = c1 + 10
    net = tf.multiply(net, c1)
    activations[name] = net
    if name in modify:
        modifys[name] = tf.placeholder(tf.float32,
                                       shape=(batch_size, None, None, None))
        net = tf.multiply(net, modifys[name])
    net = slim.max_pool2d(net, [2, 2], scope='pool2')

    name = 'conv3'
    net = slim.conv2d(net, 2, [3, 3], padding=padding,
                      weights_initializer=initializer,
                      weights_regularizer=regularizer, scope=name)
    c1 = tf.get_variable('scale3', shape=(2), initializer=initializer,
                         regularizer=regularizer)
    c1 = c1 + 10
    net = tf.multiply(net, c1)
    activations[name] = net
    if name in modify:
        modifys[name] = tf.placeholder(tf.float32,
                                       shape=(batch_size, None, None, None))
        net = tf.multiply(net, modifys[name])
    net = slim.max_pool2d(net, [2, 2], scope='pool3')

    net = slim.conv2d(net, 2, [3, 3], padding=padding,
                      weights_initializer=initializer,
                      weights_regularizer=regularizer,
                      activation_fn=None, scope='conv4')
    c1 = tf.get_variable('scale4', shape=(2), initializer=initializer,
                         regularizer=regularizer)
    c1 = c1 + 10
    net = tf.multiply(net, c1)
    activations['conv4'] = net
    print(net)

    logits = slim.softmax(net)
    if len(modify) != 0:
        return logits, net, activations, modifys
    else:
        return logits, net, activations
def network(self, _input):
    conv1 = slim.conv2d(_input, 32, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv1_1')
    conv1 = slim.conv2d(conv1, 32, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv1_2')
    pool1 = slim.max_pool2d(conv1, [2, 2], padding='SAME')

    conv2 = slim.conv2d(pool1, 64, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv2_1')
    conv2 = slim.conv2d(conv2, 64, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv2_2')
    pool2 = slim.max_pool2d(conv2, [2, 2], padding='SAME')

    conv3 = slim.conv2d(pool2, 128, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv3_1')
    conv3 = slim.conv2d(conv3, 128, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv3_2')
    pool3 = slim.max_pool2d(conv3, [2, 2], padding='SAME')

    conv4 = slim.conv2d(pool3, 256, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv4_1')
    conv4 = slim.conv2d(conv4, 256, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv4_2')
    pool4 = slim.max_pool2d(conv4, [2, 2], padding='SAME')

    conv5 = slim.conv2d(pool4, 512, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv5_1')
    conv5 = slim.conv2d(conv5, 512, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv5_2')

    up6 = upsample_and_concat(conv5, conv4, 256, 512)
    conv6 = slim.conv2d(up6, 256, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv6_1')
    conv6 = slim.conv2d(conv6, 256, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv6_2')

    up7 = upsample_and_concat(conv6, conv3, 128, 256)
    conv7 = slim.conv2d(up7, 128, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv7_1')
    conv7 = slim.conv2d(conv7, 128, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv7_2')

    up8 = upsample_and_concat(conv7, conv2, 64, 128)
    conv8 = slim.conv2d(up8, 64, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv8_1')
    conv8 = slim.conv2d(conv8, 64, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv8_2')

    up9 = upsample_and_concat(conv8, conv1, 32, 64)
    conv9 = slim.conv2d(up9, 32, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv9_1')
    conv9 = slim.conv2d(conv9, 32, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv9_2')

    conv10 = slim.conv2d(conv9, 12, [1, 1], rate=1, activation_fn=None, scope='g_conv10')
    out = tf.depth_to_space(conv10, 2)
    return out
def P_Net(inputs, label=None, bbox_target=None, landmark_target=None, training=True):
    # define common params
    with slim.arg_scope([slim.conv2d],
                        activation_fn=prelu,
                        weights_initializer=slim.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        padding='valid'):
        print(inputs.get_shape())
        net = slim.conv2d(inputs, 10, 3, stride=1, scope='conv1')
        print(net.get_shape())
        net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope='pool1',
                              padding='SAME')
        print(net.get_shape())
        net = slim.conv2d(net, num_outputs=16, kernel_size=[3, 3], stride=1,
                          scope='conv2')
        print(net.get_shape())
        net = slim.conv2d(net, num_outputs=32, kernel_size=[3, 3], stride=1,
                          scope='conv3')
        print(net.get_shape())
        # batch*H*W*2
        conv4_1 = slim.conv2d(net, num_outputs=2, kernel_size=[1, 1], stride=1,
                              scope='conv4_1', activation_fn=tf.nn.softmax)
        # conv4_1 = slim.conv2d(net, num_outputs=1, kernel_size=[1, 1], stride=1,
        #                       scope='conv4_1', activation_fn=tf.nn.sigmoid)
        print(conv4_1.get_shape())
        # batch*H*W*4
        bbox_pred = slim.conv2d(net, num_outputs=4, kernel_size=[1, 1], stride=1,
                                scope='conv4_2', activation_fn=None)
        print(bbox_pred.get_shape())
        # batch*H*W*10
        landmark_pred = slim.conv2d(net, num_outputs=10, kernel_size=[1, 1],
                                    stride=1, scope='conv4_3', activation_fn=None)
        print(landmark_pred.get_shape())
        # cls_prob_original = conv4_1
        # bbox_pred_original = bbox_pred
        if training:
            # batch*2
            cls_prob = tf.squeeze(conv4_1, [1, 2], name='cls_prob')
            cls_loss = cls_ohem(cls_prob, label)
            # batch
            bbox_pred = tf.squeeze(bbox_pred, [1, 2], name='bbox_pred')
            bbox_loss = bbox_ohem(bbox_pred, bbox_target, label)
            # batch*10
            landmark_pred = tf.squeeze(landmark_pred, [1, 2], name="landmark_pred")
            landmark_loss = landmark_ohem(landmark_pred, landmark_target, label)

            accuracy = cal_accuracy(cls_prob, label)
            L2_loss = tf.add_n(slim.losses.get_regularization_losses())
            return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy
        # test
        else:
            # at test time, batch_size = 1
            cls_pro_test = tf.squeeze(conv4_1, axis=0)
            bbox_pred_test = tf.squeeze(bbox_pred, axis=0)
            landmark_pred_test = tf.squeeze(landmark_pred, axis=0)
            return cls_pro_test, bbox_pred_test, landmark_pred_test
def vgg_16(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           reuse=None,
           scope='vgg_16',
           fc_conv_padding='VALID',
           global_pool=False):
    """Oxford Net VGG 16-Layers version D Example.

    Note: All the fully_connected layers have been transformed to conv2d
    layers. To use in classification mode, resize input to 224x224.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes. If 0 or None, the logits layer
        is omitted and the input features to the logits layer are returned
        instead.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the
        dropout layers during training.
      spatial_squeeze: whether or not should squeeze the spatial dimensions of
        the outputs. Useful to remove unnecessary dimensions for
        classification.
      reuse: whether or not the network and its variables should be reused. To
        be able to reuse 'scope' must be given.
      scope: Optional scope for the variables.
      fc_conv_padding: the type of padding to use for the fully connected
        layer that is implemented as a convolutional layer. Use 'SAME' padding
        if you are applying the network in a fully convolutional manner and
        want to get a prediction map downsampled by a factor of 32 as an
        output. Otherwise, the output prediction map will be (input / 32) - 6
        in case of 'VALID' padding.
      global_pool: Optional boolean flag. If True, the input to the
        classification layer is avgpooled to size 1x1, for any input size.
        (This is not part of the original VGG architecture.)

    Returns:
      net: the output of the logits layer (if num_classes is a non-zero
        integer), or the input to the logits layer (if num_classes is 0 or
        None).
      end_points: a dict of tensors with intermediate activations.
    """
    print('using vgg 16 network')
    with tf.variable_scope(scope, 'vgg_16', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with slim.arg_scope(
                [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                outputs_collections=end_points_collection):
            net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')
            net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
            net = slim.max_pool2d(net, [2, 2], scope='pool5')

            # Use conv2d instead of fully_connected layers.
            net = slim.conv2d(net, 4096, [7, 7], padding=fc_conv_padding, scope='fc6')
            net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                               scope='dropout6')
            net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
            # Convert end_points_collection into an end_point dict.
            end_points = slim.utils.convert_collection_to_dict(
                end_points_collection)
            if global_pool:
                net = tf.reduce_mean(net, [1, 2], keep_dims=True, name='global_pool')
                end_points['global_pool'] = net
            if num_classes:
                net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                                   scope='dropout7')
                net = slim.conv2d(net, num_classes, [1, 1],
                                  activation_fn=None,
                                  normalizer_fn=None,
                                  scope='fc8')
                if spatial_squeeze:
                    # tf.squeeze removes all dimensions of size 1 from the tensor.
                    net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
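# Hedged usage sketch (not from the original source): a classification-mode
# call of vgg_16 above, following its docstring (input resized to 224x224).
# The wrapper name is an illustrative assumption.
def vgg_16_usage_example():
    import tensorflow as tf
    images = tf.placeholder(tf.float32, [None, 224, 224, 3])
    logits, end_points = vgg_16(images, num_classes=1000, is_training=False)
    # logits: [batch_size, 1000] after the spatial squeeze; end_points holds
    # intermediate activations keyed by scope, e.g. 'vgg_16/conv5/conv5_3'.
    return logits, end_points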
def __init__(self, input_tensor, is_training):
    rgb = input_tensor - [123.68, 116.779, 103.939]
    # [ 123.68000031 116.77999878 103.94000244]
    # Build convolutional layers only
    batch_norm_params = {
        'is_training': is_training,
        'decay': 0.9,
        'updates_collections': None
    }
    with slim.arg_scope([slim.conv2d],
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        self.conv1_1 = slim.conv2d(rgb, 64, [3, 3], scope='vgg_16/conv1/conv1_1')
        self.conv1_2 = slim.conv2d(self.conv1_1, 64, [3, 3], scope='vgg_16/conv1/conv1_2')
        self.pool1 = slim.max_pool2d(self.conv1_2, [2, 2], scope='pool1')

        self.conv2_1 = slim.conv2d(self.pool1, 128, [3, 3], scope='vgg_16/conv2/conv2_1')
        self.conv2_2 = slim.conv2d(self.conv2_1, 128, [3, 3], scope='vgg_16/conv2/conv2_2')
        self.pool2 = slim.max_pool2d(self.conv2_2, [2, 2], scope='pool2')

        self.conv3_1 = slim.conv2d(self.pool2, 256, [3, 3], scope='vgg_16/conv3/conv3_1')
        self.conv3_2 = slim.conv2d(self.conv3_1, 256, [3, 3], scope='vgg_16/conv3/conv3_2')
        self.conv3_3 = slim.conv2d(self.conv3_2, 256, [3, 3], scope='vgg_16/conv3/conv3_3')
        self.pool3 = slim.max_pool2d(self.conv3_3, [2, 2], scope='pool3')

        self.conv4_1 = slim.conv2d(self.pool3, 512, [3, 3], scope='vgg_16/conv4/conv4_1')
        self.conv4_2 = slim.conv2d(self.conv4_1, 512, [3, 3], scope='vgg_16/conv4/conv4_2')
        self.conv4_3 = slim.conv2d(self.conv4_2, 512, [3, 3], scope='vgg_16/conv4/conv4_3')
        self.pool4 = slim.max_pool2d(self.conv4_3, [2, 2], scope='pool4')

        self.conv5_1 = slim.conv2d(self.pool4, 512, [3, 3], scope='vgg_16/conv5/conv5_1')
        self.conv5_2 = slim.conv2d(self.conv5_1, 512, [3, 3], scope='vgg_16/conv5/conv5_2')
        self.conv5_3 = slim.conv2d(self.conv5_2, 512, [3, 3], scope='vgg_16/conv5/conv5_3')
        self.pool5 = slim.max_pool2d(self.conv5_3, [2, 2], scope='pool5')

        # Use conv2d instead of fully_connected layers.
        self.pool6 = slim.conv2d(self.pool5, 4096, [7, 7], scope='vgg_16/fc6')
        # pool6_drop = tf.nn.dropout(self.pool6, keep_prob)
        # net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
        #                    scope='dropout6')
        self.pool7 = slim.conv2d(self.pool6, 4096, [1, 1], scope='vgg_16/fc7')
def create_network(images, num_classes=None, add_logits=True, reuse=None,
                   create_summaries=True, weight_decay=1e-8):
    nonlinearity = tf.nn.elu
    conv_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
    conv_bias_init = tf.zeros_initializer()
    conv_regularizer = slim.l2_regularizer(weight_decay)
    fc_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
    fc_bias_init = tf.zeros_initializer()
    fc_regularizer = slim.l2_regularizer(weight_decay)

    def batch_norm_fn(x):
        return slim.batch_norm(x, scope=tf.get_variable_scope().name + "/bn")

    network = images
    network = slim.conv2d(network, 32, [3, 3], stride=1,
                          activation_fn=nonlinearity, padding="SAME",
                          normalizer_fn=batch_norm_fn, scope="conv1_1",
                          weights_initializer=conv_weight_init,
                          biases_initializer=conv_bias_init,
                          weights_regularizer=conv_regularizer)
    if create_summaries:
        tf.summary.histogram(network.name + "/activations", network)
        tf.summary.image("conv1_1/weights",
                         tf.transpose(slim.get_variables("conv1_1/weights:0")[0],
                                      [3, 0, 1, 2]),
                         max_outputs=128)
    network = slim.conv2d(network, 32, [3, 3], stride=1,
                          activation_fn=nonlinearity, padding="SAME",
                          normalizer_fn=batch_norm_fn, scope="conv1_2",
                          weights_initializer=conv_weight_init,
                          biases_initializer=conv_bias_init,
                          weights_regularizer=conv_regularizer)
    if create_summaries:
        tf.summary.histogram(network.name + "/activations", network)

    network = slim.max_pool2d(network, [3, 3], [2, 2], scope="pool1",
                              padding="SAME")

    network = residual_net.residual_block(
        network, "conv2_1", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=False, is_first=True,
        summarize_activations=create_summaries)
    network = residual_net.residual_block(
        network, "conv2_3", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=False,
        summarize_activations=create_summaries)

    network = residual_net.residual_block(
        network, "conv3_1", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=True,
        summarize_activations=create_summaries)
    network = residual_net.residual_block(
        network, "conv3_3", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=False,
        summarize_activations=create_summaries)

    network = residual_net.residual_block(
        network, "conv4_1", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=True,
        summarize_activations=create_summaries)
    network = residual_net.residual_block(
        network, "conv4_3", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=False,
        summarize_activations=create_summaries)

    feature_dim = network.get_shape().as_list()[-1]
    network = slim.flatten(network)

    network = slim.dropout(network, keep_prob=0.6)
    network = slim.fully_connected(network, feature_dim,
                                   activation_fn=nonlinearity,
                                   normalizer_fn=batch_norm_fn,
                                   weights_regularizer=fc_regularizer,
                                   scope="fc1",
                                   weights_initializer=fc_weight_init,
                                   biases_initializer=fc_bias_init)

    features = network

    # Features in rows, normalize axis 1.
    features = tf.nn.l2_normalize(features, dim=1)

    if add_logits:
        with slim.variable_scope.variable_scope("ball", reuse=reuse):
            weights = slim.model_variable(
                "mean_vectors", (feature_dim, int(num_classes)),
                initializer=tf.truncated_normal_initializer(stddev=1e-3),
                regularizer=None)
            scale = slim.model_variable(
                "scale", (), tf.float32,
                initializer=tf.constant_initializer(0., tf.float32),
                regularizer=slim.l2_regularizer(1e-1))
            if create_summaries:
                tf.summary.scalar("scale", scale)
            scale = tf.nn.softplus(scale)

        # Mean vectors in columns, normalize axis 0.
        weights_normed = tf.nn.l2_normalize(weights, dim=0)
        logits = scale * tf.matmul(features, weights_normed)
    else:
        logits = None
    return features, logits
def densenet(inputs, num_classes=1000, reduction=None, growth_rate=None, num_filters=None, num_layers=None, dropout_rate=None, data_format='NHWC', is_training=True, reuse=None, scope=None): assert reduction is not None assert growth_rate is not None assert num_filters is not None assert num_layers is not None compression = 1.0 - reduction num_dense_blocks = len(num_layers) if data_format == 'NCHW': inputs = tf.transpose(inputs, [0, 3, 1, 2]) with tf.variable_scope(scope, 'densenetxxx', [inputs, num_classes], reuse=reuse) as sc: end_points_collection = sc.name + '_end_points' with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training), \ slim.arg_scope([slim.conv2d, _conv, _conv_block, _dense_block, _transition_block], outputs_collections=end_points_collection), \ slim.arg_scope([_conv], dropout_rate=dropout_rate), \ slim.arg_scope([slim.batch_norm], fused=False): net = inputs # initial convolution net = slim.conv2d(net, num_filters, 7, stride=2, scope='conv1') net = slim.batch_norm(net) net = tf.nn.relu(net) net = slim.max_pool2d(net, 3, stride=2, padding='SAME') # blocks for i in range(num_dense_blocks - 1): # dense blocks net, num_filters = _dense_block(net, num_layers[i], num_filters, growth_rate, scope='dense_block' + str(i + 1)) # Add transition_block net, num_filters = _transition_block(net, num_filters, compression=compression, scope='transition_block' + str(i + 1)) net, num_filters = _dense_block(net, num_layers[-1], num_filters, growth_rate, scope='dense_block' + str(num_dense_blocks)) # final blocks with tf.variable_scope('final_block', [inputs]): net = slim.batch_norm(net) net = tf.nn.relu(net) net = _global_avg_pool2d(net, scope='global_avg_pool') net = slim.conv2d(net, num_classes, 1, biases_initializer=tf.zeros_initializer(), scope='logits') end_points = slim.utils.convert_collection_to_dict( end_points_collection) if num_classes is not None: end_points['predictions'] = slim.softmax(net, scope='predictions') return net, end_points
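# Illustration of the densenet() channel bookkeeping (plain Python; a minimal
# sketch, not part of the model code): each dense block adds
# num_layers[i] * growth_rate channels, and each transition block scales the
# count by compression = 1 - reduction. With the DenseNet-121 configuration
# (growth_rate=32, num_filters=64, num_layers=[6, 12, 24, 16], reduction=0.5):
num_filters, growth_rate, compression = 64, 32, 0.5
for n in [6, 12, 24]:
    num_filters += n * growth_rate                 # dense block
    num_filters = int(num_filters * compression)   # transition block
num_filters += 16 * growth_rate                    # last dense block, no transition
print(num_filters)  # 1024 channels entering the final 1x1 'logits' conv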
def inception_resnet_v1(inputs, is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV1'):
    """Creates the Inception Resnet V1 model.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      bottleneck_layer_size: size of the bottleneck embedding layer.
      reuse: whether or not the network and its variables should be reused. To be
        able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      net: the bottleneck embedding of the model.
      end_points: the set of end_points from the inception model.
    """
    end_points = {}
    with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope(
                    [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                    stride=1, padding='SAME'):
                # 149 x 149 x 32
                net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',
                                  scope='Conv2d_1a_3x3')
                end_points['Conv2d_1a_3x3'] = net
                # 147 x 147 x 32
                # net = slim.conv2d(net, 32, 3, padding='VALID',
                #                   scope='Conv2d_2a_3x3')
                # end_points['Conv2d_2a_3x3'] = net
                # 147 x 147 x 64
                net = slim.conv2d(net, 32, 3, scope='Conv2d_2b_3x3')
                end_points['Conv2d_2b_3x3'] = net
                # 73 x 73 x 64
                net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                      scope='MaxPool_3a_3x3')
                end_points['MaxPool_3a_3x3'] = net
                # 73 x 73 x 80
                net = slim.conv2d(net, 64, 1, padding='VALID',
                                  scope='Conv2d_3b_1x1')
                net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                      scope='MaxPool_3b_3x3')
                end_points['Conv2d_3b_1x1'] = net
                # 71 x 71 x 192
                # net = slim.conv2d(net, 192, 3, padding='VALID',
                #                   scope='Conv2d_4a_3x3')
                # end_points['Conv2d_4a_3x3'] = net
                # 35 x 35 x 256
                net = slim.conv2d(net, 32, 3, stride=2, padding='VALID',
                                  scope='Conv2d_4b_3x3')
                end_points['Conv2d_4b_3x3'] = net
                # 5 x Inception-resnet-A
                net = slim.repeat(net, 1, block35, scale=0.27)
                end_points['Mixed_5a'] = net
                # Reduction-A
                with tf.variable_scope('Mixed_6a'):
                    net = reduction_a(net, 96, 96, 128, 192)
                end_points['Mixed_6a'] = net
                # # 10 x Inception-Resnet-B
                # net = slim.repeat(net, 1, block17, scale=0.10)
                # end_points['Mixed_6b'] = net
                #
                # # Reduction-B
                # with tf.variable_scope('Mixed_7a'):
                #     net = reduction_b(net)
                # end_points['Mixed_7a'] = net
                # 5 x Inception-Resnet-C
                net = slim.repeat(net, 1, block8, scale=0.20)
                end_points['Mixed_8a'] = net
                net = block8(net, activation_fn=None)
                end_points['Mixed_8b'] = net
                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    # pylint: disable=no-member
                    net = slim.max_pool2d(net, net.get_shape()[1:3],
                                          padding='VALID',
                                          scope='AvgPool_1a_8x8')  # changed to max pooling
                    net = slim.flatten(net)
                    net = slim.dropout(net, dropout_keep_prob,
                                       is_training=is_training,
                                       scope='Dropout')
                    end_points['PreLogitsFlatten'] = net
                net = slim.fully_connected(net, bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck', reuse=False)
    return net, end_points
def inception_v3_base(inputs, final_endpoint='Mixed_7c', min_depth=16, depth_multiplier=1.0, scope=None): """Inception model from http://arxiv.org/abs/1512.00567. Constructs an Inception v3 network from inputs to the given final endpoint. This method can construct the network up to the final inception block Mixed_7c. Note that the names of the layers in the paper do not correspond to the names of the endpoints registered by this function although they build the same network. Here is a mapping from the old_names to the new names: Old name | New name ======================================= conv0 | Conv2d_1a_3x3 conv1 | Conv2d_2a_3x3 conv2 | Conv2d_2b_3x3 pool1 | MaxPool_3a_3x3 conv3 | Conv2d_3b_1x1 conv4 | Conv2d_4a_3x3 pool2 | MaxPool_5a_3x3 mixed_35x35x256a | Mixed_5b mixed_35x35x288a | Mixed_5c mixed_35x35x288b | Mixed_5d mixed_17x17x768a | Mixed_6a mixed_17x17x768b | Mixed_6b mixed_17x17x768c | Mixed_6c mixed_17x17x768d | Mixed_6d mixed_17x17x768e | Mixed_6e mixed_8x8x1280a | Mixed_7a mixed_8x8x2048a | Mixed_7b mixed_8x8x2048b | Mixed_7c Args: inputs: a tensor of size [batch_size, height, width, channels]. final_endpoint: specifies the endpoint to construct the network up to. It can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c']. min_depth: Minimum depth value (number of channels) for all convolution ops. Enforced when depth_multiplier < 1, and not an active constraint when depth_multiplier >= 1. depth_multiplier: Float multiplier for the depth (number of channels) for all convolution ops. The value must be greater than zero. Typical usage will be to set this value in (0, 1) to reduce the number of parameters or computation cost of the model. scope: Optional variable_scope. Returns: tensor_out: output tensor corresponding to the final_endpoint. end_points: a set of activations for external use, for example summaries or losses. Raises: ValueError: if final_endpoint is not set to one of the predefined values, or depth_multiplier <= 0 """ # end_points will collect relevant activations for external use, for example # summaries or losses. 
end_points = {} if depth_multiplier <= 0: raise ValueError('depth_multiplier is not greater than zero.') depth = lambda d: max(int(d * depth_multiplier), min_depth) with tf.variable_scope(scope, 'InceptionV3', [inputs]): with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='VALID'): # 299 x 299 x 3 end_point = 'Conv2d_1a_3x3' net = slim.conv2d(inputs, depth(32), [3, 3], stride=2, scope=end_point) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 149 x 149 x 32 end_point = 'Conv2d_2a_3x3' net = slim.conv2d(net, depth(32), [3, 3], scope=end_point) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 147 x 147 x 32 end_point = 'Conv2d_2b_3x3' net = slim.conv2d(net, depth(64), [3, 3], padding='SAME', scope=end_point) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 147 x 147 x 64 end_point = 'MaxPool_3a_3x3' net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 73 x 73 x 64 end_point = 'Conv2d_3b_1x1' net = slim.conv2d(net, depth(80), [1, 1], scope=end_point) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # test # net2 = slim.flatten(net) # net2 = slim.fully_connected(net2, 100) # 73 x 73 x 80. end_point = 'Conv2d_4a_3x3' net = slim.conv2d(net, depth(192), [3, 3], scope=end_point) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 71 x 71 x 192. end_point = 'MaxPool_5a_3x3' net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # 35 x 35 x 192. # Inception blocks with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'): # mixed: 35 x 35 x 256. end_point = 'Mixed_5b' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(64), [5, 5], scope='Conv2d_0b_5x5') with tf.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0b_3x3') branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0c_3x3') with tf.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d(branch_3, depth(32), [1, 1], scope='Conv2d_0b_1x1') net = tf.concat( axis=3, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # mixed_1: 35 x 35 x 288. 
end_point = 'Mixed_5c' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0b_1x1') branch_1 = slim.conv2d(branch_1, depth(64), [5, 5], scope='Conv_1_0c_5x5') with tf.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0b_3x3') branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0c_3x3') with tf.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d(branch_3, depth(64), [1, 1], scope='Conv2d_0b_1x1') net = tf.concat( axis=3, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # mixed_2: 35 x 35 x 288. end_point = 'Mixed_5d' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(64), [5, 5], scope='Conv2d_0b_5x5') with tf.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0b_3x3') branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], scope='Conv2d_0c_3x3') with tf.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d(branch_3, depth(64), [1, 1], scope='Conv2d_0b_1x1') net = tf.concat( axis=3, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # mixed_3: 17 x 17 x 768. end_point = 'Mixed_6a' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(384), [3, 3], stride=2, padding='VALID', scope='Conv2d_1a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3') branch_1 = slim.conv2d(branch_1, depth(96), [3, 3], stride=2, padding='VALID', scope='Conv2d_1a_1x1') with tf.variable_scope('Branch_2'): branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='MaxPool_1a_3x3') net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # mixed4: 17 x 17 x 768. 
end_point = 'Mixed_6b' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(128), [1, 7], scope='Conv2d_0b_1x7') branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], scope='Conv2d_0c_7x1') with tf.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(128), [7, 1], scope='Conv2d_0b_7x1') branch_2 = slim.conv2d(branch_2, depth(128), [1, 7], scope='Conv2d_0c_1x7') branch_2 = slim.conv2d(branch_2, depth(128), [7, 1], scope='Conv2d_0d_7x1') branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], scope='Conv2d_0e_1x7') with tf.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1') net = tf.concat( axis=3, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # mixed_5: 17 x 17 x 768. end_point = 'Mixed_6c' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(160), [1, 7], scope='Conv2d_0b_1x7') branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], scope='Conv2d_0c_7x1') with tf.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(160), [7, 1], scope='Conv2d_0b_7x1') branch_2 = slim.conv2d(branch_2, depth(160), [1, 7], scope='Conv2d_0c_1x7') branch_2 = slim.conv2d(branch_2, depth(160), [7, 1], scope='Conv2d_0d_7x1') branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], scope='Conv2d_0e_1x7') with tf.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1') net = tf.concat( axis=3, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # mixed_6: 17 x 17 x 768. 
end_point = 'Mixed_6d' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(160), [1, 7], scope='Conv2d_0b_1x7') branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], scope='Conv2d_0c_7x1') with tf.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(160), [7, 1], scope='Conv2d_0b_7x1') branch_2 = slim.conv2d(branch_2, depth(160), [1, 7], scope='Conv2d_0c_1x7') branch_2 = slim.conv2d(branch_2, depth(160), [7, 1], scope='Conv2d_0d_7x1') branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], scope='Conv2d_0e_1x7') with tf.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1') net = tf.concat( axis=3, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # mixed_7: 17 x 17 x 768. end_point = 'Mixed_6e' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(192), [1, 7], scope='Conv2d_0b_1x7') branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], scope='Conv2d_0c_7x1') with tf.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(192), [7, 1], scope='Conv2d_0b_7x1') branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], scope='Conv2d_0c_1x7') branch_2 = slim.conv2d(branch_2, depth(192), [7, 1], scope='Conv2d_0d_7x1') branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], scope='Conv2d_0e_1x7') with tf.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1') net = tf.concat( axis=3, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # mixed_8: 8 x 8 x 1280. end_point = 'Mixed_7a' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') branch_0 = slim.conv2d(branch_0, depth(320), [3, 3], stride=2, padding='VALID', scope='Conv2d_1a_3x3') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') branch_1 = slim.conv2d(branch_1, depth(192), [1, 7], scope='Conv2d_0b_1x7') branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], scope='Conv2d_0c_7x1') branch_1 = slim.conv2d(branch_1, depth(192), [3, 3], stride=2, padding='VALID', scope='Conv2d_1a_3x3') with tf.variable_scope('Branch_2'): branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', scope='MaxPool_1a_3x3') net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # mixed_9: 8 x 8 x 2048. 
end_point = 'Mixed_7b' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1') branch_1 = tf.concat(axis=3, values=[ slim.conv2d( branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'), slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0b_3x1') ]) with tf.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3') branch_2 = tf.concat(axis=3, values=[ slim.conv2d( branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'), slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1') ]) with tf.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1') net = tf.concat( axis=3, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points # mixed_10: 8 x 8 x 2048. end_point = 'Mixed_7c' with tf.variable_scope(end_point): with tf.variable_scope('Branch_0'): branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1') with tf.variable_scope('Branch_1'): branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1') branch_1 = tf.concat(axis=3, values=[ slim.conv2d( branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'), slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0c_3x1') ]) with tf.variable_scope('Branch_2'): branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1') branch_2 = slim.conv2d(branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3') branch_2 = tf.concat(axis=3, values=[ slim.conv2d( branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'), slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1') ]) with tf.variable_scope('Branch_3'): branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1') net = tf.concat( axis=3, values=[branch_0, branch_1, branch_2, branch_3]) end_points[end_point] = net if end_point == final_endpoint: return net, end_points raise ValueError('Unknown final endpoint %s' % final_endpoint)
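# A quick check of the depth() clamp used throughout inception_v3_base
# (plain Python; the values follow directly from the lambda's definition):
min_depth = 16
for depth_multiplier in (1.0, 0.5, 0.1):
    depth = lambda d: max(int(d * depth_multiplier), min_depth)
    print(depth_multiplier, depth(32), depth(192))
# 1.0 -> 32, 192; 0.5 -> 16, 96; 0.1 -> 16 (clamped by min_depth), 19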
def build(input_tensor, num_class, trainable, debug):
    outputs_collections = "neunet"
    # Take in input data as 'net' object
    # Expect an input of [BatchSize, Channels, HSize, VSize]
    net = input_tensor
    if debug:
        print('Input Tensor: ', input_tensor.shape)
    # Define the number of filters to convolve
    filters = config.NETWORK['Filters']
    # Define kernel size and stride
    kernsize = config.NETWORK['Kernal']
    strider = config.NETWORK['Stride']
    poolstride = config.NETWORK['PoolStride']
    # Define fully connected size
    fcsize = config.NETWORK['FCSize']
    # Define number of conv and FC layers
    clnumb = config.NETWORK['CLNumb']
    fcnumb = config.NETWORK['FCNumb']
    cldropout = config.NETWORK['CLDropout']
    fcdropout = config.NETWORK['FCDropout']
    idropout = config.NETWORK['IDropout']
    # Loop to set hidden layers in the network. Route layer outputs into
    # outputs_collections so the end_points dict below is actually populated.
    with tf.variable_scope('Network'), \
         slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.fully_connected],
                        outputs_collections=outputs_collections):
        # net = tf.layers.dropout(net, idropout)
        with tf.variable_scope('Deep_Conv_Layers'):
            for step in range(clnumb):
                # Convolve the network via slim
                net = slim.conv2d(
                    inputs=net,            # input tensor
                    num_outputs=filters,   # number of filters/feature maps
                    kernel_size=[3, 3],    # kernel size
                    stride=2,              # stride size
                    trainable=trainable,   # train or inference
                    activation_fn=tf.nn.relu,
                    weights_initializer=tf.contrib.layers.xavier_initializer(),
                    biases_initializer=tf.zeros_initializer(),
                    scope='Conv_Layer_%d' % step)
                net = slim.conv2d(
                    inputs=net,
                    num_outputs=filters,
                    kernel_size=[3, 3],
                    stride=1,
                    trainable=trainable,
                    activation_fn=tf.nn.relu,
                    weights_initializer=tf.contrib.layers.xavier_initializer(),
                    biases_initializer=tf.zeros_initializer(),
                    scope='Conv_Layer2_%d' % step)
                # Max pool the network, except after the last conv block,
                # which gets a global average pool instead
                if (step + 1) < clnumb:
                    net = slim.max_pool2d(
                        inputs=net,
                        kernel_size=[2, 2],
                        stride=2,
                        scope='Pool_Layer_%d' % step)
                else:
                    net = tf.layers.average_pooling2d(
                        inputs=net,
                        pool_size=[net.get_shape()[-2].value,
                                   net.get_shape()[-3].value],
                        strides=1,
                        padding='valid',
                        name='conv%d_pool' % step)
                # net = tf.layers.dropout(net, cldropout)
                # Increase filters for higher-order features
                filters *= 2
                if debug:
                    print('After Convolutional Layer ', step, ' shape ', net.shape)
        with tf.variable_scope('Deep_FC_Layers'):
            # Flatten the network to 1D
            net = slim.flatten(net, scope='Flatten_Step')
            if debug:
                print('After flattening', net.shape)
            # Run through the fully connected layers
            for step in range(fcnumb):
                net = slim.fully_connected(net, fcsize, scope='FC_Layer_%d' % step)
                if debug:
                    print('After Fully Connected Layer %d' % step, net.shape)
                if trainable:
                    net = slim.dropout(net,
                                       keep_prob=fcdropout,
                                       is_training=trainable,
                                       scope='fc%d_dropout' % step)
            # Run through a final FC layer
            net = slim.fully_connected(net, int(num_class), scope='FC_Final')
            if debug:
                print('After Fully Connected Layer Final', net.shape)
    end_points = slim.utils.convert_collection_to_dict(outputs_collections)
    # Send back the network
    return net, end_points
def define(inputs, reuse, weightDecay, scope='InceptionResnetV2', trainFrom=None, freezeBatchNorm=False): """Creates the Inception Resnet V2 model. Args: inputs: a 4-D tensor of size [batch_size, height, width, 3]. num_classes: number of predicted classes. is_training: whether is training or not. reuse: whether or not the network and its variables should be reused. To be able to reuse 'scope' must be given. scope: Optional variable_scope. Returns: logits: the logits outputs of the model. end_points: the set of end_points from the inception model. """ with tf.name_scope('preprocess'): #BGR -> RGB inputs = tf.reverse(inputs, axis=[3]) #Normalize inputs = 2.0 * (inputs / 255.0 - 0.5) end_points = {} scopes = [] trainBatchNormScope = slim.arg_scope([slim.batch_norm], is_training=True) weightDecayScope = slim.arg_scope( [slim.conv2d, slim.fully_connected], weights_regularizer=slim.l2_regularizer(weightDecay), biases_regularizer=slim.l2_regularizer(weightDecay)) trainBnEntered = False currBlock = "" def beginBlock(name): nonlocal trainBnEntered nonlocal currBlock currBlock = name if (trainFrom is not None) and (not trainBnEntered) and ( trainFrom == name or trainFrom == "start"): print("Enabling training on " + trainFrom) if not freezeBatchNorm: trainBatchNormScope.__enter__() weightDecayScope.__enter__() trainBnEntered = True def endBlock(net, scope=True, name=None): if name is None: name = currBlock end_points[name] = net if scope: scopes.append(name) def endAll(): if trainBnEntered: if not freezeBatchNorm: trainBatchNormScope.__exit__(None, None, None) weightDecayScope.__exit__(None, None, None) with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse) as scope: with slim.arg_scope([slim.batch_norm], is_training=False): with slim.arg_scope( [slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding='SAME'): # 149 x 149 x 32 beginBlock('Conv2d_1a_3x3') net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID', scope='Conv2d_1a_3x3') endBlock(net) # 147 x 147 x 32 beginBlock('Conv2d_2a_3x3') net = slim.conv2d(net, 32, 3, padding='VALID', scope='Conv2d_2a_3x3') endBlock(net) # 147 x 147 x 64 beginBlock('Conv2d_2b_3x3') net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3') endBlock(net) # 73 x 73 x 64 beginBlock('MaxPool_3a_3x3') net = slim.max_pool2d(net, 3, stride=2, padding='VALID', scope='MaxPool_3a_3x3') endBlock(net) # 73 x 73 x 80 beginBlock('Conv2d_3b_1x1') net = slim.conv2d(net, 80, 1, padding='VALID', scope='Conv2d_3b_1x1') endBlock(net) # 71 x 71 x 192 beginBlock('Conv2d_4a_3x3') net = slim.conv2d(net, 192, 3, padding='VALID', scope='Conv2d_4a_3x3') endBlock(net) # 35 x 35 x 192 beginBlock('MaxPool_5a_3x3') net = slim.max_pool2d(net, 3, stride=2, padding='VALID', scope='MaxPool_5a_3x3') endBlock(net) # 35 x 35 x 320 beginBlock('Mixed_5b') with tf.variable_scope('Mixed_5b'): with tf.variable_scope('Branch_0'): tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1') with tf.variable_scope('Branch_1'): tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1') tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5, scope='Conv2d_0b_5x5') with tf.variable_scope('Branch_2'): tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1') tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3, scope='Conv2d_0b_3x3') tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3, scope='Conv2d_0c_3x3') with tf.variable_scope('Branch_3'): tower_pool = slim.avg_pool2d( net, 3, stride=1, padding='SAME', scope='AvgPool_0a_3x3') tower_pool_1 = slim.conv2d(tower_pool, 64, 1, 
scope='Conv2d_0b_1x1') net = tf.concat([ tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1 ], 3) endBlock(net) beginBlock('Repeat') net = slim.repeat(net, 10, InceptionResnetV2.block35, scale=0.17) endBlock(net) # 17 x 17 x 1024 beginBlock('Mixed_6a') with tf.variable_scope('Mixed_6a'): with tf.variable_scope('Branch_0'): tower_conv = slim.conv2d(net, 384, 3, stride=2, padding='VALID', scope='Conv2d_1a_3x3') with tf.variable_scope('Branch_1'): tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3, scope='Conv2d_0b_3x3') tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3, stride=2, padding='VALID', scope='Conv2d_1a_3x3') with tf.variable_scope('Branch_2'): tower_pool = slim.max_pool2d( net, 3, stride=2, padding='VALID', scope='MaxPool_1a_3x3') net = tf.concat( [tower_conv, tower_conv1_2, tower_pool], 3) endBlock(net) beginBlock('Repeat_1') net = slim.repeat(net, 20, InceptionResnetV2.block17, scale=0.10) endBlock(net) endBlock(net, scope=False, name='aux') beginBlock('Mixed_7a') with tf.variable_scope('Mixed_7a'): with tf.variable_scope('Branch_0'): tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2, padding='VALID', scope='Conv2d_1a_3x3') with tf.variable_scope('Branch_1'): tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2, padding='VALID', scope='Conv2d_1a_3x3') with tf.variable_scope('Branch_2'): tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3, scope='Conv2d_0b_3x3') tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2, padding='VALID', scope='Conv2d_1a_3x3') with tf.variable_scope('Branch_3'): tower_pool = slim.max_pool2d( net, 3, stride=2, padding='VALID', scope='MaxPool_1a_3x3') net = tf.concat([ tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool ], 3) endBlock(net) beginBlock('Repeat_2') net = slim.repeat(net, 9, InceptionResnetV2.block8, scale=0.20) endBlock(net) beginBlock('Block8') net = InceptionResnetV2.block8(net, activation_fn=None) endBlock(net) beginBlock('Conv2d_7b_1x1') net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1') endBlock(net) endBlock(net, scope=False, name='PrePool') endAll() return end_points, scope, scopes
def ChainedResidualPooling(inputs, n_i=256):
    net_relu = tf.nn.relu(inputs)
    net = slim.max_pool2d(net_relu, [5, 5], stride=1, padding='SAME')
    net = slim.conv2d(net, n_i, 3)
    return tf.add(net, net_relu)
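# Minimal usage sketch for ChainedResidualPooling above, assuming TF 1.x with
# slim = tf.contrib.slim: the stride-1 SAME pool and SAME 3x3 conv both keep
# the spatial size, so the residual add is valid whenever n_i matches the
# input channel count.
x = tf.placeholder(tf.float32, [None, 32, 32, 256])
y = ChainedResidualPooling(x, n_i=256)
print(y.get_shape())  # (?, 32, 32, 256), same shape as the input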
def build_sampler(self, max_len=20): features = self.features # max_len = self.n_time_step +1 # batch normalize feature vectors with tf.variable_scope('model') as scope: x_image = tf.reshape( features, [-1, 128, 128, 1]) # print x_image.get_shape() h_conv1 = tf.nn.relu( slim.conv2d(x_image, 32, [3, 3], scope='conv1')) # print h_conv1.get_shape() h_conv2 = tf.nn.relu( slim.conv2d(h_conv1, 64, [3, 3], scope='conv2')) # print h_conv2.get_shape() h_pool1 = slim.max_pool2d( h_conv2, [2, 2], scope='pool1') # print h_pool1.get_shape() h_conv3 = tf.nn.relu( slim.conv2d(h_pool1, 128, [3, 3], scope='conv3')) # print h_conv3.get_shape() h_conv4 = tf.nn.relu( slim.conv2d(h_conv3, 128, [2, 2], scope='conv4')) # print h_conv4.get_shape() h_pool2 = slim.max_pool2d( h_conv4, [2, 2], scope='pool2') # print h_pool2.get_shape() h_conv5 = tf.nn.relu( slim.conv2d(h_pool2, 256, [3, 3], scope='conv5')) # print h_conv5.get_shape() h_conv6 = tf.nn.relu( slim.conv2d(h_conv5, 512, [3, 3], scope='conv6')) # print h_conv6.get_shape() h_pool3 = slim.max_pool2d( h_conv6, [2, 2], scope='pool3') # print h_pool3.get_shape() resh = tf.reshape(h_pool3, [-1, h_pool3.get_shape().as_list()[1]**2, 512]) features = tf.contrib.layers.batch_norm( inputs=resh, decay=0.95, center=True, scale=True, is_training=('test' == 'train'), updates_collections=None, scope=('conv_features' + 'batch_norm')) c, h = self._get_initial_lstm(features=features) features_proj = self._project_features(features=features) sampled_word_list = [] alpha_list = [] beta_list = [] lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=self.H, reuse=True) for t in range(max_len): if t == 0: x = tf.fill([tf.shape(features)[0], 4], tf.to_float(0)) else: x = sampled_word x = tf.to_float(x) x = slim.fully_connected(x, 512, scope='fc') context, alpha = self._attention_layer(features, features_proj, h, reuse=(t != 0)) alpha_list.append(alpha) if self.selector: context, beta = self._selector(context, h, reuse=(t != 0)) beta_list.append(beta) with tf.variable_scope('lstm', reuse=(t != 0)): _, (c, h) = lstm_cell(inputs=tf.concat([x, context], 1), state=[c, h]) logits = self._decode_lstm(x, h, context, reuse=(t != 0)) sampled_word = logits sampled_word_list.append(sampled_word) alphas = tf.transpose(tf.stack(alpha_list), (1, 0, 2)) # (N, T, L) betas = tf.transpose(tf.squeeze(beta_list), (1, 0)) # (N, T) # generated_boxes = tf.transpose(tf.stack(sampled_word_list), (1, 0)) # (N, max_len) generated_boxes = tf.transpose(sampled_word_list, (1, 0, 2)) # (N, max_len) return alphas, betas, generated_boxes
def subsample(inputs, factor, scope=None):
    if factor == 1:
        return inputs
    else:
        return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)
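# subsample() downsamples purely by stride: a [1, 1] max pool with stride
# `factor` keeps every factor-th activation. Minimal sketch (TF 1.x):
x = tf.placeholder(tf.float32, [None, 28, 28, 64])
print(subsample(x, 1).get_shape())  # (?, 28, 28, 64) -- returned unchanged
print(subsample(x, 2).get_shape())  # (?, 14, 14, 64)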
def resnet_base(self, inputs, is_training): if self.scope_name == 'resnet_v1_50': middle_num_units = 6 elif self.scope_name == 'resnet_v1_101': middle_num_units = 23 else: raise NotImplementedError( 'We only support resnet_v1_50 or resnet_v1_101. Check your network name....' ) blocks = [ resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), resnet_v1_block('block2', base_depth=128, num_units=4, stride=2), # use stride 1 for the last conv4 layer. resnet_v1_block('block3', base_depth=256, num_units=middle_num_units, stride=1) ] # when use fpn . stride list is [1, 2, 2] with slim.arg_scope(self.resnet_arg_scope(is_training=False)): with tf.variable_scope(self.scope_name, 'resnet_v1_101'): # Do the first few layers manually, because 'SAME' padding can behave inconsistently # for images of different sizes: sometimes 0, sometimes 1 net = resnet_utils.conv2d_same(inputs, num_outputs=64, kernel_size=7, stride=2, scope='conv1') net = tf.pad(net, [[0, 0], [1, 1], [1, 1], [0, 0]]) net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, padding='VALID', scope='pool1') # generate freeze flag block_freeze = [False ] * self.fixed_block + (4 - self.fixed_block) * [True] with slim.arg_scope( self.resnet_arg_scope( is_training=(is_training and block_freeze[0]))): net, _ = resnet_v1.resnet_v1(net, blocks[0:1], global_pool=False, include_root_block=False, scope=self.scope_name) with slim.arg_scope( self.resnet_arg_scope( is_training=(is_training and block_freeze[1]))): net, _ = resnet_v1.resnet_v1(net, blocks[1:2], global_pool=False, include_root_block=False, scope=self.scope_name) # add_heatmap(C3, name='Layer/C3') # C3 = tf.Print(C3, [tf.shape(C3)], summarize=10, message='C3_shape') with slim.arg_scope( self.resnet_arg_scope( is_training=(is_training and block_freeze[2]))): net, _ = resnet_v1.resnet_v1(net, blocks[2:3], global_pool=False, include_root_block=False, scope=self.scope_name) return net
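# How the freeze flags in resnet_base() behave (plain Python): despite the
# name, a True entry in block_freeze means the block's batch norm may run in
# training mode; the first `fixed_block` entries are False, so those blocks
# stay frozen regardless of is_training.
for fixed_block in (0, 1, 2):
    block_freeze = [False] * fixed_block + (4 - fixed_block) * [True]
    print(fixed_block, block_freeze)
# 0 -> [True, True, True, True]
# 1 -> [False, True, True, True]
# 2 -> [False, False, True, True]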
def unet(cls, image, keep_prob, phase_train, output_channel, num_layers, is_debug=False): net = {} batch_size = tf.shape(image)[0] current = image net['image'] = current for index_module in range(num_layers): # Check type of module is_encoder = index_module < num_layers // 2 is_decoder = index_module > num_layers // 2 is_classifier = index_module == num_layers // 2 # Set number of input and output channels in_ch = current.get_shape()[-1] mod_output = 'mod%d_out' if is_encoder: current = cls.unet_encode(current, keep_prob, phase_train, index_module) name = mod_output % index_module net[name] = current current = slim.max_pool2d(current, [2, 2], stride=2, padding='SAME') if is_classifier: current = cls.unet_encode(current, keep_prob, phase_train, index_module) name = mod_output % index_module net[name] = current current = cls.upconv(current, index_module) if is_decoder: fuse_pool = mod_output % (num_layers - 1 - index_module) # print(index_module, num_layers-1-index_module) # print(net[fuse_pool].get_shape()) # print(current.get_shape()) current = tf.concat([current, net[fuse_pool]], axis=3, name="fuse_%d" % index_module) current = cls.unet_decode(current, keep_prob, phase_train, index_module) name = mod_output % index_module net[name] = current if index_module != num_layers - 1: current = cls.upconv(current, index_module) if is_debug: print(name) print(net[name].get_shape()) utils.add_activation_summary(current) # conv1x1 current = slim.conv2d(current, output_channel, 1) name = 'segment' net[name] = current if is_debug: print(name) print(net[name].get_shape()) print('unet complete') return net
def Mynet(x, keep_prob):
    def inception_module(x, in_f, f_1, f_2_1, f_2_2, f_3_1, f_3_2, f_4_2):
        # in_f documents the expected input channel count; the concat output
        # has f_1 + f_2_2 + f_3_2 + f_4_2 channels.
        x1 = slim.conv2d(x, f_1, [1, 1], stride=1, padding='SAME',
                         activation_fn=tf.nn.relu)
        x2_1 = slim.conv2d(x, f_2_1, [1, 1], stride=1, padding='SAME',
                           activation_fn=tf.nn.relu)
        # f_2_1 is only the 1x1 reduction width; the 3x3 conv outputs f_2_2
        # channels, per the GoogLeNet table.
        x2_2 = slim.conv2d(x2_1, f_2_2, [3, 3], stride=1, padding='SAME',
                           activation_fn=tf.nn.relu)
        x3_1 = slim.conv2d(x, f_3_1, [1, 1], stride=1, padding='SAME',
                           activation_fn=tf.nn.relu)
        x3_2 = slim.conv2d(x3_1, f_3_2, [5, 5], stride=1, padding='SAME',
                           activation_fn=tf.nn.relu)
        x4_1 = slim.max_pool2d(x, [3, 3], stride=1, padding='SAME')
        x4_2 = slim.conv2d(x4_1, f_4_2, [1, 1], stride=1, padding='SAME',
                           activation_fn=tf.nn.relu)
        x = tf.concat([x1, x2_2, x3_2, x4_2], axis=-1)
        return x

    x = slim.conv2d(x, 64, [7, 7], stride=2, padding="VALID",
                    activation_fn=tf.nn.relu)
    x = slim.max_pool2d(x, [3, 3], stride=2, padding='SAME')
    x = tf.nn.local_response_normalization(x)
    x = slim.conv2d(x, 64, [1, 1], stride=1, padding='SAME',
                    activation_fn=tf.nn.relu)
    x = slim.conv2d(x, 192, [3, 3], stride=1, padding='SAME',
                    activation_fn=tf.nn.relu)
    x = tf.nn.local_response_normalization(x)
    x = slim.max_pool2d(x, [3, 3], stride=2, padding='SAME')
    # inception 3a, 3b
    x = inception_module(x, 192, 64, 96, 128, 16, 32, 32)
    x = inception_module(x, 256, 128, 128, 192, 32, 96, 64)
    x = slim.max_pool2d(x, [3, 3], stride=2, padding='SAME')
    # inception 4a
    x = inception_module(x, 480, 192, 96, 208, 16, 48, 64)
    # auxiliary loss1
    x_aux1 = slim.avg_pool2d(x, 5, padding='SAME', stride=1)
    x_aux1 = slim.conv2d(x_aux1, 128, [1, 1], stride=1, padding='SAME',
                         activation_fn=tf.nn.relu)
    mb, h, w, c = x_aux1.get_shape().as_list()
    x_aux1 = tf.reshape(x_aux1, [-1, h * w * c])
    x_aux1 = slim.fully_connected(x_aux1, 1024, activation_fn=tf.nn.relu)
    x_aux1 = slim.dropout(x_aux1, keep_prob=keep_prob)
    x_aux1 = slim.fully_connected(x_aux1, num_classes)
    # inception 4b, 4c, 4d
    x = inception_module(x, 512, 160, 112, 224, 24, 64, 64)
    x = inception_module(x, 512, 128, 128, 256, 24, 64, 64)
    x = inception_module(x, 512, 112, 144, 288, 32, 64, 64)
    # auxiliary loss2
    x_aux2 = slim.avg_pool2d(x, 5, padding='SAME', stride=1)
    x_aux2 = slim.conv2d(x_aux2, 128, [1, 1], stride=1, padding='SAME',
                         activation_fn=tf.nn.relu)
    mb, h, w, c = x_aux2.get_shape().as_list()
    x_aux2 = tf.reshape(x_aux2, [-1, h * w * c])
    x_aux2 = slim.fully_connected(x_aux2, 1024, activation_fn=tf.nn.relu)
    x_aux2 = slim.dropout(x_aux2, keep_prob=keep_prob)
    x_aux2 = slim.fully_connected(x_aux2, num_classes)
    # inception 4e, 5a, 5b
    x = inception_module(x, 528, 256, 160, 320, 32, 128, 128)
    x = slim.max_pool2d(x, 3, padding='SAME', stride=2)
    x = inception_module(x, 832, 256, 160, 320, 32, 128, 128)
    x = inception_module(x, 832, 384, 192, 384, 48, 128, 128)
    # x = slim.avg_pool2d(x, 7, stride=1, padding='SAME')
    # mb, h, w, c = x.get_shape().as_list()
    # x = tf.reshape(x, [-1, h * w * c])
    x = tf.reduce_mean(x, axis=[1, 2])
    x = slim.fully_connected(x, num_classes)
    return x, x_aux1, x_aux2
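# Channel bookkeeping for the inception_module above (plain Python): the
# concat output has f_1 + f_2_2 + f_3_2 + f_4_2 channels, which must equal
# the in_f passed to the next module. For the "3a" call (64, 96->128,
# 16->32, pool->32):
print(64 + 128 + 32 + 32)  # 256, the in_f of the following "3b" module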
def build_model(self): features = self.features bbox = self.bbox batch_size = tf.shape(features)[0] bbox_in = tf.to_float(bbox[:, :self.T]) bbox_out = tf.to_float(bbox[:, 1:]) # print bbox_out.shape, bbox.shape mask = tf.to_float(tf.not_equal(bbox_out, self._null)) with tf.variable_scope('model') as scope: x_image = tf.reshape(features, [-1, 128, 128, 1]) h_conv1 = tf.nn.relu( slim.conv2d(x_image, 32, [3, 3], scope='conv1')) h_conv2 = tf.nn.relu( slim.conv2d(h_conv1, 64, [3, 3], scope='conv2')) h_pool1 = slim.max_pool2d(h_conv2, [2, 2], scope='pool1') h_conv3 = tf.nn.relu( slim.conv2d(h_pool1, 128, [3, 3], scope='conv3')) h_conv4 = tf.nn.relu( slim.conv2d(h_conv3, 128, [2, 2], scope='conv4')) h_pool2 = slim.max_pool2d(h_conv4, [2, 2], scope='pool2') h_conv5 = tf.nn.relu( slim.conv2d(h_pool2, 256, [3, 3], scope='conv5')) h_conv6 = tf.nn.relu( slim.conv2d(h_conv5, 512, [3, 3], scope='conv6')) h_pool3 = slim.max_pool2d(h_conv6, [2, 2], scope='pool3') resh = tf.reshape(h_pool3, [-1, h_pool3.get_shape().as_list()[1]**2, 512]) with tf.variable_scope('decoder') as scope: dectran1 = slim.conv2d_transpose( h_pool3, 512, [3, 3], [2, 2], scope='deconvtran1') #print dectran1.get_shape() deconv1 = tf.nn.relu( slim.conv2d(dectran1, 256, [3, 3], scope='deconv1')) #print deconv1.get_shape() deconv2 = tf.nn.relu( slim.conv2d(deconv1, 128, [3, 3], scope='deconv2')) #print deconv2.get_shape() dectran2 = slim.conv2d_transpose( deconv2, 128, [3, 3], [2, 2], scope='deconvtran2') #print dectran2.get_shape() deconv3 = tf.nn.relu( slim.conv2d(dectran2, 128, [3, 3], scope='deconv3')) #print deconv3.get_shape() deconv4 = tf.nn.relu( slim.conv2d(deconv3, 64, [3, 3], scope='deconv4')) # print deconv4.get_shape() dectran3 = slim.conv2d_transpose( deconv4, 64, [3, 3], [2, 2], scope='deconvtran3') #print dectran3.get_shape() deconv5 = tf.nn.relu( slim.conv2d(dectran3, 32, [3, 3], scope='deconv5')) #print deconv5.get_shape() deconv6 = tf.nn.relu( slim.conv2d(deconv5, 1, [3, 3], scope='deconv6')) #print deconv6.get_shape() resh1 = tf.reshape(x_image, [-1, deconv6.get_shape().as_list()[1]**2]) resh2 = tf.reshape(deconv6, [-1, deconv6.get_shape().as_list()[1]**2]) features = tf.contrib.layers.batch_norm( inputs=resh, decay=0.95, center=True, scale=True, is_training=('train' == 'train'), updates_collections=None, scope=('conv_features' + 'batch_norm')) c, h = self._get_initial_lstm(features=features) x = tf.to_float(bbox_in) x = slim.fully_connected(x, 512, scope='fc') features_proj = self._project_features(features=features) loss = 0.01 * tf.sqrt( tf.reduce_mean(tf.square(tf.subtract(resh1, resh2)))) alpha_list = [] lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=self.H) for t in range(self.T): context, alpha = self._attention_layer(features, features_proj, h, reuse=(t != 0)) alpha_list.append(alpha) if self.selector: context, beta = self._selector(context, h, reuse=(t != 0)) # print context.get_shape() with tf.variable_scope('lstm', reuse=(t != 0)): _, (c, h) = lstm_cell(inputs=tf.concat([x[:, t, :], context], 1), state=[c, h]) logits = self._decode_lstm(x[:, t, :], h, context, dropout=self.dropout, reuse=(t != 0)) loss += tf.reduce_sum( tf.reduce_mean( tf.square( tf.subtract(logits, bbox_out[:, t]) * mask[:, t]))) if self.alpha_c > 0: alphas = tf.transpose(tf.stack(alpha_list), (1, 0, 2)) # (N, T, L) alphas_all = tf.reduce_sum(alphas, 1) # (N, L) alpha_reg = self.alpha_c * tf.reduce_sum( (16. / self.L - alphas_all)**2) loss += alpha_reg return loss / tf.to_float(batch_size)
def simple_conv_net_on(self, input_layer, opts):
    if opts.use_batch_norm:
        normalizer_fn = slim.batch_norm
        normalizer_params = {'is_training': IS_TRAINING}
    else:
        normalizer_fn = None
        normalizer_params = None

    # optionally drop blue channel, in a simple cart pole env we only need r/g
    # if opts.drop_blue_channel:
    #     input_layer = input_layer[:, :, :, 0:2, :, :]

    # state is (batch, height, width, rgb, camera_idx, repeat)
    # rollup rgb, camera_idx and repeat into num_channels
    # i.e. (batch, height, width, rgb*camera_idx*repeat)
    height, width = map(int, input_layer.get_shape()[1:3])
    num_channels = input_layer.get_shape()[3:].num_elements()
    input_layer = tf.reshape(input_layer, [-1, height, width, num_channels])
    print(self.namespace, " : input_layer",
          util.shape_and_product_of(input_layer), file=sys.stderr)

    # whiten image, per channel, using batch_normalisation layer with
    # params calculated directly from batch.
    axis = list(range(input_layer.get_shape().ndims - 1))
    batch_mean, batch_var = tf.nn.moments(input_layer, axis)  # moments per channel
    whitened_input_layer = tf.nn.batch_normalization(input_layer,
                                                     batch_mean, batch_var,
                                                     scale=None, offset=None,
                                                     variance_epsilon=1e-6)

    # TODO: num_outputs here are really dependent on the incoming channels,
    # which depend on the #repeats & cameras so they should be a param.
    model = slim.conv2d(whitened_input_layer, num_outputs=10,
                        kernel_size=[5, 5],
                        normalizer_fn=normalizer_fn,
                        normalizer_params=normalizer_params,
                        scope='conv1')
    model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool1')
    self.pool1 = model
    print("pool1", util.shape_and_product_of(model), file=sys.stderr)

    model = slim.conv2d(model, num_outputs=10, kernel_size=[5, 5],
                        normalizer_fn=normalizer_fn,
                        normalizer_params=normalizer_params,
                        scope='conv2')
    model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool2')
    self.pool2 = model
    print("pool2", util.shape_and_product_of(model), file=sys.stderr)

    model = slim.conv2d(model, num_outputs=10, kernel_size=[3, 3],
                        normalizer_fn=normalizer_fn,
                        normalizer_params=normalizer_params,
                        scope='conv3')
    model = slim.max_pool2d(model, kernel_size=[2, 2], scope='pool3')
    self.pool3 = model
    print("pool3", util.shape_and_product_of(model), file=sys.stderr)

    return model
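# The per-channel whitening step from simple_conv_net_on, isolated as a
# hedged sketch (TF 1.x): moments are taken over every axis except the last,
# giving one mean/variance pair per channel, then normalized in place.
x = tf.placeholder(tf.float32, [None, 60, 80, 6])
axis = list(range(x.get_shape().ndims - 1))  # [0, 1, 2]
mean, var = tf.nn.moments(x, axis)
white = tf.nn.batch_normalization(x, mean, var, offset=None, scale=None,
                                  variance_epsilon=1e-6)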
def network(self, inputs): with slim.arg_scope( [slim.conv2d, slim.fully_connected], activation_fn=tf.nn.relu, weights_initializer=tf.truncated_normal_initializer(0.0, 0.01)): conv1 = slim.conv2d(inputs, 96, [11, 11], 4, padding='VALID', scope='conv1') max1 = slim.max_pool2d(conv1, [3, 3], 2, padding='VALID', scope='max1') conv1a = slim.conv2d(max1, 256, [4, 4], 4, padding='VALID', scope='conv1a') conv2 = slim.conv2d(max1, 256, [5, 5], 1, scope='conv2') max2 = slim.max_pool2d(conv2, [3, 3], 2, padding='VALID', scope='max2') conv3 = slim.conv2d(max2, 384, [3, 3], 1, scope='conv3') conv3a = slim.conv2d(conv3, 256, [2, 2], 2, padding='VALID', scope='conv3a') conv4 = slim.conv2d(conv3, 384, [3, 3], 1, scope='conv4') conv5 = slim.conv2d(conv4, 256, [3, 3], 1, scope='conv5') pool5 = slim.max_pool2d(conv5, [3, 3], 2, padding='VALID', scope='pool5') concat_feat = tf.concat([conv1a, conv3a, pool5], 3) conv_all = slim.conv2d(concat_feat, 192, [1, 1], 1, padding='VALID', scope='conv_all') shape = int(np.prod(conv_all.get_shape()[1:])) fc_full = slim.fully_connected(tf.reshape( tf.transpose(conv_all, [0, 3, 1, 2]), [-1, shape]), 3072, scope='fc_full') #fc_full = slim.fully_connected(tf.reshape(conv_all, [-1, shape]), 3072, scope='fc_full') fc_detection = slim.fully_connected(fc_full, 512, scope='fc_detection') fc_landmarks = slim.fully_connected(fc_full, 512, scope='fc_landmarks') fc_visibility = slim.fully_connected(fc_full, 512, scope='fc_visibility') fc_pose = slim.fully_connected(fc_full, 512, scope='fc_pose') fc_gender = slim.fully_connected(fc_full, 512, scope='fc_gender') out_detection = slim.fully_connected(fc_detection, 2, scope='out_detection', activation_fn=None) out_landmarks = slim.fully_connected(fc_landmarks, 42, scope='out_landmarks', activation_fn=None) out_visibility = slim.fully_connected(fc_visibility, 21, scope='out_visibility', activation_fn=None) out_pose = slim.fully_connected(fc_pose, 3, scope='out_pose', activation_fn=None) out_gender = slim.fully_connected(fc_gender, 2, scope='out_gender', activation_fn=None) return [ out_detection, out_landmarks, out_visibility, out_pose, tf.nn.softmax(out_gender), conv_all ]
def maxpool(self, x, kernel_size):
    p = np.floor((kernel_size - 1) / 2).astype(np.int32)
    p_x = self.padding(x, p)
    return slim.max_pool2d(p_x, kernel_size)
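# The padding amount used by maxpool() above mimics 'SAME' pooling:
# floor((kernel_size - 1) / 2) rows/columns on each side before a VALID pool.
# Quick check with plain NumPy:
import numpy as np
for k in (2, 3, 5):
    print(k, np.floor((k - 1) / 2).astype(np.int32))  # 2 -> 0, 3 -> 1, 5 -> 2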
def __init__(self, scope, img_w, img_h, n_classes, dropout_keep_prob=1.0):
    """Defining the model."""
    self.scope = scope
    self.n_classes = n_classes
    self.dropout_keep_prob = dropout_keep_prob
    self.input = tf.placeholder(tf.float32, [None, img_h, img_w, 1])
    self.conv1 = slim.conv2d(self.input, num_outputs=32,
                             kernel_size=[3, 3], stride=[1, 1],
                             padding='VALID', scope=self.scope + '_conv1')
    self.conv2 = slim.conv2d(self.conv1, num_outputs=128,
                             kernel_size=[3, 3], stride=[1, 1],
                             padding='VALID', scope=self.scope + '_conv2')
    self.conv3 = slim.conv2d(self.conv2, num_outputs=128,
                             kernel_size=[3, 3], stride=[1, 1],
                             padding='VALID', scope=self.scope + '_conv3')
    self.pool1 = slim.max_pool2d(self.conv3, [2, 2])
    self.conv4 = slim.conv2d(self.pool1, num_outputs=256,
                             kernel_size=[3, 3], stride=[1, 1],
                             padding='VALID', scope=self.scope + '_conv4')
    self.conv5 = slim.conv2d(self.conv4, num_outputs=256,
                             kernel_size=[3, 3], stride=[1, 1],
                             padding='VALID', scope=self.scope + '_conv5')
    self.pool2 = slim.max_pool2d(self.conv5, [2, 2])
    self.conv6 = slim.conv2d(self.pool2, num_outputs=512,
                             kernel_size=[3, 3], stride=[1, 1],
                             padding='VALID', scope=self.scope + '_conv6')
    self.conv7 = slim.conv2d(self.conv6, num_outputs=512,
                             kernel_size=[3, 3], stride=[1, 1],
                             padding='VALID', scope=self.scope + '_conv7')
    self.conv8 = slim.conv2d(self.conv7, num_outputs=512,
                             kernel_size=[3, 3], stride=[1, 1],
                             padding='VALID', scope=self.scope + '_conv8')
    self.pool = slim.max_pool2d(self.conv8, [2, 2])
    self.hidden = slim.fully_connected(slim.flatten(self.pool), 8192,
                                       scope=self.scope + '_hidden',
                                       activation_fn=tf.nn.relu)
    self.classes1 = slim.fully_connected(self.hidden, 4096,
                                         scope=self.scope + '_fc1',
                                         activation_fn=tf.nn.relu)
    self.classes2 = slim.dropout(self.classes1,
                                 keep_prob=self.dropout_keep_prob)
    self.classes3 = slim.fully_connected(self.classes2, 4096,
                                         scope=self.scope + '_fc2')
    self.classes4 = slim.dropout(self.classes3,
                                 keep_prob=self.dropout_keep_prob)
    self.classes = slim.fully_connected(self.classes4, self.n_classes,
                                        scope=self.scope + '_fc3',
                                        activation_fn=None)
    self.targets = tf.placeholder(tf.int32, [None])
    self.targets_onehot = tf.one_hot(self.targets, self.n_classes)
    self.loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=self.targets_onehot, logits=self.classes))
    self.train_step = tf.train.RMSPropOptimizer(1e-3).minimize(self.loss)
def resnet_v1(inputs, blocks, num_classes=None, is_training=True, global_pool=True, output_stride=None, include_root_block=True, spatial_squeeze=True, reuse=None, scope=None): """Generator for v1 ResNet models. This function generates a family of ResNet v1 models. See the resnet_v1_*() methods for specific model instantiations, obtained by selecting different block instantiations that produce ResNets of various depths. Training for image classification on Imagenet is usually done with [224, 224] inputs, resulting in [7, 7] feature maps at the output of the last ResNet block for the ResNets defined in [1] that have nominal stride equal to 32. However, for dense prediction tasks we advise that one uses inputs with spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In this case the feature maps at the ResNet output will have spatial shape [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1] and corners exactly aligned with the input image corners, which greatly facilitates alignment of the features to the image. Using as input [225, 225] images results in [8, 8] feature maps at the output of the last ResNet block. For dense prediction tasks, the ResNet needs to run in fully-convolutional (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all have nominal stride equal to 32 and a good choice in FCN mode is to use output_stride=16 in order to increase the density of the computed features at small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915. Args: inputs: A tensor of size [batch, height_in, width_in, channels]. blocks: A list of length equal to the number of ResNet blocks. Each element is a resnet_utils.Block object describing the units in the block. num_classes: Number of predicted classes for classification tasks. If None we return the features before the logit layer. is_training: whether is training or not. global_pool: If True, we perform global average pooling before computing the logits. Set to True for image classification, False for dense prediction. output_stride: If None, then the output will be computed at the nominal network stride. If output_stride is not None, it specifies the requested ratio of input to output spatial resolution. include_root_block: If True, include the initial convolution followed by max-pooling, if False excludes it. spatial_squeeze: if True, logits is of shape [B, C], if false logits is of shape [B, 1, 1, C], where B is batch_size and C is number of classes. reuse: whether or not the network and its variables should be reused. To be able to reuse 'scope' must be given. scope: Optional variable_scope. Returns: net: A rank-4 tensor of size [batch, height_out, width_out, channels_out]. If global_pool is False, then height_out and width_out are reduced by a factor of output_stride compared to the respective height_in and width_in, else both height_out and width_out equal one. If num_classes is None, then net is the output of the last ResNet block, potentially after global average pooling. If num_classes is not None, net contains the pre-softmax activations. end_points: A dictionary from components of the network to the corresponding activation. Raises: ValueError: If the target output_stride is not valid. 
""" with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc: end_points_collection = sc.name + '_end_points' with slim.arg_scope([slim.conv2d, bottleneck, resnet_utils.stack_blocks_dense], outputs_collections=end_points_collection): with slim.arg_scope([slim.batch_norm], is_training=is_training): net = inputs if include_root_block: if output_stride is not None: if output_stride % 4 != 0: raise ValueError('The output_stride needs to be a multiple of 4.') output_stride /= 4 net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1') net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1') net = slim.utils.collect_named_outputs(end_points_collection, 'pool2', net) net = resnet_utils.stack_blocks_dense(net, blocks, output_stride) end_points = slim.utils.convert_collection_to_dict(end_points_collection) # end_points['pool2'] = end_points['resnet_v1_50/pool1/MaxPool:0'] try: end_points['pool3'] = end_points['resnet_v1_50/block1'] end_points['pool4'] = end_points['resnet_v1_50/block2'] except: end_points['pool3'] = end_points['Detection/resnet_v1_50/block1'] end_points['pool4'] = end_points['Detection/resnet_v1_50/block2'] end_points['pool5'] = net # if global_pool: # # Global average pooling. # net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True) # if num_classes is not None: # net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, # normalizer_fn=None, scope='logits') # if spatial_squeeze: # logits = tf.squeeze(net, [1, 2], name='SpatialSqueeze') # else: # logits = net # # Convert end_points_collection into a dictionary of end_points. # end_points = slim.utils.convert_collection_to_dict(end_points_collection) # if num_classes is not None: # end_points['predictions'] = slim.softmax(logits, scope='predictions') return net, end_points
def __init__(self, s_dim, trainer, scope, a_dim=8310): with tf.variable_scope(scope): card_cnt = 57 # self.temp = tf.placeholder(tf.float32, None, name="boltz") self.input = tf.placeholder(tf.float32, [None, s_dim], name="input") # embedding layer # self.state_onehot = tf.one_hot(self.input, 15, dtype=tf.float32) # self.state_onehot = tf.reshape(self.state_onehot, [-1, 15]) embeddings = slim.fully_connected( inputs=self.input, num_outputs=256, activation_fn=None, weights_initializer=tf.contrib.layers.xavier_initializer()) self.embeddings = tf.reshape(embeddings, [-1, 1, 64, 4]) # 1D convolution self.conv1 = slim.conv2d(activation_fn=tf.nn.relu, inputs=self.embeddings, num_outputs=16, kernel_size=[1, 8], stride=[1, 1], padding='SAME') self.maxpool1 = slim.max_pool2d(inputs=self.conv1, kernel_size=[1, 4], stride=2, padding='SAME') self.conv2 = slim.conv2d(activation_fn=tf.nn.relu, inputs=self.maxpool1, num_outputs=32, kernel_size=[1, 4], stride=[1, 1], padding='SAME') self.maxpool2 = slim.max_pool2d(inputs=self.conv2, kernel_size=[1, 2], stride=2, padding='SAME') self.conv3 = slim.conv2d(activation_fn=tf.nn.relu, inputs=self.maxpool2, num_outputs=64, kernel_size=[1, 2], stride=[1, 1], padding='SAME') self.maxpool3 = slim.max_pool2d(inputs=self.conv3, kernel_size=[1, 2], stride=2, padding='SAME') # flatten layer self.fc_flattened = slim.fully_connected(inputs=slim.flatten( self.maxpool3), num_outputs=256, activation_fn=None) self.fc_flattened = slim.fully_connected(inputs=slim.flatten( self.fc_flattened), num_outputs=512, activation_fn=None) # self.fc1 = slim.fully_connected(inputs=self.fc_flattened, num_outputs=1024, activation_fn=tf.nn.sigmoid) # self.fc2 = slim.fully_connected(inputs=self.fc_flattened, num_outputs=8310, activation_fn=tf.nn.elu) # value self.fc3 = slim.fully_connected(inputs=self.fc_flattened, num_outputs=64, activation_fn=tf.nn.elu) self.fc4 = slim.fully_connected( inputs=self.fc3, num_outputs=1, activation_fn=None, weights_initializer=normalized_columns_initializer(1.0)) # self.policy_pred = tf.reshape(self.fc2, [1, -1]) # self.mask = tf.placeholder(tf.bool, [None, a_dim], name='mask') # self.mask = tf.reshape(self.mask, [1, -1]) # self.valid_policy = tf.boolean_mask(self.policy_pred[0], self.mask[0]) # self.policy_norm = tf.norm(self.valid_policy) # self.a0 = self.valid_policy[0] # self.boltz_policy = tf.reshape(tf.nn.softmax(self.valid_policy / self.temp), [1, -1]) # self.valid_policy = tf.nn.softmax(self.valid_policy) # self.valid_policy = tf.reshape(self.valid_policy, [1, -1]) self.val_pred = tf.reshape(self.fc4, [-1]) # only support batch size one since masked_a_dim is changing # self.action = tf.placeholder(tf.int32, [None], "action_input") # self.masked_a_dim = tf.placeholder(tf.int32, None) # self.action_one_hot = tf.one_hot(self.action, self.masked_a_dim, dtype=tf.float32) self.val_truth = tf.placeholder(tf.float32, [None], "val_input") # self.advantages = tf.placeholder(tf.float32, [None], "advantage_input") # self.pi_sample = tf.reduce_sum(tf.multiply(self.action_one_hot[0], self.valid_policy[0])) # self.pi = tf.cond(self.pi_sample > 0.99, lambda : self.pi_sample - 0.01, lambda : self.pi_sample) # self.pred_prob = self.pi # self.policy_loss = -tf.reduce_sum(tf.log(tf.clip_by_value(self.pi, 1e-8, 1.)) * self.advantages) self.val_loss = tf.reduce_sum( tf.square(self.val_pred - self.val_truth)) self.loss = self.val_loss # self.loss = 0.2 * self.val_loss + self.policy_loss local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope) self.gradients 
= tf.gradients(self.loss, local_vars) self.var_norms = tf.global_norm(local_vars) self.gradients, self.grad_norms = tf.clip_by_global_norm( self.gradients, 4.0) self.apply_grads = trainer.apply_gradients( zip(self.gradients, local_vars))
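# The clip-then-apply pattern used above, as a standalone hedged sketch
# (TF 1.x; the toy variable and Adam optimizer are illustrative choices, not
# taken from the original class): compute gradients, clip by global norm,
# then feed the clipped list back to the optimizer in the same variable order.
w = tf.get_variable('w', [10], initializer=tf.zeros_initializer())
loss = tf.reduce_sum(tf.square(w - 1.0))
grads = tf.gradients(loss, [w])
grads, grad_norm = tf.clip_by_global_norm(grads, 4.0)  # cap the global grad norm
train_op = tf.train.AdamOptimizer(1e-4).apply_gradients(zip(grads, [w]))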
def O_Net(inputs, label=None, bbox_target=None, landmark_target=None, training=True): with slim.arg_scope([slim.conv2d], activation_fn=prelu, weights_initializer=slim.xavier_initializer(), biases_initializer=tf.zeros_initializer(), weights_regularizer=slim.l2_regularizer(0.0005), padding='valid'): print(inputs.get_shape()) net = slim.conv2d(inputs, num_outputs=32, kernel_size=[3, 3], stride=1, scope="conv1") print(net.get_shape()) net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope="pool1", padding='SAME') print(net.get_shape()) net = slim.conv2d(net, num_outputs=64, kernel_size=[3, 3], stride=1, scope="conv2") print(net.get_shape()) net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope="pool2") print(net.get_shape()) net = slim.conv2d(net, num_outputs=64, kernel_size=[3, 3], stride=1, scope="conv3") print(net.get_shape()) net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope="pool3", padding='SAME') print(net.get_shape()) net = slim.conv2d(net, num_outputs=128, kernel_size=[2, 2], stride=1, scope="conv4") print(net.get_shape()) fc_flatten = slim.flatten(net) print(fc_flatten.get_shape()) fc1 = slim.fully_connected(fc_flatten, num_outputs=256, scope="fc1", activation_fn=tf.nn.relu) print(fc1.get_shape()) # batch*2 cls_prob = slim.fully_connected(fc1, num_outputs=2, scope="cls_fc", activation_fn=tf.nn.softmax) print(cls_prob.get_shape()) # batch*4 bbox_pred = slim.fully_connected(fc1, num_outputs=4, scope="bbox_fc", activation_fn=None) print(bbox_pred.get_shape()) # batch*10 landmark_pred = slim.fully_connected(fc1, num_outputs=10, scope="landmark_fc", activation_fn=None) print(landmark_pred.get_shape()) # train if training: cls_loss = cls_ohem(cls_prob, label) bbox_loss = bbox_ohem(bbox_pred, bbox_target, label) accuracy = cal_accuracy(cls_prob, label) landmark_loss = landmark_ohem(landmark_pred, landmark_target, label) L2_loss = tf.add_n(slim.losses.get_regularization_losses()) return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy else: return cls_prob, bbox_pred, landmark_pred