def res_u_network(inputs, output_dim=3, keep_prob=1.0, filter_size=8, nr_downsamples=4, nr_residual_blocks=3, gated=True, nonlinearity="concat_elu"):
    """Build a U-shaped residual network with skip connections.

    The encoder stacks ``nr_residual_blocks`` residual blocks per level and,
    on every level but the last, pushes the activation on a skip stack before
    a stride-2 residual block. The decoder mirrors this: a transpose conv,
    then a residual block that receives the popped skip tensor through its
    ``a`` argument, then ``nr_residual_blocks - 1`` plain residual blocks.

    Args:
        inputs: input tensor; also used to scale the output when
            ``output_dim > 1``.
        output_dim: value passed to the final ``nn.conv_layer`` (named
            "final_conv") as its fourth positional argument.
        keep_prob: forwarded to every ``nn.res_block`` as ``keep_p``.
        filter_size: value passed as ``filter_size`` to the first-level
            blocks; doubled before each stride-2 block and halved before each
            transpose conv.
        nr_downsamples: number of encoder levels (``nr_downsamples - 1``
            stride-2 blocks and as many transpose convs are created).
        nr_residual_blocks: residual blocks per level.
        gated: forwarded to every ``nn.res_block``.
        nonlinearity: name resolved through ``nn.set_nonlinearity``.

    Returns:
        ``tanh`` of the final conv output, multiplied by ``(1 - inputs)`` when
        ``output_dim > 1``.
    """
    # skip-connection stack: filled by the encoder, popped by the decoder
    a = []
    nonlinearity = nn.set_nonlinearity(nonlinearity)

    # encoding piece
    x_i = inputs
    # `range` instead of the Python 2-only `xrange`: works on both versions
    for i in range(nr_downsamples):
        for j in range(nr_residual_blocks):
            x_i = nn.res_block(x_i, filter_size=filter_size, keep_p=keep_prob, gated=gated, nonlinearity=nonlinearity, name="res_encode_" + str(i) + "_block_" + str(j))
        if i < nr_downsamples - 1:
            a.append(x_i)
            filter_size = filter_size * 2
            x_i = nn.res_block(x_i, filter_size=filter_size, keep_p=keep_prob, gated=gated, nonlinearity=nonlinearity, stride=2, name="res_encode_" + str(i) + "_block_" + str(nr_residual_blocks))

    # decoding piece
    for i in range(nr_downsamples - 1):
        # floor division keeps filter_size an int on Python 3 as well
        filter_size = filter_size // 2
        x_i = nn.transpose_conv_layer(x_i, 4, 2, filter_size, "up_conv_" + str(i))
        x_i = nn.res_block(x_i, a=a.pop(), filter_size=filter_size, keep_p=keep_prob, gated=gated, nonlinearity=nonlinearity, name="res_decode_" + str(i) + "_block_0")
        for j in range(nr_residual_blocks - 1):
            x_i = nn.res_block(x_i, filter_size=filter_size, keep_p=keep_prob, gated=gated, nonlinearity=nonlinearity, name="res_decode_" + str(i) + "_block_" + str(j + 1))

    x_i = nn.conv_layer(x_i, 3, 1, output_dim, "final_conv")
    x_i = tf.tanh(x_i)
    if output_dim > 1:
        # NOTE(review): presumably `inputs` is a boundary mask so this zeroes
        # the prediction inside boundaries - confirm with the caller.
        x_i = x_i * (-inputs + 1.0)
    return x_i
def res_generator_network(batch_size, shape, inputs=None, full_shape=None, hidden_size=512, filter_size=4, nr_residual_blocks=1, gated=True, nonlinearity="concat_elu"):
    """Decode a vector into a single-channel field via transpose convs.

    ``inputs`` goes through a fully connected layer, is reshaped to a
    ``factor``-sized grid (``factor`` is 3 when ``shape[0]`` is divisible by
    3, otherwise 2) and is then upsampled ``int(log2(shape[0] // factor))``
    times. Each upsampling is a transpose conv followed by
    ``nr_residual_blocks`` residual blocks. A final conv named "final_conv"
    produces the output.

    Args:
        batch_size: leading dimension used when reshaping the FC output.
        shape: sequence of spatial sizes; its length sets the number of
            spatial axes and ``shape[0]`` sets the number of upsamples.
        inputs: tensor fed to the fully connected layer.
        full_shape: when not ``None``, the output is zero-padded with
            ``tf.pad`` so each spatial axis grows from ``shape[k]`` to
            ``full_shape[k]`` (rank-4 and rank-5 outputs only).
        hidden_size: not used by this function; kept for interface
            compatibility.
        filter_size: value passed as filter count after the last upsample; it
            is multiplied by ``2 ** nr_upsamples`` for the FC layer and halved
            before each transpose conv.
        nr_residual_blocks: residual blocks after each transpose conv.
        gated: forwarded to every ``nn.res_block``.
        nonlinearity: name resolved through ``nn.set_nonlinearity``.

    Returns:
        The output of the final conv, padded when ``full_shape`` is given.
    """
    # new shape
    if shape[0] % 3 == 0:
        factor = 3
    else:
        factor = 2
    # floor division reproduces the Python 2 integer semantics on Python 3
    nr_upsamples = int(np.log2(shape[0] // factor))
    filter_size = filter_size * pow(2, nr_upsamples)

    # set nonlinearity
    nonlinearity = nn.set_nonlinearity(nonlinearity)

    # fc layer
    x_i = inputs
    x_i = nn.fc_layer(x_i, pow(factor, len(shape)) * filter_size, "decode_layer", nn.set_nonlinearity("elu"))
    x_i = tf.reshape(x_i, [batch_size] + len(shape) * [factor] + [filter_size])

    # decoding piece (`range` instead of the Python 2-only `xrange`)
    for i in range(nr_upsamples):
        # keep the filter count an int on Python 3
        filter_size = filter_size // 2
        x_i = nn.transpose_conv_layer(x_i, 4, 2, filter_size, "up_conv_" + str(i))
        for j in range(nr_residual_blocks):
            x_i = nn.res_block(x_i, filter_size=filter_size, gated=gated, nonlinearity=nonlinearity, name="res_decode_" + str(i) + "_block_" + str(j))

    # the conv output is returned as-is (no sigmoid is applied)
    x_i = nn.conv_layer(x_i, 3, 1, 1, "final_conv")

    if full_shape is not None:
        # paddings must be integers, hence `//`; each before/after pair sums
        # to full_shape[k] - shape[k]
        if len(x_i.get_shape()) == 4:
            x_i = tf.pad(
                x_i,
                [[0, 0],
                 [shape[0] // 2, full_shape[0] - (3 * shape[0] // 2)],
                 [shape[1] // 2, full_shape[1] - (3 * shape[1] // 2)],
                 [0, 0]])
        elif len(x_i.get_shape()) == 5:
            x_i = tf.pad(
                x_i,
                [[0, 0],
                 [shape[0] // 4, full_shape[0] - (5 * shape[0] // 4)],
                 [shape[1] // 4, full_shape[1] - (5 * shape[1] // 4)],
                 [shape[2] // 4, full_shape[2] - (5 * shape[2] // 4)],
                 [0, 0]])
    return x_i
def xiao_network(inputs):
    """Conv -> FC -> transpose-conv network whose output is scaled by a mask.

    The mask is ``1 - clip(round(0.5 - inputs), 0, 1)`` computed from
    ``inputs``.
    """
    # this network should never be used and only works of 512x512
    relu = nn.set_nonlinearity("relu")

    # (second arg, third arg, fourth arg, name) of each layer call;
    # presumably kernel size, stride and filter count - TODO confirm in nn
    encoder_layers = (
        (8, 8, 128, "conv_1"),
        (8, 8, 512, "conv_2"),
    )
    decoder_layers = (
        (8, 8, 512, "trans_conv_1"),
        (8, 8, 256, "trans_conv_2"),
        (2, 2, 64, "trans_conv_4"),
        (2, 2, 32, "trans_conv_5"),
    )

    net = inputs
    for arg_a, arg_b, arg_c, layer_name in encoder_layers:
        net = nn.conv_layer(net, arg_a, arg_b, arg_c, layer_name, relu)

    net = nn.fc_layer(net, 1024, "fc", relu, flat=True)

    # insert two singleton axes at position 1
    for _ in range(2):
        net = tf.expand_dims(net, axis=1)

    for arg_a, arg_b, arg_c, layer_name in decoder_layers:
        net = nn.transpose_conv_layer(net, arg_a, arg_b, arg_c, layer_name, relu)
    # last layer is called without an explicit nonlinearity argument
    net = nn.transpose_conv_layer(net, 2, 2, 3, "trans_conv_6")

    boundary = tf.minimum(tf.maximum(tf.round(-inputs + .5), 0.0), 1.0)
    return net * (-boundary + 1.0)
def _build_model(self, triplets):
    '''
    Build the two-conv / two-FC embedding network.

    :param triplets: batches of triplets, [3*N, 64, 64, 3]
    :return: a model dict containing all Tensors, keyed 'conv1', 'pool1',
             'conv2', 'pool2', 'fc1' and 'fc2'

    When ``self._data_format`` is "NCHW" the input is transposed to
    channels-first before the first conv. The original code computed that
    transpose but still fed the untransposed ``triplets`` to ``conv1``.
    The shape comments below are written in NHWC order.
    '''
    model = {}

    images = triplets
    if self._data_format == "NCHW":
        images = tf.transpose(triplets, [0, 3, 1, 2])

    shape_dict = {}

    # NOTE(review): the collapsed source does not show whether the pooling
    # calls sat inside the variable scopes; they are kept inside here.
    shape_dict['conv1'] = [8, 8, 3, 16]
    with tf.variable_scope('conv1'):
        model['conv1'] = nn.conv_layer(
            self._data_format, images, 1, 'VALID',
            shape_dict['conv1'])  # [3N,57,57,16]
        model['pool1'] = nn.max_pool2d(
            self._data_format, model['conv1'], 2,
            'VALID')  # outsize [3N, 28, 28, 16]

    shape_dict['conv2'] = [5, 5, 16, 7]
    with tf.variable_scope('conv2'):
        model['conv2'] = nn.conv_layer(
            self._data_format, model['pool1'], 1, 'VALID',
            shape_dict['conv2'])  # [3N,24,24,7]
        model['pool2'] = nn.max_pool2d(
            self._data_format, model['conv2'], 2,
            'SAME')  # [3N, 12, 12, 7]

    shape_dict['fc1'] = 256
    with tf.variable_scope('fc1'):
        model['fc1'] = nn.fc(model['pool2'], shape_dict['fc1'])  # [3N, 256]

    shape_dict['fc2'] = 16
    with tf.variable_scope('fc2'):
        model['fc2'] = nn.fc(model['fc1'], shape_dict['fc2'])  # [3N, 16]

    return model
def _build_model(self, image, max_instance, direct_slice, is_train=False, save_var=False, val_dict=None):
    '''Build the InstanceFCN8s graph and return its named tensors.

    :param image: input tensor; its dynamic shape is the target of the final
                  upsampling
    :param max_instance: multiplied with ``self.num_pred_class`` to get the
                         number of score channels
    :param direct_slice: not used by this method
    :param is_train: passed as ``dropout`` to conv6_1 .. conv7
    :param save_var: if true, layers get ``self.var_dict`` as ``var_dict``,
                     otherwise ``None``
    :param val_dict: weights used instead of ``self.data_dict`` when given
    :return: dict with the backbone tensors ('conv1_1' .. 'pool5',
             'conv6_1' .. 'conv7'), 'score_fr' and 'upmask'
    '''
    model = {}

    # Not during validation: use pretrained weights.
    # During validation: use the currently trained weights.
    feed_dict = self.data_dict if val_dict is None else val_dict
    # During training, weights are saved to var_dict; during inference or
    # validation there is no need to save weights.
    var_dict = self.var_dict if save_var else None

    num_out = self.num_pred_class * max_instance

    # Step1: build fcn8s and score_out which has shape[H, W, Classes]
    # Backbone: (block index, number of convs), each block ends with a pool.
    net = image
    for block, n_convs in ((1, 2), (2, 2), (3, 3), (4, 3), (5, 3)):
        for idx in range(1, n_convs + 1):
            name = "conv%d_%d" % (block, idx)
            net = nn.conv_layer(net, feed_dict, name, var_dict=var_dict)
            model[name] = net
        name = "pool%d" % block
        net = nn.max_pool_layer(net, name)
        model[name] = net

    for name, out_channels in (("conv6_1", 512), ("conv6_2", 512), ("conv6_3", 4096)):
        net = nn.conv_layer(net, feed_dict, name,
                            shape=[3, 3, 512, out_channels],
                            dropout=is_train, keep_prob=0.5, var_dict=var_dict)
        model[name] = net

    model['conv7'] = nn.conv_layer(model['conv6_3'], feed_dict, "conv7",
                                   shape=[1, 1, 4096, 4096],
                                   dropout=is_train, keep_prob=0.5, var_dict=var_dict)

    # Skip feature fusion
    model['score_fr'] = nn.conv_layer(model['conv7'], feed_dict, "score_fr_mask",
                                      shape=[1, 1, 4096, num_out],
                                      relu=False, dropout=False, var_dict=var_dict)

    # Upsample: score_fr*2
    upscore_fr_2s = nn.upscore_layer(model['score_fr'], feed_dict, "upscore_fr_2s_mask",
                                     tf.shape(model['pool4']), num_out,
                                     ksize=4, stride=2, var_dict=var_dict)

    # Fuse upscore_fr_2s + score_pool4
    in_features = model['pool4'].get_shape()[3].value
    score_pool4 = nn.conv_layer(model['pool4'], feed_dict, "score_pool4_mask",
                                shape=[1, 1, in_features, num_out],
                                relu=False, dropout=False, var_dict=var_dict)
    fuse_pool4 = tf.add(upscore_fr_2s, score_pool4)

    # Upsample fuse_pool4*2
    upscore_pool4_2s = nn.upscore_layer(fuse_pool4, feed_dict, "upscore_pool4_2s_mask",
                                        tf.shape(model['pool3']), num_out,
                                        ksize=4, stride=2, var_dict=var_dict)

    # Fuse upscore_pool4_2s + score_pool3
    # Uses feed_dict like every other layer (the original passed
    # self.data_dict here, which ignored val_dict during validation).
    in_features = model['pool3'].get_shape()[3].value
    score_pool3 = nn.conv_layer(model['pool3'], feed_dict, "score_pool3_mask",
                                shape=[1, 1, in_features, num_out],
                                relu=False, dropout=False, var_dict=var_dict)
    score_out = tf.add(upscore_pool4_2s, score_pool3)

    # Upsample to original size *8 # Or we have to do it by class
    model['upmask'] = nn.upscore_layer(score_out, feed_dict, "upmask",
                                       tf.shape(image), num_out,
                                       ksize=16, stride=8, var_dict=var_dict)

    print('InstanceFCN8s model is builded successfully!')
    print('Model: %s' % str(model.keys()))
    return model
def _build_model(self, image, num_classes, is_train=False, scale_min='fcn16s', save_var=False, val_dict=None):
    '''Build the FCN graph with 32s and, optionally, 16s and 8s outputs.

    :param image: input tensor; its dynamic shape is the target of every
                  final upsampling
    :param num_classes: number of score channels
    :param is_train: passed as ``dropout`` to conv6_1 .. conv7
    :param scale_min: 'fcn16s' adds the 'fcn16s' output, 'fcn8s' adds both
                      'fcn16s' and 'fcn8s'; any other value yields only
                      'fcn32s'
    :param save_var: if true, layers get ``self.var_dict`` as ``var_dict``,
                     otherwise ``None``
    :param val_dict: weights used instead of ``self.data_dict`` when given
    :return: dict with the backbone tensors, 'score_fr', 'fcn32s' and,
             depending on ``scale_min``, 'fcn16s' and 'fcn8s'
    '''
    model = {}

    # Not during validation: use pretrained weights.
    # During validation: use the currently trained weights.
    feed_dict = self.data_dict if val_dict is None else val_dict
    # During training, weights are saved to var_dict; during inference or
    # validation there is no need to save weights.
    var_dict = self.var_dict if save_var else None

    # Backbone: (block index, number of convs), each block ends with a pool.
    net = image
    for block, n_convs in ((1, 2), (2, 2), (3, 3), (4, 3), (5, 3)):
        for idx in range(1, n_convs + 1):
            name = "conv%d_%d" % (block, idx)
            net = nn.conv_layer(net, feed_dict, name, var_dict=var_dict)
            model[name] = net
        name = "pool%d" % block
        net = nn.max_pool_layer(net, name)
        model[name] = net

    for name, out_channels in (("conv6_1", 512), ("conv6_2", 512), ("conv6_3", 4096)):
        net = nn.conv_layer(net, feed_dict, name,
                            shape=[3, 3, 512, out_channels],
                            dropout=is_train, keep_prob=0.5, var_dict=var_dict)
        model[name] = net

    model['conv7'] = nn.conv_layer(model['conv6_3'], feed_dict, "conv7",
                                   shape=[1, 1, 4096, 4096],
                                   dropout=is_train, keep_prob=0.5, var_dict=var_dict)

    model['score_fr'] = nn.conv_layer(model['conv7'], feed_dict, "score_fr",
                                      shape=[1, 1, 4096, num_classes],
                                      relu=False, dropout=False, var_dict=var_dict)

    # fcn32s is always calculated for now
    model['fcn32s'] = nn.upscore_layer(model['score_fr'], feed_dict, "upscore_fr_32s",
                                       tf.shape(image), num_classes,
                                       ksize=64, stride=32, var_dict=var_dict)

    # fcn16s is calculated also when scale_min is *8, because we need to
    # calculate fuse_pool4 anyway
    if scale_min == 'fcn16s' or scale_min == 'fcn8s':
        upscore_fr_2s = nn.upscore_layer(model['score_fr'], feed_dict, "upscore_fr_2s",
                                         tf.shape(model['pool4']), num_classes,
                                         ksize=4, stride=2, var_dict=var_dict)

        # Fuse fc8 *2, pool4
        in_features = model['pool4'].get_shape()[3].value
        score_pool4 = nn.conv_layer(model['pool4'], feed_dict, "score_pool4",
                                    shape=[1, 1, in_features, num_classes],
                                    relu=False, dropout=False, var_dict=var_dict)
        fuse_pool4 = tf.add(upscore_fr_2s, score_pool4)

        # Upsample fusion *16
        model['fcn16s'] = nn.upscore_layer(fuse_pool4, feed_dict, "upscore_pool4_16s",
                                           tf.shape(image), num_classes,
                                           ksize=32, stride=16, var_dict=var_dict)

    # fcn8s is calculated only when scale_min is *8
    if scale_min == 'fcn8s':
        # Upsample fc8 *4
        upscore_pool4_2s = nn.upscore_layer(fuse_pool4, feed_dict, "upscore_pool4_2s",
                                            tf.shape(model['pool3']), num_classes,
                                            ksize=4, stride=2, var_dict=var_dict)

        # Fuse fc8 *4, pool4 *2, pool3
        # Uses feed_dict like every other layer (the original passed
        # self.data_dict here, which ignored val_dict during validation).
        in_features = model['pool3'].get_shape()[3].value
        score_pool3 = nn.conv_layer(model['pool3'], feed_dict, "score_pool3",
                                    shape=[1, 1, in_features, num_classes],
                                    relu=False, dropout=False, var_dict=var_dict)
        fuse_pool3 = tf.add(score_pool3, upscore_pool4_2s)

        # Upsample fusion *8
        model['fcn8s'] = nn.upscore_layer(fuse_pool3, feed_dict, "upscore8",
                                          tf.shape(image), num_classes,
                                          ksize=16, stride=8, var_dict=var_dict)

    print('Model with scale %s is builded successfully!' % scale_min)
    print('Model: %s' % str(model.keys()))
    return model
def _build_model(self, image, is_train=False):
    '''Build the residual segmentation graph and return its named tensors.

    If is_train, weights are saved to self._var_dict and dropout is enabled
    in B6/B7; otherwise no weights are saved and dropout is off.

    :param image: input tensor fed to the first conv ('B0')
    :param is_train: training switch described above
    :return: dict keyed 'B0', 'B2_1' .. 'B7', 'Tail' and 'Bilinear'
    '''
    weights = self._weight_dict
    var_dict = self._var_dict if is_train else None
    dropout = bool(is_train)

    model = {}

    # B0: [H,W,3] -> [H,W,64]
    with tf.variable_scope('B0'):
        model['B0'] = nn.conv_layer(image, weights, 1, 'SAME', [3, 3, 3, 64], var_dict)
    prev = model['B0']

    # Stages B2, B3, B4: one downsampling unit followed by plain units.
    # (stage, in channels, out channels, number of plain units)
    #   B2: [H,W,64]       -> [H/2, W/2, 128], then B2_2, B2_3
    #   B3: [H/2,W/2,128]  -> [H/4, W/4, 256], then B3_2, B3_3
    #   B4: [H/4,W/4,256]  -> [H/8, W/8, 512], then B4_2 ~ B4_6
    for stage, c_in, c_out, n_plain in ((2, 64, 128, 2), (3, 128, 256, 2), (4, 256, 512, 5)):
        spec = {
            'side': [1, 1, c_in, c_out],
            'convs': [[3, 3, c_in, c_out], [3, 3, c_out, c_out]],
        }
        unit = 'B%d_1' % stage
        with tf.variable_scope(unit):
            prev = nn.ResUnit_downsample_2convs(prev, weights, spec, var_dict=var_dict)
        model[unit] = prev
        for k in range(2, n_plain + 2):
            unit = 'B%d_%d' % (stage, k)
            with tf.variable_scope(unit):
                prev = nn.ResUnit_2convs(prev, weights, spec['convs'][1], var_dict=var_dict)
            model[unit] = prev

    # B5_1: [H/8, W/8, 512] -> [H/8, W/8, 1024]
    b5_1_spec = {
        'side': [1, 1, 512, 1024],
        'convs': [[3, 3, 512, 512], [3, 3, 512, 1024]],
    }
    with tf.variable_scope('B5_1'):
        model['B5_1'] = nn.ResUnit_hybrid_dilate_2conv(model['B4_6'], weights, b5_1_spec, var_dict=var_dict)

    # B5_2, B5_3: [H/8, W/8, 1024]
    b5_rest_spec = [[3, 3, 1024, 512], [3, 3, 512, 1024]]
    for k in (2, 3):
        unit = 'B5_%d' % k
        with tf.variable_scope(unit):
            model[unit] = nn.ResUnit_full_dilate_2convs(
                model['B5_%d' % (k - 1)], weights, b5_rest_spec, var_dict=var_dict)

    # B6: [H/8, W/8, 1024] -> [H/8, W/8, 2048]
    b6_spec = [[1, 1, 1024, 512], [3, 3, 512, 1024], [1, 1, 1024, 2048]]
    with tf.variable_scope('B6'):
        model['B6'] = nn.ResUnit_hybrid_dilate_3conv(
            model['B5_3'], weights, b6_spec, dropout=dropout, var_dict=var_dict)

    # B7: [H/8, W/8, 2048] -> [H/8, W/8, 4096]
    b7_spec = [[1, 1, 2048, 1024], [3, 3, 1024, 2048], [1, 1, 2048, 4096]]
    with tf.variable_scope('B7'):
        model['B7'] = nn.ResUnit_hybrid_dilate_3conv(
            model['B6'], weights, b7_spec, dropout=dropout, var_dict=var_dict)

    # ResNet tail.
    tail_spec = [[3, 3, 4096, 512], [3, 3, 512, self._num_classes]]
    with tf.variable_scope('Tail'):
        model['Tail'] = nn.ResUnit_tail(model['B7'], weights, tail_spec, var_dict)

    # Upsampling using Bilinear interpolation (fixed 1024 x 2048 target)
    target_size = [1024, 2048]
    with tf.variable_scope('Bilinear'):
        model['Bilinear'] = nn.bilinear_upscore_layer(model['Tail'], target_size)

    return model
def _build_model(self, images, is_train=False):
    '''
    Build the main residual path, four side paths and the fused output.

    :param image: image in RGB format
    :param is_train: either True or False
    :return: main output and side supervision
    '''
    fmt = self._data_format
    model = {}

    if fmt == "NCHW":
        images = tf.transpose(images, [0, 3, 1, 2])

    with tf.variable_scope('main'):
        # Residual Block B0
        with tf.variable_scope('B0'):
            model['B0'] = nn.conv_layer(fmt, images, 1, 'SAME', [3, 3, 3, 64])
        # Pooling 1
        model['B0_pooled'] = nn.max_pool2d(fmt, model['B0'], 2, 'SAME')
        pooled = model['B0_pooled']

        # Residual stages B1 (B1_0 - B1_2), B2 (B2_0 - B2_2), B3 (B3_0 - B3_5),
        # each followed by a pooling.
        # (stage, in channels, out channels, number of units after the first)
        for stage, c_in, c_out, n_res in ((1, 64, 128, 2), (2, 128, 256, 2), (3, 256, 512, 5)):
            spec = {
                'side': [1, 1, c_in, c_out],
                'convs': [[3, 3, c_in, c_out], [3, 3, c_out, c_out]],
            }
            unit = 'B%d_0' % stage
            with tf.variable_scope(unit):
                feat = nn.res_side(fmt, pooled, spec, is_train)
            model[unit] = feat
            for k in range(1, n_res + 1):
                unit = 'B%d_%d' % (stage, k)
                with tf.variable_scope(unit):
                    feat = nn.res(fmt, feat, spec['convs'], is_train)
                model[unit] = feat
            pooled = nn.max_pool2d(fmt, feat, 2, 'SAME')
            model[unit + '_pooled'] = pooled

        # Residual Block B4_0, B4_1, B4_2
        b4_0_spec = {
            'side': [1, 1, 512, 1024],
            'convs': [[3, 3, 512, 512], [3, 3, 512, 1024]],
        }
        with tf.variable_scope('B4_0'):
            model['B4_0'] = nn.res_side(fmt, model['B3_5_pooled'], b4_0_spec, is_train)
        b4_rest_spec = [[3, 3, 1024, 512], [3, 3, 512, 1024]]
        for k in (1, 2):
            unit = 'B4_%d' % k
            with tf.variable_scope(unit):
                model[unit] = nn.res(fmt, model['B4_%d' % (k - 1)], b4_rest_spec, is_train)

    # NOTE(review): the collapsed source does not show whether the side
    # paths, supervision heads and fuse layer sat inside the 'main' scope;
    # they are placed outside it here - confirm against saved checkpoints.

    # add side conv path and upsample, crop to image size
    im_size = tf.shape(images)
    # (scope prefix, source tensor, in channels, upsample factor, bias shape)
    side_specs = (
        ('B1', 'B1_2', 128, 2, [16]),
        ('B2', 'B2_2', 256, 4, 16),
        ('B3', 'B3_5', 512, 8, 16),
        ('B4', 'B4_2', 1024, 16, 16),
    )
    side_feats = {}
    side_upsampled = []
    for prefix, source, c_in, factor, bias_shape in side_specs:
        with tf.variable_scope(prefix + '_side_path'):
            side = nn.conv_layer(fmt, model[source], 1, 'SAME', [3, 3, c_in, 16])
            side = nn.bias_layer(fmt, side, bias_shape)
            side_f = nn.conv_transpose(fmt, side, [16, 16], factor, 'SAME')
            side_f = nn.crop_features(fmt, side_f, im_size)
        side_feats[factor] = side
        side_upsampled.append(side_f)

    # add side path supervision
    sup_out = {}
    for prefix, _, _, factor, _ in side_specs:
        with tf.variable_scope(prefix + '_side_sup'):
            side_s = nn.conv_layer(fmt, side_feats[factor], 1, 'SAME', [1, 1, 16, 2])
            side_s = nn.bias_layer(fmt, side_s, [2])
            side_s = nn.conv_transpose(fmt, side_s, [2, 2], factor, 'SAME')
            side_s = nn.crop_features(fmt, side_s, im_size)
        sup_out['side_%d_s' % factor] = side_s

    # concat and linearly fuse (axis 1 for NCHW, axis 3 otherwise)
    concat_axis = 1 if fmt == "NCHW" else 3
    concat_side = tf.concat(side_upsampled, axis=concat_axis)

    with tf.variable_scope('fuse'):
        net_out = nn.conv_layer(fmt, concat_side, 1, 'SAME', [1, 1, 64, 2])
        net_out = nn.bias_layer(fmt, net_out, 2)

    return net_out, sup_out
def _build_model(self, image, is_train=False):
    '''Build the residual classification graph and return its named tensors.

    If is_train, weights are saved to self._var_dict and dropout is enabled
    in B6/B7; otherwise no weights are saved and dropout is off.

    :param image: input tensor fed to the first conv ('B0')
    :param is_train: training switch described above
    :return: dict keyed 'B0', 'B2_1' .. 'B7', 'Tail', 'pool_out' and
             'fc_out'
    '''
    weights = self._weight_dict
    var_dict = self._var_dict if is_train else None
    dropout = bool(is_train)

    model = {}

    # B0: [H,W,3] -> [H,W,64]
    with tf.variable_scope('B0'):
        model['B0'] = nn.conv_layer(image, weights, 1, 'SAME', [3, 3, 3, 64], var_dict)
    prev = model['B0']

    # Stages B2, B3, B4: one downsampling unit followed by plain units.
    # (stage, in channels, out channels, number of plain units)
    #   B2: [H,W,64]       -> [H/2, W/2, 128], then B2_2, B2_3
    #   B3: [H/2,W/2,128]  -> [H/4, W/4, 256], then B3_2, B3_3
    #   B4: [H/4,W/4,256]  -> [H/8, W/8, 512], then B4_2 ~ B4_6
    for stage, c_in, c_out, n_plain in ((2, 64, 128, 2), (3, 128, 256, 2), (4, 256, 512, 5)):
        spec = {
            'side': [1, 1, c_in, c_out],
            'convs': [[3, 3, c_in, c_out], [3, 3, c_out, c_out]],
        }
        unit = 'B%d_1' % stage
        with tf.variable_scope(unit):
            prev = nn.ResUnit_downsample_2convs(prev, weights, spec, var_dict=var_dict)
        model[unit] = prev
        for k in range(2, n_plain + 2):
            unit = 'B%d_%d' % (stage, k)
            with tf.variable_scope(unit):
                prev = nn.ResUnit_2convs(prev, weights, spec['convs'][1], var_dict=var_dict)
            model[unit] = prev

    # B5_1: [H/8, W/8, 512] -> [H/8, W/8, 1024]
    b5_1_spec = {
        'side': [1, 1, 512, 1024],
        'convs': [[3, 3, 512, 512], [3, 3, 512, 1024]],
    }
    with tf.variable_scope('B5_1'):
        model['B5_1'] = nn.ResUnit_hybrid_dilate_2conv(model['B4_6'], weights, b5_1_spec, var_dict=var_dict)

    # B5_2, B5_3: [H/8, W/8, 1024]
    b5_rest_spec = [[3, 3, 1024, 512], [3, 3, 512, 1024]]
    for k in (2, 3):
        unit = 'B5_%d' % k
        with tf.variable_scope(unit):
            model[unit] = nn.ResUnit_full_dilate_2convs(
                model['B5_%d' % (k - 1)], weights, b5_rest_spec, var_dict=var_dict)

    # B6: [H/8, W/8, 1024] -> [H/8, W/8, 2048]
    b6_spec = [[1, 1, 1024, 512], [3, 3, 512, 1024], [1, 1, 1024, 2048]]
    with tf.variable_scope('B6'):
        model['B6'] = nn.ResUnit_hybrid_dilate_3conv(
            model['B5_3'], weights, b6_spec, dropout=dropout, var_dict=var_dict)

    # B7: [H/8, W/8, 2048] -> [H/8, W/8, 4096]
    b7_spec = [[1, 1, 2048, 1024], [3, 3, 1024, 2048], [1, 1, 2048, 4096]]
    with tf.variable_scope('B7'):
        model['B7'] = nn.ResUnit_hybrid_dilate_3conv(
            model['B6'], weights, b7_spec, dropout=dropout, var_dict=var_dict)

    # ResNet tail. No conv, only batch_norm + activation
    tail_channels = 4096
    with tf.variable_scope('Tail'):
        model['Tail'] = nn.ResUnit_tail(model['B7'], weights, tail_channels, var_dict)

    # Global Pooling: [H/8, W/8, 4096] -> [4096]
    with tf.variable_scope('avg_pool'):
        model['pool_out'] = nn.Global_avg_pool(model['Tail'])

    # Fully connected: [4096] -> [10]
    with tf.variable_scope('FC'):
        batch_size = tf.shape(model['pool_out'])[0]
        model['fc_out'] = nn.FC(model['pool_out'], batch_size, weights,
                                self._num_classes, var_dict)

    return model