def model_InceptionResNet_BLSTM(images, weight_decay=1e-5, is_training=True):
    '''
    Build the score and geometry heads on top of an InceptionResNet backbone,
    refining every skip feature with a BLSTM before it is merged.

    Translated from the original note: a relatively asymmetric U-Net
    structure; after Inception has extracted the features, a BLSTM is used to
    extract a global feature.

    :param images: input image batch; it is passed through
        mean_image_subtraction and then fed to InceptionResNet.model
    :param weight_decay: L2 regularisation weight, used for the backbone and
        for every slim.conv2d created in the 'feature_fusion' scope
    :param is_training: Python bool; forwarded to the backbone and to batch
        norm, and selects the activation of the score head (see below)
    :return: tuple (F_score, F_geometry).
        F_score is a 1-channel map: raw logits when is_training and
        FLAGS.cross_loss are both true, sigmoid output otherwise.
        F_geometry has 5 channels: 4 sigmoid channels scaled by
        FLAGS.text_scale followed by 1 angle channel in (-pi/4, pi/4).
    '''
    images = mean_image_subtraction(images)
    logits, end_points = InceptionResNet.model(
        images, is_training=is_training, weight_decay=weight_decay)

    # Single-argument print(...) emits the same text on Python 2 and Python 3.
    print('end_points is  {}'.format(end_points))
    print('end_points key are {}'.format(end_points.keys()))
    print('logits is  {}'.format(logits))

    # `values` must be the list of tensors themselves; the bound method
    # `end_points.values` (without the call) carries no tensors at all.
    with tf.variable_scope('feature_fusion', values=list(end_points.values())):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope(
                [slim.conv2d],
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                weights_regularizer=slim.l2_regularizer(weight_decay)):
            # Backbone features, listed from 'Scale-5' down to 'Scale-0'.
            f = [
                end_points['Scale-5'],
                end_points['Scale-4'],
                end_points['Scale-3'],
                end_points['Scale-2'],
                end_points['Scale-1'],
                end_points['Scale-0']
            ]
            # Only the first four feature shapes are logged.
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))

            g_last = len(f)
            g = [None] * g_last  # g[i]: merged feature handed to stage i + 1
            h = [None] * g_last  # h[i]: merged feature at stage i
            # Channel count per stage; stage 0 uses the backbone feature as is.
            num_outputs = [None, 128, 64, 32, 32, 32]

            for i in range(g_last):
                if i == 0:
                    h[i] = f[i]
                else:
                    # 1x1 conv to num_outputs[i] channels, then a BLSTM.
                    # NOTE(review): the positional Bilstm arguments are
                    # presumably (input dim, hidden dim, output dim) - confirm
                    # against the Bilstm helper.
                    f[i] = slim.conv2d(f[i], num_outputs=num_outputs[i], kernel_size=1)
                    f[i] = Bilstm(f[i], num_outputs[i], num_outputs[i] * 2, num_outputs[i],
                                  name='Bilstm-' + str(i))
                    # Merge with the unpooled output of the previous stage.
                    c1_1 = slim.conv2d(tf.concat([g[i - 1], f[i]], axis=-1), num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= g_last - 2:
                    g[i] = unpool(h[i])
                    if i == 0:
                        # Stage 0 is not logged below.
                        continue
                else:
                    # Last stage: a final 3x3 conv instead of unpooling.
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(
                    i, h[i].shape, i, g[i].shape))

            fused = g[g_last - 1]
            print('g[ {} ] shape is  {}'.format(g_last - 1, fused))

            # Here we use a slightly different way for the regression part:
            # a sigmoid limits the regression range, and the same is done
            # for the angle map.
            # The score head emits raw logits only when training with the
            # cross-entropy loss; FLAGS.cross_loss is read only while training.
            use_logits = is_training and FLAGS.cross_loss
            if is_training:
                print('loss: cross_loss' if use_logits else 'loss: IoU loss')
            F_score = slim.conv2d(fused, 1, 1,
                                  activation_fn=None if use_logits else tf.nn.sigmoid,
                                  normalizer_fn=None)

            # 4 channels of axis-aligned bbox and 1 channel of rotation angle.
            # The geometry branch also sees the score map:
            # 32 feature channels + 1 score channel = 33.
            input_tensor_for_geo = tf.concat([fused, F_score], axis=-1)
            input_tensor_for_geo = slim.conv2d(input_tensor_for_geo, 33, kernel_size=3, stride=1,
                                               activation_fn=tf.nn.relu, scope='merge_conv1')
            input_tensor_for_geo = slim.conv2d(input_tensor_for_geo, 33, kernel_size=3, stride=1,
                                               activation_fn=tf.nn.relu, scope='merge_conv2')
            geo_map = slim.conv2d(input_tensor_for_geo, 4, 1, activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None) * FLAGS.text_scale
            # angle is between [-45, 45] degrees
            angle_map = (slim.conv2d(input_tensor_for_geo, 1, 1, activation_fn=tf.nn.sigmoid,
                                     normalizer_fn=None) - 0.5) * np.pi / 2
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
def model_InceptionResNet_symmetry(images, weight_decay=1e-5, is_training=True):
    '''
    Build the score and geometry heads on top of an InceptionResNet backbone
    using a U-Net style decoder that merges several backbone end points at
    every scale.

    NOTE(review): the previous docstring called this a "relatively asymmetric
    U-Net structure", which conflicts with the function name - confirm the
    intended wording.

    :param images: input image batch; it is passed through
        mean_image_subtraction and then fed to InceptionResNet.model
    :param weight_decay: L2 regularisation weight, used for the backbone and
        for every slim.conv2d created in the 'feature_fusion' scope
    :param is_training: Python bool; forwarded to the backbone and to batch
        norm, and selects the activation of the score head (see below)
    :return: tuple (F_score, F_geometry).
        F_score is a 1-channel map: raw logits when is_training and
        FLAGS.cross_loss are both true, sigmoid output otherwise.
        F_geometry has 5 channels: 4 sigmoid channels scaled by
        FLAGS.text_scale followed by 1 angle channel in (-pi/4, pi/4).
    '''
    images = mean_image_subtraction(images)
    logits, end_points = InceptionResNet.model(
        images, is_training=is_training, weight_decay=weight_decay)

    # Single-argument print(...) emits the same text on Python 2 and Python 3.
    print('end_points is  {}'.format(end_points))
    print('end_points key are {}'.format(end_points.keys()))
    print('logits is  {}'.format(logits))

    # `values` must be the list of tensors themselves; the bound method
    # `end_points.values` (without the call) carries no tensors at all.
    with tf.variable_scope('feature_fusion', values=list(end_points.values())):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope(
                [slim.conv2d],
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                weights_regularizer=slim.l2_regularizer(weight_decay)):
            # Only the four coarsest main end points are logged.
            for i, key in enumerate(('Scale-5', 'Scale-4', 'Scale-3', 'Scale-2')):
                print('Shape of f_{} {}'.format(i, end_points[key].shape))

            # One entry per decoder stage, coarsest first:
            # (output channels, end points merged in, in order).
            decoder_stages = (
                (256, ('Scale-5-2', 'Scale-5-1', 'Scale-5-0')),
                (128, ('Scale-4', 'Scale-4-0')),
                (64, ('Scale-3', 'Scale-3-1', 'Scale-3-0')),
                (32, ('Scale-2', 'Scale-2-1', 'Scale-2-0')),
                (32, ('Scale-1', 'Scale-1-1', 'Scale-1-0')),
                (32, ('Scale-0', 'Scale-0-1', 'Scale-0-0')),
            )

            # The convolutions below carry no explicit scope, so slim numbers
            # them in creation order; the order here must stay stable for
            # variable names (and therefore checkpoints) to keep matching.
            h = None
            for stage, (output_channel, skip_keys) in enumerate(decoder_stages):
                if stage == 0:
                    # The coarsest stage starts from the backbone output itself.
                    h = slim.conv2d(end_points['Scale-5'], output_channel, kernel_size=3)
                else:
                    # Later stages start from the unpooled previous stage.
                    h = unpool(h)
                for key in skip_keys:
                    # Merge one end point: concat, 1x1 conv, then 3x3 conv.
                    h = slim.conv2d(tf.concat([h, end_points[key]], axis=-1), output_channel, 1)
                    h = slim.conv2d(h, output_channel, kernel_size=3)

            # Scale-0 is the last stage: a final 3x3 conv instead of unpooling.
            final_channels = decoder_stages[-1][0]
            g = slim.conv2d(h, final_channels, 3)

            # The score head emits raw logits only when training with the
            # cross-entropy loss; FLAGS.cross_loss is read only while training.
            use_logits = is_training and FLAGS.cross_loss
            if is_training:
                print('loss: cross_loss' if use_logits else 'loss: IoU loss')
            F_score = slim.conv2d(g, 1, 1,
                                  activation_fn=None if use_logits else tf.nn.sigmoid,
                                  normalizer_fn=None)

            # 4 channels of axis-aligned bbox and 1 channel of rotation angle.
            # The geometry branch also sees the score map:
            # 32 feature channels + 1 score channel = 33.
            input_tensor_for_geo = tf.concat([g, F_score], axis=-1)
            input_tensor_for_geo = slim.conv2d(input_tensor_for_geo, 33, kernel_size=3, stride=1,
                                               activation_fn=tf.nn.relu, scope='merge_conv1')
            input_tensor_for_geo = slim.conv2d(input_tensor_for_geo, 33, kernel_size=3, stride=1,
                                               activation_fn=tf.nn.relu, scope='merge_conv2')
            geo_map = slim.conv2d(input_tensor_for_geo, 4, 1, activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None) * FLAGS.text_scale
            # angle is between [-45, 45] degrees
            angle_map = (slim.conv2d(input_tensor_for_geo, 1, 1, activation_fn=tf.nn.sigmoid,
                                     normalizer_fn=None) - 0.5) * np.pi / 2
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry