Esempio n. 1
0
def model_InceptionResNet_BLSTM(images, weight_decay=1e-5, is_training=True):
    '''
    Build the score/geometry heads on top of an InceptionResNet backbone,
    refining every merged skip feature with a BLSTM.

    A relatively asymmetric U-Net structure: after Inception has extracted the
    features, a BLSTM is used to extract a global feature at each merge level.

    :param images: input image batch; it is mean-subtracted and fed to
        ``InceptionResNet.model`` (presumably channels-last, since features
        are concatenated on axis -1 -- confirm against the input pipeline)
    :param weight_decay: L2 regularisation factor for the backbone and for
        every ``slim.conv2d`` created in the fusion scope
    :param is_training: Python truth value; forwarded to the backbone and to
        batch norm, and selects the activation of the score head
    :return: tuple ``(F_score, F_geometry)``; ``F_score`` has 1 channel and
        ``F_geometry`` has 5 channels (4 distance channels scaled by
        ``FLAGS.text_scale`` followed by 1 angle channel)
    '''
    images = mean_image_subtraction(images)
    logits, end_points = InceptionResNet.model(images,
                                               is_training=is_training,
                                               weight_decay=weight_decay)
    # Single-argument print(...) produces the same text whether it is parsed
    # as a Python 2 statement or a Python 3 function call.
    print('end_points is  {}'.format(end_points))
    print('end_points key are {}'.format(end_points.keys()))
    print('logits is  {}'.format(logits))
    # `values` must be the tensors themselves, not the bound `values` method.
    with tf.variable_scope('feature_fusion',
                           values=list(end_points.values())):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope(
                [slim.conv2d],
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                weights_regularizer=slim.l2_regularizer(weight_decay)):
            # Backbone features, from 'Scale-5' down to 'Scale-0'.
            f = [
                end_points['Scale-5'], end_points['Scale-4'],
                end_points['Scale-3'], end_points['Scale-2'],
                end_points['Scale-1'], end_points['Scale-0']
            ]
            # Only the first four feature shapes are logged.
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            # Output channels per level; level 0 is used as-is, so it has
            # no entry.
            num_outputs = [None, 128, 64, 32, 32, 32]
            last = len(f) - 1
            # NOTE: the convolutions below are created without explicit
            # scopes, so (assuming slim's usual auto-numbering) their variable
            # names depend on creation order -- keep the order of these calls
            # unchanged.
            # (An earlier variant that applied a BLSTM directly to 'Scale-5'
            # and to the unpooled maps is disabled.)
            g = None
            for i, feature in enumerate(f):
                if i == 0:
                    h = feature
                else:
                    channels = num_outputs[i]
                    # Reduce the skip feature, run it through the BLSTM, then
                    # merge it with the map coming from the previous level.
                    feature = slim.conv2d(feature,
                                          num_outputs=channels,
                                          kernel_size=1)
                    feature = Bilstm(feature,
                                     channels,
                                     channels * 2,
                                     channels,
                                     name='Bilstm-' + str(i))
                    merged = slim.conv2d(tf.concat([g, feature], axis=-1),
                                         channels, 1)
                    h = slim.conv2d(merged, channels, 3)
                if i < last:
                    g = unpool(h)
                else:
                    # The last level is not unpooled; a 3x3 conv finishes it.
                    g = slim.conv2d(h, num_outputs[i], 3)
                if i > 0:
                    print('Shape of h_{} {}, g_{} {}'.format(
                        i, h.shape, i, g.shape))
            print('g[ {} ] shape is  {}'.format(last, g))

            # Score head: raw logits only when training with the cross loss
            # (presumably that loss applies the sigmoid itself -- confirm
            # against the loss code); a sigmoid output otherwise.
            score_activation = tf.nn.sigmoid
            if is_training:
                if FLAGS.cross_loss:
                    print('loss: cross_loss')
                    score_activation = None
                else:
                    print('loss: IoU loss')
            F_score = slim.conv2d(g,
                                  1,
                                  1,
                                  activation_fn=score_activation,
                                  normalizer_fn=None)

            # Geometry head: 4 channels of axis-aligned bbox and 1 channel of
            # rotation angle, predicted from the features concatenated with
            # the score map (32 + 1 = 33 channels).
            input_tensor_for_geo = tf.concat([g, F_score], axis=-1)
            input_tensor_for_geo = slim.conv2d(input_tensor_for_geo,
                                               33,
                                               kernel_size=3,
                                               stride=1,
                                               activation_fn=tf.nn.relu,
                                               scope='merge_conv1')
            input_tensor_for_geo = slim.conv2d(input_tensor_for_geo,
                                               33,
                                               kernel_size=3,
                                               stride=1,
                                               activation_fn=tf.nn.relu,
                                               scope='merge_conv2')
            # A sigmoid limits the regression range; the same is done for the
            # angle map.
            geo_map = slim.conv2d(input_tensor_for_geo,
                                  4,
                                  1,
                                  activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(input_tensor_for_geo,
                                     1,
                                     1,
                                     activation_fn=tf.nn.sigmoid,
                                     normalizer_fn=None) -
                         0.5) * np.pi / 2  # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
Esempio n. 2
0
def model_InceptionResNet_symmetry(images,
                                   weight_decay=1e-5,
                                   is_training=True):
    '''
    Build the score/geometry heads on top of an InceptionResNet backbone,
    merging every intermediate end point of each scale on the way up.

    At each scale the running feature map is concatenated, in turn, with the
    scale's end points ('Scale-k', 'Scale-k-1', 'Scale-k-0', ...); every
    concatenation is followed by a 1x1 and a 3x3 convolution. All scales
    except the last are then unpooled.

    :param images: input image batch; it is mean-subtracted and fed to
        ``InceptionResNet.model`` (presumably channels-last, since features
        are concatenated on axis -1 -- confirm against the input pipeline)
    :param weight_decay: L2 regularisation factor for the backbone and for
        every ``slim.conv2d`` created in the fusion scope
    :param is_training: Python truth value; forwarded to the backbone and to
        batch norm, and selects the activation of the score head
    :return: tuple ``(F_score, F_geometry)``; ``F_score`` has 1 channel and
        ``F_geometry`` has 5 channels (4 distance channels scaled by
        ``FLAGS.text_scale`` followed by 1 angle channel)
    '''
    images = mean_image_subtraction(images)
    logits, end_points = InceptionResNet.model(images,
                                               is_training=is_training,
                                               weight_decay=weight_decay)
    # Single-argument print(...) produces the same text whether it is parsed
    # as a Python 2 statement or a Python 3 function call.
    print('end_points is  {}'.format(end_points))
    print('end_points key are {}'.format(end_points.keys()))
    print('logits is  {}'.format(logits))
    # `values` must be the tensors themselves, not the bound `values` method.
    with tf.variable_scope('feature_fusion',
                           values=list(end_points.values())):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope(
                [slim.conv2d],
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                weights_regularizer=slim.l2_regularizer(weight_decay)):
            # Only the first four top-level feature shapes are logged.
            for i, key in enumerate(
                    ('Scale-5', 'Scale-4', 'Scale-3', 'Scale-2')):
                print('Shape of f_{} {}'.format(i, end_points[key].shape))

            # (output channels, end points merged at that scale, in order).
            stages = [
                (256, ['Scale-5-2', 'Scale-5-1', 'Scale-5-0']),
                (128, ['Scale-4', 'Scale-4-0']),
                (64, ['Scale-3', 'Scale-3-1', 'Scale-3-0']),
                (32, ['Scale-2', 'Scale-2-1', 'Scale-2-0']),
                (32, ['Scale-1', 'Scale-1-1', 'Scale-1-0']),
                (32, ['Scale-0', 'Scale-0-1', 'Scale-0-0']),
            ]
            final_stage = len(stages) - 1

            # NOTE: the convolutions below are created without explicit
            # scopes, so (assuming slim's usual auto-numbering) their variable
            # names depend on creation order -- keep the order of these calls
            # unchanged.
            # Scale-5 has no coarser map to merge with, so 'Scale-5' itself
            # seeds the chain through a 3x3 conv.
            h = slim.conv2d(end_points['Scale-5'],
                            stages[0][0],
                            kernel_size=3)
            for stage, (output_channel, skip_keys) in enumerate(stages):
                for key in skip_keys:
                    h = slim.conv2d(
                        tf.concat([h, end_points[key]], axis=-1),
                        output_channel, 1)
                    h = slim.conv2d(h, output_channel, kernel_size=3)
                if stage < final_stage:
                    h = unpool(h)
            # The last scale is not unpooled; a 3x3 conv finishes it.
            g = slim.conv2d(h, stages[final_stage][0], 3)

            # Score head: raw logits only when training with the cross loss
            # (presumably that loss applies the sigmoid itself -- confirm
            # against the loss code); a sigmoid output otherwise.
            score_activation = tf.nn.sigmoid
            if is_training:
                if FLAGS.cross_loss:
                    print('loss: cross_loss')
                    score_activation = None
                else:
                    print('loss: IoU loss')
            F_score = slim.conv2d(g,
                                  1,
                                  1,
                                  activation_fn=score_activation,
                                  normalizer_fn=None)

            # Geometry head: 4 channels of axis-aligned bbox and 1 channel of
            # rotation angle, predicted from the features concatenated with
            # the score map (32 + 1 = 33 channels).
            input_tensor_for_geo = tf.concat([g, F_score], axis=-1)
            input_tensor_for_geo = slim.conv2d(input_tensor_for_geo,
                                               33,
                                               kernel_size=3,
                                               stride=1,
                                               activation_fn=tf.nn.relu,
                                               scope='merge_conv1')
            input_tensor_for_geo = slim.conv2d(input_tensor_for_geo,
                                               33,
                                               kernel_size=3,
                                               stride=1,
                                               activation_fn=tf.nn.relu,
                                               scope='merge_conv2')
            # A sigmoid limits the regression range of both maps.
            geo_map = slim.conv2d(input_tensor_for_geo,
                                  4,
                                  1,
                                  activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(input_tensor_for_geo,
                                     1,
                                     1,
                                     activation_fn=tf.nn.sigmoid,
                                     normalizer_fn=None) -
                         0.5) * np.pi / 2  # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry