Example #1
    def __init__(self, tensor, keep_prob=1.0, num_classes=1001, retrain_layer=[], weights_path='./weights/resnet_v2_101.ckpt'):
        # Call the parent class
        Model.__init__(self, tensor, keep_prob, num_classes, retrain_layer, weights_path)

        # Create the graph; enable training mode only when layers are retrained
        is_training = bool(retrain_layer)
        with slim.arg_scope(resnet_arg_scope()):
            self.final, self.endpoints = resnet_v2_101(
                self.tensor,
                num_classes=num_classes,
                is_training=is_training,
                global_pool=True  # True: both height_out and width_out equal one
            )
Example #2
    def __init__(self,
                 tensor,
                 keep_prob=1.0,
                 num_classes=1001,
                 retrain_layer=[],
                 weights_path='./weights/resnet_v2_101.ckpt'):
        # Call the parent class
        Model.__init__(self, tensor, keep_prob, num_classes, retrain_layer,
                       weights_path)

        # TODO: batch norm is still in training mode during validation; see the sketch after this example
        is_training = bool(retrain_layer)
        with slim.arg_scope(resnet_arg_scope()):
            self.final, self.endpoints = resnet_v2_101(self.tensor,
                                                       num_classes=num_classes,
                                                       is_training=is_training)
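The TODO above points at a common slim pitfall: is_training is baked into the graph as a Python bool, so batch norm stays in training mode during validation. A minimal sketch of one common remedy (assuming the same TF1/slim setup as these examples) feeds the mode at run time instead:

import tensorflow as tf

# Sketch only: drive batch-norm mode from a placeholder rather than a Python
# bool, so one graph can serve both training and validation.
is_training = tf.placeholder_with_default(False, shape=(), name='is_training')

# Pass the tensor to the builder, e.g.
#   resnet_v2_101(tensor, num_classes=num_classes, is_training=is_training)
# and feed {is_training: True} only for training steps.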
Example #3
    def __call__(self, inputs):

        inputs = ((inputs / 255.0) - 0.5) * 2.0  # rescale [0, 255] pixels to [-1, 1]

        with tf.contrib.slim.arg_scope(resnet_arg_scope()):

            image_features, end_points = resnet_v2_101(
                inputs,
                num_classes=self.num_classes,
                is_training=self.is_training,
                global_pool=self.global_pool,
                output_stride=self.output_stride,
                reuse=self.reuse,
                scope=self.scope)
            self.reuse = True  # share variables on subsequent calls

        return image_features
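Two details in this example are worth noting: the first line rescales [0, 255] pixel values to [-1, 1] (the preprocessing the resnet_v2 checkpoints expect), and flipping self.reuse after the first call makes every later call share the same variables. A self-contained sketch of that reuse idiom (names here are illustrative, not from the example):

import tensorflow as tf

def extract(images, reuse):
    # The first call (reuse=False) creates the variables; later calls reuse them.
    with tf.variable_scope('features', reuse=reuse):
        return tf.layers.dense(tf.layers.flatten(images), 8, name='fc')

a = extract(tf.zeros([2, 4, 4, 3]), reuse=False)  # builds 'features/fc'
b = extract(tf.ones([2, 4, 4, 3]), reuse=True)    # shares the same weights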
Example #4
def get_model(input_pls,
              is_training,
              bn=False,
              bn_decay=None,
              img_size=224,
              FLAGS=None):

    if FLAGS.act == "relu":
        activation_fn = tf.nn.relu
    elif FLAGS.act == "elu":
        activation_fn = tf.nn.elu

    input_imgs = input_pls['imgs']
    input_pnts = input_pls['pnts']
    input_gvfs = input_pls['gvfs']
    input_onedge = input_pls['onedge']
    input_trans_mat = input_pls['trans_mats']
    input_obj_rot_mats = input_pls['obj_rot_mats']

    batch_size = input_imgs.get_shape()[0].value

    # endpoints
    end_points = {}
    end_points['pnts'] = input_pnts
    if FLAGS.rot:
        end_points['gt_gvfs_xyz'] = tf.matmul(input_gvfs, input_obj_rot_mats)
        end_points['pnts_rot'] = tf.matmul(input_pnts, input_obj_rot_mats)
    else:
        end_points['gt_gvfs_xyz'] = input_gvfs  #* 10
        end_points['pnts_rot'] = input_pnts
    if FLAGS.edgeweight != 1.0:
        end_points['onedge'] = input_onedge
    input_pnts_rot = end_points['pnts_rot']
    end_points['imgs'] = input_imgs  # B*H*W*3|4

    # Image extract features
    if input_imgs.shape[1] != img_size or input_imgs.shape[2] != img_size:
        if FLAGS.alpha:
            ref_img_rgb = tf.compat.v1.image.resize_bilinear(
                input_imgs[:, :, :, :3], [img_size, img_size])
            ref_img_alpha = tf.image.resize_nearest_neighbor(
                tf.expand_dims(input_imgs[:, :, :, 3], axis=-1),
                [img_size, img_size])
            ref_img = tf.concat([ref_img_rgb, ref_img_alpha], axis=-1)
        else:
            ref_img = tf.compat.v1.image.resize_bilinear(
                input_imgs, [img_size, img_size])
    else:
        ref_img = input_imgs
    end_points['resized_ref_img'] = ref_img
    if FLAGS.encoder[:6] == "vgg_16":
        vgg.vgg_16.default_image_size = img_size
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(FLAGS.wd)):
            ref_feats_embedding, encdr_end_points = vgg.vgg_16(
                ref_img,
                num_classes=FLAGS.num_classes,
                is_training=False,
                scope='vgg_16',
                spatial_squeeze=False)
    elif FLAGS.encoder == "sim_res":
        ref_feats_embedding, encdr_end_points = res_sim_encoder.res_sim_encoder(
            ref_img,
            FLAGS.batch_size,
            is_training=is_training,
            activation_fn=activation_fn,
            bn=bn,
            bn_decay=bn_decay,
            wd=FLAGS.wd)
    elif FLAGS.encoder == "resnet_v1_50":
        resnet_v1.default_image_size = img_size
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v1.resnet_v1_50(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v1_50')
        scopelst = [
            "resnet_v1_50/block1", "resnet_v1_50/block2",
            "resnet_v1_50/block3", 'resnet_v1_50/block4'
        ]
    elif FLAGS.encoder == "resnet_v1_101":
        resnet_v1.default_image_size = img_size
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v1.resnet_v1_101(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v1_101')
        scopelst = [
            "resnet_v1_101/block1", "resnet_v1_101/block2",
            "resnet_v1_101/block3", 'resnet_v1_101/block4'
        ]
    elif FLAGS.encoder == "resnet_v2_50":
        resnet_v2.default_image_size = img_size
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v2.resnet_v2_50(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v2_50')
        scopelst = [
            "resnet_v2_50/block1", "resnet_v2_50/block2",
            "resnet_v2_50/block3", 'resnet_v2_50/block4'
        ]
    elif FLAGS.encoder == "resnet_v2_101":
        resnet_v2.default_image_size = img_size
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v2.resnet_v2_101(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v2_101')
        scopelst = [
            "resnet_v2_101/block1", "resnet_v2_101/block2",
            "resnet_v2_101/block3", 'resnet_v2_101/block4'
        ]
    end_points['img_embedding'] = ref_feats_embedding
    point_img_feat = None
    gvfs_feat = None
    sample_img_points = get_img_points(input_pnts,
                                       input_trans_mat)  # B * N * 2

    if FLAGS.img_feat_onestream:
        with tf.compat.v1.variable_scope("sdfimgfeat") as scope:
            if FLAGS.encoder[:3] == "vgg":
                conv1 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points['vgg_16/conv1/conv1_2'],
                    (FLAGS.img_h, FLAGS.img_w))
                point_conv1 = tf.contrib.resampler.resampler(
                    conv1, sample_img_points)
                conv2 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points['vgg_16/conv2/conv2_2'],
                    (FLAGS.img_h, FLAGS.img_w))
                point_conv2 = tf.contrib.resampler.resampler(
                    conv2, sample_img_points)
                conv3 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points['vgg_16/conv3/conv3_3'],
                    (FLAGS.img_h, FLAGS.img_w))
                point_conv3 = tf.contrib.resampler.resampler(
                    conv3, sample_img_points)
                if FLAGS.encoder[-7:] != "smaller":
                    conv4 = tf.compat.v1.image.resize_bilinear(
                        encdr_end_points['vgg_16/conv4/conv4_3'],
                        (FLAGS.img_h, FLAGS.img_w))
                    point_conv4 = tf.contrib.resampler.resampler(
                        conv4, sample_img_points)
                    point_img_feat = tf.concat(axis=2,
                                               values=[
                                                   point_conv1, point_conv2,
                                                   point_conv3, point_conv4
                                               ])  # small
                else:
                    print("smaller vgg")
                    point_img_feat = tf.concat(
                        axis=2, values=[point_conv1, point_conv2,
                                        point_conv3])  # small
            elif FLAGS.encoder[:3] == "res":
                # print(encdr_end_points.keys())
                conv1 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[scopelst[0]], (FLAGS.img_h, FLAGS.img_w))
                point_conv1 = tf.contrib.resampler.resampler(
                    conv1, sample_img_points)
                conv2 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[scopelst[1]], (FLAGS.img_h, FLAGS.img_w))
                point_conv2 = tf.contrib.resampler.resampler(
                    conv2, sample_img_points)
                conv3 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[scopelst[2]], (FLAGS.img_h, FLAGS.img_w))
                point_conv3 = tf.contrib.resampler.resampler(
                    conv3, sample_img_points)
                # conv4 = tf.compat.v1.image.resize_bilinear(encdr_end_points[scopelst[3]], (FLAGS.img_h, FLAGS.img_w))
                # point_conv4 = tf.contrib.resampler.resampler(conv4, sample_img_points)
                point_img_feat = tf.concat(
                    axis=2, values=[point_conv1, point_conv2, point_conv3])
            else:
                conv1 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[0], (FLAGS.img_h, FLAGS.img_w))
                point_conv1 = tf.contrib.resampler.resampler(
                    conv1, sample_img_points)
                conv2 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[1], (FLAGS.img_h, FLAGS.img_w))
                point_conv2 = tf.contrib.resampler.resampler(
                    conv2, sample_img_points)
                conv3 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[2], (FLAGS.img_h, FLAGS.img_w))
                point_conv3 = tf.contrib.resampler.resampler(
                    conv3, sample_img_points)
                # conv4 = tf.compat.v1.image.resize_bilinear(encdr_end_points[scopelst[3]], (FLAGS.img_h, FLAGS.img_w))
                # point_conv4 = tf.contrib.resampler.resampler(conv4, sample_img_points)
                point_img_feat = tf.concat(
                    axis=2, values=[point_conv1, point_conv2, point_conv3])
            print("point_img_feat.shape", point_img_feat.get_shape())
            point_img_feat = tf.expand_dims(point_img_feat, axis=2)
            if FLAGS.decoder == "att":
                gvfs_feat = gvfnet.get_gvf_att_imgfeat(
                    input_pnts_rot,
                    ref_feats_embedding,
                    point_img_feat,
                    is_training,
                    batch_size,
                    bn,
                    bn_decay,
                    wd=FLAGS.wd,
                    activation_fn=activation_fn)
            elif FLAGS.decoder == "skip":
                gvfs_feat = gvfnet.get_gvf_basic_imgfeat_onestream_skip(
                    input_pnts_rot,
                    ref_feats_embedding,
                    point_img_feat,
                    is_training,
                    batch_size,
                    bn,
                    bn_decay,
                    wd=FLAGS.wd,
                    activation_fn=activation_fn)
            else:
                gvfs_feat = gvfnet.get_gvf_basic_imgfeat_onestream(
                    input_pnts_rot,
                    ref_feats_embedding,
                    point_img_feat,
                    is_training,
                    batch_size,
                    bn,
                    bn_decay,
                    wd=FLAGS.wd,
                    activation_fn=activation_fn)
    else:
        if not FLAGS.multi_view:
            with tf.compat.v1.variable_scope("sdfprediction") as scope:
                gvfs_feat = gvfnet.get_gvf_basic(input_pnts_rot,
                                                 ref_feats_embedding,
                                                 is_training,
                                                 batch_size,
                                                 bn,
                                                 bn_decay,
                                                 wd=FLAGS.wd,
                                                 activation_fn=activation_fn)
    end_points['pred_gvfs_xyz'] = None
    end_points['pred_gvfs_dist'] = None
    end_points['pred_gvfs_direction'] = None
    if FLAGS.XYZ:
        end_points['pred_gvfs_xyz'] = gvfnet.xyz_gvfhead(
            gvfs_feat, batch_size, wd=FLAGS.wd, activation_fn=activation_fn)
        end_points['pred_gvfs_dist'] = tf.sqrt(
            tf.reduce_sum(tf.square(end_points['pred_gvfs_xyz']),
                          axis=2, keepdims=True))
        end_points['pred_gvfs_direction'] = (
            end_points['pred_gvfs_xyz'] /
            tf.maximum(end_points['pred_gvfs_dist'], 1e-6))
    else:
        (end_points['pred_gvfs_dist'],
         end_points['pred_gvfs_direction']) = gvfnet.dist_direct_gvfhead(
             gvfs_feat, batch_size, wd=FLAGS.wd, activation_fn=activation_fn)
        end_points['pred_gvfs_xyz'] = (end_points['pred_gvfs_direction'] *
                                       end_points['pred_gvfs_dist'])

    end_points["sample_img_points"] = sample_img_points
    # end_points["ref_feats_embedding"] = ref_feats_embedding
    end_points["point_img_feat"] = point_img_feat

    return end_points
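The heart of the one-stream image branch above is a per-point feature lookup: each encoder feature map is resized back to image resolution, then bilinearly sampled at the projected 2-D point locations. A minimal sketch with made-up shapes:

import tensorflow as tf

# Illustrative shapes only: B=4 images, N=1024 query points, C=64 channels.
feat = tf.zeros([4, 28, 28, 64])                   # an encoder feature map
points = tf.random_uniform([4, 1024, 2]) * 224.0   # B*N*2 (x, y) pixel coords
feat_up = tf.compat.v1.image.resize_bilinear(feat, (224, 224))
point_feat = tf.contrib.resampler.resampler(feat_up, points)  # B*N*C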
Example #5
x = tf.placeholder(tf.float32,
                   shape=(None, shape1, shape2, 4),
                   name='x_features')
y_ = tf.placeholder(tf.int64, shape=[None, kind_num], name='y_')
keep_prob = tf.placeholder("float", name='keep_prob')
training = tf.placeholder(tf.bool, name='training')

n_batch = len(data.train_indices) // batch_size

# prelogits, end_points = network_incep.inference(x, keep_prob,
#                                           phase_train=training , bottleneck_layer_size=128,
#                                           weight_decay=0.0)

#logits = densenet.densenet_inference(x, training, keep_prob)

# prelogits, end_points = resnet.resnet_v1_101(x , is_training=training)
prelogits, end_points = resnet_v2.resnet_v2_101(x, is_training=training)
prelogits = tf.layers.flatten(prelogits)

logits = slim.fully_connected(
    prelogits,
    kind_num,
    activation_fn=None,
    weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
    # weights_regularizer=slim.l2_regularizer(0.0),
    scope='Logits',
    reuse=False)

#logits = tf.identity(logits, 'logits')
y_conv = tf.nn.softmax(logits, name='Softmax')
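The snippet stops at the softmax; a typical continuation (assumed here, not part of the original) would add a cross-entropy loss against the one-hot y_ labels and an accuracy metric:

# Assumed continuation, not from the original snippet.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=tf.cast(y_, tf.float32), logits=logits))
correct = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))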
Example #6
def model(inputs):

    batch_size = config.BATCH_SIZE
    height, width = config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1]

    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        #net, end_points = resnet_v2.resnet_v2_101(inputs, 1001, is_training=False)
        net, end_points = resnet_v2.resnet_v2_101(
            inputs,
            64,
            is_training=True,
            global_pool=False,
            output_stride=config.OUTPUT_STRIDE)
    # print(net)
    kp_maps = tf.contrib.layers.conv2d(net,
                                       num_outputs=config.NUM_KP,
                                       kernel_size=(1, 1),
                                       activation_fn=tf.nn.sigmoid,
                                       stride=1,
                                       scope='kp_maps')
    short_offsets = tf.contrib.layers.conv2d(net,
                                             num_outputs=2 * config.NUM_KP,
                                             kernel_size=(1, 1),
                                             activation_fn=None,
                                             stride=1,
                                             scope='short_offsets')
    mid_offsets = tf.contrib.layers.conv2d(net,
                                           num_outputs=4 * config.NUM_EDGES,
                                           kernel_size=(1, 1),
                                           activation_fn=None,
                                           stride=1,
                                           scope='mid_offsets')
    long_offsets = tf.contrib.layers.conv2d(net,
                                            num_outputs=2 * config.NUM_KP,
                                            kernel_size=(1, 1),
                                            activation_fn=None,
                                            stride=1,
                                            scope='long_offsets')
    seg_mask = tf.contrib.layers.conv2d(net,
                                        num_outputs=1,
                                        kernel_size=(1, 1),
                                        activation_fn=tf.nn.sigmoid,
                                        stride=1,
                                        scope='seg_mask')

    kp_maps = tf.image.resize_bilinear(kp_maps, (height, width),
                                       align_corners=True)
    short_offsets = tf.image.resize_bilinear(short_offsets, (height, width),
                                             align_corners=True)
    mid_offsets = tf.image.resize_bilinear(mid_offsets, (height, width),
                                           align_corners=True)
    long_offsets = tf.image.resize_bilinear(long_offsets, (height, width),
                                            align_corners=True)
    seg_mask = tf.image.resize_bilinear(seg_mask, (height, width),
                                        align_corners=True)
    '''
    with tf.name_scope('kp_maps_deconv') as scope:
        wt = tf.Variable(tf.truncated_normal([9, 9, config.NUM_KP, config.NUM_KP]))
        kp_maps = tf.nn.conv2d_transpose(kp_maps, wt, [batch_size, height, width, config.NUM_KP], [1, 8, 8, 1], 'SAME')
    
    with tf.name_scope('short_offsets_deconv') as scope:
        wt = tf.Variable(tf.truncated_normal([9, 9, 2*config.NUM_KP, 2*config.NUM_KP]))
        short_offsets = tf.nn.conv2d_transpose(short_offsets, wt, [batch_size, height, width, 2*config.NUM_KP], [1, 8, 8, 1], 'SAME')
    
    with tf.name_scope('mid_offsets_deconv') as scope:
        wt = tf.Variable(tf.truncated_normal([9, 9, 4*config.NUM_EDGES, 4*config.NUM_EDGES]))
        mid_offsets = tf.nn.conv2d_transpose(mid_offsets, wt, [batch_size, height, width, 4*config.NUM_EDGES], [1, 8, 8, 1], 'SAME')
    
    with tf.name_scope('long_offsets_deconv') as scope:
        wt = tf.Variable(tf.truncated_normal([9, 9, 2*config.NUM_KP, 2*config.NUM_KP]))
        long_offsets = tf.nn.conv2d_transpose(long_offsets, wt, [batch_size, height, width, 2*config.NUM_KP], [1, 8, 8, 1], 'SAME')
    
    with tf.name_scope('seg_mask_deconv') as scope:
        wt = tf.Variable(tf.truncated_normal([9, 9, 1, 1]))
        seg_mask = tf.nn.conv2d_transpose(seg_mask, wt, [batch_size, height, width, 1], [1, 8, 8, 1], 'SAME')
    '''
    mid_offsets = split_and_refine_mid_offsets(mid_offsets, short_offsets)
    long_offsets = split_and_refine_long_offsets(long_offsets, short_offsets)
    outputs = [kp_maps, short_offsets, mid_offsets, long_offsets, seg_mask]
    return outputs
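Because the backbone is called with global_pool=False and an output_stride, net is a dense feature map of roughly input_size / output_stride, which is why each 1x1-conv head is resized back to (height, width). A quick illustration with assumed values:

# Assumed values for illustration (not from config).
s, h, w = 8, 401, 401
print((h + s - 1) // s, (w + s - 1) // s)  # spatial size of net: 51 x 51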
Example #7
    def build_model(self):
        # Default parameters
        FILTER_SIZE = (5, 5)
        Z_DIM = 2048
        STRIDE = (2, 2)
        DEPTHS = [64, 128, 256, 256, 128, 32]
        CHANNELS = 4
        N_CLASS = self.config.nclass

        def encoder(input, z_dim=Z_DIM, is_training=False):
            net = conv2d_BN(input,
                            DEPTHS[0],
                            FILTER_SIZE,
                            is_training,
                            stride=STRIDE,
                            name='conv_1',
                            kernel_initializer=tf.truncated_normal_initializer(
                                stddev=0.01))
            net = conv2d_BN(net,
                            DEPTHS[1],
                            FILTER_SIZE,
                            is_training,
                            stride=STRIDE,
                            name='conv_2',
                            kernel_initializer=tf.truncated_normal_initializer(
                                stddev=0.01))
            net = conv2d_BN(net,
                            DEPTHS[2],
                            FILTER_SIZE,
                            is_training,
                            stride=STRIDE,
                            name='conv_3',
                            kernel_initializer=tf.truncated_normal_initializer(
                                stddev=0.01))
            z = tf.layers.dense(
                tf.layers.flatten(net),
                z_dim,
                kernel_initializer=tf.truncated_normal_initializer(
                    stddev=0.01))
            z = tf.nn.relu(tf.layers.batch_normalization(z,
                                                         training=is_training),
                           name='enc')
            return z

        self.x = tf.placeholder(tf.float32,
                                shape=[None] + self.config.input_shape,
                                name="input")
        self.y = tf.placeholder(tf.int32, shape=[None], name="label")
        self.is_training = tf.placeholder(tf.bool, name="is_training")
        # network architecture
        batch_norm_decay = (0.997 if self.config.get('bn_decay') is None
                            else self.config.bn_decay)
        output_stride = self.config.get('output_stride')
        if self.config.model == "resnet":
            with slim.arg_scope(
                    resnet_v2.resnet_arg_scope(
                        batch_norm_decay=batch_norm_decay)):
                net, end_points = resnet_v2.resnet_v2_50(
                    self.x,
                    N_CLASS,
                    is_training=self.is_training,
                    output_stride=output_stride)
                logits = tf.squeeze(end_points["resnet_v2_50/logits"],
                                    axis=[1, 2])
            pred = tf.nn.softmax(logits, "pred")
        elif self.config.model == "resnet_101":
            with slim.arg_scope(
                    resnet_v2.resnet_arg_scope(
                        batch_norm_decay=batch_norm_decay)):
                net, end_points = resnet_v2.resnet_v2_101(
                    self.x,
                    N_CLASS,
                    is_training=self.is_training,
                    output_stride=output_stride)
                logits = tf.squeeze(end_points["resnet_v2_101/logits"],
                                    axis=[1, 2])
            pred = tf.nn.softmax(logits)
        elif self.config.model == "resnet_v1_50":
            with slim.arg_scope(
                    resnet_v1.resnet_arg_scope(
                        batch_norm_decay=batch_norm_decay)):
                net, end_points = resnet_v1.resnet_v1_50(
                    self.x,
                    N_CLASS,
                    is_training=self.is_training,
                    output_stride=output_stride)
                logits = tf.squeeze(end_points["resnet_v1_50/logits"],
                                    axis=[1, 2])
            pred = tf.nn.softmax(logits)
        else:
            z = encoder(self.x, z_dim=Z_DIM, is_training=self.is_training)
            logits = tf.layers.dense(
                z,
                N_CLASS,
                kernel_initializer=tf.truncated_normal_initializer(
                    stddev=0.01))
            pred = tf.nn.softmax(logits)

        self.pred = pred
        with tf.name_scope("loss"):

            self.loss = tf.reduce_mean(tf.losses.sparse_softmax_cross_entropy(
                labels=self.y, logits=logits),
                                       name='cross_entropy')

            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            print(update_ops)
            with tf.control_dependencies(update_ops):
                self.train_op = tf.train.AdamOptimizer(
                    self.config.learning_rate).minimize(
                        self.loss, global_step=self.global_step_tensor)
            correct_prediction = tf.equal(
                tf.argmax(pred, 1, output_type=tf.int32), self.y)
            self.acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
Example #8
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @ File ResNetDemo.py
# @ Description :
# @ Author alexchung
# @ Time 21/1/2019 09:52

import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.slim.python.slim.nets import resnet_v2



images = tf.Variable(initial_value=tf.random_uniform(shape=(5, 299, 299, 3), minval=0, maxval=3), dtype=tf.float32)
num_classes = tf.constant(value=5, dtype=tf.int32)
# is_training = True

if __name__ == "__main__":

    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())
    with tf.Session() as sess:
        # images, class_num = sess.run([images, class_num])
        sess.run(init)
        # NOTE: the resnet graph below is built after `init` was created, so
        # its variables are not covered by that initializer; this demo only
        # inspects variable names and shapes, so nothing is ever run.
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, end_points = resnet_v2.resnet_v2_101(
                images, num_classes=num_classes.eval(), is_training=True)

        for var in tf.model_variables():
            print(var.name, var.shape)
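As noted in the comment above, init is grouped before the resnet graph exists, so the network's variables are not covered by it. A safer ordering (a sketch under the same imports) builds the graph first:

# Sketch: build the graph first, then create and run the initializer so the
# resnet variables are covered by it.
with slim.arg_scope(resnet_v2.resnet_arg_scope()):
    logits, end_points = resnet_v2.resnet_v2_101(images, num_classes=5,
                                                 is_training=True)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for var in tf.model_variables():
        print(var.name, var.shape)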