Example #1
def resnet_50(input_image):
    arg_scope = resnet_v1.resnet_arg_scope()
    with slim.arg_scope(arg_scope):
        features, _ = resnet_v1.resnet_v1_50(input_image)
        # collapse the 1x1 spatial dims; explicit axes keep the batch
        # dimension even when batch_size == 1
        features = tf.squeeze(features, axis=[1, 2])
    return features
Example #2
def top_feature_net(input, anchors, inds_inside, num_bases):
  stride=8
  with tf.variable_scope("top_base") as sc:
    arg_scope = resnet_v1.resnet_arg_scope(weight_decay=0.0)
    with slim.arg_scope(arg_scope):
      net, end_points = resnet_v1.resnet_v1_50(input, None, global_pool=False, output_stride=8)
      #pdb.set_trace()
      block = end_points['top_base/resnet_v1_50/block4']
      # block = conv2d_bn_relu(block, num_kernels=512, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='2')
      tf.summary.histogram('rpn_top_block', block)
      # tf.summary.histogram('rpn_top_block_weights', tf.get_collection('2/conv_weight')[0])
    with tf.variable_scope('top') as scope:
      #up     = upsample2d(block, factor = 2, has_bias=True, trainable=True, name='1')
      #up     = block
      up      = conv2d_bn_relu(block, num_kernels=128, kernel_size=(3,3), stride=[1,1,1,1], padding='SAME', name='2')
      scores  = conv2d(up, num_kernels=2*num_bases, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='score')
      probs   = tf.nn.softmax( tf.reshape(scores,[-1,2]), name='prob')
      deltas  = conv2d(up, num_kernels=4*num_bases, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='delta')

    #<todo> flip to train and test mode nms (e.g. different nms_pre_topn values): use tf.cond
    with tf.variable_scope('top-nms') as scope:    #non-max
      batch_size, img_height, img_width, img_channel = input.get_shape().as_list()
      img_scale = 1
      # pdb.set_trace()
      rois, roi_scores = tf_rpn_nms( probs, deltas, anchors, inds_inside,
                                       stride, img_width, img_height, img_scale,
                                       nms_thresh=0.7, min_size=stride, nms_pre_topn=300, nms_post_topn=50,
                                       name ='nms')
  
    #<todo> feature = upsample2d(block, factor = 4,  ...)
    feature = block

  return feature, scores, probs, deltas, rois, roi_scores
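A minimal sketch of the <todo> earlier in this example: tf.cond can switch nms_pre_topn between train and test mode. Here is_training is an assumed boolean placeholder, the topn values are illustrative, and tf_rpn_nms with its arguments comes from the example above.

is_training = tf.placeholder(tf.bool, shape=(), name='is_training')
rois, roi_scores = tf.cond(
    is_training,
    lambda: tf_rpn_nms(probs, deltas, anchors, inds_inside,
                       stride, img_width, img_height, img_scale,
                       nms_thresh=0.7, min_size=stride,
                       nms_pre_topn=300, nms_post_topn=50, name='nms_train'),
    lambda: tf_rpn_nms(probs, deltas, anchors, inds_inside,
                       stride, img_width, img_height, img_scale,
                       nms_thresh=0.7, min_size=stride,
                       nms_pre_topn=1000, nms_post_topn=100, name='nms_test'))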
Example #3
def network_resnet_v1_50():
    input_shape = [1, 224, 224, 3]
    input_ = tf.placeholder(dtype=tf.float32, name='input', shape=input_shape)
    net, _end_points = resnet_v1_50(input_,
                                    num_classes=1000,
                                    is_training=False)
    return net
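A possible way to load pretrained weights into the graph this function builds, using slim.assign_from_checkpoint_fn as later examples do. This is a sketch: 'resnet_v1_50.ckpt' is an assumed local checkpoint path.

import numpy as np

net = network_resnet_v1_50()
init_fn = slim.assign_from_checkpoint_fn(
    'resnet_v1_50.ckpt', slim.get_model_variables('resnet_v1_50'))
with tf.Session() as sess:
    init_fn(sess)
    # feed the placeholder by name and run one forward pass
    logits = sess.run(net, feed_dict={'input:0': np.zeros((1, 224, 224, 3), np.float32)})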
Example #4
    def build_model(self, inp, mode, regularizer=None):
        net = inp['img']

        training = (mode == tf.estimator.ModeKeys.TRAIN)

        with tf.variable_scope('encode'):
            with slim.arg_scope(
                    resnet_v1.resnet_arg_scope(
                        weight_decay=self.config_dict['ext']['encoder_l2_decay'])):
                net, _ = resnet_v1.resnet_v1_50(net,
                                                num_classes=None,
                                                is_training=training,
                                                global_pool=True)

        with tf.variable_scope('classify'):
            # net = tf.layers.max_pooling2d(net, net.shape.as_list()[1], 1)
            # net = tf.layers.conv2d(net, 1024, 1, kernel_regularizer=regularizer)
            net = tf.layers.conv2d(net,
                                   self.config_dict['label_cnt'],
                                   1,
                                   kernel_regularizer=regularizer)
            logits = tf.squeeze(net, axis=(1, 2))

        return logits
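Since global_pool=True leaves the encoder output with a 1x1 spatial map, the 1x1 convolution above acts as a fully connected classifier. An equivalent head, as a sketch: resnet_out stands for the tensor returned by resnet_v1_50, and label_cnt for self.config_dict['label_cnt'].

pooled = tf.squeeze(resnet_out, axis=(1, 2))              # [N, 2048]
logits = tf.layers.dense(pooled, label_cnt,
                         kernel_regularizer=regularizer)  # [N, label_cnt]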
Example #5
def res50():
    image = tf.placeholder(tf.float32, [None, 224, 224, 3], 'image')
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net_conv, end_point = resnet_v1.resnet_v1_50(image,
                                                     global_pool=True,
                                                     is_training=False)
    return net_conv, image
Example #6
def run_model(total_gpu_num):
    """Train model."""
    with epl.replicate(total_gpu_num):
        iterator = get_mock_iterator()
        images, labels = iterator.get_next()
        features = resnet_v1.resnet_v1_50(images,
                                          num_classes=None,
                                          is_training=True)[0]
        features = tf.squeeze(features, [1, 2])

    with epl.split(total_gpu_num):
        logits = tf.layers.dense(features, class_num)
        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels,
                                                      logits=logits)

    global_step = tf.train.get_or_create_global_step()
    optimizer = tf.train.AdamOptimizer(learning_rate=0.9)
    train_op = optimizer.minimize(loss, global_step=global_step)

    hooks = [tf.train.StopAtStepHook(last_step=20)]
    with tf.train.MonitoredTrainingSession(hooks=hooks) as sess:
        while not sess.should_stop():
            starttime = time.time()
            _, _, step = sess.run([loss, train_op, global_step])
            endtime = time.time()
            tf.logging.info("[Iteration {} ], Time: {:.4} .".format(
                step, endtime - starttime))
    tf.logging.info("[Finished]")
Example #7
    def getCNNFeatures(self, input_tensor, out_dim, fc_initializer):
        graph = tf.Graph()

        with graph.as_default():

            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                net, end_points = resnet_v1.resnet_v1_50(input_tensor,
                                                         num_classes=None)
        model_path = os.path.join(self.checkpoints_dir, self.ckpt_name)
        init_fn = tf.contrib.framework.assign_from_checkpoint_fn(
            model_path, slim.get_model_variables('resnet_v1'))
        # fc_dim is assumed to be the flattened size of block4; compute it from
        # the end point's static shape so the reshape below is well-defined
        fc_dim = int(np.prod(end_points["resnet_v1_50/block4"].get_shape().as_list()[1:]))
        flattened = tf.reshape(end_points["resnet_v1_50/block4"], [-1, fc_dim])
        print(flattened.get_shape())
        with vs.variable_scope('fc_resnet'):
            W = vs.get_variable("W", [fc_dim, out_dim],
                                initializer=fc_initializer)
            b = vs.get_variable("b", [out_dim], initializer=fc_initializer)
            output = tf.nn.relu(tf.matmul(flattened, W) + b)

        return init_fn, output


#TEST:
# cnn_f_extractor = CNN_FeatureExtractor()
# inputt = tf.constant(np.arange(12288, dtype=np.float32), shape=[1, 64, 64, 3])
# inputfn, features = cnn_f_extractor.getCNNFeatures(inputt, 256, tf.contrib.layers.variance_scaling_initializer())
# print(features.get_shape())
Example #8
def tower_loss(scope):
    images, labels = read_and_decode()
    if net == 'vgg_16':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits, end_points = vgg.vgg_16(images, num_classes=FLAGS.num_classes)
    elif net == 'vgg_19':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits, end_points = vgg.vgg_19(images, num_classes=FLAGS.num_classes)
    elif net == 'resnet_v1_101':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, end_points = resnet_v1.resnet_v1_101(images, num_classes=FLAGS.num_classes)
        logits = tf.reshape(logits, [FLAGS.batch_size, FLAGS.num_classes])
    elif net == 'resnet_v1_50':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, end_points = resnet_v1.resnet_v1_50(images, num_classes=FLAGS.num_classes)
        logits = tf.reshape(logits, [FLAGS.batch_size, FLAGS.num_classes])
    elif net == 'resnet_v2_50':
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, end_points = resnet_v2.resnet_v2_50(images, num_classes=FLAGS.num_classes)
        logits = tf.reshape(logits, [FLAGS.batch_size, FLAGS.num_classes])
    else:
        raise Exception('No network matching net %s.' % net)
    assert logits.shape == (FLAGS.batch_size, FLAGS.num_classes)
    _ = cal_loss(logits, labels)
    losses = tf.get_collection('losses', scope)
    total_loss = tf.add_n(losses, name='total_loss')
    for l in losses + [total_loss]:
        loss_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', l.op.name)
        tf.summary.scalar(loss_name, l)
    return total_loss
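This tower_loss follows the classic multi-GPU pattern: each tower computes its loss under its own name scope and the per-tower gradients are then averaged. A sketch of the surrounding loop; num_gpus and the optimizer are illustrative, TOWER_NAME comes from the example.

tower_grads = []
opt = tf.train.GradientDescentOptimizer(0.01)
with tf.variable_scope(tf.get_variable_scope()):
    for i in range(num_gpus):
        with tf.device('/gpu:%d' % i):
            with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
                loss = tower_loss(scope)
                # share variables across towers
                tf.get_variable_scope().reuse_variables()
                tower_grads.append(opt.compute_gradients(loss))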
Example #9
def get_slim_resnet_v1_byname(net_name,
                              inputs,
                              num_classes=None,
                              is_training=True,
                              global_pool=True,
                              output_stride=None,
                              weight_decay=0.):
    if net_name == 'resnet_v1_50':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v1.resnet_v1_50(
                inputs=inputs,
                num_classes=num_classes,
                is_training=is_training,
                global_pool=global_pool,
                output_stride=output_stride,
            )

        return logits, end_points
    if net_name == 'resnet_v1_101':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v1.resnet_v1_101(
                inputs=inputs,
                num_classes=num_classes,
                is_training=is_training,
                global_pool=global_pool,
                output_stride=output_stride,
            )
        return logits, end_points
    raise ValueError('Unsupported net_name: %s' % net_name)
Example #10
 def build_graph(self):
     with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=1e-5)):
         logits, end_point = resnet_v1.resnet_v1_50(
             self.input, num_classes=self.num_classes, scope='resnet_v1_50')
         # logits: [-1, 1, 1, dim] after global average pooling
         dim = logits.get_shape()[-1]
         assert dim == self.num_classes
         self.logits = tf.reshape(logits, [-1, dim])
Example #11
 def testing_network(self, image):
     _, endpoints = resnet_v1.resnet_v1_50(image,
                                           num_classes=702,
                                           is_training=False,
                                           global_pool=True,
                                           output_stride=None,
                                           reuse=tf.AUTO_REUSE,
                                           scope='resnet_v1_50')
     return endpoints
Example #12
 def network_test(self, inputs):
     net, end_points = resnet_v1.resnet_v1_50(inputs,
                                              num_classes=self.n_labels,
                                              is_training=False,
                                              global_pool=True,
                                              output_stride=None,
                                              reuse=tf.AUTO_REUSE,
                                              scope="resnet_v1_50")
     return net, end_points
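Because network_test passes reuse=tf.AUTO_REUSE, the variables under scope "resnet_v1_50" are created on first use and shared afterwards, so a training graph built under the same scope reuses the same weights. A sketch; train_inputs and test_inputs are assumed tensors created by the caller.

train_net, _ = resnet_v1.resnet_v1_50(train_inputs,
                                      num_classes=self.n_labels,
                                      is_training=True,
                                      reuse=tf.AUTO_REUSE,
                                      scope="resnet_v1_50")
test_net, _ = self.network_test(test_inputs)  # same weights, test-mode BN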
Example #13
 def inference(self):
     x = tf.reshape(self.x, shape=[-1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])
     with slim.arg_scope(resnet_v1.resnet_arg_scope()):
         logits, end_points = resnet_v1.resnet_v1_50(x,
                                                     num_classes=self.nclasses,
                                                     is_training=self.is_training,
                                                     # spatial_squeeze=True,
                                                     global_pool=True)
     # remove in the future if the squeeze is built into the resnet_v1 function
     net = array_ops.squeeze(logits, [1,2], name='SpatialSqueeze')
     return net
Example #14
def rgb_feature_net(input):

    arg_scope = resnet_v1.resnet_arg_scope(weight_decay=0.0)
    with slim.arg_scope(arg_scope):
      net, end_points = resnet_v1.resnet_v1_50(input, None, global_pool=False, output_stride=8)
      block = end_points['resnet_v1_50/block4']
      # block = conv2d_bn_relu(block, num_kernels=512, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='2')
      tf.summary.histogram('rgb_top_block', block)

    #<todo> feature = upsample2d(block, factor = 4,  ...)
    feature = block
    return feature
Example #15
 def _vision(preprocessed_inputs, reuse=True):
     with tf.variable_scope("vision", reuse=reuse):
         with slim.arg_scope(resnet_v1.resnet_arg_scope()):
             resnet_output, _ = resnet_v1.resnet_v1_50(
                 preprocessed_inputs, is_training=True)
         if not config["fine_tune_vision"]:
             resnet_output = tf.stop_gradient(resnet_output)
         resnet_output = tf.squeeze(resnet_output, axis=[1, 2])
         resnet_output = tf.nn.dropout(
             resnet_output, keep_prob=self.vision_keep_prob_ph)
         vision_result = slim.fully_connected(resnet_output,
                                              num_hidden_hyper,
                                              activation_fn=None)
     return vision_result, resnet_output
Example #16
    def teacher(self, x, j):
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            x = utils.nchw_to_nhwc(x)
            batch_out, batch_list = resnet_v1.resnet_v1_50(x, 1000, is_training=True)
            feature = batch_list['resnet_v1_50/block2/unit_4/bottleneck_v1/conv1']

        self.init_fn_1 = slim.assign_from_checkpoint_fn(
            self.pre_dir + '/resnet_v1_50.ckpt', slim.get_model_variables('resnet_v1_50'))
        # delete entries that have no gradient
        # print(batch_list)
        x = utils.nhwc_to_nchw(feature)
        x, var = vnect(x, j)
        return x, var
Example #17
def batch_pred(models_path, images_list, labels_nums, data_format):

    [batch_size, resize_height, resize_width, depths] = data_format
    input_images = tf.placeholder(
        dtype=tf.float32,
        shape=[None, resize_height, resize_width, depths],
        name='input')

    # model
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        out, end_points = resnet_v1.resnet_v1_50(inputs=input_images,
                                                 num_classes=labels_nums,
                                                 is_training=False)

    out = tf.squeeze(out, [1, 2])

    score = tf.nn.softmax(out, name='pre')
    class_id = tf.argmax(score, 1)

    gpu_options = tf.GPUOptions(allow_growth=False)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, models_path)
        tot = len(images_list)

        for idx in range(0, tot, batch_size):
            images = list()
            idx_end = min(tot, idx + batch_size)
            print(idx)
            for i in range(idx, idx_end):
                image_path = images_list[i]
                image = open(image_path, 'rb').read()
                image = tf.image.decode_jpeg(image, channels=3)
                processed_image = preprocess_image(image, resize_height,
                                                   resize_width)
                processed_image = sess.run(processed_image)
                # print("processed_image.shape", processed_image.shape)
                images.append(processed_image)
            images = np.array(images)
            start = time.time()
            sess.run([score, class_id], feed_dict={input_images: images})
            end = time.time()
            print("time of batch {} is %f".format(batch_size) % (end - start))

    sess.close()
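Note that the loop above calls tf.image.decode_jpeg inside the Python loop, which adds new ops to the graph for every image. A common fix, as a sketch reusing preprocess_image, resize_height and resize_width from the example: build the decode pipeline once with a filename placeholder (path_ph is new here) before entering the loop.

path_ph = tf.placeholder(tf.string, shape=())
raw = tf.read_file(path_ph)
decoded = tf.image.decode_jpeg(raw, channels=3)
processed = preprocess_image(decoded, resize_height, resize_width)

# then, inside the batch loop:
#     images.append(sess.run(processed, feed_dict={path_ph: images_list[i]}))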
Example #18
    def build(self):
        # Input
        self.input = tf.placeholder(
            dtype=tf.float32,
            shape=[None, self.img_size[0], self.img_size[1], self.img_size[2]])
        self.input_mean = tfutils.mean_value(self.input, self.img_mean)
        if self.base_net == 'vgg16':
            with slim.arg_scope(vgg.vgg_arg_scope()):
                outputs, end_points = vgg.vgg_16(self.input_mean,
                                                 self.num_classes)
                self.prob = tf.nn.softmax(outputs, -1)
                self.logits = outputs

        elif self.base_net == 'res50':
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                net, end_points = resnet_v1.resnet_v1_50(
                    self.input_mean,
                    self.num_classes,
                    is_training=self.is_train)
                self.prob = tf.nn.softmax(net[:, 0, 0, :], -1)
                self.logits = net[:, 0, 0, :]
        elif self.base_net == 'res101':
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                net, end_points = resnet_v1.resnet_v1_101(
                    self.input_mean,
                    self.num_classes,
                    is_training=self.is_train)
                self.prob = tf.nn.softmax(net[:, 0, 0, :], -1)
                self.logits = net[:, 0, 0, :]
        elif self.base_net == 'res152':
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                net, end_points = resnet_v1.resnet_v1_152(
                    self.input_mean,
                    self.num_classes,
                    is_training=self.is_train)
                self.prob = tf.nn.softmax(net[:, 0, 0, :], -1)
                self.logits = net[:, 0, 0, :]
        else:
            raise ValueError(
                'base network should be vgg16, res50, -101, -152...')
        self.gt = tf.placeholder(dtype=tf.int32, shape=[None])
        # self.var_list = tf.trainable_variables()

        if self.is_train:
            self.loss()
Example #19
def main():
    tf.reset_default_graph()

    input_node = tf.placeholder(tf.float32,
                                shape=(1, 224, 224, 3),
                                name="input")
    print("input_node:", input_node)

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, _ = resnet_v1.resnet_v1_50(input_node, 1000, is_training=False)
        print("net:", net)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, model_path)

        tf.train.write_graph(sess.graph_def, './pb_model', 'model.pb')

        freeze_graph.freeze_graph('pb_model/model.pb', '', False, model_path,
                                  'resnet_v1_50/logits/BiasAdd',
                                  'save/restore_all', 'save/Const:0',
                                  'pb_model/frozen_resnet_v1_50.pb', False, "")

    print("done")
Example #20
def get_model(input_pls,
              is_training,
              bn=False,
              bn_decay=None,
              img_size=224,
              FLAGS=None):

    if FLAGS.act == "relu":
        activation_fn = tf.nn.relu
    elif FLAGS.act == "elu":
        activation_fn = tf.nn.elu

    input_imgs = input_pls['imgs']
    input_pnts = input_pls['pnts']
    input_gvfs = input_pls['gvfs']
    input_onedge = input_pls['onedge']
    input_trans_mat = input_pls['trans_mats']
    input_obj_rot_mats = input_pls['obj_rot_mats']

    batch_size = input_imgs.get_shape()[0].value

    # endpoints
    end_points = {}
    end_points['pnts'] = input_pnts
    if FLAGS.rot:
        end_points['gt_gvfs_xyz'] = tf.matmul(input_gvfs, input_obj_rot_mats)
        end_points['pnts_rot'] = tf.matmul(input_pnts, input_obj_rot_mats)
    else:
        end_points['gt_gvfs_xyz'] = input_gvfs  #* 10
        end_points['pnts_rot'] = input_pnts
    if FLAGS.edgeweight != 1.0:
        end_points['onedge'] = input_onedge
    input_pnts_rot = end_points['pnts_rot']
    end_points['imgs'] = input_imgs  # B*H*W*3|4

    # Image extract features
    if input_imgs.shape[1] != img_size or input_imgs.shape[2] != img_size:
        if FLAGS.alpha:
            ref_img_rgb = tf.compat.v1.image.resize_bilinear(
                input_imgs[:, :, :, :3], [img_size, img_size])
            ref_img_alpha = tf.image.resize_nearest_neighbor(
                tf.expand_dims(input_imgs[:, :, :, 3], axis=-1),
                [img_size, img_size])
            ref_img = tf.concat([ref_img_rgb, ref_img_alpha], axis=-1)
        else:
            ref_img = tf.compat.v1.image.resize_bilinear(
                input_imgs, [img_size, img_size])
    else:
        ref_img = input_imgs
    end_points['resized_ref_img'] = ref_img
    if FLAGS.encoder[:6] == "vgg_16":
        vgg.vgg_16.default_image_size = img_size
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(FLAGS.wd)):
            ref_feats_embedding, encdr_end_points = vgg.vgg_16(
                ref_img,
                num_classes=FLAGS.num_classes,
                is_training=False,
                scope='vgg_16',
                spatial_squeeze=False)
    elif FLAGS.encoder == "sim_res":
        ref_feats_embedding, encdr_end_points = res_sim_encoder.res_sim_encoder(
            ref_img,
            FLAGS.batch_size,
            is_training=is_training,
            activation_fn=activation_fn,
            bn=bn,
            bn_decay=bn_decay,
            wd=FLAGS.wd)
    elif FLAGS.encoder == "resnet_v1_50":
        resnet_v1.default_image_size = img_size
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v1.resnet_v1_50(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v1_50')
        scopelst = [
            "resnet_v1_50/block1", "resnet_v1_50/block2",
            "resnet_v1_50/block3", 'resnet_v1_50/block4'
        ]
    elif FLAGS.encoder == "resnet_v1_101":
        resnet_v1.default_image_size = img_size
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v1.resnet_v1_101(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v1_101')
        scopelst = [
            "resnet_v1_101/block1", "resnet_v1_101/block2",
            "resnet_v1_101/block3", 'resnet_v1_101/block4'
        ]
    elif FLAGS.encoder == "resnet_v2_50":
        resnet_v2.default_image_size = img_size
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v2.resnet_v2_50(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v2_50')
        scopelst = [
            "resnet_v2_50/block1", "resnet_v2_50/block2",
            "resnet_v2_50/block3", 'resnet_v2_50/block4'
        ]
    elif FLAGS.encoder == "resnet_v2_101":
        resnet_v2.default_image_size = img_size
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v2.resnet_v2_101(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v2_101')
        scopelst = [
            "resnet_v2_101/block1", "resnet_v2_101/block2",
            "resnet_v2_101/block3", 'resnet_v2_101/block4'
        ]
    end_points['img_embedding'] = ref_feats_embedding
    point_img_feat = None
    gvfs_feat = None
    sample_img_points = get_img_points(input_pnts,
                                       input_trans_mat)  # B * N * 2

    if FLAGS.img_feat_onestream:
        with tf.compat.v1.variable_scope("sdfimgfeat") as scope:
            if FLAGS.encoder[:3] == "vgg":
                conv1 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points['vgg_16/conv1/conv1_2'],
                    (FLAGS.img_h, FLAGS.img_w))
                point_conv1 = tf.contrib.resampler.resampler(
                    conv1, sample_img_points)
                conv2 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points['vgg_16/conv2/conv2_2'],
                    (FLAGS.img_h, FLAGS.img_w))
                point_conv2 = tf.contrib.resampler.resampler(
                    conv2, sample_img_points)
                conv3 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points['vgg_16/conv3/conv3_3'],
                    (FLAGS.img_h, FLAGS.img_w))
                point_conv3 = tf.contrib.resampler.resampler(
                    conv3, sample_img_points)
                if FLAGS.encoder[-7:] != "smaller":
                    conv4 = tf.compat.v1.image.resize_bilinear(
                        encdr_end_points['vgg_16/conv4/conv4_3'],
                        (FLAGS.img_h, FLAGS.img_w))
                    point_conv4 = tf.contrib.resampler.resampler(
                        conv4, sample_img_points)
                    point_img_feat = tf.concat(axis=2,
                                               values=[
                                                   point_conv1, point_conv2,
                                                   point_conv3, point_conv4
                                               ])  # small
                else:
                    print("smaller vgg")
                    point_img_feat = tf.concat(
                        axis=2, values=[point_conv1, point_conv2,
                                        point_conv3])  # small
            elif FLAGS.encoder[:3] == "res":
                # print(encdr_end_points.keys())
                conv1 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[scopelst[0]], (FLAGS.img_h, FLAGS.img_w))
                point_conv1 = tf.contrib.resampler.resampler(
                    conv1, sample_img_points)
                conv2 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[scopelst[1]], (FLAGS.img_h, FLAGS.img_w))
                point_conv2 = tf.contrib.resampler.resampler(
                    conv2, sample_img_points)
                conv3 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[scopelst[2]], (FLAGS.img_h, FLAGS.img_w))
                point_conv3 = tf.contrib.resampler.resampler(
                    conv3, sample_img_points)
                # conv4 = tf.compat.v1.image.resize_bilinear(encdr_end_points[scopelst[3]], (FLAGS.img_h, FLAGS.img_w))
                # point_conv4 = tf.contrib.resampler.resampler(conv4, sample_img_points)
                point_img_feat = tf.concat(
                    axis=2, values=[point_conv1, point_conv2, point_conv3])
            else:
                conv1 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[0], (FLAGS.img_h, FLAGS.img_w))
                point_conv1 = tf.contrib.resampler.resampler(
                    conv1, sample_img_points)
                conv2 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[1], (FLAGS.img_h, FLAGS.img_w))
                point_conv2 = tf.contrib.resampler.resampler(
                    conv2, sample_img_points)
                conv3 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[2], (FLAGS.img_h, FLAGS.img_w))
                point_conv3 = tf.contrib.resampler.resampler(
                    conv3, sample_img_points)
                # conv4 = tf.compat.v1.image.resize_bilinear(encdr_end_points[scopelst[3]], (FLAGS.img_h, FLAGS.img_w))
                # point_conv4 = tf.contrib.resampler.resampler(conv4, sample_img_points)
                point_img_feat = tf.concat(
                    axis=2, values=[point_conv1, point_conv2, point_conv3])
            print("point_img_feat.shape", point_img_feat.get_shape())
            point_img_feat = tf.expand_dims(point_img_feat, axis=2)
            if FLAGS.decoder == "att":
                gvfs_feat = gvfnet.get_gvf_att_imgfeat(
                    input_pnts_rot,
                    ref_feats_embedding,
                    point_img_feat,
                    is_training,
                    batch_size,
                    bn,
                    bn_decay,
                    wd=FLAGS.wd,
                    activation_fn=activation_fn)
            elif FLAGS.decoder == "skip":
                gvfs_feat = gvfnet.get_gvf_basic_imgfeat_onestream_skip(
                    input_pnts_rot,
                    ref_feats_embedding,
                    point_img_feat,
                    is_training,
                    batch_size,
                    bn,
                    bn_decay,
                    wd=FLAGS.wd,
                    activation_fn=activation_fn)
            else:
                gvfs_feat = gvfnet.get_gvf_basic_imgfeat_onestream(
                    input_pnts_rot,
                    ref_feats_embedding,
                    point_img_feat,
                    is_training,
                    batch_size,
                    bn,
                    bn_decay,
                    wd=FLAGS.wd,
                    activation_fn=activation_fn)
    else:
        if not FLAGS.multi_view:
            with tf.compat.v1.variable_scope("sdfprediction") as scope:
                gvfs_feat = gvfnet.get_gvf_basic(input_pnts_rot,
                                                 ref_feats_embedding,
                                                 is_training,
                                                 batch_size,
                                                 bn,
                                                 bn_decay,
                                                 wd=FLAGS.wd,
                                                 activation_fn=activation_fn)
    end_points['pred_gvfs_xyz'], end_points['pred_gvfs_dist'], end_points[
        'pred_gvfs_direction'] = None, None, None
    if FLAGS.XYZ:
        end_points['pred_gvfs_xyz'] = gvfnet.xyz_gvfhead(
            gvfs_feat, batch_size, wd=FLAGS.wd, activation_fn=activation_fn)
        end_points['pred_gvfs_dist'] = tf.sqrt(
            tf.reduce_sum(tf.square(end_points['pred_gvfs_xyz']),
                          axis=2,
                          keepdims=True))
        end_points[
            'pred_gvfs_direction'] = end_points['pred_gvfs_xyz'] / tf.maximum(
                end_points['pred_gvfs_dist'], 1e-6)
    else:
        end_points['pred_gvfs_dist'], end_points[
            'pred_gvfs_direction'] = gvfnet.dist_direct_gvfhead(
                gvfs_feat,
                batch_size,
                wd=FLAGS.wd,
                activation_fn=activation_fn)
        end_points['pred_gvfs_xyz'] = end_points[
            'pred_gvfs_direction'] * end_points['pred_gvfs_dist']

    end_points["sample_img_points"] = sample_img_points
    # end_points["ref_feats_embedding"] = ref_feats_embedding
    end_points["point_img_feat"] = point_img_feat

    return end_points
Example #21
parser.add_argument('--weights', default="model.cktp",
                    type=str)  # define the model path
parser.add_argument('--weight_dir',
                    default='./Affwild_models/standard_ResNet/',
                    type=str)  # define the model directory
parser.add_argument('--input_file', default='video_T_01.csv',
                    type=str)  # define the input image path
parser.add_argument('--save_file', default='video_T_01.mat',
                    type=str)  # define the path to save extracted features
args = parser.parse_args()

images_batch = tf.placeholder(tf.float32, [1, 96, 96, 3])

with slim.arg_scope(resnet_v1.resnet_arg_scope()):
    net, end_point = resnet_v1.resnet_v1_50(inputs=images_batch,
                                            is_training=False,
                                            num_classes=None)
net = tf.squeeze(net, [1, 2])
saver = tf.train.Saver()
sess = tf.Session()
weight_file = os.path.join(args.weight_dir, args.weights)
saver.restore(sess, weight_file)
files = pd.read_csv(args.input_file)
files = files.values
feature_list = []
for file_path in tqdm(files):
    file_path = file_path[0].strip()
    image = cv2.imread(file_path)
    inputs = cv2.resize(image, (96, 96))
    inputs = cv2.cvtColor(inputs, cv2.COLOR_BGR2RGB).astype(np.float32)
    inputs -= 128.0
Example #22
def model_fn(features, labels, mode, params):   
        
    # Download the pretrained model
    bucket_name = params['bucket_name']
    prefix_name = params['prefix_name']
    s3 = boto3.resource('s3')
    try:
        s3.Bucket(bucket_name).download_file(prefix_name, 'resnet.ckpt')
        print("Pretrained model is downloaded.")
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == "404":
            print("The object does not exist.")
        else:
            raise
    
    
    # Input Layer
    input_layer = tf.reshape(features[INPUT_TENSOR_NAME], [-1, 32, 32, 3])
    
    # Load Pretrained model
    from tensorflow.contrib.slim.python.slim.nets.resnet_v1 import resnet_v1_50
    last_layer = resnet_v1_50(input_layer, num_classes=None, scope='resnet_v1_50')
    variables_to_restore = tf.contrib.slim.get_variables_to_restore()
    tf.train.init_from_checkpoint(
        "./resnet.ckpt",
        {v.name.split(':')[0]: v for v in variables_to_restore
         if 'biases' not in v.name})
    logits = tf.reshape(tf.layers.dense(inputs=last_layer[0], units=100), [-1, 100])

    # Define operations
    if mode in (Modes.PREDICT, Modes.EVAL):
        predicted_indices = tf.argmax(input=logits, axis=1)
        probabilities = tf.nn.softmax(logits, name='softmax_tensor')

    if mode in (Modes.TRAIN, Modes.EVAL):
        global_step = tf.train.get_or_create_global_step()
        label_indices = tf.cast(labels, tf.int32)
        loss = tf.losses.softmax_cross_entropy(
            onehot_labels=tf.one_hot(label_indices, depth=100), logits=logits)
            
        tf.summary.scalar('OptimizeLoss', loss)

    if mode == Modes.PREDICT:
        predictions = {
            'classes': predicted_indices,
            'probabilities': probabilities
        }
        export_outputs = {
            SIGNATURE_NAME: tf.estimator.export.PredictOutput(predictions)
        }
        return tf.estimator.EstimatorSpec(
            mode, predictions=predictions, export_outputs=export_outputs)

    if mode == Modes.TRAIN:
        logging_hook = tf.train.LoggingTensorHook({"loss" : loss}, every_n_iter=10)
        optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(loss, global_step=global_step)
        return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op, training_hooks = [logging_hook])

    if mode == Modes.EVAL:
        eval_metric_ops = {
            'accuracy': tf.metrics.accuracy(label_indices, predicted_indices)
        }
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops=eval_metric_ops)
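A sketch of wiring this model_fn into an Estimator. The bucket_name/prefix_name values and train_input_fn are placeholders, not from the example.

estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir='./model_dir',
    params={'bucket_name': 'my-bucket',
            'prefix_name': 'models/resnet_v1_50.ckpt'})
estimator.train(input_fn=train_input_fn, steps=1000)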
Example #23
def evaluate():
    g = tf.Graph()
    with g.as_default():

        image_list, label_list = data_process.read_labeled_image_list(
            FLAGS.input_file)
        # Split into sequences. For the CNN models this simply creates batches of
        # length seq_length; for the CNN-RNN models we do not check whether the
        # images in a sequence are consecutive or come from the same video/person.
        image_list, label_list = data_process.make_rnn_input_per_seq_length_size(
            image_list, label_list, FLAGS.seq_length)

        images = tf.convert_to_tensor(image_list)
        labels = tf.convert_to_tensor(label_list)

        # Makes an input queue
        input_queue = tf.train.slice_input_producer([images, labels, images],
                                                    num_epochs=None,
                                                    shuffle=False,
                                                    seed=None,
                                                    capacity=1000,
                                                    shared_name=None,
                                                    name=None)
        images_batch, labels_batch, image_locations_batch = data_process.decodeRGB(
            input_queue, FLAGS.seq_length, FLAGS.size)

        images_batch = tf.to_float(images_batch)
        images_batch -= 128.0
        images_batch /= 128.0  # scale all pixel values in range: [-1,1]

        images_batch = tf.reshape(images_batch, [-1, 96, 96, 3])
        labels_batch = tf.reshape(labels_batch, [-1, 2])

        if FLAGS.network == 'vggface_4096':
            from vggface import vggface_4096x4096x2 as net
            network = net.VGGFace(FLAGS.batch_size * FLAGS.seq_length)
            network.setup(images_batch)
            prediction = network.get_output()

        elif FLAGS.network == 'vggface_2000':
            from vggface import vggface_4096x2000x2 as net
            network = net.VGGFace(FLAGS.batch_size * FLAGS.seq_length)
            network.setup(images_batch)
            prediction = network.get_output()

        elif FLAGS.network == 'affwildnet_resnet':
            from tensorflow.contrib.slim.python.slim.nets import resnet_v1
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                net, _ = resnet_v1.resnet_v1_50(inputs=images_batch,
                                                is_training=False,
                                                num_classes=None)

                with tf.variable_scope('rnn') as scope:
                    cnn = tf.reshape(
                        net, [FLAGS.batch_size, FLAGS.seq_length, -1])
                    cell = tf.nn.rnn_cell.MultiRNNCell(
                        [tf.nn.rnn_cell.GRUCell(128) for _ in range(2)])
                    outputs, _ = tf.nn.dynamic_rnn(cell, cnn, dtype=tf.float32)
                    outputs = tf.reshape(
                        outputs,
                        (FLAGS.batch_size * FLAGS.seq_length, 128))

                    weights_initializer = tf.truncated_normal_initializer(
                        stddev=0.01)
                    weights = tf.get_variable('weights_output',
                                              shape=[128, 2],
                                              initializer=weights_initializer,
                                              trainable=True)
                    biases = tf.get_variable('biases_output',
                                             shape=[2],
                                             initializer=tf.zeros_initializer,
                                             trainable=True)

                    prediction = tf.nn.xw_plus_b(outputs, weights, biases)

        elif FLAGS.network == 'affwildnet_vggface':
            from affwildnet import vggface_gru as net
            network = net.VGGFace(FLAGS.batch_size, FLAGS.seq_length)
            network.setup(images_batch)
            prediction = network.get_output()

        num_batches = int(len(image_list) / FLAGS.batch_size)

        variables_to_restore = tf.global_variables()

        with tf.Session() as sess:

            init_fn = slim.assign_from_checkpoint_fn(
                FLAGS.pretrained_model_checkpoint_path,
                variables_to_restore,
                ignore_missing_vars=False)

            init_fn(sess)
            print('Loading model {}'.format(
                FLAGS.pretrained_model_checkpoint_path))

            coord = tf.train.Coordinator()
            tf.train.start_queue_runners(sess=sess, coord=coord)

            evaluated_predictions = []
            evaluated_labels = []
            images = []

            try:
                for _ in range(num_batches):

                    pr, l, imm = sess.run(
                        [prediction, labels_batch, image_locations_batch])
                    evaluated_predictions.append(pr)
                    evaluated_labels.append(l)
                    images.append(imm)

                    if coord.should_stop():
                        break
                coord.request_stop()
            except Exception as e:
                coord.request_stop(e)

            predictions = np.reshape(evaluated_predictions, (-1, 2))
            labels = np.reshape(evaluated_labels, (-1, 2))
            images = np.reshape(images, (-1))

            conc_arousal = concordance_cc2(predictions[:, 1], labels[:, 1])
            conc_valence = concordance_cc2(predictions[:, 0], labels[:, 0])

            for i in range(len(predictions)):
                print("Labels: ", labels[i], "Predictions: ", predictions[i],
                      "Error: ", (abs(labels[i] - predictions[i])))
            print(
                "------------------------------------------------------------------------------"
            )
            print('Concordance on valence : {}'.format(conc_valence))
            print('Concordance on arousal : {}'.format(conc_arousal))
            print('Concordance on total : {}'.format(
                (conc_arousal + conc_valence) / 2))

            mse_arousal = sum(
                (predictions[:, 1] - labels[:, 1])**2) / len(labels[:, 1])
            print('MSE Arousal : {}'.format(mse_arousal))
            mse_valence = sum(
                (predictions[:, 0] - labels[:, 0])**2) / len(labels[:, 0])
            print('MSE Valence : {}'.format(mse_valence))

        return conc_valence, conc_arousal, (
            conc_arousal + conc_valence) / 2, mse_arousal, mse_valence
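concordance_cc2 is not defined in the example; it presumably computes Lin's concordance correlation coefficient, 2*cov(x,y) / (var(x) + var(y) + (mean(x) - mean(y))^2). A numpy sketch of that formula:

import numpy as np

def concordance_cc2(predictions, labels):
    mean_p, mean_l = np.mean(predictions), np.mean(labels)
    var_p, var_l = np.var(predictions), np.var(labels)
    cov = np.mean((predictions - mean_p) * (labels - mean_l))
    return 2 * cov / (var_p + var_l + (mean_p - mean_l) ** 2)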
Example #24
def get_featuremap(net_name, input, num_classes=None):
    '''
    #tensorlayer
    input = tl.layers.InputLayer(input)
    if net_name == 'resnet_v1_50':
        with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=cfg.FEATURE_WEIGHT_DECAY)):
            featuremap = tl.layers.SlimNetsLayer(prev_layer=input,
                                                 slim_layer=resnet_v1.resnet_v1_50,
                                                 slim_args={
                                                     'num_classes': num_classes,
                                                     'is_training': True,
                                                     'global_pool': False
                                                 },
                                                 name='resnet_v1_50'
                                                 )
            sv = tf.train.Supervisor()
            with sv.managed_session() as sess:
                a = sess.run(featuremap.all_layers)
                print(a)
            feature_w_loss = tf.reduce_sum(slim.losses.get_regularization_losses())
            return featuremap.outputs, feature_w_loss, featuremap.all_params
    if net_name == 'resnet_v1_101':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            featuremap = tl.layers.SlimNetsLayer(prev_layer=input,
                                                 slim_layer=resnet_v1.resnet_v1_101,
                                                 slim_args={
                                                     'num_classes': num_classes,
                                                     'is_training': True,
                                                     'global_pool': False
                                                 },
                                                 name='resnet_v1_101'
                                                 )
            feature_w_loss = tf.reduce_sum(slim.losses.get_regularization_losses())
            return featuremap.outputs, feature_w_loss, featuremap.all_params
    if net_name == 'resnet_v1_152':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            featuremap = tl.layers.SlimNetsLayer(prev_layer=input,
                                                 slim_layer=resnet_v1.resnet_v1_152,
                                                 slim_args={
                                                     'num_classes': num_classes,
                                                     'is_training': True,
                                                     'global_pool': False
                                                 },
                                                 name='resnet_v1_152'
                                                 )
            feature_w_loss = tf.reduce_sum(slim.losses.get_regularization_losses())
            return featuremap.outputs, feature_w_loss, featuremap.all_params
    if net_name == 'vgg16':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            featuremap = tl.layers.SlimNetsLayer(prev_layer=input,
                                                 slim_layer=vgg.vgg_16,
                                                 slim_args={
                                                     'num_classes': num_classes,
                                                     'is_training': True,
                                                     'spatial_squeeze': False
                                                 },
                                                 name='vgg_16'
                                                 )
            feature_w_loss = tf.reduce_sum(slim.losses.get_regularization_losses())
            return featuremap.outputs, feature_w_loss, featuremap.all_params
    '''

    #slim
    if net_name == 'resnet_v1_50':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(
                    weight_decay=cfg.FEATURE_WEIGHT_DECAY)):
            featuremap, layer_dic = resnet_v1.resnet_v1_50(
                inputs=input,
                num_classes=num_classes,
                is_training=False,
                global_pool=False)
        if cfg.USE_FPN:
            feature_maps_dict = {
                'C2': layer_dic[
                    'resnet_v1_50/block1/unit_2/bottleneck_v1'],  # [56, 56]
                'C3': layer_dic[
                    'resnet_v1_50/block2/unit_3/bottleneck_v1'],  # [28, 28]
                'C4': layer_dic[
                    'resnet_v1_50/block3/unit_5/bottleneck_v1'],  # [14, 14]
                'C5': layer_dic['resnet_v1_50/block4']  # [7, 7]
            }
            return feature_maps_dict
        return layer_dic['resnet_v1_50/block3/unit_5/bottleneck_v1']
        #return featuremap

    if net_name == 'resnet_v1_101':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(
                    weight_decay=cfg.FEATURE_WEIGHT_DECAY)):
            featuremap, layer_dic = resnet_v1.resnet_v1_101(
                inputs=input,
                num_classes=num_classes,
                is_training=True,
                global_pool=False)
        if cfg.USE_FPN:
            feature_maps_dict = {
                'C2': layer_dic[
                    'resnet_v1_101/block1/unit_2/bottleneck_v1'],  # [56, 56]
                'C3': layer_dic[
                    'resnet_v1_101/block2/unit_3/bottleneck_v1'],  # [28, 28]
                'C4': layer_dic[
                    'resnet_v1_101/block3/unit_22/bottleneck_v1'],  # [14, 14]
                'C5': layer_dic['resnet_v1_101/block4']
            }
            return feature_maps_dict
        return featuremap

    if net_name == 'vgg_16':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(
                    weight_decay=cfg.FEATURE_WEIGHT_DECAY)):
            featuremap, layer_dic = vgg.vgg_16(
                inputs=input,
                num_classes=7,
                is_training=False,
                spatial_squeeze=False,
            )

        return layer_dic['vgg_16/conv5/conv5_3']
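A sketch of how a C2..C5 dict like the one returned when cfg.USE_FPN is set is typically turned into FPN pyramid levels (the standard top-down pathway; function name, scope names, and channel count are illustrative, not from the example):

def build_fpn(feature_maps_dict, channels=256):
    C2, C3, C4, C5 = (feature_maps_dict[k] for k in ('C2', 'C3', 'C4', 'C5'))
    # lateral 1x1 convs plus nearest-neighbor upsampling of the coarser level
    P5 = slim.conv2d(C5, channels, [1, 1], scope='P5')
    P4 = slim.conv2d(C4, channels, [1, 1], scope='C4_lateral') + \
         tf.image.resize_nearest_neighbor(P5, tf.shape(C4)[1:3])
    P3 = slim.conv2d(C3, channels, [1, 1], scope='C3_lateral') + \
         tf.image.resize_nearest_neighbor(P4, tf.shape(C3)[1:3])
    P2 = slim.conv2d(C2, channels, [1, 1], scope='C2_lateral') + \
         tf.image.resize_nearest_neighbor(P3, tf.shape(C2)[1:3])
    return {'P2': P2, 'P3': P3, 'P4': P4, 'P5': P5}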
Example #25
def main(_):

    with tf.name_scope('input_placeholder'):
        mv_placeholder = tf.placeholder(tf.float32, 
                    shape=(None, FLAGS.num_segments, 224, 224, 3 ), name = 'mv_frame')
        flow_placeholder = tf.placeholder(tf.float32, 
                    shape=(None, FLAGS.num_segments, 224, 224, 3 ), name = 'flow_frame')
        i_placeholder = tf.placeholder(tf.float32,
                    shape=(None, FLAGS.num_segments, 224, 224, 3 ), name = 'i_frame')
        r_placeholder = tf.placeholder(tf.float32,
                    shape=(None, FLAGS.num_segments, 224, 224, 3 ), name = 'r_frame')

    with tf.name_scope('label_placeholder'):
        label_placeholder = tf.placeholder(tf.int32, shape=(None), name = 'labels')

    with tf.name_scope('accuracy'):
        combine_value_ = tf.placeholder(tf.float32, shape=(), name = 'combine_accuracy')
        i_value_ = tf.placeholder(tf.float32, shape=(), name = 'i_accuracy')
        mv_value_ = tf.placeholder(tf.float32, shape=(), name = 'mv_accuracy')
        r_value_ = tf.placeholder(tf.float32, shape=(), name = 'r_accuracy')
        tf.summary.scalar('combine_acc', combine_value_)
        tf.summary.scalar('i_acc', i_value_)
        tf.summary.scalar('mv_acc', mv_value_)
        tf.summary.scalar('r_acc', r_value_)
        
    print('Finish placeholder.')


    with tf.name_scope('flatten_input'):
        b_size = tf.shape(mv_placeholder)[0]
        flat_mv = tf.reshape(mv_placeholder, [b_size * FLAGS.num_segments, 224, 224, 3]) # since we have multiple segments in a single video
        flat_flow = tf.reshape(flow_placeholder, [b_size * FLAGS.num_segments, 224, 224, 3])
        flat_i = tf.reshape(i_placeholder, [b_size * FLAGS.num_segments, 224, 224, 3])
        flat_r = tf.reshape(r_placeholder, [b_size * FLAGS.num_segments, 224, 224, 3])

    with tf.variable_scope('fc_var') as var_scope:
        mv_weights = {
            'w1': _variable_with_weight_decay('wmv1', [2048 , 512 ], 0.0005),
            'w2': _variable_with_weight_decay('wmv2', [512 , N_CLASS], 0.0005)
        }
        mv_biases = {
            'b1': _variable_with_weight_decay('bmv1', [ 512 ], 0.00),
            'b2': _variable_with_weight_decay('bmv2', [ N_CLASS ], 0.00)
        }
        i_weights = {
            'w1': _variable_with_weight_decay('wi1', [2048 , 512 ], 0.0005),
            'w2': _variable_with_weight_decay('wi2', [512 , N_CLASS], 0.0005)
        }
        i_biases = {
            'b1': _variable_with_weight_decay('bi1', [ 512 ], 0.00),
            'b2': _variable_with_weight_decay('bi2', [ N_CLASS ], 0.00)
        }
        r_weights = {
            'w1': _variable_with_weight_decay('wr1', [2048 , 512 ], 0.0005),
            'w2': _variable_with_weight_decay('wr2', [512 , N_CLASS], 0.0005)
        }
        r_biases = {
            'b1': _variable_with_weight_decay('br1', [ 512 ], 0.00),
            'b2': _variable_with_weight_decay('br2', [ N_CLASS ], 0.00)
        }

    with tf.variable_scope('fusion_var'):
        fusion = tf.get_variable('fusion', [3], initializer=tf.contrib.layers.xavier_initializer())
    
    print('Finish Flatten.')
    
    with tf.device('/gpu:0'):

        with tf.name_scope('FLMG'):
            mv_res = tf.concat([flat_mv, flat_r], axis = -1)
            mv = slim.conv2d(mv_res, 8, kernel_size=[3, 3], scope = 'FLMG_1')
            mv = slim.conv2d(mv, 8, kernel_size=[3, 3], scope = 'FLMG_2')
            mv = slim.conv2d(mv, 6, kernel_size=[3, 3], scope = 'FLMG_3')
            mv = slim.conv2d(mv, 4, kernel_size=[3, 3], scope = 'FLMG_4')
            mv = slim.conv2d(mv, 2, kernel_size=[3, 3], scope = 'FLMG_5')
            mv = slim.conv2d(mv, 3, kernel_size=[3, 3], scope = 'FLMG_6')

        with tf.name_scope('FLMG_LOSS'):
            # The cost function -- l2 mse
            matrix_pow_2 = tf.pow(tf.subtract(mv, flat_flow), 2)
            matrix_norm = tf.reduce_sum(matrix_pow_2, axis = [1,2,3])

            flmg_loss = tf.reduce_mean(matrix_norm)
            tf.summary.scalar('flmg_loss', flmg_loss)

        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            i_feature, _ = resnet_v1.resnet_v1_152(flat_i, num_classes=None, is_training=True, scope='i_resnet')
            mv_feature, _ = resnet_v1.resnet_v1_50(mv, num_classes=None, is_training=True, scope='mv_resnet')
            r_feature, _ = resnet_v1.resnet_v1_50(flat_r, num_classes=None, is_training=True, scope='r_resnet')


        with tf.name_scope('reshape_feature'):
            i_feature = tf.reshape(i_feature, [-1, 2048])
            mv_feature = tf.reshape(mv_feature, [-1, 2048])
            r_feature = tf.reshape(r_feature, [-1, 2048])


        with tf.name_scope('inference_model'):

            i_sc, i_pred = model.inference_feature (i_feature, i_weights, i_biases,
                                                      FLAGS.num_segments, N_CLASS, name = 'i_inf')

            mv_sc, mv_pred = model.inference_feature (mv_feature, mv_weights, mv_biases,
                                                      FLAGS.num_segments, N_CLASS, name = 'mv_inf')

            r_sc, r_pred = model.inference_feature (r_feature, r_weights, r_biases,
                                                      FLAGS.num_segments, N_CLASS, name = 'r_inf')

            combine_sc, pred_class = model.inference_fusion ( i_sc, mv_sc, r_sc, fusion)

    print('Finish Model.')
    
    with tf.name_scope('classification_loss'):
        one_hot_labels = tf.one_hot(label_placeholder, N_CLASS)
        mv_class_loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits = mv_sc, labels = one_hot_labels, dim=1))
        i_class_loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits = i_sc, labels = one_hot_labels, dim=1))
        r_class_loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits = r_sc, labels = one_hot_labels, dim=1))
        tf.summary.scalar('mv_cls_loss', mv_class_loss) 
        tf.summary.scalar('i_cls_loss', i_class_loss) 
        tf.summary.scalar('r_cls_loss', r_class_loss)

        combine_loss = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits(logits = combine_sc, labels = one_hot_labels, dim=1))
        tf.summary.scalar('fuse_cls_loss', combine_loss)

        total_loss = combine_loss + i_class_loss + mv_class_loss + r_class_loss + flmg_loss
        tf.summary.scalar('tot_cls_loss', total_loss)


    with tf.name_scope('weight_decay'):
        weight_loss = sum(tf.get_collection('losses'))
        tf.summary.scalar('weight_decay_loss', weight_loss)

    '''
    with tf.name_scope('training_var_list'):
        mv_variable_list = list ( set(mv_weights.values()) | set(mv_biases.values()) )
        mv_resnet_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='mv_resnet')
        i_variable_list = list ( set(i_weights.values()) | set(i_biases.values()) )
        i_resnet_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='i_resnet')
        r_variable_list = list ( set(r_weights.values()) | set(r_biases.values()) )
        r_resnet_variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='r_resnet')
    
    with tf.name_scope('summary_var'):
        _variable_summaries(mv_weights['w1'])
        _variable_summaries(i_weights['w2'])
        _variable_summaries(r_weights['w2'])
        _variable_summaries(mv_resnet_variables[0])
        _variable_summaries(i_resnet_variables[0])
        _variable_summaries(r_resnet_variables[0])
        _variable_summaries(fusion)
        
    print('Finish variables.')
    '''
    
    with tf.name_scope('optimizer'):
        '''
        mv_fc_opt = tf.train.AdamOptimizer(FLAGS.mv_lr).minimize(mv_class_loss + weight_loss, var_list = mv_variable_list)
        mv_res_opt = tf.train.AdamOptimizer(FLAGS.resnet_lr).minimize(mv_class_loss, var_list = mv_resnet_variables)
        i_fc_opt = tf.train.AdamOptimizer(FLAGS.i_lr).minimize(i_class_loss + weight_loss, var_list = i_variable_list)
        i_res_opt = tf.train.AdamOptimizer(FLAGS.resnet_lr).minimize(i_class_loss, var_list = i_resnet_variables)
        r_fc_opt = tf.train.AdamOptimizer(FLAGS.r_lr).minimize(r_class_loss + weight_loss, var_list = r_variable_list)
        r_res_opt = tf.train.AdamOptimizer(FLAGS.resnet_lr).minimize(r_class_loss, var_list = r_resnet_variables)
        fusion_opt = tf.train.GradientDescentOptimizer(10e-6).minimize(combine_loss, var_list = fusion)
        '''

        train_opt = tf.train.AdamOptimizer(FLAGS.tot_lr).minimize(total_loss, var_list = tf.trainable_variables())



    print('Finish Optimizer.')
    
    with tf.name_scope('init_function'):
        init_var = tf.global_variables_initializer()
    
    
    
    with tf.name_scope('video_dataset'):
        train_data = dataset.buildTrainDataset_v2(FLAGS.train_list, FLAGS.data_path, FLAGS.num_segments,
                                                  batch_size = FLAGS.batch_size, augment = False,
                                                  shuffle = True, num_threads=2, buffer=100)
        test_data = dataset.buildTestDataset(FLAGS.valid_list, FLAGS.data_path, FLAGS.num_segments, 
                                             batch_size = FLAGS.batch_size, num_threads = 2, buffer = 30)
        
    
        with tf.name_scope('dataset_iterator'):
            it = tf.contrib.data.Iterator.from_structure(train_data.output_types, train_data.output_shapes)
            next_data = it.get_next()
            init_data = it.make_initializer(train_data)
            it_test = tf.contrib.data.Iterator.from_structure(test_data.output_types, test_data.output_shapes)
            next_test_data = it_test.get_next()
            init_test_data = it_test.make_initializer(test_data)
            
            
    print('Finish Dataset.')

    restore_var = [v for v in tf.trainable_variables() if ('Adam'  not in v.name)]

    first_restore_var = [v for v in tf.trainable_variables() if ('Adam'  not in v.name and 'FLMG' not in v.name)]
    first_saver = tf.train.Saver(var_list=first_restore_var)

    my_saver = tf.train.Saver(var_list=restore_var, max_to_keep=5)

    
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    sess = tf.Session(config=config)
    
    with tf.name_scope('writer'):

        merged = tf.summary.merge_all()
        if not tf.gfile.Exists(FLAGS.log_path):
            tf.gfile.MakeDirs(FLAGS.log_path)
        previous_runs = os.listdir(FLAGS.log_path)
        run_number = len(previous_runs) + 1
        logdir = 'run_%02d' % run_number
        tf.gfile.MakeDirs(os.path.join(FLAGS.log_path, logdir))
        writer = tf.summary.FileWriter(os.path.join(FLAGS.log_path, logdir), sess.graph)

    with tf.name_scope('saver'):

        if not tf.gfile.Exists(FLAGS.save_path):
            tf.gfile.MakeDirs(FLAGS.save_path)
        '''
        i_saver = tf.train.Saver(i_variable_list)
        mv_saver = tf.train.Saver(mv_variable_list)
        r_saver = tf.train.Saver(r_variable_list)
        i_resnet_saver = tf.train.Saver(i_resnet_variables)
        mv_resnet_saver = tf.train.Saver(mv_resnet_variables)
        r_resnet_saver = tf.train.Saver(r_resnet_variables)
        '''

    with tf.name_scope('initialization'):
        sess.run(init_var)
        sess.run(init_data)
        sess.run(init_test_data)

        #init_i_resent (sess)
        #init_mv_resent (sess)
        #init_r_resent(sess)
        '''
        i_saver.restore(sess, os.path.join(FLAGS.saved_model_path, 'i_model.chkp'+FLAGS.steps))
        mv_saver.restore(sess, os.path.join(FLAGS.saved_model_path, 'mv_model.chkp'+FLAGS.steps))
        r_saver.restore(sess, os.path.join(FLAGS.saved_model_path, 'r_model.chkp'+FLAGS.steps))
        i_resnet_saver.restore(sess, os.path.join(FLAGS.saved_model_path, 'i_resnet.chkp'+FLAGS.steps))
        mv_resnet_saver.restore(sess, os.path.join(FLAGS.saved_model_path, 'mv_resnet.chkp'+FLAGS.steps))
        r_resnet_saver.restore(sess, os.path.join(FLAGS.saved_model_path, 'r_resnet.chkp'+FLAGS.steps))
        '''
        try:
            my_saver.restore(sess, FLAGS.continue_training)
        except Exception:
            # First run: the checkpoint predates the FLMG variables, so fall
            # back to the partial restore that skips them.
            first_saver.restore(sess, FLAGS.continue_training)

            '''
            i_resnet_saver = tf.train.Saver(i_resnet_variables)
            mv_resnet_saver = tf.train.Saver(mv_resnet_variables)
            r_resnet_saver = tf.train.Saver(r_resnet_variables)
            i_resnet_saver.restore(sess, os.path.join(FLAGS.saved_model_path, 'i_resnet.chkp'+FLAGS.steps))
            mv_resnet_saver.restore(sess, os.path.join(FLAGS.saved_model_path, 'mv_resnet.chkp'+FLAGS.steps))
            r_resnet_saver.restore(sess, os.path.join(FLAGS.saved_model_path, 'r_resnet.chkp'+FLAGS.steps))
            '''

        print('Finish Loading Pretrained Model.')
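        # A more explicit alternative to the try/except fallback above (a
        # sketch, assuming earlier checkpoints live under FLAGS.save_path):
        #   ckpt = tf.train.latest_checkpoint(FLAGS.save_path)
        #   if ckpt is not None:
        #       my_saver.restore(sess, ckpt)      # resume the full model
        #   else:
        #       first_saver.restore(sess, FLAGS.continue_training)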

        
    '''
    Main training loop
    '''
    combine_acc = 0
    i_acc = 0
    mv_acc = 0
    r_acc = 0
    start_time = time.time()
    for step in range(FLAGS.max_steps):
        # Validation every 1000 steps
        if step % 1000 == 0 and step > 0:
            combine_classes = []
            mv_classes = []
            i_classes = []
            r_classes = []
            gt_label = []
            

            for i in range(100):
                ti_arr, tmv_arr, tr_arr, tlabel = sess.run(next_test_data)
                i_class, mv_class, r_class, com_class = sess.run([i_pred, mv_pred, r_pred, pred_class], 
                                    feed_dict={mv_placeholder: tmv_arr, i_placeholder: ti_arr,
                                               r_placeholder: tr_arr , label_placeholder : tlabel })
                combine_classes = np.append(combine_classes, com_class)
                mv_classes = np.append(mv_classes, mv_class)
                i_classes = np.append(i_classes, i_class)
                r_classes = np.append(r_classes, r_class)
                gt_label = np.append(gt_label, tlabel)
            
            # float() guards against integer division under Python 2
            combine_acc = np.sum(combine_classes == gt_label) / float(gt_label.size)
            i_acc = np.sum(i_classes == gt_label) / float(gt_label.size)
            mv_acc = np.sum(mv_classes == gt_label) / float(gt_label.size)
            r_acc = np.sum(r_classes == gt_label) / float(gt_label.size)

            print('Step %d finished with accuracy: %f , %f , %f, %f' % (step, i_acc, mv_acc, r_acc, combine_acc))
        
        
        # Training procedure
        i_arr, mv_arr, r_arr, flow_arr, label = sess.run(next_data)
        summary, _, pred, loss1, loss2, loss3, loss4, loss5 = sess.run([merged, train_opt, pred_class, mv_class_loss, i_class_loss, r_class_loss, combine_loss, flmg_loss],
                                    feed_dict={mv_placeholder: mv_arr, i_placeholder: i_arr,
                                                flow_placeholder: flow_arr,
                                               r_placeholder: r_arr , label_placeholder : label,
                                               combine_value_: combine_acc, i_value_ : i_acc,  
                                               mv_value_: mv_acc, r_value_ : r_acc})
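        # The *_value_ placeholders presumably feed the latest validation
        # accuracies back into scalar summaries (an assumption; their
        # tf.summary.scalar definitions sit outside this excerpt).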

        if step % 10 == 0:
            duration = time.time() - start_time
            print('Step %d: %.3f sec' % (step, duration), 'mv_loss:', loss1, 'i_loss:', loss2,
                  'r_loss:', loss3, 'fusion_loss:', loss4, 'flmg_loss:', loss5)
            print('GT:', label)
            print('Pred:', pred)

            writer.add_summary(summary, step)
            start_time = time.time()

        # Model saving

        if step % 1000 == 0 and step != 0:
            '''
            i_saver.save(sess, os.path.join(FLAGS.save_path, 'i_model.chkp'), global_step = step)
            mv_saver.save(sess, os.path.join(FLAGS.save_path, 'mv_model.chkp'), global_step = step)
            r_saver.save(sess, os.path.join(FLAGS.save_path, 'r_model.chkp'), global_step = step)

            i_resnet_saver.save(sess, os.path.join(FLAGS.save_path, 'i_resnet.chkp'), global_step = step)
            mv_resnet_saver.save(sess, os.path.join(FLAGS.save_path, 'mv_resnet.chkp'), global_step = step)
            r_resnet_saver.save(sess, os.path.join(FLAGS.save_path, 'r_resnet.chkp'), global_step = step)
            '''

            my_saver.save(sess, os.path.join(FLAGS.save_path, 'all_net.chkp'), global_step = step)

        #if (step) % 10000 == 0 and not step == 0 :
        #    i_resnet_saver.save(sess, os.path.join(FLAGS.save_path, 'i_resnet.chkp'), global_step = step)
        #    mv_resnet_saver.save(sess, os.path.join(FLAGS.save_path, 'mv_resnet.chkp'), global_step = step)
        #    r_resnet_saver.save(sess, os.path.join(FLAGS.save_path, 'r_resnet.chkp'), global_step = step)

    
    writer.close()
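    # Sketch: the fixed 100-batch validation above could sweep the whole test
    # split instead by re-initialising the iterator and catching the
    # end-of-data signal (this assumes test_data is not repeated indefinitely):
    #   sess.run(init_test_data)
    #   try:
    #       while True:
    #           ti_arr, tmv_arr, tr_arr, tlabel = sess.run(next_test_data)
    #           ...
    #   except tf.errors.OutOfRangeError:
    #       pass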
Example No. 26
0
    def build_model(self):
        """
        :return:
        """
        
        """
        Helper Variables
        """
        #self.global_step_tensor = tf.Variable(0, trainable=False, name='global_step')
        #self.global_step_inc = self.global_step_tensor.assign(self.global_step_tensor + 1)
        self.global_epoch_tensor = tf.Variable(0, trainable=False, name='global_epoch')
        self.global_epoch_inc = self.global_epoch_tensor.assign(self.global_epoch_tensor + 1)
        
        """
        Inputs to the network
        """
        with tf.variable_scope('inputs'):
            self.x, self.y, self.bi = self.data_loader.get_input()
            self.is_training = tf.placeholder(tf.bool, name='Training_flag')
        tf.add_to_collection('inputs', self.x)
        tf.add_to_collection('inputs', self.y)
        tf.add_to_collection('inputs', self.bi)
        tf.add_to_collection('inputs', self.is_training)

        """
        Network Architecture
        """
        
        with tf.variable_scope('network'):
            self.logits, end_points = resnet_v1.resnet_v1_50(inputs=self.x, num_classes=self.num_classes)
            self.logits = tf.squeeze(self.logits, axis=[1, 2])
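            # Note: resnet_v1_50 defaults to is_training=True; wiring in the
            # self.is_training placeholder defined above (slim's batch_norm
            # accepts a tensor here) would freeze batch-norm statistics at
            # eval time.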
            
            with tf.variable_scope('out'):
                #self.out = tf.squeeze(end_points['predictions'], axis=[1,2])
                self.out = tf.nn.softmax(self.logits, axis=-1)
            
            tf.add_to_collection('out', self.out)
            
            print("Logits shape: ", self.logits.shape)
            print("predictions out shape: ", self.out.shape)
            
            print("network output argmax resnet")
            with tf.variable_scope('out_argmax'):
                self.out_argmax = tf.argmax(self.logits, axis=-1, output_type=tf.int64, name='out_argmax')
                #self.out_argmax = tf.squeeze(tf.argmax(self.out, 1), axis=[1])
                
                print("Arg Max Shape: ", self.out_argmax.shape)

        with tf.variable_scope('loss-acc'):
            #one_hot_y = tf.one_hot(indices=self.y, depth=self.num_classes)
            
            self.loss = tf.losses.sparse_softmax_cross_entropy(labels=self.y, logits=self.logits)

            #probabilities = end_points['Predictions']

            #accuracy, accuracy_update = tf.metrics.accuracy(labels = one_hot_y, predictions = self.out_argmax)
            
            #self.acc = tf.reduce_mean(tf.cast(tf.equal(self.y, self.out_argmax), tf.float32))
            self.acc = self.evaluate_accuracy(self.y, self.out_argmax,
                                              self.is_training, self.config.patch_count)

        with tf.variable_scope('train_step'):
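            # Batch-norm moving-average updates are collected in UPDATE_OPS;
            # running the minimize op under their control dependency keeps the
            # statistics updated alongside each training step.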
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                self.train_step = self.optimizer.minimize(self.loss, global_step=self.global_step_tensor)

        tf.add_to_collection('train', self.train_step)
        tf.add_to_collection('train', self.loss)
        tf.add_to_collection('train', self.acc)
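        # Note: self.optimizer is created elsewhere in this class; a typical
        # choice (a sketch, and the attribute name is an assumption) would be
        #   self.optimizer = tf.train.AdamOptimizer(self.config.learning_rate)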
Example No. 27
0
def top_feature_net(input, anchors, inds_inside, num_bases):
    stride = 8
    arg_scope = resnet_v1.resnet_arg_scope(is_training=True)
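    # Note: resnet_arg_scope(is_training=...) matches older slim releases;
    # in later versions the flag moved onto resnet_v1_50 itself.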
    with slim.arg_scope(arg_scope):
        net, end_points = resnet_v1.resnet_v1_50(input,
                                                 None,
                                                 global_pool=False,
                                                 output_stride=16)
        block4 = end_points['resnet_v1_50/block4/unit_3/bottleneck_v1']
        block3 = end_points['resnet_v1_50/block3/unit_5/bottleneck_v1']
        block2 = end_points['resnet_v1_50/block2/unit_3/bottleneck_v1']
        tf.summary.histogram('top_block4', block4)
        tf.summary.histogram('top_block3', block3)
        tf.summary.histogram('top_block2', block2)
    with tf.variable_scope("top_up") as sc:
        block4_ = conv2d_relu(block4,
                              num_kernels=256,
                              kernel_size=(1, 1),
                              stride=[1, 1, 1, 1],
                              padding='SAME',
                              name='4')
        up_shape = tf.shape(block2)
        up4 = tf.image.resize_bilinear(block4_, [up_shape[1], up_shape[2]],
                                       name='up4')
        block3_ = conv2d_relu(block3,
                              num_kernels=256,
                              kernel_size=(1, 1),
                              stride=[1, 1, 1, 1],
                              padding='SAME',
                              name='3')
        up3 = tf.image.resize_bilinear(block3_, [up_shape[1], up_shape[2]],
                                       name='up3')
        block2_ = conv2d_relu(block2,
                              num_kernels=256,
                              kernel_size=(1, 1),
                              stride=[1, 1, 1, 1],
                              padding='SAME',
                              name='2')
        # up2     = upsample2d(block2_, factor = 2, has_bias=True, trainable=True, name='up2')
        up_34 = tf.add(up4, up3, name="up_add_3_4")
        up = tf.add(up_34, block2_, name="up_add_3_4_2")
        block = conv2d_relu(up,
                            num_kernels=256,
                            kernel_size=(3, 3),
                            stride=[1, 1, 1, 1],
                            padding='SAME',
                            name='rgb_ft')
    with tf.variable_scope('rpn_top') as scope:
        up = conv2d_relu(block,
                         num_kernels=256,
                         kernel_size=(3, 3),
                         stride=[1, 1, 1, 1],
                         padding='SAME',
                         name='2')
        scores = conv2d(up,
                        num_kernels=2 * num_bases,
                        kernel_size=(1, 1),
                        stride=[1, 1, 1, 1],
                        padding='SAME',
                        name='score')
        probs = tf.nn.softmax(tf.reshape(scores, [-1, 2]), name='prob')
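        # Two-way softmax: each anchor's score pair is reshaped to [-1, 2]
        # and normalised into per-anchor object-vs-background probabilities.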
        deltas = conv2d(up,
                        num_kernels=4 * num_bases,
                        kernel_size=(1, 1),
                        stride=[1, 1, 1, 1],
                        padding='SAME',
                        name='delta')
        deltasZ = conv2d(up,
                         num_kernels=2 * num_bases,
                         kernel_size=(1, 1),
                         stride=[1, 1, 1, 1],
                         padding='SAME',
                         name='deltaZ')

    feature = block
    return feature, scores, probs, deltas  #, rois, roi_scores,deltasZ, proposals_z, inside_inds_nms
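# Minimal smoke test (a sketch; the input shape and num_bases=9 are
# assumptions, and anchors/inds_inside are unused in this variant):
#   x = tf.placeholder(tf.float32, [1, 384, 1248, 3])
#   feature, scores, probs, deltas = top_feature_net(x, None, None, num_bases=9)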
Example No. 28
0
def rgb_feature_net(input):

    arg_scope = resnet_v1.resnet_arg_scope(weight_decay=0.0)
    with slim.arg_scope(arg_scope):
        net, end_points = resnet_v1.resnet_v1_50(input,
                                                 None,
                                                 global_pool=False,
                                                 output_stride=8)
        # pdb.set_trace()
        block4 = end_points['resnet_v1_50/block4']
        block3 = end_points['resnet_v1_50/block3']
        block2 = end_points['resnet_v1_50/block2']
        # block1=end_points['resnet_v1_50/block1/unit_3/bottleneck_v1/conv1']
        with tf.variable_scope("rgb_up") as sc:
            block4_ = conv2d_bn_relu(block4,
                                     num_kernels=256,
                                     kernel_size=(1, 1),
                                     stride=[1, 1, 1, 1],
                                     padding='SAME',
                                     name='4')
            up4 = upsample2d(block4_,
                             factor=2,
                             has_bias=True,
                             trainable=True,
                             name='up4')
            block3_ = conv2d_bn_relu(block3,
                                     num_kernels=256,
                                     kernel_size=(1, 1),
                                     stride=[1, 1, 1, 1],
                                     padding='SAME',
                                     name='3')
            up3 = upsample2d(block3_,
                             factor=2,
                             has_bias=True,
                             trainable=True,
                             name='up3')
            block2_ = conv2d_bn_relu(block2,
                                     num_kernels=256,
                                     kernel_size=(1, 1),
                                     stride=[1, 1, 1, 1],
                                     padding='SAME',
                                     name='2')
            up2 = upsample2d(block2_,
                             factor=2,
                             has_bias=True,
                             trainable=True,
                             name='up2')
            up_34 = tf.add(up4, up3, name="up_add_3_4")
            up = tf.add(up_34, up2, name="up_add_3_4_2")
            block = conv2d_bn_relu(up,
                                   num_kernels=256,
                                   kernel_size=(3, 3),
                                   stride=[1, 1, 1, 1],
                                   padding='SAME',
                                   name='rgb_ft')
            # block1_   = conv2d_bn_relu(block1, num_kernels=256, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='1')
            # up      =tf.add(block1_, up_, name="up_add")
        # block   = conv2d_bn_relu(block, num_kernels=512, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='2')
        # <todo> feature = upsample2d(block, factor = 4,  ...)
        tf.summary.histogram('rgb_top_block', block)

    feature = block
    return feature
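# Note: with output_stride=8 the block2/3/4 endpoints all share the same
# spatial stride (slim turns the deeper strides into atrous convolutions),
# so after one factor-2 upsampling each, the three maps line up for the
# element-wise adds above.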
Example No. 29
0
    #print ("unitest for resnet")
    batch_size = 10
    img_size = 256
    img = cv2.imread(
        '/mnt/ilcompf8d0/user/weiyuewa/sources/pipeline1/tf_neural_renderer/img.png'
    )

    # with tf.Session('') as sess:

    with tf.device('/gpu:0'):
        inputbatch = tf.expand_dims(
            tf.constant(img, dtype=tf.float32),
            axis=0)  #tf.zeros([batch_size, img_size, img_size, 3])

        logits, endpoints = resnet_v1.resnet_v1_50(inputbatch,
                                                   1000,
                                                   is_training=False)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        sess = tf.Session(config=config)
        variables_to_restore = []

        a = [
            name for name, _ in checkpoint_utils.list_variables(
                'pretrained_model/resnet_v1_50.ckpt')
        ]
        # print a
        for var in slim.get_model_variables():
            # Plausible completion -- the original snippet is truncated here:
            # keep only variables that also exist in the pretrained checkpoint.
            if var.op.name in a:
                variables_to_restore.append(var)

        restorer = tf.train.Saver(variables_to_restore)
        restorer.restore(sess, 'pretrained_model/resnet_v1_50.ckpt')
Example No. 30
0
def _get_resnet_features(inputs):
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        resnet_v1.resnet_v1_50(inputs, num_classes=None, is_training=True)

    return tf.get_default_graph().get_tensor_by_name(
        'resnet_v1_50/block4/unit_3/bottleneck_v1/Relu:0')
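# Hypothetical usage (a sketch; the input resolution is an assumption):
#   images = tf.placeholder(tf.float32, [None, 224, 224, 3])
#   feats = _get_resnet_features(images)  # block4 activations, [N, 7, 7, 2048]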