def tower_loss(scope):
    """Build one tower of the model and return its total loss tensor.

    The input batch comes from ``read_and_decode()`` and the backbone is
    selected by the module-level ``net`` name. Every loss found in the
    'losses' collection under ``scope`` is summed into 'total_loss', and a
    scalar summary is written for each individual loss and for the total.

    Args:
        scope: name scope used to filter the 'losses' collection.

    Returns:
        The ``total_loss`` tensor for this tower.

    Raises:
        Exception: if ``net`` is not one of the supported network names.
    """
    images, labels = read_and_decode()

    if net in ('vgg_16', 'vgg_19'):
        with slim.arg_scope(vgg.vgg_arg_scope()):
            vgg_builder = vgg.vgg_16 if net == 'vgg_16' else vgg.vgg_19
            logits, end_points = vgg_builder(
                images, num_classes=FLAGS.num_classes)
    elif net in ('resnet_v1_101', 'resnet_v1_50', 'resnet_v2_50'):
        resnet_module = resnet_v2 if net == 'resnet_v2_50' else resnet_v1
        with slim.arg_scope(resnet_module.resnet_arg_scope()):
            resnet_builder = getattr(resnet_module, net)
            logits, end_points = resnet_builder(
                images, num_classes=FLAGS.num_classes)
        # The ResNet output is reshaped to a plain (batch, classes) matrix.
        logits = tf.reshape(logits, [FLAGS.batch_size, FLAGS.num_classes])
    else:
        raise Exception('No network matched with net %s.' % net)

    assert logits.shape == (FLAGS.batch_size, FLAGS.num_classes)

    # cal_loss's return value is ignored; the losses are read back from the
    # 'losses' collection below.
    cal_loss(logits, labels)
    tower_losses = tf.get_collection('losses', scope)
    total_loss = tf.add_n(tower_losses, name='total_loss')

    # Strip the per-tower prefix so summaries share one name across towers.
    tower_prefix = '%s_[0-9]*/' % TOWER_NAME
    for loss_tensor in tower_losses + [total_loss]:
        summary_name = re.sub(tower_prefix, '', loss_tensor.op.name)
        tf.summary.scalar(summary_name, loss_tensor)

    return total_loss
def build_2(self, inputs, input_pixel_size, is_training,
            scope='resnet_v1_101', weight_decay=0.0001):
    """Build a ResNet v1-101 trunk and return pyramid feature maps.

    Args:
        inputs: image tensor fed to the network.
        input_pixel_size: not used by this method.
        is_training: forwarded to ``resnet_v1.resnet_v1_101``.
        scope: not used by this method; the network is built under its
            default scope.
        weight_decay: forwarded to ``resnet_v1.resnet_arg_scope``.

    Returns:
        A tuple ``(feature_maps_out, feature_maps_dict)`` where the dict maps
        'C2'..'C5' to entries of ``self.share_net`` and ``feature_maps_out``
        is the 'C5' entry.
    """
    resnet_scope = resnet_v1.resnet_arg_scope(weight_decay=weight_decay)
    with slim.arg_scope(resnet_scope):
        # NOTE(review): the returned (net, end_points) pair is discarded and
        # the feature maps are read from self.share_net instead - confirm
        # that share_net is populated elsewhere with these end points.
        resnet_v1.resnet_v1_101(
            inputs=inputs,
            num_classes=None,
            is_training=is_training,
            global_pool=False,
            output_stride=None,
            spatial_squeeze=False)

    shared = self.share_net
    level_keys = (
        ('C2', 'resnet_v1_101/block1/unit_2/bottleneck_v1'),
        ('C3', 'resnet_v1_101/block2/unit_3/bottleneck_v1'),
        ('C4', 'resnet_v1_101/block3/unit_22/bottleneck_v1'),
        ('C5', 'resnet_v1_101/block4'),
    )
    feature_maps_dict = {}
    for level, end_point_key in level_keys:
        feature_maps_dict[level] = shared[end_point_key]

    return feature_maps_dict['C5'], feature_maps_dict
def get_slim_resnet_v1_byname(net_name,
                              inputs,
                              num_classes=None,
                              is_training=True,
                              global_pool=True,
                              output_stride=None,
                              weight_decay=0.):
    """Build a slim ResNet v1 network selected by name.

    Args:
        net_name: 'resnet_v1_50' or 'resnet_v1_101'.
        inputs: input tensor forwarded to the network builder.
        num_classes: forwarded to the network builder.
        is_training: forwarded to the network builder.
        global_pool: forwarded to the network builder.
        output_stride: forwarded to the network builder.
        weight_decay: forwarded to ``resnet_v1.resnet_arg_scope``.

    Returns:
        The ``(logits, end_points)`` pair returned by the network builder.

    Raises:
        ValueError: if ``net_name`` is not a supported network name.
    """
    supported_nets = ('resnet_v1_50', 'resnet_v1_101')
    # Fail loudly here: falling through used to return None, which only
    # surfaced later as an unrelated unpacking error in the caller.
    if net_name not in supported_nets:
        raise ValueError(
            'Unsupported net_name %r; expected one of %s.'
            % (net_name, ', '.join(supported_nets)))

    # The builder functions are named exactly like the supported net names.
    builder = getattr(resnet_v1, net_name)
    with slim.arg_scope(
            resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = builder(
            inputs=inputs,
            num_classes=num_classes,
            is_training=is_training,
            global_pool=global_pool,
            output_stride=output_stride,
        )
    return logits, end_points
def Encoder_resnet_v1_101(x, weight_decay, is_training=True, reuse=False):
    """
    Resnet v1-101 encoder whose output is flattened to two dimensions.

    The network is built with num_classes=None under the variable scope
    'resnet_v1_101'; its output is reshaped to [static batch size, -1].

    NOTE(review): earlier documentation said this adds 2 fc layers after
    Resnet, but no fc layers are created in this function.

    Input:
    - x: image batch, documented by the original author as N x H x W x 3
    - weight_decay: float, forwarded to resnet_arg_scope
    - is_training: bool, forwarded to the network
    - reuse: bool, forwarded to the network (original note: True if test)

    Outputs:
    - net: the flattened network output
    - variables: tf variables collected under 'resnet_v1_101'
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v1

    with tf.name_scope("Encoder_resnet_v1_101", [x]), \
            slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        features, _ = resnet_v1.resnet_v1_101(
            x,
            num_classes=None,
            is_training=is_training,
            reuse=reuse,
            scope='resnet_v1_101')
        # Uses the static batch dimension of the network output.
        static_batch = features.shape.as_list()[0]
        net = tf.reshape(features, [static_batch, -1])

    variables = tf.contrib.framework.get_variables('resnet_v1_101')
    return net, variables
def main(_):
    """Extract ResNet v1-101 pool5 features and save one .npy per image.

    Image names are taken from the first comma-separated field of each line
    of ``FLAGS.input_fname`` with the last four characters removed. Images
    whose feature file already exists in ``FLAGS.output_dir`` are skipped,
    so an interrupted run can be resumed.

    Args:
        _: unused positional argument.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.gpu_id
    if not tf.gfile.Exists(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    # Blank lines would otherwise turn into an empty image name.
    with open(FLAGS.input_fname, 'r') as f:
        names = [line.split(',')[0][:-4] for line in f if line.strip()]

    # Features are saved under the image's basename (see the save loop), so
    # the "already done" check has to look at the same location.
    filenames = [
        os.path.join(FLAGS.image_dir, name) for name in names
        if not os.path.exists(
            os.path.join(FLAGS.output_dir, os.path.basename(name) + '.npy'))
    ]
    if not filenames:
        # string_input_producer rejects an empty list, so stop cleanly when
        # every feature file is already present.
        print("No images left to process -- nothing to do")
        return

    with tf.Graph().as_default() as g:
        filename_queue = tf.train.string_input_producer(filenames)
        reader = tf.WholeFileReader()
        key, value = reader.read(filename_queue)
        image = tf.image.decode_jpeg(value, channels=3)

        image_size = resnet_v1.resnet_v1.default_image_size
        processed_image = vgg_preprocessing.preprocess_image(
            image, image_size, image_size, is_training=False
        )
        processed_images, keys = tf.train.batch(
            [processed_image, key],
            FLAGS.batch_size,
            num_threads=8,
            capacity=8 * FLAGS.batch_size * 5,
            allow_smaller_final_batch=True
        )

        # Create the model
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = resnet_v1.resnet_v1_101(
                processed_images, num_classes=1000, is_training=False
            )

        init_fn = slim.assign_from_checkpoint_fn(
            FLAGS.checkpoint_dir, slim.get_model_variables()
        )

        pool5 = g.get_operation_by_name('resnet_v1_101/pool5').outputs[0]
        pool5 = tf.transpose(pool5, perm=[0, 3, 1, 2])  # (batch_size, 2048, 1, 1)

        with tf.Session() as sess:
            init_fn(sess)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            try:
                num_steps = len(filenames) // FLAGS.batch_size + 1
                for step in tqdm(range(num_steps), ncols=70):
                    if coord.should_stop():
                        break
                    file_names, pool5_value = sess.run([keys, pool5])
                    for file_name, feature in zip(file_names, pool5_value):
                        out_name = os.path.basename(file_name).decode('utf-8')
                        np.save(
                            os.path.join(FLAGS.output_dir, out_name + '.npy'),
                            feature.astype(np.float32))
            except tf.errors.OutOfRangeError:
                print("Done feature extraction -- epoch limit reached")
            finally:
                coord.request_stop()
                coord.join(threads)
def build(self):
    """Create the input placeholders and the classification network.

    Reads ``self.img_size``, ``self.img_mean``, ``self.base_net``,
    ``self.num_classes`` and ``self.is_train``. Sets ``self.input``,
    ``self.input_mean``, ``self.logits``, ``self.prob`` and ``self.gt``, and
    calls ``self.loss()`` when ``self.is_train`` is truthy.

    Raises:
        ValueError: if ``self.base_net`` is not one of 'vgg16', 'res50',
            'res101' or 'res152'.
    """
    # Input
    self.input = tf.placeholder(
        dtype=tf.float32,
        shape=[None, self.img_size[0], self.img_size[1], self.img_size[2]])
    self.input_mean = tfutils.mean_value(self.input, self.img_mean)

    resnet_builder_names = {
        'res50': 'resnet_v1_50',
        'res101': 'resnet_v1_101',
        'res152': 'resnet_v1_152',
    }

    if self.base_net == 'vgg16':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            # Pass is_training explicitly, as the ResNet branches do;
            # vgg_16 otherwise defaults to training mode, which keeps
            # dropout active at inference time.
            outputs, end_points = vgg.vgg_16(
                self.input_mean, self.num_classes,
                is_training=self.is_train)
        self.logits = outputs
    elif self.base_net in resnet_builder_names:
        builder = getattr(resnet_v1, resnet_builder_names[self.base_net])
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = builder(
                self.input_mean, self.num_classes,
                is_training=self.is_train)
        # Index away the two middle axes of the network output.
        self.logits = net[:, 0, 0, :]
    else:
        raise ValueError(
            'base network should be vgg16, res50, -101, -152...')

    self.prob = tf.nn.softmax(self.logits, -1)
    self.gt = tf.placeholder(dtype=tf.int32, shape=[None])

    if self.is_train:
        self.loss()
def backbone(self):
    """Build a ResNet v1-101 classifier head on ``self.inputs``.

    The trunk is built with ``num_classes=None`` and ``global_pool=False``;
    a 1x1 convolution named 'logits' with ``cfgs.NUM_CLASS`` outputs is then
    applied and averaged over axes 1 and 2.

    Returns:
        A tuple ``(output, logits)``: ``output`` is the averaged 1x1
        convolution result and ``logits`` is ``tf.nn.softmax(output)``.
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost. It assumes only the ResNet trunk is built inside
    # the trainable=False scope, and that the 'logits' convolution sits
    # inside the resnet arg_scope - confirm against the original file.
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        with slim.arg_scope([slim.conv2d], trainable=False):
            # output, end_points = resnet_v1.resnet_v1_50(self.inputs, num_classes=cfgs.NUM_CLASS, is_training=self.is_training)
            output, end_points = resnet_v1.resnet_v1_101(
                self.inputs, num_classes=None, is_training=self.is_training, global_pool=False)
        # 1x1 convolution with no activation and no normalizer.
        output = slim.conv2d(output, cfgs.NUM_CLASS, [1, 1], activation_fn=None, normalizer_fn=None, scope='logits')
        output = tf.reduce_mean(output, [1, 2], name='global_pool')
        # Despite its name, `logits` holds the softmax of `output`.
        logits = tf.nn.softmax(output)
    return output, logits
def get_model(input_pls, is_training, bn=False, bn_decay=None, img_size=224, FLAGS=None):
    """Build the image-conditioned GVF prediction graph.

    Args:
        input_pls: mapping of input tensors; the keys read here are 'imgs',
            'pnts', 'gvfs', 'onedge', 'trans_mats' and 'obj_rot_mats'.
        is_training: forwarded to the encoder (the VGG-16 encoder is always
            built with is_training=False) and to the gvfnet feature builders.
        bn: forwarded to res_sim_encoder and the gvfnet feature builders.
        bn_decay: forwarded to res_sim_encoder and the gvfnet feature
            builders.
        img_size: images whose static height or width differs from this
            value are resized to img_size x img_size.
        FLAGS: configuration object. Attributes read: act, rot, edgeweight,
            alpha, encoder, wd, num_classes, batch_size, img_feat_onestream,
            img_h, img_w, decoder, multi_view, XYZ. It is dereferenced
            immediately, so the None default cannot actually be used.

    Returns:
        The ``end_points`` dict. Keys written here: 'pnts', 'gt_gvfs_xyz',
        'pnts_rot', 'onedge' (only when FLAGS.edgeweight != 1.0), 'imgs',
        'resized_ref_img', 'img_embedding', 'pred_gvfs_xyz',
        'pred_gvfs_dist', 'pred_gvfs_direction', 'sample_img_points' and
        'point_img_feat'.
    """
    # NOTE(review): this function was reconstructed from a source whose
    # indentation was lost; nesting was inferred from syntax. Places where
    # the nesting changes variable-scope names are marked below.

    # NOTE(review): any other FLAGS.act value leaves activation_fn unbound.
    if FLAGS.act == "relu":
        activation_fn = tf.nn.relu
    elif FLAGS.act == "elu":
        activation_fn = tf.nn.elu
    input_imgs = input_pls['imgs']
    input_pnts = input_pls['pnts']
    input_gvfs = input_pls['gvfs']
    input_onedge = input_pls['onedge']
    input_trans_mat = input_pls['trans_mats']
    input_obj_rot_mats = input_pls['obj_rot_mats']
    # Static batch size taken from the image tensor's first dimension.
    batch_size = input_imgs.get_shape()[0].value
    # endpoints
    end_points = {}
    end_points['pnts'] = input_pnts
    if FLAGS.rot:
        # Apply the object rotation to both the ground-truth field and points.
        end_points['gt_gvfs_xyz'] = tf.matmul(input_gvfs, input_obj_rot_mats)
        end_points['pnts_rot'] = tf.matmul(input_pnts, input_obj_rot_mats)
    else:
        end_points['gt_gvfs_xyz'] = input_gvfs #* 10
        end_points['pnts_rot'] = input_pnts
    if FLAGS.edgeweight != 1.0:
        end_points['onedge'] = input_onedge
    input_pnts_rot = end_points['pnts_rot']
    end_points['imgs'] = input_imgs # B*H*W*3|4
    # Image extract features
    if input_imgs.shape[1] != img_size or input_imgs.shape[2] != img_size:
        if FLAGS.alpha:
            # The first three channels are resized bilinearly; channel 3 is
            # resized with nearest-neighbour and concatenated back on.
            ref_img_rgb = tf.compat.v1.image.resize_bilinear(
                input_imgs[:, :, :, :3], [img_size, img_size])
            ref_img_alpha = tf.image.resize_nearest_neighbor(
                tf.expand_dims(input_imgs[:, :, :, 3], axis=-1),
                [img_size, img_size])
            ref_img = tf.concat([ref_img_rgb, ref_img_alpha], axis=-1)
        else:
            ref_img = tf.compat.v1.image.resize_bilinear(
                input_imgs, [img_size, img_size])
    else:
        ref_img = input_imgs
    end_points['resized_ref_img'] = ref_img
    # Encoder selection. NOTE(review): an encoder name matching none of the
    # branches leaves ref_feats_embedding / encdr_end_points unbound.
    if FLAGS.encoder[:6] == "vgg_16":
        vgg.vgg_16.default_image_size = img_size
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(FLAGS.wd)):
            ref_feats_embedding, encdr_end_points = vgg.vgg_16(
                ref_img,
                num_classes=FLAGS.num_classes,
                is_training=False,
                scope='vgg_16',
                spatial_squeeze=False)
    elif FLAGS.encoder == "sim_res":
        ref_feats_embedding, encdr_end_points = res_sim_encoder.res_sim_encoder(
            ref_img,
            FLAGS.batch_size,
            is_training=is_training,
            activation_fn=activation_fn,
            bn=bn,
            bn_decay=bn_decay,
            wd=FLAGS.wd)
    elif FLAGS.encoder == "resnet_v1_50":
        resnet_v1.default_image_size = img_size
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v1.resnet_v1_50(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v1_50')
        # End-point keys of the four residual blocks, used for sampling below.
        scopelst = [
            "resnet_v1_50/block1", "resnet_v1_50/block2",
            "resnet_v1_50/block3", 'resnet_v1_50/block4'
        ]
    elif FLAGS.encoder == "resnet_v1_101":
        resnet_v1.default_image_size = img_size
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v1.resnet_v1_101(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v1_101')
        scopelst = [
            "resnet_v1_101/block1", "resnet_v1_101/block2",
            "resnet_v1_101/block3", 'resnet_v1_101/block4'
        ]
    elif FLAGS.encoder == "resnet_v2_50":
        resnet_v2.default_image_size = img_size
        # The v2 networks are built under resnet_v1's arg scope.
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v2.resnet_v2_50(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v2_50')
        scopelst = [
            "resnet_v2_50/block1", "resnet_v2_50/block2",
            "resnet_v2_50/block3", 'resnet_v2_50/block4'
        ]
    elif FLAGS.encoder == "resnet_v2_101":
        resnet_v2.default_image_size = img_size
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v2.resnet_v2_101(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v2_101')
        scopelst = [
            "resnet_v2_101/block1", "resnet_v2_101/block2",
            "resnet_v2_101/block3", 'resnet_v2_101/block4'
        ]
    end_points['img_embedding'] = ref_feats_embedding
    point_img_feat = None
    gvfs_feat = None
    sample_img_points = get_img_points(input_pnts, input_trans_mat) # B * N * 2
    if FLAGS.img_feat_onestream:
        # NOTE(review): everything down to the decoder dispatch is assumed to
        # live inside the "sdfimgfeat" variable scope - confirm.
        with tf.compat.v1.variable_scope("sdfimgfeat") as scope:
            # Each selected encoder feature map is resized to
            # (FLAGS.img_h, FLAGS.img_w) and sampled at sample_img_points;
            # the sampled features are concatenated along axis 2.
            if FLAGS.encoder[:3] == "vgg":
                conv1 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points['vgg_16/conv1/conv1_2'],
                    (FLAGS.img_h, FLAGS.img_w))
                point_conv1 = tf.contrib.resampler.resampler(
                    conv1, sample_img_points)
                conv2 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points['vgg_16/conv2/conv2_2'],
                    (FLAGS.img_h, FLAGS.img_w))
                point_conv2 = tf.contrib.resampler.resampler(
                    conv2, sample_img_points)
                conv3 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points['vgg_16/conv3/conv3_3'],
                    (FLAGS.img_h, FLAGS.img_w))
                point_conv3 = tf.contrib.resampler.resampler(
                    conv3, sample_img_points)
                # Encoder names ending in "smaller" skip the conv4 features.
                if FLAGS.encoder[-7:] != "smaller":
                    conv4 = tf.compat.v1.image.resize_bilinear(
                        encdr_end_points['vgg_16/conv4/conv4_3'],
                        (FLAGS.img_h, FLAGS.img_w))
                    point_conv4 = tf.contrib.resampler.resampler(
                        conv4, sample_img_points)
                    point_img_feat = tf.concat(axis=2,
                                               values=[
                                                   point_conv1, point_conv2,
                                                   point_conv3, point_conv4
                                               ]) # small
                else:
                    print("smaller vgg")
                    point_img_feat = tf.concat(
                        axis=2,
                        values=[point_conv1, point_conv2,
                                point_conv3]) # small
            elif FLAGS.encoder[:3] == "res":
                # print(encdr_end_points.keys())
                # Only the first three residual blocks are sampled.
                conv1 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[scopelst[0]], (FLAGS.img_h, FLAGS.img_w))
                point_conv1 = tf.contrib.resampler.resampler(
                    conv1, sample_img_points)
                conv2 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[scopelst[1]], (FLAGS.img_h, FLAGS.img_w))
                point_conv2 = tf.contrib.resampler.resampler(
                    conv2, sample_img_points)
                conv3 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[scopelst[2]], (FLAGS.img_h, FLAGS.img_w))
                point_conv3 = tf.contrib.resampler.resampler(
                    conv3, sample_img_points)
                # conv4 = tf.compat.v1.image.resize_bilinear(encdr_end_points[scopelst[3]], (FLAGS.img_h, FLAGS.img_w))
                # point_conv4 = tf.contrib.resampler.resampler(conv4, sample_img_points)
                point_img_feat = tf.concat(
                    axis=2, values=[point_conv1, point_conv2, point_conv3])
            else:
                # Fallback: encdr_end_points is indexed by integer position
                # (presumably the "sim_res" encoder's output - verify).
                conv1 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[0], (FLAGS.img_h, FLAGS.img_w))
                point_conv1 = tf.contrib.resampler.resampler(
                    conv1, sample_img_points)
                conv2 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[1], (FLAGS.img_h, FLAGS.img_w))
                point_conv2 = tf.contrib.resampler.resampler(
                    conv2, sample_img_points)
                conv3 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[2], (FLAGS.img_h, FLAGS.img_w))
                point_conv3 = tf.contrib.resampler.resampler(
                    conv3, sample_img_points)
                # conv4 = tf.compat.v1.image.resize_bilinear(encdr_end_points[scopelst[3]], (FLAGS.img_h, FLAGS.img_w))
                # point_conv4 = tf.contrib.resampler.resampler(conv4, sample_img_points)
                point_img_feat = tf.concat(
                    axis=2, values=[point_conv1, point_conv2, point_conv3])
            print("point_img_feat.shape", point_img_feat.get_shape())
            point_img_feat = tf.expand_dims(point_img_feat, axis=2)
            # Decoder selection; any value other than "att" / "skip" uses the
            # basic one-stream decoder.
            if FLAGS.decoder == "att":
                gvfs_feat = gvfnet.get_gvf_att_imgfeat(
                    input_pnts_rot,
                    ref_feats_embedding,
                    point_img_feat,
                    is_training,
                    batch_size,
                    bn,
                    bn_decay,
                    wd=FLAGS.wd,
                    activation_fn=activation_fn)
            elif FLAGS.decoder == "skip":
                gvfs_feat = gvfnet.get_gvf_basic_imgfeat_onestream_skip(
                    input_pnts_rot,
                    ref_feats_embedding,
                    point_img_feat,
                    is_training,
                    batch_size,
                    bn,
                    bn_decay,
                    wd=FLAGS.wd,
                    activation_fn=activation_fn)
            else:
                gvfs_feat = gvfnet.get_gvf_basic_imgfeat_onestream(
                    input_pnts_rot,
                    ref_feats_embedding,
                    point_img_feat,
                    is_training,
                    batch_size,
                    bn,
                    bn_decay,
                    wd=FLAGS.wd,
                    activation_fn=activation_fn)
    else:
        # NOTE(review): when FLAGS.multi_view is set, gvfs_feat stays None
        # and is passed to the prediction heads below as-is.
        if not FLAGS.multi_view:
            with tf.compat.v1.variable_scope("sdfprediction") as scope:
                gvfs_feat = gvfnet.get_gvf_basic(input_pnts_rot,
                                                 ref_feats_embedding,
                                                 is_training,
                                                 batch_size,
                                                 bn,
                                                 bn_decay,
                                                 wd=FLAGS.wd,
                                                 activation_fn=activation_fn)
    # NOTE(review): the prediction heads are assumed to be built at function
    # level, outside the variable scopes above - confirm.
    end_points['pred_gvfs_xyz'], end_points['pred_gvfs_dist'], end_points[
        'pred_gvfs_direction'] = None, None, None
    if FLAGS.XYZ:
        # Predict the xyz vector directly; distance is its norm over axis 2
        # and direction is the vector divided by that norm (floored at 1e-6
        # to avoid division by zero).
        end_points['pred_gvfs_xyz'] = gvfnet.xyz_gvfhead(
            gvfs_feat, batch_size, wd=FLAGS.wd, activation_fn=activation_fn)
        end_points['pred_gvfs_dist'] = tf.sqrt(
            tf.reduce_sum(tf.square(end_points['pred_gvfs_xyz']),
                          axis=2,
                          keepdims=True))
        end_points[
            'pred_gvfs_direction'] = end_points['pred_gvfs_xyz'] / tf.maximum(
                end_points['pred_gvfs_dist'], 1e-6)
    else:
        # Predict distance and direction separately; xyz is their product.
        end_points['pred_gvfs_dist'], end_points[
            'pred_gvfs_direction'] = gvfnet.dist_direct_gvfhead(
                gvfs_feat,
                batch_size,
                wd=FLAGS.wd,
                activation_fn=activation_fn)
        end_points['pred_gvfs_xyz'] = end_points[
            'pred_gvfs_direction'] * end_points['pred_gvfs_dist']
    end_points["sample_img_points"] = sample_img_points
    # end_points["ref_feats_embedding"] = ref_feats_embedding
    end_points["point_img_feat"] = point_img_feat
    return end_points
def get_featuremap(net_name, input, num_classes=None):
    """Build a slim backbone and return its feature map(s).

    Supported ``net_name`` values:

    - 'resnet_v1_50': built with ``is_training=False``. With ``cfg.USE_FPN``
      a dict of the C2..C5 end points is returned, otherwise the
      'resnet_v1_50/block3/unit_5/bottleneck_v1' end point.
    - 'resnet_v1_101': built with ``is_training=True``. With ``cfg.USE_FPN``
      a dict of the C2..C5 end points is returned, otherwise the network
      output itself.
    - 'vgg_16': returns the 'vgg_16/conv5/conv5_3' end point.

    Args:
        net_name: name of the backbone to build.
        input: input tensor fed to the backbone.
        num_classes: forwarded to the ResNet builders; the VGG branch does
            not use it.

    Returns:
        A feature-map tensor, or a dict of feature maps keyed 'C2'..'C5'
        (see above).

    Raises:
        ValueError: if ``net_name`` is not one of the supported names.
    """
    # slim
    if net_name == 'resnet_v1_50':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(
                    weight_decay=cfg.FEATURE_WEIGHT_DECAY)):
            featuremap, layer_dic = resnet_v1.resnet_v1_50(
                inputs=input,
                num_classes=num_classes,
                is_training=False,
                global_pool=False)
        if cfg.USE_FPN:
            # Size notes in brackets are the original author's.
            return {
                'C2': layer_dic[
                    'resnet_v1_50/block1/unit_2/bottleneck_v1'],  # [56, 56]
                'C3': layer_dic[
                    'resnet_v1_50/block2/unit_3/bottleneck_v1'],  # [28, 28]
                'C4': layer_dic[
                    'resnet_v1_50/block3/unit_5/bottleneck_v1'],  # [14, 14]
                'C5': layer_dic['resnet_v1_50/block4'],  # [7, 7]
            }
        return layer_dic['resnet_v1_50/block3/unit_5/bottleneck_v1']

    if net_name == 'resnet_v1_101':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(
                    weight_decay=cfg.FEATURE_WEIGHT_DECAY)):
            # NOTE(review): built with is_training=True while the
            # resnet_v1_50 branch uses False - confirm this is intended.
            featuremap, layer_dic = resnet_v1.resnet_v1_101(
                inputs=input,
                num_classes=num_classes,
                is_training=True,
                global_pool=False)
        if cfg.USE_FPN:
            return {
                'C2': layer_dic[
                    'resnet_v1_101/block1/unit_2/bottleneck_v1'],  # [56, 56]
                'C3': layer_dic[
                    'resnet_v1_101/block2/unit_3/bottleneck_v1'],  # [28, 28]
                'C4': layer_dic[
                    'resnet_v1_101/block3/unit_22/bottleneck_v1'],  # [14, 14]
                'C5': layer_dic['resnet_v1_101/block4'],
            }
        return featuremap

    if net_name == 'vgg_16':
        # NOTE(review): VGG is built under the ResNet arg scope and with a
        # hard-coded num_classes=7 (the num_classes argument is ignored).
        # Kept as-is because changing either alters the created variables.
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(
                    weight_decay=cfg.FEATURE_WEIGHT_DECAY)):
            featuremap, layer_dic = vgg.vgg_16(
                inputs=input,
                num_classes=7,
                is_training=False,
                spatial_squeeze=False,
            )
        return layer_dic['vgg_16/conv5/conv5_3']

    # Falling through used to return None silently, which only failed later
    # in the caller; report the unsupported name here instead.
    raise ValueError(
        "Unsupported net_name %r; expected 'resnet_v1_50', 'resnet_v1_101' "
        "or 'vgg_16'." % (net_name,))