def get_imagenet_from_checkpoint(checkpoint_path):
    """
    net = get_imagenet_from_checkpoint(checkpoint_path)
    net.func_name           # like inception_v4
    net.default_image_size  # like 299
    """
    checkpoint = melt.get_model_path(checkpoint_path)
    if not checkpoint or (not os.path.exists(checkpoint)
                          and not os.path.exists(checkpoint + '.index')):
        return None
    from tensorflow.python import pywrap_tensorflow
    reader = pywrap_tensorflow.NewCheckpointReader(checkpoint)
    var_to_shape_map = reader.get_variable_to_shape_map()
    name = None
    for key in var_to_shape_map.keys():
        # The variable scope prefix, e.g. 'InceptionV4', maps to the gnu-style
        # net name used as the networks_map key, e.g. 'inception_v4'.
        prefix = key.split('/')[0]
        gnu_name = gezi.to_gnu_name(prefix)
        if gnu_name in nets_factory.networks_map:
            name = prefix
            break
    # `name` stays None when no variable prefix matched a known net.
    if name is None:
        return None
    nets_factory.networks_map[gnu_name].name = name
    return nets_factory.networks_map[gnu_name]
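# --- Hedged usage sketch (not part of the original source): shows the intended
# call pattern for get_imagenet_from_checkpoint. The checkpoint path below is a
# placeholder, and `melt`/`gezi`/`nets_factory` are the module-level imports
# this file already relies on.
def _example_get_imagenet_from_checkpoint():
    net = get_imagenet_from_checkpoint('/path/to/inception_v4.ckpt')
    if net is not None:
        # `name` is the scope name recovered from the checkpoint, e.g.
        # 'InceptionV4'; `default_image_size` is e.g. 299.
        print(net.name, net.default_image_size)
    return net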
def get_net_from_checkpoint(checkpoint):
    """
    net, name = get_net_from_checkpoint(checkpoint)
    net.func_name           # like inception_v4
    net.default_image_size  # like 299
    """
    from tensorflow.python import pywrap_tensorflow
    reader = pywrap_tensorflow.NewCheckpointReader(checkpoint)
    var_to_shape_map = reader.get_variable_to_shape_map()
    name = None
    for key in var_to_shape_map.keys():
        prefix = key.split('/')[0]
        gnu_name = gezi.to_gnu_name(prefix)
        if gnu_name in nets_factory.networks_map:
            name = prefix
            break
    if name is None:
        return None
    # networks_map is keyed by the gnu-style name, not the scope name.
    return nets_factory.networks_map[gnu_name], name
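# --- Hedged usage sketch (not part of the original source): unlike
# get_imagenet_from_checkpoint above, this variant takes an already resolved
# checkpoint path and returns a (net_fn, scope_name) tuple, or None when no
# known net is found. The path below is a placeholder.
def _example_get_net_from_checkpoint():
    result = get_net_from_checkpoint('/path/to/resolved/model.ckpt')
    if result is not None:
        net, name = result
        print(name, net.default_image_size)  # e.g. InceptionV4 299
    return result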
def construct_fn(
        encoded_image,
        height,
        width,
        trainable=False,
        is_training=False,
        resize_height=346,
        resize_width=346,
        random_crop=True,
        distort=True,
        slim_preprocessing=True,
        weight_decay=0.00004,
        finetune_end_point=None,
        feature_name=feature_name,
        image_format="jpeg",  # for safety just use decode_jpeg
        reuse=None):
    logging.info('image model trainable:{}, is_training:{}'.format(
        trainable, is_training))

    # Allow [batch_size, 1] as input.
    # print(encoded_image.shape)  # should be (?,)
    # encoded_image = tf.squeeze(encoded_image)
    # A bare squeeze causes problems when the input batch size is 1, so it
    # seems dangerous. TODO check. tf.squeeze(encoded_image, 1) also fails
    # (index out of range). TODO: squeeze only if len(shape) > 1?
    batch_size = encoded_image.get_shape()[0].value or tf.shape(encoded_image)[0]
    encoded_image = tf.reshape(encoded_image, [batch_size])
    # Below is also ok? TODO CHECK
    # shape_list = encoded_image.get_shape().as_list()
    # if len(shape_list) > 1:
    #     encoded_image = tf.squeeze(encoded_image, -1)

    # preprocess_image
    net_name = gezi.to_gnu_name(name)
    # This is slightly slow and the result differs from the im2txt InceptionV3
    # usage; the im2txt code seems ok, not sure whether slim preprocessing is
    # better! TODO
    # For inception-related models the im2txt processing should be fine; for
    # other models, not sure. TODO
    # slim preprocessing:   real 2m45.737s user 3m12.896s sys 0m10.265s
    # im2txt preprocessing: real 2m46.709s user 3m8.067s  sys 0m8.297s
    # The final features differ slightly. Interestingly, using two tf.map_fn
    # calls (one to decode, one to preprocess) is much slower than one
    # tf.map_fn doing decode + preprocess together.
    if slim_preprocessing:
        preprocessing_fn = preprocessing_factory.get_preprocessing(
            net_name, is_training=(is_training and distort))
        image = tf.map_fn(
            lambda img: preprocessing_fn(
                decode_image(img, image_format=image_format, dtype=tf.float32),
                height, width),
            encoded_image,
            dtype=tf.float32)
    else:
        # im2txt style preprocessing
        image = tf.map_fn(
            lambda img: process_image(img,
                                      is_training=is_training,
                                      height=height,
                                      width=width,
                                      resize_height=resize_height,
                                      resize_width=resize_width,
                                      random_crop=random_crop,
                                      distort=distort,
                                      image_format=image_format),
            encoded_image,
            dtype=tf.float32)

    # TODO: add batch_norm like image_embedding.py does? Fully understand this!
    is_image_model_training = trainable and is_training

    if trainable:
        weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
    else:
        weights_regularizer = None

    with tf.variable_scope(scope, reuse=reuse):
        with slim.arg_scope(
            [slim.conv2d, slim.fully_connected],
                weights_regularizer=weights_regularizer,
                trainable=trainable):
            # Should this be faster than stop_gradient? Experiments show that
            # slim.arg_scope with trainable=False works.
            # The final num-classes layer is not used for feature extraction,
            # but since the checkpoint was trained with 1001 classes, set 1001
            # here for simplicity.
            num_classes = 1001
            # TODO: maybe modify to let scope be ''?
            net_fn = nets_factory.get_network_fn(
                net_name,
                num_classes=num_classes,
                is_training=is_image_model_training)
            logits, end_points = net_fn(image)
            # for key in end_points:
            #     print(key, end_points[key].shape)
            if feature_name is None:
                print('image_model feature_name is None, will get PreLogits')
                if 'PreLogitsFlatten' in end_points:
                    image_feature = end_points['PreLogitsFlatten']
                elif 'PreLogits' in end_points:
                    net = end_points['PreLogits']
                    image_feature = slim.flatten(net, scope="flatten")
                else:
                    raise ValueError('not found pre logits!')
            else:
                print('image_model will get feature_name %s' % feature_name)
                image_feature = end_points[feature_name]
                image_feature = slim.flatten(image_feature)

            # TODO: check it is really ok not to finetune? It still seems as
            # slow as im2txt; it should be much faster than finetuning. FIXME?
            # TODO: another way to set not-trainable may need modifying slim
            # get_network_fn?
            # if not trainable:  # just for safety; slim.arg_scope with
            #     image_feature = tf.stop_gradient(image_feature)  # trainable=False actually works
            if finetune_end_point:  # None or ''
                logging.info('finetune image model from end point:{} {}'.format(
                    finetune_end_point, end_points[finetune_end_point]))
                tf.stop_gradient(end_points[finetune_end_point])
            elif trainable:
                logging.info('finetune all image model layers')

        # Below is the same for inception v3:
        # image_feature = melt.image.image_embedding.inception_v3(
        #     image_feature,
        #     trainable=trainable,
        #     is_training=is_training,
        #     reuse=reuse,
        #     scope=scope)

        # Without the call below, eval_loss = trainer.build_train_graph(
        # eval_image_feature, eval_text, eval_neg_text) will fail; but we
        # still need to set reuse for melt.image.image_embedding.inception_v3
        # ... confusing, anyway it works now. Without reuse=True,
        # score = predictor.init_predict() will fail; reuse_variables does not
        # work for it. The trainer creates the function once and calls it a
        # second time, which works here (with scope.reuse_variables). The
        # predictor creates another function with seemingly the same name and
        # scope, but you need to set reuse=True again; even tf.make_template
        # still needs this. Got it, see hasky/jupter/scope.ipynb: train then
        # eval reuses the same fn() call, so in eval scope.reuse_variables()
        # takes effect.
        # escape_fn3 = create_escape_construct_fn('XXX')
        # escape_fn3()
        # escape_fn3()  # ok because of scope.reuse_variables() here
        # But for the predictor, escape_fn3 = create_escape_construct_fn('XXX')
        # is created again, so calling escape_fn3() will fail without reuse.
        scope.reuse_variables()
        # This makes the function's '' scope reuse (set True), but code that
        # does not go through this function's `with` block will still fail.

    print('image_feature:', image_feature)
    return image_feature
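# --- Hedged usage sketch (not part of the original source). construct_fn
# above is a closure: it captures `name`, `scope` and `feature_name` from an
# enclosing factory that is not shown in this excerpt, so it is passed in as
# an argument below. A typical caller feeds a 1-D string tensor of encoded
# jpeg bytes and gets back a flattened [batch_size, feature_dim] feature.
def _example_use_construct_fn(construct_fn):
    encoded_images = tf.placeholder(tf.string, [None], name='encoded_images')
    image_feature = construct_fn(
        encoded_images,
        height=299, width=299,  # 299 assumes an inception-style net
        trainable=False,
        is_training=False)
    return encoded_images, image_feature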
def __init__(self,
             image_checkpoint_file=None,
             model_name=None,
             height=None,
             width=None,
             feature_name=None,
             image_format='jpeg',
             moving_average_decay=None,
             num_classes=None,
             top_k=None,
             sess=None,
             graph=None):
    assert image_checkpoint_file or model_name, \
        'need model_name if train from scratch otherwise need image_checkpoint_file'
    self.graph = tf.Graph() if graph is None else graph
    self.sess = melt.gen_session(graph=self.graph) if sess is None else sess
    self.feature_name = feature_name

    if image_checkpoint_file:
        net = melt.image.get_imagenet_from_checkpoint(image_checkpoint_file)
        assert net is not None, image_checkpoint_file
        model_name = model_name or net.name
        height = height or net.default_image_size
        width = width or net.default_image_size
    else:
        assert model_name is not None
        gnu_name = gezi.to_gnu_name(model_name)
        net = nets_factory.networks_map[gnu_name]
        height = height or net.default_image_size
        width = width or net.default_image_size

    print('checkpoint', image_checkpoint_file, 'model_name', model_name,
          'height', height, 'width', width, file=sys.stderr)

    self.num_classes = num_classes
    self.model_name = model_name

    with self.sess.graph.as_default():
        self.images_feed = tf.placeholder(tf.string, [None], name='images')
        if not self.num_classes:
            print('build graph for final one feature', file=sys.stderr)
            self.feature = self._build_graph(
                model_name, height, width, image_format=image_format)
            print('build graph for attention features', file=sys.stderr)
            self.features = self._build_graph2(
                model_name, height, width, image_format=image_format)
        else:
            assert self.num_classes > 1
            if feature_name != 'Logits':
                prelogits_feature = self._build_graph(
                    model_name, height, width, image_format=image_format)
                # with tf.variable_scope('ImageModelLogits'):
                self.logits = slim.fully_connected(
                    prelogits_feature,
                    num_classes,
                    activation_fn=None,
                    scope='Logits')
            else:
                # directly use the slim model
                self.logits = self._build_graph(
                    model_name,
                    height,
                    width,
                    num_classes=num_classes,
                    image_format=image_format)
            if top_k:
                with tf.variable_scope('ImageModelTopN'):
                    self.top_logits, self.top_indices = tf.nn.top_k(
                        self.logits, top_k, name='TopK')
            self.predictions = tf.nn.softmax(self.logits, name='Predictions')
            # https://storage.googleapis.com/openimages/2017_07/oidv2-resnet_v1_101.readme.txt
            self.multi_predictions = tf.nn.sigmoid(
                self.logits, name='multi_predictions')

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        self.sess.run(init_op)

        if image_checkpoint_file:
            # load the inception model checkpoint file
            init_fn = melt.image.image_processing.create_image_model_init_fn(
                model_name,
                image_checkpoint_file,
                moving_average_decay=moving_average_decay)
            init_fn(self.sess)
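# --- Hedged usage sketch (not part of the original source). The __init__
# above belongs to an image-model wrapper class whose name is not visible in
# this excerpt, so the class is passed in; `ImageModelClass` and the
# checkpoint path are placeholders.
def _example_image_model(ImageModelClass, encoded_jpeg_bytes):
    model = ImageModelClass(image_checkpoint_file='/path/to/inception_v4.ckpt')
    # With num_classes unset, the wrapper exposes `feature` (final feature)
    # and `features` (attention features), fed through `images_feed`.
    feature = model.sess.run(
        model.feature, feed_dict={model.images_feed: [encoded_jpeg_bytes]})
    return feature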
def construct_fn(
        encoded_image,
        height=None,
        width=None,
        trainable=False,
        is_training=False,
        resize_height=346,
        resize_width=346,
        random_crop=True,
        distort=True,
        distort_color=True,
        slim_preprocessing=True,
        # if image_model_name.startswith('nasnet') and not moving_average_decay:
        weight_decay=0.00004,
        finetune_end_point=None,
        feature_name=feature_name,
        num_classes=num_classes,
        image_format="jpeg",  # for safety just use decode_jpeg
        reuse=None):
    logging.info('image model trainable:{}, is_training:{}'.format(
        trainable, is_training))

    # Allow [batch_size, 1] as input.
    # print(encoded_image.shape)  # should be (?,)
    # encoded_image = tf.squeeze(encoded_image)
    # A bare squeeze causes problems when the input batch size is 1, so it
    # seems dangerous. TODO check. tf.squeeze(encoded_image, 1) also fails
    # (index out of range). TODO: squeeze only if len(shape) > 1?
    # Below is also ok? TODO CHECK
    # shape_list = encoded_image.get_shape().as_list()
    # if len(shape_list) > 1:
    #     encoded_image = tf.squeeze(encoded_image, -1)

    # preprocess_image
    assert name is not None
    net_name = gezi.to_gnu_name(name)
    height = height or info[net_name]['height']
    width = width or info[net_name]['width']

    # This is slightly slow and the result differs from the im2txt InceptionV3
    # usage; the im2txt code seems ok, not sure whether slim preprocessing is
    # better! TODO
    # For inception-related models the im2txt processing should be fine; for
    # other models, not sure. TODO
    # slim preprocessing:   real 2m45.737s user 3m12.896s sys 0m10.265s
    # im2txt preprocessing: real 2m46.709s user 3m8.067s  sys 0m8.297s
    # The final features differ slightly. Interestingly, using two tf.map_fn
    # calls (one to decode, one to preprocess) is much slower than one
    # tf.map_fn doing decode + preprocess together.
    if preprocess_image:
        batch_size = encoded_image.get_shape()[0].value or tf.shape(encoded_image)[0]
        encoded_image = tf.reshape(encoded_image, [batch_size])
        if slim_preprocessing:
            # HACK HERE
            # https://github.com/tensorflow/models/tree/master/research/slim
            # ResNet V2 models use Inception pre-processing and an input image
            # size of 299 (use --preprocessing_name inception
            # --eval_image_size 299 when using eval_image_classifier.py).
            # TODO FIXME it also seems vgg preprocessing gives a similar
            # feature for resnet_v2_152.. why?
            # TODO also for resnet_v1_101 with the openimage pretrained model,
            # 299 must be used; with 224 the generated features come out
            # identical for all images.
            net_name_ = net_name
            # HACK: assume the only resnet101 model right now is the
            # OpenimageV2 one. TODO
            if net_name == 'resnet_v1_101':
                print('HACK for resnet_v1_101 openimage checkpoint preprocess image',
                      file=sys.stderr)
                # TODO FIXME!! Notice that setting dtype=tf.float32 causes a
                # large difference in results for resnet101 openimage:
                # image = tf.map_fn(
                #     lambda img: OpenimageV2PreprocessImage(
                #         decode_image(img, image_format=image_format, dtype=tf.float32),
                #         is_training=(is_training and distort)),
                #     encoded_image, dtype=tf.float32)
                image = tf.map_fn(
                    lambda img: OpenimageV2PreprocessImage(
                        decode_image(img, image_format=image_format),
                        is_training=(is_training and distort)),
                    encoded_image,
                    dtype=tf.float32)
            else:
                if net_name.startswith('resnet_v2'):
                    # if net_name.startswith('resnet'):
                    net_name_ = 'inception'
                    height = 299
                    width = 299
                    print('HACK here adjust to use inception preprocessing and '
                          'inception default height and width', file=sys.stderr)
                preprocessing_fn = preprocessing_factory.get_preprocessing(
                    net_name_, is_training=(is_training and distort))
                print('preprocessing_fn net_name', net_name_, 'height', height,
                      'width', width, file=sys.stderr)
                assert height is not None
                # Interestingly, for models other than the resnet101 openimage
                # model, with or without dtype=tf.float32 is all ok.
                # with tf.device('/cpu:0'):
                #     image = tf.map_fn(
                #         lambda img: preprocessing_fn(
                #             decode_image(img, image_format=image_format, dtype=tf.float32),
                #             height, width),
                #         encoded_image, dtype=tf.float32)
                # Set add_image_summaries to False for tf1.5, otherwise:
                # ValueError: Cannot use
                # 'show_and_tell/main/encode/map/while/distort_image/image_with_bounding_boxes'
                # as input to 'show_and_tell_1/Merge/MergeSummary' because it
                # is in a while loop. See info log for more details.
                image = tf.map_fn(
                    lambda img: preprocessing_fn(
                        decode_image(img, image_format=image_format),
                        height, width, add_image_summaries=False),
                    encoded_image,
                    dtype=tf.float32)
        else:
            raise ValueError(
                'im2txt style preprocessing is not used any more, just use slim preprocessing')
            # im2txt style preprocessing (unreachable after the raise above)
            # with tf.device('/cpu:0'):
            image = tf.map_fn(
                lambda img: process_image(img,
                                          is_training=is_training,
                                          height=height,
                                          width=width,
                                          resize_height=resize_height,
                                          resize_width=resize_width,
                                          random_crop=random_crop,
                                          distort=distort,
                                          distort_color=distort_color,
                                          image_format=image_format),
                encoded_image,
                dtype=tf.float32)
    else:
        image = encoded_image

    # TODO: add batch_norm like image_embedding.py does? Fully understand this!
    is_image_model_training = trainable and is_training

    if trainable and weight_decay:
        weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
    else:
        weights_regularizer = None

    with tf.variable_scope(scope, reuse=reuse):
        # TODO: remove this arg_scope? nets_factory gets the function with a
        # proper arg scope already!
        with slim.arg_scope(
            [slim.conv2d, slim.fully_connected],
                weights_regularizer=weights_regularizer,
                trainable=trainable):
            # Should this be faster than stop_gradient? Experiments show that
            # slim.arg_scope with trainable=False works.
            # The final num-classes layer is not used for feature extraction,
            # but since the checkpoint was trained with 1001 classes, set 1001
            # here for simplicity.
            # TODO: might try num_classes=None or 0, which builds the graph up
            # to before the logits layer and without dropout; what I use now
            # is the PreLogits feature, which is after the dropout layer.
            # TODO: num_classes should default to None, but slim then returns
            # the net just after global_pool, and my code used to use
            # PreLogits after that. So the hack here is to default to 1001;
            # if you use another pretrained model you may need to set
            # num_classes manually, e.g. num_classes=5000 (for the openimage
            # pretrained multilabel model), even if you only need the feature
            # before the fc (logits) layer. HACK! Otherwise loading the model
            # may complain about a wrong shape!
            if num_classes is None:
                num_classes = 1001
            # TODO: maybe modify to let scope be ''?
            logging.info('pretrain image model num_classes:{}'.format(num_classes))
            net_fn = nets_factory.get_network_fn(
                net_name,
                num_classes=num_classes,
                is_training=is_image_model_training)
            logits, end_points = net_fn(image)
            # from nets import inception
            # with slim.arg_scope(inception.inception_resnet_v2_arg_scope()):
            #     logits, end_points = inception.inception_resnet_v2(
            #         image, is_training=is_image_model_training,
            #         create_aux_logits=True)
            # for key in end_points:
            #     print(key, end_points[key].shape)
            # print('end_points', end_points, file=sys.stderr)

            # If feature_name is None we get the final feature, otherwise the
            # final attention feature. If the feature is in end_points we get
            # it directly, otherwise from info[image_name]['features_end_point'].
            # TODO: should the final end point for the feature be global_pool?
            # Which is better, global_pool without dropout or PreLogits?
            # Try finetuning with feature_name global_pool, or maybe just use
            # Logits (1001 dims) as the feature?
            print('feature_name', feature_name, file=sys.stderr)
            if feature_name and feature_name.lower() == 'logits':
                print('using slim image model logits', file=sys.stderr)
                image_feature = logits
                assert num_classes
            else:
                if (not feature_name) or (feature_name == 'final') or (
                        feature_name.lower() == 'none'):
                    # None, an empty feature name, or 'final': get the final
                    # single feature.
                    print('image_model feature_name is None, will get PreLogits '
                          'or PreLogitsFlatten', file=sys.stderr)
                    # print('end_points', end_points)
                    if 'PreLogitsFlatten' in end_points:
                        image_feature = end_points['PreLogitsFlatten']
                    elif 'PreLogits' in end_points:
                        net = end_points['PreLogits']
                        image_feature = slim.flatten(net, scope="flatten")
                    else:
                        print('not found pre logits! get default final feature',
                              file=sys.stderr)
                        feature_name = info[name]['feature_end_point']
                        print('image_model will get feature_name %s' % feature_name,
                              file=sys.stderr)
                        net = end_points[feature_name]
                        image_feature = slim.flatten(net, scope="flatten")
                else:
                    # get attention features
                    if feature_name not in end_points:
                        feature_name = info[name]['features_end_point']
                    image_feature = end_points[feature_name]
                    image_feature = slim.flatten(image_feature)

            print('image_feature:', image_feature, file=sys.stderr)

            # TODO: check it is really ok not to finetune? It still seems as
            # slow as im2txt; it should be much faster than finetuning. FIXME?
            # TODO: another way to set not-trainable may need modifying slim
            # get_network_fn?
            # if not trainable:  # just for safety; slim.arg_scope with
            #     image_feature = tf.stop_gradient(image_feature)  # trainable=False actually works
            if finetune_end_point:  # None or ''
                logging.info('finetune image model from end point:{} {}'.format(
                    finetune_end_point, end_points[finetune_end_point]))
                tf.stop_gradient(end_points[finetune_end_point])
            elif trainable:
                logging.info('finetune all image model layers')

        # Below is the same for inception v3:
        # image_feature = melt.image.image_embedding.inception_v3(
        #     image_feature,
        #     trainable=trainable,
        #     is_training=is_training,
        #     reuse=reuse,
        #     scope=scope)

        # Without the call below, eval_loss = trainer.build_train_graph(
        # eval_image_feature, eval_text, eval_neg_text) will fail; but we
        # still need to set reuse for melt.image.image_embedding.inception_v3
        # ... confusing, anyway it works now. Without reuse=True,
        # score = predictor.init_predict() will fail; reuse_variables does not
        # work for it. The trainer creates the function once and calls it a
        # second time, which works here (with scope.reuse_variables). The
        # predictor creates another function with seemingly the same name and
        # scope, but you need to set reuse=True again; even tf.make_template
        # still needs this. Got it, see hasky/jupter/scope.ipynb: train then
        # eval reuses the same fn() call, so in eval scope.reuse_variables()
        # takes effect.
        # escape_fn3 = create_escape_construct_fn('XXX')
        # escape_fn3()
        # escape_fn3()  # ok because of scope.reuse_variables() here
        # But for the predictor, escape_fn3 = create_escape_construct_fn('XXX')
        # is created again, so calling escape_fn3() will fail without reuse.
        scope.reuse_variables()
        # This makes the function's '' scope reuse (set True), but code that
        # does not go through this function's `with` block will still fail.

    return image_feature
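# --- Hedged usage sketch (not part of the original source). This second
# construct_fn is also a closure (capturing `name`, `scope`, `info`,
# `preprocess_image`, `feature_name` and `num_classes` from its enclosing
# factory), so it is passed in here. Height/width default to the per-net
# values in `info` and can be omitted; passing feature_name='Logits' would
# instead return the logits layer as the feature.
def _example_use_construct_fn2(construct_fn):
    encoded_images = tf.placeholder(tf.string, [None], name='encoded_images')
    image_feature = construct_fn(
        encoded_images, trainable=False, is_training=False)
    return encoded_images, image_feature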