def get_imagenet_from_checkpoint(checkpoint_path):
    """
  net = get_net_from_checkpoint(checkpoint)
  net.func_name  # like inception_v4
  net.default_image_size # like 299
  """
    checkpoint = melt.get_model_path(checkpoint_path)
    if not checkpoint or \
     (not os.path.exists(checkpoint) \
        and not os.path.exists(checkpoint + '.index')):
        return None

    from tensorflow.python import pywrap_tensorflow
    reader = pywrap_tensorflow.NewCheckpointReader(checkpoint)
    var_to_shape_map = reader.get_variable_to_shape_map()
    name = None
    gnu_name = None
    for key in var_to_shape_map.keys():
        prefix = key.split('/')[0]
        gnu_name = gezi.to_gnu_name(prefix)
        if gnu_name in nets_factory.networks_map:
            name = prefix
            break
    if name is None:
        return None
    nets_factory.networks_map[gnu_name].name = name
    return nets_factory.networks_map[gnu_name]

def get_net_from_checkpoint(checkpoint):
    """
  net = get_net_from_checkpoint(checkpoint)
  net.func_name  # like inception_v4
  net.default_image_size # like 299
  """
    from tensorflow.python import pywrap_tensorflow
    reader = pywrap_tensorflow.NewCheckpointReader(checkpoint)
    var_to_shape_map = reader.get_variable_to_shape_map()
    name = None
    gnu_name = None
    for key in var_to_shape_map.keys():
        prefix = key.split('/')[0]
        gnu_name = gezi.to_gnu_name(prefix)
        if gnu_name in nets_factory.networks_map:
            name = prefix
            break
    if name is None:
        return None
    return nets_factory.networks_map[gnu_name], name
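
# A minimal usage sketch for the two helpers above (the checkpoint path is a
# hypothetical example, assuming a slim-style checkpoint on disk):
#   net, name = get_net_from_checkpoint('/path/to/inception_v4.ckpt')
#   print(net.func_name, net.default_image_size)  # e.g. inception_v4 299
#   net = get_imagenet_from_checkpoint('/path/to/inception_v4.ckpt')
#   if net is not None:
#       print(net.name)  # the variable scope name found in the checkpoint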
Example #3
        def construct_fn(
                encoded_image,
                height,
                width,
                trainable=False,
                is_training=False,
                resize_height=346,
                resize_width=346,
                random_crop=True,
                distort=True,
                slim_preprocessing=True,
                weight_decay=0.00004,
                finetune_end_point=None,
                feature_name=feature_name,
                image_format="jpeg",  #for safe just use decode_jpeg
                reuse=None):
            logging.info('image model trainable:{}, is_training:{}'.format(
                trainable, is_training))

            # allow [batch_size, 1] as input
            # print(encoded_image.shape)  # should be (?,)
            # encoded_image = tf.squeeze(encoded_image)  # this will cause problems if the input batch size is 1, so squeeze seems dangerous TODO check
            # tf.squeeze(encoded_image, 1) also does not work (index out of range); TODO maybe only squeeze if len(shape) > 1?

            batch_size = encoded_image.get_shape()[0].value or tf.shape(
                encoded_image)[0]
            encoded_image = tf.reshape(encoded_image, [batch_size])
            # below is also ok? TODO CHECK
            # shape_list = encoded_image.get_shape().as_list()
            # if len(shape_list) > 1:
            #   encoded_image = tf.squeeze(encoded_image, -1)

            # preprocess_image
            net_name = gezi.to_gnu_name(name)
            # Well, this is slightly slow and the result differs from the im2txt InceptionV3 usage result;
            # using the im2txt code seems ok, not sure if slim preprocessing is better! TODO
            # For Inception-related models the im2txt processing should be fine; for other models, not sure. TODO
            # using slim preprocessing: real 2m45.737s  user 3m12.896s  sys 0m10.265s
            # using im2txt processing:  real 2m46.709s  user 3m8.067s   sys 0m8.297s
            # and the final features will be slightly different
            # One interesting thing: using 2 tf.map_fn calls (1 to decode, 1 to preprocess) is much slower than 1 tf.map_fn (decode + preprocess)
            if slim_preprocessing:
                preprocessing_fn = preprocessing_factory.get_preprocessing(
                    net_name, is_training=(is_training and distort))
                image = tf.map_fn(lambda img: preprocessing_fn(
                    decode_image(img,
                                 image_format=image_format,
                                 dtype=tf.float32), height, width),
                                  encoded_image,
                                  dtype=tf.float32)
            else:
                #im2txt style preprocessing
                image = tf.map_fn(
                    lambda img: process_image(img,
                                              is_training=is_training,
                                              height=height,
                                              width=width,
                                              resize_height=resize_height,
                                              resize_width=resize_width,
                                              random_crop=random_crop,
                                              distort=distort,
                                              image_format=image_format),
                    encoded_image,
                    dtype=tf.float32)
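
            # A hedged sketch of the fused-vs-split tf.map_fn point above, using
            # the decode_image and preprocessing_fn already in scope here:
            #   # slow: two separate map_fn passes over the batch
            #   decoded = tf.map_fn(lambda img: decode_image(img, image_format=image_format, dtype=tf.float32),
            #                       encoded_image, dtype=tf.float32)
            #   image = tf.map_fn(lambda img: preprocessing_fn(img, height, width),
            #                     decoded, dtype=tf.float32)
            #   # fast: one fused map_fn doing decode + preprocess per element,
            #   # which is what both branches above do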

            # TODO add batch_norm like image_embedding.py? fully understand!
            is_image_model_training = trainable and is_training
            if trainable:
                weights_regularizer = tf.contrib.layers.l2_regularizer(
                    weight_decay)
            else:
                weights_regularizer = None

            with tf.variable_scope(scope, reuse=reuse):
                with slim.arg_scope(
                    [slim.conv2d, slim.fully_connected],
                        weights_regularizer=weights_regularizer,
                        trainable=trainable
                ):  # should this be faster than stop_gradient? experiments show slim.arg_scope with trainable=False works

                    # the final num-classes layer is not used for the image-feature purpose, but since the checkpoint was trained with 1001 classes, set 1001 here for simplicity
                    num_classes = 1001
                    # TODO might modify to let scope be ''?
                    net_fn = nets_factory.get_network_fn(
                        net_name,
                        num_classes=num_classes,
                        is_training=is_image_model_training)
                    logits, end_points = net_fn(image)

                    # for key in end_points:
                    #   print(key, end_points[key].shape)
                    if feature_name is None:
                        print(
                            'image_model feature_name is None, will get PreLogits'
                        )
                        if 'PreLogitsFlatten' in end_points:
                            image_feature = end_points['PreLogitsFlatten']
                        elif 'PreLogits' in end_points:
                            net = end_points['PreLogits']
                            image_feature = slim.flatten(net, scope="flatten")
                        else:
                            raise ValueError('not found pre logits!')
                    else:
                        print('image_model will get feature_name %s' %
                              feature_name)
                        image_feature = end_points[feature_name]
                        image_feature = slim.flatten(image_feature)
                    # TODO check: is it really ok not to finetune? it still seems as slow as im2txt; it should be much faster than finetuning.. FIXME?
                    # TODO another method to set not-trainable; need to modify slim get_network_fn?
                    # if not trainable:  # just to be safe.. actually slim.arg_scope with trainable=False works
                    #   image_feature = tf.stop_gradient(image_feature)
                    if finetune_end_point:  # None or ''
                        logging.info(
                            'finetune image model from end point:{} {}'.format(
                                finetune_end_point,
                                end_points[finetune_end_point]))
                        tf.stop_gradient(end_points[finetune_end_point])
                    elif trainable:
                        logging.info('finetune all image model layers')

                    #--below is the same for inception v3
                    # image_feature = melt.image.image_embedding.inception_v3(
                    #   image_feature,
                    #   trainable=trainable,
                    #   is_training=is_training,
                    #   reuse=reuse,
                    #   scope=scope)

                    # if this is not set, eval_loss = trainer.build_train_graph(eval_image_feature, eval_text, eval_neg_text) will fail
                    # but we still need to set reuse for melt.image.image_embedding.inception_v3... confusing.., anyway it works now..
                    # without reuse=True, score = predictor.init_predict() will fail; reuse_variables does not work for it..
                    # the trainer creates the function once and uses it a second time (same function), which works here (with scope.reuse_variables)
                    # the predictor creates another function; though it seems to be the same name and same scope, you need to set reuse=True again!
                    # even if you use tf.make_template you still need this..
                    # got it, see hasky/jupyter/scope.ipynb: because train then eval use the same fn() call again, in eval scope.reuse_variables() will be in effect
                    # escape_fn3 = create_escape_construct_fn('XXX')
                    # escape_fn3()
                    # escape_fn3()  # ok because of scope.reuse_variables() here
                    # but for the predictor, escape_fn3 = create_escape_construct_fn('XXX') creates it again, then escape_fn3() will fail and needs reuse

                scope.reuse_variables()  # this is fine: it sets reuse=True on the function's '' scope, but if the function is not used with ... it will also fail
            print('image_feature:', image_feature)
            return image_feature
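
        # A minimal usage sketch for construct_fn above (placeholder shape and
        # image sizes are assumptions for illustration); calling it twice works
        # because of the scope.reuse_variables() call inside:
        #   encoded = tf.placeholder(tf.string, [None], name='encoded_jpegs')
        #   train_feature = construct_fn(encoded, 299, 299, is_training=True)
        #   eval_feature = construct_fn(encoded, 299, 299)  # reuses the same variables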
Example #4
    def __init__(self,
                 image_checkpoint_file=None,
                 model_name=None,
                 height=None,
                 width=None,
                 feature_name=None,
                 image_format='jpeg',
                 moving_average_decay=None,
                 num_classes=None,
                 top_k=None,
                 sess=None,
                 graph=None):
        assert image_checkpoint_file or model_name, 'need model_name if training from scratch, otherwise need image_checkpoint_file'
        self.graph = tf.Graph() if graph is None else graph
        self.sess = melt.gen_session(
            graph=self.graph) if sess is None else sess
        self.feature_name = feature_name

        if image_checkpoint_file:
            net = melt.image.get_imagenet_from_checkpoint(
                image_checkpoint_file)
            assert net is not None, image_checkpoint_file
            model_name = model_name or net.name
            height = height or net.default_image_size
            width = width or net.default_image_size
        else:
            assert model_name is not None
            gnu_name = gezi.to_gnu_name(model_name)
            net = nets_factory.networks_map[gnu_name]
            height = height or net.default_image_size
            width = width or net.default_image_size

        print('checkpoint',
              image_checkpoint_file,
              'model_name',
              model_name,
              'height',
              height,
              'width',
              width,
              file=sys.stderr)

        self.num_classes = num_classes
        self.model_name = model_name
        with self.sess.graph.as_default():
            self.images_feed = tf.placeholder(tf.string, [None], name='images')
            if not self.num_classes:
                print('build graph for the final single feature', file=sys.stderr)
                self.feature = self._build_graph(model_name,
                                                 height,
                                                 width,
                                                 image_format=image_format)
                print('build graph for attention features', file=sys.stderr)
                self.features = self._build_graph2(model_name,
                                                   height,
                                                   width,
                                                   image_format=image_format)
            else:
                assert self.num_classes > 1
                if feature_name != 'Logits':
                    prelogits_feature = self._build_graph(
                        model_name, height, width, image_format=image_format)
                    #with tf.variable_scope('ImageModelLogits'):
                    self.logits = slim.fully_connected(prelogits_feature,
                                                       num_classes,
                                                       activation_fn=None,
                                                       scope='Logits')
                else:
                    # directly use slim model
                    self.logits = self._build_graph(model_name,
                                                    height,
                                                    width,
                                                    num_classes=num_classes,
                                                    image_format=image_format)
                if top_k:
                    with tf.variable_scope('ImageModelTopN'):
                        self.top_logits, self.top_indices = tf.nn.top_k(
                            self.logits, top_k, name='TopK')
                self.predictions = tf.nn.softmax(self.logits,
                                                 name='Predictions')
                # https://storage.googleapis.com/openimages/2017_07/oidv2-resnet_v1_101.readme.txt
                self.multi_predictions = tf.nn.sigmoid(
                    self.logits, name='multi_predictions')

            init_op = tf.group(tf.global_variables_initializer(),
                               tf.local_variables_initializer())
            self.sess.run(init_op)
            if image_checkpoint_file:
                # --- load the pretrained image model checkpoint file
                init_fn = melt.image.image_processing.create_image_model_init_fn(
                    model_name,
                    image_checkpoint_file,
                    moving_average_decay=moving_average_decay)
                init_fn(self.sess)
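
        # A hedged usage sketch for this class (the class name ImageModel and the
        # checkpoint path are assumptions for illustration):
        #   model = ImageModel(image_checkpoint_file='/path/to/inception_v4.ckpt')
        #   with open('cat.jpg', 'rb') as f:
        #       feature = model.sess.run(model.feature,
        #                                {model.images_feed: [f.read()]})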
        def construct_fn(
                encoded_image,
                height=None,
                width=None,
                trainable=False,
                is_training=False,
                resize_height=346,
                resize_width=346,
                random_crop=True,
                distort=True,
                distort_color=True,
                slim_preprocessing=True,  # if image_model_name.startswith('nasnet') and not moving_average_decay:
                weight_decay=0.00004,
                finetune_end_point=None,
                feature_name=feature_name,
                num_classes=num_classes,
                image_format="jpeg",  #for safe just use decode_jpeg
                reuse=None):
            logging.info('image model trainable:{}, is_training:{}'.format(
                trainable, is_training))

            # allow [batch_size, 1] as input
            # print(encoded_image.shape)  # should be (?,)
            # encoded_image = tf.squeeze(encoded_image)  # this will cause problems if the input batch size is 1, so squeeze seems dangerous TODO check
            # tf.squeeze(encoded_image, 1) also does not work (index out of range); TODO maybe only squeeze if len(shape) > 1?

            # below is also ok? TODO CHECK
            # shape_list = encoded_image.get_shape().as_list()
            # if len(shape_list) > 1:
            #   encoded_image = tf.squeeze(encoded_image, -1)

            #preprocess_image
            assert name is not None
            net_name = gezi.to_gnu_name(name)

            height = height or info[net_name]['height']
            width = width or info[net_name]['width']

            # Well, this is slightly slow and the result differs from the im2txt InceptionV3 usage result;
            # using the im2txt code seems ok, not sure if slim preprocessing is better! TODO
            # For Inception-related models the im2txt processing should be fine; for other models, not sure. TODO
            # using slim preprocessing: real 2m45.737s  user 3m12.896s  sys 0m10.265s
            # using im2txt processing:  real 2m46.709s  user 3m8.067s   sys 0m8.297s
            # and the final features will be slightly different
            # One interesting thing: using 2 tf.map_fn calls (1 to decode, 1 to preprocess) is much slower than 1 tf.map_fn (decode + preprocess)
            if preprocess_image:
                batch_size = encoded_image.get_shape()[0].value or tf.shape(
                    encoded_image)[0]
                encoded_image = tf.reshape(encoded_image, [batch_size])
                if slim_preprocessing:
                    # HACK HERE
                    # https://github.com/tensorflow/models/tree/master/research/slim
                    # ^ ResNet V2 models use Inception pre-processing and input image size of 299 (use --preprocessing_name inception
                    # --eval_image_size 299 when using eval_image_classifier.py).
                    # TODO FIXME also it seems vgg preprocessing yields similar features for resnet_v2_152.. why?
                    # TODO also for resnet_v1_101 with the openimage pretrained model, 299 must be used; with 224 the generated features are all the same for all images
                    net_name_ = net_name

                    # HACK: assume the only resnet101 model right now is the OpenimageV2 one  TODO
                    if net_name == 'resnet_v1_101':
                        print(
                            'HACK for resnet_v1_101 openimage checkpoint preprocess image',
                            file=sys.stderr)
                        # TODO FIXME !! Notice setting dtype=tf.float32 causes a big difference in results for the resnet 101 openimage model
                        # image = tf.map_fn(lambda img: OpenimageV2PreprocessImage(decode_image(img, image_format=image_format, dtype=tf.float32), is_training=(is_training and distort)),
                        #         encoded_image, dtype=tf.float32)
                        image = tf.map_fn(
                            lambda img: OpenimageV2PreprocessImage(
                                decode_image(img, image_format=image_format),
                                is_training=(is_training and distort)),
                            encoded_image,
                            dtype=tf.float32)
                    else:
                        if net_name.startswith('resnet_v2'):
                            #if net_name.startswith('resnet'):
                            net_name_ = 'inception'
                            height = 299
                            width = 299
                            print(
                                'HACK here adjust to use inception preprocessing and inception default height and width',
                                file=sys.stderr)

                        preprocessing_fn = preprocessing_factory.get_preprocessing(
                            net_name_, is_training=(is_training and distort))

                        print('preprocessing_fn net_name',
                              net_name_,
                              'height',
                              height,
                              'width',
                              width,
                              file=sys.stderr)
                        assert height is not None
                        # Interestingly, for models other than the resnet101 openimage model, with or without dtype=tf.float32 is all ok
                        #with tf.device('/cpu:0'):
                        # image = tf.map_fn(lambda img: preprocessing_fn(decode_image(img, image_format=image_format, dtype=tf.float32), height, width),
                        #                   encoded_image, dtype=tf.float32)
                        # set add_image_summaries to False for tf1.5
                        # otherwise ValueError: Cannot use 'show_and_tell/main/encode/map/while/distort_image/image_with_bounding_boxes' as input to 'show_and_tell_1/Merge/MergeSummary'
                        # because 'show_and_tell/main/encode/map/while/distort_image/image_with_bounding_boxes' is in a while loop. See info log for more details.
                        image = tf.map_fn(lambda img: preprocessing_fn(
                            decode_image(img, image_format=image_format),
                            height,
                            width,
                            add_image_summaries=False),
                                          encoded_image,
                                          dtype=tf.float32)
                else:
                    raise ValueError(
                        'im2txt style preprocessing is not used any more, just use slim preprocessing'
                    )
                    # im2txt style preprocessing (unreachable below, kept for reference)
                    # with tf.device('/cpu:0'):
                    image = tf.map_fn(
                        lambda img: process_image(img,
                                                  is_training=is_training,
                                                  height=height,
                                                  width=width,
                                                  resize_height=resize_height,
                                                  resize_width=resize_width,
                                                  random_crop=random_crop,
                                                  distort=distort,
                                                  distort_color=distort_color,
                                                  image_format=image_format),
                        encoded_image,
                        dtype=tf.float32)
            else:
                image = encoded_image

            # TODO add batch_norm like image_embedding.py? fully understand!
            is_image_model_training = trainable and is_training
            if trainable and weight_decay:
                weights_regularizer = tf.contrib.layers.l2_regularizer(
                    weight_decay)
            else:
                weights_regularizer = None

            with tf.variable_scope(scope, reuse=reuse):
                # TODO remove this arg_scope? since nets_factory already returns the function with a proper arg_scope!
                with slim.arg_scope(
                    [slim.conv2d, slim.fully_connected],
                        weights_regularizer=weights_regularizer,
                        trainable=trainable
                ):  # should this be faster than stop_gradient? experiments show slim.arg_scope with trainable=False works

                    # actually the final num-classes layer is not used for the image-feature purpose, but since the checkpoint was trained with 1001 classes, set 1001 here for simplicity
                    # TODO might try num_classes=None or 0, which builds the graph up to before the logits layer and without dropout; but for now
                    # what I use is the PreLogits feature, which comes after the dropout layer
                    # TODO num_classes should default to None, but then slim returns net just after global_pool, and my code used to use PreLogits
                    # after that; so the hack here is defaulting to 1001. If you are using another pretrained model you may need to
                    # manually set num_classes, e.g. num_classes=5000 (for the openimage pretrained multilabel model), even if you just need the feature
                    # before the fc (logits) layer. HACK! otherwise you may face problems loading the model, with complaints about wrong shapes!
                    if num_classes is None:
                        num_classes = 1001
                    # TODO might modify to let scope be '' ?
                    logging.info('pretrain image model num_classes:{}'.format(
                        num_classes))
                    net_fn = nets_factory.get_network_fn(
                        net_name,
                        num_classes=num_classes,
                        is_training=is_image_model_training)
                    logits, end_points = net_fn(image)

                    # from nets import inception
                    # with slim.arg_scope(inception.inception_resnet_v2_arg_scope()):
                    #   logits, end_points = inception.inception_resnet_v2(image, is_training=is_image_model_training, create_aux_logits=True)

                    # for key in end_points:
                    #   print(key, end_points[key].shape)
                    # print('end_points', end_points, file=sys.stderr)
                    # if feature_name is None we get the final feature, otherwise the final attention feature
                    # if the feature is in end_points we get it directly, otherwise we get it from info[image_name]['features_end_point']
                    # TODO should the final end point for the feature be global_pool? which is better: global_pool without dropout, or PreLogits?
                    # Try finetuning with feature_name global_pool, or maybe just use Logits (1001-dim) as the feature?

                    print('feature_name', feature_name, file=sys.stderr)
                    if feature_name and feature_name.lower() == 'logits':
                        print('using slim image model logits', file=sys.stderr)
                        image_feature = logits
                        assert num_classes
                    else:
                        if (not feature_name) or (feature_name == 'final') or (
                                feature_name.lower() == 'none'):
                            # None, empty, or 'final' feature name: get the final single feature
                            print(
                                'image_model feature_name is None, will get PreLogits or PreLogitsFlatten',
                                file=sys.stderr)
                            #print('end_point', end_points)
                            if 'PreLogitsFlatten' in end_points:
                                image_feature = end_points['PreLogitsFlatten']
                            elif 'PreLogits' in end_points:
                                net = end_points['PreLogits']
                                image_feature = slim.flatten(net,
                                                             scope="flatten")
                            else:
                                print(
                                    'not found pre logits! get default final feature',
                                    file=sys.stderr)
                                feature_name = info[name]['feature_end_point']
                                print('image_model will get feature_name %s' %
                                      feature_name,
                                      file=sys.stderr)
                                net = end_points[feature_name]
                                image_feature = slim.flatten(net,
                                                             scope="flatten")
                        else:
                            # get attention features
                            if feature_name not in end_points:
                                feature_name = info[name]['features_end_point']
                            image_feature = end_points[feature_name]
                            image_feature = slim.flatten(image_feature)

                    print('image_feature:', image_feature, file=sys.stderr)
                    # TODO check: is it really ok not to finetune? it still seems as slow as im2txt; it should be much faster than finetuning.. FIXME?
                    # TODO another method to set not-trainable; need to modify slim get_network_fn?
                    # if not trainable:  # just to be safe.. actually slim.arg_scope with trainable=False works
                    #   image_feature = tf.stop_gradient(image_feature)
                    if finetune_end_point:  # None or ''
                        logging.info(
                            'finetune image model from end point:{} {}'.format(
                                finetune_end_point,
                                end_points[finetune_end_point]))
                        tf.stop_gradient(end_points[finetune_end_point])
                    elif trainable:
                        logging.info('finetune all image model layers')

                    #--below is the same for inception v3
                    # image_feature = melt.image.image_embedding.inception_v3(
                    #   image_feature,
                    #   trainable=trainable,
                    #   is_training=is_training,
                    #   reuse=reuse,
                    #   scope=scope)

                    # if this is not set, eval_loss = trainer.build_train_graph(eval_image_feature, eval_text, eval_neg_text) will fail
                    # but we still need to set reuse for melt.image.image_embedding.inception_v3... confusing.., anyway it works now..
                    # without reuse=True, score = predictor.init_predict() will fail; reuse_variables does not work for it..
                    # the trainer creates the function once and uses it a second time (same function), which works here (with scope.reuse_variables)
                    # the predictor creates another function; though it seems to be the same name and same scope, you need to set reuse=True again!
                    # even if you use tf.make_template you still need this..
                    # got it, see hasky/jupyter/scope.ipynb: because train then eval use the same fn() call again, in eval scope.reuse_variables() will be in effect
                    # escape_fn3 = create_escape_construct_fn('XXX')
                    # escape_fn3()
                    # escape_fn3()  # ok because of scope.reuse_variables() here
                    # but for the predictor, escape_fn3 = create_escape_construct_fn('XXX') creates it again, then escape_fn3() will fail and needs reuse

                    scope.reuse_variables()  # this is fine: it sets reuse=True on the function's '' scope, but if the function is not used with ... it will also fail
                    return image_feature
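
        # A hedged usage sketch for this construct_fn variant (the placeholder
        # and the num_classes override are illustrative assumptions; per the
        # comments above, e.g. an openimage pretrained multilabel checkpoint
        # needs its own num_classes to load):
        #   encoded = tf.placeholder(tf.string, [None], name='encoded_jpegs')
        #   feature = construct_fn(encoded)                      # PreLogits-style feature
        #   logits = construct_fn(encoded, feature_name='Logits',
        #                         num_classes=5000, reuse=True)  # e.g. openimage model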