def get_image_features(image, is_training, config, resnet_scope="", cbn=None):

    image_input_type = config["image_input"]

    # Extract feature from 1D-image features
    if image_input_type == "fc8" \
            or image_input_type == "fc7" \
            or image_input_type == "dummy":

        image_out = image
        if config.get('normalize', False):
            image_out = tf.nn.l2_normalize(image, dim=1, name="fc_normalization")

    elif image_input_type == "rcnn":

        img = tf.transpose(image, perm=[0, 2, 1])  # B, 2048, 36
        att = tf.nn.softmax(tf.ones([tf.shape(img)[0], 1, 36]), axis=-1)
        image_out = tf.reduce_sum(img * att, axis=-1)  # B, 2048

        if config.get('normalize', False):
            image_out = tf.nn.l2_normalize(image_out, dim=1, name="fc_normalization")

    elif image_input_type.startswith("conv") or image_input_type.startswith("raw"):

        # Extract feature from raw images
        if image_input_type.startswith("raw"):

            # Create ResNet
            resnet_version = config['resnet_version']
            image_out = create_resnet(image,
                                      is_training=is_training,
                                      scope=resnet_scope,
                                      cbn=cbn,
                                      resnet_version=resnet_version,
                                      resnet_out=config.get('resnet_out', "block4"))
        else:
            image_out = image

        if config.get('normalize', False):
            image_out = tf.nn.l2_normalize(image_out, dim=[1, 2, 3])

    else:
        assert False, "Wrong input type for image"

    return image_out
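# Minimal usage sketch for the "rcnn" branch above. The placeholder shape and
# config values are assumptions for illustration only, not taken from the repo.
import tensorflow as tf

config = {
    "image_input": "rcnn",   # 36 pre-extracted bottom-up region features of size 2048 (assumed)
    "normalize": True,
}

# Batch of region features: (batch, 36 regions, 2048 dims)
rcnn_features = tf.placeholder(tf.float32, shape=[None, 36, 2048], name="rcnn_features")

# Uniform soft attention averages the 36 regions into a (batch, 2048) vector,
# which is then L2-normalized because config["normalize"] is True.
pooled = get_image_features(rcnn_features, is_training=False, config=config)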
def get_image_features(image, is_training, config, resnet_scope="", cbn=None):

    image_input_type = config["image_input"]

    # Extract feature from 1D-image features
    if image_input_type == "fc8" \
            or image_input_type == "fc7" \
            or image_input_type == "dummy":

        image_out = image
        if config.get('normalize', False):
            image_out = tf.nn.l2_normalize(image, dim=1, name="fc_normalization")

    elif image_input_type.startswith("conv") or image_input_type.startswith("raw"):

        # Extract feature from raw images
        if image_input_type.startswith("raw"):

            # Create ResNet
            resnet_version = config['resnet_version']
            image_out = create_resnet(image,
                                      is_training=is_training,
                                      scope=resnet_scope,
                                      cbn=cbn,
                                      resnet_version=resnet_version,
                                      resnet_out=config.get('resnet_out', "block4"))
        else:
            image_out = image

        if config.get('normalize', False):
            image_out = tf.nn.l2_normalize(image_out, dim=[1, 2, 3])

    else:
        assert False, "Wrong input type for image"

    return image_out
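# Usage sketch for the "raw" branch of the variant above. The image size,
# ResNet version and scope name are assumptions for illustration only.
import tensorflow as tf

config = {
    "image_input": "raw",
    "resnet_version": 50,      # assumed value; any version accepted by create_resnet
    "resnet_out": "block4",    # default used when the key is missing
    "normalize": False,
}

raw_images = tf.placeholder(tf.float32, shape=[None, 224, 224, 3], name="raw_images")

# Returns the block4 feature maps produced by create_resnet.
feature_maps = get_image_features(raw_images, is_training=False, config=config,
                                  resnet_scope="image_resnet")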
                                  width=args.img_size,
                                  scale=args.crop_scale,
                                  channel=channel_mean)
else:
    assert False, "Invalid mode: {}".format(args.mode)

# Define the output folder
out_file = "gw_{mode}_{network}_{feature_name}_{size}".format(
    mode=args.mode, network=args.network,
    feature_name=args.feature_name, size=args.img_size)

print("Create networks...")
if args.network == "resnet":
    ft_output = resnet.create_resnet(images,
                                     resnet_out=args.feature_name,
                                     resnet_version=args.resnet_version,
                                     is_training=False)
    # create network
    with slim.arg_scope(slim_utils.resnet_arg_scope(is_training=False)):
        _, end_points = resnet_v1.resnet_v1_152(images, 1000)  # 1000 is the number of softmax classes

elif args.network == "vgg":
    _, end_points = vgg.vgg_16(images, is_training=False, dropout_keep_prob=1.0)
    ft_name = os.path.join("vgg_16", args.feature_name)
    ft_output = end_points[ft_name]

else:
    assert False, "Incorrect Network"

extract_features(
def get_image_features(image, question, is_training, scope_name, config,
                       dropout_keep=1., reuse=False, att=True):

    image_input_type = config["image_input"]

    # Extract feature from 1D-image features
    if image_input_type == "fc8" \
            or image_input_type == "fc7" \
            or image_input_type == "dummy":

        image_out = image
        if config.get('normalize', False):
            image_out = tf.nn.l2_normalize(image, dim=1, name="fc_normalization")

    elif image_input_type.startswith("conv") or image_input_type.startswith("raw"):

        # Extract feature from raw images
        if image_input_type.startswith("raw"):

            # Create CBN
            cbn = None
            if "cbn" in config and config["cbn"].get("use_cbn", False) and question is not None:
                cbn_factory = CBNfromLSTM(question, no_units=config['cbn']["cbn_embedding_size"])
                excluded_scopes = config["cbn"].get('excluded_scope_names', [])
                cbn = ConditionalBatchNorm(cbn_factory,
                                           excluded_scope_names=excluded_scopes,
                                           is_training=is_training)
            else:
                # There is a bug with classic batchnorm in slim networks
                # (https://github.com/tensorflow/tensorflow/issues/4887), so fall back to
                # conditional batchnorm with every scope excluded, i.e. the config
                # 'cbn': {'use_cbn': true, 'excluded_scope_names': ['*']}.
                cbn_factory = CBNfromLSTM(question, no_units=config['cbn']["cbn_embedding_size"])
                excluded_scopes = ["*"]
                cbn = ConditionalBatchNorm(cbn_factory,
                                           excluded_scope_names=excluded_scopes,
                                           is_training=is_training)

            # Create ResNet
            resnet_version = config['resnet_version']
            image_feature_maps = create_resnet(image,
                                               is_training=is_training,
                                               scope=scope_name,
                                               cbn=cbn,
                                               resnet_version=resnet_version,
                                               resnet_out=config.get('resnet_out', "block4"))

            if config.get('normalize', False):
                image_feature_maps = tf.nn.l2_normalize(image_feature_maps, dim=[1, 2, 3])

        # Extract feature from 3D-image features
        else:
            image_feature_maps = image

        # apply attention
        if att:
            image_out = get_attention(image_feature_maps, question,
                                      config=config["attention"],
                                      dropout_keep=dropout_keep,
                                      reuse=reuse)
        else:
            image_out = image_feature_maps

    else:
        assert False, "Wrong input type for image"

    return image_out
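# Usage sketch for the CBN-enabled raw branch above. The question-embedding
# size, ResNet version and scope name are assumptions for illustration only.
# att=False is used so that the get_attention sub-config ("attention") is not
# needed here.
import tensorflow as tf

config = {
    "image_input": "raw",
    "resnet_version": 50,           # assumed
    "resnet_out": "block4",
    "normalize": True,
    "cbn": {
        "use_cbn": True,
        "cbn_embedding_size": 512,  # assumed
        # config recommended by the batchnorm/slim comment above
        "excluded_scope_names": ["*"],
    },
}

raw_images = tf.placeholder(tf.float32, shape=[None, 224, 224, 3], name="raw_images")
question_lstm = tf.placeholder(tf.float32, shape=[None, 1024], name="question_lstm")  # assumed shape

# Returns L2-normalized block4 feature maps; the CBN factory built from the
# question is passed down to create_resnet.
feature_maps = get_image_features(raw_images, question_lstm, is_training=False,
                                  scope_name="image_resnet", config=config,
                                  dropout_keep=1.0, att=False)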
def get_image_features(image, question, is_training, scope_name, scope_feature, config,
                       dropout_keep=1., reuse=False, co_attention=True):

    image_input_type = config["image_input"]

    # Extract feature from 1D-image features
    if image_input_type == "fc8" \
            or image_input_type == "fc7" \
            or image_input_type == "dummy":

        image_out = image
        if config.get('normalize', False):
            image_out = tf.nn.l2_normalize(image, dim=1, name="fc_normalization")

    elif image_input_type.startswith("conv") or image_input_type.startswith("raw"):

        # Extract feature from raw images
        if image_input_type.startswith("raw"):

            # Create CBN
            cbn = None
            if config["cbn"].get("use_cbn", False):
                cbn_factory = CBNfromLSTM(question,
                                          no_units=config['cbn']["cbn_embedding_size"])
                excluded_scopes = config["cbn"].get('excluded_scope_names', [])
                cbn = ConditionalBatchNorm(cbn_factory,
                                           excluded_scope_names=excluded_scopes,
                                           is_training=is_training)

            # Create ResNet (this variant of create_resnet returns a pair;
            # only the feature maps are kept)
            resnet_version = config['resnet_version']
            image_feature_maps, _ = create_resnet(image,
                                                  is_training=is_training,
                                                  scope=scope_name,
                                                  scope_feature=scope_feature,
                                                  cbn=cbn,
                                                  resnet_version=resnet_version,
                                                  resnet_out=config.get('resnet_out', "block4"))

            if config.get('normalize', False):
                image_feature_maps = tf.nn.l2_normalize(image_feature_maps, dim=[1, 2, 3])

        # Extract feature from 3D-image features
        else:
            image_feature_maps = image

        # With co-attention, return the raw feature maps so a downstream
        # co-attention module can attend over them; otherwise apply the
        # single-hop attention pooling.
        image_out = image_feature_maps
        if not co_attention:
            image_out = get_attention(image_feature_maps, question,
                                      config=config["attention"],
                                      dropout_keep=dropout_keep,
                                      reuse=reuse)

    else:
        assert False, "Wrong input type for image"

    return image_out
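# Usage sketch for the co-attention variant above with pre-extracted "conv"
# feature maps. Shapes and scope names are assumptions for illustration only;
# with co_attention=True the maps are returned unchanged for a downstream
# co-attention module, while co_attention=False would instead apply the
# get_attention pooling (and require config["attention"]).
import tensorflow as tf

config = {
    "image_input": "conv",
}

conv_maps = tf.placeholder(tf.float32, shape=[None, 7, 7, 2048], name="conv_maps")      # assumed shape
question_emb = tf.placeholder(tf.float32, shape=[None, 1024], name="question_emb")      # assumed shape

image_out = get_image_features(conv_maps, question_emb, is_training=False,
                               scope_name="image_pipeline", scope_feature="feature",
                               config=config, co_attention=True)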