def create_module_graph(module_spec):
    """Build a new graph that decodes one JPEG and feeds it to a Hub module.

    Args:
      module_spec: the hub.ModuleSpec for the image module being used.

    Returns:
      graph: the tf.Graph that was created.
      bottleneck_tensor: the bottleneck values output by the module.
      jpeg_data: the string placeholder to feed JPEG data into.
    """
    img_height, img_width = hub.get_expected_image_size(module_spec)
    num_channels = hub.get_num_image_channels(module_spec)

    graph = tf.Graph()
    with graph.as_default():
        jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')
        # Decode, then rescale the uint8 pixels to float32 in [0, 1].
        image_float = tf.image.convert_image_dtype(
            tf.image.decode_jpeg(jpeg_data, channels=num_channels),
            tf.float32)
        # Add a leading batch axis so the image can be resized and fed to
        # the module as a batch of one.
        image_batch = tf.expand_dims(image_float, 0)
        target_size = tf.cast(tf.stack([img_height, img_width]),
                              dtype=tf.int32)
        module_input = tf.image.resize_bilinear(image_batch, target_size)
        bottleneck_tensor = hub.Module(module_spec)(module_input)
    return graph, bottleneck_tensor, jpeg_data
def mobileNet(x_dict, n_classes, dropout, reuse, is_training):
    """Build a transfer-learning classifier on top of a TF-Hub MobileNet V2.

    The Hub module turns the images into feature vectors; three ReLU dense
    layers (1280, 640 and 320 units), each followed by dropout, then feed a
    linear output layer.

    Args:
      x_dict: dict whose 'feature' entry is the image batch given to the module.
      n_classes: number of units in the output layer.
      dropout: rate passed to tf.layers.dropout.
      reuse: passed to tf.variable_scope so the head's variables can be shared.
      is_training: passed to tf.layers.dropout as its `training` flag.

    Returns:
      The un-activated output of the final dense layer.
    """
    # transfer learning using tf-hub modules
    module = hub.Module(
        "https://tfhub.dev/google/imagenet/mobilenet_v2_100_160/feature_vector/2"
    )
    # Features with shape [batch_size, num_features].
    features = module(x_dict['feature'])

    # define a scope for reusing the variables
    with tf.variable_scope('my_mobileNet', reuse=reuse):
        # fully connected layers
        net = features
        for units in (1280, 640, 320):
            net = tf.layers.dense(inputs=net, units=units,
                                  activation=tf.nn.relu)
            net = tf.layers.dropout(inputs=net, rate=dropout,
                                    training=is_training)
        # output layer: fed from the LAST hidden layer. The previous code
        # wired it to the first one, leaving the 640/320 layers unused.
        pred = tf.layers.dense(inputs=net, units=n_classes, activation=None)
    return pred
def main(args, sess):
    """Embed every jpg under args.source with a Hub module and save the result.

    Args:
      args: namespace providing `url` (Hub module handle), `source` (passed to
        list_images) and `dest` (directory that receives 'z.npy').
      sess: the session used to initialise variables and run the module.

    Raises:
      IOError: if an image file cannot be read by OpenCV.
      ValueError: if no images were found, so there is nothing to save.
    """
    module = hub.Module(args.url)
    height, width = hub.get_expected_image_size(module)
    print(height, width)
    image_in = tf.placeholder('float', [1, height, width, 3])
    z_op = module(image_in)
    sess.run(tf.global_variables_initializer())

    zvect = []
    imglist = list_images(args.source, ext='jpg')
    for idx, imgpath in enumerate(imglist):
        bgr = cv2.imread(imgpath)
        if bgr is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Could not read image: {}'.format(imgpath))
        # Reverse the channel axis (OpenCV loads BGR), scale to [0, 1] and
        # add the batch axis expected by the placeholder.
        img = np.expand_dims(bgr[:, :, ::-1] / 255., 0)
        z = sess.run(z_op, {image_in: img})
        zvect.append(z)
        if idx % 100 == 0:
            print('{} --> {} --> {}'.format(imgpath, img.shape, z.shape))

    if not zvect:
        raise ValueError('No jpg images found in {}'.format(args.source))
    zvect = np.concatenate(zvect, axis=0)
    dst = os.path.join(args.dest, 'z.npy')
    np.save(dst, zvect)
def tfhub_encoder(x, dropout_pr=1.0):
    """Encode a batch of images with a TF-Hub MobileNet V2 feature module.

    Args:
      x: Images on [0, 255] sized (batch_size, scale_size, scale_size, 3).
      dropout_pr: Not used by this function.

    Returns:
      enc_x: Encodings sized (batch_size, encoding_size).
    """
    scaled = x / 255.

    # Per the original author's note this module takes (224, 224) images and
    # encodes them to 1280 values.
    spec = hub.load_module_spec(
        'https://tfhub.dev/google/imagenet/mobilenet_v2_035_224/'
        'feature_vector/2')
    expected_h, expected_w = hub.get_expected_image_size(spec)

    actual_h = scaled.shape[1]
    actual_w = scaled.shape[2]
    assert actual_h == expected_h, 'height is {}. Must be {}'.format(
        actual_h, expected_h)
    assert actual_w == expected_w, 'width is {}. Must be {}'.format(
        actual_w, expected_w)

    encoder = hub.Module(spec)
    enc_x = encoder(scaled)
    # The unpacking fails unless the static output shape has exactly two
    # entries; the values themselves are not used.
    _batch_dim, _encoding_dim = enc_x.get_shape().as_list()
    return enc_x
def single_iteration(device, cap, sess, results_output, frame_placeholder,
                     module):
    """Process one camera frame: capture, preprocess, run the net, send.

    Args:
      device: object whose send_to_device() receives the packaged results.
      cap: capture handle passed to camera_access.get_camera_frame.
      sess: session used to evaluate `results_output`.
      results_output: tensor(s) fetched from the session.
      frame_placeholder: placeholder that receives the preprocessed frame.
      module: Hub module (or spec) used to look up the expected input size.
    """
    module_input_height, module_input_width = hub.get_expected_image_size(
        module)

    frame = camera_access.get_camera_frame(cap)
    frame_squared = camera_access.square_frame(frame)
    # Scale the *squared* frame. The previous code computed frame_squared but
    # then scaled the raw frame, so the squaring step had no effect.
    frame_scaled = camera_access.scale_frame(frame_squared,
                                             module_input_height,
                                             module_input_width)

    # Tensorflow expects values between 0.0 and 1.0, and we have
    # between 0.0 and 255.0
    frame_scaled = frame_scaled / 255.0
    # Tensorflow needs the data as a batch of size 1.
    frame_batch = np.expand_dims(frame_scaled, 0)

    # Run Tensorflow.
    results = sess.run(results_output,
                       feed_dict={frame_placeholder: frame_batch})
    # Package data (nn_outputs_order is a module-level name).
    results = nn_data_packager.package_data(results, nn_outputs_order)
    # Send data to the device.
    device.send_to_device(results)
def run_experiment(module_url, sample_num=100, run_num=3):
    """Measure single-image inference latency of a TF-Hub image module.

    Random images are generated, three warm-up inferences are run, and then
    every sample is timed individually `run_num` times. The mean and standard
    deviation (in milliseconds) are logged.

    Args:
      module_url: handle of the Hub module to benchmark.
      sample_num: number of random images to generate; must be >= 1.
      run_num: number of passes over the samples.

    Raises:
      ValueError: if sample_num is smaller than 1 (the warm-up needs a sample).
    """
    if sample_num < 1:
        raise ValueError(
            "sample_num must be at least 1, got {}".format(sample_num))

    times = []
    with tf.Graph().as_default():
        module = hub.Module(module_url)
        height, width = hub.get_expected_image_size(module)
        logger.info("model input should be ?x{}x{}".format(width, height))
        # Named image_input so the builtin `input` is not shadowed.
        image_input = tf.placeholder(shape=(None, height, width, 3),
                                     dtype=tf.float32)
        output = module(image_input)
        samples = np.random.rand(sample_num, height, width, 3) * 255

        with tf.train.MonitoredSession() as sess:
            # warm-up
            logger.debug("warming up")
            warmup_batch = np.expand_dims(samples[0], axis=0)
            for _ in range(3):
                sess.run(output, feed_dict={image_input: warmup_batch})
            logger.debug("Warm up finished. Start running.")

            for _ in range(run_num):
                for image in samples:
                    batch = np.expand_dims(image, axis=0)
                    # perf_counter is monotonic and high-resolution, which
                    # makes it the right clock for short intervals.
                    start = time.perf_counter()
                    sess.run(output, feed_dict={image_input: batch})
                    elapsed = time.perf_counter() - start
                    times.append(elapsed * 1000)

    times = np.array(times)
    logger.info(
        "Tested with {} images for {} runs. average time is {:.1f} ms, std is {:.1f} ms"
        .format(sample_num, run_num, np.mean(times), np.std(times)))
def calculate_embedding(images, shape):
    """Compute a ResNet V2 152 feature vector for every image.

    Args:
      images: sliceable collection of images; each `images[i:i+1]` slice is
        fed to a [None, height, width, 3] float32 placeholder.
      shape: (height, width, color) of the images; only height and width are
        used.

    Returns:
      A list with one module output (for a batch of one) per image.

    Raises:
      ValueError: if (height, width) differs from the size the module expects.
    """
    height, width, _ = shape
    x = tf.placeholder(tf.float32, [None, height, width, 3],
                       name='Placeholder-x')
    resized_input_tensor = tf.reshape(x, [-1, height, width, 3])

    module = hub.Module(
        "https://tfhub.dev/google/imagenet/resnet_v2_152/feature_vector/1")
    # The previous `assert height, width == ...` parsed as
    # `assert height, (message)` and therefore never compared the sizes.
    expected_size = list(hub.get_expected_image_size(module))
    if [height, width] != expected_size:
        raise ValueError(
            'Image size {} does not match the module input size {}'.format(
                [height, width], expected_size))

    # Features with shape [batch_size, num_features]
    bottleneck_tensor = module(resized_input_tensor)
    print('bottleneck_tensor:', bottleneck_tensor)

    with tf.Session() as sess:  # Connect to the TF runtime.
        # Run the variable initialisers before evaluating the module.
        sess.run(tf.global_variables_initializer())
        # One image per run; the i:i+1 slice keeps the batch axis.
        embedding = [
            bottleneck_tensor.eval(feed_dict={x: images[i:i + 1]})
            for i in range(len(images))
        ]
    print(len(embedding))
    return embedding
def extract_inception_bottleneck_features(images_train, images_test):
    """Run train and test images through the Inception V3 Hub module.

    Both sets are concatenated, converted to float32, resized to the module's
    expected input size and evaluated in batches of BATCH_SIZE.

    Args:
      images_train: list of training images.
      images_test: list of test images.

    Returns:
      (X_train, X_test): lists of per-image module outputs, split back at
      len(images_train).
    """
    # Combine train & test set to single set of images
    images = images_train + images_test
    print('Extracting inception bottleneck features...')

    module_spec = hub.load_module_spec(TFHUB_INCEPTION_V3_MODULE_SPEC_URL)
    # The spec already carries the input size, so a single Module instance
    # is enough (the previous code built the whole network twice).
    image_height, image_width = hub.get_expected_image_size(module_spec)
    images = [tf.image.convert_image_dtype(x, tf.float32) for x in images]
    images = [
        tf.image.resize_images(x, (image_height, image_width)) for x in images
    ]

    m = hub.Module(module_spec)
    X = []
    # The context manager closes the session even if a batch fails.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for start in range(0, len(images), BATCH_SIZE):
            batch = images[start:start + BATCH_SIZE]
            X.extend(sess.run(m(batch)))

    # Recover train & test set
    X_train = X[:len(images_train)]
    X_test = X[len(images_train):]
    return (X_train, X_test)
def model_fn(features, labels, mode, params):
    """Estimator model_fn that pushes parsed records through Inception V3.

    The loss is a placeholder (mean of a 1-unit dense layer over the images)
    and exists only so tensors flow through the graph.

    Args:
      features: serialized records, parsed one by one with
        parse_tfrecord_inception.
      labels: not used.
      mode: tf.estimator mode, forwarded to the EstimatorSpec.
      params: forwarded to parse_tfrecord_inception.

    Returns:
      A tf.estimator.EstimatorSpec whose predictions are the module outputs.
    """
    classifier = hub.Module(
        "https://tfhub.dev/google/imagenet/inception_v3/classification/1")
    height, width = hub.get_expected_image_size(classifier)

    def _parse_record(record):
        # Only the first element of the parser's result is used.
        parsed = parse_tfrecord_inception(
            params, record, width, height,
            is_training=False, use_summary=True)
        return parsed[0]

    # Done here to get the summaries in the model_fn execution
    images = tf.map_fn(_parse_record, features, dtype=tf.float32)
    tf.summary.image("final_image", images)

    # [batch_size, height, width, 3] => [batch_size, num_classes]
    predictions = classifier(images)

    # Does nothing useful, just to run tensors through the graph
    loss = tf.reduce_mean(tf.layers.dense(images, 1))
    optimizer = tf.train.AdamOptimizer()
    train_op = optimizer.minimize(loss, tf.train.get_global_step())

    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        train_op=train_op,
        predictions=predictions,
    )
def _add_jpeg_decoding(module_spec):
    """Add JPEG decoding and resizing operations to the default graph.

    Args:
      module_spec: The hub.ModuleSpec for the image module being used.

    Returns:
      Tensors for the node to feed JPEG data into, and the output of the
      preprocessing steps.
    """
    target_height, target_width = hub.get_expected_image_size(module_spec)
    num_channels = hub.get_num_image_channels(module_spec)

    # String placeholder that receives the encoded JPEG data.
    jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')

    # Decode to uint8 pixels with the module's channel count, then convert
    # from the full uint8 range to float32 in [0, 1].
    pixels_uint8 = tf.image.decode_jpeg(jpeg_data, channels=num_channels)
    pixels_float = tf.image.convert_image_dtype(pixels_uint8, tf.float32)

    # Insert a leading axis of size 1, so the single image becomes a batch.
    batch_of_one = tf.expand_dims(pixels_float, 0)

    # Pack the target size into one int32 tensor of shape (2,); this is the
    # `size` argument handed to the resize op below.
    size_tensor = tf.cast(tf.stack([target_height, target_width]),
                          dtype=tf.int32)

    # Bilinear resize of the batch to the module's expected height/width.
    resized_image = tf.image.resize_bilinear(batch_of_one, size_tensor)
    return jpeg_data, resized_image
def get_bottleneck_tensor(input_jpeg_str):
    # type: tf.Tensor -> tf.Tensor
    """Build the ops that turn a JPEG string tensor into bottleneck values.

    The image is decoded, converted to float32, resized to the input size of
    the module at _FEATURE_VECTORS_MODULE_URL and passed through that module.

    Args:
      input_jpeg_str: Tensor for input JPEG image.

    Returns:
      bottleneck_tensor: Tensor for output bottleneck Tensor.
    """
    spec = tensorflow_hub.load_module_spec(_FEATURE_VECTORS_MODULE_URL)
    target_h, target_w = tensorflow_hub.get_expected_image_size(spec)
    channels = tensorflow_hub.get_num_image_channels(spec)

    # Decode and rescale the uint8 pixels to float32 in [0, 1].
    image = tf.image.convert_image_dtype(
        tf.image.decode_jpeg(input_jpeg_str, channels=channels), tf.float32)
    # Batch of one, resized to what the module expects.
    image_batch = tf.expand_dims(image, 0)
    target_size = tf.cast(tf.stack([target_h, target_w]), dtype=tf.int32)
    module_input = tf.image.resize_bilinear(image_batch, target_size)

    feature_module = tensorflow_hub.Module(spec)
    return feature_module(module_input)
def run(constant_overwrites,
        project_dir='/Users/d777710/src/DeepLearning/vision'):
    """Train the top of a SemiHardModel on precomputed bottlenecks.

    Hyperparameters are read from 'hyperparams.yml' next to this file and
    merged with `constant_overwrites`; directory entries are then resolved
    relative to `project_dir`.

    Args:
      constant_overwrites: dict of values merged over the loaded
        hyperparameters via merge_dict.
      project_dir: root directory for bottlenecks, splits and model output.
        Defaults to the location that used to be hard-coded here.
    """
    config_path = os.path.join(os.path.dirname(__file__), 'hyperparams.yml')
    constants = merge_dict(load_hyperparams(config_path), constant_overwrites)

    # 1. / 255 (not 1/255) so the factor is not truncated to 0 by integer
    # division on Python 2.
    data_generator = ImageDataGenerator(rescale=1. / 255,
                                        rotation_range=90,
                                        width_shift_range=0.2,
                                        height_shift_range=0.2,
                                        horizontal_flip=True)

    print('Load', constants['module_spec'])
    module_spec = hub.load_module_spec(constants['module_spec'])
    # Only the first entry (height) of the expected size is used.
    image_size, _ = hub.get_expected_image_size(module_spec)

    print('Project dir:', project_dir)
    _, _, bottleneck_config = get_bottleneck_config(
        os.path.join(project_dir, constants['bottleneck_dir']),
        os.path.join(project_dir, constants['splits_dir']))
    bottleneck_flow_gen = ImageFlowGenerator(bottleneck_config,
                                             mode='bottleneck')

    constants.update({
        'train_dir': os.path.join(project_dir, constants['train_subdir']),
        'top_model_dir': os.path.join(project_dir,
                                      constants['top_model_subdir']),
        'val_dir': os.path.join(project_dir, constants['val_subdir']),
        'top_model_val_dir': os.path.join(project_dir,
                                          constants['top_model_val_subdir']),
        'data_flow_gen': bottleneck_flow_gen,
        'eval_every_n_steps': 5,
        'generator': data_generator,
        'image_size': image_size
    })

    model = SemiHardModel(constants, train_top_only=True)
    run_training(model, constants)
def get_pretrained_model(feature_extractor_url):
    """Build a Keras classifier on top of a frozen TF-Hub feature extractor.

    Args:
      feature_extractor_url: handle of a Hub module without a top layer.

    Returns:
      A tf.keras.Sequential model: the Hub module wrapped in a non-trainable
      Lambda layer followed by a 2-unit softmax Dense layer. Variables are
      initialised in the Keras session before returning.
    """
    NUM_CLASSES = 2

    def feature_extractor(x):
        feature_extractor_module = hub.Module(feature_extractor_url)
        return feature_extractor_module(x)

    # Read the input size from the module *spec*. Instantiating a second
    # hub.Module just for this would add an unused copy of the network to
    # the graph.
    module_spec = hub.load_module_spec(feature_extractor_url)
    IMAGE_SIZE = list(hub.get_expected_image_size(module_spec))

    features_extractor_layer = tf.keras.layers.Lambda(
        feature_extractor, input_shape=IMAGE_SIZE + [3])
    features_extractor_layer.trainable = False

    # add classification layer to model
    model = tf.keras.Sequential([
        features_extractor_layer,
        tf.keras.layers.Dense(NUM_CLASSES, activation='softmax')
    ])
    model.summary()

    # The variables are initialised manually in the Keras session here.
    sess = K.get_session()
    init = tf.global_variables_initializer()
    sess.run(init)
    return model
def create_module_graph(module_spec):
    """Create a graph holding the Hub module behind a float32 image input.

    Args:
      module_spec: the hub.ModuleSpec for the image module being used.

    Returns:
      (graph, bottleneck_tensor, resized_input_tensor): the new tf.Graph, the
      module output, and the [None, height, width, 3] input placeholder.
    """
    rows, cols = hub.get_expected_image_size(module_spec)
    graph = tf.Graph()
    with graph.as_default():
        # Setting channels to 3.
        input_shape = [None, rows, cols, 3]
        resized_input_tensor = tf.placeholder(tf.float32, input_shape)
        image_module = hub.Module(module_spec)
        bottleneck_tensor = image_module(resized_input_tensor)
    return graph, bottleneck_tensor, resized_input_tensor
def runClass(classifier_url, image, runCount, k, threshold):
    """Classify one image with a TF-Hub classifier and print labels/timings.

    Args:
      classifier_url: Hub handle of the classifier.
      image: path (or file object) opened with PIL's Image.open.
      runCount: number of timed inferences to run; must be >= 1.
      k: number of classes to return (currently overridden to 1, see below).
      threshold: passed to get_output.

    Raises:
      ValueError: if runCount is smaller than 1, since no result would exist.
    """
    if runCount < 1:
        raise ValueError('runCount must be at least 1, got %r' % (runCount,))

    # number of classes to return
    # NOTE(review): this discards the caller's k; kept to preserve behaviour.
    k = 1

    # setup the path to the labels
    labels_path = tf.keras.utils.get_file(
        'ImageNetLabels.txt',
        'https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt'
    )

    if "mobilenet_v" in classifier_url:
        print("Mobilenet detected, using url and not module")
        IMAGE_SHAPE = (224, 224)
        classifier = tf.keras.Sequential(
            [hub.KerasLayer(classifier_url, input_shape=IMAGE_SHAPE + (3, ))])
    elif "inception_v3" in classifier_url:
        print("Inception_v3 detected, using url and not module")
        IMAGE_SHAPE = (299, 299)
        classifier = tf.keras.Sequential(
            [hub.KerasLayer(classifier_url, input_shape=IMAGE_SHAPE + (3, ))])
    else:
        module = hub.Module(classifier_url)
        # this call does not work with tf2. You'll need to input the shape
        # from the commandline
        height, width = hub.get_expected_image_size(module)
        IMAGE_SHAPE = (height, width)
        classifier = tf.keras.Sequential(
            [hub.KerasLayer(module, input_shape=IMAGE_SHAPE + (3, ))])

    # setup for probability vs raw scores
    probability_model = tf.keras.Sequential(
        [classifier, tf.keras.layers.Softmax()])

    theImage = Image.open(image).resize(IMAGE_SHAPE)
    theImage = np.array(theImage) / 255.0

    print("First inference may be slower")
    for _ in range(runCount):
        start = time.perf_counter()
        result = probability_model.predict(theImage[np.newaxis, ...])
        inference_time = time.perf_counter() - start
        print('%.1fms' % (inference_time * 1000))
    resultLength = len(result[0])

    # Read the labels with a context manager so the file handle is closed.
    with open(labels_path) as labels_file:
        imagenet_labels = np.array(labels_file.read().splitlines())

    # some of the TF Models, e.g. efficientnet, assume the second line is
    # index 0, so we need to adjust
    offset = 1 if resultLength < len(imagenet_labels) else 0

    classes = get_output(result[0], k, threshold)
    print("")
    print('Label: Confidence')
    for klass in classes:
        pc = klass.id + offset
        print('%s: %.5f' % (imagenet_labels[pc], klass.score))
    return
def runClass(classifier_url, image, runCount, k, threshold):
    """Classify one image with a TF-Hub classifier and print labels/timings.

    Args:
      classifier_url: Hub handle of the classifier.
      image: path (or file object) opened with PIL's Image.open.
      runCount: number of timed inferences to run; must be >= 1.
      k: number of classes to return (currently overridden to 1, see below).
      threshold: passed to get_output.

    Raises:
      ValueError: if runCount is smaller than 1, since no result would exist.
    """
    if runCount < 1:
        raise ValueError('runCount must be at least 1, got %r' % (runCount,))

    # number of classes to return
    # NOTE(review): this discards the caller's k; kept to preserve behaviour.
    k = 1

    # setup the path to the labels
    labels_path = tf.keras.utils.get_file(
        'ImageNetLabels.txt',
        'https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt'
    )

    if "mobilenet_v" in classifier_url:
        print("Mobilenet detected, using url and not module")
        IMAGE_SHAPE = (224, 224)
        classifier = tf.keras.Sequential(
            [hub.KerasLayer(classifier_url, input_shape=IMAGE_SHAPE + (3, ))])
    elif "inception_v3" in classifier_url:
        print("Inception_v3 detected, using url and not module")
        IMAGE_SHAPE = (299, 299)
        classifier = tf.keras.Sequential(
            [hub.KerasLayer(classifier_url, input_shape=IMAGE_SHAPE + (3, ))])
    else:
        # When running with TF2, hub.Module and
        # hub.get_expected_image_size(module) haven't worked for the original
        # author. Look at how mobilenet is used if using TF 2.
        # Also think about how to get the image sizes passed in.
        module = hub.Module(classifier_url)
        height, width = hub.get_expected_image_size(module)
        IMAGE_SHAPE = (height, width)
        classifier = tf.keras.Sequential(
            [hub.KerasLayer(module, input_shape=IMAGE_SHAPE + (3, ))])

    # setup for probability vs raw scores
    probability_model = tf.keras.Sequential(
        [classifier, tf.keras.layers.Softmax()])

    theImage = Image.open(image).resize(IMAGE_SHAPE)
    theImage = np.array(theImage) / 255.0

    print("First inference may be slower")
    for _ in range(runCount):
        start = time.perf_counter()
        result = probability_model.predict(theImage[np.newaxis, ...])
        inference_time = time.perf_counter() - start
        print('%.1fms' % (inference_time * 1000))

    # Read the labels with a context manager so the file handle is closed.
    with open(labels_path) as labels_file:
        imagenet_labels = np.array(labels_file.read().splitlines())

    classes = get_output(result[0], k, threshold)
    # EfficientNet handles index the label file one line later.
    label_shift = 1 if "efficient" in classifier_url else 0
    print("")
    print('Label: Confidence')
    for klass in classes:
        # A dedicated name is used here rather than re-binding `k`.
        label_index = klass.id + label_shift
        print('%s: %.5f' % (imagenet_labels[label_index], klass.score))
    return
def download_image_model(mdl_url):
    # type: str -> (tensorflow_hub.Module, int, int, int)
    """Load a TF-Hub image module together with its expected input geometry.

    Args:
      mdl_url: handle of the Hub module to load.

    Returns:
      (module, input_height, input_width, input_depth).
    """
    spec = tensorflow_hub.load_module_spec(mdl_url)
    height, width = tensorflow_hub.get_expected_image_size(spec)
    depth = tensorflow_hub.get_num_image_channels(spec)
    image_module = tensorflow_hub.Module(spec)
    return (image_module, height, width, depth)
def create_module_graph(module_spec):
    """Create a graph with the Hub module and report fake-quantization use.

    Args:
      module_spec: the hub.ModuleSpec for the image module being used.

    Returns:
      (graph, bottleneck_tensor, resized_input_tensor, wants_quantization),
      where wants_quantization is True when any node op in the graph is
      listed in FAKE_QUANT_OPS.
    """
    rows, cols = hub.get_expected_image_size(module_spec)
    graph = tf.Graph()
    with graph.as_default():
        resized_input_tensor = tf.placeholder(tf.float32,
                                              [None, rows, cols, 3])
        image_module = hub.Module(module_spec)
        bottleneck_tensor = image_module(resized_input_tensor)

        # Stop at the first node whose op is a fake-quantization op.
        wants_quantization = False
        for node in graph.as_graph_def().node:
            if node.op in FAKE_QUANT_OPS:
                wants_quantization = True
                break
    return (graph, bottleneck_tensor, resized_input_tensor,
            wants_quantization)
def classify_image(classifier, img):
    """Return softmax class probabilities for a single image.

    Args:
      classifier: callable Hub module (also accepted by
        hub.get_expected_image_size) that maps an image batch to logits.
      img: image array; resized to the module's expected height and width
        with transform.resize before being fed as a batch of one.

    Returns:
      The evaluated softmax output for the one-image batch.
    """
    h, w = hub.get_expected_image_size(classifier)
    x = tf.placeholder(tf.float32, shape=(None, h, w, 3))
    y = tf.nn.softmax(classifier(x))
    data = transform.resize(img, [h, w])

    # Use the session itself as the context manager: unlike
    # `tf.Session().as_default()`, this closes the session on exit.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        y_pred = sess.run(y, feed_dict={x: [data]})
    return y_pred
def create_module_graph(module_spec):
    """Create a graph with the Hub module behind a placeholder 'ImageInput'.

    Args:
      module_spec: the hub.ModuleSpec for the image module being used.

    Returns:
      (graph, bottleneck_tensor, resized_input_tensor).
    """
    img_h, img_w = hub.get_expected_image_size(module_spec)
    graph = tf.Graph()
    with graph.as_default():
        resized_input_tensor = tf.placeholder(
            tf.float32, [None, img_h, img_w, 3], name="ImageInput")
        bottleneck_tensor = hub.Module(module_spec)(resized_input_tensor)
    return graph, bottleneck_tensor, resized_input_tensor
def load_hub_weights(models):
    """Copy TF-Hub MobileNetV2 weights into Keras models and save them.

    For every (alpha, rows) pair the matching Hub classification module is
    loaded, its variables under 'module/MobilenetV2' are read out, and they
    are assigned to a Keras MobileNetV2 of the same configuration. The Keras
    weights are saved twice: for the full model and for a model truncated at
    the 'out_relu' layer. Predictions on 'cat.png' are printed before and
    after the transfer.

    Args:
      models: iterable of (alpha, rows) pairs.
    """
    for alpha, rows in models:
        # Start from an empty default graph for every configuration.
        tf.reset_default_graph()
        print('alpha: ', alpha, 'rows: ', rows)

        WEIGHTS_SAVE_PATH_INCLUDE_TOP = '/home/jon/Documents/keras_mobilenetV2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + str(alpha) + '_' + str(rows) + '.h5'
        WEIGHTS_SAVE_PATH_NO_TOP = '/home/jon/Documents/keras_mobilenetV2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + \
            str(alpha) + '_' + str(rows) + '_no_top' + '.h5'

        # Load the test image and map pixel values via x / 128 - 1.
        img = nets.utils.load_img('cat.png', target_size=256, crop_size=rows)
        img = (img / 128.0) - 1.0
        inputs = tf.placeholder(tf.float32, [None, rows, rows, 3])

        # Hub module for this alpha/rows combination.
        model = hub.Module(
            "https://tfhub.dev/google/imagenet/mobilenet_v2_" + map_alpha_to_slim(alpha) + "_" + str(rows) + "/classification/1")
        # h, w, features and probs are not used further down.
        h, w = hub.get_expected_image_size(model)
        features = model(inputs, signature="image_classification", as_dict=True)
        probs = tf.nn.softmax(features['default'])

        # Load the local Keras model with previously converted weights.
        with tf.variable_scope('keras'):
            model2 = MobileNetV2(weights=None, alpha = alpha, input_shape=(rows, rows, 3))
            model2.load_weights('./old_weights_nonhub/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + str(alpha) + '_' +str(rows) + '.h5')

        preds1 = model2.predict(img)
        print('preds1: (remote weights) new BN no set w:: ', nets.utils.decode_predictions(preds1))

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # All global variables created under the module's MobilenetV2
            # scope, in collection order.
            weights = tf.get_collection(
                tf.GraphKeys.GLOBAL_VARIABLES, scope='module/MobilenetV2')
            values = sess.run(weights)
            # Drop index 0 along the last axis of the final two variables
            # (squeezing the first of them beforehand).
            # NOTE(review): presumably removes an extra leading class so the
            # shapes match the Keras classifier -- confirm.
            values[-2] = np.delete(np.squeeze(values[-2]), 0, axis=-1)
            values[-1] = np.delete(values[-1], 0, axis=-1)
            # Redundant with the `with` block above, which also closes it.
            sess.close()

        # Assign the Hub values to the Keras model and save both variants.
        # NOTE(review): relies on both weight lists having the same order.
        model2.set_weights(values)
        model2.save_weights(WEIGHTS_SAVE_PATH_INCLUDE_TOP)
        model2_no_top = Model(input = model2.input, output = model2.get_layer('out_relu').output)
        model2_no_top.save_weights(WEIGHTS_SAVE_PATH_NO_TOP)

        # Predictions after the weights have been replaced.
        preds2 = model2.predict(img)
        print('preds2: (after set weights) ',
              nets.utils.decode_predictions(preds2))
def run(dataset_folder, network='inception_v3', batch_size=16):
    """Extract Hub-module features for every image under dataset_folder.

    Images are globbed as '<dataset_folder>/*/*.jpg' and '.png'; the parent
    directory name of each file is its label. Three CSV files (filenames,
    labels, features) are written into dataset_folder.

    Args:
      dataset_folder: root folder with one sub-folder per class.
      network: key into the module-level `modules` mapping of Hub handles.
      batch_size: batch size passed to input_fn.
    """
    assert network in modules, 'Invalid network, pick one of %s' % list(modules.keys())
    assert dataset_folder is not None

    with tf.Graph().as_default():
        dataset = basename(dirname(dataset_folder))
        filenames_output = os.path.join(dataset_folder, dataset + '_' + network + '_filenames.csv')
        labels_output = os.path.join(dataset_folder, dataset + '_' + network + '_labels.csv')
        features_output = os.path.join(dataset_folder, dataset + '_' + network + '_features.csv')
        module_url = modules[network]

        types = ('/*/*.jpg', '/*/*.png')
        image_paths = []
        for pattern in types:
            image_paths.extend(glob.glob(dataset_folder + pattern))
        pbar = tqdm(total=len(image_paths))

        path_labels = [basename(dirname(f)) for f in image_paths]
        filenames_t = tf.constant(image_paths)
        labels_t = tf.constant(path_labels)

        module_spec = hub.load_module_spec(module_url)
        output_size = module_spec.get_output_info_dict()['default'].get_shape()[1]
        height, width = hub.get_expected_image_size(module_spec)
        images, labels, files = input_fn(filenames_t, labels_t,
                                         [height, width], batch_size)

        # Empty seed arrays; they fix the dtypes of the final results exactly
        # as the per-batch np.append calls used to.
        features = np.empty((0, output_size), float)
        classes = np.empty(0, int)
        filenames = np.empty(0, str)

        # Separate names instead of re-binding the `network` argument.
        hub_module = hub.Module(module_spec)
        fetches = (hub_module(images), labels, files)

        feature_chunks = []
        label_chunks = []
        name_list = []
        with tf.compat.v1.Session() as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            sess.run(tf.compat.v1.tables_initializer())
            try:
                while True:
                    try:
                        x, y, f = sess.run(fetches)
                    except tf.errors.OutOfRangeError:
                        # The input pipeline is exhausted.
                        break
                    name_list.extend(basename(k) for k in f)
                    label_chunks.append(np.ravel(y))
                    feature_chunks.append(x)
                    pbar.update(len(y))
            finally:
                # Close the progress bar even if a batch fails.
                pbar.close()

        # Collect per-batch results in lists and join them once; appending to
        # the arrays inside the loop copied everything on every batch.
        if feature_chunks:
            features = np.append(features,
                                 np.concatenate(feature_chunks, axis=0),
                                 axis=0)
            classes = np.append(classes, np.concatenate(label_chunks))
            filenames = np.append(filenames, name_list)

        np.savetxt(filenames_output, filenames.astype(str), fmt='%s', delimiter=',')
        np.savetxt(labels_output, classes.astype(str), fmt='%s', delimiter=',')
        np.savetxt(features_output, features, delimiter=',')
def __init__(self, seg_width, seg_height):
    """Set up the Hub module, label tables and the segmented input image.

    Args:
      seg_width: passed to get_grid_labels.
      seg_height: passed to get_grid_labels.

    Raises:
      requests.HTTPError: if the label list has to be downloaded and the
        server answers with an error status.
    """
    # make module
    graph = tf.Graph()
    with graph.as_default():
        module = hub.Module(MODEL_URL)
        self.img_shape = tuple(hub.get_expected_image_size(module))
        self.img_input = tf.placeholder(
            dtype=float,
            shape=[
                tf.Dimension(None),
                tf.Dimension(self.img_shape[0]),
                tf.Dimension(self.img_shape[1]),
                tf.Dimension(3)
            ])
        self.module_img = module(self.img_input)
        init_op = tf.group(
            [tf.global_variables_initializer(),
             tf.tables_initializer()])
    graph.finalize()

    # create session and initialize
    self.session = tf.Session(graph=graph)
    self.session.run(init_op)

    # load or download label infomation
    self.label_info = None
    if os.path.exists(PICKLE_NAME):
        print("the pickle file has already existed!!")
        # NOTE: pickle.load can execute arbitrary code; this is acceptable
        # only because the cache file is written by this class itself.
        with open(PICKLE_NAME, "rb") as f:
            self.label_info = pickle.load(f)
    else:
        # download label data
        response = requests.get(LABEL_URL)
        # Fail loudly on HTTP errors; otherwise an error page would be
        # parsed as labels and cached in the pickle file.
        response.raise_for_status()
        label_names = response.text.split("\n")
        index_to_label = dict(enumerate(label_names))
        label_to_index = {label: i for i, label in enumerate(label_names)}
        # save data
        self.label_info = {
            "idx2lab": index_to_label,
            "lab2idx": label_to_index
        }
        with open(PICKLE_NAME, "wb") as f:
            pickle.dump(self.label_info, f)

    # load image and segmentation
    self.original_img = cv2.imread(IMAGE_PATH)
    self.labels, self.num_labels = self.get_grid_labels(
        self.original_img, seg_width, seg_height)
    super(ImageClf, self).__init__()
def get_features_labels_pairs(dataset_dir, prepro_func=None, module=None,
                              subset='train'):
    """Given an AutoDL dataset, build a generator of (example, labels) pairs.

    Every example is preprocessed with `prepro_func` and then passed through
    `module`. All examples are computed eagerly; the returned callable only
    zips the stored results.

    Args:
      dataset_dir: directory handed to dm.TFRecordFormatDataset.
      prepro_func: function applied to each raw example; identity if None.
        Replaced by the module-specific preprocessing when `module` is None.
      module: pretrained module applied to each batch; when None, the
        'inception_v3' module from get_module is used.
      subset: 'train' or 'test'. For 'test' the labels come from
        raw_dataset.get_test_labels().

    Returns:
      A zero-argument callable returning zip(examples, labels).

    Raises:
      ValueError: if subset is 'test' and no test labels are available.
    """
    batch_size = 100
    if module is None:
        module_name = 'inception_v3'
        module = get_module(module_name)
        expected_image_size = hub.get_expected_image_size(module)
        prepro_func = get_prepro_func(expected_image_size=expected_image_size)
    if prepro_func is None:
        prepro_func = lambda x: x

    raw_dataset = dm.TFRecordFormatDataset(dataset_dir)
    autodl_dataset = raw_dataset.get_autodl_dataset(subset=subset)
    tfrecord_dataset = autodl_dataset.get_dataset()
    preprocessed_dataset = tfrecord_dataset.map(
        lambda *x: (prepro_func(x[0]), x[1]))
    preprocessed_dataset = preprocessed_dataset.batch(batch_size)
    iterator = preprocessed_dataset.make_one_shot_iterator()
    example, labels = iterator.get_next()
    logger.info("Example shape before applying pretrained model: {}"
                .format(example.shape))
    example = module(example)
    logger.info("Example shape after applying pretrained model: {}"
                .format(example.shape))

    li_examples = []
    li_labels = []
    count = 0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        while True:
            try:
                ele = sess.run((example, labels))
            except tf.errors.OutOfRangeError:
                # The one-shot iterator is exhausted.
                break
            li_examples.append(ele[0])
            if subset == 'train':
                li_labels += [label_dense_to_sparse(x) for x in ele[1]]
            count += 1
            if count % 10 == 1:
                logger.info("Preprocessed {} examples."
                            .format(count * batch_size))

    if subset == 'test':
        li_labels = raw_dataset.get_test_labels()
        # Check for missing labels BEFORE transforming them; previously the
        # check came after list(map(...)) and could never trigger.
        if li_labels is None:
            raise ValueError("No solution file found. " +
                             "Please put one solution file at {}."
                             .format(dataset_dir))
        # Split each list of pairs into (first items, second items).
        li_labels = [([x[0] for x in li], [x[1] for x in li])
                     for li in li_labels]

    li_examples = np.concatenate(li_examples, axis=0)
    li_examples = [[x] for x in li_examples]

    def generator():
        return zip(li_examples, li_labels)

    return generator
def inception_v3(inputs, is_training):
    """Resize `inputs` and run them through a trainable Inception V3 module.

    Args:
      inputs: image tensor; resized to the module's expected height/width.
      is_training: not used here; the module is always created with the
        "train" tag.

    Returns:
      The output of the Hub classification module.
    """
    classifier = hub.Module(
        "https://tfhub.dev/google/imagenet/inception_v3/classification/1",
        tags={"train"},
        trainable=True)
    rows, cols = hub.get_expected_image_size(classifier)
    resized = tf.image.resize_images(inputs, [rows, cols])
    return classifier(resized)
def create_module_graph(module_spec):
    """Creates a graph and loads Hub Module into it.

    Args:
      module_spec: the hub.ModuleSpec for the image module being used.

    Returns:
      (graph, final_tensor, resized_input_tensor): the new tf.Graph, the
      module output and the float32 [None, height, width, 3] placeholder.
    """
    expected_h, expected_w = hub.get_expected_image_size(module_spec)
    graph = tf.Graph()
    with graph.as_default():
        placeholder_shape = [None, expected_h, expected_w, 3]
        resized_input_tensor = tf.placeholder(tf.float32, placeholder_shape)
        final_tensor = hub.Module(module_spec)(resized_input_tensor)
    return graph, final_tensor, resized_input_tensor
def train_and_evaluate(output_dir, hparams):
    """Train and evaluate the estimator on the dogs-vs-cats data.

    Args:
      output_dir: currently unused; the model directory is fixed below.
      hparams: dict providing 'batch_size' and 'train_steps'.
    """
    # Start up logging
    tf.logging.info('TF Version {}'.format(tf.__version__))
    tf.logging.info('GPU Available {}'.format(tf.test.is_gpu_available()))
    if 'TF_CONFIG' in os.environ:
        tf.logging.info('TF_CONFIG: {}'.format(os.environ["TF_CONFIG"]))

    # Begin estimator definition, and train/evaluate
    run_config = tf.estimator.RunConfig(save_checkpoints_secs=EVAL_INTERVAL)

    # TODO: remove
    data_directory = get_data('/tmp', run_config.is_chief)
    model_directory = '/tmp/dogscats/run2'

    params = {
        'module_spec': 'https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/1',
        'module_name': 'resnet_v2_50',
        'learning_rate': 1e-3,
        'train_module': False,  # Whether we want to finetune the module
        'label_vocab': tf.gfile.ListDirectory(
            os.path.join(data_directory, 'valid'))
    }

    classifier = tf.estimator.Estimator(
        model_fn=model_fn,
        model_dir=model_directory,
        config=run_config,
        params=params
    )

    # Query the module *spec* for the input size; instantiating hub.Module
    # here would build the whole network in the default graph for nothing.
    input_img_size = hub.get_expected_image_size(
        hub.load_module_spec(params['module_spec']))

    # Train
    train_files = os.path.join(data_directory, 'train', '**/*.jpg')

    def train_input_fn():
        return make_dataset(train_files,
                            image_size=input_img_size,
                            batch_size=hparams['batch_size'],
                            shuffle=True)

    train_spec = tf.estimator.TrainSpec(
        train_input_fn, max_steps=hparams['train_steps'])

    # Eval
    eval_files = os.path.join(data_directory, 'valid', '**/*.jpg')

    def eval_input_fn():
        return make_dataset(eval_files,
                            image_size=input_img_size,
                            batch_size=hparams['batch_size'])

    eval_spec = tf.estimator.EvalSpec(eval_input_fn)

    tf.estimator.train_and_evaluate(classifier, train_spec, eval_spec)
def build_graph(hub_module_url, target_image_path, Mypet_url):
    """Build ops comparing a target image with candidates by cosine similarity.

    Despite the parameter names, the values are used as image tensors: they
    are resized directly, no file reading or decoding happens here.

    Args:
      hub_module_url: handle of a Hub module with an "image_feature_vector"
        signature.
      target_image_path: the target image.
      Mypet_url: iterable of candidate images.

    Returns:
      A list with one flattened similarity tensor per candidate image.
    """
    module = hub.Module(hub_module_url)
    height, width = hub.get_expected_image_size(module)

    def _resize_to_uint8(image_tensor):
        # resize_bilinear wants a batch, so add a batch axis, resize, and
        # remove it again before casting to uint8.
        batched = tf.expand_dims(image_tensor, 0)
        resized = tf.compat.v1.image.resize_bilinear(
            batched, [height, width], align_corners=False)
        unbatched = tf.squeeze(resized, [0])
        return tf.cast(unbatched, dtype=tf.uint8)

    def _feature_vector(image_tensor):
        # Resize, convert to float32, batch as one image and run the module.
        as_uint8 = _resize_to_uint8(image_tensor)
        as_float = tf.image.convert_image_dtype(as_uint8, dtype=tf.float32)
        batch_of_one = tf.expand_dims(as_float, 0)
        return module(batch_of_one, signature="image_feature_vector")

    # Features of the target image first, then of every candidate.
    target_vec = _feature_vector(target_image_path)
    candidate_vecs = [_feature_vector(candidate) for candidate in Mypet_url]

    similarities_1 = []
    for candidate_vec in candidate_vecs:
        # Cosine similarity: dot product divided by the product of the norms.
        dot = tf.tensordot(target_vec, tf.transpose(candidate_vec), 1)
        norm_product = (tf.norm(target_vec, axis=1) *
                        tf.norm(candidate_vec, axis=1))
        similarities_1.append(tf.reshape(dot / norm_product, [-1]))
    return similarities_1
def mobilenet_module():
    """Get a non-trainable MobileNet V1 feature-vector module from TF-Hub.

    Returns:
      module: mobile net tf-hub module
      height: of input image
      width: of input image
      num_features: number of output features (fixed at 1024 here)
    """
    module_url = (
        "https://tfhub.dev/google/imagenet/mobilenet_v1_100_224/feature_vector/1")
    module = hub.Module(module_url, trainable=False)
    expected_size = hub.get_expected_image_size(module)
    height, width = expected_size
    return module, height, width, 1024
def add_jpeg_decoding(module_spec):
    """Add ops that decode a JPEG and resize it to the module's input size.

    Args:
      module_spec: the hub.ModuleSpec for the image module being used.

    Returns:
      (jpeg_data, resized_image): the string placeholder for the JPEG data and
      the resized float32 image batch.
    """
    out_h, out_w = hub.get_expected_image_size(module_spec)
    depth = hub.get_num_image_channels(module_spec)

    jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')
    decoded = tf.image.decode_jpeg(jpeg_data, channels=depth)
    # uint8 pixels -> float32 pixels.
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    # Leading batch axis of size one.
    batched = tf.expand_dims(as_float, 0)
    size = tf.cast(tf.stack([out_h, out_w]), dtype=tf.int32)
    resized_image = tf.image.resize_bilinear(batched, size)
    return jpeg_data, resized_image
def create_module_graph(module_spec):
    """Build a fresh graph containing the Hub module applied to a placeholder.

    Args:
        module_spec: the hub.ModuleSpec for the image module being used.

    Returns:
        graph: the newly created tf.Graph.
        bottleneck_tensor: the module's output for the input placeholder.
        resized_input_tensor: float32 placeholder of shape
            [None, height, width, 3] for images already resized to the size
            the module expects.
        wants_quantization: True when any node in the graph has an op type
            listed in FAKE_QUANT_OPS.
    """
    height, width = hub.get_expected_image_size(module_spec)
    graph = tf.Graph()
    with graph.as_default():
        input_shape = [None, height, width, 3]
        resized_input_tensor = tf.placeholder(tf.float32, input_shape)
        bottleneck_tensor = hub.Module(module_spec)(resized_input_tensor)
        # Scan the op type of every node for fake-quantization ops.
        op_types = (node.op for node in graph.as_graph_def().node)
        wants_quantization = any(op in FAKE_QUANT_OPS for op in op_types)
    return graph, bottleneck_tensor, resized_input_tensor, wants_quantization
def add_jpeg_decoding(module_spec):
    """Adds operations that perform JPEG decoding and resizing to the graph.

    Args:
        module_spec: The hub.ModuleSpec for the image module being used.

    Returns:
        The placeholder to feed JPEG data into (named 'DecodeJPGInput'), and
        the output tensor of the preprocessing steps.
    """
    height, width = hub.get_expected_image_size(module_spec)
    depth = hub.get_num_image_channels(module_spec)

    jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')
    pixels = tf.image.decode_jpeg(jpeg_data, channels=depth)
    # Rescale from the full uint8 range to float32 values in [0, 1].
    pixels = tf.image.convert_image_dtype(pixels, tf.float32)
    # Add the batch dimension that resize_bilinear expects.
    batched_pixels = tf.expand_dims(pixels, 0)
    size = tf.cast(tf.stack([height, width]), dtype=tf.int32)
    resized_image = tf.image.resize_bilinear(batched_pixels, size)
    return jpeg_data, resized_image
def add_input_distortions(flip_left_right, random_crop, random_scale,
                          random_brightness, module_spec):
    """Creates the operations to apply the specified distortions.

    During training it can help to improve the results if we run the images
    through simple distortions like crops, scales, and flips. These reflect
    the kind of variations we expect in the real world, and so can help train
    the model to cope with natural data more effectively. Here we take the
    supplied parameters and construct a network of operations to apply them
    to an image.

    Cropping
    ~~~~~~~~

    Cropping is done by placing a bounding box at a random position in the
    full image. The cropping parameter controls the size of that box relative
    to the input image. If it's zero, then the box is the same size as the
    input and no cropping is performed. If the value is 50%, then the crop box
    will be half the width and height of the input. In a diagram it looks like
    this:

    <       width         >
    +---------------------+
    |                     |
    |   width - crop%     |
    |    <      >         |
    |    +------+         |
    |    |      |         |
    |    |      |         |
    |    |      |         |
    |    +------+         |
    |                     |
    |                     |
    +---------------------+

    Scaling
    ~~~~~~~

    Scaling is a lot like cropping, except that the bounding box is always
    centered and its size varies randomly within the given range. For example
    if the scale percentage is zero, then the bounding box is the same size as
    the input and no scaling is applied. If it's 50%, then the bounding box
    will be in a random range between half the width and height and full size.

    Args:
        flip_left_right: Boolean whether to randomly mirror images
            horizontally.
        random_crop: Integer percentage setting the total margin used around
            the crop box.
        random_scale: Integer percentage of how much to vary the scale by.
        random_brightness: Integer percentage p; pixel values are multiplied
            by a random factor drawn from [1 - p/100, 1 + p/100].
        module_spec: The hub.ModuleSpec for the image module being used.

    Returns:
        The jpeg input layer and the distorted result tensor.
    """
    input_height, input_width = hub.get_expected_image_size(module_spec)
    input_depth = hub.get_num_image_channels(module_spec)
    jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput')
    decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth)
    # Convert from full range of uint8 to range [0,1] of float32.
    decoded_image_as_float = tf.image.convert_image_dtype(decoded_image,
                                                          tf.float32)
    decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)

    # The image is first enlarged by the crop margin times a random scale
    # factor, so that the fixed-size random crop below yields both effects.
    margin_scale = 1.0 + (random_crop / 100.0)
    resize_scale = 1.0 + (random_scale / 100.0)
    margin_scale_value = tf.constant(margin_scale)
    resize_scale_value = tf.random_uniform(shape=[],
                                           minval=1.0,
                                           maxval=resize_scale)
    scale_value = tf.multiply(margin_scale_value, resize_scale_value)
    precrop_width = tf.multiply(scale_value, input_width)
    precrop_height = tf.multiply(scale_value, input_height)
    precrop_shape = tf.stack([precrop_height, precrop_width])
    precrop_shape_as_int = tf.cast(precrop_shape, dtype=tf.int32)
    precropped_image = tf.image.resize_bilinear(decoded_image_4d,
                                                precrop_shape_as_int)
    # `axis` replaces the deprecated `squeeze_dims` keyword; the resulting op
    # is the same. The batch dimension is dropped because tf.random_crop is
    # given a 3-D target size.
    precropped_image_3d = tf.squeeze(precropped_image, axis=[0])
    cropped_image = tf.random_crop(precropped_image_3d,
                                   [input_height, input_width, input_depth])

    if flip_left_right:
        flipped_image = tf.image.random_flip_left_right(cropped_image)
    else:
        flipped_image = cropped_image

    brightness_min = 1.0 - (random_brightness / 100.0)
    brightness_max = 1.0 + (random_brightness / 100.0)
    brightness_value = tf.random_uniform(shape=[],
                                         minval=brightness_min,
                                         maxval=brightness_max)
    brightened_image = tf.multiply(flipped_image, brightness_value)
    # Restore the batch dimension for the module input.
    distort_result = tf.expand_dims(brightened_image, 0, name='DistortResult')
    return jpeg_data, distort_result
def _iter_labelled_images(images_path):
    """Yield (label, DirEntry) for each file inside each category folder."""
    with os.scandir(images_path) as category_entries:
        for category in category_entries:
            # Stray files next to the category folders (e.g. .DS_Store) are
            # not categories; scanning them would raise NotADirectoryError.
            if not category.is_dir():
                continue
            with os.scandir(category.path) as image_entries:
                for image_entry in image_entries:
                    # Nested folders cannot be read as images.
                    if image_entry.is_file():
                        yield category.name, image_entry


def convert_images(images_path, save_path, lab_to_int=None):
    """
    Convert images into feature vectors and saves them in a pickle file.

    This function uses transfer learning. A pre-trained network is loaded and
    used.

    A dictionary mapping labels to integers can be passed in, or can be
    generated and returned. This is so it can be reused on other datasets.
    E.g. the training data may have more classes in than the test data, so
    this mapping needs to be created using the training data and then reused
    on the validation and test data.

    Args:
        images_path (string): Filepath of the directory containing the
            training images. The images must be in folders with the category
            names. Entries of images_path that are not directories, and
            entries of a category folder that are not files, are skipped.

            A suitable file structure is shown below:

            |- images_path/
            |   |- category_1
            |       |- image_1.jpg
            |       |- image_2.jpg
            |       |- ...
            |   |- category_2
            |       |- image_3.jpg
            |       |- image_4.jpg
            |       |- ...
            |   |- ...

        save_path (string): Filepath to a pickle file that will be created by
            this function.
        lab_to_int (dict): Mapping from labels (strings) to integers. Optional
            argument. If provided, this dictionary will be used. If not
            provided, then this dictionary will be generated.

    Returns:
        A dictionary mapping from labels (strings) to integers.

    Raises:
        ValueError: If no image files are found under images_path.
        KeyError: If a category folder name is missing from lab_to_int.
    """
    print('Converting images from: ' + images_path)

    if not lab_to_int:
        _, lab_to_int = enumerate_labels(images_path)

    # Convert each image to a feature vector
    feature_vectors = []
    labels = []

    with tf.Graph().as_default():
        mod = hub.Module("https://tfhub.dev/google/imagenet/inception_v3/"
                         "feature_vector/1")
        height, width = hub.get_expected_image_size(mod)
        # [batch_size, height, width, channels]
        images = tf.placeholder(tf.float32,
                                shape=[1, height, width, 3],
                                name='Input_images')
        # Features have shape [batch_size, num_features].
        features = mod(images)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.tables_initializer())

            for label, image_entry in _iter_labelled_images(images_path):
                print(image_entry.name)
                # img_num = 0 accounts for images with multiple frames
                image = imread(os.path.abspath(image_entry.path), img_num=0)
                image = make_square(image)
                # Constant argument prevents deprecation warning
                image = resize(image, (height, width),
                               anti_aliasing=True, mode='constant')
                # The placeholder expects a batch of exactly one image.
                image = np.expand_dims(image, axis=0)
                vec = sess.run(features, feed_dict={images: image})
                feature_vectors.append(vec)
                labels.append(lab_to_int[label])

    if not feature_vectors:
        # np.concatenate would fail with an opaque message on an empty list.
        raise ValueError('No images found in: ' + images_path)

    feature_vectors_array = np.concatenate(feature_vectors, axis=0)
    labels_array = np.array(labels)
    data = {'feature_vectors_array': feature_vectors_array,
            'labels_array': labels_array,
            'label_to_int': lab_to_int}

    with open(save_path, 'wb') as file:
        pickle.dump(data, file, pickle.HIGHEST_PROTOCOL)

    return lab_to_int