def augment_input_data(tensor_dict, data_augmentation_options):
  """Runs the configured data augmentation ops over a dictionary of tensors.

  The image is cast to float and given a leading axis of size one before the
  augmentation ops run; that axis is squeezed away again before returning.

  Args:
    tensor_dict: A dictionary of input tensors keyed by
      fields.InputDataFields. Its image entry is replaced in place before
      preprocessing.
    data_augmentation_options: A list of tuples, where each tuple contains a
      function and a dictionary that contains arguments and their values.
      Usually, this is the output of core/preprocessor.build.

  Returns:
    A dictionary of tensors obtained by applying data augmentation ops to the
    input tensor dictionary.
  """
  image_key = fields.InputDataFields.image
  float_image = tf.to_float(tensor_dict[image_key])
  tensor_dict[image_key] = tf.expand_dims(float_image, 0)

  # Optional groundtruth only takes part in augmentation when it is present.
  has_masks = fields.InputDataFields.groundtruth_instance_masks in tensor_dict
  has_keypoints = fields.InputDataFields.groundtruth_keypoints in tensor_dict
  arg_map = preprocessor.get_default_func_arg_map(
      include_instance_masks=has_masks, include_keypoints=has_keypoints)

  augmented = preprocessor.preprocess(
      tensor_dict, data_augmentation_options, func_arg_map=arg_map)
  augmented[image_key] = tf.squeeze(augmented[image_key], axis=0)
  return augmented
def _extract_prediction_tensors(model,
                                create_input_dict_fn,
                                data_preprocessing_steps,
                                ignore_groundtruth=False,
                                evaluate_with_lexicon=False):
  """Builds the tensors needed to evaluate one recognition example.

  Args:
    model: object providing `predict(images)` and `postprocess(predictions)`;
      the postprocessed result must expose a 'text' entry.
    create_input_dict_fn: callable returning the input tensor dictionary.
    data_preprocessing_steps: preprocessing options forwarded to
      preprocessor.preprocess.
    ignore_groundtruth: accepted for interface compatibility; not read by
      this function.
    evaluate_with_lexicon: if True, the recognized text is replaced by the
      lexicon word with the smallest case-insensitive edit distance to it.

  Returns:
    A dictionary with keys 'original_image', 'original_image_shape',
    'preprocessed_image_shape', 'filename', 'groundtruth_text' and
    'recognition_text'. When the model's predictions contain
    'control_points', the keys 'control_points', 'rectified_images' and
    'generated_images' are added as well.
  """
  # input queue
  input_dict = create_input_dict_fn()
  prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
  input_dict = prefetch_queue.dequeue()
  original_image = tf.to_float(input_dict[fields.InputDataFields.image])
  original_image_shape = tf.shape(original_image)
  input_dict[fields.InputDataFields.image] = original_image

  # data preprocessing
  preprocessed_input_dict = preprocessor.preprocess(input_dict,
                                                    data_preprocessing_steps)

  # model inference
  preprocessed_image = preprocessed_input_dict[fields.InputDataFields.image]
  preprocessed_image_shape = tf.shape(preprocessed_image)
  predictions_dict = model.predict(tf.expand_dims(preprocessed_image, 0))
  recognitions = model.postprocess(predictions_dict)

  def _lexicon_search(lexicon, word):
    """Returns the lexicon entry closest to `word`; the first entry wins ties."""
    lowered_word = word.lower()
    # `np.int` was only an alias of the builtin `int` and no longer exists in
    # NumPy >= 1.24, so the builtin is used directly.
    edit_distances = np.asarray(
        [editdistance.eval(lex_word.lower(), lowered_word)
         for lex_word in lexicon],
        dtype=int)
    return lexicon[np.argmin(edit_distances)]

  if evaluate_with_lexicon:
    lexicon = input_dict[fields.InputDataFields.lexicon]
    recognition_text = tf.py_func(
        _lexicon_search,
        [lexicon, recognitions['text'][0]],
        tf.string,
        stateful=False,
    )
  else:
    recognition_text = recognitions['text'][0]

  tensor_dict = {
      'original_image': original_image,
      'original_image_shape': original_image_shape,
      'preprocessed_image_shape': preprocessed_image_shape,
      'filename': preprocessed_input_dict[fields.InputDataFields.filename],
      'groundtruth_text': input_dict[fields.InputDataFields.groundtruth_text],
      'recognition_text': recognition_text,
  }
  if 'control_points' in predictions_dict:
    tensor_dict.update({
        'control_points': predictions_dict['control_points'],
        'rectified_images': predictions_dict['rectified_images'],
        'generated_images': predictions_dict['generated_images'],
    })
  return tensor_dict
def _create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
                        batch_queue_capacity, num_batch_queue_threads,
                        prefetch_queue_capacity, data_augmentation_options):
  """Sets up reader, prefetcher and returns input queue.

  Args:
    batch_size_per_clone: batch size to use per clone.
    create_tensor_dict_fn: function to create tensor dictionary. The
      dictionary must contain the image and groundtruth_oriented_boxes
      fields.
    batch_queue_capacity: maximum number of elements to store within a queue.
    num_batch_queue_threads: number of threads to use for batching.
    prefetch_queue_capacity: maximum capacity of the queue used to prefetch
      assembled batches.
    data_augmentation_options: a list of tuples, where each tuple contains a
      data augmentation function and a dictionary containing arguments and
      their values (see preprocessor.py). Augmentation is skipped when this
      is empty or None.

  Returns:
    input queue: a batcher.BatchQueue object holding enqueued tensor_dicts
      (which hold images, boxes and targets).
  """
  tensor_dict = create_tensor_dict_fn()

  # Add a leading axis of size one to the image, then cast it to float.
  tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
      tensor_dict[fields.InputDataFields.image], 0)
  images = tensor_dict[fields.InputDataFields.image]
  float_images = tf.to_float(images)
  tensor_dict[fields.InputDataFields.image] = float_images

  # View every oriented box as 4 rows of 2 values.
  tensor_dict[fields.InputDataFields.groundtruth_oriented_boxes] = tf.reshape(
      tensor_dict[fields.InputDataFields.groundtruth_oriented_boxes],
      [-1, 4, 2])

  if data_augmentation_options:
    tensor_dict = preprocessor.preprocess(tensor_dict,
                                          data_augmentation_options)

  # The interactive-debugging scaffold (a disabled session run plus
  # pdb.set_trace() breakpoints) that used to sit here has been removed so
  # that building the queue never drops into the debugger.
  input_queue = batcher.BatchQueue(
      tensor_dict,
      batch_size=batch_size_per_clone,
      batch_queue_capacity=batch_queue_capacity,
      num_batch_queue_threads=num_batch_queue_threads,
      prefetch_queue_capacity=prefetch_queue_capacity)
  return input_queue
def create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
                       batch_queue_capacity, num_batch_queue_threads,
                       prefetch_queue_capacity, data_augmentation_options):
  """Builds the batched, prefetched training input queue.

  The image produced by `create_tensor_dict_fn` gets a leading axis of size
  one and is cast to float. If augmentation options are given, they are
  applied with an argument map that always includes label weights and
  includes multiclass scores, instance masks and keypoints only when those
  fields are present in the tensor dictionary.

  Args:
    batch_size_per_clone: batch size to use per clone.
    create_tensor_dict_fn: function to create tensor dictionary.
    batch_queue_capacity: maximum number of elements to store within a queue.
    num_batch_queue_threads: number of threads to use for batching.
    prefetch_queue_capacity: maximum capacity of the queue used to prefetch
      assembled batches.
    data_augmentation_options: a list of tuples, where each tuple contains a
      data augmentation function and a dictionary containing arguments and
      their values (see preprocessor.py).

  Returns:
    input queue: a batcher.BatchQueue object holding enqueued tensor_dicts
      (which hold images, boxes and targets).
  """
  tensors = create_tensor_dict_fn()

  image_key = fields.InputDataFields.image
  batched_image = tf.expand_dims(tensors[image_key], 0)
  tensors[image_key] = tf.to_float(batched_image)

  has_masks = fields.InputDataFields.groundtruth_instance_masks in tensors
  has_keypoints = fields.InputDataFields.groundtruth_keypoints in tensors
  has_multiclass_scores = fields.InputDataFields.multiclass_scores in tensors

  if data_augmentation_options:
    arg_map = preprocessor.get_default_func_arg_map(
        include_label_weights=True,
        include_multiclass_scores=has_multiclass_scores,
        include_instance_masks=has_masks,
        include_keypoints=has_keypoints)
    tensors = preprocessor.preprocess(
        tensors, data_augmentation_options, func_arg_map=arg_map)

  return batcher.BatchQueue(
      tensors,
      batch_size=batch_size_per_clone,
      batch_queue_capacity=batch_queue_capacity,
      num_batch_queue_threads=num_batch_queue_threads,
      prefetch_queue_capacity=prefetch_queue_capacity)
def _create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
                        batch_queue_capacity, num_batch_queue_threads,
                        prefetch_queue_capacity, data_augmentation_options):
  """Creates the batch queue that feeds one clone.

  The image is cast to float and the tensor dictionary is always passed
  through preprocessor.preprocess, whatever `data_augmentation_options`
  contains.

  Args:
    batch_size_per_clone: batch size to use per clone.
    create_tensor_dict_fn: function to create tensor dictionary.
    batch_queue_capacity: capacity forwarded to batcher.BatchQueue.
    num_batch_queue_threads: thread count forwarded to batcher.BatchQueue.
    prefetch_queue_capacity: prefetch capacity forwarded to
      batcher.BatchQueue.
    data_augmentation_options: options forwarded to preprocessor.preprocess.

  Returns:
    The batcher.BatchQueue built from the preprocessed tensor dictionary.
  """
  raw_tensors = create_tensor_dict_fn()
  image_key = fields.InputDataFields.image
  raw_tensors[image_key] = tf.to_float(raw_tensors[image_key])

  processed_tensors = preprocessor.preprocess(raw_tensors,
                                              data_augmentation_options)
  queue_kwargs = dict(
      batch_size=batch_size_per_clone,
      batch_queue_capacity=batch_queue_capacity,
      num_batch_queue_threads=num_batch_queue_threads,
      prefetch_queue_capacity=prefetch_queue_capacity)
  return batcher.BatchQueue(processed_tensors, **queue_kwargs)
def _create_input_queue(batch_size_per_clone,
                        create_tensor_dict_fn,
                        detection_model,
                        batch_queue_capacity,
                        num_batch_queue_threads,
                        prefetch_queue_capacity,
                        data_augmentation_options,
                        image_path,
                        seq_length=20):
  """Sets up reader, prefetcher and returns a queue of frame sequences.

  Each example is a sequence of `seq_length` frames read from disk through
  tf.py_func, resized to 300x300 and paired with its boxes. All classes are
  set to one.

  Args:
    batch_size_per_clone: batch size to use per clone.
    create_tensor_dict_fn: function to create tensor dictionary. The
      dictionary must provide the 'folder', 'filename' and
      'groundtruth_boxes' entries.
    detection_model: object whose `preprocess` method is applied to the image
      of every frame.
    batch_queue_capacity: maximum number of elements to store within a queue.
    num_batch_queue_threads: number of threads to use for batching; the same
      number of enqueue ops feeds the prefetch queue.
    prefetch_queue_capacity: maximum capacity of the queue used to prefetch
      assembled batches.
    data_augmentation_options: only its truthiness is used: when truthy,
      every frame dictionary is passed through the module-level `preprocess`.
    image_path: prefix that is concatenated directly with the folder name
      (no path separator is inserted) to locate the '.JPEG' frames.
    seq_length: number of frames per example.

  Returns:
    The tf.FIFOQueue used for prefetching, holding the batched tensor
    dictionaries.
  """
  tensor_dict = create_tensor_dict_fn()
  frame_size = 300  # Side length every frame is resized to.

  def _read_image(folder, im_names, groundtruth_boxes, seq_length=20):
    """Loads `seq_length` frames of one video together with their boxes."""
    # NOTE(review): on Python 3 tf.py_func presumably hands string tensors
    # over as bytes; confirm the str concatenations below in that setting.
    num_frames = len(im_names)
    if num_frames >= seq_length:
      # Enough frames: take a random contiguous window.
      start_id = np.random.randint(0, num_frames - seq_length + 1)
      frame_ids = np.arange(start_id, start_id + seq_length)
    else:
      # Too few frames: draw frame indices independently (repeats possible).
      frame_ids = np.random.randint(0, num_frames, seq_length)
    imgs = np.zeros([seq_length, frame_size, frame_size, 3], dtype=np.uint8)
    for ind, frame_id in enumerate(frame_ids):
      pil_img = Image.open(
          os.path.join(image_path + folder, im_names[frame_id] + '.JPEG'))
      pil_img = pil_img.resize((frame_size, frame_size))
      img = np.array(pil_img).astype(np.uint8)
      if img.ndim < 3:
        # Single-channel image: replicate it into three channels.
        img = np.repeat(np.expand_dims(img, axis=2), repeats=3, axis=2)
      elif img.shape[2] != 3:
        # Any other channel count (e.g. four) cannot be stored in `imgs`;
        # let PIL convert it to RGB instead of failing on the assignment.
        img = np.array(pil_img.convert('RGB')).astype(np.uint8)
      imgs[ind] = img
    groundtruth_boxes = groundtruth_boxes[frame_ids, :]
    groundtruth_classes = np.ones([seq_length, 1], dtype=np.float32)
    return imgs, groundtruth_boxes, groundtruth_classes

  images, groundtruth_boxes, groundtruth_classes = tf.py_func(
      _read_image, [
          tensor_dict['folder'], tensor_dict['filename'],
          tensor_dict['groundtruth_boxes'], seq_length
      ], [tf.uint8, tf.float32, tf.float32])
  # py_func outputs carry no static shape information; restore it.
  images.set_shape([seq_length, frame_size, frame_size, 3])
  float_images = tf.to_float(images)
  groundtruth_boxes.set_shape([seq_length, 4])
  groundtruth_classes.set_shape([seq_length, 1])

  tensor_dict = dict()
  tensor_dict[fields.InputDataFields.image] = float_images
  tensor_dict[fields.InputDataFields.groundtruth_boxes] = groundtruth_boxes
  tensor_dict[fields.InputDataFields.groundtruth_classes] = groundtruth_classes

  tensor_dicts = _split_tensor_dict(tensor_dict, seq_length)
  if data_augmentation_options:
    tensor_dicts = [
        preprocess(frame_dict.copy()) for frame_dict in tensor_dicts
    ]

  for i in range(seq_length):
    tensor_dicts[i][fields.InputDataFields.image] = detection_model.preprocess(
        tensor_dicts[i][fields.InputDataFields.image])
    tensor_dicts[i][fields.InputDataFields.groundtruth_classes].set_shape(
        [1, 1])
    tensor_dicts[i][fields.InputDataFields.groundtruth_boxes].set_shape(
        [1, 4])

  concat_tensor_dict = _concat_tensor_dicts(tensor_dicts)

  batched_tensor = tf.train.batch(concat_tensor_dict,
                                  capacity=batch_queue_capacity,
                                  batch_size=batch_size_per_clone,
                                  num_threads=num_batch_queue_threads,
                                  dynamic_pad=True)

  dtypes = [t.dtype for t in batched_tensor.values()]
  shapes = [t.get_shape() for t in batched_tensor.values()]
  names = list(batched_tensor.keys())

  prefetch_queue = tf.FIFOQueue(capacity=prefetch_queue_capacity,
                                dtypes=dtypes,
                                shapes=shapes,
                                names=names)
  init_prefetch = prefetch_queue.enqueue(batched_tensor)
  tf.train.add_queue_runner(
      tf.train.QueueRunner(prefetch_queue,
                           [init_prefetch] * num_batch_queue_threads))
  return prefetch_queue
def _create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
                        detection_model, batch_queue_capacity,
                        num_batch_queue_threads, prefetch_queue_capacity,
                        image_path):
  """Sets up reader, prefetcher and returns a queue of image pairs.

  One image is read from disk through tf.py_func and duplicated into a
  sequence of two entries. Each entry is passed separately through the
  module-level `preprocess` and then through `detection_model.preprocess`.

  Args:
    batch_size_per_clone: batch size to use per clone.
    create_tensor_dict_fn: function to create tensor dictionary. Element
      [0, 0] of its filename field names the image to load.
    detection_model: object whose `preprocess` method is applied to the image
      of every sequence entry.
    batch_queue_capacity: maximum number of elements to store within a queue.
    num_batch_queue_threads: number of threads to use for batching; the same
      number of enqueue ops feeds the prefetch queue.
    prefetch_queue_capacity: maximum capacity of the queue used to prefetch
      assembled batches.
    image_path: directory containing the '.JPEG' images.

  Returns:
    The tf.PaddingFIFOQueue used for prefetching, holding the batched tensor
    dictionaries.
  """
  tensor_dict = create_tensor_dict_fn()

  def _read_image(im_name):
    """Loads one image as an array with three channels."""
    # NOTE(review): on Python 3 tf.py_func presumably hands string tensors
    # over as bytes; confirm `im_name + '.JPEG'` in that setting.
    pil_img = Image.open(os.path.join(image_path, im_name + '.JPEG'))
    max_size = max(pil_img.width, pil_img.height)
    if max_size > 500:
      # Shrink so that the longer side becomes 500 pixels.
      scale = 500.0 / max_size
      pil_img = pil_img.resize(
          (int(pil_img.width * scale), int(pil_img.height * scale)))
    img = np.array(pil_img)
    if img.ndim < 3:
      # Single-channel image: replicate it into three channels.
      img = np.repeat(np.expand_dims(img, axis=2), repeats=3, axis=2)
    elif img.shape[2] != 3:
      # Any other channel count (e.g. four) would contradict the
      # [None, None, 3] shape declared below; convert it to RGB.
      img = np.array(pil_img.convert('RGB'))
    return img

  image = tf.py_func(_read_image,
                     [tensor_dict[fields.InputDataFields.filename][0, 0]],
                     [tf.uint8])[0]
  image.set_shape([None, None, 3])
  image = tf.expand_dims(image, 0)
  float_image = tf.to_float(image)
  tensor_dict[fields.InputDataFields.image] = float_image

  seq_length = 2
  # Every sequence entry starts from its own shallow copy of the same example.
  tensor_dicts = [preprocess(tensor_dict.copy()) for _ in range(seq_length)]

  for i in range(seq_length):
    tensor_dicts[i][fields.InputDataFields.image] = detection_model.preprocess(
        tensor_dicts[i][fields.InputDataFields.image])
    # 'original_image' is filled with the model-preprocessed image.
    tensor_dicts[i]['original_image'] = tensor_dicts[i][
        fields.InputDataFields.image]
    tensor_dicts[i][fields.InputDataFields.groundtruth_classes].set_shape(
        [1, 1])
    tensor_dicts[i][fields.InputDataFields.filename].set_shape([1, 1])
    tensor_dicts[i][fields.InputDataFields.groundtruth_boxes].set_shape(
        [1, 4])

  concat_tensor_dict = _concat_tensor_dicts(tensor_dicts)

  batched_tensor = tf.train.batch(concat_tensor_dict,
                                  capacity=batch_queue_capacity,
                                  batch_size=batch_size_per_clone,
                                  num_threads=num_batch_queue_threads,
                                  dynamic_pad=True)

  dtypes = [t.dtype for t in batched_tensor.values()]
  shapes = [t.get_shape() for t in batched_tensor.values()]
  names = list(batched_tensor.keys())

  prefetch_queue = tf.PaddingFIFOQueue(capacity=prefetch_queue_capacity,
                                       dtypes=dtypes,
                                       shapes=shapes,
                                       names=names)
  init_prefetch = prefetch_queue.enqueue(batched_tensor)
  tf.train.add_queue_runner(
      tf.train.QueueRunner(prefetch_queue,
                           [init_prefetch] * num_batch_queue_threads))
  return prefetch_queue
def _create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
                        detection_model, batch_queue_capacity,
                        num_batch_queue_threads, prefetch_queue_capacity,
                        image_path):
  """Sets up reader, prefetcher and returns a queue of frame pairs.

  Each example consists of two frames drawn at random from one video (the
  same frame may be drawn twice). Frames are read from disk through
  tf.py_func and resized to 300x300. All classes are set to one.

  Args:
    batch_size_per_clone: batch size to use per clone.
    create_tensor_dict_fn: function to create tensor dictionary. The
      dictionary must provide the 'folder', 'filename' and
      'groundtruth_boxes' entries.
    detection_model: object whose `preprocess` method is applied to the image
      of every frame.
    batch_queue_capacity: maximum number of elements to store within a queue.
    num_batch_queue_threads: number of threads to use for batching; the same
      number of enqueue ops feeds the prefetch queue.
    prefetch_queue_capacity: maximum capacity of the queue used to prefetch
      assembled batches.
    image_path: prefix that is concatenated directly with the folder name
      (no path separator is inserted) to locate the '.JPEG' frames.

  Returns:
    The tf.FIFOQueue used for prefetching, holding the batched tensor
    dictionaries.
  """
  tensor_dict = create_tensor_dict_fn()
  seq_length = 2    # Frames per example.
  frame_size = 300  # Side length every frame is resized to.

  def _read_image(folder, im_names, groundtruth_boxes):
    """Loads `seq_length` random frames of one video with their boxes."""
    # NOTE(review): on Python 3 tf.py_func presumably hands string tensors
    # over as bytes; confirm the str concatenations below in that setting.
    num_frames = len(im_names)
    frame_ids = np.random.randint(0, num_frames, seq_length)
    imgs = np.zeros([seq_length, frame_size, frame_size, 3], dtype=np.uint8)
    for ind, frame_id in enumerate(frame_ids):
      pil_img = Image.open(
          os.path.join(image_path + folder, im_names[frame_id] + '.JPEG'))
      pil_img = pil_img.resize((frame_size, frame_size))
      img = np.array(pil_img).astype(np.uint8)
      if img.ndim < 3:
        # Single-channel image: replicate it into three channels.
        img = np.repeat(np.expand_dims(img, axis=2), repeats=3, axis=2)
      elif img.shape[2] != 3:
        # Any other channel count (e.g. four) cannot be stored in `imgs`;
        # let PIL convert it to RGB instead of failing on the assignment.
        img = np.array(pil_img.convert('RGB')).astype(np.uint8)
      imgs[ind] = img
    groundtruth_boxes = groundtruth_boxes[frame_ids, :]
    groundtruth_classes = np.ones([seq_length, 1], dtype=np.float32)
    return imgs, groundtruth_boxes, groundtruth_classes

  images, groundtruth_boxes, groundtruth_classes = tf.py_func(
      _read_image, [
          tensor_dict['folder'], tensor_dict['filename'],
          tensor_dict['groundtruth_boxes']
      ], [tf.uint8, tf.float32, tf.float32])
  # py_func outputs carry no static shape information; restore it.
  images.set_shape([seq_length, frame_size, frame_size, 3])
  float_images = tf.to_float(images)
  groundtruth_boxes.set_shape([seq_length, 4])
  groundtruth_classes.set_shape([seq_length, 1])

  tensor_dict = dict()
  tensor_dict[fields.InputDataFields.image] = float_images
  tensor_dict[fields.InputDataFields.groundtruth_boxes] = groundtruth_boxes
  tensor_dict[fields.InputDataFields.groundtruth_classes] = groundtruth_classes

  tensor_dicts = _split_tensor_dict(tensor_dict, seq_length)
  tensor_dicts = [preprocess(frame_dict.copy()) for frame_dict in tensor_dicts]

  for i in range(seq_length):
    tensor_dicts[i][fields.InputDataFields.image] = detection_model.preprocess(
        tensor_dicts[i][fields.InputDataFields.image])
    # 'original_image' is filled with the model-preprocessed image.
    tensor_dicts[i]['original_image'] = tensor_dicts[i][
        fields.InputDataFields.image]
    tensor_dicts[i][fields.InputDataFields.groundtruth_classes].set_shape(
        [1, 1])
    tensor_dicts[i][fields.InputDataFields.groundtruth_boxes].set_shape(
        [1, 4])

  concat_tensor_dict = _concat_tensor_dicts(tensor_dicts)

  batched_tensor = tf.train.batch(concat_tensor_dict,
                                  capacity=batch_queue_capacity,
                                  batch_size=batch_size_per_clone,
                                  num_threads=num_batch_queue_threads,
                                  dynamic_pad=True)

  dtypes = [t.dtype for t in batched_tensor.values()]
  shapes = [t.get_shape() for t in batched_tensor.values()]
  names = list(batched_tensor.keys())

  prefetch_queue = tf.FIFOQueue(capacity=prefetch_queue_capacity,
                                dtypes=dtypes,
                                shapes=shapes,
                                names=names)
  init_prefetch = prefetch_queue.enqueue(batched_tensor)
  tf.train.add_queue_runner(
      tf.train.QueueRunner(prefetch_queue,
                           [init_prefetch] * num_batch_queue_threads))
  return prefetch_queue
def _create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
                        detection_model, batch_queue_capacity,
                        num_batch_queue_threads, prefetch_queue_capacity,
                        data_augmentation_options, image_path):
  """Sets up reader, prefetcher and returns a queue of image pairs.

  One image is read from disk through tf.py_func and duplicated into a
  sequence of two entries. Each entry is optionally passed through the
  module-level `preprocess` and then through `detection_model.preprocess`.

  Args:
    batch_size_per_clone: batch size to use per clone.
    create_tensor_dict_fn: function to create tensor dictionary. Element
      [0, 0] of its filename field names the image to load.
    detection_model: object whose `preprocess` method is applied to the image
      of every sequence entry.
    batch_queue_capacity: maximum number of elements to store within a queue.
    num_batch_queue_threads: number of threads to use for batching; the same
      number of enqueue ops feeds the prefetch queue.
    prefetch_queue_capacity: maximum capacity of the queue used to prefetch
      assembled batches.
    data_augmentation_options: only its truthiness is used: when truthy,
      every sequence entry is passed through the module-level `preprocess`.
    image_path: directory containing the '.JPEG' images.

  Returns:
    The tf.FIFOQueue used for prefetching, holding the batched tensor
    dictionaries.
  """
  tensor_dict = create_tensor_dict_fn()

  def _read_image(im_name):
    """Loads one image as an array with three channels."""
    # NOTE(review): on Python 3 tf.py_func presumably hands string tensors
    # over as bytes; confirm `im_name + '.JPEG'` in that setting.
    pil_img = Image.open(os.path.join(image_path, im_name + '.JPEG'))
    max_size = max(pil_img.width, pil_img.height)
    if max_size > 500:
      # Shrink so that the longer side becomes 500 pixels.
      scale = 500.0 / max_size
      pil_img = pil_img.resize(
          (int(pil_img.width * scale), int(pil_img.height * scale)))
    img = np.array(pil_img)
    if img.ndim < 3:
      # Single-channel image: replicate it into three channels.
      img = np.repeat(np.expand_dims(img, axis=2), repeats=3, axis=2)
    elif img.shape[2] != 3:
      print('Warning: image %s has four channels\n' % (im_name))
      # Returning the array unchanged would contradict the [None, None, 3]
      # shape declared below, so convert it to RGB after warning.
      img = np.array(pil_img.convert('RGB'))
    return img

  image = tf.py_func(_read_image,
                     [tensor_dict[fields.InputDataFields.filename][0, 0]],
                     [tf.uint8])[0]
  image.set_shape([None, None, 3])
  image = tf.expand_dims(image, 0)
  float_image = tf.to_float(image)
  tensor_dict[fields.InputDataFields.image] = float_image

  seq_length = 2
  # Build independent copies. `[d.copy()] * n` would repeat ONE dict object,
  # so without augmentation the loop below would run
  # detection_model.preprocess twice on the same image entry.
  tensor_dicts = [tensor_dict.copy() for _ in range(seq_length)]
  if data_augmentation_options:
    tensor_dicts = [preprocess(frame_dict) for frame_dict in tensor_dicts]

  for i in range(seq_length):
    tensor_dicts[i][fields.InputDataFields.image] = detection_model.preprocess(
        tensor_dicts[i][fields.InputDataFields.image])
    tensor_dicts[i][fields.InputDataFields.groundtruth_classes].set_shape(
        [1, 1])
    tensor_dicts[i][fields.InputDataFields.filename].set_shape([1, 1])
    tensor_dicts[i][fields.InputDataFields.groundtruth_boxes].set_shape(
        [1, 4])

  concat_tensor_dict = _concat_tensor_dicts(tensor_dicts)

  batched_tensor = tf.train.batch(concat_tensor_dict,
                                  capacity=batch_queue_capacity,
                                  batch_size=batch_size_per_clone,
                                  num_threads=num_batch_queue_threads,
                                  dynamic_pad=True)

  dtypes = [t.dtype for t in batched_tensor.values()]
  shapes = [t.get_shape() for t in batched_tensor.values()]
  names = list(batched_tensor.keys())

  prefetch_queue = tf.FIFOQueue(capacity=prefetch_queue_capacity,
                                dtypes=dtypes,
                                shapes=shapes,
                                names=names)
  init_prefetch = prefetch_queue.enqueue(batched_tensor)
  tf.train.add_queue_runner(
      tf.train.QueueRunner(prefetch_queue,
                           [init_prefetch] * num_batch_queue_threads))
  return prefetch_queue