def testRandomCropToAspectRatio_custom(self, i=0):
  root_path = "/home/wenxiang/Documents/test/"
  image_file_path = root_path + "1.jpg"
  reader = tf.read_file(image_file_path)
  images = tf.image.decode_jpeg(reader)
  images = tf.expand_dims(images, 0)
  images = tf.image.convert_image_dtype(images, dtype=tf.float32)
  boxes = tf.constant(
      [[0.00, 0.00, 0.20, 0.20],
       [0.00, 0.80, 0.20, 1.00],
       [0.80, 0.00, 1.00, 0.20],
       [0.80, 0.80, 1.00, 1.00],
       [0.40, 0.40, 0.60, 0.60]],
      dtype=tf.float32)
  # labels = self.createTestLabels()
  labels = tf.constant([1, 2, 2, 3, 4], dtype=tf.int32)

  # TODO: test random_crop (with blackout) in a separate test case?
  tensor_dict = {
      fields.InputDataFields.image: images,
      fields.InputDataFields.groundtruth_boxes: boxes,
      fields.InputDataFields.groundtruth_classes: labels,
  }
  tensor_dict = preprocessor.preprocess(tensor_dict, [])
  images = tensor_dict[fields.InputDataFields.image]

  preprocessing_options = [(preprocessor.random_crop_image, {})]
  cropped_tensor_dict = preprocessor.preprocess(tensor_dict,
                                                preprocessing_options)
  cropped_images = cropped_tensor_dict[fields.InputDataFields.image]

  def _write_jpeg(filename, img):
    img = tf.squeeze(img)
    img = tf.image.convert_image_dtype(img, dtype=tf.uint8)
    img = tf.image.encode_jpeg(img, format='rgb', quality=100)
    file_path = root_path + "result/" + filename + ".jpg"
    fwrite_op = tf.write_file(file_path, img)
    return fwrite_op

  run_op = []
  write_op = _write_jpeg("test_" + str(i), cropped_images)
  run_op.append(write_op)
  with self.test_session() as sess:
    sess.run(run_op)
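# A hedged sketch, not part of the original test: random_crop_image above is
# invoked with an empty argument dict, so it runs with its defaults. The
# keyword names below follow the preprocessor.random_crop_image signature in
# the TF Object Detection API; the values are illustrative assumptions.
preprocessing_options = [
    (preprocessor.random_crop_image, {
        'min_object_covered': 1.0,           # each box must survive the crop
        'aspect_ratio_range': (0.75, 1.33),  # allowed crop aspect ratios
        'area_range': (0.5, 1.0),            # crop covers 50-100% of the image
        'random_coef': 0.15,                 # chance of keeping the full image
    }),
]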
def augment_input_data(tensor_dict, data_augmentation_options):
  """Applies data augmentation ops to input tensors.

  Args:
    tensor_dict: A dictionary of input tensors keyed by
      fields.InputDataFields.
    data_augmentation_options: A list of tuples, where each tuple contains a
      function and a dictionary that contains arguments and their values.
      Usually, this is the output of core/preprocessor.build.

  Returns:
    A dictionary of tensors obtained by applying data augmentation ops to the
    input tensor dictionary.
  """
  tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
      tf.to_float(tensor_dict[fields.InputDataFields.image]), 0)

  include_instance_masks = (fields.InputDataFields.groundtruth_instance_masks
                            in tensor_dict)
  include_keypoints = (fields.InputDataFields.groundtruth_keypoints
                       in tensor_dict)
  tensor_dict = preprocessor.preprocess(
      tensor_dict, data_augmentation_options,
      func_arg_map=preprocessor.get_default_func_arg_map(
          include_instance_masks=include_instance_masks,
          include_keypoints=include_keypoints))
  tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      tensor_dict[fields.InputDataFields.image], axis=0)
  return tensor_dict
def augment_input_data(tensor_dict, data_augmentation_options):
  """Applies data augmentation ops to input tensors.

  Args:
    tensor_dict: A dictionary of input tensors keyed by
      fields.InputDataFields.
    data_augmentation_options: A list of tuples, where each tuple contains a
      function and a dictionary that contains arguments and their values.
      Usually, this is the output of core/preprocessor.build.

  Returns:
    A dictionary of tensors obtained by applying data augmentation ops to the
    input tensor dictionary.
  """
  tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
      tf.cast(tensor_dict[fields.InputDataFields.image], dtype=tf.float32), 0)

  include_instance_masks = (fields.InputDataFields.groundtruth_instance_masks
                            in tensor_dict)
  include_keypoints = (fields.InputDataFields.groundtruth_keypoints
                       in tensor_dict)
  include_label_weights = (fields.InputDataFields.groundtruth_weights
                           in tensor_dict)
  include_label_confidences = (fields.InputDataFields.groundtruth_confidences
                               in tensor_dict)
  include_multiclass_scores = (fields.InputDataFields.multiclass_scores
                               in tensor_dict)
  tensor_dict = preprocessor.preprocess(
      tensor_dict, data_augmentation_options,
      func_arg_map=preprocessor.get_default_func_arg_map(
          include_label_weights=include_label_weights,
          include_label_confidences=include_label_confidences,
          include_multiclass_scores=include_multiclass_scores,
          include_instance_masks=include_instance_masks,
          include_keypoints=include_keypoints))
  tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      tensor_dict[fields.InputDataFields.image], axis=0)
  return tensor_dict
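# Hedged usage sketch (not in the original source): a caller typically builds
# data_augmentation_options from the training config via preprocessor_builder
# and then applies them with augment_input_data. `train_config` is assumed to
# be a train.proto message with a repeated data_augmentation_options field.
data_augmentation_options = [
    preprocessor_builder.build(step)
    for step in train_config.data_augmentation_options
]
augmented_dict = augment_input_data(tensor_dict, data_augmentation_options)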
def create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
                       batch_queue_capacity, num_batch_queue_threads,
                       prefetch_queue_capacity, data_augmentation_options):
  """Sets up reader, prefetcher and returns input queue.

  Args:
    batch_size_per_clone: batch size to use per clone.
    create_tensor_dict_fn: function to create tensor dictionary.
    batch_queue_capacity: maximum number of elements to store within a queue.
    num_batch_queue_threads: number of threads to use for batching.
    prefetch_queue_capacity: maximum capacity of the queue used to prefetch
      assembled batches.
    data_augmentation_options: a list of tuples, where each tuple contains a
      data augmentation function and a dictionary containing arguments and
      their values (see preprocessor.py).

  Returns:
    input queue: a batcher.BatchQueue object holding enqueued tensor_dicts
      (which hold images, boxes and targets).  To get a batch of tensor_dicts,
      call input_queue.Dequeue().
  """
  tensor_dict = create_tensor_dict_fn()
  tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
      tensor_dict[fields.InputDataFields.image], 0)

  images = tensor_dict[fields.InputDataFields.image]
  float_images = tf.to_float(images)
  tensor_dict[fields.InputDataFields.image] = float_images

  # For audio input: add a batch dimension and cast to float, mirroring the
  # image handling above.
  tensor_dict[fields.InputDataFields.audio] = tf.expand_dims(
      tensor_dict[fields.InputDataFields.audio], 0)
  audios = tensor_dict[fields.InputDataFields.audio]
  float_audios = tf.to_float(audios)
  tensor_dict[fields.InputDataFields.audio] = float_audios

  include_instance_masks = (fields.InputDataFields.groundtruth_instance_masks
                            in tensor_dict)
  include_keypoints = (fields.InputDataFields.groundtruth_keypoints
                       in tensor_dict)
  if data_augmentation_options:
    tensor_dict = preprocessor.preprocess(
        tensor_dict, data_augmentation_options,
        func_arg_map=preprocessor.get_default_func_arg_map(
            include_instance_masks=include_instance_masks,
            include_keypoints=include_keypoints))

  input_queue = batcher.BatchQueue(
      tensor_dict,
      batch_size=batch_size_per_clone,
      batch_queue_capacity=batch_queue_capacity,
      num_batch_queue_threads=num_batch_queue_threads,
      prefetch_queue_capacity=prefetch_queue_capacity)
  return input_queue
def _create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
                        batch_queue_capacity, num_batch_queue_threads,
                        prefetch_queue_capacity, data_augmentation_options):
  """Sets up reader, prefetcher and returns input queue.

  Args:
    batch_size_per_clone: batch size to use per clone.
    create_tensor_dict_fn: function to create tensor dictionary.
    batch_queue_capacity: maximum number of elements to store within a queue.
    num_batch_queue_threads: number of threads to use for batching.
    prefetch_queue_capacity: maximum capacity of the queue used to prefetch
      assembled batches.
    data_augmentation_options: a list of tuples, where each tuple contains a
      data augmentation function and a dictionary containing arguments and
      their values (see preprocessor.py).

  Returns:
    input queue: a batcher.BatchQueue object holding enqueued tensor_dicts
      (which hold images, boxes and targets).  To get a batch of tensor_dicts,
      call input_queue.Dequeue().
  """
  tensor_dict = create_tensor_dict_fn()

  # The decoded image is assumed to arrive as a SparseTensor here; densify it
  # before adding the batch dimension.
  img = tensor_dict[fields.InputDataFields.image]
  img = tf.sparse_tensor_to_dense(img)
  tensor_dict[fields.InputDataFields.image] = tf.expand_dims(img, 0)

  images = tensor_dict[fields.InputDataFields.image]
  float_images = tf.to_float(images)
  tensor_dict[fields.InputDataFields.image] = float_images

  if data_augmentation_options:
    tensor_dict = preprocessor.preprocess(tensor_dict,
                                          data_augmentation_options)

  input_queue = batcher.BatchQueue(
      tensor_dict,
      batch_size=batch_size_per_clone,
      batch_queue_capacity=batch_queue_capacity,
      num_batch_queue_threads=num_batch_queue_threads,
      prefetch_queue_capacity=prefetch_queue_capacity)
  return input_queue
def create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
                       batch_queue_capacity, num_batch_queue_threads,
                       prefetch_queue_capacity, data_augmentation_options):
  """Sets up reader, prefetcher and returns input queue.

  Args:
    batch_size_per_clone: batch size to use per clone.
    create_tensor_dict_fn: function to create tensor dictionary.
    batch_queue_capacity: maximum number of elements to store within a queue.
    num_batch_queue_threads: number of threads to use for batching.
    prefetch_queue_capacity: maximum capacity of the queue used to prefetch
      assembled batches.
    data_augmentation_options: a list of tuples, where each tuple contains a
      data augmentation function and a dictionary containing arguments and
      their values (see preprocessor.py).

  Returns:
    input queue: a batcher.BatchQueue object holding enqueued tensor_dicts
      (which hold images, boxes and targets).  To get a batch of tensor_dicts,
      call input_queue.Dequeue().
  """
  tensor_dict = create_tensor_dict_fn()
  tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
      tensor_dict[fields.InputDataFields.image], 0)

  images = tensor_dict[fields.InputDataFields.image]
  float_images = tf.to_float(images)
  tensor_dict[fields.InputDataFields.image] = float_images

  include_instance_masks = (fields.InputDataFields.groundtruth_instance_masks
                            in tensor_dict)
  include_keypoints = (fields.InputDataFields.groundtruth_keypoints
                       in tensor_dict)
  include_multiclass_scores = (fields.InputDataFields.multiclass_scores
                               in tensor_dict)
  if data_augmentation_options:
    tensor_dict = preprocessor.preprocess(
        tensor_dict, data_augmentation_options,
        func_arg_map=preprocessor.get_default_func_arg_map(
            include_label_weights=True,
            include_multiclass_scores=include_multiclass_scores,
            include_instance_masks=include_instance_masks,
            include_keypoints=include_keypoints))

  input_queue = batcher.BatchQueue(
      tensor_dict,
      batch_size=batch_size_per_clone,
      batch_queue_capacity=batch_queue_capacity,
      num_batch_queue_threads=num_batch_queue_threads,
      prefetch_queue_capacity=prefetch_queue_capacity)
  return input_queue
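# Hedged usage sketch (assumption, not in the original): draining the queue in
# a training loop. batcher.BatchQueue exposes a dequeue() method that returns
# a list of tensor_dicts, one per example in the batch; the argument values
# below are illustrative.
input_queue = create_input_queue(
    batch_size_per_clone=2,
    create_tensor_dict_fn=create_tensor_dict_fn,
    batch_queue_capacity=8,
    num_batch_queue_threads=4,
    prefetch_queue_capacity=4,
    data_augmentation_options=data_augmentation_options)
tensor_dicts = input_queue.dequeue()
images = [d[fields.InputDataFields.image] for d in tensor_dicts]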
def _create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
                        batch_queue_capacity, num_batch_queue_threads,
                        prefetch_queue_capacity, data_augmentation_options):
  """Sets up reader, prefetcher and returns input queue.

  Args:
    batch_size_per_clone: batch size to use per clone.
    create_tensor_dict_fn: function to create tensor dictionary.
    batch_queue_capacity: maximum number of elements to store within a queue.
    num_batch_queue_threads: number of threads to use for batching.
    prefetch_queue_capacity: maximum capacity of the queue used to prefetch
      assembled batches.
    data_augmentation_options: a list of tuples, where each tuple contains a
      data augmentation function and a dictionary containing arguments and
      their values (see preprocessor.py).

  Returns:
    input queue: a batcher.BatchQueue object holding enqueued tensor_dicts
      (which hold images, boxes and targets).  To get a batch of tensor_dicts,
      call input_queue.Dequeue().
  """
  tensor_dict = create_tensor_dict_fn()
  tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
      tensor_dict[fields.InputDataFields.image], 0)

  images = tensor_dict[fields.InputDataFields.image]
  float_images = tf.to_float(images)
  tensor_dict[fields.InputDataFields.image] = float_images

  next_images = tensor_dict.get(fields.InputDataFields.next_image)
  if next_images is not None:
    next_float_images = tf.to_float(next_images)
    tensor_dict[fields.InputDataFields.next_image] = next_float_images

  if data_augmentation_options:
    # TODO: handle next_image, depth and flow to re-enable augmentations.
    tensor_dict = preprocessor.preprocess(tensor_dict,
                                          data_augmentation_options)

  input_queue = batcher.BatchQueue(
      tensor_dict,
      batch_size=batch_size_per_clone,
      batch_queue_capacity=batch_queue_capacity,
      num_batch_queue_threads=num_batch_queue_threads,
      prefetch_queue_capacity=prefetch_queue_capacity)
  return input_queue
def _create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
                        batch_queue_capacity, num_batch_queue_threads,
                        prefetch_queue_capacity, data_augmentation_options):
  """Sets up reader, prefetcher and returns input queue.

  Args:
    batch_size_per_clone: batch size to use per clone.
    create_tensor_dict_fn: function to create tensor dictionary.
    batch_queue_capacity: maximum number of elements to store within a queue.
    num_batch_queue_threads: number of threads to use for batching.
    prefetch_queue_capacity: maximum capacity of the queue used to prefetch
      assembled batches.
    data_augmentation_options: a list of tuples, where each tuple contains a
      data augmentation function and a dictionary containing arguments and
      their values (see preprocessor.py).

  Returns:
    input queue: a batcher.BatchQueue object holding enqueued tensor_dicts
      (which hold images, boxes and targets).  To get a batch of tensor_dicts,
      call input_queue.Dequeue().
  """
  tensor_dict = create_tensor_dict_fn()
  # Add a batch dimension to the image tensor.
  tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
      tensor_dict[fields.InputDataFields.image], 0)

  images = tensor_dict[fields.InputDataFields.image]
  # Cast the image data to float and store it back into the tensor dict.
  float_images = tf.to_float(images)
  tensor_dict[fields.InputDataFields.image] = float_images

  if data_augmentation_options:
    # preprocess returns a tensor_dict containing the augmented images,
    # bounding boxes, etc.
    tensor_dict = preprocessor.preprocess(tensor_dict,
                                          data_augmentation_options)

  input_queue = batcher.BatchQueue(
      tensor_dict,
      batch_size=batch_size_per_clone,
      batch_queue_capacity=batch_queue_capacity,
      num_batch_queue_threads=num_batch_queue_threads,
      prefetch_queue_capacity=prefetch_queue_capacity)
  return input_queue
def augment_input_data(tensor_dict, data_augmentation_options):
  """Applies data augmentation ops to input tensors.

  Args:
    tensor_dict: A dictionary of input tensors keyed by
      fields.InputDataFields.
    data_augmentation_options: A list of tuples, where each tuple contains a
      function and a dictionary that contains arguments and their values.
      Usually, this is the output of core/preprocessor.build.

  Returns:
    A dictionary of tensors obtained by applying data augmentation ops to the
    input tensor dictionary.
  """
  tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
      tf.cast(tensor_dict[fields.InputDataFields.image], dtype=tf.float32), 0)

  include_instance_masks = (fields.InputDataFields.groundtruth_instance_masks
                            in tensor_dict)
  include_keypoints = (fields.InputDataFields.groundtruth_keypoints
                       in tensor_dict)
  include_keypoint_visibilities = (
      fields.InputDataFields.groundtruth_keypoint_visibilities in tensor_dict)
  include_label_weights = (fields.InputDataFields.groundtruth_weights
                           in tensor_dict)
  include_label_confidences = (fields.InputDataFields.groundtruth_confidences
                               in tensor_dict)
  include_multiclass_scores = (fields.InputDataFields.multiclass_scores
                               in tensor_dict)
  dense_pose_fields = [fields.InputDataFields.groundtruth_dp_num_points,
                       fields.InputDataFields.groundtruth_dp_part_ids,
                       fields.InputDataFields.groundtruth_dp_surface_coords]
  include_dense_pose = all(field in tensor_dict for field in dense_pose_fields)
  tensor_dict = preprocessor.preprocess(
      tensor_dict, data_augmentation_options,
      func_arg_map=preprocessor.get_default_func_arg_map(
          include_label_weights=include_label_weights,
          include_label_confidences=include_label_confidences,
          include_multiclass_scores=include_multiclass_scores,
          include_instance_masks=include_instance_masks,
          include_keypoints=include_keypoints,
          include_keypoint_visibilities=include_keypoint_visibilities,
          include_dense_pose=include_dense_pose))
  tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      tensor_dict[fields.InputDataFields.image], axis=0)
  return tensor_dict
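# Illustrative sketch (assumption, not in the original): the include_* flags
# above control which groundtruth tensors get_default_func_arg_map threads
# through each augmentation function, keeping masks, keypoints, etc. aligned
# with the image. For example:
func_arg_map = preprocessor.get_default_func_arg_map(
    include_instance_masks=True,
    include_keypoints=True)
# func_arg_map now maps each augmentation function to the tuple of
# tensor_dict keys it consumes and returns, so a geometric op that moves
# pixels also moves the corresponding masks and keypoints.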
def build(input_reader_config,
          model_config,
          lstm_config,
          unroll_length,
          data_augmentation_options=None,
          batch_size=1):
  """Builds a tensor dictionary based on the InputReader config.

  Args:
    input_reader_config: An input_reader_builder.InputReader object.
    model_config: A model.proto object containing the config for the desired
      DetectionModel.
    lstm_config: LSTM specific configs.
    unroll_length: Unrolled length for LSTM training.
    data_augmentation_options: A list of tuples, where each tuple contains a
      data augmentation function and a dictionary containing arguments and
      their values (see preprocessor.py).
    batch_size: Batch size for queue outputs.

  Returns:
    A dictionary of tensors based on items in the input_reader_config.

  Raises:
    ValueError: On invalid input reader proto.
    ValueError: If no input paths are specified.
  """
  if not isinstance(input_reader_config, input_reader_pb2.InputReader):
    raise ValueError('input_reader_config not of type '
                     'input_reader_pb2.InputReader.')
  external_reader_config = input_reader_config.external_input_reader
  external_input_reader_config = external_reader_config.Extensions[
      input_reader_google_pb2.GoogleInputReader.google_input_reader]
  input_reader_type = external_input_reader_config.WhichOneof('input_reader')

  if input_reader_type == 'tf_record_video_input_reader':
    config = external_input_reader_config.tf_record_video_input_reader
    reader_type_class = tf.TFRecordReader
  else:
    raise ValueError(
        'Unsupported reader in input_reader_config: %s' % input_reader_type)

  if not config.input_path:
    raise ValueError('At least one input path must be specified in '
                     '`input_reader_config`.')
  key, value = parallel_reader.parallel_read(
      config.input_path[:],  # Convert `RepeatedScalarContainer` to list.
      reader_class=reader_type_class,
      num_epochs=(input_reader_config.num_epochs
                  if input_reader_config.num_epochs else None),
      num_readers=input_reader_config.num_readers,
      shuffle=input_reader_config.shuffle,
      dtypes=[tf.string, tf.string],
      capacity=input_reader_config.queue_capacity,
      min_after_dequeue=input_reader_config.min_after_dequeue)

  # TODO(yinxiao): Add loading instance mask option.
  decoder = tf_sequence_example_decoder.TFSequenceExampleDecoder()

  keys_to_decode = [
      fields.InputDataFields.image, fields.InputDataFields.groundtruth_boxes,
      fields.InputDataFields.groundtruth_classes
  ]
  tensor_dict = decoder.decode(value, items=keys_to_decode)

  tensor_dict['image'].set_shape([None, None, None, 3])
  tensor_dict['groundtruth_boxes'].set_shape([None, None, 4])

  height = model_config.ssd.image_resizer.fixed_shape_resizer.height
  width = model_config.ssd.image_resizer.fixed_shape_resizer.width

  # If data augmentation is specified in the config file, the preprocessor
  # will be called here to augment the data as specified. Most common
  # augmentations include horizontal flip and cropping.
  if data_augmentation_options:
    images_pre = tf.split(tensor_dict['image'], config.video_length, axis=0)
    bboxes_pre = tf.split(
        tensor_dict['groundtruth_boxes'], config.video_length, axis=0)
    labels_pre = tf.split(
        tensor_dict['groundtruth_classes'], config.video_length, axis=0)
    images_proc, bboxes_proc, labels_proc = [], [], []
    cache = preprocessor_cache.PreprocessorCache()

    for i, _ in enumerate(images_pre):
      image_dict = {
          fields.InputDataFields.image:
              images_pre[i],
          fields.InputDataFields.groundtruth_boxes:
              tf.squeeze(bboxes_pre[i], axis=0),
          fields.InputDataFields.groundtruth_classes:
              tf.squeeze(labels_pre[i], axis=0),
      }
      image_dict = preprocessor.preprocess(
          image_dict,
          data_augmentation_options,
          func_arg_map=preprocessor.get_default_func_arg_map(),
          preprocess_vars_cache=cache)
      # Pads detection count to _PADDING_SIZE.
      image_dict[fields.InputDataFields.groundtruth_boxes] = tf.pad(
          image_dict[fields.InputDataFields.groundtruth_boxes],
          [[0, _PADDING_SIZE], [0, 0]])
      image_dict[fields.InputDataFields.groundtruth_boxes] = tf.slice(
          image_dict[fields.InputDataFields.groundtruth_boxes], [0, 0],
          [_PADDING_SIZE, -1])
      image_dict[fields.InputDataFields.groundtruth_classes] = tf.pad(
          image_dict[fields.InputDataFields.groundtruth_classes],
          [[0, _PADDING_SIZE]])
      image_dict[fields.InputDataFields.groundtruth_classes] = tf.slice(
          image_dict[fields.InputDataFields.groundtruth_classes], [0],
          [_PADDING_SIZE])
      images_proc.append(image_dict[fields.InputDataFields.image])
      bboxes_proc.append(image_dict[fields.InputDataFields.groundtruth_boxes])
      labels_proc.append(image_dict[fields.InputDataFields.groundtruth_classes])
    tensor_dict['image'] = tf.concat(images_proc, axis=0)
    tensor_dict['groundtruth_boxes'] = tf.stack(bboxes_proc, axis=0)
    tensor_dict['groundtruth_classes'] = tf.stack(labels_proc, axis=0)
  else:
    # Pads detection count to _PADDING_SIZE per frame.
    tensor_dict['groundtruth_boxes'] = tf.pad(
        tensor_dict['groundtruth_boxes'],
        [[0, 0], [0, _PADDING_SIZE], [0, 0]])
    tensor_dict['groundtruth_boxes'] = tf.slice(
        tensor_dict['groundtruth_boxes'], [0, 0, 0], [-1, _PADDING_SIZE, -1])
    tensor_dict['groundtruth_classes'] = tf.pad(
        tensor_dict['groundtruth_classes'], [[0, 0], [0, _PADDING_SIZE]])
    tensor_dict['groundtruth_classes'] = tf.slice(
        tensor_dict['groundtruth_classes'], [0, 0], [-1, _PADDING_SIZE])

  tensor_dict['image'], _ = preprocessor.resize_image(
      tensor_dict['image'], new_height=height, new_width=width)

  num_steps = config.video_length / unroll_length
  init_states = {
      'lstm_state_c':
          tf.zeros([height / 32, width / 32, lstm_config.lstm_state_depth]),
      'lstm_state_h':
          tf.zeros([height / 32, width / 32, lstm_config.lstm_state_depth]),
      'lstm_state_step':
          tf.constant(num_steps, shape=[]),
  }
  batch = sqss.batch_sequences_with_states(
      input_key=key,
      input_sequences=tensor_dict,
      input_context={},
      input_length=None,
      initial_states=init_states,
      num_unroll=unroll_length,
      batch_size=batch_size,
      num_threads=batch_size,
      make_keys_unique=True,
      capacity=batch_size * batch_size)
  return _build_training_batch_dict(batch, unroll_length, batch_size)
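# The pad-then-slice pairing above is a fixed-size padding idiom: tf.pad grows
# the detection axis by _PADDING_SIZE extra zero rows, and tf.slice then trims
# the result back to exactly _PADDING_SIZE rows, so every frame carries the
# same detection count regardless of how many boxes it started with. A
# minimal, self-contained illustration with a hypothetical padding size of 3:
boxes = tf.constant([[0.1, 0.1, 0.5, 0.5]])  # one detection, shape [1, 4]
boxes = tf.pad(boxes, [[0, 3], [0, 0]])      # shape [4, 4], zero rows appended
boxes = tf.slice(boxes, [0, 0], [3, -1])     # shape [3, 4], fixed count of 3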
def build(input_reader_config,
          model_config,
          lstm_config,
          unroll_length,
          data_augmentation_options=None,
          batch_size=1):
  """Builds a tensor dictionary based on the InputReader config.

  Args:
    input_reader_config: An input_reader_builder.InputReader object.
    model_config: A model.proto object containing the config for the desired
      DetectionModel.
    lstm_config: LSTM specific configs.
    unroll_length: Unrolled length for LSTM training.
    data_augmentation_options: A list of tuples, where each tuple contains a
      data augmentation function and a dictionary containing arguments and
      their values (see preprocessor.py).
    batch_size: Batch size for queue outputs.

  Returns:
    A dictionary of tensors based on items in the input_reader_config.

  Raises:
    ValueError: On invalid input reader proto.
    ValueError: If no input paths are specified.
  """
  if not isinstance(input_reader_config, input_reader_pb2.InputReader):
    raise ValueError('input_reader_config not of type '
                     'input_reader_pb2.InputReader.')
  external_reader_config = input_reader_config.external_input_reader
  google_input_reader_config = external_reader_config.Extensions[
      input_reader_google_pb2.GoogleInputReader.google_input_reader]
  input_reader_type = google_input_reader_config.WhichOneof('input_reader')

  if input_reader_type == 'tf_record_video_input_reader':
    config = google_input_reader_config.tf_record_video_input_reader
    reader_type_class = tf.TFRecordReader
  else:
    raise ValueError(
        'Unsupported reader in input_reader_config: %s' % input_reader_type)

  if not config.input_path:
    raise ValueError('At least one input path must be specified in '
                     '`input_reader_config`.')
  key, value = parallel_reader.parallel_read(
      config.input_path[:],  # Convert `RepeatedScalarContainer` to list.
      reader_class=reader_type_class,
      num_epochs=(input_reader_config.num_epochs
                  if input_reader_config.num_epochs else None),
      num_readers=input_reader_config.num_readers,
      shuffle=input_reader_config.shuffle,
      dtypes=[tf.string, tf.string],
      capacity=input_reader_config.queue_capacity,
      min_after_dequeue=input_reader_config.min_after_dequeue)

  # TODO(yinxiao): Add loading instance mask option.
  decoder = tf_sequence_example_decoder.TFSequenceExampleDecoder()

  keys_to_decode = [
      fields.InputDataFields.image, fields.InputDataFields.groundtruth_boxes,
      fields.InputDataFields.groundtruth_classes
  ]
  tensor_dict = decoder.decode(value, items=keys_to_decode)

  tensor_dict['image'].set_shape([None, None, None, 3])
  tensor_dict['groundtruth_boxes'].set_shape([None, None, 4])

  height = model_config.ssd.image_resizer.fixed_shape_resizer.height
  width = model_config.ssd.image_resizer.fixed_shape_resizer.width

  # If data augmentation is specified in the config file, the preprocessor
  # will be called here to augment the data as specified. Most common
  # augmentations include horizontal flip and cropping.
  if data_augmentation_options:
    images_pre = tf.split(tensor_dict['image'], config.video_length, axis=0)
    bboxes_pre = tf.split(
        tensor_dict['groundtruth_boxes'], config.video_length, axis=0)
    labels_pre = tf.split(
        tensor_dict['groundtruth_classes'], config.video_length, axis=0)
    images_proc, bboxes_proc, labels_proc = [], [], []
    cache = preprocessor_cache.PreprocessorCache()

    for i, _ in enumerate(images_pre):
      image_dict = {
          fields.InputDataFields.image:
              images_pre[i],
          fields.InputDataFields.groundtruth_boxes:
              tf.squeeze(bboxes_pre[i], axis=0),
          fields.InputDataFields.groundtruth_classes:
              tf.squeeze(labels_pre[i], axis=0),
      }
      image_dict = preprocessor.preprocess(
          image_dict,
          data_augmentation_options,
          func_arg_map=preprocessor.get_default_func_arg_map(),
          preprocess_vars_cache=cache)
      # Pads detection count to _PADDING_SIZE.
      image_dict[fields.InputDataFields.groundtruth_boxes] = tf.pad(
          image_dict[fields.InputDataFields.groundtruth_boxes],
          [[0, _PADDING_SIZE], [0, 0]])
      image_dict[fields.InputDataFields.groundtruth_boxes] = tf.slice(
          image_dict[fields.InputDataFields.groundtruth_boxes], [0, 0],
          [_PADDING_SIZE, -1])
      image_dict[fields.InputDataFields.groundtruth_classes] = tf.pad(
          image_dict[fields.InputDataFields.groundtruth_classes],
          [[0, _PADDING_SIZE]])
      image_dict[fields.InputDataFields.groundtruth_classes] = tf.slice(
          image_dict[fields.InputDataFields.groundtruth_classes], [0],
          [_PADDING_SIZE])
      images_proc.append(image_dict[fields.InputDataFields.image])
      bboxes_proc.append(image_dict[fields.InputDataFields.groundtruth_boxes])
      labels_proc.append(image_dict[fields.InputDataFields.groundtruth_classes])
    tensor_dict['image'] = tf.concat(images_proc, axis=0)
    tensor_dict['groundtruth_boxes'] = tf.stack(bboxes_proc, axis=0)
    tensor_dict['groundtruth_classes'] = tf.stack(labels_proc, axis=0)
  else:
    # Pads detection count to _PADDING_SIZE per frame.
    tensor_dict['groundtruth_boxes'] = tf.pad(
        tensor_dict['groundtruth_boxes'],
        [[0, 0], [0, _PADDING_SIZE], [0, 0]])
    tensor_dict['groundtruth_boxes'] = tf.slice(
        tensor_dict['groundtruth_boxes'], [0, 0, 0], [-1, _PADDING_SIZE, -1])
    tensor_dict['groundtruth_classes'] = tf.pad(
        tensor_dict['groundtruth_classes'], [[0, 0], [0, _PADDING_SIZE]])
    tensor_dict['groundtruth_classes'] = tf.slice(
        tensor_dict['groundtruth_classes'], [0, 0], [-1, _PADDING_SIZE])

  tensor_dict['image'], _ = preprocessor.resize_image(
      tensor_dict['image'], new_height=height, new_width=width)

  num_steps = config.video_length / unroll_length
  init_states = {
      'lstm_state_c':
          tf.zeros([height / 32, width / 32, lstm_config.lstm_state_depth]),
      'lstm_state_h':
          tf.zeros([height / 32, width / 32, lstm_config.lstm_state_depth]),
      'lstm_state_step':
          tf.constant(num_steps, shape=[]),
  }
  batch = sqss.batch_sequences_with_states(
      input_key=key,
      input_sequences=tensor_dict,
      input_context={},
      input_length=None,
      initial_states=init_states,
      num_unroll=unroll_length,
      batch_size=batch_size,
      num_threads=batch_size,
      make_keys_unique=True,
      capacity=batch_size * batch_size)
  return _build_training_batch_dict(batch, unroll_length, batch_size)
def augment_input_data(tensor_dict, data_augmentation_options):
  """Applies data augmentation ops to input tensors.

  Args:
    tensor_dict: A dictionary of input tensors keyed by
      fields.InputDataFields.
    data_augmentation_options: A list of tuples, where each tuple contains a
      function and a dictionary that contains arguments and their values.
      Usually, this is the output of core/preprocessor.build.

  Returns:
    A dictionary of tensors obtained by applying data augmentation ops to the
    input tensor dictionary.
  """
  tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
      tf.cast(tensor_dict[fields.InputDataFields.image], dtype=tf.float32), 0)

  # These extra groundtruth channels are augmented alongside the image, so
  # they receive the same leading batch dimension here and lose it again
  # after preprocessing.
  extra_groundtruth_fields = [
      fields.InputDataFields.groundtruth_bel_O,
      fields.InputDataFields.groundtruth_bel_F,
      fields.InputDataFields.groundtruth_bel_U,
      fields.InputDataFields.groundtruth_z_max_detections,
      fields.InputDataFields.groundtruth_z_min_observations,
      fields.InputDataFields.groundtruth_z_min_detections,
      fields.InputDataFields.groundtruth_detections_drivingCorridor,
      fields.InputDataFields.groundtruth_intensity,
  ]
  for key in extra_groundtruth_fields:
    tensor_dict[key] = tf.expand_dims(tensor_dict[key], 0)

  include_label_weights = (fields.InputDataFields.groundtruth_weights
                           in tensor_dict)
  include_label_confidences = (fields.InputDataFields.groundtruth_confidences
                               in tensor_dict)
  include_multiclass_scores = (fields.InputDataFields.multiclass_scores
                               in tensor_dict)
  tensor_dict = preprocessor.preprocess(
      tensor_dict, data_augmentation_options,
      func_arg_map=preprocessor.get_default_func_arg_map(
          include_label_weights=include_label_weights,
          include_label_confidences=include_label_confidences,
          include_multiclass_scores=include_multiclass_scores))
  tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      tensor_dict[fields.InputDataFields.image], axis=0)
  for key in extra_groundtruth_fields:
    tensor_dict[key] = tf.squeeze(tensor_dict[key], axis=0)
  return tensor_dict
def _create_input_queue(batch_size_per_clone, create_tensor_dict_fn,
                        batch_queue_capacity, num_batch_queue_threads,
                        prefetch_queue_capacity, data_augmentation_options,
                        ignore_options=None, mtl_window=False,
                        mtl_edgemask=False):
  """Sets up reader, prefetcher and returns input queue.

  Args:
    batch_size_per_clone: batch size to use per clone.
    create_tensor_dict_fn: function to create tensor dictionary.
    batch_queue_capacity: maximum number of elements to store within a queue.
    num_batch_queue_threads: number of threads to use for batching.
    prefetch_queue_capacity: maximum capacity of the queue used to prefetch
      assembled batches.
    data_augmentation_options: a list of tuples, where each tuple contains a
      data augmentation function and a dictionary containing arguments and
      their values (see preprocessor.py).
    ignore_options: options describing groundtruth that should be excluded
      from the training loss (consumed by preprocessor.make_ignore_list).
    mtl_window: if True, window boxes are threaded through the horizontal
      flip augmentation so they stay aligned with the flipped image.
    mtl_edgemask: if True, edge-mask groundtruth is threaded through the
      horizontal flip augmentation in the same way.

  Returns:
    input queue: a batcher.BatchQueue object holding enqueued tensor_dicts
      (which hold images, boxes and targets).  To get a batch of tensor_dicts,
      call input_queue.Dequeue().
  """
  tensor_dict = create_tensor_dict_fn()
  tensor_dict[fields.InputDataFields.image] = tf.expand_dims(
      tensor_dict[fields.InputDataFields.image], 0)

  images = tensor_dict[fields.InputDataFields.image]
  float_images = tf.to_float(images)
  tensor_dict[fields.InputDataFields.image] = float_images

  preprocessor.make_ignore_list(tensor_dict, ignore_options)

  if mtl_window:
    for option in data_augmentation_options:
      if 'random_horizontal_flip' in option[0].func_name:
        option[1][fields.InputDataFields.window_boxes] = tensor_dict[
            fields.InputDataFields.window_boxes]

  if mtl_edgemask:
    for option in data_augmentation_options:
      if 'random_horizontal_flip' in option[0].func_name:
        option[1][fields.InputDataFields.groundtruth_edgemask_masks] = (
            tensor_dict[fields.InputDataFields.groundtruth_edgemask_masks])

  if data_augmentation_options:
    tensor_dict = preprocessor.preprocess(tensor_dict,
                                          data_augmentation_options,
                                          mtl_window=mtl_window,
                                          mtl_edgemask=mtl_edgemask)

  input_queue = batcher.BatchQueue(
      tensor_dict,
      batch_size=batch_size_per_clone,
      batch_queue_capacity=batch_queue_capacity,
      num_batch_queue_threads=num_batch_queue_threads,
      prefetch_queue_capacity=prefetch_queue_capacity)
  return input_queue
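# Hedged usage sketch (not from the original): wiring the queue up with the
# multi-task (MTL) flags enabled. `train_config` and its ignore_options field
# are hypothetical stand-ins for however this fork configures training.
input_queue = _create_input_queue(
    batch_size_per_clone=train_config.batch_size,
    create_tensor_dict_fn=create_tensor_dict_fn,
    batch_queue_capacity=train_config.batch_queue_capacity,
    num_batch_queue_threads=train_config.num_batch_queue_threads,
    prefetch_queue_capacity=train_config.prefetch_queue_capacity,
    data_augmentation_options=data_augmentation_options,
    ignore_options=train_config.ignore_options,
    mtl_window=True,
    mtl_edgemask=True)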