def create_single_feature_maps_file_from_config(config, split_names):
    """
    Reads the image files from the sub-directories given as split names.
    Creates the corresponding feature map files in the top directory.
    """
    images_top_directory = config.getDatasetImagesDirectoryPath()
    output_layer = config.getImageOutputLayer()
    feature_map_files = []
    for split_name in split_names:
        directory_path = "/".join([images_top_directory, split_name])
        image_paths = get_image_paths(directory_path)
        target_shape = config.getImageInputShape()
        image_features_size = config.getImageFeaturesSize()
        feature_map_file = create_single_feature_maps_file(
            output_layer, image_paths, images_top_directory,
            image_features_size, target_shape, split_name, config.run_opts)
        feature_map_files.append(feature_map_file)
    return feature_map_files
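# Minimal usage sketch for the function above (assumes a Configuration instance
# like the one built in main() below; the configuration path is hypothetical):
#
#   config = Configuration("configs/default.json")
#   feature_map_files = create_single_feature_maps_file_from_config(
#       config, [SPLIT_TRAIN, SPLIT_VALIDATE])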
def create_many_attention_map_files_from_config(config, split_name):
    """
    Reads the image files from the sub-directory given as split name.
    Creates the corresponding attention map files in the top directory.
    """
    target_shape = config.getImageInputShape()
    image_feature_size = config.getImageFeaturesSize()
    image_infix = get_infix_from_config(config, split_name)
    image_prefix = "COCO_" + image_infix

    bounding_boxes = load_prepared_boxes_json_from_config(config, split_name)
    boxes = calculate_metrics(bounding_boxes)

    # Group the boxes by their image id for per-image lookup.
    boxes_by_id = collections.defaultdict(list)
    for box in boxes:
        boxes_by_id[box["image_id"]].append(box)

    images_top_directory = config.getDatasetImagesDirectoryPath()
    image_paths = get_image_paths(to_split_dir(images_top_directory, split_name))
    processables = to_processables(image_paths, boxes_by_id, target_shape,
                                   image_prefix, image_feature_size)
    preprocess_bounding_boxes(processables)
def get_image_ids(self):
    """ Returns the image ids extracted from the image file paths, in order. """
    paths = get_image_paths(self.directory_path)
    return extract_to_image_ids_ordered(paths)
def main():
    parser = ArgumentParser(
        "Prepare the MSCOCO dataset for training and experiments")
    parser.add_argument("command",
                        help="""One of [preprocess, featuremaps, boxes, all].
        preprocess: Resizes images and stores them by image id in a TFRecord file
        featuremaps: Loads images from the TFRecord file and creates the feature maps
            using the visual model. Then stores the feature maps as individual files
            along with the images.
        boxes: Creates the bounding box files based on the dataset instances file.
            Required for experiments.
        all: All of the above""")
    parser.add_argument(
        "-c",
        "--configuration",
        help="Determine a specific configuration to use. If not specified, the default is used."
    )
    parser.add_argument("-d", "--dryrun", action="store_true")
    parser.add_argument("-b", "--batch_size", type=int)
    parser.add_argument(
        "-n",
        "--num_images",
        type=int,
        help="The expected number of images in the TFRecord file. Will show a progress bar then."
    )
    parser.add_argument(
        "-s",
        "--split_names",
        nargs="+",
        help="Specify the split names. Otherwise defaults to [train, validate]")
    run_opts = parser.parse_args()

    if run_opts.configuration:
        config = Configuration(run_opts.configuration)
    else:
        config = Configuration()
    config[OPTION_DRY_RUN] = run_opts.dryrun
    config["batch_size"] = run_opts.batch_size
    config["num_images"] = run_opts.num_images

    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = str(config.getGpuDevices())
    #os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    split_names = [SPLIT_TRAIN, SPLIT_VALIDATE]
    if run_opts.split_names:
        split_names = run_opts.split_names

    print("\nStarting image preparation: {}".format(run_opts.command))
    directory_path = config.getDatasetImagesDirectoryPath()

    if run_opts.command in ["all", "preprocess"]:
        print("\nPerform preprocessing for splits: " + str(split_names))
        for split_name in split_names:
            tfrecord_file = get_preprocessing_tfrecord_file(
                directory_path, split_name)
            if tfrecord_file:
                print(
                    "Skip preprocessing for split '{}' because TFRecord file already exists at {}"
                    .format(split_name, tfrecord_file))
            else:
                target_shape = config.getImageInputShape()
                image_paths = get_image_paths(
                    to_split_dir(directory_path, split_name))
                preprocess_images_and_write_tfrecord(image_paths,
                                                     directory_path,
                                                     target_shape, split_name)

    if run_opts.command in ["all", "featuremaps"]:
        print("\nCreate feature map files for splits: " + str(split_names))
        for split_name in split_names:
            tfrecord_file = get_preprocessing_tfrecord_file(
                directory_path, split_name)
            if tfrecord_file:
                print(
                    "Start feature map generation for split '{}' with TFRecord file found at {}"
                    .format(split_name, tfrecord_file))
                create_many_feature_map_files_from_config(config, split_name)
            else:
                print(
                    "Cannot find TFRecord file for split '{}'. Please run 'preprocess' for the split and try again."
                    .format(split_name))

    if run_opts.command in ["all", "boxes"]:
        print("\nCreate bounding boxes for splits: " + str(split_names))
        for split_name in split_names:
            create_many_attention_map_files_from_config(config, split_name)
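# Example invocations of main() above (the script name prepare_mscoco_images.py,
# the configuration name and the image count are assumptions; the commands and
# flags are the ones defined by the ArgumentParser in main()):
#
#   python prepare_mscoco_images.py preprocess -s train validate
#   python prepare_mscoco_images.py featuremaps -c my_config -b 32
#   python prepare_mscoco_images.py all -n 10000 --dryrun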
def get_image_ids(self):
    """ Returns the image ids extracted from the bounding box ('bbx') file paths, in order. """
    paths = get_image_paths(self.directory_path, file_ending="bbx")
    return extract_to_image_ids_ordered(paths)