def extract(self, overwrite=False):
    """Unpack every supported archive of this source.

    An archive whose target directory already exists is skipped, unless
    `overwrite` is set; in that case the directory is removed first and
    the archive is unpacked again.

    Arguments:
        overwrite: Flag whether already extracted archives are deleted
            and extracted again. Defaults to False.
    """
    archive_details = self.get_archive_details()
    if len(archive_details) == 0:
        logging_warn(
            "No supported archives for the dataset '{}' could be found.".format(self.dataset))
        return

    # walk over all supported archives
    for archive_key, details in archive_details.items():
        extracted_dir = os.path.join(self.paths["interim"], archive_key)
        already_extracted = os.path.isdir(extracted_dir)

        # nothing to do for an extracted archive unless overwriting
        if already_extracted and not overwrite:
            logging_info(
                "Archive '{}' already extracted. Skip.".format(details["name"]))
            continue

        # remove the previous extraction before unpacking again
        if already_extracted:
            logging_info(
                "Archive '{}' already extracted. Deleting".format(details["name"]),
                end=" ... ")
            shutil.rmtree(extracted_dir)
            logging("ok")

        # unpack the archive into the interim directory
        logging_info(
            "Extracting archive '{}'".format(details["name"]),
            end=" ... ")
        extract_archive(details["path"], self.paths["interim"])
        logging("ok")
def __convert_split(self, split_name, writer):
    """Convert a single dataset split.

    Reads the image ids of the split from the VOC2012 devkit below the
    interim path, parses the annotation XML of every image and writes
    one example per image that contains at least one selected label.
    Images without an annotation file or without a selected label are
    skipped.

    Arguments:
        split_name: Name of the dataset split. Either "train" or
            "validation".
        writer: TFRecords file writer.

    Raises:
        ValueError: If the split name is not supported.
        NotImplementedError: If the format of an image file is not
            supported.
    """
    imageset_filenames = {"train": "train.txt", "validation": "val.txt"}

    devkit_path = os.path.join(self.paths["interim"],
                               "VOCtrainval_11-May-2012", "VOCdevkit",
                               "VOC2012")

    # fail early with a clear message instead of hitting an unbound
    # variable further down
    if split_name not in imageset_filenames:
        raise ValueError(
            "The dataset split '{}' is not supported. Expected one of {}.".format(
                split_name, sorted(imageset_filenames)))

    # load image ids
    path = os.path.join(devkit_path, "ImageSets", "Main",
                        imageset_filenames[split_name])
    with open(path, "r") as imageset_file:
        image_ids = [line.strip() for line in imageset_file]

    # process annotations
    logging_info("Processing and writing images ...")

    # meta information which is identical for all images
    meta_url = "host.robots.ox.ac.uk"
    meta_requester = "*****@*****.**"

    # iterate all images
    num_images = 0
    annotations_path = os.path.join(devkit_path, "Annotations")
    images_path = os.path.join(devkit_path, "JPEGImages")
    for image_id in tqdm(image_ids, ascii=True):
        # the timestamp is taken per image
        meta_time = datetime.utcnow().isoformat()

        # read xml file, images without annotations are skipped
        xml_path = os.path.join(annotations_path, "{}.xml".format(image_id))
        if not os.path.isfile(xml_path):
            continue
        xml_root = xml.etree.ElementTree.parse(xml_path).getroot()

        # check format of image
        filename = xml_root.findtext("filename")
        image_format = self.get_image_format_of_file(filename)
        if image_format is None:
            raise NotImplementedError(
                "The format of the file '{}' is currently not supported.".
                format(filename))

        # read image size
        size_node = xml_root.find("size")
        image_height = int(size_node.findtext("height"))
        image_width = int(size_node.findtext("width"))
        image_channels = int(size_node.findtext("depth"))

        # read image
        image_path = os.path.join(images_path, filename)
        with tf.gfile.FastGFile(image_path, "rb") as image_file:
            image_raw_data = image_file.read()

        # read bounding boxes of the selected labels; coordinates are
        # divided by the image size, labels are the 1-based position
        # within the selected labels
        labels = []
        y_mins, x_mins, y_maxs, x_maxs = [], [], [], []
        for sobject in xml_root.findall("object"):
            label_name = sobject.find("name").text
            if label_name not in self.selected_labels:
                continue
            labels.append(self.selected_labels.index(label_name) + 1)

            bndbox = sobject.find("bndbox")
            y_mins.append(int(bndbox.find("ymin").text) / image_height)
            x_mins.append(int(bndbox.find("xmin").text) / image_width)
            y_maxs.append(int(bndbox.find("ymax").text) / image_height)
            x_maxs.append(int(bndbox.find("xmax").text) / image_width)

        # images without any selected label are not written
        if len(labels) == 0:
            continue

        # write sample
        writer.write_single_example({
            "meta/url": writer.bytes_feature(meta_url),
            "meta/requester": writer.bytes_feature(meta_requester),
            "meta/time": writer.bytes_feature(meta_time),
            "image/filename": writer.bytes_feature(filename),
            "image/format": writer.bytes_feature(image_format),
            "image/encoded": writer.bytes_feature(image_raw_data),
            "image/width": writer.int64_feature(image_width),
            "image/height": writer.int64_feature(image_height),
            "image/channels": writer.int64_feature(image_channels),
            "image/shape": writer.int64_feature(
                (image_height, image_width, image_channels)),
            "image/object/bbox/label": writer.int64_feature(labels),
            "image/object/bbox/y_min": writer.float_feature(y_mins),
            "image/object/bbox/x_min": writer.float_feature(x_mins),
            "image/object/bbox/y_max": writer.float_feature(y_maxs),
            "image/object/bbox/x_max": writer.float_feature(x_maxs)
        })
        num_images += 1

    logging_info(
        "Successfully written {} image(s) to the TFRecords file.".format(
            num_images))
choices=["train", "validation"], help= "Calculate the RGB mean of a dataset split. The split must be converted to TFRecords." ) arguments = parser.parse_args() if not arguments.extract and arguments.convert is None and arguments.rgb_mean is None: parser.print_help() exit() # initialize dataset dataset = VOC2012() # extract dataset if arguments.extract: logging_info("Extract all archives.") dataset.extract(overwrite=True) # convert dataset if arguments.convert is not None: logging_info("Convert dataset split '{}'.".format(arguments.convert)) dataset.convert(arguments.convert) # calculate rgb mean if arguments.rgb_mean is not None: logging_info("Calculate RGB mean of the dataset split '{}'.".format( arguments.rgb_mean)) rgb_mean = dataset.calculate_rgb_mean(arguments.rgb_mean) logging_info("RGB mean is R = {:.2f}, G = {:.2f}, B = {:.2f}.".format( *rgb_mean))
required=True) argument_list.add_model_argument("The model used for training.", default=None, required=True) argument_list.add_model_name_argument("The exported model name.", required=True) argument_list.add_dataset_argument("The dataset used for training.", default=None) argument_list.add_tf_verbosity_argument("Tensorflow verbosity.", default="info") argument_list.add_tf_min_log_level_argument( "Tensorflow minimum log level.", default=3) arguments = argument_list.parse() # print some information logging_info("Image filename: {}".format( arguments.image_filename)) logging_info("Model: {}".format(arguments.model)) logging_info("Model name: {}".format( arguments.model_name)) logging_info("Dataset: {}".format(arguments.dataset)) logging_info("Tensorflow verbosity: {}".format( arguments.tf_verbosity)) logging_info("Tensorflow minimum log level: {}".format( arguments.tf_min_log_level)) should_continue = query_yes_no("Continue?", default="yes") if not should_continue: exit() # set verbosity of tensorflow tfu_set_logging(arguments.tf_verbosity,
run.set_config_value(arguments.content_weight, "training", "content_weight") run.set_config_value(arguments.content_layers, "training", "content_layers") run.set_config_value(arguments.style_layers, "training", "style_layers") run.set_config_value(arguments.style_weight, "training", "style_weight") run.set_config_value(arguments.map_channel_weight, "training", "map_channel_weight") run.set_config_value(arguments.num_phases, "training", "num_phases") run.set_config_value(arguments.device, "training", "device") run.set_config_value(arguments.save_interval, "output", "save_interval") run.set_config_value(arguments.plot_interval, "output", "plot_interval") run.save_config() # print some information logging_info( "Semantic Style Transfer after https://arxiv.org/abs/1603.01768.") logging_info("Input style file: {}".format(arguments.input_style_file)) logging_info("Input map file: {}".format(arguments.input_map_file)) logging_info("Output map file: {}".format(arguments.output_map_file)) logging_info("Output content file: {}".format( arguments.output_content_file)) logging_info("Content weight: {}".format(arguments.content_weight)) logging_info("Content layers: {}".format(arguments.content_layers)) logging_info("Style layers: {}".format(arguments.style_layers)) logging_info("Style weight: {}".format(arguments.style_weight)) logging_info("Map channel weight: {}".format( arguments.map_channel_weight)) logging_info("Num phases: {}".format(arguments.num_phases)) logging_info("Device: {}".format(arguments.device)) logging_info("Save interval: {}".format(arguments.save_interval or "At the end"))
if __name__ == "__main__": # parse arguments parser = argparse.ArgumentParser( description="Export the pre-trained weights of the VGG 19 network.") parser.add_argument( "--checkpoint-file", default=get_full_path("models", "vgg_19_imagenet", "vgg_19.ckpt"), type=str, required=False, help="Path to the checkpoint file to extract weights from.") arguments = parser.parse_args() # print some information logging_info("Export the pre-trained weights of the VGG 19 network.") logging_info("Checkpoint file: {}".format(arguments.checkpoint_file)) should_continue = query_yes_no("Continue?", default="yes") if not should_continue: exit() logging_info("Read weights original checkpoint file.") # initialize checkpoint reader reader = tf.train.NewCheckpointReader(arguments.checkpoint_file) # read weights data = {} for block_ii, num_convs in enumerate([2, 2, 4, 4, 4]): block_name = "conv{}".format(block_ii + 1)
argument_list.add_model_argument("The model used for training.", default=None, required=True) argument_list.add_model_name_argument("The exported model name.", required=True) argument_list.add_dataset_argument("The dataset used for training.", default=None) argument_list.add_tf_verbosity_argument("Tensorflow verbosity.", default="info") argument_list.add_tf_min_log_level_argument( "Tensorflow minimum log level.", default=3) arguments = argument_list.parse() # print some information if arguments.video_filename is not None: logging_info("Video filename: {}".format( arguments.video_filename)) logging_info("Model: {}".format(arguments.model)) logging_info("Model name: {}".format( arguments.model_name)) logging_info("Dataset: {}".format(arguments.dataset)) logging_info("Tensorflow verbosity: {}".format( arguments.tf_verbosity)) logging_info("Tensorflow minimum log level: {}".format( arguments.tf_min_log_level)) should_continue = query_yes_no("Continue?", default="yes") if not should_continue: exit() # set verbosity of tensorflow tfu_set_logging(arguments.tf_verbosity,
default="/gpu:0") argument_list.add_optimization_device_argument("Device for optimization.", default="/cpu:0") argument_list.add_tf_verbosity_argument("Tensorflow verbosity.", default="info") argument_list.add_tf_min_log_level_argument( "Tensorflow minimum log level.", default=3) arguments = argument_list.parse() # load run run = Run(run_id=arguments.run) if not run.open(): logging_error("There is no run '{}'.".format(arguments.run)) # print some information logging_info("Load run '{}'.".format(arguments.run)) logging_info("Model: {}".format( run.get_config_value("model", "name"))) logging_info("Dataset: {} {}".format( arguments.dataset, arguments.dataset_split)) logging_info("Preprocessing parallel calls: {}".format( arguments.num_parallel_calls)) logging_info("Prefetch buffer size: {}".format( arguments.prefetch_buffer_size)) logging_info("Batch size: {}".format( arguments.batch_size)) logging_info("Input device: {}".format( arguments.input_device)) logging_info("Inference device: {}".format( arguments.inference_device)) logging_info("Optimization device: {}".format(
"optimization_device") run.set_config_value(arguments.checkpoint_interval, "training", "checkpoint_interval") run.set_config_value(arguments.step_log_interval, "training", "step_log_interval") run.set_config_value(datetime.utcnow().isoformat(), "misc", "creation_date") run.set_config_value(np.__version__, "misc", "np_version") run.set_config_value(tf.__version__, "misc", "tf_version") run.set_config_value(arguments.tf_verbosity, "misc", "tf_verbosity") run.set_config_value(arguments.tf_min_log_level, "misc", "tf_min_log_level") run.save_config() # print some information logging_info("Initialized run '{}'.".format(run.id)) logging_info("Model: {}".format(arguments.model)) logging_info("Dataset: {} {}".format( arguments.dataset, arguments.dataset_split)) logging_info( "Number of classes: {}".format(dataset.num_classes + 1)) logging_info("Global random seed: {}".format( arguments.random_seed)) logging_info("Operation random seed: {}".format( arguments.op_random_seed)) logging_info("Preprocessing parallel calls: {}".format( arguments.num_parallel_calls)) logging_info("Prefetch buffer size: {}".format( arguments.prefetch_buffer_size)) logging_info("Shuffle buffer size: {}".format( arguments.shuffle_buffer_size))
def calculate_rgb_mean(self, split_name):
    """Calculate the RGB mean of all images in a dataset split.

    The split must be converted to a TFRecords file. The images are
    decoded one after another and only the per-channel sums are kept,
    so the memory usage does not grow with the number of images.

    Arguments:
        split_name: Name of the dataset split.

    Returns:
        Array of length 3 containing mean values of RGB channels.

    Raises:
        IOError: If there is no TFRecords file for the split.
    """
    dataset_path = os.path.join(self.paths["processed"],
                                "{}.tfrecords".format(split_name))
    if not os.path.isfile(dataset_path):
        # fail here instead of deep inside the TensorFlow input pipeline
        raise IOError(
            "There is no TFRecords file for the split '{}'.".format(
                split_name))

    # initialize reader for TFRecords files
    filename_queue = tf.train.string_input_producer([dataset_path],
                                                    num_epochs=1)
    options = None
    if self.compression_type is not None:
        options = tf.python_io.TFRecordOptions(self.compression_type)
    reader = tf.TFRecordReader(options=options)

    # read image and format from file
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example, features={
        "image/encoded": tf.FixedLenFeature(shape=(), dtype=tf.string),
        "image/format": tf.FixedLenFeature(shape=(), dtype=tf.string)
    })
    image_encoded = features["image/encoded"]
    image_format = features["image/format"]

    # setup decoder, the stored format selects the decoding function
    decoded_image = tf.case({
        tf.equal(image_format, "bmp"):
            lambda: tf.image.decode_bmp(image_encoded, channels=3),
        tf.equal(image_format, "jpeg"):
            lambda: tf.image.decode_jpeg(image_encoded, channels=3,
                                         dct_method="INTEGER_ACCURATE"),
        tf.equal(image_format, "png"):
            lambda: tf.image.decode_png(image_encoded, channels=3,
                                        dtype=tf.uint8)
    }, exclusive=True)

    # explicit 64 bit accumulator, the default dtype of a sum over
    # uint8 images depends on the platform
    rgb_sum = np.zeros((3,), dtype=np.float64)
    num_pixels = 0

    # set minimum logging level for TensorFlow
    old_min_log_level = os.environ.get("TF_CPP_MIN_LOG_LEVEL")
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
    try:
        with tf.Session() as session:
            session.run([
                tf.global_variables_initializer(),
                tf.local_variables_initializer()
            ])

            # start loading images
            coordinator = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=session,
                                                   coord=coordinator)
            try:
                # decode images and accumulate the channel sums
                logging_info("Loading images from TFRecords file ...")
                num_samples = sum(
                    1 for _ in tf.python_io.tf_record_iterator(
                        dataset_path, options=options))
                for _ in tqdm(range(num_samples), ascii=True):
                    image = session.run(decoded_image)
                    rgb_sum += np.sum(image, axis=(0, 1), dtype=np.float64)
                    num_pixels += image.shape[0] * image.shape[1]
            finally:
                # stop the queue runner threads even if decoding failed
                coordinator.request_stop()
                coordinator.join(threads)
    finally:
        # reset minimum log level for TensorFlow, an unset variable
        # stays unset
        if old_min_log_level is None:
            os.environ.pop("TF_CPP_MIN_LOG_LEVEL", None)
        else:
            os.environ["TF_CPP_MIN_LOG_LEVEL"] = old_min_log_level

    # calculate RGB mean of images
    logging_info("Calculating RGB mean of dataset split ...")
    rgb_mean = np.divide(rgb_sum, num_pixels)

    return rgb_mean
required=True) argument_list.add_model_name_argument("The output model name.", required=True) argument_list.add_tf_verbosity_argument("Tensorflow verbosity.", default="info") argument_list.add_tf_min_log_level_argument( "Tensorflow minimum log level.", default=3) arguments = argument_list.parse() # load run run = Run(run_id=arguments.run) if not run.open(): logging_error("There is no run '{}'.".format(arguments.run)) # print some information logging_info("Load run '{}'.".format(arguments.run)) logging_info("Model: {}".format( run.get_config_value("model", "name"))) logging_info("Model name: {}".format( arguments.model_name)) logging_info("Tensorflow verbosity: {}".format( arguments.tf_verbosity)) logging_info("Tensorflow minimum log level: {}".format( arguments.tf_min_log_level)) should_continue = query_yes_no("Continue?", default="yes") if not should_continue: exit() # set verbosity of tensorflow tfu_set_logging(arguments.tf_verbosity,