def convert(self): config = self.configuration.get_config() data_dir = config.env.data_dir if self.args.mmf_data_folder: data_dir = self.args.mmf_data_folder bypass_checksum = False if self.args.bypass_checksum: bypass_checksum = bool(self.args.bypass_checksum) print(f"Data folder is {data_dir}") print(f"Zip path is {self.args.zip_file}") base_path = os.path.join(data_dir, "datasets", "hateful_memes", "defaults") images_path = os.path.join(base_path, "images") PathManager.mkdirs(images_path) if not bypass_checksum: self.checksum(self.args.zip_file, self.POSSIBLE_CHECKSUMS) src = self.args.zip_file print(f"Moving {src}") dest = images_path move(src, dest) print(f"Unzipping {src}") self.decompress_zip(dest, fname=os.path.basename(src), password=self.args.password) self.assert_files(images_path) annotations_path = os.path.join(base_path, "annotations") PathManager.mkdirs(annotations_path) annotations = self.JSONL_FILES for annotation in annotations: print(f"Moving {annotation}") src = os.path.join(images_path, "data", annotation) dest = annotations_path move(src, dest) images = self.IMAGE_FILES for image_file in images: src = os.path.join(images_path, "data", image_file) if PathManager.exists(src): print(f"Moving {image_file}") else: continue dest = images_path move(src, dest) if src.endswith(".tar.gz"): decompress(dest, fname=image_file, delete_original=False)
def convert(self): config = self.configuration.get_config() data_dir = config.env.data_dir if self.args.mmf_data_folder: data_dir = self.args.mmf_data_folder bypass_checksum = False if self.args.bypass_checksum: bypass_checksum = bool(self.args.bypass_checksum) print(f"Data folder is {data_dir}") print(f"Zip path is {self.args.zip_file}") base_path = data_dir images_path = os.path.join(base_path, "images") PathManager.mkdirs(images_path) move_dir = False if self.args.move: move_dir = bool(self.args.move) if not bypass_checksum: self.checksum(self.args.zip_file, self.POSSIBLE_CHECKSUMS) src = self.args.zip_file dest = images_path if move_dir: print(f"Moving {src}") move(src, dest) else: print(f"Copying {src}") copy(src, dest) print(f"Unzipping {src}") self.decompress_zip(dest, fname=os.path.basename(src), password=self.args.password) phase_one = self.assert_files(images_path) annotations_path = os.path.join(base_path, "annotations") PathManager.mkdirs(annotations_path) annotations = (self.JSONL_PHASE_ONE_FILES if phase_one is True else self.JSONL_PHASE_TWO_FILES) for annotation in annotations: print(f"Moving {annotation}") src = os.path.join(images_path, "data", annotation) dest = os.path.join(annotations_path, annotation) move(src, dest) images = self.IMAGE_FILES for image_file in images: src = os.path.join(images_path, "data", image_file) if PathManager.exists(src): print(f"Moving {image_file}") else: continue dest = os.path.join(images_path, image_file) move(src, dest) if src.endswith(".tar.gz"): decompress(dest, fname=image_file, delete_original=False)