def extract_vggface2_dataset(input_dir, device_args=None):
    """Extract aligned faces from a VGGFace2 training set.

    Reads bounding boxes from loose_bb_train.csv, skips faces smaller than
    128px on either side, then runs the ExtractSubprocessor 'landmarks' and
    'final' passes. Results go to a sibling directory '<input_dir>_out'.

    Args:
        input_dir: root of the VGGFace2 train set (one subdirectory per identity).
        device_args: optional dict with 'multi_gpu' / 'cpu_only' bool flags.
            (Was a mutable default `{}`; None-sentinel is backward compatible.)

    Raises:
        ValueError: if input_dir or loose_bb_train.csv does not exist.
    """
    device_args = device_args if device_args is not None else {}
    multi_gpu = device_args.get('multi_gpu', False)
    cpu_only = device_args.get('cpu_only', False)

    input_path = Path(input_dir)
    if not input_path.exists():
        raise ValueError('Input directory not found. Please ensure it exists.')

    bb_csv = input_path / 'loose_bb_train.csv'
    if not bb_csv.exists():
        # Fixed message: original said "found" where it meant "not found".
        raise ValueError('loose_bb_train.csv not found. Please ensure it exists.')

    bb_lines = bb_csv.read_text().split('\n')
    bb_lines.pop(0)  # drop CSV header row

    bb_dict = {}
    for line in bb_lines:
        if not line.strip():
            continue  # tolerate a trailing newline / blank lines (original crashed here)
        name, l, t, w, h = line.split(',')
        name = name[1:-1]  # strip surrounding quotes
        l, t, w, h = (int(x) for x in (l, t, w, h))
        bb_dict[name] = (l, t, w, h)

    output_path = input_path.parent / (input_path.name + '_out')

    dir_names = pathex.get_all_dir_names(input_path)

    if not output_path.exists():
        output_path.mkdir(parents=True, exist_ok=True)

    data = []
    for dir_name in io.progress_bar_generator(dir_names, "Collecting"):
        cur_input_path = input_path / dir_name
        cur_output_path = output_path / dir_name

        if not cur_output_path.exists():
            cur_output_path.mkdir(parents=True, exist_ok=True)

        input_path_image_paths = pathex.get_image_paths(cur_input_path)

        for filename in input_path_image_paths:
            filename_path = Path(filename)

            # CSV keys are "<identity_dir>/<image_stem>".
            name = filename_path.parent.name + '/' + filename_path.stem
            if name not in bb_dict:
                continue

            l, t, w, h = bb_dict[name]
            if min(w, h) < 128:
                continue  # too small to produce a usable face crop

            data += [ExtractSubprocessor.Data(filename=filename,
                                              rects=[(l, t, l + w, t + h)],
                                              landmarks_accurate=False,
                                              force_output_path=cur_output_path)]

    face_type = FaceType.fromString('full_face')

    io.log_info('Performing 2nd pass...')
    data = ExtractSubprocessor(data, 'landmarks', 256, face_type,
                               debug_dir=None, multi_gpu=multi_gpu,
                               cpu_only=cpu_only, manual=False).run()

    io.log_info('Performing 3rd pass...')
    ExtractSubprocessor(data, 'final', 256, face_type,
                        debug_dir=None, multi_gpu=multi_gpu,
                        cpu_only=cpu_only, manual=False,
                        final_output_path=None).run()
def dev_test1(input_dir):
    """Sanity-check that every image in every subdirectory of input_dir
    is a readable DFL image.

    Args:
        input_dir: directory containing one subdirectory per identity/person.

    Raises:
        ValueError: on the first file that DFLIMG cannot parse.
    """
    input_path = Path(input_dir)

    dir_names = pathex.get_all_dir_names(input_path)

    for dir_name in io.progress_bar_generator(dir_names, desc="Processing"):
        img_paths = pathex.get_image_paths(input_path / dir_name)
        for filename in img_paths:
            filepath = Path(filename)

            dflimg = DFLIMG.x(filepath)
            if dflimg is None:
                # Original raised a bare ValueError with no message; include
                # the offending path so the failure is actionable.
                raise ValueError(f"{filepath} is not a DFL image file")
def pack(samples_path):
    """Pack all face samples under samples_path into one faceset file.

    On-disk layout: VERSION (u64) | config-blob length (u64) | pickled
    sample configs | offset table of (samples_len + 1) u64 entries |
    concatenated raw image bytes. After a successful pack, the source
    images (and person subdirectories, if any) are deleted.

    Args:
        samples_path: Path to the faceset directory; subdirectories, if
            present, may optionally be treated as per-person groupings.
    """
    samples_dat_path = samples_path / packed_faceset_filename

    if samples_dat_path.exists():
        io.log_info(f"{samples_dat_path} : file already exists !")
        io.input("Press enter to continue and overwrite.")

    as_person_faceset = False

    dir_names = pathex.get_all_dir_names(samples_path)
    if len(dir_names) != 0:
        as_person_faceset = io.input_bool(f"{len(dir_names)} subdirectories found, process as person faceset?", True)

    if as_person_faceset:
        image_paths = []
        for dir_name in dir_names:
            image_paths += pathex.get_image_paths(samples_path / dir_name)
    else:
        image_paths = pathex.get_image_paths(samples_path)

    samples = samplelib.SampleLoader.load_face_samples(image_paths)
    samples_len = len(samples)

    samples_configs = []
    for sample in io.progress_bar_generator(samples, "Processing"):
        sample_filepath = Path(sample.filename)
        # Store filenames relative to the faceset root.
        sample.filename = sample_filepath.name

        if as_person_faceset:
            sample.person_name = sample_filepath.parent.name
        samples_configs.append(sample.get_config())
    samples_bytes = pickle.dumps(samples_configs, 4)

    # Context manager guarantees the handle is closed on error; the original
    # opened/closed manually and leaked the handle on any exception mid-pack.
    with open(samples_dat_path, "wb") as of:
        of.write(struct.pack("Q", PackedFaceset.VERSION))
        of.write(struct.pack("Q", len(samples_bytes)))
        of.write(samples_bytes)

        del samples_bytes  # just free mem
        del samples_configs

        sample_data_table_offset = of.tell()
        of.write(bytes(8 * (samples_len + 1)))  # placeholder offset table

        data_start_offset = of.tell()
        offsets = []

        for sample in io.progress_bar_generator(samples, "Packing"):
            try:
                if sample.person_name is not None:
                    sample_path = samples_path / sample.person_name / sample.filename
                else:
                    sample_path = samples_path / sample.filename

                with open(sample_path, "rb") as f:
                    b = f.read()

                offsets.append(of.tell() - data_start_offset)
                of.write(b)
            except Exception as e:
                # Chain the original cause instead of a bare `except:` that
                # discarded it.
                raise Exception(f"error while processing sample {sample_path}") from e

        offsets.append(of.tell())

        # Rewind and write the real offset table over the placeholder.
        of.seek(sample_data_table_offset, 0)
        for offset in offsets:
            of.write(struct.pack("Q", offset))

    for filename in io.progress_bar_generator(image_paths, "Deleting files"):
        Path(filename).unlink()

    if as_person_faceset:
        for dir_name in io.progress_bar_generator(dir_names, "Deleting dirs"):
            dir_path = samples_path / dir_name
            try:
                shutil.rmtree(dir_path)
            except Exception:
                # Best-effort cleanup: report but keep going (was bare except).
                io.log_info(f"unable to remove: {dir_path} ")
def __init__(self, root_path, debug=False, batch_size=1,
             resolution=256, face_type=None,
             generators_count=4,
             data_format="NHWC",
             **kwargs):
    """Sample generator for the AvatarOperatorDataset.

    Expects root_path/AvatarOperatorDataset/<chain>/<numeric subchain>/
    with at least 3 face images per subchain. Builds keyframe index
    tuples linking the boundary frames of adjacent subchains, then
    spawns generator workers (one in-thread when debug, else subprocesses).

    Raises:
        ValueError: if the dataset directory does not exist.
        Exception: if a subchain directory name is not numeric, or a
            subchain contains fewer than 3 faces.
    """
    super().__init__(debug, batch_size)
    self.initialized = False

    dataset_path = root_path / 'AvatarOperatorDataset'
    if not dataset_path.exists():
        raise ValueError(f'Unable to find {dataset_path}')

    chains_dir_names = pathex.get_all_dir_names(dataset_path)

    samples = SampleLoader.load(SampleType.FACE, dataset_path, subdirs=True)
    sample_idx_by_path = {sample.filename: i for i, sample in enumerate(samples)}

    kf_idxs = []

    for chain_dir_name in chains_dir_names:
        chain_root_path = dataset_path / chain_dir_name

        subchain_dir_names = pathex.get_all_dir_names(chain_root_path)
        try:
            subchain_dir_names.sort(key=int)
        except ValueError as e:
            # Narrowed from a bare `except:` and chained the cause.
            raise Exception(f'{chain_root_path} must contain only numerical name of directories') from e

        chain_samples = []

        for subchain_dir_name in subchain_dir_names:
            subchain_root = chain_root_path / subchain_dir_name
            subchain_samples = [sample_idx_by_path[image_path]
                                for image_path in pathex.get_image_paths(subchain_root)
                                if image_path in sample_idx_by_path]

            if len(subchain_samples) < 3:
                # Fixed typo in the user-facing message ("th echain").
                raise Exception(f'subchain {subchain_dir_name} must contain at least 3 faces. If you delete this subchain, then the chain will be corrupted.')

            chain_samples += [subchain_samples]

        chain_samples_len = len(chain_samples)
        # Link the first frame of each subchain to the last frame of the
        # previous one (and vice versa), carrying the remaining frames.
        for i in range(chain_samples_len - 1):
            kf_idxs += [(chain_samples[i + 1][0],
                         chain_samples[i][-1],
                         chain_samples[i][:-1])]

        for i in range(1, chain_samples_len):
            kf_idxs += [(chain_samples[i - 1][-1],
                         chain_samples[i][0],
                         chain_samples[i][1:])]

    if self.debug:
        self.generators_count = 1
    else:
        self.generators_count = max(1, generators_count)

    if self.debug:
        self.generators = [ThisThreadGenerator(self.batch_func,
                                               (samples, kf_idxs, resolution, face_type, data_format))]
    else:
        self.generators = [SubprocessGenerator(self.batch_func,
                                               (samples, kf_idxs, resolution, face_type, data_format),
                                               start_now=False)
                           for i in range(self.generators_count)]

        SubprocessGenerator.start_in_parallel(self.generators)

    self.generator_counter = -1
    self.initialized = True