def get_unfused(self, labels, filenames, cv):
    """
    Download and unpickle skeleton fragment files, grouping the
    unfused skeleton pieces by segment label.

    labels: iterable of segment labels to extract from each fragment
    filenames: (list) fragment locations relative to the skeleton
        directory of cv
    cv: CloudVolume-like object providing path metadata
        (cv.meta.join, cv.skeleton.meta.skeleton_path) and a
        fragment download cache (cv.skeleton.cache.download)

    Returns: defaultdict { label: [ skeleton fragment, ... ] }
    """
    # Resolve fragment locations against the skeleton directory.
    # (Direct comprehension instead of a named lambda — PEP 8 E731.)
    skel_path = cv.skeleton.meta.skeleton_path
    filenames = [cv.meta.join(skel_path, loc) for loc in filenames]

    block_size = 50

    if len(filenames) < block_size:
        blocks = [filenames]
        n_blocks = 1
    else:
        n_blocks = max(len(filenames) // block_size, 1)
        blocks = scatter(filenames, n_blocks)

    hide_progress = not self.progress  # hoisted loop invariant

    all_skels = defaultdict(list)
    for filenames_block in tqdm(blocks, desc="Filename Block", total=n_blocks, disable=hide_progress):
        all_files = cv.skeleton.cache.download(filenames_block, progress=self.progress)
        # Only the file contents are needed, so iterate values() rather
        # than items() (the filename key was previously unused).
        for content in tqdm(all_files.values(), desc="Unpickling Fragments", disable=hide_progress):
            # SECURITY NOTE: pickle.loads on downloaded content is only
            # safe if the storage location is trusted.
            fragment = pickle.loads(content)
            for label in labels:
                if label in fragment:
                    all_skels[label].append(fragment[label])

    return all_skels
def files_exist(self, file_paths):
    """
    Threaded exists for all file paths.

    file_paths: (list) file paths to test for existence

    Returns: { filepath: bool }
    """
    results = {}

    def record_existence(paths, interface):
        # Merge this worker's answers into the shared result dict.
        results.update(interface.files_exist(paths))

    num_threads = len(self._threads)
    if num_threads > 0:
        # Fan the work out across the thread pool; the queue supplies
        # each worker's interface as the second argument.
        for chunk in scatter(file_paths, num_threads):
            self.put(partial(record_existence, chunk))
    else:
        # No worker threads available: test everything synchronously.
        record_existence(file_paths, self._interface)

    self.wait('Existence Testing' if self.progress else None)
    return results
def files_exist(self, file_paths):
    """
    Threaded exists for all file paths.

    file_paths: (list) file paths to test for existence

    Returns: { filepath: bool }
    """
    results = {}

    def check_batch(paths):
        # Each green job checks one batch on its own pooled connection.
        with self.get_connection() as conn:
            results.update(conn.files_exist(paths))

    batches = scatter(file_paths, self.concurrency)
    schedule_green_jobs(
        fns=(partial(check_batch, batch) for batch in batches),
        progress=('Existence Testing' if self.progress else None),
        concurrency=self.concurrency,
        total=len(file_paths),
    )
    return results