def read_tensor(path: str, maxsize: typing.Optional[int] = None) -> typing.Optional[np.ndarray]:
    """
    Load a saved tensor, stored either as an image file (for standard RGB
    images) or as a numpy archive (for more general tensors).

    :param path: location of the tensor. It is passed through
        ``file_util.cache_file`` and the path returned by that call is the
        one that is actually read.
    :param maxsize: optional upper bound, in bytes, on the size of the cached
        file. ``None`` disables the check.
    :returns: for ``.png``/``.jpg``/``.jpeg`` files, the array produced by
        ``imread`` (3 dimensions, last axis of length 3). For ``.npz`` files,
        the array stored under ``'arr_0'``, or ``None`` when the archive
        cannot be read or does not hold exactly one entry.
    :raises FileTooLargeError: if the cached file is larger than ``maxsize``.
    :raises KeyError: if the single entry of an ``.npz`` archive is not named
        ``'arr_0'``.
    :raises RuntimeError: if the file extension is not supported.
    """
    path = file_util.cache_file(path)
    if maxsize is not None and os.path.getsize(path) > maxsize:
        raise FileTooLargeError

    ext = os.path.splitext(path)[1].lower()

    if ext in {'.png', '.jpg', '.jpeg'}:
        res = imread(path, as_gray=False, pilmode="RGB")
        assert len(res.shape) == 3
        assert res.shape[2] == 3
        return res

    if ext == '.npz':
        try:
            # The archive object keeps the underlying file open until it is
            # closed, so read everything inside a context manager. The arrays
            # are materialised once here instead of once for the count check
            # and a second time for the return value.
            with np.load(path) as archive:
                arrays = dict(archive.items())
            # Explicit check rather than ``assert`` so it still runs under -O.
            if len(arrays) != 1:
                raise ValueError(
                    'Expected exactly one array in %s, found %d' %
                    (path, len(arrays))
                )
        except Exception:
            # Deliberate best-effort: unreadable archives are logged and
            # reported to the caller as None.
            logging.exception('Error unzipping %s', path)
            return None
        return arrays['arr_0']

    raise RuntimeError('Extension %s for file %s not supported' % (ext, path))
def _get_weights(self) -> str:
    """
    Cache the checkpoint files for ``self.iteration`` and return their
    shared local prefix.

    Three files named ``save.ckpt-<iteration>`` plus the suffixes ``.index``,
    ``.meta`` and ``.data-00000-of-00001`` are fetched from ``self.save_dir``
    through ``file_util.cache_file``, in that order.

    :returns: the cached path of the ``.index`` file, cut off at the last
        occurrence of ``'.index'``.
    """
    index_suffix = '.index'
    checkpoint_suffixes = (index_suffix, '.meta', '.data-00000-of-00001')

    cached_paths = []
    for suffix in checkpoint_suffixes:
        remote_path = self.save_dir + 'save.ckpt-%d' % self.iteration + suffix
        cached_paths.append(file_util.cache_file(remote_path))

    # Only the first cached path (the .index file) determines the prefix.
    index_path = cached_paths[0]
    cut = index_path.rfind(index_suffix)
    return index_path[:cut]
def download_and_extract_tar(
    tarname: str,
    extract_dir: str,
    n_attempts: int = 100,
    retry_delay: float = 10,
) -> None:
    """
    Fetch a tar archive through ``file_util.cache_file``, extract it into
    ``extract_dir`` and delete the cached archive afterwards.

    :param tarname: location of the tar archive to fetch.
    :param extract_dir: directory handed to ``file_util.extract_tarfile``.
    :param n_attempts: maximum number of fetch attempts; must be at least 1.
    :param retry_delay: seconds to sleep between failed attempts.
    :raises ValueError: if ``n_attempts`` is smaller than 1.
    :raises FileNotFoundError: if the last fetch attempt still fails.
    """
    # Without at least one attempt the loop below would never bind
    # ``cached_file`` and the function would fail later with a confusing
    # UnboundLocalError, so reject that up front.
    if n_attempts < 1:
        raise ValueError('n_attempts must be at least 1, got %r' % (n_attempts,))

    # Progress dot on stdout, one per archive.
    print('.', end='', flush=True)
    logging.info('Downloading %s' % tarname)

    for attempt in range(n_attempts):
        try:
            cached_file = file_util.cache_file(tarname)
        except FileNotFoundError:
            if attempt == n_attempts - 1:
                # Out of attempts: let the caller see the failure.
                raise
            logging.exception('Download failed, retrying')
            time.sleep(retry_delay)
        else:
            break

    file_util.extract_tarfile(cached_file, extract_dir)
    # The archive is no longer needed once its contents are extracted.
    os.remove(cached_file)
def extract(self, pdf_path, output_dir, use_cache=True):
    """Return results from extracting a PDF with pdffigures2.

    The tool's output is written to ``<output_dir>/pdffigures/``. An empty
    ``_SUCCESS`` file is created there after a successful run and an empty
    ``_ERROR`` file after a failed one.

    :param str pdf_path: path to the PDF to extract.
    :param str output_dir: path to the output directory.
    :param bool use_cache: whether or not to use cached data from disk
      if it's available. When true and the ``_SUCCESS`` file exists, the
      tool is not run again.
    :returns: results from running pdffigures2 on the PDF, as read by
      ``file_util.read_json``, or ``None`` if running the tool failed.
    """
    pdffigures_dir = os.path.join(output_dir, 'pdffigures/')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(pdffigures_dir, exist_ok=True)

    success_file_path = os.path.join(pdffigures_dir, '_SUCCESS')
    error_file_path = os.path.join(pdffigures_dir, '_ERROR')

    pdffigures_jar_path = file_util.cache_file(settings.PDFFIGURES_JAR_PATH)

    if not use_cache or not os.path.exists(success_file_path):
        # Pass the arguments as a list and skip the shell, so that paths
        # containing spaces or shell metacharacters reach java unchanged.
        command = [
            'java',
            '-jar', pdffigures_jar_path,
            '--figure-data-prefix', pdffigures_dir,
            '--save-regionless-captions',
            pdf_path,
        ]
        try:
            subprocess.check_call(command)
        except (subprocess.CalledProcessError, OSError):
            # OSError covers a java executable that cannot be started;
            # through a shell that surfaced as a non-zero exit status, so it
            # is treated the same way as a failed run.
            # Write an error file to indicate that a problem occurred.
            with open(error_file_path, 'w') as f_out:
                f_out.write('')
            return None

        # Add a success file to verify that the operation completed.
        with open(success_file_path, 'w') as f_out:
            f_out.write('')

    # The result file is named after the PDF with its last four characters
    # dropped (presumably the ".pdf" extension; verify against callers).
    json_name = os.path.basename(pdf_path)[:-4] + '.json'
    return file_util.read_json(os.path.join(pdffigures_dir, json_name))