Code example #1
# Imports this snippet relies on. file_util and FileTooLargeError are helpers
# defined elsewhere in the surrounding project; imread is assumed to come from
# imageio, whose v2 API accepts the pilmode= keyword used below.
import logging
import os
import typing

import numpy as np
from imageio import imread


def read_tensor(path: str,
                maxsize: typing.Optional[int] = None
                ) -> typing.Optional[np.ndarray]:
    """
    Load a saved tensor, stored either as an image file for standard RGB images
    or as a numpy archive for more general tensors.
    """
    path = file_util.cache_file(path)
    if maxsize is not None:
        if os.path.getsize(path) > maxsize:
            raise FileTooLargeError
    (_, ext) = os.path.splitext(path)
    ext = ext.lower()
    if ext in {'.png', '.jpg', '.jpeg'}:
        # Standard RGB image: decode it and sanity-check the (H, W, 3) shape.
        res = imread(path, as_gray=False, pilmode="RGB")
        assert len(res.shape) == 3
        assert res.shape[2] == 3
        return res
    elif ext == '.npz':
        # General tensor: a numpy archive expected to hold exactly one array.
        try:
            data = np.load(path)
            assert len(list(data.items())) == 1
        except Exception:
            logging.exception('Error unzipping %s' % path)
            return None
        return data['arr_0']
    else:
        raise RuntimeError('Extension %s for file %s not supported' %
                           (ext, path))
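A minimal usage sketch for read_tensor, under the assumption that
file_util.cache_file passes local paths through unchanged; the file name and
sizes below are purely illustrative.

# Hypothetical call site: write a single-array .npz archive, then read it back.
import numpy as np

np.savez('example.npz', np.zeros((4, 4), dtype=np.float32))  # stored as arr_0
tensor = read_tensor('example.npz', maxsize=10 * 1024 * 1024)
if tensor is not None:
    print(tensor.shape)  # (4, 4)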
Code example #2
    def _get_weights(self) -> str:
        # Fetch the three files that make up a TensorFlow checkpoint
        # (.index, .meta, .data-*) so that all of them are cached locally.
        suffixes = ['.index', '.meta', '.data-00000-of-00001']
        local_paths = [
            file_util.cache_file(self.save_dir +
                                 'save.ckpt-%d' % self.iteration + suffix)
            for suffix in suffixes
        ]
        # Return the local checkpoint prefix: the cached path with the
        # '.index' suffix stripped off.
        local_path = local_paths[0]
        return local_path[:local_path.rfind(suffixes[0])]
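The value returned above is a checkpoint prefix (the shared stem of the
.index/.meta/.data files), which is what TensorFlow 1.x restore APIs expect.
Below is a hedged, self-contained sketch of that convention, independent of
the class above; the variable, path, and session are illustrative only.

# Demonstrates that TF 1.x savers save/restore by prefix, not by a single file.
import tensorflow as tf

tf.compat.v1.disable_eager_execution()
w = tf.compat.v1.get_variable('w', shape=[2, 2])
saver = tf.compat.v1.train.Saver()
with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    prefix = saver.save(sess, '/tmp/save.ckpt', global_step=0)  # '/tmp/save.ckpt-0'
    saver.restore(sess, prefix)  # takes the prefix, like the one _get_weights returns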
Code example #3
def download_and_extract_tar(tarname: str,
                             extract_dir: str,
                             n_attempts: int = 100) -> None:
    # Emit a progress dot so long downloads show some sign of life.
    print('.', end='', flush=True)
    logging.info('Downloading %s' % tarname)
    # Retry the download up to n_attempts times, sleeping between attempts;
    # re-raise the error only once the final attempt has failed.
    for attempt in range(n_attempts):
        try:
            cached_file = file_util.cache_file(tarname)
            break
        except FileNotFoundError:
            if attempt == n_attempts - 1:
                raise
            logging.exception('Download failed, retrying')
            time.sleep(10)
    # Unpack the cached archive and delete it to free disk space.
    file_util.extract_tarfile(cached_file, extract_dir)
    os.remove(cached_file)
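A hedged usage sketch for the helper above; the archive location and target
directory are placeholders, and it assumes file_util.cache_file can fetch the
given path and file_util.extract_tarfile can unpack it.

# Hypothetical call; point tarname at a real tarball to use this for real.
download_and_extract_tar(
    'https://example.com/weights.tar.gz',  # placeholder archive location
    extract_dir='/tmp/weights',
    n_attempts=5,
)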
Code example #4
    def extract(self, pdf_path, output_dir, use_cache=True):
        """Return results from extracting a PDF with pdffigures2.

        :param str pdf_path: path to the PDF to extract.
        :param str output_dir: path to the output directory.
        :param bool use_cache: whether or not to use cached data from
          disk if it's available.

        :returns: results from running pdffigures2 on the PDF.
        """
        pdffigures_dir = os.path.join(output_dir, 'pdffigures/')
        if not os.path.exists(pdffigures_dir):
            os.makedirs(pdffigures_dir)

        success_file_path = os.path.join(pdffigures_dir, '_SUCCESS')
        error_file_path = os.path.join(pdffigures_dir, '_ERROR')

        pdffigures_jar_path = file_util.cache_file(
            settings.PDFFIGURES_JAR_PATH)

        if not os.path.exists(success_file_path) or not use_cache:
            try:
                subprocess.check_call(
                    'java'
                    ' -jar {pdffigures_jar_path}'
                    ' --figure-data-prefix {pdffigures_dir}'
                    ' --save-regionless-captions'
                    ' {pdf_path}'.format(
                        pdffigures_jar_path=pdffigures_jar_path,
                        pdf_path=pdf_path,
                        pdffigures_dir=pdffigures_dir),
                    shell=True)
            except subprocess.CalledProcessError:
                # Write an error file to indicate that a problem occurred.
                with open(error_file_path, 'w') as f_out:
                    f_out.write('')
                # Return None so callers can detect the failure.
                return

            # add a success file to verify that the operation completed
            with open(success_file_path, 'w') as f_out:
                f_out.write('')

        return file_util.read_json(
            os.path.join(pdffigures_dir,
                         os.path.basename(pdf_path)[:-4] + '.json'))
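A hedged usage sketch for the extract method above. The class name is a
stand-in (the snippet does not show which class the method belongs to), the
paths are placeholders, and file_util.read_json is assumed to return the
parsed JSON as a dict.

# Hypothetical driver; PdfFiguresExtractor stands in for the real class.
extractor = PdfFiguresExtractor()
results = extractor.extract(
    pdf_path='/tmp/paper.pdf',       # placeholder input PDF
    output_dir='/tmp/paper-output',  # a pdffigures/ subdirectory is created here
    use_cache=True,
)
if results is None:
    print('pdffigures2 failed; an _ERROR marker was written to the output dir')
else:
    print('pdffigures2 output keys: %s' % list(results))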