Beispiel #1
0
def load_figures_json(filename: str) -> Dict[str, List[Figure]]:
    """Load per-page figures from a JSON file.

    The file is read with ``file_util.read_json``; every value of the
    resulting mapping is iterated and each entry is converted with
    ``Figure.from_dict``.

    :param filename: path of the JSON file to read.

    :returns: a dict with the same keys as the loaded mapping, each mapped
      to the list of ``Figure`` objects built from that key's entries.
    """
    figures_by_page = {}
    for page, raw_figures in file_util.read_json(filename).items():
        figures_by_page[page] = list(map(Figure.from_dict, raw_figures))
    return figures_by_page
Beispiel #2
0
    def extract(self, pdf_path, output_dir, use_cache=True):
        """Return results from extracting a PDF with pdffigures2.

        pdffigures2 is run through ``java -jar`` with
        ``<output_dir>/pdffigures/`` as its figure data prefix. An empty
        ``_SUCCESS`` marker file is written to that directory after a
        successful run, and an empty ``_ERROR`` marker file when the run
        fails.

        :param str pdf_path: path to the PDF to extract.
        :param str output_dir: path to the output directory.
        :param bool use_cache: whether or not to use cached data from
          disk if it's available.

        :returns: results from running pdffigures2 on the PDF, or ``None``
          if pdffigures2 could not be run successfully.
        """
        pdffigures_dir = os.path.join(output_dir, 'pdffigures/')
        # exist_ok avoids a crash if the directory appears between a
        # separate existence check and the creation call.
        os.makedirs(pdffigures_dir, exist_ok=True)

        success_file_path = os.path.join(pdffigures_dir, '_SUCCESS')
        error_file_path = os.path.join(pdffigures_dir, '_ERROR')

        pdffigures_jar_path = file_util.cache_file(
            settings.PDFFIGURES_JAR_PATH)

        if not os.path.exists(success_file_path) or not use_cache:
            # Pass the command as an argument list (no shell) so paths
            # containing spaces or shell metacharacters are handed to java
            # verbatim instead of being re-parsed by a shell.
            command = [
                'java',
                '-jar', str(pdffigures_jar_path),
                '--figure-data-prefix', pdffigures_dir,
                '--save-regionless-captions',
                str(pdf_path),
            ]
            try:
                subprocess.check_call(command)
            except (subprocess.CalledProcessError, OSError):
                # OSError covers the case where java itself cannot be
                # started; through a shell that surfaced as a non-zero exit
                # status, so it is treated the same way here.
                # Write an error file to indicate that a problem occurred.
                with open(error_file_path, 'w') as f_out:
                    f_out.write('')
                # return nothing
                return None

            # add a success file to verify that the operation completed
            with open(success_file_path, 'w') as f_out:
                f_out.write('')

        # The result file is named after the PDF; stripping the last four
        # characters assumes a four-character extension such as '.pdf'.
        return file_util.read_json(
            os.path.join(pdffigures_dir,
                         os.path.basename(pdf_path)[:-4] + '.json'))
 def _get_hypes(self) -> dict:
     """Return the result of reading ``hypes.json`` from ``self.save_dir``.

     The file is read with ``file_util.read_json``.
     """
     hypes_path = os.path.join(self.save_dir, 'hypes.json')
     return file_util.read_json(hypes_path)
Beispiel #4
0
 def _get_hypes(self) -> dict:
     """Return the result of reading ``hypes.json`` from ``self.save_dir``.

     The path is built with ``os.path.join`` so that it is correct whether
     or not ``self.save_dir`` ends with a path separator. The file is read
     with ``file_util.read_json``.
     """
     # Imported locally so this method does not rely on a module-level
     # ``import os`` being present.
     import os

     return file_util.read_json(os.path.join(self.save_dir, 'hypes.json'))