def load_figures_json(filename: str) -> Dict[str, List[Figure]]:
    """Read a figures JSON file and rebuild its ``Figure`` objects.

    The file is expected to hold a mapping whose values are lists of
    figure dictionaries; each dictionary is converted with
    ``Figure.from_dict`` and the mapping keys are kept unchanged.

    :param str filename: path of the JSON file to read.
    :returns: mapping from each key of the JSON document to the list of
        ``Figure`` objects built from that key's entries.
    """
    raw = file_util.read_json(filename)
    figures_by_page = {}
    for page, figure_dicts in raw.items():
        figures_by_page[page] = [
            Figure.from_dict(figure_dict) for figure_dict in figure_dicts
        ]
    return figures_by_page
def extract(self, pdf_path, output_dir, use_cache=True):
    """Return results from extracting a PDF with pdffigures2.

    The extraction output is written under ``<output_dir>/pdffigures/``.
    An empty ``_SUCCESS`` marker file is created there after a successful
    run and an empty ``_ERROR`` marker file after a failed one. When the
    ``_SUCCESS`` marker already exists and ``use_cache`` is true, the
    external tool is not run again.

    :param str pdf_path: path to the PDF to extract.
    :param str output_dir: path to the output directory.
    :param bool use_cache: whether or not to use cached data from
        disk if it's available.

    :returns: results from running pdffigures2 on the PDF, or ``None``
        if the pdffigures2 process could not be run or exited with an
        error.
    """
    # The trailing slash is kept on purpose: this value is handed to the
    # tool as a filename *prefix*, not only used as a directory.
    pdffigures_dir = os.path.join(output_dir, 'pdffigures/')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(pdffigures_dir, exist_ok=True)

    success_file_path = os.path.join(pdffigures_dir, '_SUCCESS')
    error_file_path = os.path.join(pdffigures_dir, '_ERROR')

    pdffigures_jar_path = file_util.cache_file(settings.PDFFIGURES_JAR_PATH)

    if not use_cache or not os.path.exists(success_file_path):
        # Pass an argument list and no shell, so that paths containing
        # spaces or shell metacharacters reach java verbatim instead of
        # being split or interpreted by the shell.
        command = [
            'java',
            '-jar', str(pdffigures_jar_path),
            '--figure-data-prefix', pdffigures_dir,
            '--save-regionless-captions',
            str(pdf_path),
        ]
        try:
            subprocess.check_call(command)
        except (subprocess.CalledProcessError, OSError):
            # OSError covers the case where the java executable cannot be
            # started at all; with a shell this used to surface as a
            # non-zero exit status, so it is handled the same way.
            # Write an error file to indicate that a problem occurred.
            with open(error_file_path, 'w') as f_out:
                f_out.write('')
            return None

        # Add a success file to verify that the operation completed.
        with open(success_file_path, 'w') as f_out:
            f_out.write('')

    # The result file is named after the PDF with its last four characters
    # removed (assumes a 4-character extension such as '.pdf' -- TODO
    # confirm against the tool's output naming).
    result_name = os.path.basename(pdf_path)[:-4] + '.json'
    return file_util.read_json(os.path.join(pdffigures_dir, result_name))
def _get_hypes(self) -> dict:
    """Load and return the contents of ``hypes.json`` in ``self.save_dir``.

    :returns: the parsed JSON document as returned by
        ``file_util.read_json``.
    """
    hypes_path = os.path.join(self.save_dir, 'hypes.json')
    return file_util.read_json(hypes_path)
def _get_hypes(self) -> dict:
    """Load and return the contents of ``hypes.json`` in ``self.save_dir``.

    The path is built with ``os.path.join`` so that it is correct whether
    or not ``self.save_dir`` ends with a path separator; plain string
    concatenation produced ``<dir>hypes.json`` when it did not.

    :returns: the parsed JSON document as returned by
        ``file_util.read_json``.
    """
    # Imported locally so this method does not depend on a module-level
    # ``import os`` being present.
    import os

    hypes_path = os.path.join(self.save_dir, 'hypes.json')
    return file_util.read_json(hypes_path)