def test_get_nb_page(file_path):
    """Check that forcing the '.bin' extension is rejected.

    Asking for the page count of ``file_path`` with ``file_ext='.bin'``
    is expected to raise ``UnsupportedMimeType``.
    """
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    with pytest.raises(UnsupportedMimeType):
        preview_manager.get_page_nb(file_path=file_path, file_ext='.bin')
def test_get_nb_page() -> None:
    """Check that each PDF fixture is reported as having two pages.

    The plain, encrypted and A4 fixtures are checked in that order.
    """
    preview_manager = PreviewManager(cache_folder_path=CACHE_DIR, create_folder=True)
    pdf_fixtures = (
        PDF_FILE_PATH,
        PDF_FILE_PATH__ENCRYPTED,
        PDF_FILE_PATH__A4,
    )
    for pdf_fixture in pdf_fixtures:
        page_count = preview_manager.get_page_nb(file_path=pdf_fixture)
        assert page_count == 2
def test_page_number__extension_forced():
    """Check the page count of an extension-less image when '.txt' is forced.

    The extension-less image fixture, queried with ``file_ext=".txt"``,
    is expected to report exactly one page.
    """
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    forced_count = preview_manager.get_page_nb(
        file_path=IMAGE_FILE_PATH_NO_EXTENSION,
        file_ext=".txt",
    )
    assert forced_count == 1
def pdf_data_to_thumbnails_by_preview_generator(pdf_data, page=None, width_max=256, height_max=256):
    """Render JPEG thumbnails of PDF pages with preview-generator.

    Used as a more robust alternative to imagemagick (wand).

    :param pdf_data: raw bytes of the PDF document.
    :param page: an int for one page, a list/tuple of ints for several
        pages, or ``None`` (the default) for every page of the document.
    :param width_max: ``width`` passed to ``get_jpeg_preview``.
    :param height_max: ``height`` passed to ``get_jpeg_preview``.
    :return: dict mapping page number to the encoded image bytes of that
        page. Requested pages outside ``[0, num_pages)`` are skipped.
    """
    # Installation:
    # - pip install preview-generator
    # - packages to install: perl-image-exiftool, inkscape, scribus
    # Testcase:
    # - http://arxiv.org/abs/1612.01033v2
    # - where preview_generator succeeds but wand fails.
    from preview_generator.manager import PreviewManager

    cache_dir = tempfile.mkdtemp(prefix='preview-cache-')
    try:
        # Save the PDF inside the cache directory so the single rmtree in
        # ``finally`` removes both the input file and generated previews.
        fd, pdf_path = tempfile.mkstemp(dir=cache_dir)
        os.close(fd)
        with open(pdf_path, 'wb') as f:
            f.write(pdf_data)

        manager = PreviewManager(cache_dir, create_folder=True)
        num_pages = manager.get_page_nb(pdf_path)

        # Resolve the requested pages only now that the page count is
        # known: ``None`` means "all pages". (Previously ``None`` was
        # wrapped into ``[None]`` and the range check raised TypeError.)
        if page is None:
            page_list = range(num_pages)
        elif isinstance(page, (tuple, list)):
            page_list = page
        else:
            page_list = [page]

        rst = {}
        for page_index in page_list:
            if not (0 <= page_index < num_pages):
                continue
            preview_path = manager.get_jpeg_preview(
                pdf_path,
                width=width_max,
                height=height_max,
                page=page_index,
            )
            with open(preview_path, 'rb') as f:
                rst[page_index] = f.read()
    finally:
        shutil.rmtree(cache_dir)
    return rst
def pdf_data_to_thumbnails_by_preview_generator(pdf_data: bytes, pages: List[int], width_max: int, height_max: int):
    """
    Render selected PDF pages to JPEG via preview-generator.

    preview-generator is sometimes more robust than the alternative
    converter (e.g. http://arxiv.org/abs/1612.01033v2, where
    preview_generator succeeds but wand fails).

    Setup: ``pip install preview-generator`` plus the system packages
    perl-image-exiftool, inkscape and scribus.

    :param pdf_data: raw bytes of the PDF document.
    :param pages: page numbers to render; numbers outside
        ``[0, num_pages)`` are skipped.
    :param width_max: ``width`` passed to ``get_jpeg_preview``.
    :param height_max: ``height`` passed to ``get_jpeg_preview``.
    :return: dict map from page number to encoded image of that page.
    :raises ValueError: if a generated preview file is empty.
    """
    from preview_generator.manager import PreviewManager

    work_dir = tempfile.mkdtemp(prefix="preview-cache-")
    try:
        # The PDF lives inside the working directory so that one rmtree
        # cleans up the input as well as every generated preview.
        handle, source_path = tempfile.mkstemp(dir=work_dir)
        os.close(handle)
        with open(source_path, "wb") as sink:
            sink.write(pdf_data)

        previewer = PreviewManager(work_dir, create_folder=True)
        total_pages = previewer.get_page_nb(source_path)

        thumbnails = {}
        for page_no in pages:
            is_valid_page = 0 <= page_no < total_pages
            if not is_valid_page:
                continue
            jpeg_path = previewer.get_jpeg_preview(
                source_path,
                width=width_max,
                height=height_max,
                page=page_no,
            )
            with open(jpeg_path, "rb") as source:
                jpeg_bytes = source.read()
            if not jpeg_bytes:
                raise ValueError("preview_generator gives zero-sized image")
            thumbnails[page_no] = jpeg_bytes
    finally:
        shutil.rmtree(work_dir)
    return thumbnails
def get_page_nb():
    """Return the page count of the document named in the request body.

    Reads ``path`` from the JSON body of the current request, asks a
    ``PreviewManager`` for that document's page count, and answers with
    ``{"pages": <count>}`` as ``application/json`` with status 200.

    :raises ValueError: if the body is not a JSON object holding ``path``.
    """
    payload = request.json
    # A missing or non-object JSON body (e.g. None or a list) must be
    # reported the same way as a missing key, not surface as a TypeError
    # from the membership test or the subscript below.
    if not isinstance(payload, dict) or 'path' not in payload:
        raise ValueError('path required')

    source_document = payload['path']
    print('source_document: ', source_document)

    # Single definition of the cache location (same value that was
    # previously passed as a duplicated literal).
    cache_path = '/var/preview-cache/'
    manager = PreviewManager(cache_path, create_folder=True)
    # Named page_count so the local does not shadow this function's name.
    page_count = manager.get_page_nb(source_document)

    return app.response_class(
        response=json.dumps({"pages": page_count}),
        status=200,
        mimetype='application/json',
    )
def test_get_nb_page() -> None:
    """Check that the image fixture is reported as a single page."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    # FIXME must add parameter force=True/False in the API
    page_count = preview_manager.get_page_nb(file_path=IMAGE_FILE_PATH)
    assert page_count == 1
def get_one(self, file_id, revision_id=None):
    """Build the view data for one file, including its preview page URLs.

    :param file_id: identifier of the file content; converted with ``int``.
    :param revision_id: optional revision to display. When falsy, the
        content is loaded with ``ContentApi.get_one`` and its own
        ``revision_id`` is used instead.
    :return: a ``DictLikeClass`` with ``result`` (the dictified file),
        ``fake_api`` (breadcrumb and current user), ``nb_page``, ``url``
        (list of preview URLs, one per page) and ``pdf_available``
        (the string ``'true'`` or ``'false'``).

    Any exception raised while querying the preview manager is logged and
    swallowed, leaving whatever ``nb_page``, ``url`` and ``pdf_available``
    values had been computed before the failure (initially ``0``, ``[]``
    and ``'false'``).
    """
    file_id = int(file_id)
    cache_path = CFG.get_instance().PREVIEW_CACHE_DIR
    preview_manager = PreviewManager(cache_path, create_folder=True)
    user = tmpl_context.current_user
    workspace = tmpl_context.workspace
    current_user_content = Context(CTX.CURRENT_USER,
                                   current_user=user).toDict(user)
    current_user_content.roles.sort(key=lambda role: role.workspace.name)
    content_api = ContentApi(user, show_archived=True, show_deleted=True)
    if revision_id:
        file = content_api.get_one_from_revision(file_id,
                                                 self._item_type,
                                                 workspace,
                                                 revision_id)
    else:
        file = content_api.get_one(file_id, self._item_type, workspace)
        revision_id = file.revision_id

    file_path = content_api.get_one_revision_filepath(revision_id)
    nb_page = 0
    enable_pdf_buttons = False  # type: bool
    preview_urls = []

    try:
        nb_page = preview_manager.get_page_nb(file_path=file_path)
        for page in range(int(nb_page)):
            url_str = '/previews/{}/pages/{}?revision_id={}'
            url = url_str.format(file_id, page, revision_id)
            preview_urls.append(url)

        enable_pdf_buttons = \
            preview_manager.has_pdf_preview(file_path=file_path)
    except PreviewGeneratorException as e:
        # INFO - A.P - Silently intercepts preview exception
        # As preview generation isn't mandatory, just register it
        # Format the exception itself: formatting ``e.__str__`` logged the
        # bound method's repr instead of the error message.
        logger.debug(self,
                     'Preview Generator Exception: {}'.format(e))
    except Exception as e:
        # INFO - D.A - 2017-08-11 - Make Tracim robust to pg exceptions
        # Preview generator may potentially raise any type of exception
        # so we prevent user interface crashes by catching all exceptions
        logger.error(
            self,
            'Preview Generator Generic Exception: {}'.format(e))

    pdf_available = 'true' if enable_pdf_buttons else 'false'  # type: str
    fake_api_breadcrumb = self.get_breadcrumb(file_id)
    fake_api_content = DictLikeClass(breadcrumb=fake_api_breadcrumb,
                                     current_user=current_user_content)
    fake_api = Context(CTX.FOLDER, current_user=user)\
        .toDict(fake_api_content)
    dictified_file = Context(self._get_one_context,
                             current_user=user).toDict(file, 'file')
    result = DictLikeClass(result=dictified_file,
                           fake_api=fake_api,
                           nb_page=nb_page,
                           url=preview_urls,
                           pdf_available=pdf_available)
    return result
def test_page_number__no_extension() -> None:
    """Check that the extension-less HTML fixture reports seven pages."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    html_page_count = preview_manager.get_page_nb(
        file_path=HTML_FILE_PATH_NO_EXTENSION,
    )
    assert html_page_count == 7
def test_page_number() -> None:
    """Check that the HTML fixture reports seven pages."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    html_page_count = preview_manager.get_page_nb(file_path=HTML_FILE_PATH)
    assert html_page_count == 7
def test_get_nb_page_no_extension() -> None:
    """Check the extension-less ODT fixture with its extension supplied.

    With ``file_ext=ODT_FILE_EXT`` passed explicitly, the fixture is
    expected to report two pages.
    """
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    odt_page_count = preview_manager.get_page_nb(
        file_path=ODT_FILE_PATH_NO_EXTENSION,
        file_ext=ODT_FILE_EXT,
    )
    assert odt_page_count == 2
def test_get_nb_page() -> None:
    """Check that the ODP fixture reports five pages."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    slide_count = preview_manager.get_page_nb(file_path=ODP_FILE_PATH)
    assert slide_count == 5
def test_page_number():
    """Check that the image fixture reports exactly one page."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    image_page_count = preview_manager.get_page_nb(file_path=IMAGE_FILE_PATH)
    assert image_page_count == 1