Exemple #1
0
def test_get_nb_page(file_path):
    """Requesting a page count with a forced '.bin' extension must raise
    UnsupportedMimeType."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    with pytest.raises(UnsupportedMimeType):
        preview_manager.get_page_nb(file_path=file_path, file_ext='.bin')
Exemple #2
0
def test_get_nb_page() -> None:
    """Each of the three PDF fixtures must be reported as having 2 pages."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    pdf_fixtures = (
        PDF_FILE_PATH,
        PDF_FILE_PATH__ENCRYPTED,
        PDF_FILE_PATH__A4,
    )
    # Same order as the fixtures are listed: plain, encrypted, A4.
    for pdf_path in pdf_fixtures:
        assert preview_manager.get_page_nb(file_path=pdf_path) == 2
def test_page_number__extension_forced():
    """A file without extension, queried with file_ext forced to ".txt",
    must be reported as a single page."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    nb_page = preview_manager.get_page_nb(
        file_path=IMAGE_FILE_PATH_NO_EXTENSION, file_ext=".txt"
    )
    assert nb_page == 1
Exemple #4
0
def pdf_data_to_thumbnails_by_preview_generator(pdf_data,
                                                page=None,
                                                width_max=256,
                                                height_max=256):
    """Render pages of a PDF to JPEG data with preview-generator.

    A more robust preview generator than imagemagick (wand).

    :param pdf_data: raw bytes of the PDF document.
    :param page: an int for one page, a list/tuple of ints for multiple
        pages, or None (the default) for every page of the document.
        Page numbers are zero-based; values outside ``[0, num_pages)``
        are silently skipped.
    :param width_max: forwarded as ``width`` to ``get_jpeg_preview``.
    :param height_max: forwarded as ``height`` to ``get_jpeg_preview``.

    :return: dict map from page number to encoded image of that page.
    """
    # Installation:
    #    - pip install preview-generator
    #    - packages to install: perl-image-exiftool, inkscape, scribus
    # Testcase:
    #    - http://arxiv.org/abs/1612.01033v2
    #    - where preview_generator succeeds but wand failed.
    from preview_generator.manager import PreviewManager

    cache_dir = tempfile.mkdtemp(prefix='preview-cache-')
    try:
        # save pdf
        fd, pdf_path = tempfile.mkstemp(dir=cache_dir)
        os.close(fd)
        with open(pdf_path, 'wb') as f:
            f.write(pdf_data)

        manager = PreviewManager(cache_dir, create_folder=True)
        num_pages = manager.get_page_nb(pdf_path)

        # The page selection can only be resolved once the page count is
        # known: None means "all pages", a scalar means a single page.
        if page is None:
            page_list = range(num_pages)
        elif isinstance(page, (tuple, list)):
            page_list = page
        else:
            page_list = [page]

        rst = {}
        for page_index in page_list:
            if not (0 <= page_index < num_pages):
                continue
            preview_path = manager.get_jpeg_preview(pdf_path,
                                                    width=width_max,
                                                    height=height_max,
                                                    page=page_index)
            with open(preview_path, 'rb') as f:
                rst[page_index] = f.read()
    finally:
        # Always drop the temporary PDF and the generated previews.
        shutil.rmtree(cache_dir)

    return rst
Exemple #5
0
def pdf_data_to_thumbnails_by_preview_generator(pdf_data: bytes,
                                                pages: List[int],
                                                width_max: int,
                                                height_max: int):
    """
    Render selected PDF pages to JPEG data through preview-generator.

    preview-generator is sometimes more robust than the alternative
    converter, e.g. on http://arxiv.org/abs/1612.01033v2 where wand failed.

    Requirements: ``pip install preview-generator`` plus the system
    packages perl-image-exiftool, inkscape and scribus.

    :param pdf_data: raw bytes of the PDF document.
    :param pages: page numbers to render; numbers outside
        ``[0, page count)`` are skipped.
    :param width_max: forwarded as ``width`` to ``get_jpeg_preview``.
    :param height_max: forwarded as ``height`` to ``get_jpeg_preview``.
    :return: dict map from page number to encoded image of that page.
    :raises ValueError: if a generated preview file is empty.
    """
    from preview_generator.manager import PreviewManager

    workdir = tempfile.mkdtemp(prefix="preview-cache-")
    try:
        # Persist the PDF bytes: the manager is given a file path.
        handle, source_path = tempfile.mkstemp(dir=workdir)
        os.close(handle)
        with open(source_path, "wb") as sink:
            sink.write(pdf_data)

        previewer = PreviewManager(workdir, create_folder=True)
        page_count = previewer.get_page_nb(source_path)

        thumbnails = {}
        for number in pages:
            if 0 <= number < page_count:
                jpeg_path = previewer.get_jpeg_preview(
                    source_path,
                    width=width_max,
                    height=height_max,
                    page=number,
                )
                with open(jpeg_path, "rb") as source:
                    image_bytes = source.read()
                if not image_bytes:
                    raise ValueError("preview_generator gives zero-sized image")
                thumbnails[number] = image_bytes
    finally:
        # The temporary directory holds both the PDF and the previews.
        shutil.rmtree(workdir)

    return thumbnails
def get_page_nb():
    """Return a JSON response with the page count of the requested document.

    Reads the ``path`` key from the JSON body of the current request, asks
    a ``PreviewManager`` for the number of pages of that document and
    returns ``{"pages": <count>}`` with status 200.

    :return: response built with ``app.response_class``
        (mimetype ``application/json``).
    :raises ValueError: if the request body is not a JSON object
        containing a ``path`` key.
    """
    payload = request.json
    # NOTE(review): the body may not be a dict (presumably None when the
    # request carries no JSON, depending on the framework version). Report
    # that as the same "path required" error instead of letting the
    # membership test fail with an unrelated TypeError.
    if not isinstance(payload, dict) or 'path' not in payload:
        raise ValueError('path required')

    source_document = payload['path']
    print('source_document: ', source_document)

    cache_path = '/var/preview-cache/'
    manager = PreviewManager(cache_path, create_folder=True)
    # Named page_count so the local does not shadow this function's name.
    page_count = manager.get_page_nb(source_document)

    return app.response_class(
        response=json.dumps({"pages": page_count}),
        status=200,
        mimetype='application/json'
    )
Exemple #7
0
def test_get_nb_page() -> None:
    """The image fixture must be reported as a single page."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    # FIXME must add parameter force=True/False in the API
    page_count = preview_manager.get_page_nb(file_path=IMAGE_FILE_PATH)
    assert page_count == 1
Exemple #8
0
    def get_one(self, file_id, revision_id=None):
        """Build the data needed to display one file and its previews.

        Loads the file (the given revision when ``revision_id`` is truthy,
        otherwise the result of ``content_api.get_one``), then asks the
        preview manager for the page count and PDF availability. Any
        exception raised in that preview step is logged and swallowed, so
        the result is still returned with ``nb_page=0``, no preview URL and
        ``pdf_available='false'`` unless those were already computed.

        :param file_id: identifier of the file; converted with ``int()``.
        :param revision_id: optional revision to display.
        :return: a ``DictLikeClass`` with keys ``result``, ``fake_api``,
            ``nb_page``, ``url`` (list of preview URLs) and
            ``pdf_available`` (the string ``'true'`` or ``'false'``).
        """
        file_id = int(file_id)
        cache_path = CFG.get_instance().PREVIEW_CACHE_DIR
        preview_manager = PreviewManager(cache_path, create_folder=True)
        user = tmpl_context.current_user
        workspace = tmpl_context.workspace
        current_user_content = Context(CTX.CURRENT_USER,
                                       current_user=user).toDict(user)
        current_user_content.roles.sort(key=lambda role: role.workspace.name)
        content_api = ContentApi(user, show_archived=True, show_deleted=True)
        if revision_id:
            file = content_api.get_one_from_revision(file_id, self._item_type,
                                                     workspace, revision_id)
        else:
            file = content_api.get_one(file_id, self._item_type, workspace)
            revision_id = file.revision_id

        file_path = content_api.get_one_revision_filepath(revision_id)

        nb_page = 0
        enable_pdf_buttons = False  # type: bool
        preview_urls = []

        try:
            nb_page = preview_manager.get_page_nb(file_path=file_path)
            # One preview URL per page, pages being numbered from 0.
            for page in range(int(nb_page)):
                url_str = '/previews/{}/pages/{}?revision_id={}'
                url = url_str.format(file_id, page, revision_id)
                preview_urls.append(url)

            enable_pdf_buttons = \
                preview_manager.has_pdf_preview(file_path=file_path)

        except PreviewGeneratorException as e:
            # INFO - A.P - Silently intercepts preview exception
            # As preview generation isn't mandatory, just register it
            # Format the exception itself: `e.__str__` (uncalled) would log
            # the bound method's repr instead of the error message.
            logger.debug(self,
                         'Preview Generator Exception: {}'.format(e))
        except Exception as e:
            # INFO - D.A - 2017-08-11 - Make Tracim robust to pg exceptions
            # Preview generator may potentially raise any type of exception
            # so we prevent user interface crashes by catching all exceptions
            logger.error(
                self,
                'Preview Generator Generic Exception: {}'.format(e))

        pdf_available = 'true' if enable_pdf_buttons else 'false'  # type: str

        fake_api_breadcrumb = self.get_breadcrumb(file_id)
        fake_api_content = DictLikeClass(breadcrumb=fake_api_breadcrumb,
                                         current_user=current_user_content)
        fake_api = Context(CTX.FOLDER, current_user=user)\
            .toDict(fake_api_content)

        dictified_file = Context(self._get_one_context,
                                 current_user=user).toDict(file, 'file')
        result = DictLikeClass(result=dictified_file,
                               fake_api=fake_api,
                               nb_page=nb_page,
                               url=preview_urls,
                               pdf_available=pdf_available)
        return result
Exemple #9
0
def test_page_number__no_extension() -> None:
    """The HTML fixture without file extension must be reported as 7 pages."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    nb_page = preview_manager.get_page_nb(
        file_path=HTML_FILE_PATH_NO_EXTENSION
    )
    assert nb_page == 7
Exemple #10
0
def test_page_number() -> None:
    """The HTML fixture must be reported as 7 pages."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    nb_page = preview_manager.get_page_nb(file_path=HTML_FILE_PATH)
    assert nb_page == 7
Exemple #11
0
def test_get_nb_page_no_extension() -> None:
    """The ODT fixture without file extension, queried with an explicit
    file_ext, must be reported as 2 pages."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    page_count = preview_manager.get_page_nb(
        file_path=ODT_FILE_PATH_NO_EXTENSION,
        file_ext=ODT_FILE_EXT,
    )
    assert page_count == 2
Exemple #12
0
def test_get_nb_page() -> None:
    """The ODP fixture must be reported as 5 pages."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    page_count = preview_manager.get_page_nb(file_path=ODP_FILE_PATH)
    assert page_count == 5
def test_page_number():
    """The image fixture must be reported as a single page."""
    preview_manager = PreviewManager(
        cache_folder_path=CACHE_DIR,
        create_folder=True,
    )
    nb_page = preview_manager.get_page_nb(file_path=IMAGE_FILE_PATH)
    assert nb_page == 1