Beispiel #1
0
def generate_pdf(kanji_code, kanji):
    """Render a PDF card for one kanji from the module-level ``template``.

    The KanjiVG SVG for ``kanji_code`` is rasterised to a temporary PNG,
    placed in the top-right corner of the first template page, and the JLPT
    level, readings and meanings are written next to it. The result is saved
    as ``<MEDIA_ROOT>/<kanji_code>.pdf``.

    Args:
        kanji_code: Base name (no extension) of the KanjiVG SVG; also used
            for the temporary PNG and for the output PDF.
        kanji: Object exposing ``jlpt``, ``kun_readings``, ``on_readings``
            and ``meanings`` (the last three are joined with ``', '``).

    Raises:
        Exception: Any error from the underlying libraries propagates
            unchanged.
    """
    image_path = os.path.join(
        settings.BASE_DIR,
        'kanjis/kanjivg/kanji/{}.svg'.format(kanji_code))
    png_path = os.path.join(settings.BASE_DIR,
                            'kanjis/pdf/tmp/{}.png'.format(kanji_code))

    file_handle = fitz.open(template)
    try:
        page = file_handle[0]
        page.clean_contents()
        # x0, y0, x1, y1 - https://pymupdf.readthedocs.io/en/latest/rect.html#rect
        img_rect = fitz.Rect(page.rect.width - 67, 28, page.rect.width - 20,
                             82)

        svg2png(url=image_path, write_to=png_path)
        page.insertImage(img_rect, filename=png_path)

        # The fallback must apply to the level itself: 'JLPT' + str(x) is
        # always truthy, so "... or '?'" on the whole expression never fired.
        jlpt_label = 'JLPT' + str(kanji.jlpt or '?')
        page.insertText((page.rect.width - 62, 18), jlpt_label)

        text_writer = fitz.TextWriter(
            (0, 0, page.rect.width - 70, page.rect.height))
        text_writer.append(fitz.Point(20, 38),
                           'Kun: ' + ', '.join(kanji.kun_readings))
        text_writer.append(fitz.Point(20, 53),
                           'On: ' + ', '.join(kanji.on_readings))
        text_writer.append(fitz.Point(20, 68),
                           'Meanings: ' + ', '.join(kanji.meanings))
        text_writer.writeText(page)
        file_handle.save('{}/{}.pdf'.format(settings.MEDIA_ROOT, kanji_code),
                         deflate=True)
    finally:
        # Release the document and the scratch PNG on failure as well.
        file_handle.close()
        if os.path.exists(png_path):
            os.remove(png_path)
Beispiel #2
0
def append_TOC(existingDoc, newDoc, entryname, filename, startPoint, path):
    """Append ``newDoc`` to ``existingDoc`` and add a table-of-contents entry.

    The entry (name, dot leader, page number and a clickable link) is written
    on page 0 of ``existingDoc``; the page number is also stamped at the
    bottom of the last page, and the document is saved to ``path + filename``.

    Args:
        existingDoc: Open document whose first page holds the table of contents.
        newDoc: Document to append.
        entryname: Text of the table-of-contents entry.
        filename: File name appended to ``path`` for saving.
        startPoint: Point where the entry text starts.
        path: Prefix concatenated with ``filename`` (no separator is added).

    Returns:
        The start point for the next entry: x = 100, 35 units below
        ``startPoint``.
    """
    # Appends the new page to the existing document that contains the table of contents
    existingDoc.insert_pdf(newDoc)
    TOC_page = existingDoc.load_page(page_id=0)

    # Generates the text entry for the new page
    TOC_page.insertText(startPoint, entryname, fontname="helv", fontsize=16, rotate=0)
    x_distance = (fitz.getTextlength(entryname, fontname="helv", fontsize=16)) + 105
    targetPageNumber = existingDoc.page_count
    entrynumber = "   %i" % targetPageNumber

    # Pre-set the anchor so a long entry name (no room for any dot) does not
    # leave dotLocation unbound when the page number is written below.
    dotLocation = fitz.Point(x_distance, startPoint.y)
    while x_distance < 475:
        dotLocation = fitz.Point(x_distance, startPoint.y)
        TOC_page.insertText(dotLocation, ".", fontname="helv", fontsize=16, rotate=0)
        x_distance = x_distance + 5
    TOC_page.insertText(dotLocation, entrynumber, fontname="helv", fontsize=16, rotate=0)

    # Creates the hyperlink for the newly appended page
    # When the entry is clicked on in the Table of Contents, user is sent to that particular page
    linkRect = fitz.Rect(100, startPoint.y - 20, x_distance + 25, startPoint.y + 15)
    TOC_page.insert_link({'kind': 1, 'from': linkRect, 'type': 'goto', 'page': targetPageNumber - 1, 'to': fitz.Point(0, 0), 'zoom': 0.0})

    # Inserts the page number on the bottom of the newly appended page
    insertedPage = existingDoc.load_page(page_id=-1)
    pageNumberPoint = fitz.Point(294, 830)
    insertPageNumber = "%i" % targetPageNumber
    insertedPage.insertText(pageNumberPoint, insertPageNumber, fontname="Times-Roman", fontsize=14, rotate=0)

    # Calculates the new start point for the next entry and saves the pdf
    newStartPoint = fitz.Point(100, startPoint.y + 35)
    existingDoc.save(path + filename)

    return newStartPoint
Beispiel #3
0
    def download_and_sign_latex_doc(self, repo_url, main_tex="main.tex",
                                    is_contract=False, options=None):
        """Clone a repo, render its LaTeX document to PDF and watermark it.

        Args:
            repo_url: URL passed to ``git clone``.
            main_tex: Name of the main ``.tex`` file inside the repository.
            is_contract: When true, the PDF is returned without watermarks.
            options: Optional mapping of template variables. When non-empty,
                ``main_tex`` is rendered through ``latex_jinja_env`` into
                ``main2.tex`` and that file is compiled instead.

        Returns:
            ``(pdf_bytes, doc_hash, file_title)`` on success, or
            ``(None, None, None)`` if any exception occurs.
        """
        new_main_tex = "main2.tex"
        watermark = "Document generated for: " + self.signer_user.email

        with tempfile.TemporaryDirectory() as tmpdir:
            try:
                timestamp = str(time.time())
                # Argument lists (no shell) so repo_url cannot inject commands;
                # "--" stops git from reading the URL as an option.
                subprocess.check_output(["git", "clone", "--", repo_url], cwd=tmpdir)
                repo_name = os.listdir(tmpdir)[0]
                file_tittle = repo_name.strip(" ") + ".pdf"
                filesdir = os.path.join(tmpdir, repo_name)
                if options:  # if there are special conditions to render
                    # modify the original template:
                    template = latex_jinja_env.get_template(
                        os.path.join(filesdir, main_tex)
                    )
                    renderer_template = template.render(**options)
                    # saves the rendered tex code to the output file
                    with open(os.path.join(filesdir, new_main_tex), "w") as f:
                        f.write(renderer_template)
                else:
                    new_main_tex = main_tex

                file_full_path = os.path.join(
                    filesdir, f"{new_main_tex.split('.')[0]}.pdf")
                run_git_rev_parse = subprocess.check_output(
                    ["git", "rev-parse", "master"], cwd=filesdir)
                complete_hash = get_hash([timestamp, self.signer_user.email], [run_git_rev_parse.decode('UTF-8')])
                # Exit status is deliberately not checked (as before); a
                # missing PDF surfaces as an error when the file is opened.
                subprocess.call(
                    [
                        "texliveonfly",
                        "--compiler=latexmk",
                        "--arguments=-interaction=nonstopmode -pdf",
                        "-f",
                        new_main_tex,
                    ],
                    cwd=filesdir
                )

                if not is_contract:
                    pointa = fitz.Point(conf.AXIS_X, conf.AXIS_Y)
                    pointb = fitz.Point(conf.AXIS_X_LOWER, conf.AXIS_Y)
                    document = fitz.open(file_full_path)
                    try:
                        for page in document:
                            page.insertText(pointa, text=watermark, fontsize=conf.WATERMARK_SIZE, fontname=conf.WATERMARK_FONT,
                                            rotate=conf.WATERMARK_ROTATION)
                            page.insertText(pointb, text="DocId: " + complete_hash, fontsize=conf.WATERMARK_SIZE,
                                            fontname=conf.WATERMARK_FONT, rotate=conf.WATERMARK_ROTATION)
                        document.save(file_full_path, incremental=1)
                    finally:
                        document.close()

                # Close the handle before the temporary directory is removed.
                with open(file_full_path, 'rb') as rendered:
                    pdffile = rendered.read()
                return pdffile, complete_hash, file_tittle

            except IOError as e:
                logger.info('IOError' + str(e))
                return None, None, None
            except Exception as e:
                logger.info("other error" + str(e))
                return None, None, None
Beispiel #4
0
def generate_content_page(header_to_pagenumber, headers_and_subheaders, page_height, page_width):
    """
    Generates a document that serves as a Table of Contents, with header and subheader information.

    Args:
        header_to_pagenumber: Mapping from each subheader to the page number printed next to it.
        headers_and_subheaders: Mapping from each header to an iterable of its subheaders.
        page_height: Height used for every page that is created.
        page_width: Width used for every page that is created.

    Returns:
        The newly created document. A new page is started whenever the line
        counter reaches 45 after a subheader has been written.
    """
    doc = fitz.open()
    page = doc.newPage(height=page_height, width=page_width)
    horizontal_start_point = 40
    vertical_start_point = 60
    spacing = 15
    num_lines = 1
    tab = 30

    # Add Table of Contents heading (centered)
    rect_topleft = fitz.Point(0, vertical_start_point + num_lines * spacing)
    num_lines += 4
    rect_bottomright = fitz.Point(page_width, vertical_start_point + num_lines * spacing)
    rect = fitz.Rect(rect_topleft, rect_bottomright)
    page.insertTextbox(rect, "Table of Contents", fontsize=32, align=fitz.TEXT_ALIGN_CENTER)
    num_lines += 2

    # The heading font is loop-invariant: build it once instead of per header.
    header_font = fitz.Font("Arial")

    # Create a TextWriter (per page)
    wr = fitz.TextWriter(page.rect)
    for h1_item, h2_items in headers_and_subheaders.items():
        # Insert the h1_item
        p = fitz.Point(
            horizontal_start_point, vertical_start_point + num_lines * spacing
        )
        wr.append(p, h1_item, fontsize=24, font=header_font)
        num_lines += 2
        for h2_item in h2_items:
            # Insert each h2_item
            p_tab = fitz.Point(
                tab + horizontal_start_point, vertical_start_point + num_lines * spacing
            )
            wr.append(p_tab, h2_item, fontsize=16)

            # Insert ... between h2_item and page number
            p_tab_number = fitz.Point(
                tab + horizontal_start_point + 500,
                vertical_start_point + num_lines * spacing,
            )
            add_dot_connector(wr, wr.lastPoint, p_tab_number)

            # Insert page number for h2_item
            wr.append(p_tab_number, str(header_to_pagenumber[h2_item]), fontsize=16)
            num_lines += 1

            # Move to new page if nearing end of page
            if num_lines >= 45:
                wr.writeText(page)
                page = doc.newPage(height=page_height, width=page_width)
                wr = fitz.TextWriter(page.rect)
                num_lines = 0
        num_lines += 2

    wr.writeText(page)
    return doc
Beispiel #5
0
 def thematic_break(self, node, entering):
     """Draw a horizontal rule on the current page, then start a new line.

     The rule runs from the current insert point's x to ``width - margin``,
     half a ``lineheight`` above the insert point. ``node`` and ``entering``
     are not used.
     """
     rule_start = fitz.Point(self.insertPoint.x,
                             self.insertPoint.y - lineheight / 2)
     rule_end = fitz.Point(width - margin, rule_start.y)

     rule = self.currentPage.newShape()
     rule.drawLine(rule_start, rule_end)
     rule.finish()
     rule.commit()

     self.cr("")
Beispiel #6
0
def test_rect():
    """Check Rect construction, point/rect inclusion, item assignment,
    division, and that malformed constructions or indices raise."""

    def _raises(action):
        # True when calling ``action`` raises. Only Exception is caught so
        # KeyboardInterrupt / SystemExit are not mistaken for a pass.
        try:
            action()
        except Exception:
            return True
        return False

    def _assign_out_of_range():
        bad = fitz.Rect()
        bad[5] = 1

    assert tuple(fitz.Rect()) == (0, 0, 0, 0)
    p1 = fitz.Point(10, 20)
    p2 = fitz.Point(100, 200)
    p3 = fitz.Point(150, 250)
    r = fitz.Rect(10, 20, 100, 200)
    r_tuple = tuple(r)
    assert tuple(fitz.Rect(p1, p2)) == r_tuple
    assert tuple(fitz.Rect(p1, 100, 200)) == r_tuple
    assert tuple(fitz.Rect(10, 20, p2)) == r_tuple
    assert tuple(r.include_point(p3)) == (10, 20, 150, 250)
    r = fitz.Rect(10, 20, 100, 200)
    assert tuple(r.include_rect((100, 200, 110, 220))) == (10, 20, 110, 220)
    r = fitz.Rect(10, 20, 100, 200)
    # include empty rect makes no change
    assert tuple(r.include_rect((0, 0, 0, 0))) == r_tuple
    # include invalid rect makes no change
    assert tuple(r.include_rect((1, 1, -1, -1))) == r_tuple
    r = fitz.Rect()
    for i in range(4):
        r[i] = i + 1
    assert r == fitz.Rect(1, 2, 3, 4)
    assert fitz.Rect() / 5 == fitz.Rect()
    assert fitz.Rect(1, 1, 2, 2) / fitz.Identity == fitz.Rect(1, 1, 2, 2)

    # Each of these must be rejected.
    assert _raises(lambda: fitz.Rect(1))
    assert _raises(lambda: fitz.Rect(1, 2, 3, 4, 5))
    assert _raises(lambda: fitz.Rect((1, 2, 3, 4, 5)))
    assert _raises(lambda: fitz.Rect(1, 2, 3, "x"))
    assert _raises(_assign_out_of_range)
Beispiel #7
0
def generate_content_page(header_to_pagenumber, headers_and_subheaders,
                          page_height, page_width):
    """Build a table-of-contents document from headers and subheaders.

    Each key of ``headers_and_subheaders`` is written as a heading, followed
    by its subheaders, each with a dot leader and the page number looked up
    in ``header_to_pagenumber``. A fresh page is started once the line
    counter reaches 45 after a subheader. Returns the new document.
    """
    left_margin = 40
    top_margin = 60
    line_height = 15
    indent = 30
    line_no = 1

    def baseline(line):
        # y coordinate of the given line number
        return top_margin + line * line_height

    toc = fitz.open()
    page = toc.newPage(height=page_height, width=page_width)
    title_at = fitz.Point(left_margin + 250, baseline(line_no))
    page.insertText(title_at, "Table of Contents", fontname="helv", fontsize=32)
    line_no += 4

    # One TextWriter per page
    writer = fitz.TextWriter(page.rect)
    for heading, subheadings in headers_and_subheaders.items():
        writer.append(fitz.Point(left_margin, baseline(line_no)),
                      heading, fontsize=24)
        line_no += 2

        for subheading in subheadings:
            y = baseline(line_no)
            writer.append(fitz.Point(indent + left_margin, y),
                          subheading, fontsize=16)

            # Dot leader from the end of the subheading to the number column
            number_at = fitz.Point(indent + left_margin + 500, y)
            add_dot_connector(writer, writer.lastPoint, number_at)

            writer.append(number_at,
                          str(header_to_pagenumber[subheading]),
                          fontsize=16)
            line_no += 1

            # Flush and continue on a new page when nearing the end
            if line_no >= 45:
                writer.writeText(page)
                page = toc.newPage(height=page_height, width=page_width)
                writer = fitz.TextWriter(page.rect)
                line_no = 0
        line_no += 2

    writer.writeText(page)
    return toc
Beispiel #8
0
def test_algebra():
    """Check arithmetic, containment and intersection of Point, Matrix and
    Rect, and that mixing incompatible operand types raises."""

    def _raises(action):
        # True when calling ``action`` raises. Only Exception is caught so
        # KeyboardInterrupt / SystemExit are not mistaken for a pass.
        try:
            action()
        except Exception:
            return True
        return False

    p = fitz.Point(1, 2)
    m = fitz.Matrix(1, 2, 3, 4, 5, 6)
    r = fitz.Rect(1, 1, 2, 2)
    assert p + p == p * 2
    assert p - p == fitz.Point()
    assert m + m == m * 2
    assert m - m == fitz.Matrix()
    assert r + r == r * 2
    assert r - r == fitz.Rect()
    assert p + 5 == fitz.Point(6, 7)
    assert m + 5 == fitz.Matrix(6, 7, 8, 9, 10, 11)
    assert r.tl in r
    assert r.tr not in r
    assert r.br not in r
    assert r.bl not in r
    assert p * m == fitz.Point(12, 16)
    assert r * m == fitz.Rect(9, 12, 13, 18)
    assert (fitz.Rect(1, 1, 2, 2) & fitz.Rect(2, 2, 3, 3)).is_empty
    assert not fitz.Rect(1, 1, 2, 2).intersects((2, 2, 4, 4))

    # Operations between incompatible types must be rejected.
    assert _raises(lambda: m + p)
    assert _raises(lambda: m + r)
    assert _raises(lambda: p + r)
    assert _raises(lambda: r + m)
    assert _raises(lambda: m in r)
Beispiel #9
0
    def __init__(self, raw: dict = None):
        '''Initialise the stroke from a raw dict.

        Keys read from ``raw``: ``start`` and ``end`` (default ``(0.0, 0.0)``),
        ``width`` (default ``0.0``) and ``color`` (default ``0``). A missing
        or empty ``raw`` is treated as an empty dict.
        '''
        # None instead of a shared mutable default dict
        raw = raw or {}

        # convert start/end point to real page CS
        self._start = fitz.Point(raw.get('start',
                                         (0.0, 0.0))) * Stroke.ROTATION_MATRIX
        self._end = fitz.Point(raw.get('end',
                                       (0.0, 0.0))) * Stroke.ROTATION_MATRIX

        # width, color
        self.width = raw.get('width', 0.0)
        self.color = raw.get('color', 0)
        self._type = RectType.UNDEFINED  # no type by default

        # update bbox
        self.update(self._to_rect())
Beispiel #10
0
def add_dot_connector(wr, start, end):
    """
    Adds ... between a startpoint and endpoint.

    A run of 200 dots is filled into a text box spanning from 15 units above
    ``start`` to 10 units below ``end``. Python-level stdout is redirected to
    the null device while filling, to suppress pymupdf warnings about text
    overflow. Credits for the workaround idea: https://stackoverflow.com/a/8447352

    Args:
        wr: TextWriter that receives the dots.
        start: Point where the dots begin.
        end: Point where the dots stop.
    """
    import contextlib

    dot_connector = "." * 200
    rect_topleft = fitz.Point(start.x, start.y - 15)
    rect_bottomright = fitz.Point(end.x, end.y + 10)
    rect = fitz.Rect(rect_topleft, rect_bottomright)

    # The context managers close the devnull handle and restore whatever
    # sys.stdout was before (even on error), rather than forcing it back to
    # sys.__stdout__ and clobbering a caller's own redirection.
    with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(devnull):
        wr.fillTextbox(rect, dot_connector)
Beispiel #11
0
    def __init__(self, raw:dict=None):
        '''Initialise the stroke from a raw dict (real page CS).

        ``start`` / ``end`` default to ``(0.0, 0.0)`` and ``width`` to ``0.0``.
        The two points are swapped when start lies right of or below end.
        '''
        if not raw:
            raw = {}

        origin = (0.0, 0.0)
        first = fitz.Point(raw.get('start', origin))
        second = fitz.Point(raw.get('end', origin))

        # keep start as the left/upper end
        if first.x > second.x or first.y > second.y:
            first, second = second, first
        self._start = first
        self._end = second

        # the parent initialiser receives the same raw dict
        super().__init__(raw)
        self.width = raw.get('width', 0.0)

        # bbox derived from start/end
        super().update_bbox(self._to_rect())
Beispiel #12
0
def mock_pdf(tmpdir_factory):
    """Create a two-page PDF in a fresh "data" temp directory.

    Each page carries one line of text at (50, 100), font size 20.
    Returns the path of the saved file as a string.
    """
    pdf = fitz.open()
    for line in ("I am a jedi!", "No, I am your father."):
        page = pdf.new_page()
        page.insert_text(fitz.Point(50, 100), line, fontsize=20)

    # Write the PDF through an explicit binary file handle
    target = tmpdir_factory.mktemp("data").join("mock_pdf_file.pdf")
    with open(target, 'wb') as handle:
        pdf.save(handle)

    return str(target)
def add_notes(page, notes):
    """Add one text annotation to ``page`` for every note in ``notes``.

    Each note must expose ``x``, ``y``, ``date``, ``content``, ``color`` and
    ``author``. The note colour is used for both fill and stroke, opacity is
    set to 1, and the first line of the content becomes the subject.
    """
    for note in notes:
        # Create the location, note that the Mendeley coordinate system seems to have its Y axis inverted when
        # compared to the pdf library's axis. So the Y position should be flipped
        page_lr = page.rect
        corner = pdf.Point(note.x, page_lr.y1 - note.y)

        # Get the date; fall back to "now" when it is missing or cannot be
        # formatted (best effort, as before).
        if note.date is None:
            date = pdf.getPDFnow()
        else:
            try:
                date = note.date.strftime("D:%Y%m%d%H%M%SZ00'00")
            except Exception:
                date = pdf.getPDFnow()

        # Make the note
        anot = page.addTextAnnot(corner, note.content)

        color = anot.colors
        color["fill_color"] = note.color
        color["stroke_color"] = note.color
        anot.setColors(color)

        anot.setOpacity(1)

        info = anot.info
        info["title"] = note.author
        info["content"] = note.content
        info["subject"] = note.content.partition("\n")[0]
        info["creationDate"] = date
        anot.setInfo(info)

        anot.update()
Beispiel #14
0
def insert_text_output_pdf_fitz(pdf_file_path, insert_text):
    """
    Insert text into an existing PDF file and write the result under another name.
    :param pdf_file_path:       path of the existing PDF file
    :param insert_text:         text to insert
    :return:
    """
    import fitz

    # Copy the first page of the existing PDF into a new, empty PDF
    source_doc = fitz.open(pdf_file_path)
    output_doc = fitz.open()
    output_doc.insertPDF(source_doc, from_page=0, to_page=0)
    first_page = output_doc.loadPage(0)

    # Insert position: 10 mm / 10 mm, converted to points
    anchor = fitz.Point(mm_to_pts(10), mm_to_pts(10))
    first_page.insertText(
        anchor,
        insert_text,
        fontname="helv",
        fontsize=16,
        rotate=0,
    )

    # Fixed output name
    output_doc.save("PyMuPDF.pdf")
Beispiel #15
0
    def hover_annot(self):
        """Highlight the annotation under the cursor on the current page.

        Annotations containing the cursor get opacity 0.5 and a hint is
        emitted; all others are restored to opacity 1. The page pixmap cache
        is cleared and the view is updated afterwards.

        Returns:
            ``None`` when the page has no annotations, otherwise
            ``(page, annot)`` where ``annot`` is the hovered annotation or
            ``None`` if the cursor is over none of them.
        """
        ex, ey, page_index = self.get_cursor_absolute_position()
        page = self.document[page_index]
        annot = page.firstAnnot
        if not annot:
            return None

        annots = []
        while annot:
            annots.append(annot)
            annot = annot.next

        cursor = fitz.Point(ex, ey)
        hovered_annot = None
        for annot in annots:
            if cursor in annot.rect:
                hovered_annot = annot
                annot.setOpacity(0.5)
                self.message_to_emacs.emit("[d]Delete Annot [e]Edit Annot")
            else:
                annot.setOpacity(1)  # restore annot
            annot.update()

        # Decide the flag once after the loop: setting it per annotation let
        # a later, non-hovered annotation overwrite an earlier hit.
        self.is_hover_annot = hovered_annot is not None

        self.page_cache_pixmap_dict.clear()
        self.update()
        # Return the hovered annotation, not merely the last one iterated.
        return page, hovered_annot
def test_paper_size(pdf_document, paper_size_coordinates):
    """
    Check the paper size of every page in the document.

    Mismatching MediaBox/CropBox, a displaced CropBox and page rotation only
    produce warnings; a bounding box that differs from
    ``paper_size_coordinates`` fails the test.
    """
    for page_number, page in enumerate(pdf_document, start=1):
        # The three checks below only warn for now; depending on use cases
        # they could become errors in the future.
        if page.CropBox != page.MediaBox:
            warnings.warn(
                f"Page {page_number} MediaBox does not match CropBox, "
                "so it may appear differently when printed",
                UserWarning,
            )

        if page.CropBoxPosition != fitz.Point(0, 0):
            warnings.warn(f"Page {page_number} is displaced", UserWarning)

        if page.rotation != 0:
            warnings.warn(f"Page {page_number} is rotated", UserWarning)

        checked_page = pdfcheck.core.Page(page=page)
        assert (checked_page.bounding_box == paper_size_coordinates
                ), f"Page {page_number} has wrong size"
Beispiel #17
0
    def hover_annot(self):
        """Track which annotation on the current page is under the cursor.

        A hovered annotation is set to opacity 0.5, all others to 1.0;
        an annotation is only touched when its opacity actually changes.
        The pixmap cache is cleared and the view updated only when the
        hover state flips.

        Returns ``(None, None)`` for a page without annotations, otherwise
        ``(page, hovered annotation or None)``.
        """
        ex, ey, page_index = self.get_cursor_absolute_position()
        page = self.document[page_index]
        node = page.firstAnnot
        if not node:
            return None, None

        # Walk the linked list of annotations into a plain list
        page_annots = []
        while node:
            page_annots.append(node)
            node = node.next

        cursor = fitz.Point(ex, ey)
        hovering = False
        hovered = None
        for candidate in page_annots:
            under_cursor = cursor in candidate.rect
            if under_cursor:
                hovering = True
                hovered = candidate
                self.buffer.message_to_emacs.emit(
                    "[d]Delete Annot [e]Edit Annot")
            wanted_opacity = 0.5 if under_cursor else 1.0
            if wanted_opacity != candidate.opacity:
                candidate.setOpacity(wanted_opacity)
                candidate.update()

        # Repaint only when the hover state changed
        if hovering != self.is_hover_annot:
            self.is_hover_annot = hovering
            self.page_cache_pixmap_dict.clear()
            self.update()

        return page, hovered
Beispiel #18
0
    def replace_text_in_pdf(self):
        """
        Redact the text matches in the PDF at ``self.path`` and write
        today's date on the first page; the result is saved as
        "pdf_updated.pdf".
        """
        document = fitz.open(self.path)

        for page in document:
            # _wrapContents is needed for fixing alignment issues with rect boxes in some cases
            page._wrapContents()

            # Strings selected by the matcher from the page's text lines
            page_lines = page.getText("develop").split('\n')
            matches = self.search_pdf_for_text_match(page_lines)

            # Mark every occurrence of every match for redaction
            for match in matches:
                for area in page.searchFor(match):
                    page.addRedactAnnot(area)
            page.apply_redactions()

        # Today's date, e.g. "<Month name> <day>, <year>", goes on page 0
        # at fixed coordinates.
        first_page = document[0]
        today = date.today()
        date_text = "{} {}, {}".format(today.strftime("%B"), today.day, today.year)
        first_page.insertText(fitz.Point(440, 58),
                              date_text,
                              fontname="helv",
                              fontsize=16)

        # Save the updated PDF file
        document.save("pdf_updated.pdf")
Beispiel #19
0
def test_quad():
    """Check Quad properties before and after a shear transformation, and
    that an out-of-range index and division by a non-invertible matrix raise."""
    r = fitz.Rect(10, 10, 20, 20)
    q = r.quad
    assert q.is_rectangular
    assert not q.is_empty
    assert q.is_convex
    q *= fitz.Matrix(1, 1).preshear(2, 3)
    assert not q.is_rectangular
    assert not q.is_empty
    assert q.is_convex
    assert r.tl not in q
    assert r not in q
    assert r.quad not in q

    # Catch Exception rather than using a bare except, so KeyboardInterrupt /
    # SystemExit are not mistaken for the expected failure.
    failed = False
    try:
        q[5] = fitz.Point()
    except Exception:
        failed = True
    assert failed

    failed = False
    try:
        q /= (1, 0, 1, 0, 1, 0)
    except Exception:
        failed = True
    assert failed
Beispiel #20
0
    def insert_text_output_pdf_fitz(pdf_file_path, target_coordinate, insert_text):
        """
        Insert text into an existing PDF file and write the result under another name.
        :param pdf_file_path:       path of the existing PDF file
        :param target_coordinate:   coordinates (in mm) at which the text is inserted
        :param insert_text:         text to insert
        :return:
        """
        import fitz
        # read your existing PDF and copy its first page into a new document
        reader = fitz.open(pdf_file_path)
        writer = fitz.open()
        writer.insertPDF(reader, from_page=0, to_page=0)

        page = writer.loadPage(0)
        target_coordinate_arr = np.squeeze(np.asarray(target_coordinate))
        target_x = float(target_coordinate_arr[0])
        target_y = float(target_coordinate_arr[1])

        # Honour the requested position: it was parsed before but then
        # ignored in favour of a hard-coded point. 1 pt = 1/72 inch and
        # 1 inch = 25.4 mm.
        points_per_mm = 72.0 / 25.4
        p = fitz.Point(target_x * points_per_mm,
                       target_y * points_per_mm)  # start point of 1st line
        page.insertText(p,  # bottom-left of 1st char
                        insert_text,  # the text (honors '\n')
                        fontname="helv",
                        fontsize=8,
                        rotate=0,  # also available: 90, 180, 270
                        )
        writer.save("out.pdf")
Beispiel #21
0
    def __init__(self, raw: dict = None):
        '''Initialise the stroke from a raw dict.

        ``start`` / ``end`` default to ``(0.0, 0.0)`` and ``width`` to ``0.0``.
        The two points are swapped when start lies right of or below end.
        A missing or empty ``raw`` is treated as an empty dict.
        '''
        # None instead of a shared mutable default dict (the same dict is
        # also handed on to the parent initialiser)
        raw = raw or {}

        # convert start/end point to real page CS
        self._start = fitz.Point(raw.get('start',
                                         (0.0, 0.0))) * Stroke.ROTATION_MATRIX
        self._end = fitz.Point(raw.get('end',
                                       (0.0, 0.0))) * Stroke.ROTATION_MATRIX

        if self._start.x > self._end.x or self._start.y > self._end.y:
            self._start, self._end = self._end, self._start

        # width, color
        super().__init__(raw)  # type, color
        self.width = raw.get('width', 0.0)

        # update bbox
        super().update_bbox(self._to_rect())
def test_rectangles():
    """Check Rect construction from points/coordinates and the results of
    includePoint / includeRect."""
    top_left = fitz.Point(10, 20)
    bottom_right = fitz.Point(100, 200)
    outside = fitz.Point(150, 250)

    def fresh_rect():
        # includePoint / includeRect results are checked on a new rect each time
        return fitz.Rect(10, 20, 100, 200)

    r = fresh_rect()
    r_tuple = tuple(r)

    # All construction variants describe the same rectangle
    for built in (
        fitz.Rect(top_left, bottom_right),
        fitz.Rect(top_left, 100, 200),
        fitz.Rect(10, 20, bottom_right),
    ):
        assert tuple(built) == r_tuple

    assert tuple(r.includePoint(outside)) == (10, 20, 150, 250)

    r = fresh_rect()
    assert tuple(r.includeRect((100, 200, 110, 220))) == (10, 20, 110, 220)

    r = fresh_rect()
    # include empty rect makes no change
    assert tuple(r.includeRect((0, 0, 0, 0))) == r_tuple
    # include infinite rect delivers infinite rect
    assert tuple(r.includeRect((1, 1, -1, -1))) == (1, 1, -1, -1)
Beispiel #23
0
    def download_render_sign_url_doc(self, pdf_url, timestamp_now, is_contract=False):
        """Download a PDF from an external url and watermark it.

        Args:
            pdf_url: URL of the PDF; the last path segment is used as the
                local file name.
            timestamp_now: Timestamp fed into the document hash.
            is_contract: When true, the PDF is returned without watermarks.

        Returns:
            ``(pdf_bytes, doc_hash, file_title)`` on success, or
            ``(None, None, None)`` when the download fails, the body is empty
            or any exception occurs.
        """

        with tempfile.TemporaryDirectory() as tmpdir:
            try:
                file_full_path = os.path.join(tmpdir, pdf_url.split("/")[-1])
                # NOTE(review): the title is cut from the full temporary path,
                # so it (and the hash built from it) includes the temp
                # directory name - confirm this is intended.
                file_tittle = file_full_path.split(".")[0]
                watermark = "Document generated for: " + self.signer_user.email
                complete_hash = get_hash([timestamp_now, self.signer_user.email], [file_tittle])

                req = requests.get(pdf_url)
                if req.status_code != 200:
                    logger.info(f"[Error] download_render_url_doc:"
                                f" couldnt download the pdf: {req.content}")
                    return None, None, None

                # Reject an empty body before writing anything to disk.
                if not req.content:
                    logger.info("Error rendering the pdf external document")
                    return None, None, None

                with open(file_full_path, 'wb') as mypdf:
                    mypdf.write(req.content)

                if not is_contract:
                    pointa = fitz.Point(conf.AXIS_X, conf.AXIS_Y)
                    pointb = fitz.Point(conf.AXIS_X_LOWER, conf.AXIS_Y)
                    document = fitz.open(file_full_path)
                    try:
                        for page in document:
                            page.insertText(pointa, text=watermark, fontsize=conf.WATERMARK_SIZE,
                                            fontname=conf.WATERMARK_FONT,
                                            rotate=conf.WATERMARK_ROTATION)
                            page.insertText(pointb, text="DocId: " + complete_hash, fontsize=conf.WATERMARK_SIZE,
                                            fontname=conf.WATERMARK_FONT, rotate=conf.WATERMARK_ROTATION)
                        document.save(file_full_path, incremental=1)
                    finally:
                        document.close()

                # Close the handle before the temporary directory is removed.
                with open(file_full_path, 'rb') as rendered:
                    pdffile = rendered.read()
                return pdffile, complete_hash, file_tittle

            except IOError as e:
                logger.info('pdf render IOError' + str(e))
                return None, None, None
            except Exception as e:
                logger.info("other error pdf render " + str(e))
                return None, None, None
Beispiel #24
0
    def __init__(self, raw: dict = None):
        '''Initialise the stroke from a raw dict.

        Keys read from ``raw``: ``start`` and ``end`` (default ``(0.0, 0.0)``),
        ``width`` (default ``0.0``) and ``color`` (default ``0``). The two
        points are swapped when start lies right of or below end. A missing
        or empty ``raw`` is treated as an empty dict.

        Raises AssertionError when the stroke is neither horizontal nor
        vertical.
        '''
        # None instead of a shared mutable default dict
        raw = raw or {}

        # convert start/end point to real page CS
        self._start = fitz.Point(raw.get('start',
                                         (0.0, 0.0))) * Stroke.ROTATION_MATRIX
        self._end = fitz.Point(raw.get('end',
                                       (0.0, 0.0))) * Stroke.ROTATION_MATRIX
        assert self.horizontal or self.vertical, 'Supports horizontal or vertical Strokes only'

        if self._start.x > self._end.x or self._start.y > self._end.y:
            self._start, self._end = self._end, self._start

        # width, color
        self.width = raw.get('width', 0.0)
        self.color = raw.get('color', 0)
        self.type = RectType.UNDEFINED  # no type by default

        # update bbox
        super().update(self._to_rect())
def pdf_add_emp_info(name='NAME',
                     depno='DEPARTMENT',
                     empid='1234',
                     years='10'):
    """Stamp employee details onto the first page of a user-selected PDF.

    A file dialog asks for the form; the four values are written at fixed
    positions, the result is saved next to the original with ``_<empid>``
    inserted before the last four characters of the chosen name, and the
    saved file is then opened with ``startfile``.
    """
    # (position on the form, value to write)
    form_fields = (
        (fitz.Point(113, 81), name),    # Applicant
        (fitz.Point(113, 93), depno),   # Dept No.
        (fitz.Point(363, 81), empid),   # Employee ID No.
        (fitz.Point(363, 93), years),   # Length of Service
    )

    fname = askopenfilename(title="Open PDF Authority to Deduct Form.",
                            filetypes=(("PDF files", "*.pdf"),
                                       ("all files", "*.*")))
    stem = fname[:len(fname) - 4]  # drops the last four characters
    fname_empid = stem + '_' + empid + '.pdf'

    doc = fitz.open(fname)
    first_page = doc[0]
    for position, value in form_fields:
        first_page.insertText(position,
                              value,
                              rotate=0,
                              fontsize=9,
                              render_mode=0,
                              overlay=True)

    doc.save(fname_empid)
    startfile(fname_empid)  # open the signed PDF.
    return
Beispiel #26
0
def generateTOC(existingDoc, filename, path):
    """Insert a table-of-contents page at the front of ``existingDoc``.

    The new page 0 gets a coloured title and the page number "1" at the
    bottom. The document is saved to ``path + filename`` and returned.
    """
    toc_page = existingDoc.newPage(pno=0)

    # Title; x is derived from the text length measured with
    # fitz.getTextlength's default font settings.
    title = "-- Table of Contents --"
    title_x = (595 / 2) - fitz.getTextlength(title)
    toc_page.insertText(
        fitz.Point(title_x, 85),
        title,
        fontname="Times-Roman",
        color=(0, 0.35, 0.8),
        fontsize=24,
        rotate=0,
    )

    # The table of contents is page 1; number it at the bottom.
    toc_page.insertText(
        fitz.Point(294, 815), "1", fontname="Times-Roman", fontsize=14, rotate=0
    )

    existingDoc.save(path + filename)
    return existingDoc
Beispiel #27
0
    def document(self, node, entering):
        """Handle the document node.

        On leaving, the current page is finished and the style is popped.
        On entering, the renderer state is reset, a new page is appended and
        the base style is pushed. ``node`` is not used.
        """
        if not entering:
            self.finishPage()
            style.pop()  # We should be done anyway
            return

        # Reset layout and link state
        self.indent = 0
        self.insertPoint = fitz.Point(margin, margin + lineheight)
        self.linkDestination = None
        self.linkRects = []

        # First page and base text style
        self.currentPage = self.doc.newPage(-1, width, height)
        style.push(fontname=font.TIMES, fontsize=10, indent=0)
Beispiel #28
0
    def update_bbox(self, rect):
        '''Update the stroke bbox (related to real page CS).

        - zero-area rect: its corners become the start/end points and the
          bbox is rebuilt from them;
        - otherwise: the bbox is set directly and start/end are placed on
          the centre line of the longer side.

        Returns ``self``.
        '''
        bbox = fitz.Rect(rect)

        # degenerate rect: treat its two corners as the line itself
        if bbox.getArea() == 0.0:
            self._start = fitz.Point(bbox[0:2])
            self._end = fitz.Point(bbox[2:])
            super().update_bbox(self._to_rect())
            return self

        super().update_bbox(bbox)

        if bbox.width >= bbox.height:
            # horizontal stroke along the vertical middle
            mid_y = (bbox.y0 + bbox.y1) / 2.0
            self._start = fitz.Point(bbox.x0, mid_y)
            self._end = fitz.Point(bbox.x1, mid_y)
        else:
            # vertical stroke along the horizontal middle
            mid_x = (bbox.x0 + bbox.x1) / 2.0
            self._start = fitz.Point(mid_x, bbox.y0)
            self._end = fitz.Point(mid_x, bbox.y1)

        return self
def add_highlights(page, highlights):
    """Add one highlight annotation to ``page`` per entry of ``highlights``.

    Each entry must expose ``x`` and ``y`` (two values each) and ``color``,
    which is used for both fill and stroke.
    """
    for high in highlights:
        # The Mendeley coordinate system seems to have its Y axis inverted
        # compared to the pdf library's, so Y is measured from the page's y1.
        page_y1 = page.rect.y1
        left_x, right_x = high.x[0], high.x[1]
        lower_y = page_y1 - high.y[0]
        upper_y = page_y1 - high.y[1]

        # Corner order passed to Quad: ul, ur, ll, lr
        quad = pdf.Quad(
            pdf.Point(left_x, upper_y),
            pdf.Point(right_x, upper_y),
            pdf.Point(left_x, lower_y),
            pdf.Point(right_x, lower_y),
        )

        highlight = page.addHighlightAnnot(quad)

        # Recolour the annotation
        palette = highlight.colors
        palette["fill_color"] = high.color
        palette["stroke_color"] = high.color
        highlight.setColors(palette)

        highlight.update()
Beispiel #30
0
def convert2(input_file):
    """Extract label images from a PDF and rebuild them as a label-sized PDF.

    Every image of every page is written to ``p<page>-i<xref>.png`` in the
    current directory, and landscape images are rotated to portrait. Images
    selected as labels are placed one per page (282 x 424) in a new document
    with their caption text, and their PNG files are removed afterwards.

    Args:
        input_file: Path of the source PDF.

    Returns:
        Path of the generated PDF, created in the system temporary directory.
    """
    import fitz  # https://pymupdf.readthedocs.io/en/latest/tutorial/

    pdf = fitz.open(input_file)
    labels = {}
    for no in range(len(pdf)):
        i = 0
        paragraphs = pdf.loadPage(no).getTextBlocks()
        for image in pdf.getPageImageList(no):
            xref = image[0]
            pix = fitz.Pixmap(pdf, xref)
            if pix.n > 4:  # CMYK vs GRAY or RGB
                pix = fitz.Pixmap(fitz.csRGB, pix)
            file = "p%s-i%s.png" % (no, xref)
            pix.writePNG(file)
            pix = None

            img = Image.open(file)
            width, height = img.size
            if width > height:  # Landscape, have to rotate -90
                width, height = height, width
                img = img.rotate(270, expand=True)
                img.save(file, quality=100, subsampling=0)
            if height == 2200:  # USPS Summary Page
                # NOTE(review): this skips the counter increment and leaves
                # the PNG on disk - confirm that is intended.
                continue
            if height < 1801 and width < 1201:
                labels[file] = paragraphs[i * 3 + 2][4]
            if width == 762 and height == 1200:
                labels[file] = ""
            img = None
            i += 1

    doc = fitz.open()
    rect = fitz.Rect(0, 0, 280, 410)
    for label in labels:
        pix = fitz.Pixmap(label)
        page = doc.newPage(width=282, height=424)
        page.insertImage(rect, pixmap=pix)

        p1 = fitz.Point(12, page.rect.height - 6)
        shape = page.newShape()
        shape.insertText(p1, labels[label], fontsize=12)
        shape.commit()
        os.remove(label)

    # Reserve a unique path in the platform's temp directory. The previous
    # TemporaryFile().name is an integer descriptor on POSIX, which gave
    # collision-prone names such as "/tmp/5.pdf" and leaked the temp file.
    fd, fn = tempfile.mkstemp(suffix=".pdf")
    os.close(fd)
    doc.save(fn, garbage=4, deflate=1)
    return fn