def generate_pdf(kanji_code, kanji): try: file_handle = page = file_handle[0] page.clean_contents() image_path = os.path.join( settings.BASE_DIR, 'kanjis/kanjivg/kanji/{}.svg'.format(kanji_code)) # x0, y0, x1, y1 - img_rect = fitz.Rect(page.rect.width - 67, 28, page.rect.width - 20, 82) png_path = os.path.join(settings.BASE_DIR, 'kanjis/pdf/tmp/{}.png'.format(kanji_code)) svg2png(url=image_path, write_to=png_path) page.insertImage(img_rect, filename=png_path) page.insertText((page.rect.width - 62, 18), 'JLPT' + str(kanji.jlpt) or '?') text_writer = fitz.TextWriter( (0, 0, page.rect.width - 70, page.rect.height)) text_writer.append(fitz.Point(20, 38), 'Kun: ' + ', '.join(kanji.kun_readings)) text_writer.append(fitz.Point(20, 53), 'On: ' + ', '.join(kanji.on_readings)) text_writer.append(fitz.Point(20, 68), 'Meanings: ' + ', '.join(kanji.meanings)) text_writer.writeText(page)'{}/{}.pdf'.format(settings.MEDIA_ROOT, kanji_code), deflate=True) os.remove(png_path) except Exception as e: raise e
def append_TOC(existingDoc, newDoc, entryname, filename, startPoint, path): # Appends the new page to the existing document that contains the table of contents existingDoc.insert_pdf(newDoc) TOC_page = existingDoc.load_page(page_id=0) # Generates the text entry for the new page TOC_page.insertText(startPoint, entryname, fontname="helv", fontsize=16, rotate=0) x_distance = (fitz.getTextlength(entryname, fontname="helv", fontsize=16)) + 105 targetPageNumber = existingDoc.page_count entrynumber = " %i" % targetPageNumber while(x_distance < 475): dotLocation = fitz.Point(x_distance, startPoint.y) TOC_page.insertText(dotLocation, ".", fontname="helv", fontsize=16, rotate=0) x_distance = x_distance + 5 TOC_page.insertText(dotLocation, entrynumber, fontname="helv", fontsize=16, rotate=0) # Creates the hyperlink for the newly appended page # When the entry is clicked on in the Table of Contents, user is sent to that particular page linkRect = Rect(100, startPoint.y-20, x_distance + 25, startPoint.y + 15) newLink = TOC_page.insert_link({'kind': 1, 'from': linkRect, 'type': 'goto', 'page': targetPageNumber-1, 'to': fitz.Point(0, 0), 'zoom': 0.0}) # Inserts the page number on the bottom of the newly appended page insertedPage = existingDoc.load_page(page_id=-1) pageNumberPoint = fitz.Point(294, 830) insertPageNumber = "%i" % targetPageNumber insertedPage.insertText(pageNumberPoint, insertPageNumber, fontname="Times-Roman", fontsize=14, rotate=0) # Calculates the new start point for the next entry and saves the pdf newStartPoint = fitz.Point(100, startPoint.y + 35) + filename) return newStartPoint
def download_and_sign_latex_doc(self, repo_url, main_tex="main.tex", is_contract=False, options={}): """clones a repo, renders and signs a pdf latex document""" new_main_tex = "main2.tex" watermark = "Document generated for: " + clone = F'git clone {repo_url}' rev_parse = 'git rev-parse master' with tempfile.TemporaryDirectory() as tmpdir: try: timestamp = str(time.time()) subprocess.check_output(clone, shell=True, cwd=tmpdir) repo_name = os.listdir(tmpdir)[0] file_tittle = repo_name.strip(" ") + ".pdf" filesdir = os.path.join(tmpdir, repo_name) if options != {}: # if there are special conditions to render # modify the original template: template = latex_jinja_env.get_template( os.path.join(filesdir, main_tex) ) renderer_template = template.render(**options) with open(os.path.join(filesdir, new_main_tex), "w") as f: # saves tex_code to outpout file f.write(renderer_template) else: new_main_tex = main_tex file_full_path = os.path.join( filesdir, f"{new_main_tex.split('.')[0]}.pdf") run_git_rev_parse = subprocess.check_output(rev_parse, shell=True, cwd=filesdir) complete_hash = get_hash([timestamp,], [run_git_rev_parse.decode('UTF-8')]) F"texliveonfly --compiler=latexmk --arguments='-interaction=nonstopmode -pdf' -f {new_main_tex}", shell=True, cwd=filesdir ) if not is_contract: pointa = fitz.Point(conf.AXIS_X, conf.AXIS_Y) pointb = fitz.Point(conf.AXIS_X_LOWER, conf.AXIS_Y) document = for page in document: page.insertText(pointa, text=watermark, fontsize=conf.WATERMARK_SIZE, fontname=conf.WATERMARK_FONT, rotate=conf.WATERMARK_ROTATION) page.insertText(pointb, text="DocId: " + complete_hash, fontsize=conf.WATERMARK_SIZE, fontname=conf.WATERMARK_FONT, rotate=conf.WATERMARK_ROTATION), incremental=1) document.close() pdffile = open(file_full_path, 'rb').read() return pdffile, complete_hash, file_tittle except IOError as e:'IOError' + str(e)) return None, None, None except Exception as e:"other error" + str(e)) return None, None, None
def generate_content_page(header_to_pagenumber, headers_and_subheaders, page_height, page_width):
    """
    Generates a document that serves as a Table of Contents, with header
    and subheader information.

    Args:
        header_to_pagenumber: Mapping indexed by subheader; the looked-up
            value is printed (via ``str``) as that subheader's page number.
        headers_and_subheaders: Mapping from each header to an iterable of
            its subheaders.
        page_height: Height used for every page that is created.
        page_width: Width used for every page that is created.

    Returns:
        The newly created document containing the contents page(s).
    """
    # Start from a new, empty document; without this `doc` would be read
    # before it is ever assigned.
    doc = fitz.open()
    page = doc.newPage(height=page_height, width=page_width)

    horizontal_start_point = 40
    vertical_start_point = 60
    spacing = 15
    num_lines = 1
    tab = 30

    # Add Table of Contents heading (centered)
    rect_topleft = fitz.Point(0, vertical_start_point + num_lines * spacing)
    num_lines += 4
    rect_bottomright = fitz.Point(page_width, vertical_start_point + num_lines * spacing)
    rect = fitz.Rect(rect_topleft, rect_bottomright)
    page.insertTextbox(rect, "Table of Contents", fontsize=32, align=fitz.TEXT_ALIGN_CENTER)
    num_lines += 2

    # Create a TextWriter (per page)
    wr = fitz.TextWriter(page.rect)
    for h1_item, h2_items in headers_and_subheaders.items():
        # Insert the h1_item
        p = fitz.Point(
            horizontal_start_point,
            vertical_start_point + num_lines * spacing
        )
        wr.append(p, h1_item, fontsize=24, font=fitz.Font("Arial"))
        num_lines += 2

        for h2_item in h2_items:
            # Insert each h2_item
            p_tab = fitz.Point(
                tab + horizontal_start_point,
                vertical_start_point + num_lines * spacing
            )
            wr.append(p_tab, h2_item, fontsize=16)

            # Insert ... between h2_item and page number
            p_tab_number = fitz.Point(
                tab + horizontal_start_point + 500,
                vertical_start_point + num_lines * spacing,
            )
            add_dot_connector(wr, wr.lastPoint, p_tab_number)

            # Insert page number for h2_item
            wr.append(p_tab_number, str(header_to_pagenumber[h2_item]), fontsize=16)
            num_lines += 1

            # Move to new page if nearing end of page: flush the pending
            # text onto the current page, then continue on a fresh one.
            if num_lines >= 45:
                wr.writeText(page)
                page = doc.newPage(height=page_height, width=page_width)
                wr = fitz.TextWriter(page.rect)
                num_lines = 0

        num_lines += 2

    # Flush whatever is still pending for the last page.
    wr.writeText(page)
    return doc
def thematic_break(self, node, entering): # attrs = self.attrs(node) pntFrom = fitz.Point(self.insertPoint.x, self.insertPoint.y - lineheight / 2) pntTo = fitz.Point(width - margin, pntFrom.y) shape = self.currentPage.newShape() shape.drawLine(pntFrom, pntTo) shape.finish() shape.commit()"")
def test_rect():
    """Check fitz.Rect construction, inclusion of points/rects, item assignment,
    division, and that malformed constructions or indices raise."""

    def raises(action):
        # True when calling `action` raises anything at all.
        try:
            action()
        except BaseException:
            return True
        return False

    assert tuple(fitz.Rect()) == (0, 0, 0, 0)

    top_left = fitz.Point(10, 20)
    bottom_right = fitz.Point(100, 200)
    outside = fitz.Point(150, 250)

    rect = fitz.Rect(10, 20, 100, 200)
    expected = tuple(rect)

    # The same rectangle, built from different argument mixes.
    for args in ((top_left, bottom_right), (top_left, 100, 200), (10, 20, bottom_right)):
        assert tuple(fitz.Rect(*args)) == expected

    assert tuple(rect.include_point(outside)) == (10, 20, 150, 250)

    rect = fitz.Rect(10, 20, 100, 200)
    assert tuple(rect.include_rect((100, 200, 110, 220))) == (10, 20, 110, 220)

    rect = fitz.Rect(10, 20, 100, 200)
    # include empty rect makes no change
    assert tuple(rect.include_rect((0, 0, 0, 0))) == expected
    # include invalid rect makes no change
    assert tuple(rect.include_rect((1, 1, -1, -1))) == expected

    # Item assignment fills the four coordinates in order.
    rect = fitz.Rect()
    for index, value in enumerate((1, 2, 3, 4)):
        rect[index] = value
    assert rect == fitz.Rect(1, 2, 3, 4)

    assert fitz.Rect() / 5 == fitz.Rect()
    assert fitz.Rect(1, 1, 2, 2) / fitz.Identity == fitz.Rect(1, 1, 2, 2)

    # Each of these constructions must be rejected.
    assert raises(lambda: fitz.Rect(1))
    assert raises(lambda: fitz.Rect(1, 2, 3, 4, 5))
    assert raises(lambda: fitz.Rect((1, 2, 3, 4, 5)))
    assert raises(lambda: fitz.Rect(1, 2, 3, "x"))

    def assign_out_of_range():
        target = fitz.Rect()
        target[5] = 1

    assert raises(assign_out_of_range)
def generate_content_page(header_to_pagenumber, headers_and_subheaders, page_height, page_width):
    """
    Generates a document that serves as a Table of Contents, with header
    and subheader information.

    Args:
        header_to_pagenumber: Mapping indexed by subheader; the looked-up
            value is printed (via ``str``) as that subheader's page number.
        headers_and_subheaders: Mapping from each header to an iterable of
            its subheaders.
        page_height: Height used for every page that is created.
        page_width: Width used for every page that is created.

    Returns:
        The newly created document containing the contents page(s).
    """
    # Start from a new, empty document; without this `doc` would be read
    # before it is ever assigned.
    doc = fitz.open()
    page = doc.newPage(height=page_height, width=page_width)

    horizontal_start_point = 40
    vertical_start_point = 60
    spacing = 15
    num_lines = 1
    tab = 30

    # Title, placed at a fixed horizontal offset from the left start point.
    p = fitz.Point(horizontal_start_point + 250, vertical_start_point + num_lines * spacing)
    page.insertText(p, "Table of Contents", fontname="helv", fontsize=32)
    num_lines += 4

    # Create a TextWriter (per page)
    wr = fitz.TextWriter(page.rect)
    for h1_item, h2_items in headers_and_subheaders.items():
        # Insert the h1_item
        p = fitz.Point(horizontal_start_point, vertical_start_point + num_lines * spacing)
        wr.append(p, h1_item, fontsize=24)
        num_lines += 2

        for h2_item in h2_items:
            # Insert each h2_item
            p_tab = fitz.Point(tab + horizontal_start_point, vertical_start_point + num_lines * spacing)
            wr.append(p_tab, h2_item, fontsize=16)

            # Insert ... between h2_item and page number
            p_tab_number = fitz.Point(
                tab + horizontal_start_point + 500,
                vertical_start_point + num_lines * spacing,
            )
            add_dot_connector(wr, wr.lastPoint, p_tab_number)

            # Insert page number for h2_item
            wr.append(p_tab_number, str(header_to_pagenumber[h2_item]), fontsize=16)
            num_lines += 1

            # Move to new page if nearing end of page: flush the pending
            # text onto the current page, then continue on a fresh one.
            if num_lines >= 45:
                wr.writeText(page)
                page = doc.newPage(height=page_height, width=page_width)
                wr = fitz.TextWriter(page.rect)
                num_lines = 0

        num_lines += 2

    # Flush whatever is still pending for the last page.
    wr.writeText(page)
    return doc
def test_algebra(): p = fitz.Point(1, 2) m = fitz.Matrix(1, 2, 3, 4, 5, 6) r = fitz.Rect(1, 1, 2, 2) assert p + p == p * 2 assert p - p == fitz.Point() assert m + m == m * 2 assert m - m == fitz.Matrix() assert r + r == r * 2 assert r - r == fitz.Rect() assert p + 5 == fitz.Point(6, 7) assert m + 5 == fitz.Matrix(6, 7, 8, 9, 10, 11) assert in r assert not in r assert not in r assert not in r assert p * m == fitz.Point(12, 16) assert r * m == fitz.Rect(9, 12, 13, 18) assert (fitz.Rect(1, 1, 2, 2) & fitz.Rect(2, 2, 3, 3)).is_empty assert not fitz.Rect(1, 1, 2, 2).intersects((2, 2, 4, 4)) failed = False try: x = m + p except: failed = True assert failed failed = False try: x = m + r except: failed = True assert failed failed = False try: x = p + r except: failed = True assert failed failed = False try: x = r + m except: failed = True assert failed failed = False try: m in r except: failed = True assert failed
def __init__(self, raw: dict = None):
    """Initialise the instance from a raw dict.

    Args:
        raw: Optional dict read for the keys 'start', 'end', 'width' and
            'color'. Missing keys fall back to (0.0, 0.0), (0.0, 0.0),
            0.0 and 0 respectively. Omitting it behaves like an empty dict.
    """
    # Use a fresh dict per call: a `{}` literal in the signature is a single
    # object shared by every call that omits the argument.
    if raw is None:
        raw = {}

    # convert start/end point to real page CS
    self._start = fitz.Point(raw.get('start', (0.0, 0.0))) * Stroke.ROTATION_MATRIX
    self._end = fitz.Point(raw.get('end', (0.0, 0.0))) * Stroke.ROTATION_MATRIX

    # width, color
    self.width = raw.get('width', 0.0)
    self.color = raw.get('color', 0)
    self._type = RectType.UNDEFINED  # no type by default

    # update bbox
    self.update(self._to_rect())
def add_dot_connector(wr, start, end):
    """
    Adds ... between a startpoint and endpoint.

    Standard output is redirected to ``os.devnull`` while the text box is
    filled, as a workaround to suppress unnecessary pymupdf warnings about
    text overflow. (The credit link for this workaround is not present in
    this file.)

    Args:
        wr: Writer object providing ``fillTextbox(rect, text)``.
        start: Point (``.x`` / ``.y``) where the dots begin.
        end: Point (``.x`` / ``.y``) where the dots stop.
    """
    import contextlib

    # Deliberately more dots than fit; the box clips them to the gap.
    dot_connector = "." * 200
    rect_topleft = fitz.Point(start.x, start.y - 15)
    rect_bottomright = fitz.Point(end.x, end.y + 10)
    rect = fitz.Rect(rect_topleft, rect_bottomright)

    # The context managers close the devnull handle and put back whatever
    # stream was installed before the call, even if fillTextbox raises.
    with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(devnull):
        wr.fillTextbox(rect, dot_connector)
def __init__(self, raw:dict=None):
    """Initialise the instance from a raw dict.

    Reads 'start' and 'end' (each defaulting to (0.0, 0.0)) and 'width'
    (defaulting to 0.0) from `raw`; a missing or empty `raw` is treated as
    an empty dict. The two points are exchanged when start lies beyond end
    on either axis.
    """
    if not raw:
        raw = {}

    # NOTE: real page CS
    origin = (0.0, 0.0)
    first = fitz.Point(raw.get('start', origin))
    second = fitz.Point(raw.get('end', origin))

    # Exchange the points when start has the larger x or the larger y.
    out_of_order = first.x > second.x or first.y > second.y
    self._start, self._end = (second, first) if out_of_order else (first, second)

    # The parent initialiser receives the same raw dict.
    super().__init__(raw)

    self.width = raw.get('width', 0.0)

    # update bbox
    super().update_bbox(self._to_rect())
def mock_pdf(tmpdir_factory):
    """Create a two-page PDF with one line of text per page.

    Args:
        tmpdir_factory: Object providing ``mktemp(name)``, whose result
            provides ``join(filename)``.

    Returns:
        The path of the written PDF file, as a string.
    """
    # Start from a new, empty document; without this `doc` would be read
    # before it is ever assigned.
    doc = fitz.open()

    page = doc.new_page()
    page.insert_text(fitz.Point(50, 100), "I am a jedi!", fontsize=20)

    page = doc.new_page()
    page.insert_text(fitz.Point(50, 100), "No, I am your father.", fontsize=20)

    # Save the PDF. Opening the target for writing without writing anything
    # would leave an empty file behind, so the document is saved to the path.
    fn = tmpdir_factory.mktemp("data").join("mock_pdf_file.pdf")
    doc.save(str(fn))
    doc.close()

    return str(fn)
def add_notes(page, notes): for note in notes: # Create the location, note that the Mendeley coordinate system seems to have its Y axis inverted when # compared to the pdf library's axis. So the Y position should be flipped page_lr = page.rect corner = pdf.Point(note.x, page_lr.y1 - note.y) # Get the date if == None: date = pdf.getPDFnow() else: try: date ="D:%Y%m%d%H%M%SZ00'00") except: date = pdf.getPDFnow() # Make the note anot = page.addTextAnnot(corner, note.content) color = anot.colors color["fill_color"] = note.color color["stroke_color"] = note.color anot.setColors(color) anot.setOpacity(1) info = info["title"] = info["content"] = note.content info["subject"] = note.content.partition("\n")[0] info["creationDate"] = date anot.setInfo(info) anot.update()
def insert_text_output_pdf_fitz(pdf_file_path, insert_text): """ 既存のPDFファイルに文字を挿入し、別名で出力します :param pdf_file_path: 既存のPDFファイルパス :param insert_text: 挿入するテキスト :return: """ import fitz # 既存PDFの読み取り reader = # 新規PDFの作成 writer = # 既存PDFの1ページ目を新規PDFに流し込む writer.insertPDF(reader, from_page=0, to_page=0) # 既存PDFの1ページを読み込む page = writer.loadPage(0) # 挿入位置(mmをptsに変えて指定) target_x, target_y = mm_to_pts(10), mm_to_pts(10) p = fitz.Point(target_x, target_y) # start point of 1st line rc = page.insertText( p, # bottom-left of 1st char insert_text, # the text (honors '\n') fontname="helv", # the default font fontsize=16, # the default font size rotate=0, # also available: 90, 180, 270 ) # 出力名 output_name = "PyMuPDF.pdf"
def hover_annot(self): ex, ey, page_index = self.get_cursor_absolute_position() page = self.document[page_index] annot = page.firstAnnot if not annot: return None annots = [] while annot: annots.append(annot) annot = for annot in annots: if fitz.Point(ex, ey) in annot.rect: self.is_hover_annot = True annot.setOpacity(0.5) self.message_to_emacs.emit("[d]Delete Annot [e]Edit Annot") else: annot.setOpacity(1) # restore annot self.is_hover_annot = False annot.update() self.page_cache_pixmap_dict.clear() self.update() return page, annot
def test_paper_size(pdf_document, paper_size_coordinates):
    """
    Test paper size for all pages in document

    There is no stated requirement about paper size in the format manual,
    but these tests assume US letter is required.

    Pages are numbered from 1 in all messages. Mismatched boxes, a displaced
    crop box and a rotated page only produce warnings; a wrong bounding box
    fails the test.
    """
    for page_number, page in enumerate(pdf_document, start=1):
        # The three checks below are just warnings for now; depending on
        # use cases, they could become errors in the future.
        if page.CropBox != page.MediaBox:
            warnings.warn(
                f"Page {page_number} MediaBox does not match CropBox, "
                "so it may appear differently when printed",
                UserWarning,
            )

        if page.CropBoxPosition != fitz.Point(0, 0):
            warnings.warn(f"Page {page_number} is displaced", UserWarning)

        if page.rotation != 0:
            warnings.warn(f"Page {page_number} is rotated", UserWarning)

        # The only hard requirement: the page's bounding box.
        checked_page = pdfcheck.core.Page(page=page)
        size_matches = checked_page.bounding_box == paper_size_coordinates
        assert size_matches, f"Page {page_number} has wrong size"
def hover_annot(self):
    """Update annotation opacity for the annotation(s) under the cursor.

    Every annotation on the cursor's page whose rect contains the cursor
    point gets opacity 0.5 and triggers a hint message; all others get
    opacity 1.0. An annotation is only updated when its opacity changes,
    and the page cache is only cleared when the hover state flips.

    Returns:
        ``(page, annot)`` where ``annot`` is the last hovered annotation or
        ``None`` if none is hovered; ``(None, None)`` when the page has no
        annotations at all.
    """
    ex, ey, page_index = self.get_cursor_absolute_position()
    page = self.document[page_index]

    annot = page.firstAnnot
    if not annot:
        return None, None

    # Walk the whole annotation chain. Advancing with `annot.next` is what
    # lets the loop reach every annotation; assigning a falsy value here
    # would stop after the first one.
    annots = []
    while annot:
        annots.append(annot)
        annot = annot.next

    is_hover_annot = False
    current_annot = None

    for annot in annots:
        if fitz.Point(ex, ey) in annot.rect:
            # self.buffer.message_to_emacs.emit(["content"])
            is_hover_annot = True
            current_annot = annot
            opacity = 0.5
            self.buffer.message_to_emacs.emit(
                "[d]Delete Annot [e]Edit Annot")
        else:
            opacity = 1.0
        if opacity != annot.opacity:
            annot.setOpacity(opacity)
            annot.update()

    # update only if changed
    if is_hover_annot != self.is_hover_annot:
        self.is_hover_annot = is_hover_annot
        self.page_cache_pixmap_dict.clear()
        self.update()

    return page, current_annot
def replace_text_in_pdf(self): """ Replaces the text matches in the PDF document with today's date. """ # Open the PDF file for editing document = # Iterate through each page of the PDF document for page in document: # _wrapContents is needed for fixing alignment issues with rect boxes in some cases where there is alignment issue page._wrapContents() # Gets the rect boxes which consists of the matching regex pattern text_date = self.search_pdf_for_text_match( page.getText("develop").split('\n')) for data in text_date: areas = page.searchFor(data) [page.addRedactAnnot(area) for area in areas] page.apply_redactions() # Define today's date as the variable to replace the deleted value # And set the PDF coordinates for placing the new value page = document[0] coordinates = fitz.Point(440, 58) todays_date = todays_date = f"""{todays_date.strftime("%B")} {}, {todays_date.year}""" # Set the new text in the PDF file update_text = page.insertText(coordinates, todays_date, fontname="helv", fontsize=16) # Save the updated PDF file"pdf_updated.pdf")
def test_quad(): r = fitz.Rect(10, 10, 20, 20) q = r.quad assert q.is_rectangular assert not q.is_empty assert q.is_convex q *= fitz.Matrix(1, 1).preshear(2, 3) assert not q.is_rectangular assert not q.is_empty assert q.is_convex assert not in q assert r not in q assert r.quad not in q failed = False try: q[5] = fitz.Point() except: failed = True assert failed failed = False try: q /= (1, 0, 1, 0, 1, 0) except: failed = True assert failed
def insert_text_output_pdf_fitz(pdf_file_path, target_coordinate, insert_text): """ 既存のPDFファイルに文字を挿入し、別名で出力します :param pdf_file_path: 既存のPDFファイルパス :param target_coordinate: テキストを挿入座標値(mm) :param insert_text: 挿入するテキスト :return: """ import fitz # read your existing PDF reader = writer = writer.insertPDF(reader, from_page=0, to_page=0) page = writer.loadPage(0) target_coordinate_arr = np.squeeze(np.asarray(target_coordinate)) target_x, target_y = target_coordinate_arr[0], target_coordinate_arr[1] p = fitz.Point(50, 10) # start point of 1st line rc = page.insertText(p, # bottom-left of 1st char insert_text, # the text (honors '\n') fontname="helv", # the default font fontsize=8, # the default font size rotate=0, # also available: 90, 180, 270 )"out.pdf")
def __init__(self, raw: dict = None):
    """Initialise the instance from a raw dict.

    Args:
        raw: Optional dict read for the keys 'start', 'end' and 'width'.
            Missing keys fall back to (0.0, 0.0), (0.0, 0.0) and 0.0. The
            dict is also handed to the parent initialiser. Omitting it
            behaves like an empty dict.
    """
    # Use a fresh dict per call: a `{}` literal in the signature is a single
    # object shared by every call, and it is passed on to the parent below.
    if raw is None:
        raw = {}

    # convert start/end point to real page CS
    self._start = fitz.Point(raw.get('start', (0.0, 0.0))) * Stroke.ROTATION_MATRIX
    self._end = fitz.Point(raw.get('end', (0.0, 0.0))) * Stroke.ROTATION_MATRIX

    # Exchange the points when start has the larger x or the larger y.
    if self._start.x > self._end.x or self._start.y > self._end.y:
        self._start, self._end = self._end, self._start

    # width, color
    super().__init__(raw)

    # type, color
    self.width = raw.get('width', 0.0)

    # update bbox
    super().update_bbox(self._to_rect())
def test_rectangles():
    """Check Rect construction from mixed arguments and the results of
    includePoint / includeRect."""
    corner_a = fitz.Point(10, 20)
    corner_b = fitz.Point(100, 200)
    far_point = fitz.Point(150, 250)

    rect = fitz.Rect(10, 20, 100, 200)
    expected = tuple(rect)

    # The same rectangle, built from different argument mixes.
    for args in ((corner_a, corner_b), (corner_a, 100, 200), (10, 20, corner_b)):
        assert tuple(fitz.Rect(*args)) == expected

    grown = rect.includePoint(far_point)
    assert tuple(grown) == (10, 20, 150, 250)

    rect = fitz.Rect(10, 20, 100, 200)
    grown = rect.includeRect((100, 200, 110, 220))
    assert tuple(grown) == (10, 20, 110, 220)

    rect = fitz.Rect(10, 20, 100, 200)
    # include empty rect makes no change
    assert tuple(rect.includeRect((0, 0, 0, 0))) == expected
    # include infinite rect delivers infinite rect
    assert tuple(rect.includeRect((1, 1, -1, -1))) == (1, 1, -1, -1)
def download_render_sign_url_doc(self, pdf_url, timestamp_now, is_contract=False): """Downloads, renders and signs pdf document from an external url""" with tempfile.TemporaryDirectory() as tmpdir: try: file_full_path = os.path.join(tmpdir, pdf_url.split("/")[-1]) file_tittle = file_full_path.split(".")[0] watermark = "Document generated for: " + complete_hash = get_hash([timestamp_now,], [file_tittle]) req = requests.get(pdf_url) if req.status_code == 200: with open(file_full_path, 'wb') as mypdf: mypdf.write(req.content) if not req.content:"Error rendering the pdf external document") return None, None, None if not is_contract: pointa = fitz.Point(conf.AXIS_X, conf.AXIS_Y) pointb = fitz.Point(conf.AXIS_X_LOWER, conf.AXIS_Y) document = for page in document: page.insertText(pointa, text=watermark, fontsize=conf.WATERMARK_SIZE, fontname=conf.WATERMARK_FONT, rotate=conf.WATERMARK_ROTATION) page.insertText(pointb, text="DocId: " + complete_hash, fontsize=conf.WATERMARK_SIZE, fontname=conf.WATERMARK_FONT, rotate=conf.WATERMARK_ROTATION), incremental=1) document.close() pdffile = open(file_full_path, 'rb').read() return pdffile, complete_hash, file_tittle else:"[Error] download_render_url_doc:" f" couldnt download the pdf: {req.content}") return None, None, None except IOError as e:'pdf render IOError' + str(e)) return None, None, None except Exception as e:"other error pdf render " + str(e)) return None, None, None
def __init__(self, raw: dict = None):
    """Initialise the instance from a raw dict.

    Args:
        raw: Optional dict read for the keys 'start', 'end', 'width' and
            'color'. Missing keys fall back to (0.0, 0.0), (0.0, 0.0),
            0.0 and 0 respectively. Omitting it behaves like an empty dict.

    Raises:
        AssertionError: If neither ``self.horizontal`` nor ``self.vertical``
            is true once the two points are set.
    """
    # Use a fresh dict per call: a `{}` literal in the signature is a single
    # object shared by every call that omits the argument.
    if raw is None:
        raw = {}

    # convert start/end point to real page CS
    self._start = fitz.Point(raw.get('start', (0.0, 0.0))) * Stroke.ROTATION_MATRIX
    self._end = fitz.Point(raw.get('end', (0.0, 0.0))) * Stroke.ROTATION_MATRIX

    # Checked before the points are put in order below.
    assert self.horizontal or self.vertical, 'Supports horizontal or vertical Strokes only'

    # Exchange the points when start has the larger x or the larger y.
    if self._start.x > self._end.x or self._start.y > self._end.y:
        self._start, self._end = self._end, self._start

    # width, color
    self.width = raw.get('width', 0.0)
    self.color = raw.get('color', 0)
    self.type = RectType.UNDEFINED  # no type by default

    # update bbox
    super().update(self._to_rect())
def pdf_add_emp_info(name='NAME', depno='DEPARTMENT', empid='1234', years='10'):
    """Write employee details onto the first page of a user-selected PDF.

    The user picks the form through a file dialog. The four values are
    written at fixed positions on page 0, the result is saved next to the
    original with ``_<empid>`` appended to the file name, and that new file
    is then opened.

    Args:
        name: Text written at the "Applicant" position.
        depno: Text written at the "Dept No." position.
        empid: Text written at the "Employee ID No." position; also used in
            the output file name.
        years: Text written at the "Length of Service" position.

    Returns:
        None.
    """
    appp_xy = fitz.Point(113, 81)  # Applicant
    depno_xy = fitz.Point(113, 93)  # Dept No.
    empid_xy = fitz.Point(363, 81)  # Employee ID No.
    year_xy = fitz.Point(363, 93)  # Length of Service

    fname = askopenfilename(title="Open PDF Authority to Deduct Form.",
                            filetypes=(("PDF files", "*.pdf"), ("all files", "*.*")))
    # Nothing was selected (e.g. the dialog was dismissed): nothing to do.
    if not fname:
        return

    # Drops the last four characters (presumably the ".pdf" suffix) before
    # appending the employee id.
    fname_empid = fname[:len(fname) - 4] + '_' + empid + '.pdf'

    # Open the selected form; without this `doc` would be read before it is
    # ever assigned.
    doc = fitz.open(fname)
    page = doc[0]

    page.insertText(appp_xy, name, rotate=0, fontsize=9, render_mode=0, overlay=True)
    page.insertText(depno_xy, depno, rotate=0, fontsize=9, render_mode=0, overlay=True)
    page.insertText(empid_xy, empid, rotate=0, fontsize=9, render_mode=0, overlay=True)
    page.insertText(year_xy, years, rotate=0, fontsize=9, render_mode=0, overlay=True)

    # The file opened below has to exist first: write the filled-in form out.
    doc.save(fname_empid)
    doc.close()

    startfile(fname_empid)  # open the signed PDF.
    return
def generateTOC(existingDoc, filename, path): generatedPage = existingDoc.newPage(pno=0) # Generates the Table of Contents Title tableOfContentsText = "-- Table of Contents --" TOC_textLength = fitz.getTextlength(tableOfContentsText) TOC_startPoint_X = ((595 / 2) - TOC_textLength) TOC_startPoint_Y = 85 TOC_startPoint = fitz.Point(TOC_startPoint_X, TOC_startPoint_Y) generatedPage.insertText(TOC_startPoint, tableOfContentsText, fontname="Times-Roman", color=(0, 0.35, 0.8), fontsize=24, rotate=0) # Inserts the page number at the bottom of the page. # Table of Contents will be page 1 pageNumberPoint = fitz.Point(294, 815) generatedPage.insertText(pageNumberPoint, "1", fontname="Times-Roman", fontsize=14, rotate=0) + filename) return existingDoc
def document(self, node, entering):
    """Handle the document node.

    On entering, layout state is reset, a new page is appended and a base
    style is pushed. On leaving, the current page is finished and that
    style is popped again.
    """
    if not entering:
        self.finishPage()
        style.pop()  # We should be done anyway
        return

    # Fresh layout state for a new document.
    self.indent = 0
    self.insertPoint = fitz.Point(margin, margin + lineheight)
    self.linkDestination = None
    self.linkRects = []

    # Append the first page (-1 as page number) and set the base style.
    self.currentPage = self.doc.newPage(-1, width, height)
    style.push(fontname=font.TIMES, fontsize=10, indent=0)
def update_bbox(self, rect):
    '''Update the stroke bbox (related to real page CS) and its end points.

    - zero-area rect: its two corners become the start/end points and the
      bbox is rebuilt from them;
    - otherwise: the rect becomes the bbox directly and the start/end points
      are placed on its centre line along the longer side.

    Returns the instance itself.
    '''
    box = fitz.Rect(rect)

    # an empty area line
    if box.getArea() == 0.0:
        self._start, self._end = fitz.Point(box[0:2]), fitz.Point(box[2:])
        super().update_bbox(self._to_rect())
        return self

    # a rect: the bbox is set first, then the end points are derived
    super().update_bbox(box)

    if box.width >= box.height:
        # horizontal stroke
        mid_y = (box.y0 + box.y1) / 2.0
        self._start, self._end = fitz.Point(box.x0, mid_y), fitz.Point(box.x1, mid_y)
    else:
        # vertical stroke
        mid_x = (box.x0 + box.x1) / 2.0
        self._start, self._end = fitz.Point(mid_x, box.y0), fitz.Point(mid_x, box.y1)

    return self
def add_highlights(page, highlights):
    """Add one highlight annotation to `page` for every item in `highlights`.

    Each item supplies two x values (``x[0]``, ``x[1]``), two y values
    (``y[0]``, ``y[1]``) and a ``color``.
    """
    for item in highlights:
        # Create the location, note that the Mendeley coordinate system seems to have its Y axis inverted when
        # compared to the pdf library's axis. So the Y position should be flipped
        page_bottom = page.rect.y1
        left, right = item.x[0], item.x[1]
        y_first = page_bottom - item.y[0]
        y_second = page_bottom - item.y[1]

        lower_left = pdf.Point(left, y_first)
        upper_left = pdf.Point(left, y_second)
        upper_right = pdf.Point(right, y_second)
        lower_right = pdf.Point(right, y_first)
        quad = pdf.Quad(upper_left, upper_right, lower_left, lower_right)

        # Create the highlight
        highlight = page.addHighlightAnnot(quad)

        # Update the color
        # NOTE(review): confirm that setColors reads these two key names in
        # the library version in use.
        colors = highlight.colors
        colors["fill_color"] = item.color
        colors["stroke_color"] = item.color
        highlight.setColors(colors)
        highlight.update()
def convert2(input_file): import fitz # pdf = labels = {} for no in range(len(pdf)): i = 0 paragraphs = pdf.loadPage(no).getTextBlocks() for image in pdf.getPageImageList(no): xref = image[0] pix = fitz.Pixmap(pdf, xref) if pix.n > 4: # CMYK vs GRAY or RGB pix = fitz.Pixmap(fitz.csRGB, pix) file = "p%s-i%s.png" % (no, xref) pix.writePNG(file) pix = None img = width, height = img.size if width > height: # Landscape, have to rotate -90 width, height = height, width img = img.rotate(270, expand=True), quality=100, subsampling=0) if height == 2200: # USPS Summary Page continue if height < 1801 and width < 1201: labels[file] = paragraphs[i * 3 + 2][4] if width == 762 and height == 1200: labels[file] = "" img = None i += 1 doc = rect = fitz.Rect(0, 0, 280, 410) for label in labels: pix = fitz.Pixmap(label) page = doc.newPage(width=282, height=424) page.insertImage(rect, pixmap=pix) p1 = fitz.Point(12, page.rect.height - 6) shape = page.newShape() shape.insertText(p1, labels[label], fontsize=12) shape.commit() os.remove(label) fn = "%stmp%s%s.pdf" % ( os.sep, os.sep, str(tempfile.TemporaryFile().name).split(os.sep)[-1], ), garbage=4, deflate=1) return fn