def bbox(self):
    """Return the union rectangle of the bounding boxes of all instances.

    Starts from an empty ``fitz.Rect()`` and joins ``instance.bbox`` of every
    element in ``self._instances`` with the ``|`` operator.
    """
    union = fitz.Rect()
    for member in self._instances:
        union = union | member.bbox
    return union
def _export_page_images(cur_pdf, cur_page, out_dir):
    """Write every image referenced by ``cur_page`` to ``out_dir``.

    Files are named ``图片<n>.png`` where ``n`` counts from 1 in the order
    returned by ``cur_page.getImageList()``.
    """
    img_list = cur_page.getImageList()
    print(img_list)
    for imgcount, img in enumerate(img_list, start=1):
        pix = fitz.Pixmap(cur_pdf, img[0])
        if img[1]:
            # img[1] references a separate mask image: copy the base pixmap
            # and use the mask samples as its alpha channel.
            mask = fitz.Pixmap(cur_pdf, img[1])
            pix = fitz.Pixmap(pix)
            pix.setAlpha(mask.samples)
        print('当前页面的图像:::', pix)
        new_name = "图片{}.png".format(imgcount)
        pix.writeImage(os.path.join(out_dir, new_name))
        pix = None  # release the pixmap


def _block_style(page_no, num):
    """Return ``(fontsize, alignment)`` for block ``num`` on page ``page_no``.

    On the first page the first four blocks are centred (title, authors,
    affiliation ...), the first two of them in a larger font.
    """
    if page_no == 0 and num in (0, 1):
        return 15, fitz.TEXT_ALIGN_CENTER
    if page_no == 0 and num in (2, 3):
        return 10, fitz.TEXT_ALIGN_CENTER
    return 10, fitz.TEXT_ALIGN_LEFT


def trans_pdf(file_name, path):
    """Translate the PDF at ``path`` and save a translated PDF and DOCX.

    Each page of the input is rebuilt on a new page of identical size: images
    are re-inserted at their original rectangles and every text block is
    translated with ``translate_func.bing_translate`` and written into the
    block's rectangle. Vertically adjacent blocks (gap <= 1.0) are merged
    and translated as one paragraph. The translated text is also appended to a
    DOCX document.

    Args:
        file_name: Name of the uploaded file; the outputs are written to
            ``<BASE_DIR>/trans/output_file/translated_<file_name>`` and the
            same name with the last four characters replaced by ``.docx``.
        path: Path of the PDF to translate.

    If anything fails while translating, the error is reported and whatever
    has been translated so far is still saved.
    """
    import traceback

    t0 = time.time()
    out_dir = os.path.join(settings.BASE_DIR, 'trans', 'output_file')
    font_file = os.path.join(settings.BASE_DIR, 'trans/static/fonts/SimSun.ttf')

    cur_pdf = fitz.open(path)  # PDF to translate
    new_pdf = fitz.open()  # PDF receiving the translation
    new_docx = Document()  # DOCX receiving the translation
    # Use SimSun for the translated text (both the western and the East-Asian slot).
    new_docx.styles['Normal'].font.name = u'宋体'
    new_docx.styles['Normal']._element.rPr.rFonts.set(qn('w:eastAsia'), u'宋体')

    def translate(text):
        # Spaces are removed from the result because it is Chinese text.
        return translate_func.bing_translate(text).replace(' ', '')

    try:
        for page_no, cur_page in enumerate(cur_pdf):
            _export_page_images(cur_pdf, cur_page, out_dir)

            blks = cur_page.getTextBlocks(images=True)  # text and image blocks
            new_page = new_pdf.newPage(-1,
                                       width=cur_page.MediaBoxSize[0],
                                       height=cur_page.MediaBoxSize[1])

            def put_text(rect, text, size, align, page=new_page):
                page.insertTextbox(rect, text, fontname="song",
                                   fontfile=font_file, fontsize=size,
                                   align=align)

            begin = (0, 0, 0, 0)  # rectangle of the first block of the current paragraph
            end = (0, 0, 0, 0)  # rectangle of the last block merged into it
            merging = False  # True while adjacent blocks are being accumulated
            after_references = False  # True once the references heading was seen
            # Sentinel so that "next block" always exists; it is never processed itself.
            blks.append((1, 2, 3, 6))
            content = ""
            imgcount = 0

            for num in range(len(blks) - 1):
                block = blks[num]
                nxt = blks[num + 1]

                # Image block: re-insert the exported picture, then skip text handling.
                if block[-1] == 1:
                    print('图像:::', block[4])
                    imgcount += 1
                    try:
                        path_img = os.path.join(out_dir, '图片{}.png'.format(imgcount))
                        with open(path_img, "rb") as fh:
                            stream = fh.read()
                        new_page.insertImage(block[:4], stream=stream, keep_proportion=True)
                        new_docx.add_picture(path_img, width=Inches(3))
                        os.remove(path_img)  # no longer needed once inserted
                    except Exception as exc:
                        # Best effort: a missing or unreadable picture must not
                        # abort the translation of the page.
                        print('图像处理失败:::', exc)
                    continue

                fonts, text_pos = _block_style(page_no, num)

                if num == 0:
                    begin = block[:4]
                    content = block[4].replace("\n", " ")

                # block[0], block[1] = top-left; block[2], block[3] = bottom-right
                r = fitz.Rect(block[:4])

                # Next block starts (almost) where this one ends vertically.
                if abs(nxt[1] - block[3]) <= 1.0:
                    if after_references:
                        # Reference entries are translated one block at a time.
                        put_text(r, translate(block[4].replace("\n", " ")), 7, text_pos)
                    else:
                        merging = True
                        end = nxt[:4]
                        content += nxt[4].replace("\n", " ")
                    continue

                # The next block is far away: the current paragraph is complete.
                if merging:
                    res = translate(content)
                    new_docx.add_paragraph(res)
                    # Merged rectangle: from the top of the first block to the
                    # bottom of the last one, using the wider right edge.
                    merged = fitz.Rect(end[0], begin[1], max(begin[2], end[2]), end[3])
                    put_text(merged, res, fonts, text_pos)
                    merging = False
                else:
                    paragraph = block[4].replace("\n", " ")
                    compact = paragraph.replace(' ', '')
                    if is_figure(compact):
                        # Figure caption: small, centred.
                        put_text(r, translate(paragraph), 7, fitz.TEXT_ALIGN_CENTER)
                    elif is_reference(compact):
                        # Everything after this heading is the reference list.
                        after_references = True
                        put_text(r, '参考文献', fonts, text_pos)
                    else:
                        res = translate(paragraph)
                        new_docx.add_paragraph(res)
                        put_text(r, res, 7 if after_references else fonts, text_pos)

                # The next block starts a new paragraph.
                begin = nxt[:4]
                try:
                    content = nxt[4].replace("\n", " ")
                except (IndexError, AttributeError, TypeError):
                    # The sentinel (and non-text payloads) carry no text.
                    pass
    except Exception:
        # Do not lose the work done so far: report and fall through to saving.
        print('翻译过程出现异常')
        traceback.print_exc()

    # Save the results.
    new_file_name = os.path.join(out_dir, 'translated_' + file_name)
    new_docx_name = os.path.join(out_dir, 'translated_' + file_name[:-4] + '.docx')
    new_docx.save(new_docx_name)
    new_pdf.save(new_file_name, garbage=4, deflate=True, clean=True)
    new_pdf.close()
    cur_pdf.close()

    t1 = time.time()
    print("Total translation time: %g sec" % (t1 - t0))
def run_remarks(
    input_dir,
    output_dir,
    targets=None,
    pdf_name=None,
    ann_type=None,
    combined_pdf=False,
    modified_pdf=False,
):
    """Render annotations of every PDF document found in ``input_dir``.

    For each ``*.metadata`` file that describes a PDF document, the annotated
    pages are exported to ``output_dir`` in the requested formats.

    Args:
        input_dir: Directory containing the ``*.metadata`` files and the
            corresponding PDFs.
        output_dir: Directory receiving the generated files.
        targets: Container of output formats to produce per page; the values
            checked are ``"svg"``, ``"pdf"``, ``"png"`` and ``"md"``.
            ``None`` (the default) produces none of them.
        pdf_name: If given, only documents whose visible name contains this
            string are processed.
        ann_type: ``"highlights"`` or ``"scribbles"`` to restrict the
            annotation type; anything else merges both.
        combined_pdf: Also save the source PDF with the annotations applied
            as ``<name> _remarks.pdf``.
        modified_pdf: Also save a PDF holding only the annotated pages as
            ``<name> _remarks-only.pdf``.
    """
    # ``targets`` is tested with ``in`` below; the ``None`` default must not crash.
    if targets is None:
        targets = ()

    for path in pathlib.Path(f"{input_dir}/").glob("*.metadata"):
        if not is_document(path):
            continue

        filetype = get_document_filetype(path)
        if filetype != 'pdf':
            print(
                f"Skipping document {path.stem}: document type: {filetype} is currently not supported."
            )
            continue

        pages = list_pages_uuids(path)
        name = get_visible_name(path)
        rm_files = list_ann_rm_files(path)

        if pdf_name and (pdf_name not in name):
            continue
        if not pages or not name or not rm_files or not len(rm_files):
            continue

        # Number of digits needed to zero-pad page indices in file names.
        page_magnitude = math.floor(math.log10(len(pages))) + 1
        in_device_path = get_ui_path(path)
        out_path = pathlib.Path(f"{output_dir}/{in_device_path}/{name}/")
        out_path.mkdir(parents=True, exist_ok=True)

        pdf_src = fitz.open(path.with_name(f"{path.stem}.pdf"))
        mod_pdf = fitz.open() if modified_pdf else None

        print(f"Working on PDF file: {path.stem}")
        print(f'PDF visibleName: "{name}"')
        print(f"PDF in-device directory: {in_device_path}")

        for rm_file in rm_files:
            page_idx = pages.index(f"{rm_file.stem}")
            page_tag = f"{page_idx:0{page_magnitude}}"
            pdf_w, pdf_h = get_pdf_page_dims(path, page_idx=page_idx)
            scale = get_pdf_to_device_ratio(pdf_w, pdf_h)
            highlights, scribbles = parse_rm_file(rm_file)

            if ann_type == "highlights":
                parsed_data = highlights
            elif ann_type == "scribbles":
                parsed_data = scribbles
            else:  # merge both annotation types
                parsed_data = {
                    "layers": highlights["layers"] + scribbles["layers"]
                }

            if not parsed_data.get("layers"):
                continue

            parsed_data = rescale_parsed_data(parsed_data, scale)

            if "svg" in targets:
                svg_str = draw_svg(parsed_data)
                subdir = prepare_subdir(out_path, "svg")
                with open(f"{subdir}/{page_tag}.svg", "w") as f:
                    f.write(svg_str)

            # Build a one-page document sized like the device and show the
            # original PDF page on it.
            ann_doc = fitz.open()
            rm_w_rescaled, rm_h_scaled = get_rescaled_device_dims(scale)
            ann_page = ann_doc.newPage(width=rm_w_rescaled, height=rm_h_scaled)
            pdf_w_adj, pdf_h_adj = get_adjusted_pdf_dims(pdf_w, pdf_h, scale)
            pdf_rect = fitz.Rect(0, 0, pdf_w_adj, pdf_h_adj)
            ann_page.showPDFpage(pdf_rect, pdf_src, pno=page_idx)

            should_extract_text = ann_type != "scribbles" and highlights
            extractable = is_text_extractable(pdf_src[page_idx])
            ocred = False

            if should_extract_text and not extractable and is_tool("ocrmypdf"):
                print(
                    f"Couldn't extract text from page #{page_idx}. Will OCR it. Hold on\n"
                )
                tmp_file = "_tmp.pdf"
                ann_doc.save(tmp_file)
                ann_doc.close()
                # Note: as of July 2020, ocrmypdf does not recognize handwriting
                tmp_file = run_ocr(tmp_file)
                ann_doc = fitz.open(tmp_file)
                pathlib.Path(tmp_file).unlink()
                ann_page = ann_doc[0]
                ocred = True

            ann_page = draw_pdf(parsed_data, ann_page)

            if "pdf" in targets:
                subdir = prepare_subdir(out_path, "pdf")
                ann_doc.save(f"{subdir}/{page_tag}.pdf")

            if "png" in targets:
                # (2, 2) is a short-hand for 2x zoom on x and y
                # ref: https://pymupdf.readthedocs.io/en/latest/page.html#Page.getPixmap
                pixmap = ann_page.getPixmap(matrix=fitz.Matrix(2, 2))
                subdir = prepare_subdir(out_path, "png")
                pixmap.writePNG(f"{subdir}/{page_tag}.png")

            if "md" in targets:
                if should_extract_text and (extractable or ocred):
                    md_str = md_from_blocks(ann_page)
                    # TODO: add proper table extraction?
                    # https://pymupdf.readthedocs.io/en/latest/faq.html#how-to-extract-tables-from-documents
                    # TODO: maybe also add highlighted image (pixmap) extraction?
                    subdir = prepare_subdir(out_path, "md")
                    with open(f"{subdir}/{page_tag}.md", "w") as f:
                        f.write(md_str)
                elif not highlights:
                    print(
                        f"Couldn't find any highlighted text on page #{page_idx}"
                    )
                elif ann_type == "scribbles":
                    print(
                        "Found some highlighted text but `--ann_type` flag was set to `scribbles` only"
                    )
                else:
                    print(
                        f"Found highlighted text but couldn't create markdown from page #{page_idx}"
                    )

            if modified_pdf:
                mod_pdf.insertPDF(ann_doc, start_at=-1)

            if combined_pdf:
                x_max, y_max = get_ann_max_bound(parsed_data)
                ann_outside = (x_max > pdf_w_adj) or (y_max > pdf_h_adj)

                # If there are annotations outside the original PDF page limits,
                # insert the ann_page that we have created from scratch
                if ann_outside:
                    pdf_src.insertPDF(ann_doc, start_at=page_idx)
                    pdf_src.deletePage(page_idx + 1)
                # Else, draw annotations in the original PDF page (in-place)
                # to preserve links (and also the original page size)
                else:
                    draw_pdf(parsed_data, pdf_src[page_idx], inplace=True)

            ann_doc.close()

        if combined_pdf:
            pdf_src.save(f"{output_dir}/{name} _remarks.pdf")

        if modified_pdf:
            mod_pdf.save(f"{output_dir}/{name} _remarks-only.pdf")
            mod_pdf.close()

        pdf_src.close()
def find_words_rect(page, *words):
    """Locate the rectangles of the given words on ``page``.

    Every entry of ``page.getText("words")`` is grouped by its word text.
    The result zips the rectangle lists of the requested ``words`` together,
    so each yielded tuple holds one ``fitz.Rect`` per requested word and
    iteration stops with the word that has the fewest occurrences.
    """
    rects_by_word = defaultdict(list)
    for entry in page.getText("words"):
        x0, y0, x1, y1, text, _block_no, _line_no, _word_no = entry
        rects_by_word[text].append(fitz.Rect(x0, y0, x1, y1))
    per_word = [rects_by_word[wanted] for wanted in words]
    return zip(*per_word)
achieved be using Pillow / PIL instead of Tkinter's own support. * We are not slowing down the speed of showing new images (i.a.w. "frames per second"). The statistics displayed at end of program can hence be used as a performance indicator. """ if not list(map(int, fitz.VersionBind.split("."))) >= [1, 14, 5]: raise SystemExit("need PyMuPDF v1.14.5 for this script") mytime = time.perf_counter # define some global constants gold = (1, 1, 0) blue = (0, 0, 1) pagerect = fitz.Rect(0, 0, 400, 400) # dimension of our image mp = fitz.Point(pagerect.width / 2.0, pagerect.height / 2.0) # center of the page r = fitz.Rect(mp, mp + (80, 80)) # rect of text box text = "Just some demo text, to be filled in a rect." textpoint = fitz.Point(40, 50) # start position of this text: itext = "Rotation Morphing by:\nfitz.Matrix(%i)" # ------------------------------------------------------------------------------ # make one page # ------------------------------------------------------------------------------
import cv2 import fitz import re from Downloader import * import sys #### Download files from Eli download = Downloader() download.getAttach() w = 390 h = 125 words = ["Paciente"] img = open('Encabezado.jpg', "rb").read() rect = fitz.Rect(0, 0, w, h) docs = glob.glob('adjuntos/*.PDF') for fname in docs: file_handle = fitz.open(fname) for pag in file_handle: pag.insertImage(rect, stream=img, keep_proportion=False) text = pag.getText("text") words = text.lower().split() try: name = ' '.join([ str(elem) for elem in words[words.index('paciente') + 2:words.index('resultado')] ]) except Exception: name = ' '.join([
On every page after the first to-be-deleted page, also insert a link, which points to this page. The bookmark text equals the text on the page for easy verification. Then delete some pages and verify: - the new TOC has empty items exactly for every deleted page - the remaining TOC items still point to the correct page - the document has no more links at all """ import fitz page_count = 100 # initial document length r = range(5, 35, 5) # contains page numbers we will delete # insert this link on pages after first deleted one link = { "from": fitz.Rect(100, 100, 120, 120), "kind": fitz.LINK_GOTO, "page": r[0], "to": fitz.Point(100, 100), } def test_deletion(): # First prepare the document. doc = fitz.open() toc = [] for i in range(page_count): page = doc.new_page() # make a page page.insert_text((100, 100), "%i" % i) # insert unique text if i > r[0]: # insert a link page.insert_link(link)
def convert_to_pdf(Name, Date):
    """Render the current bill to ``Bill<counter>.pdf`` and open it.

    The bill table is built as HTML from the module-level ``Bill`` entries
    and ``total``, rendered through the ``Bill_Preview.html`` template and
    converted with ``pdfkit``. The counter stored in ``details.json`` is
    incremented afterwards, the logo is stamped on every page and the file is
    handed to ``system``.

    Args:
        Name: Object whose ``get()`` returns the customer name.
        Date: Object whose ``get()`` returns ``(day, month, year)``.

    Returns:
        None. Returns early, doing nothing, when either ``get()`` raises.
    """
    from html import escape

    try:
        Customer_name = Name.get()
        date_parts = Date.get()
    except Exception:
        # Invalid or incomplete input widgets: nothing to print.
        return
    Date_day, Date_month, Date_year = date_parts

    with open("details.json", 'r') as f:
        data = json.load(f)

    Gross_total = total
    taxable_amount = round((Gross_total * TAX) / 100, 2)
    Net_total = round((Gross_total * (100 + TAX)) / 100, 2)

    parts = ["<table align=center>"]
    # data["Details"] comes from the local configuration file and is inserted
    # as-is; user-entered text below is escaped so that characters such as
    # '<' or '&' cannot break the markup.
    parts.append("<tr><td colspan=3 height=100>" + data["Details"]
                 + "</td><td colspan=2></td></tr>")
    parts.append("<tr><td>Name:</td><td colspan=2>" + escape(str(Customer_name))
                 + "</td><td>Date:</td><td>" + str(Date_day) + "/"
                 + str(Date_month) + "/" + str(Date_year) + "</td></tr>")
    parts.append("<tr><td class=\"index\">Index</td><td class=\"name\">Name</td><td class=\"price\">Price</td><td class=\"qty\">Qty</td><td class=\"amount\">Amount</td></tr>")
    for index, entry in enumerate(Bill, start=1):
        parts.append(
            "<tr><td class=\"index\">" + str(index)
            + "</td><td class=\"name\">" + escape(entry[0])
            + "</td><td align=\"right\" class=\"price\">" + "{0:.2f}".format(entry[1])
            + "</td><td align=\"right\" class=\"qty\">" + str(entry[2])
            + "</td><td align=\"right\" class=\"amount\">" + "{0:.2f}".format(entry[3])
            + "</td></tr>")

    file_name = "Bill" + data["Counts"] + ".pdf"
    env = Environment(loader=FileSystemLoader('.'))

    parts.append("<tr class=\"gross-total\"><td colspan=4>Gross Total</td><td align=\"right\">"
                 + "{0:.2f}".format(Gross_total) + "</td></tr>")
    parts.append("<tr class=\"taxable-amount\"><td colspan=4>Taxable Amount\n( "
                 + data["TAX_NAME"] + ": " + data["TAX"]
                 + "%)</td><td align=\"right\">"
                 + "{0:.2f}".format(taxable_amount) + "</td></tr>")
    parts.append("<tr class=\"net-total\"><td colspan=4>Net Total</td><td align=\"right\">"
                 + "{0:.2f}".format(Net_total) + "</td></tr>")
    parts.append("</table>")
    Bill_html = "".join(parts)

    template = env.get_template("Bill_Preview.html")
    html_out = template.render({"Bill": Bill_html})
    pdfkit.from_string(html_out, file_name)

    # Advance the bill counter only after the PDF was produced.
    data["Counts"] = str(int(data["Counts"]) + 1)
    with open("details.json", "w") as f:
        json.dump(data, f)

    # Stamp the logo on every page and save incrementally.
    doc = fitz.open(file_name)
    try:
        rect = fitz.Rect(358, 15, 428, 118)
        for page in doc:
            page._cleanContents()
            page.insertImage(rect, filename="IndustryLogo.jpg", overlay=True)
        doc.saveIncr()
    finally:
        doc.close()
    system(file_name)
def render_box(bounding_box, pdf):
    """Render the area described by ``bounding_box`` and return the pixmap.

    ``bounding_box.page_num`` is 1-based; the clip rectangle is built from
    ``min_h``, ``min_v``, ``max_h`` and ``max_v``.
    """
    page_index = int(bounding_box.page_num) - 1
    target_page = pdf.loadPage(page_index)
    clip_rect = fitz.Rect(
        bounding_box.min_h,
        bounding_box.min_v,
        bounding_box.max_h,
        bounding_box.max_v,
    )
    return target_page.getPixmap(clip=clip_rect)
def bbox(self):
    """Return the rectangle spanned by the first and the last cell.

    The top-left corner is taken from ``cells[0][0]`` and the bottom-right
    corner from ``cells[-1][-1]``; an empty ``fitz.Rect()`` is returned when
    there are no cells.
    """
    if self.cells:
        left, top = self.cells[0][0].bbox.tl
        right, bottom = self.cells[-1][-1].bbox.br
        return fitz.Rect(left, top, right, bottom)
    return fitz.Rect()
def get_financial_resume(self):
    """Collect the "Resumo Financeiro" values of the brokerage notes.

    For every page, the area of the financial summary is located with the
    page text search, the words intersecting that area are grouped into
    lines, and for each line starting with one of the known headers the
    monetary value found on the *previous* line is recorded. The values are
    then copied into ``self.ncs_in_file`` per brokerage note, printed, and
    the transactions are processed.

    Returns:
        dict: header -> list of values; also stored in
        ``self.financial_resume``.

    NOTE(review): the nesting of this method was reconstructed from a
    flattened source; confirm it against the original file, in particular
    the statements following the second ``else: continue``.
    """
    headers = ["Valor líquido das operações", "Taxa de liquidação",
               "Taxa de Registro", "Total CBLC", "Taxa de termo/opções",
               "Taxa A.N.A", "Emolumentos", "Total Bovespa / Soma",
               "Corretagem", "ISS", "I.R.R.F.", "Outras Bovespa",
               "Total Corretagem / Despesas", "Líquido para"]
    my_financial_resume = {}
    for pno, page in enumerate(self.pages):
        rl1 = page.searchFor("Resumo Financeiro")
        if not rl1:
            rl1 = page.searchFor("Corretagem / Despesas")  # Are we on the other page??
            if not rl1:
                continue  # Neither marker found: this page has no summary.
        rl2 = page.searchFor("Líquido para ")  # rect list two
        if rl2:
            # Extend the hit with the point (601, 842).
            rl2 = [rl2[0] | [(601,842),(0,0)][0]]
        else:
            rl2 = [(601,842),(0,0)]
        rect = rl1[0] | rl2[0]
        # Now we have the rectangle.
        # Select the words which at least intersect the rect.
        mywords = [w for w in self.words[pno] if fitz.Rect(w[:4]).intersects(rect)]
        # Sort by bottom coordinate, then by x, and group words sharing the
        # same bottom coordinate into one line.
        mywords.sort(key = itemgetter(3, 0))
        group = groupby(mywords, key = itemgetter(3))
        old = ""  # value extracted from the previously processed line
        for y1, gwords in group:
            line = " ".join(w[4] for w in gwords)
            for header in headers:
                # Did we find the header at the start of the line?
                if line[0:len(header)] == header:
                    # Header already known?
                    if header in my_financial_resume.keys():
                        # Append ``old`` since the value appears before the label (header).
                        my_financial_resume[header].append(old)
                    else:
                        # Create the value in a list.
                        my_financial_resume[header] = [old]
            # Extract a money amount of the form XX.XXX,XX Y (Y = C or D).
            old = re.findall(r"(?:[1-9]\d{0,2}(?:\.\d{3})*|0)(?:,\d{1,2})[ ][CD]{1}", line)
            if old:
                old = old[0]
            else:
                old = 0
    vl, vc = 0,0
    print("Total de Notas de Corretagem no Arquivo:", self.nc_quantity)
    for nc in range(self.nc_quantity):
        # Remove falsy items; skip this note when "Corretagem" was not found.
        try:
            my_financial_resume["Corretagem"] = [x for x in my_financial_resume["Corretagem"] if x]
        except:
            continue
        for head in headers:
            # With several values the one of this note is used, else the single value.
            if head in my_financial_resume.keys() and len(my_financial_resume[head]) > 1:
                self.ncs_in_file[nc][head] = my_financial_resume[head][nc]
            else:
                self.ncs_in_file[nc][head] = my_financial_resume[head][0]
        # my_financial_resume["Custos Totais"][nc] = float(my_financial_resume[headers[-1]][nc].split(" ")[0]) - float(my_financial_resume[headers[0][nc]].split(" ")[0])
        vl = to_money(self.ncs_in_file[nc]["Valor líquido das operações"])
        if "Líquido para" in self.ncs_in_file[nc].keys():
            vc = to_money( self.ncs_in_file[nc]["Líquido para"])
        else:
            continue
        # Total costs = absolute difference between note value and net operations value.
        if "Custos Totais" in my_financial_resume.keys():
            my_financial_resume["Custos Totais"].append(locale.currency( abs(vc - vl), grouping = True ))
        else:
            my_financial_resume["Custos Totais"] = [locale.currency( abs(vc - vl), grouping = True )]
        print("Nota de Corretagem Nº:", self.ncs_in_file[nc]["Nota"])
        print("Código do Cliente:", self.ncs_in_file[nc]["CodigoCliente"])
        print("Data da Nota:", self.ncs_in_file[nc]["Data"])
        for head in headers:
            # "Total ..." rows get an extra tab in the printout.
            tab = "\t" if head[0:5] == "Total" else ""
            if head in my_financial_resume.keys():
                if len(my_financial_resume[head]) > 1:
                    print("\t {} {}: {}".format(tab, head,my_financial_resume[head][nc]))
                else:
                    print("\t {} {}: {}".format(tab, head,my_financial_resume[head]))
            else:
                print("This Brokeage note seems to be a supported kind! Day trade may be? Missing: {}".format(head))
                # Blocks until the user presses Enter.
                input()
        print("\nResumo:")
        if "Valor líquido das operações" in self.ncs_in_file[nc].keys():
            print("\t Valor Líquido das Operações:", self.ncs_in_file[nc]["Valor líquido das operações"])
            print("\t Valor da Nota de Corretagem:", self.ncs_in_file[nc]["Líquido para"])
            print("\t Custos Totais:", my_financial_resume["Custos Totais"][nc])
        else:
            continue
            # NOTE(review): the statements below follow ``continue`` and are
            # unreachable as reconstructed here — confirm the intended nesting.
            print("Conta: ", self.get_account())
            for nc in range(self.nc_quantity):
                print("Nota de Corretagem Nº:", self.ncs_in_file[nc]["Nota"])
            print("No Financial Resume!")
        print(" -------- Negociações -------- " )
        # Known issue: the financial summary print is duplicated because the
        # second brokerage note is not ignored.
        # file:///Users/maion/OneDrive/Documentos/Documentos%20Felipe/programs/ruby/Python/PyCharmProjects/IR/Notas%20Corretagem/pdf/240303-20130829-NC4617559-929443.pdf
        self.get_transactions()
        self.get_transactions_for_options()
        # TODO implement this..
        # self.get_transactions_for_index()
        # print(*[(w[1], w[0].split(" ")) for w in self.negotiations], sep="\n")
        print(" ______________________________")
    self.financial_resume = my_financial_resume
    return self.financial_resume
def bbox(self):
    """Return the union of the bounding boxes of all contained instances."""
    merged = fitz.Rect()
    for member in self._instances:
        # ``|`` accepts fitz.Rect as well as rect-like objects such as tuples.
        merged |= member.bbox
    return merged
blue = (0, 0, 1) green = (0, 1, 0) gray = (0.9, 0.9, 0.9) for page1 in doc1: blks = page1.getTextBlocks(images=True) # read text blocks of input page # create new page in output with /MediaBox dimensions page2 = doc2.newPage(-1, width=page1.MediaBoxSize[0], height=page1.MediaBoxSize[1]) # the text font we use page2.insertFont(fontfile=None, fontname="Helvetica") img = page2.newShape() # prepare /Contents object # calculate /CropBox & displacement disp = fitz.Rect(page1.CropBoxPosition, page1.CropBoxPosition) croprect = page1.rect + disp # draw original /CropBox rectangle img.drawRect(croprect) img.finish(color=gray, fill=gray) for b in blks: # loop through the blocks r = fitz.Rect(b[:4]) # block rectangle # add dislacement of original /CropBox r += disp img.drawRect(r) # surround block rectangle if b[-1] == 1: # if image block ... color = red a = fitz.TEXT_ALIGN_CENTER
imgcount += 1 new_name = "图片{}.png".format(imgcount) # 生成图片的名称 pix_temp.writeImage( os.path.join(root, '\EasyTrans', 'trans', 'output_file', new_name)) # bytes_array = pix_temp.getImageData('png')#可以不输出图片再写入新的pdf,通过byte # print(pix_temp.getImageData('png')) pix_temp = None # 释放资源 blks = cur_page.getTextBlocks( images=True) # read text blocks of input page new_page = new_pdf.newPage( -1, width=cur_page.MediaBoxSize[0], height=cur_page.MediaBoxSize[1]) # 创建一个新的页面与之前的页面相同大小 img = new_page.newShape() # prepare /Contents object disp = fitz.Rect(cur_page.CropBoxPosition, cur_page.CropBoxPosition) croprect = cur_page.rect + disp # img.drawRect(croprect)#画出整个页面的矩形 # img.finish(color=gray, fill=gray)#填充颜色 begin = (0, 0, 0, 0) # 记录初始值 end = (0, 0, 0, 0) # 记录终结值 flag = 0 # 记录当前的循 reference_flag = 0 # 判断是否在参考文献之后 blks.append((1, 2, 3, 6)) content = "" imgcount = 0 fonts = 9 for num in range(len(blks)): # loop through the blocks # 如果是本页面最后一个块,直接结束,因为最后一个是方便计算自己添加的。 if num == len(blks) - 1: break
import fitz import pandas as pd doc = fitz.open('<Your File Name>.pdf') page1 = doc[0] words = page1.get_text("words") #Extract the coordinates of the first object : first_annots=[] rec=page1.first_annot.rect rec #Information of words in first object is stored in mywords mywords = [w for w in words if fitz.Rect(w[:4]) in rec] ann= make_text(mywords) first_annots.append(ann) #This function selects the words contained in the box, sort the words and return in form of a string : def make_text(words): line_dict = {} words.sort(key=lambda w: w[0]) for w in words: y1 = round(w[3], 1) word = w[4] line = line_dict.get(y1, []) line.append(word) Extracting each page of the document and all the annots/rectangles : for pageno in range(0,len(doc)-1):
# Evan Widloski - 2020-11-07
# Demonstrates storing pen strokes directly in a PDF, inside an optional
# content group (layer).
import fitz

# name of the layer where the pen strokes live
stroke_layer_name = 'remarkable pen'

# start from an empty document with a single, unrotated page
doc = fitz.open()
page = doc.newPage()
page.setRotation(0)

# a small rectangle with some text, just for fun
r = fitz.Rect(72, 72, 100, 100)
page.addFreetextAnnot(r, 'foobar')

# ----- write pen strokes -----
# the optional content group that will hold the strokes
pen_ocg_xref = doc.addOCG(stroke_layer_name, config=-1, on=1, intent=None)

# each stroke is a list of points; all strokes go into one ink annotation
zigzag_stroke = [(72, 72), (100, 100), (200, 100), (100, 200)]
arc_stroke = [
    (372, 300), (367, 324), (355, 346), (336, 362), (312, 370),
    (287, 370), (264, 362), (244, 346), (232, 324), (228, 300),
]
annot = page.addInkAnnot([zigzag_stroke, arc_stroke])

# attach the annotation to the layer
annot.setOC(pen_ocg_xref)
print(pen_ocg_xref)

doc.save('out.pdf', deflate=True)
def extract_rectangle_text(rect: fitz.Rect, wordlist):
    """Return the text of all words whose rectangle intersects ``rect``.

    ``wordlist`` holds word tuples whose first four items are the word
    rectangle and whose fifth item is the word text. The matching words are
    joined with single spaces, in list order, and the result is stripped.
    """
    hits = []
    for item in wordlist:
        if fitz.Rect(item[:4]).intersects(rect):
            hits.append(item)
    joined = " ".join(item[4] for item in hits)
    return joined.strip()
def pencil(page, penciltip, pb_height, left):
    """Draw a pencil image on ``page``.

    Args:
        page: Page providing the ``draw*`` and ``insertTextbox`` methods
            used below.
        penciltip: Position of the pencil tip; it is combined with
            ``fitz.Point`` values by addition and subtraction.
        pb_height: Height of the pencil body. All other dimensions (body
            width, line width, font size) are derived from it.
        left: If true, the pencil tip is on the left side, else on the right.

    Raises:
        ValueError: If the label text does not fit into the label rectangle.
    """
    from fitz.utils import getColor
    # define some colors
    yellow = getColor("darkgoldenrod")
    black = getColor("black")
    white = getColor("white")
    red = getColor("red")
    wood = getColor("wheat2")
    wood2 = getColor("wheat3")
    #---------------------------------------------------------------------------
    # Some adjustments follow depending on whether the pencil tip is left or
    # right: when a choice needs to be made between a left point (lp) and a
    # right point (rp), we specify lp*a + rp*b, delivering either lp or rp.
    # Variable 's' is used as a sign and is either +1 or -1.
    #---------------------------------------------------------------------------
    if left:  # pencil tip is left
        a = 1
        b = 0
        s = 1
    else:
        a = 0
        b = 1
        s = -1

    def oneof(x, y):
        # Returns x when the tip is left (a=1, b=0), otherwise y.
        return x * a + y * b

    w = pb_height * 0.01  # standard line thickness
    pb_width = 2 * pb_height  # pencil body width
    tipendtop = penciltip + fitz.Point(1, -0.5) * pb_height * s
    tipendbot = penciltip + fitz.Point(1, 0.5) * pb_height * s
    r = fitz.Rect(tipendtop, tipendbot + (pb_width * s, 0))  # pencil body
    r.normalize()  # force r to be finite
    # topline / botline indicate the pencil edges
    topline0 = fitz.Point(r.x0 + r.width * 0.1, r.y0 + pb_height / 5.)  # upper pencil edge - left
    topline1 = fitz.Point(r.x0 + r.width * 0.9, topline0.y)  # upper pencil edge - right
    botline0 = fitz.Point(r.x0 + r.width * 0.1, r.y1 - pb_height / 5.)  # lower pencil edge - left
    botline1 = fitz.Point(r.x0 + r.width * 0.9, botline0.y)  # lower pencil edge - right
    # control point 1 for pencil rubber
    hp1 = oneof(r.top_right, r.top_left) + (pb_height * 0.6 * s, 0)
    # control point 2 for pencil rubber
    hp2 = oneof(r.bottom_right, r.bottom_left) + (pb_height * 0.6 * s, 0)
    # pencil body is some type of yellow
    page.drawRect(r, fill=yellow, color=wood, width=w)
    page.drawPolyline((r.top_left, topline0, botline0, r.bottom_left), fill=wood, color=yellow, width=w)
    page.drawPolyline((r.top_right, topline1, botline1, r.bottom_right), fill=wood, color=yellow, width=w)
    # draw pencil edge lines
    page.drawLine(topline0, topline1, width=w, color=wood2)
    page.drawLine(botline0, botline1, width=w, color=wood2)
    #===========================================================================
    # draw the pencil rubber
    #===========================================================================
    page.drawBezier(oneof(r.top_right, r.top_left), hp1, hp2, oneof(r.bottom_right, r.bottom_left), fill=red, width=w)
    #===========================================================================
    # black rectangle near pencil rubber
    #===========================================================================
    blackrect = fitz.Rect(
        oneof((r.top_right - (pb_height / 2., 0)), r.top_left),
        oneof(r.bottom_right, (r.bottom_left + (pb_height / 2., 0))))
    page.drawRect(blackrect, fill=black, width=w)
    #===========================================================================
    # draw pencil tip and curves indicating pencil sharpening traces
    #===========================================================================
    page.drawPolyline((tipendtop, penciltip, tipendbot), width=w, fill=wood)  # pencil tip
    p1 = oneof(r.top_left, r.top_right)  # either left or right
    p2 = oneof(topline0, topline1)
    p3 = oneof(botline0, botline1)
    p4 = oneof(r.bottom_left, r.bottom_right)
    p0 = -fitz.Point(pb_height / 5., 0) * s  # horizontal displacement of control points
    cp1 = p1 + (p2 - p1) * 0.5 + p0  # ctrl point upper rounding
    cp2 = p2 + (p3 - p2) * 0.5 + p0 * 2.9  # ctrl point middle rounding
    cp3 = p3 + (p4 - p3) * 0.5 + p0  # ctrl point lower rounding
    page.drawCurve(p1, cp1, p2, fill=yellow, width=w, color=wood)
    page.drawCurve(p2, cp2, p3, fill=yellow, width=w, color=wood)
    page.drawCurve(p3, cp3, p4, fill=yellow, width=w, color=wood)
    #===========================================================================
    # draw the pencil tip lead mine
    #===========================================================================
    page.drawPolyline(
        (penciltip + (tipendtop - penciltip) * 0.4, penciltip, penciltip + (tipendbot - penciltip) * 0.4),
        fill=black, width=w, closePath=True)
    #===========================================================================
    # add a curve to indicate lead mine is round
    #===========================================================================
    page.drawCurve(penciltip + (tipendtop - penciltip) * 0.4,
                   fitz.Point(penciltip.x + pb_height * 0.6 * s, penciltip.y),
                   penciltip + (tipendbot - penciltip) * 0.4,
                   width=w, fill=black)
    #===========================================================================
    # re-border pencil body getting rid of some pesky pixels
    #===========================================================================
    page.drawLine(r.top_left, r.top_right, width=w)
    page.drawLine(r.bottom_left, r.bottom_right, width=w)
    page.drawPolyline((tipendtop, penciltip, tipendbot), width=w)
    #===========================================================================
    # draw pencil label - first a rounded rectangle
    #===========================================================================
    p1 = fitz.Point(0.65, 0.15) * pb_height
    p2 = fitz.Point(0.45, 0.15) * pb_height
    lblrect = fitz.Rect(topline0 + oneof(p1, p2), botline1 - oneof(p2, p1))
    page.drawRect(lblrect, width=w, fill=black)
    page.drawCurve(lblrect.top_right,
                   fitz.Point(lblrect.x1 + pb_height / 4., penciltip.y),
                   lblrect.bottom_right, width=w, fill=black)
    page.drawCurve(lblrect.top_left,
                   fitz.Point(lblrect.x0 - pb_height / 4., penciltip.y),
                   lblrect.bottom_left, width=w, fill=black)
    # ... then text indicating it's a medium pencil; a negative return value
    # of insertTextbox is treated as "text did not fit"
    if page.insertTextbox(lblrect, "No.2", color=white, fontname="Helvetica", fontsize=pb_height * 0.22, align=1) < 0:
        raise ValueError("not enough space to store pencil text")
    #===========================================================================
    # finally the white vertical stripes - whatever they are good for
    #===========================================================================
    p1t = blackrect.top_left + fitz.Point(blackrect.width / 3., pb_height / 20.)
    p1b = blackrect.bottom_left + fitz.Point(blackrect.width / 3., -pb_height / 20.)
    p2t = blackrect.top_left + fitz.Point(blackrect.width * 2 / 3., pb_height / 20.)
    p2b = blackrect.bottom_left + fitz.Point(blackrect.width * 2 / 3., -pb_height / 20.)
    page.drawLine(p1t, p1b, color=white, width=pb_height * 0.08, roundCap=False)
    page.drawLine(p2t, p2b, color=white, width=pb_height * 0.08, roundCap=False)
    return
def _read_picture(picture_path):
    """Return the raw bytes of the picture stored at *picture_path*."""
    with open(picture_path, 'rb') as handle:
        return handle.read()


def main(argv):
    """Stamp pictures onto the pages of a PDF and drop its leading pages.

    Recognised options:

    * ``-i NAME`` / ``--ifile NAME``: base name; reads ``NAME.pdf`` and, by
      default, writes ``NAME_vu.pdf``.
    * ``--ofile PATH``: explicit output path (overrides the default).
    * ``-d N``: number of leading pages removed from the output.
    * ``-g N``: the big picture is put on the first N pages.
    * ``-p N``: the small picture is put on every page from index N on.
    * ``-l N``: the lateral picture is put on every page from index N on.
    * ``-h``: print the usage line and exit.

    The picture paths come from the module-level names ``bigpicture``,
    ``smallpicture`` and ``lateralpicture``.

    Exits with status 2 on an unknown option or when no input file is given.
    """
    usage = ('insert_rect.py -i inputfile -d x_first_pages_to_delete '
             '-g x_first_pages_with_big_pict -p small_pic_from_page_y '
             '-l lateral_page_from_page_z')
    input_file = ''
    output_file = ''
    explicit_output = ''
    nb_delete = 0
    nb_big = 0
    nb_small = 0
    nb_lateral = 0

    try:
        # "h" must be part of the short options, otherwise -h is rejected
        # by getopt before the loop below can ever see it.
        opts, _ = getopt.getopt(argv, "hi:d:g:p:l:", ["ifile=", "ofile="])
    except getopt.GetoptError:
        print (usage)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print (usage)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            input_file = arg + ".pdf"
            output_file = arg + "_vu.pdf"
        elif opt == "--ofile":
            # Previously this long option fell into the -g branch and was
            # converted with int(); treat it as the output path instead.
            explicit_output = arg
        elif opt == "-d":
            nb_delete = int(arg)
            print("delete ",nb_delete," first pages")
        elif opt == "-g":
            nb_big = int(arg)
            print("big picture on ",nb_big," first pages")
        elif opt == "-p":
            nb_small = int(arg)
            print("small picture from page ",nb_small)
        elif opt == "-l":
            nb_lateral = int(arg)
            print("lateral picture from page ",nb_lateral)

    if explicit_output:
        output_file = explicit_output
    if not input_file:
        # Nothing to work on: fail early with the usage line.
        print (usage)
        sys.exit(2)

    print ('Input file is ', input_file)
    print ('Output file is ', output_file)

    # insert big picture on the first nb_big pages
    big_image = _read_picture(bigpicture)
    file_handle = fitz.open(input_file)
    try:
        for page in file_handle.pages(0, nb_big, 1):
            page.insertImage(fitz.Rect(0, -80, 750, 210), stream=big_image)

        # insert small picture on every page from nb_small on
        small_image = _read_picture(smallpicture)
        for page in file_handle.pages(nb_small, file_handle.pageCount, 1):
            page.insertImage(fitz.Rect(0, 0, 400, 35), stream=small_image)

        # insert lateral picture on every page from nb_lateral on
        lateral_image = _read_picture(lateralpicture)
        for page in file_handle.pages(nb_lateral, file_handle.pageCount, 1):
            page.insertImage(fitz.Rect(510, -50, 720, 850), stream=lateral_image)

        # delete first pages: keep only pages nb_delete .. end
        kept_pages = list(range(nb_delete, file_handle.pageCount))
        file_handle.select(kept_pages)
        file_handle.save(output_file, garbage=3)  # save and clean new PDF
    finally:
        # Release the document even when stamping or saving fails.
        file_handle.close()
def compare_layput(filename_source, filename_target, filename_output, threshold=0.7):
    '''Compare the layout of two pdf files word by word.

    It's difficult to have an exactly same layout of blocks, so the check
    only ensures the pages look alike: all words extracted with
    ``getText('words')`` are compared together with their bbox. Each word
    is a tuple

    ```
    (x0, y0, x1, y1, "word", block_no, line_no, word_no)
    ```

    For every compared pair, the source bbox (yellow) and the target bbox
    (red) are drawn on the source page, and the annotated source document
    is saved to ``filename_output``.

    Args:
        filename_source: path of the reference pdf.
        filename_target: path of the pdf under test.
        filename_output: path the annotated source pdf is saved to.
        threshold: passed through to ``get_main_bbox`` for each word pair.

    Returns:
        ``False`` when the page counts differ or at least one word pair
        fails the bbox check, ``True`` otherwise.
    '''
    def sort_key(item):
        # order by word text, then by rounded top and left coordinates
        return (item[4], round(item[1], 1), round(item[0], 1))

    flag = True
    errs = []

    # fitz documents; both are closed on every exit path
    source = fitz.open(filename_source)  # type: fitz.Document
    try:
        target = fitz.open(filename_target)  # type: fitz.Document
        try:
            # check count of pages
            # --------------------------
            if len(source) != len(target):
                msg = 'Page count is inconsistent with source file.'
                print(msg)
                return False

            for source_page, target_page in zip(source, target):
                # check position of each word
                # ---------------------------
                source_words = source_page.getText('words')
                target_words = target_page.getText('words')

                # sort by word so that equal words are paired up
                source_words.sort(key=sort_key)
                target_words.sort(key=sort_key)

                # a differing word count is only reported; the pairs that
                # zip() can still form are compared below
                if len(source_words) != len(target_words):
                    msg = 'Words count is inconsistent with source file.'
                    print(msg)

                # check each word and bbox
                for sample, test in zip(source_words, target_words):
                    source_rect = fitz.Rect(sample[0:4])
                    target_rect = fitz.Rect(test[0:4])

                    # draw bbox based on source layout
                    source_page.drawRect(source_rect, color=(1, 1, 0), overlay=True)  # source position
                    source_page.drawRect(target_rect, color=(1, 0, 0), overlay=True)  # current position

                    # check bbox word by word: ignore small bbox, e.g. single letter bbox
                    if not Element().update_bbox(source_rect).get_main_bbox(
                            target_rect, threshold):
                        flag = False
                        errs.append(
                            (f'{sample[4]} ===> {test[4]}', target_rect, source_rect))

            # save the annotated source document
            source.save(filename_output)
        finally:
            target.close()
    finally:
        source.close()

    # outputs
    for word, target_rect, source_rect in errs:
        print(
            f'Word "{word}": \nsample bbox: {source_rect}\ncurrent bbox: {target_rect}\n'
        )

    return flag
import fitz  # PyMuPDF

# Rectangle the image is placed in: the upper left corner of each page.
rect = fitz.Rect(0, 0, 100, 100)

doc = fitz.open("some.pdf")
for pno in range(len(doc)):
    page = doc[pno]
    page.insertImage(rect, filename="some.image")

# Write the changes back into the opened file as an incremental save.
doc.saveIncr()
def get_financial_resume(self):
    """Collect the financial summary values of every brokerage note.

    For each page the summary rectangle is located with the text search
    function: it spans from the "Resumo Financeiro" (or, on the other page
    layout, "Corretagem / Despesas") hit to the "Líquido para " hit. The
    words intersecting that rectangle are grouped into text lines; when a
    line starts with one of the known headers, the amount found on the
    previous line is stored under that header.

    The per-note values are then copied into ``self.ncs_in_file`` and
    printed. The result is also stored in ``self.financial_resume``.

    Returns:
        dict mapping each header found to the list of values collected
        for it, in page order.
    """
    # Labels searched for at the start of each text line.
    headers = [
        "Valor líquido das operações", "Taxa de liquidação",
        "Taxa de Registro", "Total CBLC", "Taxa de termo/opções",
        "Taxa A.N.A", "Emolumentos", "Total Bovespa / Soma", "Corretagem",
        "ISS", "I.R.R.F.", "Outras Bovespa", "Total Corretagem / Despesas",
        "Líquido para"
    ]
    # Amount in the form XX.XXX,XX Y (Y = C or D)
    amount_pattern = r"(?:[1-9]\d{0,2}(?:\.\d{3})*|0)(?:,\d{1,2})[ ][CD]{1}"
    # NOTE(review): presumably the far corner of the page area of
    # interest - confirm against the page size of the notes.
    far_corner = (601, 842)

    my_financial_resume = {}
    for pno, page in enumerate(self.pages):
        rl1 = page.searchFor("Resumo Financeiro")
        if not rl1:
            # Are we on the other page layout?
            rl1 = page.searchFor("Corretagem / Despesas")
            if not rl1:
                continue  # nothing to read on this page

        rl2 = page.searchFor("Líquido para ")
        anchor = (rl2[0] | far_corner) if rl2 else far_corner
        rect = rl1[0] | anchor  # now we have the rectangle

        # select the words which at least intersect the rect
        mywords = [
            w for w in self.words[pno] if fitz.Rect(w[:4]).intersects(rect)
        ]
        # group the words into lines by their bottom coordinate
        mywords.sort(key=itemgetter(3, 0))

        old = ""
        for _y1, gwords in groupby(mywords, key=itemgetter(3)):
            line = " ".join(w[4] for w in gwords)
            for header in headers:
                if line.startswith(header):
                    # "old" is appended since the value appears on the
                    # line before its label (header)
                    my_financial_resume.setdefault(header, []).append(old)

            # remember the amount of this line for the next label
            found = re.findall(amount_pattern, line)
            old = found[0] if found else 0

    print("Total de Notas de Corretagem no Arquivo:", self.nc_quantity)
    for nc in range(self.nc_quantity):
        try:
            # Remove 'falsy' items...
            my_financial_resume["Corretagem"] = [
                x for x in my_financial_resume["Corretagem"] if x
            ]
            for head in headers:
                self.ncs_in_file[nc][head] = my_financial_resume[head][nc]
            print("Nota de Corretagem Nº:", self.ncs_in_file[nc]["Nota"])
            print("Código do Cliente:", self.ncs_in_file[nc]["CodigoCliente"])
            print("Data da Nota:", self.ncs_in_file[nc]["Data"])
            for head in headers:
                tab = "\t" if head[0:5] == "Total" else ""
                print("\t {} {}: {}".format(tab, head,
                                            my_financial_resume[head][nc]))
            print("\nResumo:")
            print("\t Valor Líquido das Operações:",
                  self.ncs_in_file[nc]["Valor líquido das operações"])
            print("\t Valor da Nota de Corretagem",
                  self.ncs_in_file[nc]["Líquido para"])
            print(" ")
        except Exception:
            # Best effort: notes whose summary could not be matched are
            # only listed. KeyboardInterrupt / SystemExit now propagate.
            print("Conta: ", self.get_account())
            for listed in range(self.nc_quantity):
                print("Nota de Corretagem Nº:",
                      self.ncs_in_file[listed]["Nota"])
            print("Day Trade - To be implemented!")

    self.financial_resume = my_financial_resume
    return self.financial_resume
# title above the legend
title_pos = fitz.Point(ts_h - 30, ts_v - 30)
page.insertText(title_pos, stitle, fontsize=13, color=blue)

# one sector plus one legend row per table entry
for i, c in enumerate(table):
    seat_count = c[0]
    beta = seat_count / seats * 180  # share of the semi circle, as an angle
    color = getColor(c[1])  # looked up once, used for sector and legend

    # drawSector returns the other end of the arc it drew; that point is
    # the starting point of the next sector
    point = page.drawSector(center, point, beta, color=white,
                            fullSector=True, fill=color)

    # legend text
    if seat_count > 1:
        unit = "Sitze"
    else:
        unit = "Sitz"
    text = "%s, %i %s" % (c[2], seat_count, unit)
    row_y = ts_v + i * lineheight
    pos = fitz.Point(ts_h, row_y)
    page.insertText(pos, text, color=blue)

    # legend color bar, left of the text
    tl = fitz.Point(pos.x - 30, ts_v - 10 + i * lineheight)
    br = fitz.Point(pos.x - 10, row_y)
    rect = fitz.Rect(tl, br)
    page.drawRect(rect, fill=color, color=color)

# a white circle over the center makes the chart look like an auditorium
page.drawCircle(center, radius - 70, color=white, fill=white)

doc.save("piechart2.pdf")
return rects doc = fitz.open() # new PDF page = doc.newPage() # new page shape = page.newShape() # make a page draw area opacity = 0.3 # all annotation use this opacity tcol = (0, 0, 1) # text color gold = (1, 1, 0) # highlight color bg_color = "skyblue3" background = fitz.utils.getColor(bg_color) # background color fname = "hebo" # Helvetica Bold fsize = 12 # generous font size tl = page.rect.tl + (150, 100) br = page.rect.br - (150, 62) rect = fitz.Rect(tl, br) # only use this area of the page rects = table( # define a table with 2 cells per blend mode rows=len(blend_modes), # one row per blend mode cols=2, # for the blend mode and its highlighted version rect=rect, # inside this rectangle ) # paint page background # will provide better visibility of highlighted text shape.drawRect(page.rect) shape.finish(fill=background, color=background) # fill the table for i, bmode in enumerate(blend_modes): r = rects[i] # contains 2 rectangles
# used for non-CJK characters. For CJK, the fallback is always used. text = """This is a text of mixed languages to demonstrate MuPDF's text output capabilities. Font used for the non-CJK characters: '%s', font size: %g, color: %s. Euro: €, some special signs: |~°²³, general Latin: ñäöüßâ Japan: 熊野三山本願所は、15世紀末以降における熊野三山(熊野本宮、熊野新宮 Greece: Στα ερείπια της πόλης, που ήταν ένα σημαντικό Korea: 에듀롬은 하나의 계정으로 전 세계 고등교육 기관의 인터넷에 접속할 Russia: Ко времени восшествия на престол Якова I в значительной China: 北京作为城市的历史可以追溯到3,000年前。西周初年,周武王封召公奭于燕國。 This longer text part checks, whether the very last line will not be justified either.""" % ( font.name, fsize, blue, ) fill_rect = fitz.Rect(72, 72, 372, 372) # keep above text in here writer = fitz.TextWriter(page_rect, color=blue) # start a text writer writer.fillTextbox( # fill in above text fill_rect, # keep text inside this text, # the text align=fitz.TEXT_ALIGN_JUSTIFY, # alignment warn=True, # keep going if too much text fontsize=fsize, font=font, ) # write our results to the PDF page. writer.writeText(page) # To show what happened, draw the rectangles, etc.
print(fitz.__doc__)

# Compare the version numerically: comparing the split strings would rank
# e.g. "1.9.0" above "1.17.0" because "9" > "17" as text.
if tuple(int(part) for part in fitz.VersionBind.split(".")
         if part.isdigit()) < (1, 17, 0):
    sys.exit("PyMuPDF v1.17.0+ is needed.")

gc.set_debug(gc.DEBUG_UNCOLLECTABLE)

# sample texts
highlight = "this text is highlighted"
underline = "this text is underlined"
strikeout = "this text is striked out"
squiggled = "this text is zigzag-underlined"

# colors as RGB triples
red = (1, 0, 0)
blue = (0, 0, 1)
gold = (1, 1, 0)
green = (0, 1, 0)

displ = fitz.Rect(0, 50, 0, 50)
r = fitz.Rect(72, 72, 220, 100)
t1 = u"têxt üsès Lätiñ charß,\nEUR: €, mu: µ, super scripts: ²³!"


def print_descr(annot):
    """Print a short description to the right of each annot rect.

    The text "<type name> annotation" is inserted in red on the
    annotation's parent, offset by (10, -5) from the bottom-right corner
    of the annotation rectangle.
    """
    annot.parent.insert_text(annot.rect.br + (10, -5),
                             "%s annotation" % annot.type[1],
                             color=red)


doc = fitz.open()
page = doc.new_page()

page.set_rotation(0)
def wxRect_to_Rect(self, wr):
    """Convert a wx.Rect into a fitz.Rect multiplied by ``self.shrink``."""
    left, top = wr.x, wr.y
    right = left + wr.width
    bottom = top + wr.height
    return fitz.Rect(left, top, right, bottom) * self.shrink
def generate(args, page): pagerect = list() # rl1 = page.searchFor("#")[0] rl1 = fitz.Rect(100, 30, 130, 70) # sometimes in text so hardcode # rl2 = page.searchFor("THIS BEER IS")[0] # sometimes in text so hardcode rl2 = fitz.Rect(20, 182, 555, 197) # sometimes in text so hardcode if (page.number % 2): rl2.x1 = 575 rl2.x0 = 40 pagerect.append(rl1 | rl2) ### union rectangle 0 (header) rectdesc = fitz.Rect(10, 120, 410, 160) if (page.number % 2): rectdesc.x1 += 20 # rl1 = page.searchFor("THIS BEER IS")[0] rl1 = fitz.Rect(40, 182, 180, 197) # sometimes in text so hardcode rl2 = page.searchFor("BASICS")[0] rl3 = page.searchFor("METHOD / TIMINGS")[ 0] # sometimes food and method reversed method2 = 0 if (rl3.x0 > 150): #method is in 2nd row rl3 = page.searchFor("FOOD PAIRING")[0] method2 = 1 rl2.x1 = 180 rl3.x1 = 180 if (page.number % 2): rl2.x1 = 200 rl3.x1 = 200 pagerect.extend((rl1 | rl2, rl2 | rl3)) ### union rectangle 1 and 2 rl3.y1 = 780 pagerect.append(rl3) ### rect 3 rl1 = page.searchFor("INGREDIENTS")[0] if rl1.y0 > 190: rl1 = page.searchFor("INGREDIENTS")[1] rl2 = page.searchFor("FOOD PAIRING") if method2: rl2 = page.searchFor( "METHOD / TIMINGS") # sometimes food and method reversed if rl2 == []: rl2 = (rl1, ) rl2[0].y1 = 780 rl2 = rl2[0] if rl2.x0 > 300: #food pairing is sometimes on third column special = 1 rl1.y1 = 780 rl1.x1 = 340 if (page.number % 2): rl1.x1 = 360 pagerect.append(rl1) # rect4 if pairing next rl2.x1 = 555 if (page.number % 2): rl2.x1 = 575 rl2.y1 = 780 pagerect.append(rl2) # rect5 if pairing next else: special = 0 rl2.x1 = 340 if (page.number % 2): rl2.x1 = 360 pagerect.append(rl1 | rl2) ### union rectangle 4 rl2.y1 = 780 pagerect.append(rl2) ### rect 5 if method2: pagerect[3], pagerect[5] = pagerect[5], pagerect[3] rl1 = page.searchFor("PACKAGING")[0] rl2 = page.searchFor("BREWER’S TIP")[0] rl2.x1 = 555 if (page.number % 2): rl2.x1 = 575 rlt = rl1 | rl2 rlt.y0 += 20 # to crop for photo rlt.y1 -= 35 rlt.x1 -= 5 pagerect.append(rlt) ### 
union rectangle 6 if special: rlr = page.searchFor("FOOD PAIRING")[0] pagerect.append(rlr | rl2) ### union rectangle 7 special else: rl2.y1 = 780 pagerect.append(rl2) ### rect 7 # FOR TESTING RECTANGLES : if args.debug: for rect in pagerect: page.drawRect(rect, color=(1, 0, 0), width=2) page.drawRect(rectdesc, color=(0, 1, 0), width=2) page.getPixmap().writeImage("page-%i-test.png" % page.number) words = page.getTextWords() blocks = page.getTextBlocks() beer = {} for i, rect in enumerate(pagerect): myblocks = [w for w in blocks if fitz.Rect(w[:4]).intersect(rect)] # myblocks = [w for w in blocks if fitz.Rect(w[:4]) in rect] groupblock = groupby(sorted(myblocks, key=itemgetter(3, 0)), key=itemgetter(3)) sentence_list_1 = [ " ".join(w[4] for w in gwords) for y1, gwords in groupblock ] sentence_list_blk = [ re.sub(r"(\s+)", r" ", s) for s in sentence_list_1 ] blkstr = "\n".join(sentence_list_blk) blkstr2 = "\n".join(sentence_list_1) sentence_list_blk_2 = blkstr2.split("\n") if args.debug: # print(i,sentence_list_blk) print(i, sentence_list_blk) print(blkstr2) try: if i == 0: #header descblock = [w for w in words if fitz.Rect(w[:4]) in rectdesc] descgroupblock = groupby(sorted(descblock, key=itemgetter(3, 0)), key=itemgetter(3)) descsentence_list_1 = [ " ".join(w[4] for w in gwords) for y1, gwords in descgroupblock ] beer['id'] = re_number.search(blkstr2).group(1) t = sentence_list_blk_2.index("#" + beer['id']) beer['name'] = sentence_list_blk_2[t + 1] if descsentence_list_1 != []: beer['shortdesc'] = descsentence_list_1[0] else: print( f"HEADER order problem trying to correct: {page.number} id:{beer['id']} " ) print(f"leaving empty") # beer['shortdesc'] = sentence_list_blk_2[t+2] if s := re_date.search(blkstr2): beer['date'] = s.group(1) else: print(f"No date data: {page.number} id:{beer['id']} ") abvibuog = re_realabvog.search(blkstr2) if abvibuog: beer['real_abv'] = abvibuog.group(1) beer['IBU'] = abvibuog.group(2) beer['OG'] = abvibuog.group(3) # elif "ABV" in 
sentence_list_blk_2[t+3]: # beer['real_abv'] = re.search(r"\d+\.{0,1}\d*%",sentence_list_blk_2[t+4]).group() elif "ABV" in blkstr2: beer['real_abv'] = re.search(r"\d+\.{0,1}\d*%", blkstr2).group() elif i == 1: #top left description if "THIS BEER IS" in sentence_list_blk_2 and "BASICS" in sentence_list_blk_2: t1 = sentence_list_blk_2.index("THIS BEER IS") t2 = sentence_list_blk_2.index("BASICS") beer['description'] = " ".join(sentence_list_blk_2[t1 + 1:t2]) else: print( f"No description data: {page.number} id:{beer['id']} ") elif i == 2: #basics if s := re_vol.search(blkstr): beer['vol'] = s.group(1)
# Both folders must exist before any file is touched.
if not path.exists(input_folder):
    print ("Não existe input")
    exit(-1)
if not path.exists(assinatura_folder):
    print ("Não existe assinatura")
    exit(-2)

# All three signatures currently use the same image file.
jessica = rodrigo = marcelo = assinatura_folder + "image.png"

# Rectangles the signatures are placed in, from left to right.
image_jessica = fitz.Rect(30, 320, 250, 632)
image_rodrigo = fitz.Rect(240, 320, 470, 632)
image_marcelo = fitz.Rect(430, 320, 660, 632)

files = glob(input_folder + "*.pdf")
output_file = "example2.pdf"

for input_file in files:
    # the signatures go on the first page of each PDF
    file_handle = fitz.open(input_file)
    first_page = file_handle[0]
    # same file name, placed in the output folder
    output_file = output_folder + path.basename(input_file)

    for where, picture in ((image_jessica, jessica),
                           (image_rodrigo, rodrigo),
                           (image_marcelo, marcelo)):
        first_page.insertImage(where, filename=picture)
def generate_report(): name_ = ename.get() ename.delete(0,END) desg = desg_.get() loca_ = elocation.get() loca_ = int(loca_[-1]) elocation.delete(0,END) risk_rating = int(Panel_data[Panel_data_headings[-1]][loca_-1]) zone_ = zoneOfC(risk_rating) if desg=="Overman": if risk_rating <28: with PdfPages(name_ + '.pdf') as pdf: plt.figure(figsize=(20,10)) border = plt.figure(figsize=(20,10), linewidth=10, edgecolor="#8B4513") plt.title("HAZCOM REPORT FOR ROOF FALLS", fontsize= 30, weight= "bold") plt.axis('off') headerInfoLow(name_,desg,loca_,zone_) for x in range(1,10): plt.annotate(str(x)+") "+Precaution[Precaution_headings[0]][x], xy=(-90, 400-x*25), xycoords='axes points',fontsize=12) for x in range(1,7): plt.annotate(str(x)+") "+Precaution[Precaution_headings[3]][x+1], xy=(-90, 140-x*25), xycoords='axes points',fontsize=12) pdf.savefig(edgecolor=border.get_edgecolor()) elif risk_rating <48: with PdfPages(name_ + '.pdf') as pdf: plt.figure(figsize=(17,10)) border = plt.figure(figsize=(17,10), linewidth=10, edgecolor="#8B4513") plt.title("HAZCOM REPORT FOR ROOF FALLS", fontsize= 30, weight= "bold") plt.axis('off') headerInfoMed(name_,desg,loca_,zone_) for x in range(1,10): plt.annotate(str(x)+") "+Precaution[Precaution_headings[0]][x], xy=(-90, 400-x*20), xycoords='axes points',fontsize=10) for x in range(1,6): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[0]][x], xy=(-90, 180-x*20), xycoords='axes points',fontsize=13) for x in range(1,4): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[17]][x], xy=(-90, 50-x*20), xycoords='axes points',fontsize=13) pdf.savefig(edgecolor=border.get_edgecolor()) elif risk_rating <70: with PdfPages(name_ + '.pdf') as pdf: # Page 1 plt.figure(figsize=(15,10)) border = plt.figure(figsize=(17,10), linewidth=10, edgecolor="#8B4513") plt.title("HAZCOM REPORT FOR ROOF FALLS", fontsize= 30, weight= "bold") plt.axis('off') headerInfoHigh(name_,desg,loca_,zone_) for x in range(1,6): plt.annotate(str(x)+") 
"+Mitigation[Mitigation_headings[0]][x], xy=(-90,400-x*25), xycoords='axes points',fontsize=14) for x in range(2,5): plt.annotate(str(x-1)+") "+Mitigation[Mitigation_headings[21]][x], xy=(-90, 230-(x-1)*25), xycoords='axes points',fontsize=14) for x in range(1,4): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[17]][x], xy=(-90, 100-x*25), xycoords='axes points',fontsize=14) pdf.savefig(edgecolor=border.get_edgecolor()) # Page 2 plt.figure(figsize=(10,11)) border = plt.figure(figsize=(10,11), linewidth=10, edgecolor="#8B4513") plt.axis('off') pdf.savefig(edgecolor=border.get_edgecolor()) #opening the file again to add an image on Page 2 doc = fitz.open(name_ + '.pdf') rect= fitz.Rect(50,50,700,700) page = doc.loadPage(1) page.insertImage(rect, filename="D:\\Ken related\\HazCom_students\\Project\\interface\\emergency.jpg") doc.saveIncr() # Very high risk else: with PdfPages(name_ + '.pdf') as pdf: plt.figure(figsize=(15,10)) border = plt.figure(figsize=(15,10), linewidth=10, edgecolor="#8B4513") plt.title("HAZCOM REPORT FOR ROOF FALLS", fontsize= 30, weight= "bold") plt.axis('off') headerInfoVeryHigh(name_,desg,loca_,zone_) for x in range(1,6): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[0]][x], xy=(-90,400-x*20), xycoords='axes points',fontsize=14) for x in range(1,5): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[3]][x], xy=(-90, 260-x*20), xycoords='axes points',fontsize=14) for x in range(2,5): plt.annotate(str(x-1)+") "+Mitigation[Mitigation_headings[21]][x], xy=(-90, 160-x*20), xycoords='axes points',fontsize=14) pdf.savefig(edgecolor=border.get_edgecolor()) # plt.close() plt.figure(figsize=(15,10)) border = plt.figure(figsize=(15,10), linewidth=10, edgecolor="#8B4513") plt.annotate("HAZARD REPORT FORM", xy=(490,570), xycoords='axes points',fontsize=27, weight="bold") plt.axis('off') pdf.savefig(edgecolor=border.get_edgecolor()) #opening the file again to add an image at the top doc = fitz.open(name_ + '.pdf') rect1= 
fitz.Rect(-500,30,500,650) rect2 = fitz.Rect(500,100,1100,580) page = doc.loadPage(1) page.insertImage(rect1, filename="D:\\Ken related\\HazCom_students\\Project\\interface\\emergency.jpg") page.insertImage(rect2, filename="D:\\Ken related\\HazCom_students\\Project\\interface\\annotation.jpg") doc.saveIncr() if desg=="Mining Sirdar": if risk_rating <28: with PdfPages(name_ + '.pdf') as pdf: plt.figure(figsize=(20,10)) border = plt.figure(figsize=(20,10), linewidth=10, edgecolor="#8B4513") plt.title("HAZCOM REPORT FOR ROOF FALLS", fontsize= 30, weight= "bold") plt.axis('off') headerInfoLow(name_,desg,loca_,zone_) for x in range(1,10): plt.annotate(str(x)+") "+Precaution[Precaution_headings[0]][x], xy=(-90, 400-x*25), xycoords='axes points',fontsize=12) for x in range(1,7): plt.annotate(str(x)+") "+Precaution[Precaution_headings[5]][x+1], xy=(-90, 140-x*25), xycoords='axes points',fontsize=12) pdf.savefig(edgecolor=border.get_edgecolor()) elif risk_rating <48: with PdfPages(name_ + '.pdf') as pdf: plt.figure(figsize=(17,10)) border = plt.figure(figsize=(17,10), linewidth=10, edgecolor="#8B4513") plt.title("HAZCOM REPORT FOR ROOF FALLS", fontsize= 30, weight= "bold") plt.axis('off') headerInfoMed(name_,desg,loca_,zone_) for x in range(1,10): plt.annotate(str(x)+") "+Precaution[Precaution_headings[0]][x], xy=(-90, 400-x*20), xycoords='axes points',fontsize=10) for x in range(1,6): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[0]][x], xy=(-90, 180-x*20), xycoords='axes points',fontsize=13) for x in range(1,4): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[17]][x], xy=(-90, 50-x*20), xycoords='axes points',fontsize=13) pdf.savefig(edgecolor=border.get_edgecolor()) elif risk_rating <70: with PdfPages(name_ + '.pdf') as pdf: # Page 1 plt.figure(figsize=(15,10)) border = plt.figure(figsize=(17,10), linewidth=10, edgecolor="#8B4513") plt.title("HAZCOM REPORT FOR ROOF FALLS", fontsize= 30, weight= "bold") plt.axis('off') 
headerInfoHigh(name_,desg,loca_,zone_) for x in range(1,6): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[0]][x], xy=(-90,400-x*25), xycoords='axes points',fontsize=14) for x in range(2,5): plt.annotate(str(x-1)+") "+Mitigation[Mitigation_headings[23]][x], xy=(-90, 230-(x-1)*25), xycoords='axes points',fontsize=11) for x in range(1,4): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[17]][x], xy=(-90, 100-x*25), xycoords='axes points',fontsize=14) pdf.savefig(edgecolor=border.get_edgecolor()) # Page 2 plt.figure(figsize=(10,11)) border = plt.figure(figsize=(10,11), linewidth=10, edgecolor="#8B4513") plt.axis('off') pdf.savefig(edgecolor=border.get_edgecolor()) #opening the file again to add an image on Page 2 doc = fitz.open(name_ + '.pdf') rect= fitz.Rect(50,50,700,700) page = doc.loadPage(1) page.insertImage(rect, filename="D:\\Ken related\\HazCom_students\\Project\\interface\\emergency.jpg") doc.saveIncr() # Very high risk else: with PdfPages(name_ + '.pdf') as pdf: plt.figure(figsize=(20,10)) border = plt.figure(figsize=(20,10), linewidth=10, edgecolor="#8B4513") plt.title("HAZCOM REPORT FOR ROOF FALLS", fontsize= 30, weight= "bold") plt.axis('off') headerInfoVeryHigh(name_,desg,loca_,zone_) for x in range(1,6): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[0]][x], xy=(-90,400-x*20), xycoords='axes points',fontsize=14) for x in range(1,5): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[3]][x], xy=(-90, 260-x*20), xycoords='axes points',fontsize=14) for x in range(2,5): plt.annotate(str(x-1)+") "+Mitigation[Mitigation_headings[23]][x], xy=(-90, 160-x*20), xycoords='axes points',fontsize=13) pdf.savefig(edgecolor=border.get_edgecolor()) # plt.close() plt.figure(figsize=(15,10)) border = plt.figure(figsize=(15,10), linewidth=10, edgecolor="#8B4513") plt.annotate("HAZARD REPORT FORM", xy=(490,570), xycoords='axes points',fontsize=27, weight="bold") plt.axis('off') pdf.savefig(edgecolor=border.get_edgecolor()) #opening the file 
again to add an image at the top doc = fitz.open(name_ + '.pdf') rect1= fitz.Rect(-500,30,500,650) rect2 = fitz.Rect(500,100,1100,580) page = doc.loadPage(1) page.insertImage(rect1, filename="D:\\Ken related\\HazCom_students\\Project\\interface\\emergency.jpg") page.insertImage(rect2, filename="D:\\Ken related\\HazCom_students\\Project\\interface\\annotation.jpg") doc.saveIncr() if desg=="Shotfirer": if risk_rating <28: with PdfPages(name_ + '.pdf') as pdf: plt.figure(figsize=(20,10)) border = plt.figure(figsize=(20,10), linewidth=10, edgecolor="#8B4513") plt.title("HAZCOM REPORT FOR ROOF FALLS", fontsize= 30, weight= "bold") plt.axis('off') headerInfoLow(name_,desg,loca_,zone_) for x in range(1,10): plt.annotate(str(x)+") "+Precaution[Precaution_headings[0]][x], xy=(-90, 400-x*25), xycoords='axes points',fontsize=12) for x in range(1,7): plt.annotate(str(x)+") "+Precaution[Precaution_headings[7]][x+1], xy=(-90, 140-x*25), xycoords='axes points',fontsize=12) pdf.savefig(edgecolor=border.get_edgecolor()) elif risk_rating <48: with PdfPages(name_ + '.pdf') as pdf: plt.figure(figsize=(17,10)) border = plt.figure(figsize=(17,10), linewidth=10, edgecolor="#8B4513") plt.title("HAZCOM REPORT FOR ROOF FALLS", fontsize= 30, weight= "bold") plt.axis('off') headerInfoMed(name_,desg,loca_,zone_) for x in range(1,10): plt.annotate(str(x)+") "+Precaution[Precaution_headings[0]][x], xy=(-90, 400-x*20), xycoords='axes points',fontsize=10) for x in range(1,6): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[0]][x], xy=(-90, 180-x*20), xycoords='axes points',fontsize=13) for x in range(1,4): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[17]][x], xy=(-90, 50-x*20), xycoords='axes points',fontsize=13) pdf.savefig(edgecolor=border.get_edgecolor()) elif risk_rating <70: with PdfPages(name_ + '.pdf') as pdf: # Page 1 plt.figure(figsize=(15,10)) border = plt.figure(figsize=(17,10), linewidth=10, edgecolor="#8B4513") plt.title("HAZCOM REPORT FOR ROOF FALLS", fontsize= 30, 
weight= "bold") plt.axis('off') headerInfoHigh(name_,desg,loca_,zone_) for x in range(1,6): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[0]][x], xy=(-90,400-x*25), xycoords='axes points',fontsize=14) for x in range(2,6): plt.annotate(str(x-1)+") "+Mitigation[Mitigation_headings[25]][x], xy=(-90, 230-(x-1)*25), xycoords='axes points',fontsize=14) for x in range(1,4): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[17]][x], xy=(-90, 100-x*25), xycoords='axes points',fontsize=14) pdf.savefig(edgecolor=border.get_edgecolor()) # Page 2 plt.figure(figsize=(10,11)) border = plt.figure(figsize=(10,11), linewidth=10, edgecolor="#8B4513") plt.axis('off') pdf.savefig(edgecolor=border.get_edgecolor()) #opening the file again to add an image on Page 2 doc = fitz.open(name_ + '.pdf') rect= fitz.Rect(50,50,700,700) page = doc.loadPage(1) page.insertImage(rect, filename="D:\\Ken related\\HazCom_students\\Project\\interface\\emergency.jpg") doc.saveIncr() # Very high risk else: with PdfPages(name_ + '.pdf') as pdf: plt.figure(figsize=(15,10)) border = plt.figure(figsize=(15,10), linewidth=10, edgecolor="#8B4513") plt.title("HAZCOM REPORT FOR ROOF FALLS", fontsize= 30, weight= "bold") plt.axis('off') headerInfoVeryHigh(name_,desg,loca_,zone_) for x in range(1,6): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[0]][x], xy=(-90,400-x*20), xycoords='axes points',fontsize=14) for x in range(1,5): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[3]][x], xy=(-90, 260-x*20), xycoords='axes points',fontsize=14) for x in range(2,6): plt.annotate(str(x-1)+") "+Mitigation[Mitigation_headings[25]][x], xy=(-90, 160-x*20), xycoords='axes points',fontsize=12) pdf.savefig(edgecolor=border.get_edgecolor()) # plt.close() plt.figure(figsize=(15,10)) border = plt.figure(figsize=(15,10), linewidth=10, edgecolor="#8B4513") plt.annotate("HAZARD REPORT FORM", xy=(490,570), xycoords='axes points',fontsize=27, weight="bold") plt.axis('off') 
pdf.savefig(edgecolor=border.get_edgecolor()) #opening the file again to add an image at the top doc = fitz.open(name_ + '.pdf') rect1= fitz.Rect(-500,30,500,650) rect2 = fitz.Rect(500,100,1100,580) page = doc.loadPage(1) page.insertImage(rect1, filename="D:\\Ken related\\HazCom_students\\Project\\interface\\emergency.jpg") page.insertImage(rect2, filename="D:\\Ken related\\HazCom_students\\Project\\interface\\annotation.jpg") doc.saveIncr() if desg=="Timberman": if risk_rating <28: with PdfPages(name_ + '.pdf') as pdf: plt.figure(figsize=(20,10)) border = plt.figure(figsize=(20,10), linewidth=10, edgecolor="#8B4513") plt.title("HAZCOM REPORT FOR ROOF FALLS", fontsize= 30, weight= "bold") plt.axis('off') headerInfoLow(name_,desg,loca_,zone_) for x in range(1,10): plt.annotate(str(x)+") "+Precaution[Precaution_headings[0]][x], xy=(-90, 400-x*25), xycoords='axes points',fontsize=12) for x in range(1,7): plt.annotate(str(x)+") "+Precaution[Precaution_headings[9]][x+1], xy=(-90, 140-x*25), xycoords='axes points',fontsize=12) pdf.savefig(edgecolor=border.get_edgecolor()) elif risk_rating <48: with PdfPages(name_ + '.pdf') as pdf: plt.figure(figsize=(17,10)) border = plt.figure(figsize=(17,10), linewidth=10, edgecolor="#8B4513") plt.title("HAZCOM REPORT FOR ROOF FALLS", fontsize= 30, weight= "bold") plt.axis('off') headerInfoMed(name_,desg,loca_,zone_) for x in range(1,10): plt.annotate(str(x)+") "+Precaution[Precaution_headings[0]][x], xy=(-90, 400-x*20), xycoords='axes points',fontsize=10) for x in range(1,6): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[0]][x], xy=(-90, 180-x*20), xycoords='axes points',fontsize=13) for x in range(1,4): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[17]][x], xy=(-90, 50-x*20), xycoords='axes points',fontsize=13) pdf.savefig(edgecolor=border.get_edgecolor()) elif risk_rating <70: with PdfPages(name_ + '.pdf') as pdf: # Page 1 plt.figure(figsize=(15,10)) border = plt.figure(figsize=(17,10), linewidth=10, 
edgecolor="#8B4513") plt.title("HAZCOM REPORT FOR ROOF FALLS", fontsize= 30, weight= "bold") plt.axis('off') headerInfoHigh(name_,desg,loca_,zone_) for x in range(1,6): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[0]][x], xy=(-90,400-x*25), xycoords='axes points',fontsize=14) for x in range(2,5): plt.annotate(str(x-1)+") "+Mitigation[Mitigation_headings[27]][x], xy=(-90, 230-(x-1)*25), xycoords='axes points',fontsize=14) for x in range(1,4): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[17]][x], xy=(-90, 100-x*25), xycoords='axes points',fontsize=14) pdf.savefig(edgecolor=border.get_edgecolor()) # Page 2 plt.figure(figsize=(10,11)) border = plt.figure(figsize=(10,11), linewidth=10, edgecolor="#8B4513") plt.axis('off') pdf.savefig(edgecolor=border.get_edgecolor()) #opening the file again to add an image on Page 2 doc = fitz.open(name_ + '.pdf') rect= fitz.Rect(50,50,700,700) page = doc.loadPage(1) page.insertImage(rect, filename="D:\\Ken related\\HazCom_students\\Project\\interface\\emergency.jpg") doc.saveIncr() # Very high risk else: with PdfPages(name_ + '.pdf') as pdf: plt.figure(figsize=(15,10)) border = plt.figure(figsize=(15,10), linewidth=10, edgecolor="#8B4513") plt.title("HAZCOM REPORT FOR ROOF FALLS", fontsize= 30, weight= "bold") plt.axis('off') headerInfoVeryHigh(name_,desg,loca_,zone_) for x in range(1,6): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[0]][x], xy=(-90,400-x*20), xycoords='axes points',fontsize=14) for x in range(1,5): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[3]][x], xy=(-90, 260-x*20), xycoords='axes points',fontsize=14) for x in range(2,5): plt.annotate(str(x-1)+") "+Mitigation[Mitigation_headings[27]][x], xy=(-90, 160-x*20), xycoords='axes points',fontsize=14) pdf.savefig(edgecolor=border.get_edgecolor()) # plt.close() plt.figure(figsize=(15,10)) border = plt.figure(figsize=(15,10), linewidth=10, edgecolor="#8B4513") plt.annotate("HAZARD REPORT FORM", xy=(490,570), xycoords='axes 
points',fontsize=27, weight="bold") plt.axis('off') pdf.savefig(edgecolor=border.get_edgecolor()) #opening the file again to add an image at the top doc = fitz.open(name_ + '.pdf') rect1= fitz.Rect(-500,30,500,650) rect2 = fitz.Rect(500,100,1100,580) page = doc.loadPage(1) page.insertImage(rect1, filename="D:\\Ken related\\HazCom_students\\Project\\interface\\emergency.jpg") page.insertImage(rect2, filename="D:\\Ken related\\HazCom_students\\Project\\interface\\annotation.jpg") doc.saveIncr() if desg=="Common Worker": if risk_rating <28: with PdfPages(name_ + '.pdf') as pdf: plt.figure(figsize=(20,10)) border = plt.figure(figsize=(20,10), linewidth=10, edgecolor="#8B4513") plt.title("HAZCOM REPORT FOR ROOF FALLS", fontsize= 30, weight= "bold") plt.axis('off') headerInfoLow(name_,desg,loca_,zone_) for x in range(1,10): plt.annotate(str(x)+") "+Precaution[Precaution_headings[0]][x], xy=(-90, 400-x*25), xycoords='axes points',fontsize=12) pdf.savefig(edgecolor=border.get_edgecolor()) elif risk_rating <48: with PdfPages(name_ + '.pdf') as pdf: plt.figure(figsize=(17,10)) border = plt.figure(figsize=(17,10), linewidth=10, edgecolor="#8B4513") plt.title("HAZCOM REPORT FOR ROOF FALLS", fontsize= 30, weight= "bold") plt.axis('off') headerInfoMed(name_,desg,loca_,zone_) for x in range(1,10): plt.annotate(str(x)+") "+Precaution[Precaution_headings[0]][x], xy=(-90, 400-x*20), xycoords='axes points',fontsize=10) for x in range(1,6): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[0]][x], xy=(-90, 180-x*20), xycoords='axes points',fontsize=13) for x in range(1,4): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[17]][x], xy=(-90, 50-x*20), xycoords='axes points',fontsize=13) pdf.savefig(edgecolor=border.get_edgecolor()) elif risk_rating <70: with PdfPages(name_ + '.pdf') as pdf: # Page 1 plt.figure(figsize=(15,10)) border = plt.figure(figsize=(17,10), linewidth=10, edgecolor="#8B4513") plt.title("HAZCOM REPORT FOR ROOF FALLS", fontsize= 30, weight= "bold") 
plt.axis('off') headerInfoHigh(name_,desg,loca_,zone_) for x in range(1,6): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[0]][x], xy=(-90,400-x*25), xycoords='axes points',fontsize=14) for x in range(1,4): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[17]][x], xy=(-90, 230-x*25), xycoords='axes points',fontsize=14) pdf.savefig(edgecolor=border.get_edgecolor()) # Page 2 plt.figure(figsize=(10,11)) border = plt.figure(figsize=(10,11), linewidth=10, edgecolor="#8B4513") plt.axis('off') pdf.savefig(edgecolor=border.get_edgecolor()) #opening the file again to add an image on Page 2 doc = fitz.open(name_ + '.pdf') rect= fitz.Rect(50,50,700,700) page = doc.loadPage(1) page.insertImage(rect, filename="D:\\Ken related\\HazCom_students\\Project\\interface\\emergency.jpg") doc.saveIncr() # Very high risk else: with PdfPages(name_ + '.pdf') as pdf: plt.figure(figsize=(15,10)) border = plt.figure(figsize=(15,10), linewidth=10, edgecolor="#8B4513") plt.title("HAZCOM REPORT FOR ROOF FALLS", fontsize= 30, weight= "bold") plt.axis('off') headerInfoVeryHigh(name_,desg,loca_,zone_) for x in range(1,6): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[0]][x], xy=(-90,400-x*20), xycoords='axes points',fontsize=14) for x in range(1,5): plt.annotate(str(x)+") "+Mitigation[Mitigation_headings[3]][x], xy=(-90, 260-x*20), xycoords='axes points',fontsize=14) pdf.savefig(edgecolor=border.get_edgecolor()) # plt.close() plt.figure(figsize=(15,10)) border = plt.figure(figsize=(15,10), linewidth=10, edgecolor="#8B4513") plt.annotate("HAZARD REPORT FORM", xy=(490,570), xycoords='axes points',fontsize=27, weight="bold") plt.axis('off') pdf.savefig(edgecolor=border.get_edgecolor()) #opening the file again to add an image at the top doc = fitz.open(name_ + '.pdf') rect1= fitz.Rect(-500,30,500,650) rect2 = fitz.Rect(500,100,1100,580) page = doc.loadPage(1) page.insertImage(rect1, filename="D:\\Ken related\\HazCom_students\\Project\\interface\\emergency.jpg") 
page.insertImage(rect2, filename="D:\\Ken related\\HazCom_students\\Project\\interface\\annotation.jpg") doc.saveIncr()