def convertAnnotatedPDF(fname, refNrPath, origPDF):
    """
    Converts a PDF and its annotations into one PDF.

    For every page hash listed in ``<refNrPath>.content`` the matching
    ``<refNrPath>/<hash>.rm`` file is rendered to SVG with ``rm2svg`` (an
    empty placeholder is used when that file does not exist), converted to a
    one-page PDF with ``rsvg-convert``, merged with ``convert`` and finally
    stamped onto the original with ``pdftk``.

    Args:
        fname: Name of the document; only used in the progress message.
        refNrPath: Path prefix of the document's ``.content`` file and of
            the directory holding its ``.rm`` page files.
        origPDF: Path of the PDF that was annotated. The result is written
            next to it as ``origPDF[:-4] + "_annot.pdf"``.

    Returns:
        True after all steps have been issued. Exit codes of the external
        tools are not checked.
    """
    import shlex

    # tempDir is where the intermediate files are saved
    os.makedirs("tempDir", exist_ok=True)
    print(fname + " is being exported.")

    # get the size of the original pdf from its first page; the values are
    # converted while the file is still open
    with open(origPDF, "rb") as pdf_file:
        pdfsize = PdfFileReader(pdf_file).getPage(0).mediaBox
        pdfx = int(pdfsize[2])
        pdfy = int(pdfsize[3])

    # rM will not create a file when the page is empty so this is a
    # placeholder empty file to use.
    rm2svg(emptyRm, "tempDir/emptyrm.svg", coloured_annotations=True,
           x_width=pdfx, y_width=pdfy)

    # find what the page hashes are
    with open(refNrPath + ".content") as content_file:
        content = json.load(content_file)

    # convert all pages
    pdflist = []
    for pg, pg_hash in enumerate(content['pages']):
        rmpath = refNrPath + "/" + pg_hash + ".rm"
        if os.path.isfile(rmpath):
            svg_path = "tempDir/temprm" + str(pg) + ".svg"
            rm2svg(rmpath, svg_path, coloured_annotations=False,
                   x_width=pdfx, y_width=pdfy)
        else:
            svg_path = "tempDir/emptyrm.svg"
        page_pdf = "tempDir/temppdf" + str(pg) + ".pdf"
        os.system("rsvg-convert -f pdf -o " + page_pdf + " " + svg_path)
        pdflist.append(page_pdf)

    # merge the annotated pages
    merged_rm = "tempDir/merged_rm.pdf"
    os.system("convert " + " ".join(pdflist) + " " + merged_rm)

    # stamp extracted annotations onto original with pdftk; the user
    # supplied paths are quoted so names containing spaces survive the shell
    annotated_pdf = origPDF[:-4] + "_annot.pdf"
    stampCmd = "".join([
        "pdftk ", shlex.quote(origPDF), " multistamp ", merged_rm,
        " output ", shlex.quote(annotated_pdf)
    ])
    os.system(stampCmd)

    # Remove temporary files
    shutil.rmtree("tempDir")
    return True
def convertNotebook(fname, refNrPath):
    """
    Converts Notebook to a PDF by taking the annotations and the template
    background for that notebook.

    The template names are read line by line from ``<refNrPath>.pagedata``
    and looked up as SVG files under the module-level ``bgPath``. The page
    hashes come from ``<refNrPath>.content``. The result is written to
    ``<syncDirectory>/Notes/<fname>.pdf``.

    Args:
        fname: Name of the notebook; used as the output file name.
        refNrPath: Path prefix of the notebook's ``.pagedata`` and
            ``.content`` files and of the directory holding its ``.rm``
            page files.

    Returns:
        True after all steps have been issued. Exit codes of the external
        tools are not checked.
    """
    # tempDir is where the intermediate files are saved
    os.makedirs('tempDir', exist_ok=True)

    # get list of backgrounds
    with open(refNrPath + ".pagedata") as file:
        backgrounds = [line.strip() for line in file]

    bglist = []
    for bg_pg, bg in enumerate(backgrounds):
        bg_pdf = "tempDir/bg_" + str(bg_pg) + ".pdf"
        # spaces in the template name are backslash-escaped for the shell
        bg_svg = str(bgPath) + bg.replace(" ", "\\ ") + ".svg"
        os.system("rsvg-convert -f pdf -o " + bg_pdf + " " + bg_svg)
        bglist.append(bg_pdf)

    merged_bg = "tempDir/merged_bg.pdf"
    os.system("convert " + " ".join(bglist) + " " + merged_bg)

    # Read the first page of the pdf we just made. The size is not used
    # further below; the read is kept because it raises right here when the
    # merged background could not be produced.
    with open(merged_bg, 'rb') as bg_file:
        PdfFileReader(bg_file).getPage(0).mediaBox

    # find out the page hashes
    with open(refNrPath + ".content") as content_file:
        content = json.load(content_file)

    # Now convert all Pages
    pdflist = []
    for pg, pg_hash in enumerate(content['pages']):
        rmpath = refNrPath + "/" + pg_hash + ".rm"
        page_svg = "tempDir/temprm" + str(pg) + ".svg"
        page_pdf = "tempDir/temppdf" + str(pg) + ".pdf"
        # skip page if it doesn't exist anymore. This is fine in notebooks
        # because nobody cares about the rM numbering.
        try:
            rm2svg(rmpath, page_svg, coloured_annotations=True)
        except FileNotFoundError:
            continue
        os.system("rsvg-convert -f pdf -o " + page_pdf + " " + page_svg)
        pdflist.append(page_pdf)

    # merge all annotation pages
    merged_rm = "tempDir/merged_rm.pdf"
    os.system("convert " + " ".join(pdflist) + " " + merged_rm)

    # combine with background
    stampCmd = "".join([
        "pdftk ", merged_bg, " multistamp ", merged_rm,
        " output " + syncDirectory + "/Notes/" + fname + ".pdf"
    ])
    os.system(stampCmd)

    # Delete temp directory
    shutil.rmtree("tempDir")
    return True
def convertAnnotatedPDF(fname, refNrPath, origPDF, verbose=False):
    """
    Converts a PDF and its annotations into one PDF.

    The original is passed through ``resize_pages`` when its aspect ratio
    differs from the 1872 x 1404 ratio used for the annotations. Each page
    listed in ``<refNrPath>.content`` is then rendered from its ``.rm`` file
    (an empty placeholder is used when that file does not exist), the pages
    are merged with ``convert`` and stamped onto the PDF with ``pdftk``.

    Args:
        fname: Name of the document; only used in the progress message.
        refNrPath: Path prefix of the document's ``.content`` file and of
            the directory holding its ``.rm`` page files.
        origPDF: Path of the PDF that was annotated. The result is written
            next to it as ``origPDF[:-4] + "_annot.pdf"``.
        verbose: Print progress details and the shell commands being run.

    Returns:
        False (after a warning) when ``origPDF`` cannot be opened, True
        otherwise. Exit codes of the external tools are not checked.
    """
    import shlex

    # tempDir is where the intermediate files are saved
    os.makedirs("tempDir", exist_ok=True)
    print(fname + " is being exported.")

    # get info on origin pdf
    try:
        input1 = pikepdf.Pdf.open(origPDF)
    except Exception:
        warnings.warn("could not read " + origPDF)
        return False
    pdfsize = input1.pages[0].trimbox
    pdfx = int(pdfsize[2])
    pdfy = int(pdfsize[3])
    if verbose:
        print(f"the pdf has size {pdfx}x{pdfy}")

    # rM will not create a file when the page is empty so this is a
    # placeholder empty file to use.
    rm2svg(emptyRm, "tempDir/emptyrm.svg", coloured_annotations=True)

    ratio_rm = 1872 / 1404
    ratio_pdf = pdfy / pdfx

    # landscape pdfs are wider than they are high
    landscape = ratio_pdf < 1
    if landscape and verbose:
        print(f"its landscape format {ratio_pdf}")

    # Stamp onto the original unless it has to be resized first, so the
    # name is always bound when the stamp command is built.
    resizedPDF = origPDF
    if ratio_pdf != ratio_rm:
        pdf_obj = resize_pages(origPDF, landscape, verbose=verbose)
        # must use the same spelling as the directory created above
        resizedPDF = "tempDir/resized.pdf"
        pdf_obj.save(resizedPDF)

    # find what the page hashes are
    with open(refNrPath + ".content") as content_file:
        content = json.load(content_file)

    # convert all pages
    pdflist = []
    for pg, pg_hash in enumerate(content['pages']):
        if verbose:
            print(f"converting page {pg}")
        rmpath = refNrPath + "/" + pg_hash + ".rm"
        if os.path.isfile(rmpath):
            svg_path = "tempDir/temprm" + str(pg) + ".svg"
            rm2svg(rmpath, svg_path, coloured_annotations=False)
        else:
            svg_path = "tempDir/emptyrm.svg"
        page_pdf = "tempDir/temppdf" + str(pg) + ".pdf"
        os.system("rsvg-convert -f pdf -a -o " + page_pdf + " " + svg_path)
        pdflist.append(page_pdf)

    # merging at high quality gets very slow, so we decrease quality for
    # big files (last page index above 80, i.e. more than 81 pages)
    density_pdf = 200
    quality_pdf = 80
    if len(pdflist) > 81:
        density_pdf = 100
        quality_pdf = 50

    # merge the annotated pages
    merged_rm = "tempDir/merged_rm.pdf"
    mergeCmd = (
        f"convert -density {density_pdf}x{density_pdf} -quality {quality_pdf} "
        + " ".join(pdflist) + " " + merged_rm
    )
    if verbose:
        print(f"Doing Merge: {mergeCmd}")
    os.system(mergeCmd)

    # stamp extracted annotations onto original with pdftk; the user
    # supplied paths are quoted so names containing spaces survive the shell
    annotated_pdf = origPDF[:-4] + "_annot.pdf"
    stampCmd = "".join([
        "pdftk ", shlex.quote(resizedPDF), " multistamp ", merged_rm,
        " output ", shlex.quote(annotated_pdf)
    ])
    if verbose:
        print(f"Doing Stamp: {stampCmd}")
    os.system(stampCmd)

    # Remove temporary files
    shutil.rmtree("tempDir")
    return True
def convertAnnotatedPDF(fname, refNrPath, origPDF):
    """
    Converts a PDF and its annotations into one PDF.

    Landscape originals are rotated with ``pdftk``, the page canvas is then
    extended with ghostscript so that its aspect ratio matches the
    1872 x 1404 ratio used for the annotations, and landscape files are
    rotated again afterwards. Each page listed in ``<refNrPath>.content`` is
    rendered from its ``.rm`` file (an empty placeholder is used when that
    file does not exist), the pages are merged with ``convert`` and stamped
    onto the resized PDF with ``pdftk``.

    Args:
        fname: Name of the document; only used in the progress message.
        refNrPath: Path prefix of the document's ``.content`` file and of
            the directory holding its ``.rm`` page files.
        origPDF: Path of the PDF that was annotated. The result is written
            next to it as ``origPDF[:-4] + "_annot.pdf"``.

    Returns:
        False (after a warning) when ``origPDF`` cannot be opened, True
        otherwise. Exit codes of the external tools are not checked.
    """
    import shlex

    # tempDir is where the intermediate files are saved
    os.makedirs("tempDir", exist_ok=True)
    print(fname + " is being exported.")

    # get info on origin pdf
    pdf_file = None
    try:
        pdf_file = open(origPDF, "rb")
        input1 = PdfFileReader(pdf_file)
    except Exception:
        if pdf_file is not None:
            pdf_file.close()
        warnings.warn("could not read " + origPDF)
        return False
    with pdf_file:
        pdfsize = input1.getPage(0).mediaBox
        pdfx = int(pdfsize[2])
        pdfy = int(pdfsize[3])

    # rM will not create a file when the page is empty so this is a
    # placeholder empty file to use.
    rm2svg(emptyRm, "tempDir/emptyrm.svg", coloured_annotations=True)

    ratio_rm = 1872 / 1404
    ratio_pdf = pdfy / pdfx

    # rotate landscape pdfs
    landscape = ratio_pdf < 1
    if landscape:
        # every intermediate file uses the "tempDir" spelling of the
        # directory created above
        os.system("pdftk " + shlex.quote(origPDF)
                  + " cat 1-endwest output tempDir/rotated.pdf")
        pdfx, pdfy = pdfy, pdfx
        ratio_pdf = pdfy / pdfx
        resizedPDF = "tempDir/rotated.pdf"
    else:
        resizedPDF = origPDF

    # decide which side keeps its length and which one is extended
    if ratio_pdf < ratio_rm:
        keep_side, change_side = "x", "y"
    else:
        keep_side, change_side = "y", "x"
    size = {"x": pdfx, "y": pdfy}
    keep_side_is = size[keep_side]
    change_side_is = size[change_side]
    if keep_side_is < change_side_is:
        change_side_should = keep_side_is * ratio_rm
    else:
        change_side_should = keep_side_is / ratio_rm
    # the page content is only shifted when the height is what changes
    if change_side == "y":
        yoffset = change_side_should - change_side_is
        output_size = [keep_side_is, change_side_should]
    else:
        yoffset = 0
        output_size = [change_side_should, keep_side_is]

    # the literal "0" appended to each dimension scales it by ten for -g
    rszCmd = (
        "gs -o tempDir/resized_org.pdf -sDEVICE=pdfwrite -g"
        + str(int(output_size[0])) + "0x" + str(int(output_size[1]))
        + "0 -c '<</PageOffset [0 " + str(int(yoffset))
        + "]>> setpagedevice' -f " + shlex.quote(resizedPDF)
    )
    os.system(rszCmd)

    if landscape:
        os.system("pdftk tempDir/resized_org.pdf cat 1-endnorth "
                  "output tempDir/resized_org2.pdf")
        resizedPDF = "tempDir/resized_org2.pdf"
    else:
        resizedPDF = "tempDir/resized_org.pdf"

    # find what the page hashes are
    with open(refNrPath + ".content") as content_file:
        content = json.load(content_file)

    # convert all pages
    pdflist = []
    for pg, pg_hash in enumerate(content['pages']):
        rmpath = refNrPath + "/" + pg_hash + ".rm"
        if os.path.isfile(rmpath):
            svg_path = "tempDir/temprm" + str(pg) + ".svg"
            rm2svg(rmpath, svg_path, coloured_annotations=False)
        else:
            svg_path = "tempDir/emptyrm.svg"
        page_pdf = "tempDir/temppdf" + str(pg) + ".pdf"
        os.system("rsvg-convert -f pdf -a -o " + page_pdf + " " + svg_path)
        pdflist.append(page_pdf)

    # merge the annotated pages
    merged_rm = "tempDir/merged_rm.pdf"
    mergeCmd = ("convert -density 200x200 -quality 80 "
                + " ".join(pdflist) + " " + merged_rm)
    os.system(mergeCmd)

    # stamp extracted annotations onto original with pdftk; the user
    # supplied output path is quoted so names with spaces survive the shell
    annotated_pdf = origPDF[:-4] + "_annot.pdf"
    stampCmd = "".join([
        "pdftk ", resizedPDF, " multistamp ", merged_rm,
        " output ", shlex.quote(annotated_pdf)
    ])
    os.system(stampCmd)

    # Remove temporary files
    shutil.rmtree("tempDir")
    return True