Beispiel #1
0
    def write_with_template(self,out_file="",template_filename=""):
        """
        Merge the buffered PDF onto the template's first page and write it out.

        Only page 0 of the template and page 0 of the PDF held in
        ``self.buffer`` are used, so the output always has exactly one page.

        :param out_file: path of the PDF to create (opened with mode ``'wb'``).
        :param template_filename: path of the template PDF; when empty,
            ``self.template_filename`` is used instead.
        """
        # Read the template and take its first page as the base layer.
        template_filename = template_filename or self.template_filename
        reader = PdfFileReader(template_filename)
        page = reader.getPage(0)

        # Rewind the buffer so its content can be parsed as a PDF.
        self.buffer.seek(0)
        new_pdf = PdfFileReader(self.buffer)

        # Draw the buffered content on top of the template page.
        page.mergePage(new_pdf.getPage(0))

        # Hand the merged page to a writer and save it.
        writer = PdfFileWriter()
        writer.addPage(page)

        with open(out_file, 'wb') as f:
            writer.write(f)
Beispiel #2
0
    def domerge(self):
        """
        Main merge method (Python 2 code).

        Calls ``self.get_file_list()``, then reads every file named in
        ``self.files``, passes each page to ``self.add_footer`` together with
        the file name and the zero-based page index inside that file, and
        appends the page to a single writer that is saved to ``self.output``.

        :return: None
        """
        print "Getting all pdf files in the folder"
        # Presumably fills self.files - confirm in get_file_list().
        self.get_file_list()
        print "Going to start merge"
        # NOTE(review): pdfmerger is created but never used in this method.
        pdfmerger = PdfFileMerger()
        writer = PdfFileWriter()
        # NOTE(review): the output file is opened before any input is read, so
        # a failure below leaves an empty/partial file and an unclosed handle.
        outputStream = file(self.output, "wb")

        for filename in self.files:
            print "Going to merge %s"%filename
            # NOTE(review): the input handle is never closed explicitly.
            reader = PdfFileReader(file(filename, 'rb'))
            # page_num restarts at 0 for every input file.
            page_num = 0
            for page in reader.pages:
                self.add_footer(page,filename,page_num)
                page_num +=1
                writer.addPage(page)
        writer.write(outputStream)
        outputStream.close()              
        print "Done merging"
Beispiel #3
0
def combine_pdf_pages(pdfpath, pagesgroups, verbose=False):
    """
    Combine groups of pages of a PDF file vertically, replacing the file.

    For every group a blank page is created whose width is the widest page of
    the group and whose height is the sum of the group's page heights; the
    pages are then stacked top to bottom in the order given. The result is
    written to a temporary file which finally replaces ``pdfpath``.

    @type pdfpath: str or unicode
    @param pdfpath: path of the PDF to rewrite in place
    @type pagesgroups: list of (list of int)
    @param pagesgroups: zero-based page indices, one inner list per output page
    @type verbose: bool
    @param verbose: print progress messages when true
    """
    if verbose:
        print("Opening file " + pdfpath)

    # The input must stay open until the writer has produced its output.
    with open(pdfpath, 'rb') as fi:
        pdf = PdfFileReader(fi)
        pdfout = PdfFileWriter()

        for pagesgroup in pagesgroups:
            if verbose:
                print("Combining pages:", end=" ")

            heights = [pdf.pages[i].mediaBox.getHeight() for i in pagesgroup]
            widths = [pdf.pages[i].mediaBox.getWidth() for i in pagesgroup]

            page_out = pdfout.addBlankPage(width=max(widths),
                                           height=sum(heights))
            for i, p in enumerate(pagesgroup):
                if verbose:
                    print(p, end=" ")
                # Shift each page up by the total height of the pages that
                # follow it, so the first page of the group ends up on top.
                page_out.mergeTranslatedPage(pdf.pages[p],
                                             tx=0,
                                             ty=sum(heights[i + 1:]))
            if verbose:
                print()

        # Export the merged PDF into a temporary output file.
        fo = create_tmpfile('wb')
        try:
            if verbose:
                print("Exporting merged pdf in file {}".format(fo.name))
            pdfout.write(fo)
        finally:
            fo.close()

    if verbose:
        print("Moving exported pdf to: " + pdfpath)
    # os.replace overwrites the target in one step, so the original is never
    # deleted before the new file is in place.
    os.replace(fo.name, pdfpath)
Beispiel #4
0
 def merge(self, pdf_one, pdf_two, filename='my.pdf', output_dir='D:/pdf/'):
     '''
     Interleave two scanned PDFs back into their original page order.

     ``pdf_one`` holds the scanned front sides and ``pdf_two`` the scanned
     back sides. Front pages are taken first to last, back pages last to
     first, alternating front/back. When ``pdf_two`` has fewer pages than
     ``pdf_one`` the remaining front pages are added without a back page.

     :param pdf_one: path of the PDF with the front sides
     :param pdf_two: path of the PDF with the back sides
     :param filename: name of the output file
     :param output_dir: output directory; concatenated with ``filename`` as
         is, so it must end with a path separator
     :return: None
     '''
     with open(pdf_one, 'rb') as input_one, open(pdf_two, 'rb') as input_two:
         pdf_input_one = PdfFileReader(input_one)
         pdf_input_two = PdfFileReader(input_two)
         numOne = pdf_input_one.getNumPages()
         numTwo = pdf_input_two.getNumPages()
         print(numOne, numTwo)
         pdf_output = PdfFileWriter()
         index_two = numTwo - 1
         for index_one in range(numOne):
             print(index_one, index_two)
             pdf_output.addPage(pdf_input_one.getPage(index_one))
             # A negative index would wrap around and re-add pages from the
             # end of pdf_two, so stop taking back pages once exhausted.
             if index_two >= 0:
                 pdf_output.addPage(pdf_input_two.getPage(index_two))
             index_two -= 1
         pdf_name = output_dir + filename
         # Write while both inputs are still open.
         with open(pdf_name, 'wb') as output_stream:
             pdf_output.write(output_stream)
     print('Done!')
Beispiel #5
0
    def finalize_print_preparation(self):
        """Split the multi-page output PDF into rotated single-page PDFs.

        Each page of ``self._full_output_path_`` is rotated 90 degrees
        counter-clockwise and saved on its own inside
        ``self._output_directory_name`` as
        ``<self._output_base_filename>_page_NN.pdf`` (NN is the one-based,
        zero-padded page number). A short summary is printed afterwards.

        Taken from `pythonlibrary.org
        <https://www.blog.pythonlibrary.org/2018/04/11/splitting-and-merging-pdfs
        -with-python/>`_ in combination with `johndcook.com
        <https://www.johndcook.com/blog/2015/05/01/rotating-pdf-pages-with-python/>`_
        """
        source_pdf: PdfFileReader = PdfFileReader(self._full_output_path_)
        total_pages = source_pdf.getNumPages()

        for index in range(total_pages):
            single_page_writer: PdfFileWriter = PdfFileWriter()
            rotated: PageObject = source_pdf.getPage(index)
            rotated.rotateCounterClockwise(90)
            single_page_writer.addPage(rotated)

            page_label = str(index + 1).zfill(2)
            output_filename: str = (
                f"{self._output_base_filename}_page_{page_label}.pdf")
            target = os.path.join(self._output_directory_name, output_filename)

            with open(target, "wb") as pdf_out:
                single_page_writer.write(pdf_out)

        path_to_pdf = os.path.join(os.getcwd(), self._full_output_path_)
        print(f"Create {total_pages} single paged PDFs.\n\n"
              f"You can find them concatenated at file://"
              f"{path_to_pdf}")
Beispiel #6
0
class ReadPdf(object):
    """Re-write a PDF with a crop applied to every page, chosen by report type."""

    # Right edge (PDF user-space units) shared by every crop rectangle.
    _CROP_WIDTH = 595

    # Report type -> (lower y, upper y) of the media box to keep.
    # Types without an entry ('01', '03' and anything unknown) are not cropped.
    _CROP_BOUNDS = {
        '02': (22, 765),   # body composition report
        '04': (80, 860),   # bone density report
        '05': (35, 842),   # ultrasound bone density report
    }

    def __init__(self,in_file):
        """Open ``in_file`` for reading and prepare an empty writer."""
        # The handle is kept open on purpose: presumably PyPDF2 still reads
        # page data from it when parse() writes the output.
        self.pdf_read = PdfFileReader(open(in_file, 'rb'))
        self.pdf_write = PdfFileWriter()

    def parse(self,out_file,type):
        """Crop every page according to ``type`` and write them to ``out_file``.

        :param out_file: path of the PDF to create
        :param type: report type code ('01'..'05'); codes without a crop rule
            copy the pages unchanged
        """
        bounds = self._CROP_BOUNDS.get(type)
        for page in self.pdf_read.pages:
            if bounds is not None:
                lower, upper = bounds
                page.mediaBox.setUpperLeft((0, upper))
                page.mediaBox.setUpperRight((self._CROP_WIDTH, upper))
                page.mediaBox.setLowerLeft((0, lower))
                page.mediaBox.setLowerRight((self._CROP_WIDTH, lower))

            self.pdf_write.addPage(page)

        with open(out_file, 'wb') as ous:
            self.pdf_write.write(ous)
Beispiel #7
0
def merge_page_nums(pages: List[PageObject], options, filename='page_nums.pdf'):
    """Merge each given page onto the matching page of a page-number PDF.

    The page-number PDF is read from ``<options["folder-dir"]>/tmp/<filename>``.
    For every input page ``i`` the page ``i`` of that PDF is taken, the input
    page is merged into it, and the merged page is collected.

    :param pages: pages to merge; their order selects the page-number page.
    :param options: mapping that must contain the key ``"folder-dir"``.
    :param filename: file name of the page-number PDF inside the tmp folder.
    :return: list of the merged page objects, in input order.
    """
    output = []
    path = os.path.join(options["folder-dir"], "tmp", filename)
    with open(path, 'rb') as f:
        page_num_pdf = PdfFileReader(f)
        for i, page in enumerate(pages):
            target: PageObject = page_num_pdf.getPage(i)
            target.mergePage(page)
            # For some reason the text doesn't appear properly if we don't write first
            thread_print("Writing extra output file because this is somehow necessary")
            # NOTE(review): the page is added to a throw-away writer, but the
            # actual write below is commented out - confirm whether addPage's
            # side effects alone are what makes the text appear.
            tmp_out = PdfFileWriter()
            tmp_out.addPage(target)
            # NOTE(review): this inner ``f`` rebinds the outer handle name and
            # only creates/truncates page_num_overlap.pdf without writing to it.
            with open(os.path.join(options["folder-dir"], "tmp", "page_num_overlap.pdf"), 'wb') as f:
                pass#tmp_out.write(f)
            output.append(target)

    # NOTE(review): the returned pages come from a reader whose file is closed
    # at this point - verify that callers can still write them.
    return output
Beispiel #8
0
def do_highlight(filename,keywords,output_file):
     """Copy ``filename`` to ``output_file`` with highlight annotations added (Python 2 code).

     ``calculate_locations(filename, keywords)`` supplies location objects; for
     every location whose ``page_num`` equals the zero-based index of the page
     being processed, an annotation built by ``highlight_annotation`` from the
     location's ``bounds`` is appended to that page's ``/Annots`` array.
     """
     locations = calculate_locations(filename,keywords)
     # NOTE(review): the input handle is never closed explicitly.
     reader = PdfFileReader(file(filename, 'rb'))
     # NOTE(review): lnk is never used below.
     lnk = DictionaryObject()
     writer = PdfFileWriter()
     # Zero-based index of the page currently being processed.
     num = 0
     for page in reader.pages:
         for l in locations :
            if l.page_num == num :
                # The three string arguments are passed through as-is;
                # their meaning is defined by highlight_annotation.
                annot1 = highlight_annotation([l.bounds],
                            'Comments', 
                            'Author', 'Comments.')   
                # Register the annotation with the writer to get a reference.
                popup_ref = writer._addObject(annot1)
                print l.page_num
                # Append to the existing annotation array, or create it.
                if "/Annots" in page:
                    page['/Annots'].append(popup_ref)
                    annots = page['/Annots']
                else:
                    page[NameObject('/Annots')] = ArrayObject([popup_ref])
                    annots = page['/Annots']
                # NOTE(review): annots_ref is never used afterwards.
                annots_ref = writer._addObject(annots)
         num+=1
         writer.addPage(page) 
        # Finally, write the annotated pages to output_file.
     outputStream = file(output_file, "wb")
     writer.write(outputStream) 
     outputStream.close()
                
Beispiel #9
0
def merge_pdfs(origin, num_pages, aux, verso=None, above=False, allPage=False):
    """
    General-purpose merge helper shared by several plugins.

    ``origin`` is used as the background document and the first page of
    ``aux`` is merged with its pages. Even-indexed pages (or every page when
    ``allPage`` is true) are merged through the ``tasks.merge`` celery task;
    the other pages are passed to ``_merge_verso`` together with ``verso``.

    :param origin: bytes of the background PDF
    :param num_pages: number of pages of ``origin`` to process
    :param aux: bytes of the PDF whose first page is merged in
    :param verso: passed through to ``_merge_verso`` for odd-indexed pages
    :param above: passed through to the merge task and ``_merge_verso``
    :param allPage: merge ``aux`` with every page, not only even-indexed ones
    :return: bytes of the merged PDF
    :raises MergePDFException: on any failure (the traceback is logged)
    """
    try:
        output = PdfFileWriter()
        input_result = PdfFileReader(io.BytesIO(origin))
        pages = []

        for i in range(num_pages):
            page_origin = input_result.getPage(i)

            if allPage or i % 2 == 0:
                # aux is parsed again for every page, so each task receives
                # its own page object.
                page_aux = PdfFileReader(io.BytesIO(aux)).getPage(0)
                pages.append(tasks.merge.delay(page_origin, page_aux, above))
            else:
                # Presumably appends its result to ``pages`` - confirm.
                _merge_verso(verso, page_origin, above, pages)

        for page in pages:
            if isinstance(page, PyPDF2.pdf.PageObject):
                output.addPage(page)
            else:
                # Anything else is a pending celery result: wait for it.
                output.addPage(page.get())

        out_io = io.BytesIO()
        output.write(out_io)
        return out_io.getvalue()

    except Exception:
        labresult.app.logger.error(traceback.format_exc())
        raise MergePDFException('Error while merging PDFs')
Beispiel #10
0
def addBlankpage(inFile, outFile):
    '''
    Copy every page of ``inFile`` to ``outFile`` and append one blank page.

    :param inFile: path (or binary file object) of the PDF to read
    :param outFile: path of the PDF to create
    '''
    pdfFileWriter = PdfFileWriter()

    # PdfFileReader(open(inFile, 'rb')) would work as well.
    pdfFileReader = PdfFileReader(inFile)

    for index in range(pdfFileReader.getNumPages()):
        pdfFileWriter.addPage(pdfFileReader.getPage(index))

    # No size is given, so the blank page takes the size of the last page.
    pdfFileWriter.addBlankPage()

    # Write once, after all pages are collected, and close the handle.
    with open(outFile, 'wb') as out:
        pdfFileWriter.write(out)
Beispiel #11
0
def PdfMultiplePassword(filepaths, password):
    """Write a password-protected copy of every PDF listed in *filepaths*.

    All paths are validated first: the process exits (``sys.exit()``) when a
    path is not an existing file or when any extension differs from ``.pdf``.
    Each copy is written to the current directory as
    ``merge_enc_<n>_<ts>.pdf`` where ``n`` counts from 1 and ``ts`` is a
    module-level value.
    """
    exists_flags = [os.path.isfile(candidate) for candidate in filepaths]
    suffixes = [os.path.splitext(candidate)[1] for candidate in filepaths]
    non_pdf_suffixes = [suffix for suffix in suffixes if suffix != ".pdf"]

    # Guard: report the first missing file and stop.
    if not all(exists_flags):
        missing_at = exists_flags.index(False)
        print(f"File Doesn't Exists: {filepaths[missing_at]}")
        sys.exit()

    # Guard: only files with the exact extension ".pdf" are accepted.
    if non_pdf_suffixes:
        print("Submit Only PDF Files")
        sys.exit()

    for count, path in enumerate(filepaths, start=1):
        pdf_writer = PdfFileWriter()
        pdf_reader = PdfFileReader(path)

        # Copy all pages of the source into the writer.
        for page_index in range(pdf_reader.getNumPages()):
            pdf_writer.addPage(pdf_reader.getPage(page_index))

        output_file = f"merge_enc_{count}_{ts}.pdf"

        pdf_writer.encrypt(password, use_128bit=True)

        with open(output_file, 'wb') as encrypted_out:
            pdf_writer.write(encrypted_out)

        print('File Written To Path:', output_file)
Beispiel #12
0
def PdfPassword(filepath, password):
    """Write a password-protected copy of the PDF at *filepath*.

    The process exits (``sys.exit()``) when *filepath* is not an existing file
    or its extension is not exactly ``.pdf``. The copy is stored next to the
    original as ``temp_<ts>_<original name>``, ``ts`` being a module-level
    value.
    """
    # Guard: the path must point at an existing file.
    if not os.path.isfile(filepath):
        print("Check The File Path")
        sys.exit()

    directory, filename = os.path.split(filepath)
    file_extension = os.path.splitext(filepath)[1]

    # Guard: only the exact extension ".pdf" is accepted.
    if file_extension != ".pdf":
        print(
            f"Not A PDF File Given, File Has Extension: {file_extension}")
        sys.exit()

    output_file = os.path.join(directory, f"temp_{ts}_{filename}")

    pdf_writer = PdfFileWriter()
    source_pdf = PdfFileReader(filepath)

    # Copy every page of the original into the new document.
    for idx in range(source_pdf.numPages):
        pdf_writer.addPage(source_pdf.getPage(idx))

    pdf_writer.encrypt(password, use_128bit=True)

    with open(output_file, "wb") as encrypted_out:
        pdf_writer.write(encrypted_out)

    print('File Written To Path:', output_file)
def RemovePdfOwnerPassword(inputname, outputname):
    '''
    Copy an encrypted PDF to ``outputname`` after decrypting it with an empty password.

    :param inputname: path of the PDF to read
    :param outputname: path of the PDF to create
    :return: 0 on success, -1 when the input is not encrypted (a message is
        printed and nothing is written)
    '''
    with open(inputname, 'rb') as inputfile:
        wrt = PdfFileWriter()
        ipt = PdfFileReader(inputfile)
        try:
            ipt.decrypt("")
        except KeyError as e:
            # A missing '/Encrypt' entry means the file was never encrypted.
            # e.args is used because e.message does not exist on Python 3.
            if e.args and e.args[0] == '/Encrypt':
                print("%s is not an encrypted pdf" % inputname)
                return -1
            raise
        print(ipt.getDocumentInfo())
        for i in range(ipt.getNumPages()):
            wrt.addPage(ipt.getPage(i))
        # Write while the input is still open.
        with open(outputname, "wb") as fl:
            wrt.write(fl)
    return 0
Beispiel #14
0
    def run(self):
        """Download the attachment, drop its last page and report the result.

        Nothing happens when ``self.beforeHandler(self._id, self.attachUrl)``
        returns a true value. Otherwise the URL is downloaded into a randomly
        named file under ``self.tempDir``, every page except the last is
        copied into a second temporary PDF, and ``self.success(self._id,
        path)`` is called with that PDF's path. Any exception is passed to
        ``self.error(exception, self.attachUrl)`` instead of being raised.
        Both temporary files are deleted before returning, so the success
        callback has to consume the PDF before it returns.
        """
        if self.beforeHandler(self._id, self.attachUrl):
            return
        filename = self.tempDir + str(random.random())
        filename1 = self.tempDir + str(random.random()) + '.pdf'
        try:
            urllib.request.urlretrieve(self.attachUrl, filename)
            # Context managers close both streams even when parsing or
            # writing fails, so the cleanup below can always delete the files.
            with open(filename, 'rb') as input_stream:
                pdf_input = PdfFileReader(input_stream)
                pdf_output = PdfFileWriter()

                # Copy every page except the last one.
                for page in range(pdf_input.getNumPages() - 1):
                    pdf_output.addPage(pdf_input.getPage(page))

                with open(filename1, 'wb') as output_stream:
                    pdf_output.write(output_stream)
            if self.success is not None:
                self.success(self._id, filename1)
        except Exception as e:
            if self.error is not None:
                self.error(e, self.attachUrl)
        finally:
            for path in (filename, filename1):
                if os.path.exists(path):
                    os.remove(path)
def generate_a_pdf(filename, num_pages, dir=None):
    """function to generate a random PDF file of N pages with single image per page

    taken from https://stackoverflow.com/questions/2925484/place-image-over-pdf

    Each page is rendered from a temporary JPEG (created by ``make_a_jpeg``
    with a color from ``pick_a_color``) that is deleted again once the page
    has been added, even when rendering fails.

    Args:
        filename (str): path to save the pdf file
        num_pages (int): number of pages to make the pdf file

    KWArgs:
        dir (str): the path to the directory to save the pdf file; the
            current working directory is used when it is empty or None

    Returns:
        str. path to the new pdf file
    """
    pdf = PdfFileWriter()
    for num in range(1, num_pages+1):
        imgTemp = BytesIO()
        jpeg_path = make_a_jpeg('{}.jpeg'.format(str(num)), pick_a_color(num))
        try:
            imgDoc = canvas.Canvas(imgTemp, pagesize=A4)
            imgDoc.drawImage(jpeg_path, 25, 45)
            imgDoc.save()
            pdf.addPage(PdfFileReader(BytesIO(imgTemp.getvalue())).getPage(0))
        finally:
            remove(jpeg_path)
    path = join(dir or getcwd(), filename)
    # Close the output explicitly so the data is flushed before returning.
    with open(path, 'wb') as out:
        pdf.write(out)
    return path
Beispiel #16
0
	def __getPdfTxtAt(self,pageNum,bENHANCE):
		"""Return the OCR text of page ``pageNum`` of ``self.pdfReader``.

		The page is written to ``temp.pdf`` inside the root path, rendered to
		JPEG at 250 dpi, and passed to the OCR tool with the configured
		language. The temporary PDF is removed afterwards, also on failure.
		Any exception raised along the way propagates to the caller.

		``bENHANCE`` is accepted for compatibility but currently unused.
		"""
		RESOLUTION = 250
		tempoutPdfName = 'temp.pdf'
		tempoutPdfNameWithAbsPath = os.path.join(self.__RootPath,tempoutPdfName)
		# Start from a clean slate in case an earlier run left the file behind.
		if os.path.exists(tempoutPdfNameWithAbsPath):
			os.remove(tempoutPdfNameWithAbsPath)
		try:
			# Write the single page into its own temporary PDF.
			pdfWriter = PdfFileWriter()
			pdfWriter.addPage(self.pdfReader.getPage(pageNum))
			with open(tempoutPdfNameWithAbsPath,'wb') as pdfOutput:
				pdfWriter.write(pdfOutput)

			# Render the page and obtain the JPEG bytes.
			with Image(filename=tempoutPdfNameWithAbsPath,resolution=RESOLUTION) as image_pdf:
				image_jpeg = image_pdf.convert('jpeg')
			req_image = image_jpeg.make_blob('jpeg')

			image_filtered = PI.open(io.BytesIO(req_image))
			return self.__tool.image_to_string(
				image_filtered,
				lang=self.__lang,
				builder=pyocr.builders.TextBuilder()
			)
		finally:
			if os.path.exists(tempoutPdfNameWithAbsPath):
				os.remove(tempoutPdfNameWithAbsPath)
Beispiel #17
0
def merge_pdf(file_list, output_path):
    '''Merge the PDFs in ``file_list``, in order, into ``output_path``.

    :param file_list: iterable of paths of the PDFs to merge
    :param output_path: path of the merged PDF to create
    '''
    outpdf = PdfFileWriter()
    handles = []
    try:
        for f in file_list:
            # Inputs stay open until the output is written, then all are
            # closed in the finally block.
            handle = open(f, 'rb')
            handles.append(handle)
            for page in PdfFileReader(handle).pages:
                outpdf.addPage(page)
        with open(output_path, 'wb') as ous:
            outpdf.write(ous)
    finally:
        for handle in handles:
            handle.close()
Beispiel #18
0
 def _create_hyperlinks(self, link_locations, page_locations):
     """Copy ``tmp2.pdf`` to ``self.filename`` and add one link per entry.

     Entry ``i`` gets a link with rectangle ``link_locations[i]`` pointing at
     page ``page_locations[i] - 1``. The page carrying the link is ``i``
     divided by the number of items per page for the current
     ``self.toc_orientation`` ("P" or "L", read from ``settings``), rounded
     down; for any other orientation page 1 is used.
     """
     source = PdfFileReader("tmp2.pdf")
     writer = PdfFileWriter()
     for page_index in range(source.getNumPages()):
         writer.addPage(source.getPage(page_index))

     # Orientation -> settings key holding the number of entries per page.
     items_per_page_keys = {
         "P": "Items on vertical toc",
         "L": "Items on horizontal toc",
     }
     for entry_index, rect in enumerate(link_locations):
         settings_key = items_per_page_keys.get(self.toc_orientation)
         if settings_key is None:
             toc_page = 1
         else:
             toc_page = math.floor(entry_index / settings[settings_key])
         writer.addLink(pagenum=toc_page,
                        pagedest=page_locations[entry_index] - 1,
                        rect=rect,
                        fit="/Fit",
                        border=[0, 0, 0])

     with open(self.filename, 'wb') as out:
         writer.write(out)
Beispiel #19
0
def pdfSplit(pdf_main, pdf_part):
    """Save the last page of ``pdf_main``, rotated 90 degrees clockwise, as ``pdf_part``.

    :param pdf_main: path (or binary file object) of the PDF to read
    :param pdf_part: path of the single-page PDF to create
    :return: the page count of ``pdf_main`` minus one on success, ``False``
        on any failure. For a single-page input the success value is ``0``,
        so callers should test the result with ``is False``.
    """
    try:
        pdf_read_obj = PdfFileReader(pdf_main)
        pdf_write_obj = PdfFileWriter()
        page_num = pdf_read_obj.getNumPages()
        page_last_obj = pdf_read_obj.getPage(page_num - 1)
        page_last_obj.rotateClockwise(90)
        pdf_write_obj.addPage(page_last_obj)
        # Close the output explicitly so the data is flushed before returning.
        with open(pdf_part, 'wb') as out:
            pdf_write_obj.write(out)
        return page_num - 1
    except Exception:
        return False
Beispiel #20
0
def add_number(p, n, x, y, counter=1):
    """Stamp the overlay PDF *n* onto a copy of the first page of *p* and save it.

    *n* holds the bytes of a PDF whose first page is merged, shifted by
    (*x*, *y*), onto a shallow copy of page 0 of reader *p*. The result is
    written to ``numbering/assets/numbered/<counter>.pdf`` below the current
    working directory. Returns ``None``.
    """
    stamped_page = copy.copy(p.getPage(0))
    overlay_reader = PdfFileReader(io.BytesIO(n))
    overlay_page = overlay_reader.getPage(0)

    writer = PdfFileWriter()
    stamped_page.mergeTranslatedPage(overlay_page, x, y)
    writer.addPage(stamped_page)

    target_dir = Path.cwd() / "numbering" / "assets" / "numbered"
    target = target_dir / f"{counter}.pdf"
    with open(target, "wb") as outfile:
        writer.write(outfile)
Beispiel #21
0
def getTitlePDFfromBookmarkfile(pdf_filepath, bookmark_filepath,
                                pdf_filepath_output):
    """Extract the table-of-contents pages of a PDF using a bookmark file.

    The bookmark file is UTF-8 text with one ``<title>\\t<page>`` entry per
    line. The start page is taken from the last line containing '目录'
    (table of contents); the end page is the first bookmark page greater than
    the zero-based start page. Pages ``page_start`` .. ``page_end`` (zero-based
    indices, inclusive) are copied to ``pdf_filepath_output``.

    :param pdf_filepath: path of the PDF to read
    :param bookmark_filepath: path of the bookmark text file
    :param pdf_filepath_output: path of the PDF to create
    :return: None
    """
    with codecs.open(bookmark_filepath, 'r', encoding='utf-8') as bookmark_file:
        lines = bookmark_file.readlines()

    # Find the table-of-contents entry; the last match wins.
    page_start = 0
    for line in lines:
        if line.find(u'目录') >= 0:
            line = line.strip()
            print(line)
            print(line.split('\t'))
            page_start = int(line.split('\t')[1])
    page_start -= 1
    print(page_start)

    # Collect the page number of every bookmark entry.
    page_list = []
    for line in lines:
        line = line.strip()
        if line.find('\t') >= 0:
            page_list.append(int(line.rsplit('\t', 1)[1]))

    # NOTE(review): page_end is decremented once for every entry skipped
    # before a match and stays negative when nothing matches. The "not find"
    # branch below therefore looks unreachable - confirm the intended logic.
    page_end = 0
    for page in page_list:
        if page > page_start:
            page_end = page
            break
        page_end -= 1
    print(page_end)
    if page_end <= page_start and page_start >= 0 and page_end > 0:
        print('not find title page')
        return

    # Both files are closed (and the output flushed) when the block exits.
    with open(pdf_filepath, "rb") as pdf_file, \
            open(pdf_filepath_output, 'wb') as stream:
        pdf = PdfFileReader(pdf_file)
        output = PdfFileWriter()
        for i in range(page_start, page_end + 1):
            output.addPage(pdf.getPage(i))
        output.write(stream)
    def process_pdf_automatically(self):
        """Join the first pages of all PDFs in a chosen folder, ordered by date.

        The user picks a directory; every ``*.pdf`` in it except ``join.pdf``
        is read. The statement date is the ``dd/mm/yyyy`` value following
        "corte" in the first page's text, and the bank is the most frequent
        of "bbva"/"santander" in the last page's text. The first pages are
        written, sorted by date, to ``<dir>/<self.name>.pdf``. When
        ``self.name`` is empty only an error dialog is shown.
        """
        self.statusBar().showMessage('Procesando...')
        if self.name == "":
            self.show_dialog("No fue posible crear el archivo PDF")
            self.statusBar().showMessage('')
            return

        self.dir = QFileDialog.getExistingDirectory()
        files = [x for x in os.listdir(self.dir + '/') if
                 x.endswith('.pdf') and x != "join.pdf"]
        outfile = PdfFileWriter()

        ls = []
        handles = []
        try:
            for i in files:
                # Inputs stay open until the output has been written.
                handle = open(self.dir + '/' + str(i), 'rb')
                handles.append(handle)
                pdf = PdfFileReader(handle)
                page = pdf.getPage(0)
                last = pdf.getPage(pdf.getNumPages() - 1)
                banco = re.findall("(bbva|santander)",
                                   last.extractText().lower())
                text = page.extractText()
                fecha = \
                re.findall("(corte.*[0-9]{1,2}[/][0-9]{1,2}[/][0-9]{2,4})",
                           text.lower())[0]
                fecha = \
                re.findall("([0-9]{1,2}[/][0-9]{1,2}[/][0-9]{2,4})", fecha)[0]
                ls.append({'page': page,
                           'bank': Counter(banco).most_common()[0][0].upper(),
                           'date': fecha})

            # Sort the entries themselves, so each page is added exactly once
            # even when several files share the same date.
            ls.sort(key=lambda entry: datetime.strptime(entry['date'],
                                                        '%d/%m/%Y'))
            for entry in ls:
                outfile.addPage(entry['page'])

            self.statusBar().showMessage('Creando PDF...')

            save_in = self.dir + '/' + self.name + '.pdf'

            with open(save_in, 'wb') as f:
                outfile.write(f)
        finally:
            for handle in handles:
                handle.close()

        self.statusBar().showMessage('Creación del PDF Exitosa')
        self.show_dialog("Acción realizada con éxito")
Beispiel #23
0
def pdf2cut(pdf_in, pdf_out, axis_x, axis_y, width, height):
    """Crop every page of ``pdf_in`` to one rectangle and save as ``pdf_out``.

    The rectangle has its upper-left corner at (``axis_x``, ``axis_y``) and
    extends ``width`` to the right and ``height`` downwards.

    :return: ``pdf_out``
    """
    left, right = axis_x, axis_x + width
    top, bottom = axis_y, axis_y - height
    with open(pdf_in, 'rb') as src:
        pdf_read_obj = PdfFileReader(src)
        pdf_write_obj = PdfFileWriter()
        for page in pdf_read_obj.pages:
            page.mediaBox.setUpperLeft((left, top))
            # The upper-right corner shares the top edge with the upper-left
            # one; (width, height) here would move the top edge to ``height``.
            page.mediaBox.setUpperRight((right, top))
            page.mediaBox.setLowerLeft((left, bottom))
            page.mediaBox.setLowerRight((right, bottom))
            pdf_write_obj.addPage(page)

        # Write while the input is still open.
        with open(pdf_out, 'wb') as f:
            pdf_write_obj.write(f)
    return pdf_out
Beispiel #24
0
    def _merge_pdfs(self, in_dir, out_file):
        """Merges PDFs in a given directory and outputs it to a single PDF file.
        If `same_page_number` is set to true in the config file, all tests will have `max_pages` number of pages.

        Only files whose name matches ``test*.pdf`` (case-insensitive) are
        merged, in sorted name order. The form fields of all inputs are
        combined into one ``/AcroForm`` and ``/NeedAppearances`` is set on it.
        When no file matches, an empty PDF is written.

        Parameters:
            in_dir (str): Path to the input directory containing the PDF files to be merged.
            out_file (str): Path to the merged PDF.
        """
        pw = PdfFileWriter()

        firstPDF = True
        for name in sorted(listdir(in_dir)):
            path = join(in_dir, name)
            if not (isfile(path) and regex.match(
                    r'^test.*\.pdf$', name, flags=regex.IGNORECASE)):
                continue
            pr = PdfFileReader(path, strict=False)

            # see: https://stackoverflow.com/questions/47288578/pdf-form-filled-with-pypdf2-does-not-show-in-print
            form = pr.trailer["/Root"]["/AcroForm"]

            pw.appendPagesFromReader(pr)
            # Pad short tests with blank pages up to max_pages.
            if self.config['same_page_number'] and pr.getNumPages(
            ) < self.config['max_pages']:
                for _ in range(self.config['max_pages'] - pr.getNumPages()):
                    pw.addBlankPage()

            if firstPDF:
                # The first form becomes the form of the merged document.
                pw._root_object.update({NameObject("/AcroForm"): form})
                firstPDF = False
            else:
                pw._root_object["/AcroForm"]["/Fields"].extend(
                    form["/Fields"])

        # Without any merged PDF there is no /AcroForm entry to update.
        if not firstPDF:
            pw._root_object["/AcroForm"].update(
                {NameObject("/NeedAppearances"): BooleanObject(True)})

        with open(out_file, 'wb') as out:
            pw.write(out)
Beispiel #25
0
def split_pdf(inFile, outFile):
    '''
    Split a document: copy everything after the second page into a new file.

    The page count of the input is printed.

    :param inFile:     input file path
    :param outFile:    output file path
    :return: None
    '''
    with open(inFile, 'rb') as src:
        pdfFileReader = PdfFileReader(src)
        page_count = pdfFileReader.getNumPages()
        print(page_count)
        pdfFileWriter = PdfFileWriter()
        # Index 2 is the third page, i.e. the first page after page 2.
        for i in range(2, page_count):
            pdfFileWriter.addPage(pdfFileReader.getPage(i))
        # Write while the input is still open; both handles are closed after.
        with open(outFile, 'wb') as dst:
            pdfFileWriter.write(dst)
Beispiel #26
0
    def createNewBooks(self, pdf_file, stPage, endPage, filename='my.pdf'):
        """Copy pages ``stPage`` .. ``endPage - 1`` of ``pdf_file`` into ``filename``.

        Page indices are zero-based. An encrypted input is decrypted with an
        empty password before its pages are read.

        :param pdf_file: path of the PDF to read
        :param stPage: index of the first page to copy
        :param endPage: index one past the last page to copy
        :param filename: path of the PDF to create
        :return: the string 'Complete knifing'
        """
        with open(pdf_file, "rb") as source:
            pdf_input = PdfFileReader(source)
            if pdf_input.isEncrypted:
                # decrypt() returns a status code, not a reader, so its
                # result must not replace the reader object.
                pdf_input.decrypt('')
            pdf_output = PdfFileWriter()
            for i in range(stPage, endPage):
                pdf_output.addPage(pdf_input.getPage(i))
            # Write while the input is still open.
            with open(filename, 'wb') as output_stream:
                pdf_output.write(output_stream)

        return 'Complete knifing'
 def _removePropertyEndPage(self, file_pdf):
     '''Remove the unneeded last page of the asset detail PDF, in place.

     All pages except the last are written to ``<file_pdf>tmp.pdf``, which
     then takes the place of ``file_pdf``.

     :param file_pdf: path of the PDF to rewrite
     '''
     tmp_pdf = file_pdf + 'tmp.pdf'
     # Both handles are closed before the files are removed/renamed, also
     # when reading or writing fails.
     with open(file_pdf, "rb") as fd_in:
         pdf_in = PdfFileReader(fd_in)
         pdf_out = PdfFileWriter()
         for num in range(pdf_in.getNumPages() - 1):
             pdf_out.addPage(pdf_in.getPage(num))
         with open(tmp_pdf, "wb") as fd_out:
             pdf_out.write(fd_out)
     os.remove(file_pdf)
     os.rename(tmp_pdf, file_pdf)
     print('   > 已把最后一页删除')
Beispiel #28
0
def pdf_cat(input_files, output_stream):
    """Concatenate PDF streams into *output_stream* and close the inputs.

    *input_files* is an iterable of already opened binary streams. They are
    closed only after the output has been written, because the page data
    isn't read from them until the write operation; see
    https://stackoverflow.com/questions/3444645/merge-pdf-files and
    https://stackoverflow.com/questions/6773631/problem-with-closing-python-pypdf-writing-getting-a-valueerror-i-o-operation/6773733#6773733
    """
    input_streams = []
    try:
        input_streams.extend(input_files)
        writer = PdfFileWriter()
        for stream in input_streams:
            reader = PdfFileReader(stream)
            for page_index in range(reader.getNumPages()):
                writer.addPage(reader.getPage(page_index))
        writer.write(output_stream)
    finally:
        for stream in input_streams:
            stream.close()
Beispiel #29
0
def fetchANs(bols, client, workOrderLocation):
    """Fetch the delivery-order PDF for each BOL and merge them into one file.

    For every distinct, non-empty entry of ``bols`` the ticket system is
    searched via ``client.ticket_search`` and ``getPdf`` is asked for the
    matching PDF path. All PDFs found are concatenated, in the order they were
    found, into ``DOs.pdf`` inside ``workOrderLocation``; the individual PDF
    files are deleted afterwards. BOLs for which no PDF was found are reported
    through ``popUpOK``.

    The merged file is written once, after all BOLs have been processed,
    instead of being rebuilt from scratch after every PDF found. No output
    file is created when no PDF was found at all.

    :param bols: iterable of BOL number strings (duplicates and "" are skipped)
    :param client: object exposing ``ticket_search(Title=..., From=...)``
    :param workOrderLocation: directory path that receives ``DOs.pdf``
    """
    pdfs = []
    seen_bols = set()
    failed_bols = []
    for bol in bols:
        if bol == "" or bol in seen_bols:
            continue
        seen_bols.add(bol)
        tickets = client.ticket_search(Title="Delivery Order for BL# " + bol,
                                       From="@msc.com")
        pdf = getPdf(tickets)
        if pdf:
            pdfs.append(pdf)
        else:
            failed_bols.append(bol)

    if pdfs:
        input_streams = []
        with open(workOrderLocation + "\\" + "DOs.pdf", 'wb') as merged_file:
            try:
                # First open all the files, then produce the output file, and
                # finally close the input files. This is necessary because
                # the data isn't read from the input files until the write
                # operation. Thanks to
                # https://stackoverflow.com/questions/6773631/problem-with-closing-python-pypdf-writing-getting-a-valueerror-i-o-operation/6773733#6773733
                for input_file in pdfs:
                    # The inputs are only read, so plain 'rb' is sufficient.
                    input_streams.append(open(input_file, 'rb'))
                writer = PdfFileWriter()
                for stream in input_streams:
                    reader = PdfFileReader(stream)
                    for n in range(reader.getNumPages()):
                        writer.addPage(reader.getPage(n))
                writer.write(merged_file)
            finally:
                for stream in input_streams:
                    stream.close()

    for pdf in pdfs:
        os.remove(pdf)

    if failed_bols:
        # Most recently failed BOL first, each on its own line.
        failed_text = "".join("\n" + bol for bol in reversed(failed_bols))
        popUpOK("Could not find the following BOLs: " + failed_text)
Beispiel #30
0
 def get(self, request, *args, **kwargs):
     """Fill in the 'riyuu-format4' PDF template and return it as a download.

     An overlay page is drawn with reportlab (the drawing itself is delegated
     to ``self.motion_purpose_draw``), merged onto the first page of
     'media/pdf/riyuu-format4.pdf', and the result is sent back as an
     attachment named 'hello.pdf'.
     """
     font_name = "HeiseiMin-W3"
     pdfmetrics.registerFont(UnicodeCIDFont(font_name))
     template = PdfFileReader('media/pdf/riyuu-format4.pdf')

     # In-memory canvas that receives the overlay drawing.
     overlay_stream = io.BytesIO()
     overlay = canvas.Canvas(overlay_stream)
     overlay.setFont(font_name, 11)

     # Layout values handed to motion_purpose_draw unchanged.
     initial = 295
     before_rect_x = 748
     after_rect_x = 776.5
     line_height = 11.9
     input_list = [
         {'label': label, 'before_flag': before, 'after_flag': after}
         for label, before, after in (
             ('便器からの立ち座り', True, False),
             ('トイレまでの移動', False, True),
             ('トイレ出入口の出入(扉の開閉含む)', True, False),
         )
     ]

     material = PdfMaterial.objects.get(key="welfare_equipment")
     overlay = self.motion_purpose_draw(overlay, before_rect_x, after_rect_x,
                                        material.materials, input_list,
                                        initial, line_height)
     overlay.showPage()
     overlay.save()

     # Rewind so the freshly drawn overlay can be parsed as a PDF.
     overlay_stream.seek(0)
     overlay_page = PdfFileReader(overlay_stream).getPage(0)
     base_page = template.getPage(0)
     base_page.mergePage(overlay_page)

     result = PdfFileWriter()
     result.addPage(base_page)
     response_stream = io.BytesIO()
     result.write(response_stream)
     response_stream.seek(0)
     print('finish')
     return FileResponse(response_stream,
                         as_attachment=True,
                         filename='hello.pdf')
Beispiel #31
0
 def get(self, request, *args, **kwargs):
     """Stamp a test string onto 'media/pdf/sample.pdf' and return the result.

     A single overlay page containing the text is drawn with reportlab, merged
     onto the first page of the sample PDF, and the merged page is returned as
     an attachment named 'hello.pdf'.
     """
     font_name = "HeiseiKakuGo-W5"
     pdfmetrics.registerFont(UnicodeCIDFont(font_name))

     overlay_stream = io.BytesIO()
     overlay = canvas.Canvas(overlay_stream)
     base_page = PdfFileReader('media/pdf/sample.pdf').getPage(0)

     # Draw the overlay content and finish the single overlay page.
     overlay.setFont(font_name, 24)
     overlay.drawString(0, 820, "テスト")
     overlay.showPage()
     overlay.save()

     # Rewind so the overlay can be read back as a PDF, then merge it.
     overlay_stream.seek(0)
     overlay_page = PdfFileReader(overlay_stream).getPage(0)
     base_page.mergePage(overlay_page)

     result = PdfFileWriter()
     result.addPage(base_page)
     response_stream = io.BytesIO()
     result.write(response_stream)
     response_stream.seek(0)
     return FileResponse(response_stream,
                         as_attachment=True,
                         filename='hello.pdf')
Beispiel #32
0
def mergePdf(inFileList, outFile):
    '''
    Merge several PDF documents into a single file.

    The output is written exactly once, after all pages of all inputs have
    been collected, and every file opened here is closed again.

    :param inFileList: paths of the documents to merge; they are merged in
                       sorted order
    :param outFile:    path of the merged output file
    :return: None. When inFileList is empty no output file is created.
    '''
    # Sort first, otherwise the merge order is wrong.
    inFileList = sorted(inFileList)
    if not inFileList:
        return

    pdfFileWriter = PdfFileWriter()
    opened_inputs = []
    try:
        # Open and read each file to merge, in turn. The inputs are kept open
        # until the output has been written, since the writer may still need
        # to pull page data from them at write time.
        for inFile in inFileList:
            in_stream = open(inFile, 'rb')
            opened_inputs.append(in_stream)
            pdfReader = PdfFileReader(in_stream)
            for index in range(pdfReader.getNumPages()):
                pdfFileWriter.addPage(pdfReader.getPage(index))

        # Finally, write everything to the output file in one go.
        with open(outFile, 'wb') as out_stream:
            pdfFileWriter.write(out_stream)
    finally:
        for in_stream in opened_inputs:
            in_stream.close()
Beispiel #33
0
    def createNewBooks(self, pdf_file, output_file, output_dir):
        """Copy a PDF without its last page into ``output_dir``.

        ``output_dir`` is created when it does not exist yet. All pages of
        ``pdf_file`` except the final one are written to
        ``output_dir + '/' + output_file``.

        :param pdf_file: path of the source PDF
        :param output_file: file name of the copy to create
        :param output_dir: directory that receives the copy
        """
        if not os.path.exists(output_dir):
            os.mkdir(output_dir)

        outputfilename = output_dir + '/' + output_file

        # Context managers guarantee both files are closed even if reading
        # or writing raises; the input stays open until the write is done.
        with open(pdf_file, 'rb') as input_stream:
            pdf_input = PdfFileReader(input_stream)
            pdf_output = PdfFileWriter()

            # Every page but the last one.
            for page in range(pdf_input.getNumPages() - 1):
                pdf_output.addPage(pdf_input.getPage(page))

            with open(outputfilename, 'wb') as output_stream:
                pdf_output.write(output_stream)
    def _getLtePropertyPrintPage(self, wbs_id, file_pdf, file_end):
        '''Generate the asset-detail page that has to be printed and signed.

        ``self._getPropertyKeyPage`` is asked for ``(page_num, page_end)``
        using the keyword ``self.key_yanshou``. Page ``page_end`` of
        ``file_pdf`` is then saved on its own to ``file_end``. When
        ``page_num == page_end + 1`` the last page is additionally removed
        from ``file_pdf`` via ``self._removePropertyEndPage``.

        Two problems are recorded as an 'E' row in ``self.dats_log`` and make
        the method return without writing anything: the PyPDF2 page count not
        being equal to ``page_num + 1``, and ``page_end == -1`` (keyword page
        not found).

        :param wbs_id: identifier used in progress messages and log rows
        :param file_pdf: path of the source asset-detail PDF
        :param file_end: path of the single-page PDF to create
        '''
        print('---> 开始生成%s要打印签字的资产明细页' % (wbs_id))
        (page_num,
         page_end) = self._getPropertyKeyPage(file_pdf, self.key_yanshou)
        # The with-block closes the source on every exit path, including the
        # early error returns below.
        with open(file_pdf, "rb") as fp_in:
            pdf_in = PdfFileReader(fp_in)
            pageCount = pdf_in.getNumPages()
            if pageCount != page_num + 1:
                print('     > PyPDF2(%d)及pdfminer(%d)判定文件页数不同' %
                      (pageCount, page_num))
                self.dats_log.loc[self.log_index] = [
                    'E', wbs_id, 'PDF文件',
                    'PyPDF2(%d)及pdfminer(%d)判定文件页数不同,未生成最后签字页' %
                    (pageCount, page_num)
                ]
                self.log_index += 1
                return
            if page_end == -1:
                print('   > 未找到关键字【%s】所在的页,也即未找到最后签字页' % (self.key_yanshou))
                self.dats_log.loc[self.log_index] = [
                    'E', wbs_id, 'PDF文件',
                    '未找到关键字【%s】所在的页,也即未找到最后签字页' % (self.key_yanshou)
                ]
                self.log_index += 1
                return
            page = pdf_in.getPage(page_end)
            pdf_out = PdfFileWriter()
            pdf_out.addPage(page)
            with open(file_end, 'wb') as fp_out:
                pdf_out.write(fp_out)
        print('---> 已获取资产明细最后一页并已保存至%s' % (file_end))

        # Runs only after the source handle has been closed, because the
        # helper deletes and recreates file_pdf.
        if page_num == page_end + 1:
            print('   > 需把%s文件删除最后1页' % (file_pdf))
            self._removePropertyEndPage(file_pdf)
Beispiel #35
0
def crop(pdf_in, pdf_out):
    """
    Crop every page of a PDF to its paper size and save the result.

    Parameters
    pdf_in - absolute path to the source pdf
    pdf_out - absolute path for the resulting pdf
    :return: status (currently always True)
    :raises ValueError: if the printing press named for a page is not present
        in PrintingPress._registry
    """
    # NOTE: the second parameter, pdf_out, was added to the function
    # temporarily. In production it is meant to save the crop result to the
    # same file.
    status = True

    # Dict with the paper size for each page
    papers = analyze_papersize(pdf_in)  # like {1: ('Speedmaster', 900, 640), 2: ('Dominant', 640, 450)}

    # TODO Refine this temporary crop fallback used when there is no paper size info.
    if papers == {}:
        # NOTE(review): both paths are interpolated into a shell command
        # unquoted; paths containing spaces or shell metacharacters will
        # break or be interpreted by the shell.
        perl_crop = "perl pdfcrop.pl {} {}".format(pdf_in, pdf_out)
        os.system(perl_crop)
        return status

    # The source stays open until the output is written and is closed on
    # every exit path.
    with open(pdf_in, "rb") as inputstream:
        reader = PdfFileReader(inputstream)
        output = PdfFileWriter()

        # Number of pages
        pages_qty = reader.getNumPages()

        for index in range(pages_qty):
            paper = papers[index + 1]  # papers is keyed by 1-based page number
            paper_machine = paper[0]
            paper_w = paper[1]
            paper_h = paper[2]

            # Reset per page so a page without a matching press can never
            # silently reuse the previous page's press. As before, the last
            # matching registry entry wins.
            machine = None
            for m in PrintingPress._registry:
                if paper_machine == m.name:
                    machine = m
            if machine is None:
                raise ValueError(
                    'Unknown printing press {!r} for page {}'.format(
                        paper_machine, index + 1))

            plate_w = machine.plate_w
            plate_h = machine.plate_h

            page = reader.getPage(index)

            # EXAMPLE
            # The resulting document has a trim box that is 200x200 points
            # and starts at 25,25 points inside the media box.
            # The crop box is 25 points inside the trim box.
            #   print mm(page.mediaBox.getUpperRight_x()), mm(page.mediaBox.getUpperRight_y())
            #   page.trimBox.lowerLeft = (25, 25)
            #   page.trimBox.upperRight = (225, 225)
            #   page.cropBox.lowerLeft = (50, 50)
            #   page.cropBox.upperRight = (200, 200)

            print('Crop page {} to paper {}x{}'.format(index+1, paper_w, paper_h))
            page.mediaBox.lowerLeft = ((pt(plate_w - paper_w)/2), pt(machine.klapan))  # left offset, bottom offset
            page.mediaBox.upperRight = (pt(paper_w + (plate_w - paper_w)/2), pt(paper_h + machine.klapan))  # width+offset, height+offset

            output.addPage(page)

        with open(pdf_out, "wb") as outputstream:
            output.write(outputstream)

    return status