Exemple #1
1
def diff_pdf_pages(pdf1_path, pdf2_path):
    """Return the zero-based indices of the pages that differ between two PDFs.

    Despite their names, both arguments are the raw PDF *contents* (bytes):
    each one is wrapped in ``io.BytesIO`` before being parsed.

    Pages at the same index are each re-serialised on their own as a
    single-page document and the resulting bytes are compared.  A page that
    exists in only one of the two documents counts as different, whichever
    document is the longer one.

    :param pdf1_path: bytes of the first PDF.  A falsy value (``None`` or
        ``b""``) means "no first document": every page of the second PDF is
        then reported.
    :param pdf2_path: bytes of the second PDF.
    :return: ascending list of differing page indices.
    """
    pdf2_fp = PdfFileReader(io.BytesIO(pdf2_path))
    pdf2_len = pdf2_fp.getNumPages()

    if not pdf1_path:
        return list(range(pdf2_len))

    pdf1_fp = PdfFileReader(io.BytesIO(pdf1_path))
    pdf1_len = pdf1_fp.getNumPages()

    def single_page_bytes(reader, index):
        # Serialise one page as a standalone document so it can be compared.
        writer = PdfFileWriter()
        writer.addPage(reader.getPage(index))
        buf = io.BytesIO()
        writer.write(buf)
        return buf.getvalue()

    list_differents = []
    # Walk the longer document so trailing pages of *either* PDF are reported
    # (previously extra pages at the end of the second PDF were ignored).
    for i in range(max(pdf1_len, pdf2_len)):
        if i >= pdf1_len or i >= pdf2_len:
            list_differents.append(i)
        elif single_page_bytes(pdf1_fp, i) != single_page_bytes(pdf2_fp, i):
            list_differents.append(i)
    return list_differents
Exemple #2
0
def add_watermark(pdf_file_in, pdf_file_mark, pdf_file_out):
    """Stamp the first page of a watermark PDF onto every page of a PDF.

    If the input is encrypted, decryption with an empty password is attempted.

    :param pdf_file_in: path of the PDF to watermark.
    :param pdf_file_mark: path of the PDF whose first page is the watermark.
    :param pdf_file_out: path the watermarked PDF is written to.
    :return: ``True`` on success, ``False`` when the input is encrypted and
        cannot be opened with an empty password.
    """
    pdf_output = PdfFileWriter()
    # The input streams must stay open until the output has been written:
    # PyPDF2 reads page content lazily from the underlying files.
    with open(pdf_file_in, 'rb') as input_stream:
        pdf_input = PdfFileReader(input_stream)

        if pdf_input.getIsEncrypted():
            print('该PDF文件被加密了.')
            # Try an empty password.  decrypt() reports a wrong password by
            # returning 0 rather than raising, so the return value has to be
            # checked as well as any exception.
            try:
                decrypted = pdf_input.decrypt('')
            except Exception:
                decrypted = 0
            if not decrypted:
                print('尝试用空密码解密失败.')
                return False
            print('用空密码解密成功.')

        page_num = pdf_input.getNumPages()
        with open(pdf_file_mark, 'rb') as pdf_watermark_input_stream:
            pdf_watermark = PdfFileReader(pdf_watermark_input_stream)
            # Watermark every page.
            for i in range(page_num):
                page = pdf_input.getPage(i)
                page.mergePage(pdf_watermark.getPage(0))
                page.compressContentStreams()  # compress the merged content
                pdf_output.addPage(page)
            with open(pdf_file_out, "wb") as output_stream:
                pdf_output.write(output_stream)
    return True
def imp_exp_pdf(inputfile, outputfile, size, margin, padding):
    """Import a PDF, re-impose its pages in groups of ten and export the result.

    Pages are collected ten at a time and each group (plus a final, shorter
    group if the page count is not a multiple of ten) is handed to
    ``output_one_page`` together with the shared output writer.  Progress is
    printed after every full group.

    :param inputfile: path of the PDF to read.
    :param outputfile: path the resulting PDF is written to.
    :param size: forwarded unchanged to ``output_one_page``.
    :param margin: forwarded unchanged to ``output_one_page``.
    :param padding: forwarded unchanged to ``output_one_page``.
    """
    pages_per_group = 10
    output = PdfFileWriter()

    # Keep the input open until the output is written; pages are read lazily.
    with open(inputfile, 'rb') as input_stream:
        reader = PdfFileReader(input_stream, strict=False)
        total_pages = reader.getNumPages()
        group = []

        for i in range(total_pages):
            group.append(reader.getPage(i))
            if len(group) == pages_per_group:
                output_one_page(group, size, margin, padding, output)
                group = []
                print("Printed {} of {}  [{:.2f}%]".format(
                    i + 1, total_pages, (i + 1) / float(total_pages) * 100))

        # Flush the last, incomplete group.  (The former read of
        # 'BlankA4.pdf' here was dead code whose result was never used.)
        if group:
            output_one_page(group, size, margin, padding, output)

        print('')
        print('Completed converting.')
        print('Saving...')
        with open(outputfile, "wb") as output_stream:
            output.write(output_stream)
    print('END OF PROGRAM')
Exemple #4
0
    def __call__(self, data, attachments=None, pages=None):
        """Render every templated field, fill the PDF and assemble the output.

        :param data: passed to ``self.template_args`` to build the keyword
            arguments each field template is rendered with.
        :param attachments: optional iterable of objects exposing ``pdf()``;
            each result is merged onto a new blank page appended to the output.
        :param pages: optional iterable of page indices to keep, in order.
            A falsy value (``None`` or empty) keeps every page.
        :return: a ``PdfFileWriter`` holding the selected pages followed by
            the attachment pages.
        """
        self.rendered = {}
        for field, ctx in self.fields.items():
            if "template" not in ctx:
                continue

            self.context = ctx
            kwargs = self.template_args(data)
            template = self.context["template"]

            try:
                rendered_field = template.render(**kwargs)
            except Exception as err:
                # Best effort: a field that fails to render is logged and skipped.
                logger.error("%s: %s %s", field, template, err)
                continue

            # Skip the field if it is already rendered by filter
            if field not in self.rendered:
                if PY3:
                    # NOTE(review): assumes field names are bytes on Python 3 -- confirm.
                    field = field.decode('utf-8')
                self.rendered[field] = rendered_field

        filled = PdfFileReader(self.exec_pdftk(self.rendered))
        for pagenumber, watermark in self.watermarks:
            page = filled.getPage(pagenumber)
            page.mergePage(watermark)

        output = PdfFileWriter()
        pages = pages or xrange(filled.getNumPages())
        for p in pages:
            output.addPage(filled.getPage(p))

        # ``None`` replaces the former mutable ``[]`` default.
        for attachment in attachments or ():
            output.addBlankPage().mergePage(attachment.pdf())

        return output
def splitPdf(inputPath, splitLeftPath, splitRightPath, splitIndex):
    """Split a PDF into two files at a zero-based page index.

    Pages ``[0, splitIndex)`` go to ``splitLeftPath`` and pages
    ``[splitIndex, end)`` go to ``splitRightPath``.  An invalid index prints
    a message and returns without writing anything.

    :param inputPath: path of the PDF to split.
    :param splitLeftPath: output path for the pages before ``splitIndex``.
    :param splitRightPath: output path for the pages from ``splitIndex`` on.
    :param splitIndex: zero-based index of the first page of the right part;
        must satisfy ``0 <= splitIndex < number of pages``.
    """
    # Validate what can be validated before touching the file system.
    if splitIndex < 0:
        print("split index should be a non-negative number. task canceled!")
        return

    # The input stays open until both outputs are written (lazy page reads).
    with open(inputPath, "rb") as inputStream:
        pdf = PdfFileReader(inputStream)

        numOfPages = pdf.getNumPages()
        if numOfPages <= splitIndex:
            print("split index is out of page range. task canceled!")
            return

        leftWriter = PdfFileWriter()
        rightWriter = PdfFileWriter()

        for i in range(numOfPages):
            target = leftWriter if i < splitIndex else rightWriter
            target.addPage(pdf.getPage(i))

        for writer, path in ((leftWriter, splitLeftPath),
                             (rightWriter, splitRightPath)):
            with open(path, "wb") as stream:
                writer.write(stream)
def add_signature(request, registration_id):
    """Stamp the signature on a registration's convention and finalise it.

    The first page of the signature PDF is merged onto the third page of the
    registration's convention PDF; the first three pages are saved as
    ``convention_<key>_final.pdf`` next to the original.  The registration is
    then pointed at the final file, moved to state 3 and saved, the
    convocation mail is sent, and the user is redirected to the archive list.

    :param request: the incoming HTTP request (not otherwise used here).
    :param registration_id: primary key of the ``Registration``; a missing
        record results in a 404.
    :return: a redirect to ``'registration-archive-list'``.
    """
    registration = get_object_or_404(models.Registration, pk=registration_id)
    str_key = str(registration.key)

    # Path of the convention relative to MEDIA_ROOT, without extension.
    convention_base = 'registration_data/conventions/'+str_key+'/convention_'+str_key
    final_name = convention_base+'_final.pdf'

    path_convention = settings.MEDIA_ROOT+'/'+convention_base+'.pdf'
    path_signature = settings.MEDIA_ROOT+'/signature/signature_only.pdf'
    path_final = settings.MEDIA_ROOT+'/'+final_name

    output = PdfFileWriter()
    # Inputs must remain open until the output has been written.
    with open(path_convention, "rb") as convention_stream, \
            open(path_signature, "rb") as signature_stream:
        input1 = PdfFileReader(convention_stream)
        watermark = PdfFileReader(signature_stream)

        # The signature goes on the third page.
        input1.getPage(2).mergePage(watermark.getPage(0))

        for page_number in range(3):
            output.addPage(input1.getPage(page_number))

        with open(path_final, "wb") as outputStream:
            output.write(outputStream)

    registration.convention.name = final_name
    registration.state = 3
    registration.save()
    mail.send_convocation(registration)

    return redirect('registration-archive-list')
Exemple #7
0
def union(input_files, output_file):
    """Concatenate PDFs and images, in order, into a single PDF.

    Inputs whose name ends in ``.pdf`` are appended page by page.  Any other
    input is opened as an image with Pillow, converted to a temporary PDF at
    100 dpi, and appended the same way.

    :param input_files: iterable of input file paths.
    :param output_file: path of the combined PDF to write.
    """
    import tempfile  # local import: only needed for the image conversion

    output = PdfFileWriter()
    open_streams = []   # kept open until the output is written (lazy reads)
    temp_pdfs = []      # converted images, removed afterwards

    try:
        for input_file in input_files:
            if input_file.endswith('.pdf'):
                pdf_path = input_file
            else:  # input_file isn't a pdf, e.g. jpeg, png
                # Convert into a private temp file so an existing PDF that
                # happens to sit next to the image is never overwritten.
                fd, pdf_path = tempfile.mkstemp(suffix='.pdf')
                os.close(fd)
                temp_pdfs.append(pdf_path)
                im = PIL.Image.open(input_file)
                im.save(pdf_path, 'PDF', resolution=100.0)

            stream = open(pdf_path, 'rb')
            open_streams.append(stream)
            reader = PdfFileReader(stream)
            for i in range(reader.getNumPages()):
                output.addPage(reader.getPage(i))

        with open(output_file, 'wb') as outputStream:
            output.write(outputStream)
    finally:
        for stream in open_streams:
            stream.close()
        for temp_pdf in temp_pdfs:
            os.remove(temp_pdf)

    print('completed.')
    print('Union of some file is ' + output_file)
Exemple #8
0
    def generate_document(self, data):
        """Render one document from a row of data and save it as a PDF.

        Each entry of ``self.fields`` is instantiated with this generator, the
        drawing canvas and the value at the same position in ``data``, and
        then rendered.  If ``self.template_file`` is set, the rendered text is
        merged onto the template's first page; otherwise the text page is
        used on its own.  The result is written to
        ``<self.output_dir>/<self.generate_filename(data)>.pdf``.

        :param data: indexable sequence with one value per field.
        :raises IndexError: if ``data`` has fewer values than there are fields.
        """
        packet = StringIO()
        c = canvas.Canvas(packet, pagesize=(self.width, self.height))

        for i, field_cls in enumerate(self.fields):
            # TODO: Catch exception if there is less columns than fields
            field = field_cls(self, c, data[i])
            field.render()

        # Save canvas
        c.save()
        packet.seek(0)
        text = PdfFileReader(packet)
        output = PdfFileWriter()

        template_stream = None
        try:
            if self.template_file is not None:
                # Merge text with base.  The template stream must stay open
                # until the output is written because pages are read lazily.
                template_stream = open(self.template_file, 'rb')
                template = PdfFileReader(template_stream)
                page = template.getPage(0)
                page.mergePage(text.getPage(0))
            else:
                page = text.getPage(0)
            output.addPage(page)

            # Save file
            filename = "%s/%s.pdf" % (self.output_dir, self.generate_filename(data))
            with open(filename, 'wb') as outputStream:
                output.write(outputStream)
        finally:
            if template_stream is not None:
                template_stream.close()
    def handle(self, *args, **options):
        """Generate one certificate PDF per child listed in the settings.

        ``settings.CERT_CHILDREN`` maps certificate type -> class -> children.
        For every child the matching template's first page is overlaid with
        four centred captions (child name, event, date, church), positioned
        and styled according to ``settings.CERT_COORD``, and saved as
        ``<CERT_PATH>/<class>/<child>_<year>.pdf``.
        """
        for cert_type, classes in settings.CERT_CHILDREN.iteritems():
            self.stdout.write('Certificate Type: {}\n'.format(cert_type))
            for ss_class, children in classes.iteritems():
                self.stdout.write('SS Class: {}\n'.format(ss_class))
                for child in children:
                    self.stdout.write('Child: {}\n'.format(child))
                    template_path = os.path.join(settings.CERT_TEMPLATE_PATH, settings.CERT_FILE[cert_type])
                    page = PdfFileReader(template_path).getPage(0)

                    overlay_buffer = StringIO.StringIO()
                    overlay = canvas.Canvas(overlay_buffer, pagesize=letter)

                    # Caption for each configured section, in drawing order.
                    captions = (
                        ('child', child),
                        ('event', 'Sunday School Summer Festival {}'.format(datetime.now().strftime('%Y'))),
                        ('date', '{}'.format(datetime.now().strftime('%B %Y'))),
                        ('church', 'St. Mark Coptic Orthodox Church'),
                    )
                    for section, caption in captions:
                        spot = settings.CERT_COORD[cert_type][section]
                        font_name = spot['font']['name']
                        font_size = spot['font']['size']
                        x = spot['x']
                        y = spot['y']
                        overlay.setFont(font_name, font_size)
                        overlay.drawCentredString(x, y, caption)
                    overlay.save()

                    # Lay the captions over the certificate template.
                    page.mergePage(PdfFileReader(overlay_buffer).getPage(0))

                    writer = PdfFileWriter()
                    writer.addPage(page)

                    output_dir = os.path.join(settings.CERT_PATH, ss_class)
                    output_file = '{}_{}.pdf'.format(child, datetime.now().strftime('%Y'))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    with open(os.path.join(output_dir, output_file), 'wb') as f:
                        writer.write(f)
Exemple #10
0
    def write_pdf(self, output):
        """Render this object to PDF and write it to ``output``.

        The RML template is chosen from, in order of preference, the subject
        type's slug, the subject type's kind, and the generic ``subject.rml``
        of the ``self.pdf_export`` directory.  When the print setup has a
        background PDF, each rendered page is merged onto the background page
        with the same index; rendered pages beyond the background's length
        are emitted unchanged.

        :param output: writable binary file-like object.
        :return: the same ``output`` object.
        """
        # get plain pdf from rml
        candidates = [
            'leprikon/{}/{}.rml'.format(self.pdf_export, self.subject.subject_type.slug),
            'leprikon/{}/{}.rml'.format(self.pdf_export, self.subject.subject_type.subject_type),
            'leprikon/{}/subject.rml'.format(self.pdf_export),
        ]
        template = select_template(candidates)
        rml_content = template.render({
            'object': self,
            'site': LeprikonSite.objects.get_current(),
        })
        pdf_content = trml2pdf.parseString(rml_content.encode('utf-8'))

        if not self.print_setup.background:
            # no background: the rendered pdf is the final result
            output.write(pdf_content)
            return output

        # merge with background
        background_pdf = PdfFileReader(self.print_setup.background.file)
        rendered_pdf = PdfFileReader(BytesIO(pdf_content))
        writer = PdfFileWriter()
        for index in range(rendered_pdf.getNumPages()):
            if index >= background_pdf.getNumPages():
                # background exhausted: keep the rendered page as is
                merged = rendered_pdf.getPage(index)
            else:
                merged = background_pdf.getPage(index)
                merged.mergePage(rendered_pdf.getPage(index))
            writer.addPage(merged)
        writer.write(output)
        return output
Exemple #11
0
def combine_for_print(folder_title):
    """Download a Drive folder's PDFs and combine them two per sheet.

    Every PDF returned by ``get_pdf_files`` is downloaded to a temporary
    ``__temp-<n>.pdf`` in the current directory.  The first page of each
    even-indexed file starts a new output page; the first page of the
    following odd-indexed file is merged onto it, shifted down by 5.3 inches.
    The result is saved as ``<YYYY-MM-DD HHMM> (<file count>).pdf``.

    Only the temporary files created by this call are removed, and they are
    removed even when an error occurs.

    :param folder_title: forwarded to ``get_pdf_files`` to select the folder.
    """
    import os  # local import: used only for temp-file cleanup

    drive = get_drive()
    filenames = []

    try:
        # Download all pdf files from GDrive.
        for i, fil in enumerate(get_pdf_files(drive, folder_title), 1):
            print(fil['title'])
            filename = '__temp-{}.pdf'.format(i)
            # Registered before the download so a partial file is cleaned up too.
            filenames.append(filename)
            fil.GetContentFile(filename)

        if not filenames:
            print('No pdf files were downloaded')
            return

        # Compute output name by using date and number of files.
        output_filename = '{:%Y-%m-%d %H%M} ({}).pdf'.format(
            datetime.datetime.now(), len(filenames))
        print('Combining files into {}'.format(output_filename))

        writer = PdfFileWriter()
        streams = []  # kept open until the output is written (lazy reads)
        try:
            for i, filename in enumerate(filenames):
                stream = open(filename, 'rb')
                streams.append(stream)
                reader = PdfFileReader(stream, strict=False)
                if (i % 2) == 0:    # even index: start a new sheet
                    page = reader.getPage(0)
                    writer.addPage(page)
                else:               # odd index: share the previous sheet
                    page.mergeTranslatedPage(reader.getPage(0), 0, -5.3*inch)

            with open(output_filename, 'wb') as fp:
                writer.write(fp)
        finally:
            for stream in streams:
                stream.close()
    finally:
        # Delete exactly the temp files created above (no shell glob).
        for filename in filenames:
            if os.path.exists(filename):
                os.remove(filename)
Exemple #12
0
def split(paperpdf, splitpdf):
    """Cut every page of ``paperpdf`` in half vertically and save the halves.

    Each input page yields two output pages: first one whose media box ends
    at the horizontal midpoint, then one whose media box starts there.

    :param paperpdf: path of the PDF to read.
    :param splitpdf: path of the PDF to write.
    """
    output = PdfFileWriter()

    # The file is opened twice on purpose: two independent readers give two
    # independent page objects, so each half can get its own media box.
    with open(paperpdf, "rb") as left_stream, open(paperpdf, "rb") as right_stream:
        left = PdfFileReader(left_stream)
        right = PdfFileReader(right_stream)

        pagecount = left.getNumPages()
        print("%s has %s pages to split." % (paperpdf,pagecount))

        for num in range(pagecount):
            left_half = left.getPage(num)
            right_half = right.getPage(num)

            box = left_half.mediaBox
            midpoint = (box.getUpperRight_x() / 2, box.getUpperRight_y())

            left_half.mediaBox.upperRight = midpoint
            output.addPage(left_half)

            right_half.mediaBox.upperLeft = midpoint
            output.addPage(right_half)

        print("Writing %s pages to %s" % (output.getNumPages(), splitpdf))
        with open(splitpdf, "wb") as out_stream:
            output.write(out_stream)
Exemple #13
0
def generate_pdf_letter(filename, template, formatdict):
    """Render a template to PDF on top of the letterhead and archive it.

    The template is rendered to a text-only PDF through
    ``PDFTemplateResponse`` (using a fake anonymous request).  Every page of
    that PDF is merged onto its own copy of the letterhead's first page.  The
    result is saved under ``settings.PDF_ARCHIVE_PATH`` and also returned.

    :param filename: file name to save under ``settings.PDF_ARCHIVE_PATH``.
    :param template: template passed to ``PDFTemplateResponse``.
    :param formatdict: template context.
    :return: an ``io.BytesIO`` containing the final PDF, positioned at its
        end (use ``getvalue()`` or ``seek(0)``), or ``False`` if merging onto
        the letterhead raises ``ValueError``.
    """
    # conjure up a fake request for PDFTemplateResponse
    request = RequestFactory().get('/')
    request.user = AnonymousUser()
    request.session = {}

    # produce text-only PDF from template
    pdfgenerator = PDFTemplateResponse(
        request=request,
        template=template,
        context=formatdict,
        cmd_options={
            'margin-top': 50,
            'margin-bottom': 50,
        },
    )
    textonlypdf = io.BytesIO(pdfgenerator.rendered_content)
    pdfreader = PdfFileReader(textonlypdf)

    # Read the letterhead once into memory; the file is closed right away.
    letterhead_path = os.path.join(
        settings.STATICFILES_DIRS[0],
        'pdf',
        settings.PDF_LETTERHEAD_FILENAME
    )
    with open(letterhead_path, 'rb') as fh:
        letterhead_bytes = fh.read()

    finalpdf = PdfFileWriter()

    # add the letterhead to all pages
    for pagenum in range(pdfreader.getNumPages()):
        # mergePage() modifies the page in place, so every output page needs
        # a fresh letterhead page.  Reusing one page object would pile the
        # text of all previous pages onto each later page.
        page = PdfFileReader(io.BytesIO(letterhead_bytes)).getPage(0)
        try:
            page.mergePage(pdfreader.getPage(pagenum))
        except ValueError:
            # letterhead pdf might be broken?
            return False
        finalpdf.addPage(page)

    # serialise once, then reuse the bytes for the archive copy
    returnfile = io.BytesIO()
    finalpdf.write(returnfile)

    # save the generated pdf to the archive
    fullpath = os.path.join(settings.PDF_ARCHIVE_PATH, filename)
    with open(fullpath, 'wb') as fh:
        fh.write(returnfile.getvalue())
    logger.info('Saved pdf to archive: %s', fullpath)

    return returnfile
    def stampPages(self,listOfPageObjects,xPercentOffset=0.2,yPercentOffset=0.2):
        """Stamp "ISSUED FOR CONSTRUCTION" on each page and add it to ``output``.

        For every page a one-page overlay is drawn (bold 25pt Helvetica, red
        at 25% alpha) and merged into the page, which is then appended to the
        module-level ``output`` object.  Pages carrying a non-zero ``/Rotate``
        entry get the overlay merged with a 90 degree rotation and a
        translation instead of a plain merge.

        The method modifies the given page objects in place and returns
        nothing; results are only visible through the global ``output``.

        :param listOfPageObjects: iterable of page objects to stamp.
        :param xPercentOffset: fraction (0.2 = 20%) applied to the page width
            to position the text horizontally.
        :param yPercentOffset: fraction applied to the page height to position
            the text vertically.
        """
        # Pages are appended to a writer that lives at module level.
        # NOTE(review): presumably a PdfFileWriter created by the caller -- confirm.
        global output
        j=0                 # never read
        stampedPages=[]     # never read or returned

        for page in listOfPageObjects:
            packet = StringIO.StringIO()

            existingPdfPage=page
            # NOTE(review): assumes the trim box starts at (0, 0) so that
            # entries 2 and 3 are the width and height in points -- confirm.
            widthInches=existingPdfPage.trimBox[2]/72
            heightInches=existingPdfPage.trimBox[3]/72

            widthMill=widthInches*25.4
            heightMill=heightInches*25.4

            # Overlay canvas with the same size as the page being stamped.
            dimensionCurrentPdfPage=(widthInches*72,heightInches*72)
            can = canvas.Canvas(packet, dimensionCurrentPdfPage)

            font=25
            offset=0.25*font    # never read
            top_offset=0        # never read

            can.setFillColorRGB(1,0,0,alpha=0.25)
            can.setFont("Helvetica-Bold", font)

            # NOTE(review): the position is a fraction of the page size in
            # millimetres, while the canvas size above is given in points;
            # the mixed units look unintentional -- confirm.
            can.drawString(xPercentOffset*widthMill, yPercentOffset*heightMill, "ISSUED FOR CONSTRUCTION")
            can.save()
            packet.seek(0)
            new_pdf = PdfFileReader(packet)

            # Treat a missing /Rotate entry as "not rotated".
            if '/Rotate' in page:
                rotationAngle=page['/Rotate']
            else:
                rotationAngle=0

            if rotationAngle==0:
                existingPdfPage.mergePage(new_pdf.getPage(0))
                output.addPage(existingPdfPage)
            elif rotationAngle !=0:
                # Any non-zero rotation is handled the same way: the overlay
                # is rotated by 90 degrees and shifted by the same amount on
                # both axes.
                # NOTE(review): the shift is the page height in millimetres
                # times sqrt(2); looks empirically tuned -- confirm.
                pageHeight=existingPdfPage.trimBox[3]
                translatePageDown=(float(pageHeight)/72)*25.4*sqrt(2)


                existingPdfPage.mergeRotatedTranslatedPage(new_pdf.getPage(0),rotation=90,tx=translatePageDown,ty=translatePageDown)
                output.addPage(existingPdfPage)
Exemple #15
0
    def get_claim_report_user(self, employee_id, **post):
        """Return one PDF bundling the vehicle-assignation documents of an employee.

        Only fleet managers may call this; anyone else, an unknown employee,
        or an employee without related partners gets a "not found" response.
        Each attachment of the employee's assignation logs (oldest first) is
        read as a PDF, a red one-line header naming the vehicle and the
        driving period is merged onto all of its pages, and the pages are
        appended to the result.  Attachments that cannot be read as a PDF are
        skipped.

        :param employee_id: id of the ``hr.employee`` record.
        :return: an HTTP response carrying the merged PDF.
        """
        env = request.env
        if not env.user.has_group('fleet.fleet_group_manager'):
            return request.not_found()

        employee = env['hr.employee'].search([('id', '=', employee_id)], limit=1)
        partner_ids = (employee.user_id.partner_id | employee.address_home_id).ids
        if not employee or not partner_ids:
            return request.not_found()

        assignation_logs = env['fleet.vehicle.assignation.log'].search([('driver_id', 'in', partner_ids)])
        attachments = env['ir.attachment'].search([
            ('res_model', '=', 'fleet.vehicle.assignation.log'),
            ('res_id', 'in', assignation_logs.ids)], order='create_date')

        font = "Helvetica"
        normal_font_size = 14
        writer = PdfFileWriter()

        for attachment in attachments:
            log_line = env['fleet.vehicle.assignation.log'].browse(attachment.res_id)
            try:
                source_pdf = PdfFileReader(io.BytesIO(base64.b64decode(attachment.datas)), strict=False, overwriteWarnings=False)
            except Exception:
                # unreadable attachment: leave it out of the report
                continue

            # The header is laid out against the size of the first page.
            first_page = source_pdf.getPage(0)
            width = float(first_page.mediaBox.getUpperRight_x())
            height = float(first_page.mediaBox.getUpperRight_y())

            header_buffer = io.BytesIO()
            header_canvas = canvas.Canvas(header_buffer)
            header_canvas.setFont(font, normal_font_size)
            header_canvas.setFillColorRGB(1, 0, 0)

            car_name = log_line.vehicle_id.display_name
            date_start = log_line.date_start
            date_end = log_line.date_end or '...'

            text_to_print = _("%s (driven from: %s to %s)") % (car_name, date_start, date_end)
            header_canvas.drawCentredString(width / 2, height - normal_font_size, text_to_print)
            header_canvas.save()
            header_pdf = PdfFileReader(header_buffer, overwriteWarnings=False)

            for page_number in range(source_pdf.getNumPages()):
                stamped = source_pdf.getPage(page_number)
                stamped.mergePage(header_pdf.getPage(0))
                writer.addPage(stamped)

        out_buffer = io.BytesIO()
        writer.write(out_buffer)
        merged_pdf = out_buffer.getvalue()
        out_buffer.close()

        pdfhttpheaders = [('Content-Type', 'application/pdf'), ('Content-Length', len(merged_pdf))]

        return request.make_response(merged_pdf, headers=pdfhttpheaders)
Exemple #16
0
def merge_pdfs(f1, f2, output_f):
    """Overlay the first page of ``f2`` on the first page of ``f1``.

    :param f1: source of the base PDF, as accepted by ``PdfFileReader``.
    :param f2: source of the overlay PDF, as accepted by ``PdfFileReader``.
    :param output_f: writable binary stream that receives the one-page result.
    """
    base_reader = PdfFileReader(f1)
    overlay_reader = PdfFileReader(f2)
    result = PdfFileWriter()

    merged_page = base_reader.getPage(0)
    merged_page.mergePage(overlay_reader.getPage(0))
    result.addPage(merged_page)

    result.write(output_f)
def generate_course_info_page_pdf(class_data, num_students):
    """Fill in the course-information page of the HCP roster template.

    The values are drawn on a 792x612 point overlay, which is then merged onto
    the first page of ``pdf_templates/HCP_roster.pdf`` (resolved against the
    current working directory).

    :param class_data: dict providing ``options`` (flags ``New``, ``Renewal``,
        ``Instructor``, ``Provider``), ``curr_instructor`` (instructor and
        training-center strings), ``class_location``,
        ``student_manikin_ratio``, and the date objects ``class_date`` and
        ``card_issue_date``.
    :param num_students: number of students; drawn as text.
    :return: a ``PdfFileWriter`` holding the single merged page.
    """
    # create page1 mask: a new PDF drawn with Reportlab
    packet = StringIO.StringIO()
    can = canvas.Canvas(packet, pagesize=(792, 612))
    can.setFont("Helvetica", 10)

    # tick the checkbox of every selected option (same x, one row each)
    options = class_data['options']
    for option, y in (('New', 428), ('Renewal', 416),
                      ('Instructor', 404), ('Provider', 392)):
        if options[option]:
            can.drawString(58, y, u"✗")

    instructor = class_data['curr_instructor']
    can.drawString(495, 426, instructor['instructor_name'])
    can.drawString(519, 403, instructor['instructor_renewal_date'])
    can.drawString(493, 389, instructor['training_center_id'])
    can.drawString(512, 378, instructor['training_center_name'])
    can.drawString(510, 366, '') #training site name
    can.drawString(493, 354, class_data['class_location'])
    can.drawString(493, 341, '') #address

    class_date = class_data['class_date'].strftime("%m/%d/%y")
    can.drawString(165, 283, class_date)
    can.drawString(378, 283, class_date)
    can.drawString(614, 283, "4")
    can.drawString(153, 259, str(num_students))
    can.drawString(381, 259, class_data['student_manikin_ratio'])
    can.drawString(581, 259, class_data['card_issue_date'].strftime("%m/%y"))

    can.save()

    packet.seek(0)
    mask = PdfFileReader(packet)

    roster_filename = os.path.join(os.path.realpath('.'), 'pdf_templates','HCP_roster.pdf')

    # Load the template into memory so its file handle is closed right away.
    # The returned writer still reads page data lazily, so it needs a stream
    # that outlives this function.
    with open(roster_filename, "rb") as roster_file:
        roster = PdfFileReader(StringIO.StringIO(roster_file.read()))

    # merge template with mask
    merged_roster = PdfFileWriter()

    page = roster.getPage(0)
    page.mergePage(mask.getPage(0))
    merged_roster.addPage(page)

    return merged_roster
def add_page_numbers(inputfile, outputfile, startno=None, endno=None, fontname="Helvetica", fontsize=12,
                     pagenoformat="- %i -", pagesize=A4, posx=280, posy=800):
    """
    Adds page numbers to the input PDF file and stores the modified PDF in output.
    Optionally, the page range can be limited.

    Pages outside the range are copied to the output unchanged.

    :param inputfile: the input PDF
    :type inputfile: str
    :param outputfile: the output PDF
    :type outputfile: str
    :param startno: the first page to number, 1-based, use None to start from first page
    :type startno: int
    :param endno: the last page to number, 1-based, use None to end with last page
    :type endno: int
    :param fontname: the name of the font to use, eg 'Helvetica'
    :type fontname: str
    :param fontsize: the size of the font, eg 12
    :type fontsize: int
    :param pagenoformat: the format string for the page number, eg '- %i -'
    :type pagenoformat: str
    :param pagesize: the page size, eg A4
    :type pagesize: object
    :param posx: the X position for the page number
    :type posx: int
    :param posy: the Y position for the page number
    :type posy: int
    """
    outputpdf = PdfFileWriter()

    # The input must stay open until the output is written (lazy page reads);
    # both files are closed, and the output flushed, when the blocks exit.
    with open(inputfile, "rb") as inputstream:
        inputpdf = PdfFileReader(inputstream)
        numpages = inputpdf.getNumPages()

        if startno is None:
            startno = 1
        if endno is None:
            endno = numpages

        for i in range(numpages):
            page = i + 1
            current = inputpdf.getPage(i)
            # add page number?
            # taken from here: http://stackoverflow.com/a/17538003
            if startno <= page <= endno:
                # draw the number on a one-page overlay and merge it in
                packet = StringIO.StringIO()
                can = canvas.Canvas(packet, pagesize=pagesize)
                can.setFont(fontname, fontsize)
                can.drawString(posx, posy, pagenoformat % page)
                can.save()
                packet.seek(0)
                pagenopdf = PdfFileReader(packet)
                current.mergePage(pagenopdf.getPage(0))
                logger.info("Page %s added", page)
            else:
                logger.info("Page %s", page)
            outputpdf.addPage(current)

        with open(outputfile, "wb") as outputstream:
            outputpdf.write(outputstream)
Exemple #19
0
 def add_to_letterhead(self, data, letterhead):
     """Merge the first page of ``data`` onto the letterhead's first page.

     :param data: PDF source accepted by ``PdfFileReader`` (the new content).
     :param letterhead: raw bytes of the letterhead PDF.
     :return: a ``PdfFileWriter`` containing the single merged page.
     """
     content_reader = PdfFileReader(data)
     letterhead_reader = PdfFileReader(io.BytesIO(letterhead))
     result = PdfFileWriter()

     # the new content is drawn on top of the existing letterhead page
     merged_page = letterhead_reader.getPage(0)
     merged_page.mergePage(content_reader.getPage(0))
     result.addPage(merged_page)
     return result
Exemple #20
0
def merge_pdf(infnList, outfn):
    """Merge chapter PDFs into one bookmarked PDF, then delete the sources.

    Every entry of ``infnList`` is a dict with a ``'title'`` and a
    ``'child_chapters'`` list of dicts that each have a ``'title'``.  The PDF
    of an entry is read from ``gen/<title>/<title>.pdf`` and those of its
    children from ``gen/<title>/<child title>.pdf``, both relative to this
    module's directory.  Each chapter gets a bookmark on its first page;
    child bookmarks are nested under their parent's.

    After the merged file has been written, the whole ``gen`` directory is
    removed.

    :param infnList: list of chapter dicts as described above
    :param outfn: path of the merged PDF to write
    :return: None
    """
    gen_dir = os.path.join(os.path.dirname(__file__), 'gen')
    pdf_output = PdfFileWriter()
    open_streams = []  # kept open until the output is written (lazy reads)
    pagenum = 0        # index of the next page to be appended

    def append_pages(path):
        """Append every page of the PDF at ``path``; return its page count."""
        stream = open(path, 'rb')
        open_streams.append(stream)
        pdf_input = PdfFileReader(stream)
        page_count = pdf_input.getNumPages()
        for i in range(page_count):
            pdf_output.addPage(pdf_input.getPage(i))
        return page_count

    try:
        for pdf in infnList:
            # first-level chapter
            first_level_title = pdf['title']
            dir_name = os.path.join(gen_dir, first_level_title)
            page_count = append_pages(
                os.path.join(dir_name, first_level_title + '.pdf'))
            parent_bookmark = pdf_output.addBookmark(
                first_level_title, pagenum=pagenum)
            pagenum += page_count

            # sub-chapters, if any
            for child in pdf['child_chapters'] or ():
                second_level_title = child['title']
                page_count = append_pages(
                    os.path.join(dir_name, second_level_title + '.pdf'))
                pdf_output.addBookmark(
                    second_level_title, pagenum=pagenum, parent=parent_bookmark)
                pagenum += page_count

        # write the merged document; the file is closed (and flushed) on exit
        with open(outfn, 'wb') as out_stream:
            pdf_output.write(out_stream)
    finally:
        for stream in open_streams:
            stream.close()

    # delete all chapter files
    shutil.rmtree(gen_dir)
def create_overlayed_page(entry_name):
    """Overlay ``overlay_tmp.pdf`` on the first page of an entry's PDF.

    Reads ``<entry_name>.original.pdf`` and ``overlay_tmp.pdf`` from the
    current directory, merges the overlay's first page onto the original's
    first page and writes that single page to ``<entry_name>.with-ref.pdf``.

    :param entry_name: base name (without suffix) of the entry's files.
    """
    output = PdfFileWriter()
    # Inputs stay open until the output has been written (lazy page reads).
    with open("%s.original.pdf" % entry_name, "rb") as original_stream, \
            open("overlay_tmp.pdf", "rb") as overlay_stream:
        input1 = PdfFileReader(original_stream)
        watermark = PdfFileReader(overlay_stream)

        page1 = input1.getPage(0)
        page1.mergePage(watermark.getPage(0))
        output.addPage(page1)

        with open("%s.with-ref.pdf" % entry_name, "wb") as outputStream:
            output.write(outputStream)
def generate_course_info_page_pdf(course_info, num_students):
    """Fill in the course-information page of the HS roster template.

    The values are drawn on a 792x612 point overlay, which is then merged onto
    the first page of ``templates/HS_roster.pdf`` (resolved against the
    current working directory).

    :param course_info: dict of strings: the ``'yes'``/other flags
        ``child_cpr``, ``infant_cpr`` and ``written_test``, plus
        ``instructor_name``, ``instructor_renewal_date``,
        ``training_center_name``, ``training_center_id``,
        ``course_location``, ``course_date``, ``student_manikin_ratio`` and
        ``card_issue_date``.
    :param num_students: number of students; drawn as text.
    :return: a ``PdfFileWriter`` holding the single merged page.
    """
    # create page1 mask: a new PDF drawn with Reportlab
    packet = StringIO.StringIO()
    can = canvas.Canvas(packet, pagesize=(792, 612))
    can.setFont("Helvetica", 10)

    # this box is always ticked; the ones on the next row depend on the flags
    can.drawString(58, 428, u"✗")
    for flag, x in (('child_cpr', 76), ('infant_cpr', 171), ('written_test', 243)):
        if course_info[flag] == 'yes':
            can.drawString(x, 415, u"✗")

    can.drawString(495, 426, course_info['instructor_name'])
    can.drawString(519, 403, u"✗")
    can.drawString(512, 389, course_info['instructor_renewal_date'])
    can.drawString(493, 378, course_info['training_center_name'])
    can.drawString(510, 366, course_info['training_center_id'])
    can.drawString(493, 341, course_info['course_location'])

    can.drawString(165, 268, course_info['course_date'])
    can.drawString(378, 268, course_info['course_date'])
    can.drawString(614, 268, "4")
    can.drawString(153, 244, str(num_students))
    can.drawString(381, 244, course_info['student_manikin_ratio'])
    can.drawString(581, 244, course_info['card_issue_date'])

    can.save()

    packet.seek(0)
    mask = PdfFileReader(packet)

    roster_filename = os.path.join(os.path.realpath('.'), 'templates','HS_roster.pdf')

    # Load the template into memory so its file handle is closed right away.
    # The returned writer still reads page data lazily, so it needs a stream
    # that outlives this function.
    with open(roster_filename, "rb") as roster_file:
        roster = PdfFileReader(StringIO.StringIO(roster_file.read()))

    # merge template with mask
    merged_roster = PdfFileWriter()

    page = roster.getPage(0)
    page.mergePage(mask.getPage(0))
    merged_roster.addPage(page)

    return merged_roster
Exemple #23
0
def page_extract(start, end, SUBSECTION):
    """Turn a range of pages into PNGs and build cards from consecutive pairs.

    Each page from ``start`` to ``end`` (1-based, inclusive) of the PDF at the
    module-level ``PDF_DIR`` path is written to a one-page PDF in
    ``TMP_DIR``, handed to ``gs_pdf_to_png`` and then deleted.  Afterwards the
    files in ``TMP_DIR`` are taken two at a time, in name order, as
    (front, back) and passed to ``make_cards``.

    :param start: first page to extract, 1-based (anything ``int()`` accepts).
    :param end: last page to extract, 1-based, inclusive.
    :param SUBSECTION: forwarded unchanged to ``make_cards``.
    """
    # NOTE: TMP_DIR is used as a plain string prefix below, so it has to end
    # with a path separator.
    _, name_ext = os.path.split(PDF_DIR)
    name, ext = os.path.splitext(name_ext)

    with open(PDF_DIR, 'rb') as pdf_stream:
        PDF_IN = PdfFileReader(pdf_stream)

        for i in range(int(start) - 1, int(end)):
            output = PdfFileWriter()
            output.addPage(PDF_IN.getPage(i))

            # zero-padded page index keeps the generated names sortable
            PDF_OUT = '{}{}'.format(TMP_DIR, '{}-{}{}'.format(name, str(i).zfill(6), ext))

            with open(PDF_OUT, 'wb') as outputStream:
                output.write(outputStream)

            gs_pdf_to_png(PDF_OUT)
            os.remove(PDF_OUT)

    # os.listdir() returns names in arbitrary order; sort them so fronts and
    # backs are paired by page number.
    png_list = group(sorted(os.listdir(TMP_DIR)), 2)
    for tup in png_list:
        print(tup)
        card_front = os.path.join(TMP_DIR, tup[0])
        card_back  = os.path.join(TMP_DIR, tup[1])
        make_cards(card_front, card_back, SUBSECTION)
Exemple #24
0
def tearpage(filename, startpage=1):
    """
    Remove the leading pages of a PDF in place.

    The file is copied to a temporary file, then rewritten with only the
    pages from index ``startpage`` to the end.  If the copy cannot be parsed,
    ``_fixPdf`` is given a chance to repair it before a second attempt.

    :param filename: PDF filepath; the file is overwritten
    :param startpage: zero-based index of the page that becomes the new first
        page (the default of 1 drops only the first page)
    """
    # Copy the pdf to a tmp file, which is deleted when the block exits
    with tempfile.NamedTemporaryFile() as tmp:
        shutil.copy(filename, tmp.name)

        # Read the copied pdf
        stream = open(tmp.name, 'rb')
        try:
            try:
                input_file = PdfFileReader(stream)
            except PdfReadError:
                stream.close()
                _fixPdf(filename, tmp.name)
                stream = open(tmp.name, 'rb')
                input_file = PdfFileReader(stream)
            # Seek for the number of pages
            num_pages = input_file.getNumPages()

            # Keep the pages from startpage on
            output_file = PdfFileWriter()
            for i in range(startpage, num_pages):
                output_file.addPage(input_file.getPage(i))

            # The copy must still be open here: pages are read lazily.
            with open(filename, "wb") as outputStream:
                output_file.write(outputStream)
        finally:
            stream.close()
Exemple #25
0
class PdfSplitter:
    """Extract pages of one PDF into new files.

    The source file stays open for the lifetime of the object; call
    :meth:`close` when done, or use the object as a context manager::

        with PdfSplitter("in.pdf") as splitter:
            splitter.split([0, 2], "out.pdf")
    """

    def __init__(self, path):
        """Open the PDF at ``path`` for splitting."""
        self._stream = open(path, "rb")
        try:
            self._input_pdf = PdfFileReader(self._stream)
        except Exception:
            # do not leak the handle if the file is not a readable PDF
            self._stream.close()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def split(self, pages, filename):
        """
        Split pages from the wrapped pdf file (see constructor) into
        a new file called `filename`

        :param pages: Either a zero indexed page number or a list of pages
        :param filename: The name of the new file
        :return: None
        """
        if isinstance(pages, int):
            pages = [pages]

        output_writer = PdfFileWriter()
        for page in pages:
            output_writer.addPage(self._input_pdf.getPage(page))

        with open(filename, "wb") as output_stream:
            output_writer.write(output_stream)

    def close(self):
        """Close the underlying source file."""
        self._stream.close()
Exemple #26
0
def getPLBURL(journal,doi,count):
    """
    Download the PDF advertised on a DOI landing page and save its first page.

    The DOI is resolved through ``http://dx.doi.org/``; the returned HTML is
    searched for a ``pdfurl="..."`` attribute, that URL is downloaded, and
    page 0 of the PDF is written to ``Single_<count>.pdf`` in the current
    working directory.

    :param journal: not used by this function; kept so existing callers work
    :param doi: DOI string appended to the resolver URL
    :param count: value used to build the output file name
    :raises ValueError: if the landing page contains no ``pdfurl`` attribute
    """
    import re

    cj = http.cookiejar.CookieJar() # initialize the cookie jar
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'
    opener.addheaders = [('User-Agent', user_agent)]

    response = opener.open('http://dx.doi.org/'+doi)
    try:
        landing_page = response.read()
    finally:
        response.close()

    # The body is bytes; decode before running the text pattern over it.
    match = re.search('pdfurl="(.*?)"', landing_page.strip().decode('utf-8'))
    if match is None:
        raise ValueError('no pdfurl attribute found for DOI ' + doi)

    response = opener.open(match.group(1))
    try:
        pdf_bytes = response.read()
    finally:
        response.close()

    reader = PdfFileReader(io.BytesIO(pdf_bytes))
    merged = PdfFileWriter()
    outName= "Single_"+str(count)+".pdf"
    merged.addPage(reader.getPage(0))
    with open(outName,'wb') as pdf:
        merged.write(pdf)
Exemple #27
0
def pdf_to_csv_with_PyPDF():
    """
    Iterate through all the PDF files in the ``_PDF_PATH`` folder and export
    their text content to ``data.csv`` inside ``_DATA_PATH``.

    The CSV file has two columns: ``document_id`` (the PDF file name) and
    ``document_text`` (the text of every page, each followed by a space).
    Files that cannot be read are reported on stdout and skipped.
    """
    bar = progressbar.ProgressBar()
    csv_data_file = _DATA_PATH + "data.csv"
    with open(csv_data_file, "w", newline='') as csvfile:
        data_writer = csv.writer(csvfile)
        data_writer.writerow(["document_id","document_text"])
        for fn in bar(os.listdir(_PDF_PATH)):
            file_path = os.path.join(_PDF_PATH, fn)
            if not file_path.endswith(".pdf"):
                continue
            try:
                # Close each PDF as soon as its text is extracted so a large
                # folder does not exhaust file descriptors.
                with open(file_path, 'rb') as pdf_file:
                    input_file = PdfFileReader(pdf_file)
                    text = "".join(
                        input_file.getPage(p).extractText() + " "
                        for p in range(input_file.getNumPages())
                    )
            except utils.PdfReadError:
                print("Error al leer el PDF: {0}".format(fn))
            except Exception as e:
                print("Error desconocido en el PDF: {0}".format(fn))
                print("Error: {0}".format(e))
            else:
                #TODO: Check if text is not empty
                data_writer.writerow([fn,text])
Exemple #28
0
def _merge_pdf(documents):
    '''Merge PDF files into one.

    The pages of every document are appended, in order, to a new temporary
    file created with ``tempfile.mkstemp``. The caller owns that file and is
    responsible for deleting it.

    :param documents: list of path of pdf files
    :returns: path of the merged pdf
    '''
    writer = PdfFileWriter()
    streams = []  # We have to close the streams *after* PdfFileWriter's call to write()
    try:
        for document in documents:
            pdfreport = open(document, 'rb')
            streams.append(pdfreport)
            reader = PdfFileReader(pdfreport, overwriteWarnings=False)
            for page in range(0, reader.getNumPages()):
                writer.addPage(reader.getPage(page))

        merged_file_fd, merged_file_path = tempfile.mkstemp(suffix='.pdf', prefix='report.merged.tmp.')
        # PDF data is binary: the descriptor must be wrapped in binary mode.
        with closing(os.fdopen(merged_file_fd, 'wb')) as merged_file:
            writer.write(merged_file)
    finally:
        for stream in streams:
            try:
                stream.close()
            except Exception:
                # Best effort: a failing close must not mask the real error.
                pass

    return merged_file_path
 def buildPDF(self, data, document_root):
   """Overlay submitted field values on the a125 form and save the result.

   The field values are drawn on an in-memory canvas by the per-field
   helper methods of this class, merged onto page 0 of
   ``<document_root>/a125.pdf`` and written to
   ``<document_root>/a125-gen.pdf``. The writer is also kept in
   ``self.form``.

   :param data: JSON text; the ``'fields'`` mapping of its first element
       supplies the values
   :param document_root: directory holding the blank form and receiving
       the generated file
   """
   data = json.loads(data)[0]['fields']
   content = StringIO.StringIO()
   parser = canvas.Canvas(content, pagesize=letter)

   self.employee_name(parser, data['name'])
   self.social_security(parser, data['ssn'])
   self.title(parser, data['title'])
   self.base_salary(parser, data['base_salary'])
   self.period(parser, data['period'])
   self.period_year(parser, data['period_year'])
   self.effective_date(parser, data['effective_date'])
   self.multi_campus(parser, data['multi_campus'])
   self.sponsored_accounts(parser, data['sponsored_accounts'])
   self.cost_sharing(parser, data['cost_sharing'])
   self.university_funds(parser, data['university_funds'])
   self.payments_paid(parser, data['payments_paid'])
   self.comments(parser, data['comments'])

   # Finish the overlay and rewind the buffer so it can be read as a PDF.
   parser.save()
   content.seek(0)
   text = PdfFileReader(content)

   form = PdfFileReader(document_root+'/a125.pdf').getPage(0)
   output = PdfFileWriter()
   form.mergePage(text.getPage(0))
   output.addPage(form)

   # Close the generated file right away so its content is flushed to disk.
   with open(document_root+'/a125-gen.pdf', 'wb') as outputStream:
     output.write(outputStream)
   self.form = output
Exemple #30
0
def get_images(pdf_file):
    """Yield the images embedded in the first page of a PDF.

    Image XObjects whose height is below 100 are skipped. Each remaining
    image must use the ``/FlateDecode`` filter and is yielded as an image
    built with ``Image.frombytes``; the mode is ``"RGB"`` for
    ``/DeviceRGB`` data and ``"P"`` otherwise.

    :param pdf_file: path of the PDF file to inspect
    :raises Exception: for an image with any other encoding
    """
    with open(pdf_file, 'rb') as stream:
        first_page = PdfFileReader(stream).getPage(0)
        resources = first_page['/Resources']['/XObject'].getObject()

        for key in resources:
            entry = resources[key]
            if entry['/Subtype'] != '/Image':
                continue

            width = entry['/Width']
            height = entry['/Height']
            # Ignore smaller images.
            if height < 100:
                continue

            dimensions = (width, height)
            raw = entry.getData()
            mode = "RGB" if entry['/ColorSpace'] == '/DeviceRGB' else "P"

            encoding = entry['/Filter']
            if not (encoding == '/FlateDecode' or '/FlateDecode' in encoding):
                raise Exception(
                    'Unexpected image encoding: {}'.format(encoding))
            yield Image.frombytes(mode, dimensions, raw)
Exemple #31
0
def generate(config, pdf_config, save_path):
    """Build a report PDF by overlaying patient values on a template.

    One overlay ("watermark") PDF is drawn per report page and saved to the
    paths given in ``pdf_config``. Each overlay is then merged onto the
    matching page of the selected template and the result is written to
    ``save_path``.

    The report has three pages when ``config['vessel_analysis']`` is truthy
    and two otherwise. The template depends on that flag and on whether
    ``config['dr']`` equals ``u'有'``.

    :param config: mapping with the text values and image paths to draw
    :param pdf_config: mapping with the overlay output paths
        (``watermark1``..``watermark3``) and template paths
        (``template1``..``template4``)
    :param save_path: path of the final PDF
    """
    # support chinese
    font_chinese = 'STSong-Light'  # from Adobe's Asian Language Packs
    pdfmetrics.registerFont(UnicodeCIDFont(font_chinese))

    with_vessels = config['vessel_analysis']
    has_dr = config['dr'] == u'有'

    # Overlay page size in points; y coordinates below are measured from the
    # top edge as ``h - distance``.
    w = 595.27
    h = 841.89

    # page1
    c1 = canvas.Canvas(pdf_config['watermark1'], (w, h))
    c1.setFont(font_chinese, size=11)
    # get current time
    now = datetime.datetime.now()
    date_text = now.strftime("%Y-%m-%d")
    time_text = now.strftime('%H:%M')
    # Add content
    c1.drawString(137, h - 134, config['patient-id'])
    c1.drawString(160, h - 159, config['report-id'])
    c1.drawString(147, h - 184, date_text)
    c1.drawString(147, h - 209, time_text)
    c1.drawString(122, h - 234, config['eye'])
    # image quality content; ``offset`` shifts the sections drawn below it
    offset = 0
    if with_vessels:
        c1.drawCentredString(304, h - 282, config['vessel-point'])
        c1.drawCentredString(334, h - 282, config['vessel-quality'])
        c1.drawCentredString(129, h - 299, config['distance'])
        c1.drawCentredString(432, h - 298, config['angle'])
        c1.drawCentredString(127, h - 316, config['standard'])
    else:
        offset -= 16
        c1.drawCentredString(377, h - 282, config['distance'])
        c1.drawCentredString(289, h - 299, config['angle'])
        c1.drawCentredString(346, h - 298, config['standard'])
    # diabetic retinopathy
    c1.drawCentredString(215, h - 339 - offset, config['dr'])
    if has_dr:
        c1.drawCentredString(262, h - 340 - offset, config['stage'])
        c1.drawCentredString(391, h - 340 - offset, config['bleed'])
        c1.drawCentredString(151, h - 357 - offset, config['1-bleed'])
        c1.drawCentredString(221, h - 356 - offset, config['exudation'])
        c1.drawCentredString(378, h - 357 - offset, config['1-exudation'])
    if with_vessels:
        if has_dr:
            offset += 18
        c1.drawCentredString(250, h - 364 - offset, config['2-length'])
        c1.drawCentredString(409, h - 364 - offset, config['2-length_compare'])
        c1.drawCentredString(124, h - 381 - offset, config['2-density'])
        c1.drawCentredString(260, h - 381 - offset,
                             config['2-density_compare'])
        c1.drawCentredString(378, h - 381 - offset, config['2-diameter'])
        c1.drawCentredString(151, h - 397 - offset,
                             config['2-diameter_compare'])
    c1.drawImage(config['patient-image'],
                 153,
                 h - 658,
                 width=2880 // 10,
                 height=2136 // 10)
    c1.save()

    # page2
    c2 = canvas.Canvas(pdf_config['watermark2'], (w, h))
    c2.setFont(font_chinese, size=11)
    # Add content
    c2.drawImage(config['output_images']['macular_image'],
                 100,
                 h - 245,
                 width=2880 // 18,
                 height=2136 // 18)
    c2.drawCentredString(447, h - 141, config['DR_prob'])
    c2.drawCentredString(310, h - 176, config['stage'])
    c2.drawCentredString(447, h - 176, config['level1_prob'])
    c2.drawCentredString(447, h - 211, config['disc_diameter'])
    c2.drawCentredString(447, h - 241, config['macular_center_coordinate'])
    if has_dr:
        c2.drawImage(config['output_images']['bleed_image'],
                     113,
                     h - 522,
                     width=2880 // 18,
                     height=2136 // 18)
        c2.drawImage(config['output_images']['bleed_histogram'],
                     307,
                     h - 532,
                     width=187,
                     height=140)
        c2.drawImage(config['output_images']['exudation_image'],
                     113,
                     h - 703,
                     width=2880 // 18,
                     height=2136 // 18)
        c2.drawImage(config['output_images']['exudation_histogram'],
                     307,
                     h - 713,
                     width=187,
                     height=140)
    c2.save()

    if with_vessels:
        # page3
        c3 = canvas.Canvas(pdf_config['watermark3'], (w, h))
        c3.setFont(font_chinese, size=11)
        # Add content
        c3.drawImage(config['output_images']['retinal_vessel_image'],
                     113,
                     h - 273,
                     width=2880 // 18,
                     height=2136 // 18)
        c3.drawImage(config['output_images']['quadrant_segmentation_image'],
                     320,
                     h - 273,
                     width=2880 // 18,
                     height=2136 // 18)
        c3.drawCentredString(274, h - 447, config['a-density'])
        c3.drawCentredString(428, h - 447, config['a-density_compare'])
        c3.drawCentredString(274, h - 464, config['a-length'])
        c3.drawCentredString(428, h - 464, config['a-length_compare'])
        c3.drawImage(config['output_images']['a-patient_length_histogram'],
                     198,
                     h - 588,
                     width=150,
                     height=116)
        c3.drawImage(config['a-normal_length_histogram'],
                     354,
                     h - 588,
                     width=150,
                     height=116)
        c3.drawCentredString(274, h - 604, config['a-diameter'])
        c3.drawCentredString(428, h - 604, config['a-diameter_compare'])
        c3.drawImage(config['output_images']['a-patient_diameter_histogram'],
                     198,
                     h - 729,
                     width=150,
                     height=116)
        c3.drawImage(config['a-normal_diameter_histogram'],
                     354,
                     h - 729,
                     width=150,
                     height=116)
        c3.save()

    # One overlay per report page, in page order, plus the matching template.
    overlay_paths = [pdf_config['watermark1'], pdf_config['watermark2']]
    if with_vessels:
        overlay_paths.append(pdf_config['watermark3'])
        template_key = 'template1' if has_dr else 'template4'
    else:
        template_key = 'template2' if has_dr else 'template3'
    page_count = len(overlay_paths)

    # Every input must stay open until the output has been written, so the
    # handles are collected here and closed together afterwards.
    streams = []
    try:
        overlays = []
        for overlay_path in overlay_paths:
            overlay_stream = open(overlay_path, "rb")
            streams.append(overlay_stream)
            overlays.append(PdfFileReader(overlay_stream))

        template_stream = open(pdf_config[template_key], "rb")
        streams.append(template_stream)
        input_file = PdfFileReader(template_stream)

        output_file = PdfFileWriter()

        # Go through all the input file pages to add a watermark to them
        for page_number in range(page_count):
            print("Watermarking page {} of {}".format(page_number, page_count))
            # merge the watermark with the page
            input_page = input_file.getPage(page_number)
            input_page.mergePage(overlays[page_number].getPage(0))
            # add page from input file to output document
            output_file.addPage(input_page)

        # finally, write the merged pages to save_path
        with open(save_path, 'wb') as outputStream:
            output_file.write(outputStream)
    finally:
        for stream in streams:
            stream.close()
Exemple #32
0
import PyPDF2
import io

import requests
from PyPDF2 import PdfFileReader


# Download every PDF listed in `pdfs` and write the extracted text of each
# page to `f`.
# NOTE(review): `pdfs` and `f` are not defined in this part of the file --
# presumably an iterable of URL strings and an open text file; confirm.
for url in pdfs:
    print("Scraping from" + url)
    r = requests.get(url)
    # Wrap the downloaded bytes so PdfFileReader can read them like a file.
    fi = io.BytesIO(r.content)
    reader = PdfFileReader(fi)
    number_of_pages = reader.getNumPages()
    for page_number in range(number_of_pages):
        page = reader.getPage(page_number)
        page_content = page.extractText()
        f.write(page_content)

def findHrefs(data):
    """Collect the spotlight links found inside a list of parsed elements.

    Every element must offer ``find_all('a')``; every anchor it returns must
    offer ``get('href')``. A link is kept when its ``href`` contains both
    ``"spotlights"`` and ``"www."``. Anchors without an ``href`` are skipped.

    :param data: iterable of elements to search
    :returns: list of matching ``href`` strings, in document order
    """
    links = []
    for element in data:
        for anchor in element.find_all('a'):
            href = anchor.get('href')
            if href and "spotlights" in href and "www." in href:
                links.append(href)
    return links

# Load the press-release page, take all of its <ul> elements and keep the
# spotlight links found inside them.
# NOTE(review): `scraping` is defined outside this part of the file; it is
# presumably returning a parsed document that supports find_all -- confirm.
soup = scraping("https://www.larimer.org/health/communicable-disease/coronavirus-covid-19/covid-19-public-health-orders-and-press-releases")
data = soup.find_all("ul")
links = findHrefs(data)
    engine.setProperty('rate', 120)
    engine.setProperty('voice', 'punjabi')
    engine.say(data)
    engine.runAndWait()


# Ask for the PDF to read; the ".pdf" extension is appended automatically.
name = raw_input(
    "Type the file name Excluding .pdf if that is in same folder else give abs path without .pdf --  "
)
name = name + ".pdf"
# NOTE(review): 'rb' is passed as PdfFileReader's second positional argument,
# which looks like a file mode given to the wrong call -- confirm against the
# PdfFileReader signature.
infile = PdfFileReader(name, 'rb')

# Page to start reading from; it is used directly as the index for getPage().
page = raw_input("Enter Page Number you wanna read --  ")
page = int(page)
num = page
reader_temp = infile.getPage(page)
# `data` is a module-level name; the speaking helpers defined elsewhere
# presumably read it -- confirm.
data = reader_temp.extractText()

# gtts sounds pretty better than pyttsx that's why I'm reading it through gtts
# gtts takes nearly 30 sec to save that into a file and start reading but pyttsx do it within few seconds
# So if you don't want to wait then you can put func2() here and delete lines which is below

func1()

# Continue with every page after the starting one.
for num in range(page + 1, infile.numPages):
    reader_temp = infile.getPage(num)
    data = reader_temp.extractText()
    name = str(num) + ".mp3"
    p1 = Process(target=func1)
    p1.start()
    # NOTE(review): func3(name) is called right here, in this process, and its
    # return value is what gets passed as `target`. If func3 itself should run
    # in the child process this needs `target=func3, args=(name,)` -- confirm.
    p2 = Process(target=func3(name))
Exemple #34
0
from PyPDF2 import PdfFileReader, PdfFileWriter
import tkinter as tk
from tkinter import filedialog

# Hide the empty Tk main window; only the file dialog is needed.
root = tk.Tk()
root.withdraw()

file_path = filedialog.askopenfilename()  # ask for the PDF file to encrypt
if not file_path:
    # The dialog was cancelled: stop before trying to open an empty path.
    raise SystemExit("No file selected; nothing to encrypt.")

# The output keeps the original file name, prefixed with "Encrypted ", and is
# created in the current working directory.
file_name = "Encrypted " + file_path.rsplit('/', 1)[-1]

pdf = PdfFileReader(file_path)  # read the chosen PDF
out_pdf = PdfFileWriter()  # writer that receives a copy of every page

pages = pdf.numPages  # number of pages to copy
for i in range(pages):
    page_details = pdf.getPage(i)
    out_pdf.addPage(page_details)

password = input("Enter your password for encryption: ")

out_pdf.encrypt(password)  # protect the copy with the given password

with open(file_name, 'wb') as output_stream:
    out_pdf.write(output_stream)

print(
    "\nYou can find your encrypted file under file name 'Encrypted filename(i.e. your original file name)' into your current directory!"
)
Exemple #35
0
class Renderer:
    """Draw ticket pages from a layout and merge them onto a background PDF.

    ``layout`` is an iterable of element dicts. Elements with ``type``
    ``"barcodearea"`` are drawn as QR codes and elements with ``type``
    ``"textarea"`` as paragraphs; other types are ignored.
    """

    def __init__(self, event, layout, background_file):
        """
        :param event: passed to ``get_variables`` to build the table of
            text variables
        :param layout: iterable of layout element dicts
        :param background_file: readable binary file object with the
            background PDF, or a falsy value when there is none
        """
        self.layout = layout
        self.background_file = background_file
        self.variables = get_variables(event)
        if self.background_file:
            self.bg_pdf = PdfFileReader(BytesIO(self.background_file.read()))
        else:
            self.bg_pdf = None

    @classmethod
    def _register_fonts(cls):
        """Register Open Sans and every font family from ``get_fonts()``.

        Variants are registered under the family name plus the suffix
        ``' I'``, ``' B'`` or ``' B I'``, which is how ``_draw_textarea``
        builds font names.
        """
        pdfmetrics.registerFont(
            TTFont('Open Sans', finders.find('fonts/OpenSans-Regular.ttf')))
        pdfmetrics.registerFont(
            TTFont('Open Sans I', finders.find('fonts/OpenSans-Italic.ttf')))
        pdfmetrics.registerFont(
            TTFont('Open Sans B', finders.find('fonts/OpenSans-Bold.ttf')))
        pdfmetrics.registerFont(
            TTFont('Open Sans B I',
                   finders.find('fonts/OpenSans-BoldItalic.ttf')))

        for family, styles in get_fonts().items():
            pdfmetrics.registerFont(
                TTFont(family, finders.find(styles['regular']['truetype'])))
            if 'italic' in styles:
                pdfmetrics.registerFont(
                    TTFont(family + ' I',
                           finders.find(styles['italic']['truetype'])))
            if 'bold' in styles:
                pdfmetrics.registerFont(
                    TTFont(family + ' B',
                           finders.find(styles['bold']['truetype'])))
            if 'bolditalic' in styles:
                pdfmetrics.registerFont(
                    TTFont(family + ' B I',
                           finders.find(styles['bolditalic']['truetype'])))

    def _draw_barcodearea(self, canvas: Canvas, op: OrderPosition, o: dict):
        """Draw a square QR code encoding ``op.secret``.

        ``o['size']``, ``o['left']`` and ``o['bottom']`` are in millimetres.
        """
        reqs = float(o['size']) * mm
        qrw = QrCodeWidget(op.secret,
                           barLevel='H',
                           barHeight=reqs,
                           barWidth=reqs)
        d = Drawing(reqs, reqs)
        d.add(qrw)
        qr_x = float(o['left']) * mm
        qr_y = float(o['bottom']) * mm
        renderPDF.draw(d, canvas, qr_x, qr_y)

    def _get_text_content(self, op: OrderPosition, order: Order, o: dict):
        """Return the text to show for the text element ``o``.

        ``o['content']`` selects the source: ``'other'`` uses the literal
        ``o['text']``, a ``'meta:<key>'`` value reads the event meta data,
        and any other value is looked up in ``self.variables``. Returns
        ``'(error)'`` when the content is empty or a variable fails to
        evaluate, and ``''`` for an unknown content name.
        """
        ev = op.subevent or order.event
        if not o['content']:
            return '(error)'
        if o['content'] == 'other':
            return o['text'].replace("\n", "<br/>\n")
        elif o['content'].startswith('meta:'):
            return ev.meta_data.get(o['content'][5:]) or ''
        elif o['content'] in self.variables:
            try:
                return self.variables[o['content']]['evaluate'](op, order, ev)
            except Exception:
                # A broken variable must not abort the whole ticket, but
                # KeyboardInterrupt / SystemExit are no longer swallowed.
                logger.exception('Failed to process variable.')
                return '(error)'
        return ''

    def _draw_textarea(self, canvas: Canvas, op: OrderPosition, order: Order,
                       o: dict):
        """Draw the text element ``o`` as a paragraph on ``canvas``.

        The text is cleaned with ``bleach`` so that only ``<br>`` tags
        remain, and every ``<br ...>`` form is normalised to ``<br/>``.
        """
        font = o['fontfamily']
        if o['bold']:
            font += ' B'
        if o['italic']:
            font += ' I'

        align_map = {'left': TA_LEFT, 'center': TA_CENTER, 'right': TA_RIGHT}
        # Each paragraph gets a uniquely named style.
        style = ParagraphStyle(name=uuid.uuid4().hex,
                               fontName=font,
                               fontSize=float(o['fontsize']),
                               leading=float(o['fontsize']),
                               autoLeading="max",
                               textColor=Color(o['color'][0] / 255,
                                               o['color'][1] / 255,
                                               o['color'][2] / 255),
                               alignment=align_map[o['align']])
        text = re.sub(
            "<br[^>]*>", "<br/>",
            bleach.clean(self._get_text_content(op, order, o) or "",
                         tags=["br"],
                         attributes={},
                         styles=[],
                         strip=True))
        p = Paragraph(text, style=style)
        p.wrapOn(canvas, float(o['width']) * mm, 1000 * mm)
        # p_size = p.wrap(float(o['width']) * mm, 1000 * mm)
        ad = getAscentDescent(font, float(o['fontsize']))
        p.drawOn(canvas,
                 float(o['left']) * mm,
                 float(o['bottom']) * mm - ad[1])

    def draw_page(self, canvas: Canvas, order: Order, op: OrderPosition):
        """Draw every layout element for ``op`` and finish the page."""
        for o in self.layout:
            if o['type'] == "barcodearea":
                self._draw_barcodearea(canvas, op, o)
            elif o['type'] == "textarea":
                self._draw_textarea(canvas, op, order, o)
        canvas.showPage()

    def render_background(self, buffer, title=_('Ticket')):
        """Merge each page in ``buffer`` onto the first background page.

        When no background PDF was supplied the pages are passed through
        unchanged.

        :param buffer: seekable binary stream holding the drawn pages
        :param title: value stored as the ``/Title`` metadata entry
        :returns: a ``BytesIO`` positioned at the start of the result
        """
        from PyPDF2 import PdfFileWriter, PdfFileReader
        buffer.seek(0)
        new_pdf = PdfFileReader(buffer)
        output = PdfFileWriter()

        for page in new_pdf.pages:
            if self.bg_pdf is None:
                output.addPage(page)
                continue
            # Merge into a copy so the background page itself is reused
            # unchanged for the next page.
            bg_page = copy.copy(self.bg_pdf.getPage(0))
            bg_page.mergePage(page)
            output.addPage(bg_page)

        output.addMetadata({
            '/Title': str(title),
            '/Creator': 'pretix',
        })
        outbuffer = BytesIO()
        output.write(outbuffer)
        outbuffer.seek(0)
        return outbuffer
Exemple #36
0
        print('usage: pcut --help')
        sys.exit(1)

    try:
        with open(args.input, 'rb') as pdf_file:
            file_reader = PdfFileReader(pdf_file)

            # Let's check if start and end arguments make sense.
            file_pages_number = file_reader.getNumPages()
            if args.start > file_pages_number:
                print('{}: this file only has {} pages, cannot start cutting at page {}'.format(
                    sys.argv[0], file_pages_number, args.start))
                sys.exit(1)
            elif args.end > file_pages_number:
                print('{}: this file only has {} pages, cannot end cutting at page {}'.format(
                    sys.argv[0], file_pages_number, args.end))
                sys.exit(1)

            output_pdf = PdfFileWriter()
            for i in range(args.start, args.end + 1):
                page = file_reader.getPage(i)
                output_pdf.addPage(page)

            output_file = open(args.output, 'wb')
            output_pdf.write(output_file)
            output_file.close()
    except FileNotFoundError:
        print('{}: file \"{}\" not found'.format(sys.argv[0], args.input))
    except utils.PdfReadError:
        print('{}: that is not a PDF!'.format(sys.argv[0]))
Exemple #37
0
    #     print(f'This is the start of the output for: {files}')
    headerset = 0
    data_row = []
    pdf_file = open(f'{pdf_output_path}/{files}', 'rb')
    pdfreader = PdfFileReader(pdf_file)
    num_pages = pdfreader.numPages
    if num_pages < 23:
        up_limit = num_pages
    else:
        up_limit = 23
    dfset = 0
    county_added_list = []
    for pages in range(3, up_limit):

        datalines = 0
        page = pdfreader.getPage(pages)

        if page.extractText().find('Coronavirus: PUI testing by county') > 0 or page.extractText().find(
                'Coronavirus: All persons with tests reported') > 0:
            pdf_writer.addPage(page)
            page_text = page.extractText()
            col_num = 0
            index_num = 0
            for lines in page_text.split('\n'):
                line_n = len(page_text.split('\n'))
                write_line = 0
                if not re.match(text_line_search, lines, flags=0) and not re.match(date_search, lines,
                                                                                   flags=0) and datalines == 0 and lines not in county_list and headerset != 1:
                    header.append(lines)
                if re.match(date_search, lines, flags=0):
                    date = lines[0:12]
Exemple #38
0
def extractPages(nameList):
    """Build, print and archive one three-page PDF per ticket page.

    For each page index ``i`` of the tickets PDF a file ``<nameList[i]>.pdf``
    is written containing ticket page ``i``, tag page ``i`` and a copy of
    page 0 of ``Traveler.pdf`` stamped with the name. The files are sent to
    the printer and, once the user confirms, moved into a dated folder
    below the location read from ``OutputLocation.txt``.

    Relies on names defined outside this function: ``ticketsD``, ``tagsD``,
    ``original``, ``final``, ``currentprinter`` and the ``month_entry`` /
    ``day_entry`` / ``year_entry`` widgets.

    :param nameList: list of name strings; entries are temporarily suffixed
        with ``"$"`` while printing and restored afterwards.
        NOTE(review): indexed by ticket page number, so it presumably needs
        at least as many entries as the tickets PDF has pages -- confirm.
    """
    global original
    all_tickets = PdfFileReader(ticketsD)
    all_tag = PdfFileReader(tagsD)

    # Step 1: for every name, draw it on a one-page PDF ("Mergeable.pdf",
    # overwritten each time) and merge that onto page 0 of Traveler.pdf,
    # saving the result as "<name>WM.pdf".
    for i in range(len(nameList)):
        c = canvas.Canvas("Mergeable.pdf")
        c.drawString(100, 740, nameList[i])
        c.showPage()
        c.save()
        watermark = PdfFileReader("Mergeable.pdf")
        watermarkpage = watermark.getPage(0)
        pdf = PdfFileReader("Traveler.pdf")
        pdfwrite = PdfFileWriter()
        pdfpage = pdf.getPage(0)
        pdfpage.mergePage(watermarkpage)
        pdfwrite.addPage(pdfpage)
        with open(nameList[i] + "WM.pdf", 'wb') as fh:
            pdfwrite.write(fh)

    pgnum = all_tickets.getNumPages()

    # Step 2: read the archive location (first line of OutputLocation.txt).
    outloc = open('OutputLocation.txt', 'r')
    end = outloc.readline().strip()
    outloc.close()
    # `prevdate` is read here but not used again inside this function.
    dat = open('date&default.txt', 'r')
    prevdate = dat.readline().strip()
    dat.close()

    # Step 3: create the destination folder "<end>/<month>_<day>_<year>".
    # If it already exists, create the first free "...(n)" variant for
    # n = 1..20 and remember n in `puthere`. If all 20 exist, `doubleloc`
    # stays False and the files later go to the plain dated folder.
    doubleloc = False
    if not os.path.isdir(end + "/" + month_entry.get() + "_" +
                         day_entry.get() + "_" + year_entry.get()):
        dir = os.path.join(end + "/" + month_entry.get() + "_" +
                           day_entry.get() + "_" + year_entry.get())
        if not os.path.exists(dir):
            os.mkdir(dir)
    else:
        for i in range(20):
            if not os.path.isdir(end + "/" + month_entry.get() + "_" +
                                 day_entry.get() + "_" + year_entry.get() +
                                 "(" + str(i + 1) + ")"):
                dir = os.path.join(end + "/" + month_entry.get() + "_" +
                                   day_entry.get() + "_" + year_entry.get() +
                                   "(" + str(i + 1) + ")")
                if not os.path.exists(dir):
                    os.mkdir(dir)
                    puthere = i + 1
                    doubleloc = True
                    break

    # Step 4: assemble "<name>.pdf" = ticket page + tag page + stamped page.
    for i in range(pgnum):
        cons = PdfFileReader(nameList[i] + 'WM.pdf')
        curr_ticket = all_tickets.getPage(i)
        curr_tag = all_tag.getPage(i)
        constant = cons.getPage(0)
        pdf_writer = PdfFileWriter()
        pdf_writer.addPage(curr_ticket)
        pdf_writer.addPage(curr_tag)
        pdf_writer.addPage(constant)
        with Path(str(nameList[i]) + ".pdf").open(mode="wb") as output_file:
            pdf_writer.write(output_file)

    # Step 5: print. First, in the order of `final`, every name whose first
    # 8 characters occur in a `final` entry; a trailing "$" marks it as
    # already sent to the printer.
    for k in range(len(final)):
        for m in range(len(nameList)):
            if nameList[m][:8] in final[k]:
                win32api.ShellExecute(0, 'print',
                                      str(nameList[m]) + '.pdf',
                                      currentprinter, '.', 0)
                nameList[m] = nameList[m] + "$"
                print("Printed " + str(nameList[m]) + ".pdf")
    # Then print whatever is still unmarked and strip the "$" markers again.
    for m in range(len(nameList)):
        if nameList[m][-1] != "$":
            pass
            win32api.ShellExecute(0, 'print',
                                  str(nameList[m]) + '.pdf', currentprinter,
                                  '.', 0)
        else:
            nameList[m] = nameList[m][:-1]
    # The stamped single pages are no longer needed.
    for i in range(len(nameList)):
        os.remove(str(nameList[i]) + "WM.pdf")
    # Wait for the user; the loop only repeats if the literal text "null"
    # is entered.
    isdone = "null"
    while isdone == "null":
        isdone = input(
            "Press Enter if Printing is Done (PDFs are no longer open on PC): "
        )
    #time.sleep(30)
    # Step 6: move the generated PDFs into the destination folder. A
    # PermissionError restarts the whole pass; shutil.Error from an
    # individual move is ignored.
    while True:
        try:
            for i in range(pgnum):
                if doubleloc == False:
                    try:
                        shutil.move(
                            (original + "/" + str(nameList[i]) + ".pdf"),
                            (end + "/" + month_entry.get() + "_" +
                             day_entry.get() + "_" + year_entry.get()))
                    except shutil.Error:
                        pass
                else:
                    try:
                        shutil.move(
                            (original + "/" + str(nameList[i]) + ".pdf"),
                            (end + "/" + month_entry.get() + "_" +
                             day_entry.get() + "_" + year_entry.get() + "(" +
                             str(puthere) + ")"))
                    except shutil.Error:
                        pass
            break
        except PermissionError:
            print("Waiting for printing")
# Creating pdf from existing pdf

from PyPDF2 import PdfFileWriter, PdfFileReader

# Copy every page of sample.pdf into a new document saved as output.pdf.
pdf_writer = PdfFileWriter()
pdf_reader = PdfFileReader('sample.pdf')

for page in range(pdf_reader.numPages):
    obj = pdf_reader.getPage(page)
    pdf_writer.addPage(obj)

# Close the output before reporting success so the data is flushed to disk.
with open('output.pdf', 'wb') as output_file:
    pdf_writer.write(output_file)
print('File created successfully')
Exemple #40
0
from PyPDF2 import PdfFileWriter, PdfFileReader

# Split CERN.pdf into one single-page file per page: page0.pdf, page1.pdf, ...
# The input stays open while pages are written and is closed afterwards.
with open("CERN.pdf", "rb") as input_stream:
    inputpdf = PdfFileReader(input_stream)

    for i in range(inputpdf.numPages):
        output = PdfFileWriter()
        output.addPage(inputpdf.getPage(i))
        with open("page%s.pdf" % i, "wb") as outputStream:
            output.write(outputStream)
Exemple #41
0
def post_create(request):
    """Handle the upload form: split the PDF, classify every page, write a CSV.

    For a valid form the uploaded PDF is saved, then for each page:
    the page is written to its own PDF under ``dest_pdf``, rendered to a PNG
    under ``dest_png``, cropped with ``crop`` and passed to the external
    labelling script ``src_label``.  The fourth line of that script's output
    is parsed for the predicted class (its first character) and the score
    (the ``name=value`` text between parentheses).  One row per page is
    appended to ``main_csv`` and the class to ``classes``.

    Afterwards the plain files in ``pdfs_norm``, ``dest_pdf`` and ``dest_png``
    are removed and ``main_csv`` is written to ``data.csv`` in the instance's
    cropped folder.

    Returns a redirect to ``/after/`` on success, otherwise the rendered
    upload form.
    """
    form = PostForm(request.POST or None, request.FILES or None)
    if not form.is_valid():
        context = {"form": form}
        return render(request, 'up_conv/base.html', context)

    instance = form.save(commit=False)
    instance.save()
    folder.append(str(instance))
    # NOTE(review): this creates the folder of the FIRST entry ever appended
    # to `folder`, while everything below uses str(instance) -- confirm that
    # `folder` is reset between requests or that `crop` creates its folder.
    try:
        os.makedirs(cropped_folder + folder[0])
    except OSError:
        # Best effort: the directory usually exists already.
        pass

    inputpdf = PdfFileReader(instance.pdf.open())
    for k in range(inputpdf.numPages):
        # Write page k to its own single-page PDF.
        page_writer = PdfFileWriter()
        page_writer.addPage(inputpdf.getPage(k))
        f_name = dest_pdf + str(instance) + '-page-' + str(k + 1) + '.pdf'
        with open(f_name, "wb") as outputStream:
            page_writer.write(outputStream)
        page = convert_from_path(f_name, dpi=100)

        # Render the page to PNG and crop it.
        fname = dest_png + str(instance) + '-page-' + str(k + 1) + '.png'
        page[0].save(fname, 'PNG')
        img = cv2.imread(fname)
        fn = crop(img, str(instance), k, cropped_folder + str(instance))

        # Run the model.  The arguments are passed as a list (no shell) so a
        # file name derived from the upload cannot inject shell syntax.
        # NOTE(review): assumes src_label and src_graph are single paths,
        # not strings carrying extra command-line options -- confirm.
        command = [
            "python3",
            str(src_label),
            "--graph={}".format(src_graph),
            "--image={}".format(fn),
        ]
        test = subprocess.Popen(command, stdout=subprocess.PIPE)
        raw_output, _ = test.communicate()
        result_lines = raw_output.decode("utf-8").split('\n')

        cropped_name = str(fn.split('/')[-1])
        img_list.append(cropped_name)

        # Fourth output line: first character is the class, the text in
        # parentheses is "<name>=<score>".
        s = result_lines[3]
        c = s[0]
        num = s[s.find("(") + 1:s.find(")")].split('=')[1]

        temp = [cropped_name, c, num]
        if float(num) > 0.9:
            temp.append("Valid")
        else:
            temp.append("ValidationRequired")

        main_csv.append(temp)
        classes.append(Encoder[c])

    # Remove the intermediate files; failures to delete are ignored on purpose.
    for directory in (pdfs_norm, dest_pdf, dest_png):
        for file in os.listdir(directory):
            f_path = os.path.join(directory, file)
            try:
                if os.path.isfile(f_path):
                    os.unlink(f_path)
            except OSError:
                pass

    # The with-statement closes the file; no explicit close() is needed.
    with open(cropped_folder + str(instance) + '/data.csv',
              'w') as csvFile:
        writer = csv.writer(csvFile)
        writer.writerows(main_csv)

    messages.success(request, "Success!")

    # redirect() to a page
    return redirect('/after/')
Exemple #42
0
from PyPDF2 import PdfFileWriter, PdfFileReader, PdfFileMerger
import os


caminho_pdf = "pdf"
# novo_pdf = PyPDF2.PdfFileMerger()
#
# for root, dirs, files in os.walk(caminho_pdf):
#     for file in files:
#         camininho_completo = os.path.join(root, file)
#
#         arquivo_pdf = open(camininho_completo, "rb")
#         novo_pdf.append(arquivo_pdf)
#
#
# with open(f"{caminho_pdf}/novo_arquivo.pdf", "wb") as meu_novo_pdf:
#     novo_pdf.write(meu_novo_pdf)

# Split pdf/arquivo1.pdf into one single-page document per page, written to
# novos_pdf/<zero-based page index>.pdf.
with open("pdf/arquivo1.pdf", "rb") as source_file:
    source_reader = PdfFileReader(source_file)
    page_total = source_reader.getNumPages()
    for page_index in range(page_total):
        target_path = f"novos_pdf/{page_index}.pdf"
        single_page_writer = PdfFileWriter()
        single_page_writer.addPage(source_reader.getPage(page_index))

        with open(target_path, "wb") as target_file:
            single_page_writer.write(target_file)
Exemple #43
0
# Generate one personalised offer letter per CSV row.  Column 0 holds the
# date and column 1 the candidate name; both are stamped over white boxes
# on the first page of the template.
with open('Offer Letters Candidates.csv', 'r') as csvFile:
    reader = csv.reader(csvFile)
    for row in reader:
        name = row[1]
        date = row[0]
        #-------------------------------------------------
        # Build a one-page overlay in memory: white rectangles to blank the
        # placeholder areas, then the name and date in black.
        packet = io.BytesIO()
        can = canvas.Canvas(packet, pagesize=letter)
        can.setFillColorRGB(1, 1, 1)
        can.rect(95, 645, 120, 15, fill=1, stroke=0)
        can.rect(103, 689, 120, 20, fill=1, stroke=0)
        can.setFillColorRGB(0, 0, 0)
        can.setFont('Calibri', 12)
        can.drawString(97, 649, name)
        can.setFont('Calibri', 12)
        can.drawString(104, 689, date)
        can.save()
        #--------------------------------------------------
        packet.seek(0)
        new_pdf = PdfFileReader(packet)
        # The template is re-read for every row because mergePage modifies
        # the page object in place.  It must stay open until the merged page
        # has been written, and is closed right after (the original leaked
        # one handle per row).
        with open("Offer Letter Template.pdf", "rb") as templateStream:
            existing_pdf = PdfFileReader(templateStream)
            output = PdfFileWriter()
            page = existing_pdf.getPage(0)
            page.mergePage(new_pdf.getPage(0))
            output.addPage(page)
            with open("Offer Letter " + name + ".pdf", "wb") as outputStream:
                output.write(outputStream)
# Location of the practice PDF below the user's home directory.
pdf_path = Path.home().joinpath(
    "creating-and-modifying-pdfs",
    "practice_files",
    "Pride_and_Prejudice.pdf",
)

pdf = PdfFileReader(str(pdf_path))

# Page count, the full metadata object, then just the title.
print(pdf.getNumPages())
print(pdf.documentInfo)
print(pdf.documentInfo.title)

# ---------------------------
# Extracting Text From a Page
# ---------------------------

# Inspect the first page on its own ...
first_page = pdf.getPage(0)
print(type(first_page))
print(first_page.extractText())

# ... then dump the text of every page in document order.
for page in pdf.pages:
    print(page.extractText())

# -----------------------
# Putting It All Together
# -----------------------

from pathlib import Path  # noqa
from PyPDF2 import PdfFileReader  # noqa
class pypdfProcessor(object):
    """ Open a PDF file with PdfFileReader, turn the content stream of each page into
        a list of drawing commands and replay those commands on a graphics context
        on demand.

        Each drawing command is a three-item list [name, args, kwargs]; the lists are
        stored per page number in self.pagedrawings.
    """
    def __init__(self, parent, fileobj, showloadprogress):
        """ parent:           object providing GoPage() and font_scale; also used as the
                              window for text measurement
            fileobj:          passed unchanged to PdfFileReader
            showloadprogress: show a progress dialog while loading the whole file
        """
        self.parent = parent
        self.showloadprogress = showloadprogress
        self.pdfdoc = PdfFileReader(fileobj)
        self.numpages = self.pdfdoc.getNumPages()
        # page size is taken from the media box of the first page
        page1 = self.pdfdoc.getPage(0)
        self.pagewidth = float(page1.mediaBox.getUpperRight_x())
        self.pageheight = float(page1.mediaBox.getUpperRight_y())
        self.pagedrawings = {}      # page number -> list of drawing commands
        self.unimplemented = {}     # operators already reported as unsupported
        self.formdrawings = {}      # form XObject name -> list of drawing commands

    # These methods interpret the PDF contents as a set of drawing commands

    def Progress(self, ptype, value):
        """ Called at regular intervals during DrawFile.
            ptype is 'start' (value = maximum), 'progress' (value = current position)
            or 'end' (value unused).
        """
        if ptype == 'start':
            msg = 'Reading pdf file'
            self.progbar = wx.ProgressDialog('Load file', msg, value, None,
                         wx.PD_AUTO_HIDE|
                            wx.PD_ESTIMATED_TIME|wx.PD_REMAINING_TIME)
        elif ptype == 'progress':
            self.progbar.Update(value)
        elif ptype == 'end':
            self.progbar.Destroy()

    def DrawFile(self, frompage, topage):
        """ Build set of drawing commands from PDF contents. Ideally these could be drawn
            straight into a PseudoDC and the visible section painted directly into
            scrolled window, but we need to be able to zoom and scale the output quickly
            without having to rebuild the drawing commands (slow). So roll our
            own command lists, one per page, into self.pagedrawings.

            Pages frompage..topage (inclusive) are processed, then the parent is asked
            to show frompage.
        """
        numpages_generated = 0
        # only report progress when the complete document is being loaded
        rp = (self.showloadprogress and frompage == 0 and topage == self.numpages-1)
        if rp: self.Progress('start', self.numpages)
        for self.pageno in range(frompage, topage+1):
            self.gstate = pdfState()    # state is reset with every new page
            self.saved_state = []
            self.page = self.pdfdoc.getPage(self.pageno)
            numpages_generated += 1
            pdf_fonts = self.FetchFonts(self.page)
            self.pagedrawings[self.pageno] = self.ProcessOperators(
                                    self.page.extractOperators(), pdf_fonts)
            if rp: self.Progress('progress', numpages_generated)

        if rp: self.Progress('end', None)
        self.parent.GoPage(frompage)

    def RenderPage(self, gc, pageno, scale=None):
        """ Render the set of pagedrawings
            In a pdf file, bitmaps are treated as being of unit width and height and
            are scaled via a previous ConcatTransform containing the corresponding width
            and height as scale factors. wx.GraphicsContext/Cairo appear not to respond to
            this so scaling is removed from transform and width & height are added
            to the Drawbitmap call.

            gc is the graphics context to draw on; scale is accepted but not used.
        """
        drawdict = {'ConcatTransform': gc.ConcatTransform,
                    'PushState': gc.PushState,
                    'PopState': gc.PopState,
                    'SetFont': gc.SetFont,
                    'SetPen': gc.SetPen,
                    'SetBrush': gc.SetBrush,
                    'DrawText': gc.DrawText,
                    'DrawBitmap': gc.DrawBitmap,
                    'CreatePath': gc.CreatePath,
                    'DrawPath': gc.DrawPath }
        # commands that are methods of the current graphics path rather than of gc
        pathcmds = ('MoveToPoint', 'AddLineToPoint', 'AddCurveToPoint',
                    'AddRectangle', 'CloseSubpath')
        gp = None                   # current graphics path, set by 'CreatePath'
        for drawcmd, args, kwargs in self.pagedrawings[pageno]:
            if drawcmd == 'ConcatTransform':
                cm = gc.CreateMatrix(*args, **kwargs)
                args = (cm,)
            if drawcmd == 'CreatePath':
                gp = drawdict[drawcmd](*args, **kwargs)
                continue
            elif drawcmd == 'DrawPath':
                # replace the 'GraphicsPath' placeholder with the real path
                args = (gp, args[1])
            if drawcmd in drawdict:
                drawdict[drawcmd](*args, **kwargs)
            elif drawcmd in pathcmds and gp is not None:
                getattr(gp, drawcmd)(*args, **kwargs)

    def FetchFonts(self, currentobject):
        """ Return a dict mapping each font key in the /Resources of currentobject
            (a page or form) to its /BaseFont name without the leading '/'.
            Returns what was collected so far if a key is missing.
        """
        pdf_fonts = {}
        try:
            fonts = currentobject["/Resources"].getObject()['/Font']
            for key in fonts:
                pdf_fonts[key] = fonts[key]['/BaseFont'][1:]     # remove the leading '/'
        except KeyError:
            pass
        return pdf_fonts

    def ProcessOperators(self, opslist, pdf_fonts):
        """ Interpret each (operand, operator) pair in opslist, updating self.gstate,
            and return the resulting list of drawing commands.
            pdf_fonts maps font keys to base font names (see FetchFonts).
        """
        drawlist = []
        path = []
        for operand, operator in opslist :
            g = self.gstate
            if operator == 'cm':        # new transformation matrix
                # some operands need inverting because directions of y axis
                # in pdf and graphics context are opposite
                a, b, c, d, e, f = [float(v) for v in operand]
                drawlist.append(['ConcatTransform', (a, -b, -c, d, e, -f), {}])
            elif operator == 'q':       # save state
                self.saved_state.append(copy.deepcopy(g))
                drawlist.append(['PushState', (), {}])
            elif operator == 'Q':       # restore state
                self.gstate = self.saved_state.pop()
                drawlist.append(['PopState', (), {}])
            elif operator == 'RG':      # Stroke RGB
                rs, gs, bs = [int(float(v)*255) for v in operand]
                g.strokeRGB = wx.Colour(rs, gs, bs)
            elif operator == 'rg':      # Fill RGB
                rf, gf, bf = [int(float(v)*255) for v in operand]
                g.fillRGB = wx.Colour(rf, gf, bf)
            elif operator == 'K':       # Stroke CMYK
                rs, gs, bs = self.ConvertCMYK(operand)
                g.strokeRGB = wx.Colour(rs, gs, bs)
            elif operator == 'k':       # Fill CMYK
                rf, gf, bf = self.ConvertCMYK(operand)
                g.fillRGB = wx.Colour(rf, gf, bf)
            elif operator == 'w':       # Line width
                g.lineWidth = float(operand[0])
            elif operator == 'J':       # Line cap
                ix = int(float(operand[0]))
                g.lineCapStyle = {0: wx.CAP_BUTT, 1: wx.CAP_ROUND,
                                              2: wx.CAP_PROJECTING}[ix]
            elif operator == 'j':       # Line join
                ix = int(float(operand[0]))
                g.lineJoinStyle = {0: wx.JOIN_MITER, 1: wx.JOIN_ROUND,
                                              2: wx.JOIN_BEVEL}[ix]
            elif operator == 'd':       # Line dash pattern
                # a real list: it is tested for truth and handed to SetDashes later
                g.lineDashArray = [int(v) for v in operand[0]]
                g.lineDashPhase = int(operand[1])
            elif operator in ('m', 'c', 'l', 're', 'v', 'y', 'h'):    # path defining ops
                # a real list: DrawPath indexes into the coordinates
                path.append([[float(v) for v in operand], operator])
            elif operator in ('b', 'B', 'b*', 'B*', 'f', 'F', 'f*',
                                           's', 'S', 'n'):    # path drawing ops
                drawlist.extend(self.DrawPath(path, operator))
                path = []
            elif operator == 'BT':      # begin text object
                g.textMatrix = [1, 0, 0, 1, 0, 0]
                g.textLineMatrix = [1, 0, 0, 1, 0, 0]
            elif operator == 'ET':      # end text object
                continue
            elif operator == 'Tm':      # text matrix
                # two independent lists: both are modified in place by later operators
                g.textMatrix = [float(v) for v in operand]
                g.textLineMatrix = [float(v) for v in operand]
            elif operator == 'TL':      # text leading
                g.leading = float(operand[0])
            #elif operator == 'Tc':     # character spacing
            #    g.charSpacing = float(operand[0])
            elif operator == 'Tw':      # word spacing
                g.wordSpacing = float(operand[0])
            elif operator == 'Ts':      # super/subscript
                g.textRise = float(operand[0])
            elif operator == 'Td':      # next line via offsets
                g.textLineMatrix[4] += float(operand[0])
                g.textLineMatrix[5] += float(operand[1])
                g.textMatrix = copy.copy(g.textLineMatrix)
            elif operator == 'T*':      # next line via leading
                g.textLineMatrix[5] -= g.leading if g.leading is not None else 0
                g.textMatrix = copy.copy(g.textLineMatrix)
            elif operator == 'Tf':      # text font
                g.font = pdf_fonts[operand[0]]
                g.fontSize = float(operand[1])
            elif operator == 'Tj':      # show text
                drawlist.extend(self.DrawTextString(operand[0]))
            elif operator == 'Do':      # invoke named XObject
                dlist = self.InsertXObject(operand[0])
                if dlist:               # may be unimplemented decode
                    drawlist.extend(dlist)
            elif operator == 'INLINE IMAGE':    # special pyPdf case + operand is a dict
                dlist = self.InlineImage(operand)
                if dlist:               # may be unimplemented decode
                    drawlist.extend(dlist)
            else:                       # report once
                if operator not in self.unimplemented:
                    if VERBOSE: print('PDF operator %s is not implemented' % operator)
                    self.unimplemented[operator] = 1

        # Fix bitmap transform. Remove the scaling from any transform matrix that precedes
        # a DrawBitmap operation as the scaling is now done in that operation.
        for k in range(len(drawlist)-1):
            if drawlist[k][0] == 'ConcatTransform' and drawlist[k+1][0] == 'DrawBitmap':
                args = list(drawlist[k][1])
                args[0] = 1.0
                args[3] = 1.0
                drawlist[k][1] = tuple(args)
        return drawlist

    def SetFont(self, pdfont, size):
        """ Returns wx.Font instance from supplied pdf font information.
            Sets self.knownfont to False when the name matches none of the
            recognised font families (Arial is used in that case).
        """
        self.knownfont = True
        pdfont = pdfont.lower()
        if 'courier' in pdfont:
            family = wx.FONTFAMILY_MODERN
            font = 'Courier New'
        elif 'helvetica' in pdfont:
            family = wx.FONTFAMILY_SWISS
            font = 'Arial'
        elif 'times' in pdfont:
            family = wx.FONTFAMILY_ROMAN
            font = 'Times New Roman'
        elif 'symbol' in pdfont:
            family = wx.FONTFAMILY_DEFAULT
            font = 'Symbol'
        elif 'zapfdingbats' in pdfont:
            family = wx.FONTFAMILY_DEFAULT
            font = 'Wingdings'
        else:
            if VERBOSE: print('Unknown font %s' % pdfont)
            self.knownfont = False
            family = wx.FONTFAMILY_SWISS
            font = 'Arial'

        weight = wx.FONTWEIGHT_NORMAL
        if 'bold' in pdfont:
            weight = wx.FONTWEIGHT_BOLD
        style = wx.FONTSTYLE_NORMAL
        if 'oblique' in pdfont or 'italic' in pdfont:
            style = wx.FONTSTYLE_ITALIC
        return wx.Font(max(1,size), family, style, weight, faceName=font)

    def DrawTextString(self, text):
        """ Return the drawing commands for text in the current font and fill colour.
            With positive word spacing the text is split at spaces and each word is
            drawn separately; word spacing only works for horizontal text (??)
        """
        dlist = []
        g = self.gstate
        f  = self.SetFont(g.font, g.fontSize*self.parent.font_scale)
        dlist.append(['SetFont', (f, g.fillRGB), {}])
        if g.wordSpacing > 0:
            textlist = text.split(' ')
        else:
            textlist = [text,]
        for item in textlist:
            dlist.append(self.DrawTextItem(item, f))
        return dlist

    def DrawTextItem(self, textitem, f):
        """ Return a 'DrawText' command for textitem at the current text position
            and advance the x position in the text matrix by the item's width.
        """
        dc = wx.ClientDC(self.parent)      # dummy dc for text extents
        g = self.gstate
        x = g.textMatrix[4]
        y = g.textMatrix[5] + g.textRise
        if g.wordSpacing > 0:
            textitem += ' '
        wid, ht, descend, xlead = dc.GetFullTextExtent(textitem, f)
        if have_rlwidth and self.knownfont:   # use ReportLab stringWidth if available
            width = stringWidth(textitem, g.font, g.fontSize)
        else:
            width = wid
        g.textMatrix[4] += (width + g.wordSpacing)  # update current x position
        return ['DrawText', (textitem, x, -y-(ht-descend)), {}]

    def DrawPath(self, path, action):
        """ Stroke and/or fill the defined path depending on operator.
            path is a list of [coordinates, operator] entries; action is the path
            painting operator. Returns the list of drawing commands. y coordinates
            are negated for the graphics context.
        """
        dlist = []
        g = self.gstate
        # operator -> (stroke?, fill?, fill rule)
        acts = {'S':  (1, 0, 0),
                's':  (1, 0, 0),
                'f':  (0, 1, wx.WINDING_RULE),
                'F':  (0, 1, wx.WINDING_RULE),
                'f*': (0, 1, wx.ODDEVEN_RULE),
                'B':  (1, 1, wx.WINDING_RULE),
                'B*': (1, 1, wx.ODDEVEN_RULE),
                'b':  (1, 1, wx.WINDING_RULE),
                'b*': (1, 1, wx.ODDEVEN_RULE),
                'n':  (0, 0, 0) }
        stroke, fill, rule = acts[action]
        if action in ('s', 'b', 'b*'):
            path.append([[], 'h'])      # close path

        if stroke:
            if g.lineDashArray:
                style = wx.USER_DASH
            else:
                style = wx.SOLID
            cpen = wx.Pen(g.strokeRGB, g.lineWidth, style)
            cpen.SetCap(g.lineCapStyle)
            cpen.SetJoin(g.lineJoinStyle)
            if g.lineDashArray:
                cpen.SetDashes(g.lineDashArray)
            dlist.append(['SetPen', (cpen,), {}])
        else:
            dlist.append(['SetPen', (wx.TRANSPARENT_PEN,), {}])

        if fill:
            dlist.append(['SetBrush', (wx.Brush(g.fillRGB),), {}])
        else:
            dlist.append(['SetBrush', (wx.TRANSPARENT_BRUSH,), {}])

        dlist.append(['CreatePath', (), {}])
        # current point; the 'v' operator uses it as its first control point
        xc = yc = 0.0
        for xylist, op in path:
            if op == 'm':           # move (to) current point
                x0 = xc = xylist[0]
                y0 = yc = -xylist[1]
                dlist.append(['MoveToPoint', (x0, y0), {}])
            elif op == 'l':         # draw line
                x2 = xylist[0]
                y2 = -xylist[1]
                dlist.append(['AddLineToPoint', (x2, y2), {}])
                xc = x2
                yc = y2
            elif op == 're':        # draw rectangle (x,y at top left)
                x = xylist[0]
                y = -xylist[1]
                w = xylist[2]
                h = xylist[3]
                dlist.append(['AddRectangle', (x, y-h, w, h), {}])
            elif op in ('c', 'v', 'y'):         # draw Bezier curve
                args = []
                if op == 'v':
                    args.extend([xc, yc])
                args.extend([xylist[0], -xylist[1],
                                xylist[2], -xylist[3]])
                if op == 'y':
                    args.extend([xylist[2], -xylist[3]])
                if op == 'c':
                    args.extend([xylist[4], -xylist[5]])
                dlist.append(['AddCurveToPoint', args, {}])
                # the end point of the curve becomes the current point
                xc, yc = args[4], args[5]
            elif op == 'h':
                dlist.append(['CloseSubpath', (), {}])
        # 'GraphicsPath' is a placeholder replaced by the real path in RenderPage
        dlist.append(['DrawPath', ('GraphicsPath', rule), {}])
        return dlist

    def InsertXObject(self, name):
        """ Return the drawing commands for the XObject called name in the resources
            of the current page. An XObject can be an image or a 'form' (an arbitrary
            PDF sequence); form drawings are cached in self.formdrawings by name.
        """
        dlist = []
        xobject = self.page["/Resources"].getObject()['/XObject']
        stream = xobject[name]
        if stream.get('/Subtype') == '/Form':
            # insert contents into current page drawing
            if not name in self.formdrawings:       # extract if not already done
                pdf_fonts = self.FetchFonts(stream)
                matrix = stream.get('/Matrix')
                form_ops = ContentStream(stream, self.pdfdoc).operations
                oplist = [([], 'q')]                    # push state
                if matrix is not None:                  # /Matrix may be absent
                    oplist.append((matrix, 'cm'))       # apply matrix
                oplist.extend(form_ops)                 # add form contents
                oplist.append(([], 'Q'))                # restore original state
                self.formdrawings[name] = self.ProcessOperators(oplist, pdf_fonts)
            dlist.extend(self.formdrawings[name])
        elif stream.get('/Subtype') == '/Image':
            width = stream['/Width']
            height = stream['/Height']
            filters = stream.get('/Filter', [])     # image data may be unfiltered
            item = self.AddBitmap(stream._data, width, height, filters)
            if item:            # may be unimplemented
                dlist.append(item)
        return dlist

    def InlineImage(self, operand):
        """ Return the drawing commands for an inline image.
            operand is a dict with the image bytes under 'data' and the image
            dictionary under 'settings'.
        """
        dlist = []
        data = operand.get('data')
        settings = operand.get('settings')
        width = settings['/W']
        height = settings['/H']
        filters = settings.get('/F', [])            # image data may be unfiltered
        item = self.AddBitmap(data, width, height, filters)
        if item:            # may be unimplemented
            dlist.append(item)
        return dlist

    def AddBitmap(self, data, width, height, filters):
        """ Return a 'DrawBitmap' command for a wx.Bitmap built from data after
            applying the decoders named in filters, or an empty list when a
            required decoder is not implemented.
        """
        if '/A85' in filters or '/ASCII85Decode' in filters:
            data = _AsciiBase85DecodePYTHON(data)
        if '/Fl' in filters or '/FlateDecode' in filters:
            data = FlateDecode.decode(data, None)
        if '/CCF' in filters or  '/CCITTFaxDecode' in filters:
            if VERBOSE:
                print('PDF operation /CCITTFaxDecode is not implemented')
            return []
        if '/DCT' in filters or '/DCTDecode' in filters:
            # NOTE(review): cStringIO exists on Python 2 only; io.BytesIO is the
            # Python 3 equivalent -- confirm which interpreter this file targets
            stream = cStringIO.StringIO(data)
            image = wx.ImageFromStream(stream, wx.BITMAP_TYPE_JPEG)
            bitmap = wx.BitmapFromImage(image)
        else:
            bitmap = wx.BitmapFromBuffer(width, height, data)
        return ['DrawBitmap', (bitmap, 0, 0-height, width, height), {}]

    def ConvertCMYK(self, operand):
        """ Convert the four CMYK values (0 to 1.0) in operand to the nearest RGB.
            Returns a tuple (r, g, b) of ints in the range 0 to 255.
        """
        c, m, y, k = [float(v) for v in operand]
        # int() because round() returns a float on Python 2
        r = int(round((1-c)*(1-k)*255))
        g = int(round((1-m)*(1-k)*255))
        b = int(round((1-y)*(1-k)*255))
        return (r, g, b)
Exemple #46
0
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 29 16:05:03 2019

@author: Administrator
"""

import io
from PyPDF2 import PdfFileReader, PdfFileWriter
from wand.image import Image

# Render the first page of the source PDF to a JPEG.
# Raw string: the original literal contained the invalid escape "\w"; the
# resulting path is unchanged.
pdfile = PdfFileReader(r'E:\YJZ\work\化工安全\M02模块送审\修改\904.pdf')
pageobj = pdfile.getPage(0)

# Write the single page to an in-memory PDF so it can be handed to wand.
dst_pdf = PdfFileWriter()
dst_pdf.addPage(pageobj)
pdf_bytes = io.BytesIO()
dst_pdf.write(pdf_bytes)
pdf_bytes.seek(0)

# The context manager releases the image resources even if saving fails.
with Image(file=pdf_bytes, resolution=500) as img:
    img.format = 'jpg'
    img.save(filename='2.jpg')
Exemple #47
0
def send_mail(letter_bureau, client):
    """Email a dispute letter and the client's ID proof to the bureau.

    The ID proof is downloaded from the local media URL and stored in a
    temporary file next to this module: PDFs are re-written page by page,
    anything else is opened as an image and saved (converted to RGB if the
    first save fails).  The HTML body is read from
    ``letter_email_template.html`` in the same directory.  The temporary
    file is removed before returning, whether or not sending succeeded.

    Args:
        letter_bureau: object providing ``bureau_url`` and ``pdf_file``.
        client: object providing ``id_proof`` and ``full_name``.

    Returns:
        None.  If the client has no ID proof or the email template cannot be
        opened, a message is printed and nothing is sent.
    """
    if client.id_proof is None:
        print(FileNotFoundError('Cannot find id proof of client'))
        return

    # reverse() expects a sequence of positional arguments, not a set.
    url = "http://127.0.0.1:8000" + reverse("grid_url:media_url",
                                            args=[client.id_proof])
    print(url)
    with urlopen(url) as page:
        content_type = page.headers.get("content-type")
        f = BytesIO(page.read())
    extension = content_type.split("/")[-1]

    module_dir = os.path.dirname(os.path.abspath(__file__))
    # Random suffix so concurrent calls do not overwrite each other's file.
    file_name = os.path.join(
        module_dir,
        "id_" + "".join(
            secrets.choice(string.ascii_uppercase + string.digits)
            for _ in range(15)),
    )
    file_name += f".{extension}"

    try:
        if content_type == "application/pdf":
            reader = PdfFileReader(f)
            writer = PdfFileWriter()

            for pageNum in range(reader.getNumPages()):
                currentPage = reader.getPage(pageNum)
                writer.addPage(currentPage)

            with open(file_name, "wb") as outputStream:
                writer.write(outputStream)
        else:
            image = Image.open(f)
            try:
                image.save(file_name)
            except OSError:
                # Retry in RGB after the first save failed.
                new_image = image.convert("RGB")
                new_image.save(file_name)

        try:
            template = open(
                os.path.join(module_dir, "letter_email_template.html"),
                "r",
                encoding="utf-8",
            )
        except OSError:
            print(FileNotFoundError('Cannot find email template'))
            return
        with template:
            html = template.read()

        bureau_email = Bureau.objects.filter(_id=ObjectId(
            get_id_from_url(letter_bureau.bureau_url))).values("email")[0]["email"]

        # The letter lives in the "media" directory two levels above this
        # module's directory.
        letter_path = os.path.join(
            os.path.dirname(os.path.dirname(module_dir)),
            "media",
            str(letter_bureau.pdf_file),
        )
        yag = yagmail.SMTP("*****@*****.**")
        yag.send(
            bureau_email,
            f"Request to solve dispute of {client.full_name}",
            html,
            [
                file_name,
                letter_path,
            ],
        )
    finally:
        # Always remove the temporary copy of the ID proof, also when the
        # template is missing or sending raised.
        if os.path.exists(file_name):
            os.remove(file_name)
#
# def split_by_num(filename, nums, password=None):
filename = r'F:\研一下\量化投资资料\量化教材\Hands-On_Machine_Learning_for_Algorithmic_Trading.pdf'
# Split the book into one PDF per chapter.  The source stays open while the
# pages are copied and is closed afterwards.
with open(filename, mode='rb') as pdf_file:
    pdf_reader = PdfFileReader(pdf_file)
    pages = pdf_reader.getNumPages()
    outline = pdf_reader.getOutlines()
    outlinchapter = []
    # Hand-entered chapter start pages, shifted by 18 to get reader page indices.
    outlinepage = [i+18 for i in [8,33,65,88,119,147,175,224,260,284,312,351,389,418,441,458]]
    for o in outline:
        res = re.findall(r"'/Title': '(.*?)', '/Page': IndirectObject\((.*?), 0\)",str(o),re.S)
        # Skip entries the pattern does not match instead of failing on res[0].
        if res and 'Chapter' in res[0][0]:
            outlinchapter.append(res[0][0])
    outlinedict =[{i[0]:i[1]} for i in zip(outlinchapter,outlinepage)]

    for index, entry in enumerate(outlinedict):
        pdf_writer = PdfFileWriter()
        split_pdf_name = list(entry.keys())[0].replace(':','') + '.pdf'
        start = list(entry.values())[0]
        # A chapter ends where the next one starts.  The original indexed one
        # past the last entry and raised IndexError.
        # NOTE(review): the last chapter is assumed to run to the end of the
        # document -- confirm.
        if index + 1 < len(outlinedict):
            end = list(outlinedict[index + 1].values())[0]
        else:
            end = pages
        print(split_pdf_name)
        for page_number in range(int(start), int(end)):
            pdf_writer.addPage(pdf_reader.getPage(page_number))
        with open(split_pdf_name,'wb') as out:
            pdf_writer.write(out)



Exemple #49
0
from PyPDF2 import PdfFileReader, PdfFileWriter

# Write every page of the source document to its own file under dist/,
# numbered from 1.
pdf_document = "source/Computer-Vision-Resources.pdf"
pdf = PdfFileReader(pdf_document)

for page in range(pdf.getNumPages()):
    outputFilename = "dist/Computer-Vision-Resources-page-{}.pdf".format(page + 1)

    pdf_writer = PdfFileWriter()
    pdf_writer.addPage(pdf.getPage(page))

    with open(outputFilename, "wb") as out:
        pdf_writer.write(out)

        print("created", outputFilename)
Exemple #50
0
def _timestamp_to_seconds(stamp):
    """Convert a ``[[H:]M:]S`` timestamp string into whole seconds.

    Only the last three colon-separated fields are used; anything further
    to the left is ignored.
    """
    parts = stamp.split(":")
    seconds = int(parts.pop())
    if parts:
        seconds += int(parts.pop()) * 60
    if parts:
        seconds += int(parts.pop()) * 60 * 60
    return seconds


def main():
    """Superimpose a speaker video onto a slide deck.

    Command-line driven. The steps are:

    1. read the slide transitions (lines of ``<timestamp> <slide number>``,
       ``#`` starts a comment line) from the file given with ``-t``;
    2. split the slides PDF into one single-page PDF per slide, insisting
       that all pages share the same media box;
    3. query ``ffprobe`` for the video duration and frame rate;
    4. render each shown slide to a PNG with ``pdftoppm`` and loop it into a
       ``.mov`` segment with ``ffmpeg`` for as long as it is on screen;
    5. concatenate the segments and overlay the (optionally cropped) video
       as a picture-in-picture box, unless ``--dry-run`` is given.

    Exits with status 1 on inconsistent input (no transitions, no slides,
    differing page sizes, non-increasing transition times).
    """

    parser = argparse.ArgumentParser()
    parser.add_argument("video",
                        type=argparse.FileType('rb'),
                        help="video of the speaker")
    parser.add_argument("slides",
                        type=argparse.FileType('rb'),
                        help="slides in pdf format")
    parser.add_argument("output",
                        type=argparse.FileType('wb'),
                        help="superimposed video output file")
    parser.add_argument("-n",
                        "--dry-run",
                        action='store_true',
                        help="don't run the final encoding pass")
    parser.add_argument("--height",
                        type=int,
                        help="height of output video (defaults to 1080)",
                        default=1080)
    parser.add_argument("--crop", help="box to crop video from (w:h:x:y)")
    parser.add_argument("--fraction",
                        type=float,
                        help="size of speaker box relative to video",
                        default=1 / 3.0)
    parser.add_argument(
        "-t",
        type=argparse.FileType('r', encoding='UTF-8'),
        help="path to the file that specifies the slide transitions",
        default='transitions.txt')
    parser.add_argument("--end", help="timestamp to end video at")
    parser.add_argument(
        'remaining',
        nargs=argparse.REMAINDER,
        help="additional arguments to pass to ffmpeg as output options")
    args = parser.parse_args()
    # Only the *names* of these two files are needed (they are handed to
    # ffprobe/ffmpeg), so release the handles argparse opened.
    args.video.close()
    args.output.close()

    slides = tempfile.TemporaryDirectory("slides")
    segments = tempfile.TemporaryDirectory("segments")
    segment_list = tempfile.NamedTemporaryFile('w')

    # get all the transitions -- from the file selected with -t (argparse has
    # already opened it; the default is transitions.txt)
    transitions = []
    with args.t as t:
        for line in t:
            line = line.strip()
            if line.startswith('#'):
                continue
            fields = line.split()
            if len(fields) >= 2:
                transitions.append((fields[0], int(fields[1])))

    if not transitions:
        # everything below indexes transitions[-1]
        print("no transitions?")
        sys.exit(1)

    # split the pdf
    inputpdf = PdfFileReader(args.slides)
    size = None
    for i in range(inputpdf.numPages):
        page = inputpdf.getPage(i)
        if size is None:
            size = page.mediaBox
        elif size != page.mediaBox:
            print("pdf page sizes differ.")
            sys.exit(1)
        output = PdfFileWriter()
        output.addPage(page)
        with open("%s/%d.pdf" % (slides.name, i + 1), "wb") as outputStream:
            output.write(outputStream)

    if size is None:
        print("no slides?")
        sys.exit(1)

    # get info about the video
    end = int(
        float(
            subprocess.run([
                "ffprobe", "-v", "error", "-show_entries", "format=duration",
                "-of", "default=noprint_wrappers=1:nokey=1", args.video.name
            ],
                           capture_output=True).stdout.strip()))
    if args.end:
        end = _timestamp_to_seconds(args.end)
    fps = subprocess.run([
        "ffprobe", "-v", "error", "-select_streams", "V", "-show_entries",
        "stream=r_frame_rate", "-of", "default=noprint_wrappers=1:nokey=1",
        args.video.name
    ],
                         capture_output=True,
                         encoding="UTF-8").stdout.strip()
    # sentinel entry: the last slide stays up until the end of the video.
    # its time is an int, which is how the loop below tells it apart from the
    # string timestamps read from the file.
    transitions.append((end, transitions[-1][1]))

    # h * 192 / 72 = 1080
    # h * r / 72 = target_h
    # r = 72 * target_h / h
    height = args.height
    # search for next pdf scale that produces divisible-by-two width and height
    while True:
        # the scale must follow the candidate height; deriving it from
        # args.height would make every iteration identical and never finish
        pdf_scale = int(72.0 * height /
                        float(size.upperLeft[1] - size.lowerLeft[1]))
        width = float(size.upperRight[0] -
                      size.upperLeft[0]) * pdf_scale / 72.0
        # print(width, height, pdf_scale)
        if math.ceil(width) % 2 == 0:
            break
        height += 1

    print("pdf page size is", size.lowerRight[0], "by", size.upperRight[1],
          "and will be scaled with DPI", pdf_scale)
    print("output will be %dx%d, and %s long (at %s fps)" %
          (width, height, pretty_time_delta(end), fps))
    print("transitions:")
    print("\n".join([
        " - slide % 3d @ %s" % (slide, time)
        for (time, slide) in transitions[:-1]
    ]))

    print("---")
    print("==> producing slide video segments")

    i = 0
    since = 0
    show = None
    l_trim = None
    for (time, slide) in transitions:
        if isinstance(time, int):
            # the sentinel appended above is already in seconds
            s = time
        else:
            s = _timestamp_to_seconds(time)

        # non-zero start time (maybe) implies we should trim the beginning
        if l_trim is None:
            l_trim = s
            # note that _in theory_ we should adjust all the slide transition
            # timestamps back by this amount, but in practice this isn't
            # necessary since we only really care about the _differences_
            # between them. the one exception to this is the very last
            # timestamp, since it is one we artificially set to the end time of
            # the video. if the video is trimmed at the beginning, the last
            # slide should also end correspondingly sooner
            transitions[-1] = (transitions[-1][0] - l_trim, transitions[-1][1])

        if since >= s:
            print(
                "slide times do not monotonically increase (%s came after %s)"
                % (pretty_time_delta(s), pretty_time_delta(since)))
            sys.exit(1)

        if s > end:
            if since >= end:
                # no point in encoding things after this
                break
            # run this slide until the end time
            s = end

        # create a png for the slide we're supposed to show
        subprocess.run([
            "pdftoppm", "-singlefile", "-png", "-r",
            "%d" % pdf_scale,
            "%s/%d.pdf" % (slides.name, slide),
            "%s/%d" % (segments.name, i)
        ])

        if show is not None:
            # loop that png frame for the currently shown frame until this time
            print(" -> loop slide %d for %s" %
                  (i - 1, pretty_time_delta(s - since)))
            subprocess.run([
                "ffmpeg", "-loglevel", "error", "-f", "image2", "-loop", "1",
                "-framerate", "1", "-pattern_type", "none", "-i",
                "%s/%d.png" % (segments.name, i - 1), "-r", "1", "-t",
                "%d" % (s - since), "-vcodec", "png", "-an",
                "%s/%d.mov" % (segments.name, i - 1)
            ])
            segment_list.write("file '%s/%d.mov'\n" % (segments.name, i - 1))

        since = s
        show = slide
        i += 1

    filter_complex = []
    filter_complex.append("[1] fps=%s [slides]" % fps)

    if args.crop is None:
        filter_complex.append("[0] scale=-1:%d [pip]" %
                              (int(height * args.fraction)))
    else:
        filter_complex.append("[0] crop=%s,scale=-1:%d [pip]" %
                              (args.crop, int(height * args.fraction)))

    filter_complex.append(
        "[slides][pip] overlay=main_w-overlay_w-10:main_h-overlay_h-10")

    # ffmpeg reads the list by name, so it must be on disk before the call
    segment_list.flush()

    print("==> producing slide video")
    subprocess.run([
        "ffmpeg", "-y", "-loglevel", "error", "-f", "concat", "-safe", "0",
        "-i", segment_list.name, "-c", "copy",
        "%s/all.mov" % segments.name
    ])

    if args.dry_run:
        # keep a copy outside the temporary directory so the printed command
        # can still be run by hand later
        copyfile("%s/all.mov" % segments.name, "all.mov")

    encoding_args = [
        "ffmpeg", "-ss",
        "%d" % l_trim, "-to",
        "%d" % end, "-i", args.video.name, "-i",
        "all.mov" if args.dry_run else "%s/all.mov" % segments.name,
        "-filter_complex", "; ".join(filter_complex), "-pix_fmt", "yuv420p",
        "-r",
        "%s" % fps, *args.remaining, args.output.name
    ]

    print("==> superimposing video onto slides")
    if args.dry_run:
        print(" -> would run:")
        print(encoding_args)
        print(" -> but skipping since this is a dry run.")
    else:
        subprocess.run(encoding_args)
Exemple #51
0
# doc = fitz.open(pdf_document)
# print("Исходный документ: ", doc)
# print("\nКоличество страниц: %i\n\n------------------\n\n" % doc.pageCount)
# print(doc.metadata)
#
# for current_page in range(5,6):
#     page = doc.loadPage(current_page)
#     page_text = page.getText()
#     print("Стр. ", current_page+1, "\n\nСодержание;\n")
#     print(page_text)

from PyPDF2 import PdfFileReader
#
# pdf_document = "Sobitiya.pdf"
# with open(pdf_document, "rb") as f:
#    pdf = PdfFileReader(f)
#    info = pdf.getDocumentInfo()
#    pages = pdf.getNumPages()
#    print (info)
#    print ("number of pages: %i" % pages)
#    page1 = pdf.getPage(0)
#    print(page1)
#    print(page1.extractText())

# Print the text of the page at index 6 (the seventh page) of muzika.pdf as
# UTF-8 bytes. The file is closed as soon as the text has been extracted.
with open('muzika.pdf', 'rb') as pgs:
    read_pdf = PdfFileReader(pgs)
    number = read_pdf.getNumPages()
    page = read_pdf.getPage(6)
    page_content = page.extractText()
print(page_content.encode('utf-8'))
Exemple #52
0
    def draw_pdf(self, blank_filename, filename):
        """Draw this bracket onto a blank bracket PDF and save the result.

        The picks and win probabilities from ``self.bracket_heap`` are drawn
        on an in-memory canvas, which is then merged onto the first page of
        ``blank_filename``.

        :param blank_filename: path of the PDF whose first page is used as
            the background.
        :param filename: path the merged single-page PDF is written to.
        """
        tmp_pdf = io.BytesIO()
        can = canvas.Canvas(tmp_pdf)
        can.setFontSize(7)

        # Box first 4 winners
        xs = [154, 260, 452, 560]
        y = 490
        box_width = 79
        box_height = 10
        first_games = self.bracket_heap[
            ROUND_SECTIONS[1][0]:ROUND_SECTIONS[1][1]]
        for game, x in zip(first_games, xs):
            if game.winner.team_id in TOP_ROW_TEAM_IDS:
                can.rect(x, y, box_width, box_height)
            else:
                # same box, drawn 14 units lower
                can.rect(x, y - 14, box_width, box_height)

        bracket_pdf_points = {
            # left start point, right start point, dy val, switch value
            1: ((0, 463), (763, 463), 25, 16),
            2: ((127, 456), (607, 456), 29, 16),
            3: ((187, 442), (555, 442), 58, 8),
            4: ((237, 413), (507, 413), 116, 4),
            5: ((286, 357), (460, 357), 234, 2),
            6: ((290, 235), (442, 235), 0, 1)
        }

        # Put win prob on first round
        x, y = bracket_pdf_points[1][0]
        dy = 25
        switch = bracket_pdf_points[1][3]
        for i in range(ROUND_SECTIONS[2][0], ROUND_SECTIONS[2][1], 1):
            if switch == 0:
                # left column is full: restart at the right start point
                x, y = bracket_pdf_points[1][1]
                switch = bracket_pdf_points[1][3]
                dy = 25
            can.drawString(x, y, f"({self.bracket_heap[i].win_prob:.2%})")
            can.drawString(x, y - dy / 2,
                           f"({1 - self.bracket_heap[i].win_prob:.2%})")
            y -= dy
            if switch == bracket_pdf_points[1][3] / 2:
                # halfway down the column: skip 10 units and use a larger
                # step from here on
                y -= 10
                dy = 33
            switch -= 1

        # Fill in rounds
        for r in range(2, 7):
            x, y = bracket_pdf_points[r][0]
            switch = bracket_pdf_points[r][3]
            for i in range(ROUND_SECTIONS[r][0], ROUND_SECTIONS[r][1]):
                if switch == 0:
                    x, y = bracket_pdf_points[r][1]
                    switch = bracket_pdf_points[r][3]
                # The probability is stored on heap node (i - 1) // 2, from
                # the point of view of that node's teams[0]; flip it when
                # this node's winner is teams[1].
                win_prob = self.bracket_heap[(i - 1) // 2].win_prob
                if self.bracket_heap[
                    (i - 1) // 2].teams[1] == self.bracket_heap[i].winner:
                    win_prob = 1 - win_prob
                can.drawString(
                    x, y,
                    str(self.bracket_heap[i].winner) + f" ({win_prob:.2%})")
                y -= bracket_pdf_points[r][2]
                switch -= 1
        # Fill in winner
        can.drawString(363, 235, str(self.bracket_heap[0].winner))

        can.save()

        watermark = PdfFileReader(tmp_pdf)

        out_pdf = PdfFileWriter()
        # The blank PDF must stay open until the merged page is written, and
        # is closed afterwards instead of being leaked.
        with open(blank_filename, 'rb') as blank_stream:
            blank_pdf = PdfFileReader(blank_stream)

            blank_page = blank_pdf.getPage(0)
            blank_page.mergePage(watermark.getPage(0))
            out_pdf.addPage(blank_page)

            with open(filename, 'wb') as out_stream:
                out_pdf.write(out_stream)
# how to merge pdf files so that each file begins on an odd page number?
#
# http://unix.stackexchange.com/a/66455

import sys

from PyPDF2 import PdfFileWriter, PdfFileReader


alignment = 2           # to align on even pages


# Concatenate all PDFs named on the command line, padding each one with blank
# pages up to the next multiple of `alignment`, and write the result to stdout.
output = PdfFileWriter()
output_page_number = 0
for filename in sys.argv[1:]:
    # The input files are deliberately left open: the writer only reads the
    # page contents when output.write() runs.
    inpdf = PdfFileReader(open(filename, 'rb'))

    for i in range(inpdf.getNumPages()):
        output.addPage(inpdf.getPage(i))
        output_page_number += 1

    # blank pages until next alignment boundary
    while output_page_number % alignment != 0:
        output.addBlankPage()
        output_page_number += 1

# A PDF is binary data. On Python 3 sys.stdout is a text stream and rejects
# bytes, so write to its underlying binary buffer; Python 2's sys.stdout has
# no `buffer` attribute and is used directly.
output.write(getattr(sys.stdout, 'buffer', sys.stdout))
from PyPDF2 import PdfFileReader as PdfReader, PdfFileWriter as PdfWriter

# Print the page count and the text of the first page of the sample PDF.
# The `with` block closes the file once reading is done.
with open('Ch13/meetingminutes.pdf', 'rb') as pdf_obj:
    pdf_reader = PdfReader(pdf_obj)
    print(pdf_reader.numPages) # Output: 19

    page_obj = pdf_reader.getPage(0)
    print(page_obj.extractText())
Exemple #55
0
    def generate(self, op):
        """Render the PDF ticket for one order position.

        Text fields and a QR code are drawn on a fresh page, which is then
        merged onto the first page of the background PDF (the file from the
        ``background`` setting, or the bundled default A4 template).

        For every field ``<name>`` the settings ``<name>_s`` (font size, or QR
        code size in mm), ``<name>_x`` and ``<name>_y`` (position in mm) are
        read. A size of 0 disables the field.

        :param op: the order position to render; ``order``, ``item``,
            ``variation``, ``price``, ``secret`` and ``attendee_name`` are
            read from it.
        :return: tuple ``(filename, mime type, PDF bytes)``.
        """
        from reportlab.graphics.shapes import Drawing
        from reportlab.pdfgen import canvas
        from reportlab.lib import pagesizes, units
        from reportlab.graphics.barcode.qr import QrCodeWidget
        from reportlab.graphics import renderPDF
        from PyPDF2 import PdfFileWriter, PdfFileReader

        order = op.order

        # Unknown page size names fall back to A4.
        pagesize = self.settings.get('pagesize', default='A4')
        if hasattr(pagesizes, pagesize):
            pagesize = getattr(pagesizes, pagesize)
        else:
            pagesize = pagesizes.A4
        # The orientation names a function in reportlab.lib.pagesizes that is
        # applied to the page size; unknown names leave it untouched.
        orientation = self.settings.get('orientation', default='portrait')
        if hasattr(pagesizes, orientation):
            pagesize = getattr(pagesizes, orientation)(pagesize)

        buffer = BytesIO()
        p = canvas.Canvas(buffer, pagesize=pagesize)

        event_s = self.settings.get('event_s', default=22, as_type=float)
        if event_s:
            p.setFont("Helvetica", event_s)
            event_x = self.settings.get('event_x', default=15, as_type=float)
            event_y = self.settings.get('event_y', default=235, as_type=float)
            p.drawString(event_x * units.mm, event_y * units.mm,
                         str(self.event.name))

        order_s = self.settings.get('order_s', default=17, as_type=float)
        if order_s:
            p.setFont("Helvetica", order_s)
            order_x = self.settings.get('order_x', default=15, as_type=float)
            order_y = self.settings.get('order_y', default=220, as_type=float)
            p.drawString(order_x * units.mm, order_y * units.mm,
                         _('Order code: {code}').format(code=order.code))

        name_s = self.settings.get('name_s', default=17, as_type=float)
        if name_s:
            p.setFont("Helvetica", name_s)
            name_x = self.settings.get('name_x', default=15, as_type=float)
            name_y = self.settings.get('name_y', default=210, as_type=float)
            item = str(op.item.name)
            if op.variation:
                item += " – " + str(op.variation)
            p.drawString(name_x * units.mm, name_y * units.mm, item)

        price_s = self.settings.get('price_s', default=17, as_type=float)
        if price_s:
            p.setFont("Helvetica", price_s)
            price_x = self.settings.get('price_x', default=15, as_type=float)
            price_y = self.settings.get('price_y', default=200, as_type=float)
            p.drawString(price_x * units.mm, price_y * units.mm,
                         "%s %s" % (str(op.price), self.event.currency))

        qr_s = self.settings.get('qr_s', default=80, as_type=float)
        if qr_s:
            reqs = qr_s * units.mm
            qrw = QrCodeWidget(op.secret, barLevel='H')
            # Scale the widget's natural bounds to the requested square size.
            b = qrw.getBounds()
            w = b[2] - b[0]
            h = b[3] - b[1]
            d = Drawing(reqs, reqs, transform=[reqs / w, 0, 0, reqs / h, 0, 0])
            d.add(qrw)
            qr_x = self.settings.get('qr_x', default=10, as_type=float)
            qr_y = self.settings.get('qr_y', default=120, as_type=float)
            renderPDF.draw(d, p, qr_x * units.mm, qr_y * units.mm)

        code_s = self.settings.get('code_s', default=11, as_type=float)
        if code_s:
            p.setFont("Helvetica", code_s)
            code_x = self.settings.get('code_x', default=15, as_type=float)
            code_y = self.settings.get('code_y', default=120, as_type=float)
            p.drawString(code_x * units.mm, code_y * units.mm, op.secret)

        attendee_s = self.settings.get('attendee_s', default=0, as_type=float)
        # Gate on the attendee font size, like every other field does with its
        # own size; gating on code_s drew the name with font size 0 by default.
        if attendee_s and op.attendee_name:
            p.setFont("Helvetica", attendee_s)
            attendee_x = self.settings.get('attendee_x',
                                           default=15,
                                           as_type=float)
            attendee_y = self.settings.get('attendee_y',
                                           default=90,
                                           as_type=float)
            p.drawString(attendee_x * units.mm, attendee_y * units.mm,
                         op.attendee_name)

        p.showPage()

        p.save()

        buffer.seek(0)
        new_pdf = PdfFileReader(buffer)
        output = PdfFileWriter()
        bg_file = self.settings.get('background', as_type=File)
        if isinstance(bg_file, File):
            bgf = default_storage.open(bg_file.name, "rb")
        else:
            bgf = open(finders.find('pretixpresale/pdf/ticket_default_a4.pdf'),
                       "rb")
        outbuffer = BytesIO()
        try:
            bg_pdf = PdfFileReader(bgf)
            for page in new_pdf.pages:
                # Copy the background page so that merging does not modify the
                # page object held by the reader.
                bg_page = copy.copy(bg_pdf.getPage(0))
                bg_page.mergePage(page)
                output.addPage(bg_page)

            # Page contents are read from bgf while writing, so the
            # background file may only be closed after this call.
            output.write(outbuffer)
        finally:
            bgf.close()

        outbuffer.seek(0)
        return 'order%s%s.pdf' % (
            self.event.slug, order.code), 'application/pdf', outbuffer.read()
Exemple #56
0
def run_one(options, data, tokens, name, tree_key='binary_tree'):
    """Draw one parse tree with turtle and save it as a cropped PDF.

    The tree is drawn on the turtle canvas, dumped as PostScript into a
    temporary file, converted with ``ps2pdf`` to
    ``<out_dir>/<name>-<example_id>.pdf``, and finally written again as
    ``<out_dir>/<name>-<example_id>-cropped.pdf`` with its trim and crop boxes
    set to the bounding box reported by the drawing code.

    :param options: object providing ``out_dir``, ``color`` and ``size``.
    :param data: dict with ``'example_id'``, the tree under ``tree_key`` and
        optionally ``'style'``.
    :param tokens: passed through to ``TreeFig.draw_tree``.
    :param name: prefix of the output file names.
    :param tree_key: key in ``data`` that holds the tree to draw.
    :return: the bounding box returned by ``draw_tree``; it is indexed here
        with ``'x0'``, ``'y0'``, ``'x1'`` and ``'y1'``.
    """
    # Imported locally so the function does not depend on a module-level
    # import that this file may not have.
    import subprocess

    example_id = data['example_id']
    parse = data[tree_key]

    style = data.get('style', None)

    with tempfile.NamedTemporaryFile(mode='w') as f:
        path_ps = f.name
        path_pdf = os.path.join(options.out_dir,
                                '{}-{}.pdf'.format(name, example_id))

        turtle.speed('fastest')

        fig = TreeFig(color=options.color, size=options.size)

        # Setup
        scale = 1
        widthWindow = WIDTH * scale
        heightWindow = 500 * scale
        x0 = -widthWindow / 2 + 10

        fig.setup_turtle(widthWindow, heightWindow, scale, x0)

        # Init turtle.
        ts = turtle.getscreen()
        ts.tracer(
            0, 0
        )  # https://stackoverflow.com/questions/16119991/how-to-speed-up-pythons-turtle-function-and-stop-it-freezing-at-the-end

        # Draw settings.
        settings = {}
        settings['style'] = style

        # Draw.
        bounding_box = fig.draw_tree(parse, tokens, **settings)

        # Update Canvas.
        ts.update()
        ts.getcanvas().postscript(file=path_ps)

        print('writing to {}'.format(path_pdf))

        # Argument list instead of a formatted shell string: paths containing
        # spaces or shell metacharacters are passed through verbatim.
        subprocess.run(['ps2pdf', '-dEPSCrop', path_ps, path_pdf])

        # Crop the image.
        output_filename = os.path.join(
            options.out_dir, '{}-{}-cropped.pdf'.format(name, example_id))
        # The converted PDF has to stay open until the cropped copy has been
        # written, because page contents are read lazily.
        with open(path_pdf, "rb") as inputStream:
            input1 = PdfFileReader(inputStream)
            output = PdfFileWriter()

            page = input1.getPage(0)
            page.trimBox.lowerLeft = (bounding_box['x0'], bounding_box['y0'])
            page.trimBox.upperRight = (bounding_box['x1'], bounding_box['y1'])
            page.cropBox.lowerLeft = (bounding_box['x0'], bounding_box['y0'])
            page.cropBox.upperRight = (bounding_box['x1'], bounding_box['y1'])
            output.addPage(page)

            print('writing to {}'.format(output_filename))
            with open(output_filename, "wb") as outputStream:
                output.write(outputStream)

    return bounding_box
Exemple #57
0
class Renderer:
    """Draws ticket pages from a layout description onto a reportlab canvas.

    ``layout`` is an iterable of dicts. Each has a ``'type'`` (one of
    ``barcodearea``, ``imagearea``, ``textarea``, ``poweredby``) plus the
    geometry and content keys read by the matching ``_draw_*`` method.
    Positions and sizes in the layout are given in millimetres.
    """

    def __init__(self, event, layout, background_file):
        """Store the layout and load the optional background PDF.

        :param event: event the tickets belong to; passed to
            ``get_variables`` and ``get_images``.
        :param layout: iterable of layout element dicts.
        :param background_file: readable binary file object with the
            background PDF, or a falsy value for no background.
        """
        self.layout = layout
        self.background_file = background_file
        self.variables = get_variables(event)
        self.images = get_images(event)
        self.event = event
        if self.background_file:
            # Keep the raw bytes too: render_background writes them to disk
            # when pdftk is used.
            self.bg_bytes = self.background_file.read()
            self.bg_pdf = PdfFileReader(BytesIO(self.bg_bytes), strict=False)
        else:
            self.bg_bytes = None
            self.bg_pdf = None

    @classmethod
    def _register_fonts(cls):
        """Register Open Sans and all fonts from ``get_fonts()`` with reportlab.

        Variants are registered under ``'<family> I'``, ``'<family> B'`` and
        ``'<family> B I'``, which is the naming ``_draw_textarea`` relies on.
        """
        pdfmetrics.registerFont(
            TTFont('Open Sans', finders.find('fonts/OpenSans-Regular.ttf')))
        pdfmetrics.registerFont(
            TTFont('Open Sans I', finders.find('fonts/OpenSans-Italic.ttf')))
        pdfmetrics.registerFont(
            TTFont('Open Sans B', finders.find('fonts/OpenSans-Bold.ttf')))
        pdfmetrics.registerFont(
            TTFont('Open Sans B I',
                   finders.find('fonts/OpenSans-BoldItalic.ttf')))

        for family, styles in get_fonts().items():
            pdfmetrics.registerFont(
                TTFont(family, finders.find(styles['regular']['truetype'])))
            if 'italic' in styles:
                pdfmetrics.registerFont(
                    TTFont(family + ' I',
                           finders.find(styles['italic']['truetype'])))
            if 'bold' in styles:
                pdfmetrics.registerFont(
                    TTFont(family + ' B',
                           finders.find(styles['bold']['truetype'])))
            if 'bolditalic' in styles:
                pdfmetrics.registerFont(
                    TTFont(family + ' B I',
                           finders.find(styles['bolditalic']['truetype'])))

    def _draw_poweredby(self, canvas: Canvas, op: OrderPosition, o: dict):
        """Draw the "powered by pretix" logo described by layout element ``o``.

        ``o['content']`` selects the ``dark`` or ``white`` variant (anything
        else means ``dark``). If the image cannot be resized, the error is
        logged and nothing is drawn.
        """
        content = o.get('content', 'dark')
        if content not in ('dark', 'white'):
            content = 'dark'
        img = finders.find(
            'pretixpresale/pdf/powered_by_pretix_{}.png'.format(content))

        ir = ThumbnailingImageReader(img)
        try:
            width, height = ir.resize(None, float(o['size']) * mm, 300)
        except Exception:
            # Without a successful resize there is no width/height to draw
            # with, so skip the logo instead of failing on unbound names.
            logger.exception("Can not resize image")
            return
        canvas.drawImage(ir,
                         float(o['left']) * mm,
                         float(o['bottom']) * mm,
                         width=width,
                         height=height,
                         preserveAspectRatio=True,
                         anchor='n',
                         mask='auto')

    def _draw_barcodearea(self, canvas: Canvas, op: OrderPosition, o: dict):
        """Draw a square QR code of side ``o['size']`` mm.

        ``o['content']`` of ``'secret'`` or ``'pseudonymization_id'`` encodes
        that attribute of ``op``; any other value is encoded as given.
        """
        content = o.get('content', 'secret')
        if content == 'secret':
            content = op.secret
        elif content == 'pseudonymization_id':
            content = op.pseudonymization_id

        # Use a lower error-correction level for longer content.
        level = 'H'
        if len(content) > 32:
            level = 'M'
        if len(content) > 128:
            level = 'L'
        reqs = float(o['size']) * mm
        qrw = QrCodeWidget(content,
                           barLevel=level,
                           barHeight=reqs,
                           barWidth=reqs)
        d = Drawing(reqs, reqs)
        d.add(qrw)
        qr_x = float(o['left']) * mm
        qr_y = float(o['bottom']) * mm
        renderPDF.draw(d, canvas, qr_x, qr_y)

    def _get_ev(self, op, order):
        """Return the position's subevent if it has one, else the order's event."""
        return op.subevent or order.event

    def _get_text_content(self,
                          op: OrderPosition,
                          order: Order,
                          o: dict,
                          inner=False):
        """Resolve the text to print for text element ``o``.

        When ``o['locale']`` is set, the lookup is repeated once inside that
        language context (``inner`` guards against recursing again).

        :return: the literal ``o['text']`` for content ``'other'``, a meta data
            value for ``itemmeta:``/``meta:`` prefixes, the evaluated variable
            for known variable names, ``'(error)'`` for empty content or a
            failing variable, and ``''`` otherwise.
        """
        if o.get('locale', None) and not inner:
            with language(o['locale'], self.event.settings.region):
                return self._get_text_content(op, order, o, True)

        ev = self._get_ev(op, order)
        if not o['content']:
            return '(error)'
        if o['content'] == 'other':
            return o['text']
        elif o['content'].startswith('itemmeta:'):
            return op.item.meta_data.get(o['content'][9:]) or ''
        elif o['content'].startswith('meta:'):
            return ev.meta_data.get(o['content'][5:]) or ''
        elif o['content'] in self.variables:
            try:
                return self.variables[o['content']]['evaluate'](op, order, ev)
            except Exception:
                logger.exception('Failed to process variable.')
                return '(error)'
        return ''

    def _draw_imagearea(self, canvas: Canvas, op: OrderPosition, order: Order,
                        o: dict):
        """Draw the image selected by ``o['content']``, or a grey placeholder.

        The placeholder rectangle is used when the content is empty, unknown,
        fails to evaluate, or evaluates to a falsy value.
        """
        ev = self._get_ev(op, order)
        if not o['content'] or o['content'] not in self.images:
            image_file = None
        else:
            try:
                image_file = self.images[o['content']]['evaluate'](op, order,
                                                                   ev)
            except Exception:
                logger.exception('Failed to process variable.')
                image_file = None

        if image_file:
            ir = ThumbnailingImageReader(image_file)
            try:
                ir.resize(float(o['width']) * mm, float(o['height']) * mm, 300)
            except Exception:
                # Best effort: the image is still drawn without the resize.
                logger.exception("Can not resize image")
            canvas.drawImage(
                image=ir,
                x=float(o['left']) * mm,
                y=float(o['bottom']) * mm,
                width=float(o['width']) * mm,
                height=float(o['height']) * mm,
                preserveAspectRatio=True,
                anchor='c',  # centered in frame
                mask='auto')
        else:
            canvas.saveState()
            canvas.setFillColorRGB(.8, .8, .8, alpha=1)
            canvas.rect(
                x=float(o['left']) * mm,
                y=float(o['bottom']) * mm,
                width=float(o['width']) * mm,
                height=float(o['height']) * mm,
                stroke=0,
                fill=1,
            )
            canvas.restoreState()

    def _draw_textarea(self, canvas: Canvas, op: OrderPosition, order: Order,
                       o: dict):
        """Draw the text element ``o`` as a wrapped, optionally rotated paragraph.

        Font, size, colour (0-255 RGB triple), alignment, width, position and
        rotation are all read from ``o``.
        """
        font = o['fontfamily']
        if o['bold']:
            font += ' B'
        if o['italic']:
            font += ' I'

        align_map = {'left': TA_LEFT, 'center': TA_CENTER, 'right': TA_RIGHT}
        style = ParagraphStyle(name=uuid.uuid4().hex,
                               fontName=font,
                               fontSize=float(o['fontsize']),
                               leading=float(o['fontsize']),
                               autoLeading="max",
                               textColor=Color(o['color'][0] / 255,
                                               o['color'][1] / 255,
                                               o['color'][2] / 255),
                               alignment=align_map[o['align']])
        text = conditional_escape(
            self._get_text_content(op, order, o)
            or "", ).replace("\n", "<br/>\n")

        # reportlab does not support RTL, ligature-heavy scripts like Arabic. Therefore, we use ArabicReshaper
        # to resolve all ligatures and python-bidi to switch RTL texts.
        configuration = {
            'delete_harakat': True,
            'support_ligatures': False,
        }
        reshaper = ArabicReshaper(configuration=configuration)
        try:
            text = "<br/>".join(
                get_display(reshaper.reshape(line))
                for line in text.split("<br/>"))
        except Exception:
            # Best effort: fall back to the unreshaped text.
            logger.exception('Reshaping/Bidi fixes failed on string {}'.format(
                repr(text)))

        p = Paragraph(text, style=style)
        w, h = p.wrapOn(canvas, float(o['width']) * mm, 1000 * mm)
        # p_size = p.wrap(float(o['width']) * mm, 1000 * mm)
        ad = getAscentDescent(font, float(o['fontsize']))
        canvas.saveState()
        # The ascent/descent offsets here are not really proven to be correct, they're just empirical values to get
        # reportlab render similarly to browser canvas.
        if o.get('downward', False):
            canvas.translate(float(o['left']) * mm, float(o['bottom']) * mm)
            canvas.rotate(o.get('rotation', 0) * -1)
            p.drawOn(canvas, 0, -h - ad[1] / 2)
        else:
            canvas.translate(
                float(o['left']) * mm,
                float(o['bottom']) * mm + h)
            canvas.rotate(o.get('rotation', 0) * -1)
            p.drawOn(canvas, 0, -h - ad[1])
        canvas.restoreState()

    def draw_page(self,
                  canvas: Canvas,
                  order: Order,
                  op: OrderPosition,
                  show_page=True):
        """Draw every layout element for one order position onto ``canvas``.

        If a background PDF is loaded, the canvas page size is set to the
        upper-right corner of its first page's media box. With ``show_page``
        the page is finished via ``canvas.showPage()``.
        """
        for o in self.layout:
            if o['type'] == "barcodearea":
                self._draw_barcodearea(canvas, op, o)
            elif o['type'] == "imagearea":
                self._draw_imagearea(canvas, op, order, o)
            elif o['type'] == "textarea":
                self._draw_textarea(canvas, op, order, o)
            elif o['type'] == "poweredby":
                self._draw_poweredby(canvas, op, o)
        # Set once per page, after drawing: the size does not depend on the
        # layout element, and it must also apply when the layout is empty.
        if self.bg_pdf:
            media_box = self.bg_pdf.getPage(0).mediaBox
            canvas.setPageSize((media_box[2], media_box[3]))
        if show_page:
            canvas.showPage()

    def render_background(self, buffer, title=_('Ticket')):
        """Put the background PDF behind every page in ``buffer``.

        Uses the external ``pdftk`` binary when ``settings.PDFTK`` is set and
        PyPDF2 otherwise. Only the PyPDF2 path writes ``title`` into the
        document metadata.

        NOTE(review): both paths assume a background was loaded in
        ``__init__`` (``bg_bytes``/``bg_pdf`` not None) -- confirm with callers.

        :param buffer: seekable binary file object containing the drawn pages.
        :param title: document title for the metadata.
        :return: ``BytesIO`` with the combined PDF, positioned at the start.
        """
        if settings.PDFTK:
            buffer.seek(0)
            with tempfile.TemporaryDirectory() as d:
                with open(os.path.join(d, 'back.pdf'), 'wb') as f:
                    f.write(self.bg_bytes)
                with open(os.path.join(d, 'front.pdf'), 'wb') as f:
                    f.write(buffer.read())
                subprocess.run([
                    settings.PDFTK,
                    os.path.join(d, 'front.pdf'), 'background',
                    os.path.join(d, 'back.pdf'), 'output',
                    os.path.join(d, 'out.pdf'), 'compress'
                ],
                               check=True)
                with open(os.path.join(d, 'out.pdf'), 'rb') as f:
                    return BytesIO(f.read())
        else:
            from PyPDF2 import PdfFileReader, PdfFileWriter
            buffer.seek(0)
            new_pdf = PdfFileReader(buffer)
            output = PdfFileWriter()

            for page in new_pdf.pages:
                # Merge onto a copy so the reader's background page object is
                # not modified and can be reused for the next page.
                bg_page = copy.copy(self.bg_pdf.getPage(0))
                bg_page.mergePage(page)
                output.addPage(bg_page)

            output.addMetadata({
                '/Title': str(title),
                '/Creator': 'pretix',
            })
            outbuffer = BytesIO()
            output.write(outbuffer)
            outbuffer.seek(0)
            return outbuffer
parser.add_argument('-o',
                    '--output',
                    metavar='output',
                    help='set the output file name',
                    default='extracted')
parser.add_argument('-c',
                    '--copy',
                    help='copy the text to the clipboard',
                    action='store_true')
args = parser.parse_args()

pdf = PdfFileReader(args.file)

# Text of every page, each followed by a newline. Joining once avoids
# rebuilding the growing string for every page.
text = ''.join(
    pdf.getPage(i).extractText() + '\n' for i in range(pdf.getNumPages()))

if args.copy:
    # pyperclip is optional: without it the text is still written to the file.
    try:
        import pyperclip
    except ImportError:
        print('pyperclip module is required to use the copy argument')
    else:
        pyperclip.copy(text)

# Append mode keeps what earlier runs wrote. The file is opened only once the
# text is ready, and the `with` block closes it even if the write fails.
with open(args.output + '.txt', 'a') as out:
    out.write(text)
Exemple #59
0
    newpage.rotateClockwise(int(angle))
elif clock == "ac":
    newpage.rotateCounterClockwise(int(angle))
# Append the (possibly rotated) page to the writer and save everything the
# writer currently holds to a temporary file.
pdfWriter.addPage(newpage)
resultPdfFile = open('rotatedPage.pdf', 'wb')
pdfWriter.write(resultPdfFile)
resultPdfFile.close()
# Re-open the temporary file, append its first page to the same writer, and
# write the writer's pages to a second temporary file.
resultPdfFile1 = open('rotatedPage.pdf', 'rb')
pdfReader1 = PyPDF2.PdfFileReader(resultPdfFile1)
pageObj = pdfReader1.getPage(0)
pdfWriter.addPage(pageObj)
pdfOutputFile = open('combinedfile.pdf', 'wb')
pdfWriter.write(pdfOutputFile)
pdfOutputFile.close()
# NOTE(review): the second positional argument of PdfFileReader is `strict`,
# not a file mode, so 'rb' is only interpreted as a truthy flag here -- confirm
# against the PyPDF2 documentation.
pdffinal = PdfFileReader('combinedfile.pdf', 'rb')
output = PdfFileWriter()

# Copy every page except the last two into the final document.
for i in range(pdffinal.getNumPages() - 2):
    p = pdffinal.getPage(i)
    output.addPage(p)

with open('final.pdf', 'wb') as f:
    output.write(f)
print("CONFIGURING...")
# Shows a progress bar for 5 x 3 seconds; nothing else happens in this loop.
for i in tqdm(range(5)):
    time.sleep(3)
print("DONE!")
# Close the remaining handles, then delete the two temporary files.
# `pdfget` is opened earlier in the script, outside this excerpt.
resultPdfFile1.close()
pdfget.close()
os.remove("rotatedPage.pdf")
os.remove("combinedfile.pdf")
Exemple #60
0
def _overlay_printable_areas_with_white(src_pdf):
    """
    Paint every printable region of the letter white and return the result as a new PDF.

    With the printable regions blanked out, the caller can check whether any non-white content remains
    outside them.

    The first page receives four white rectangles: the body, the service address block (top right, running
    down the side to the right of the address area), the logo block (above the address area) and the citizen
    address window. Every later page receives one rectangle spanning the bordered area.

    Each rectangle is widened by 1mm on every edge. This generosity avoids pixel-hunting failures where a
    letter is rejected over a single boundary pixel, typically anti-aliased text. The red overlays shown to
    end users are unaffected, so PDFs should still be laid out against the published constraints.

    :param BytesIO src_pdf: A file-like
    :return BytesIO: New file like containing the overlaid pdf
    """

    def _paint_white(target_page, rectangles):
        # Draw all rectangles on one fresh white canvas, then merge the canvas page onto the target page.
        canvas = NotifyCanvas(white)
        for top_left, bottom_right in rectangles:
            canvas.rect(top_left, bottom_right)
        overlay = PdfFileReader(canvas.get_bytes())
        target_page.mergePage(overlay.getPage(0))

    reader = PdfFileReader(src_pdf)

    # The first page is more varied than the rest because it carries the address and logo blocks.
    first_page_rectangles = [
        # Body
        (
            (BORDER_LEFT_FROM_LEFT_OF_PAGE - 1, BODY_TOP_FROM_TOP_OF_PAGE - 1),
            (BORDER_RIGHT_FROM_LEFT_OF_PAGE + 1, BORDER_BOTTOM_FROM_TOP_OF_PAGE + 1),
        ),
        # Service address block: writeable area on the right hand side, up to the top right corner
        (
            (SERVICE_ADDRESS_LEFT_FROM_LEFT_OF_PAGE - 1, SERVICE_ADDRESS_TOP_FROM_TOP_OF_PAGE - 1),
            (SERVICE_ADDRESS_RIGHT_FROM_LEFT_OF_PAGE + 1, SERVICE_ADDRESS_BOTTOM_FROM_TOP_OF_PAGE + 1),
        ),
        # Service logo block: writeable area above the address, only as wide as the address
        (
            (BORDER_LEFT_FROM_LEFT_OF_PAGE - 1, BORDER_TOP_FROM_TOP_OF_PAGE - 1),
            (LOGO_RIGHT_FROM_LEFT_OF_PAGE + 1, LOGO_BOTTOM_FROM_TOP_OF_PAGE + 1),
        ),
        # Citizen address block: the address window
        (
            (ADDRESS_LEFT_FROM_LEFT_OF_PAGE - 1, ADDRESS_TOP_FROM_TOP_OF_PAGE - 1),
            (ADDRESS_RIGHT_FROM_LEFT_OF_PAGE + 1, ADDRESS_BOTTOM_FROM_TOP_OF_PAGE + 1),
        ),
    ]
    _paint_white(reader.getPage(0), first_page_rectangles)

    # Pages after the first only contain body text, so one rectangle covers them.
    later_page_rectangles = [
        (
            (BORDER_LEFT_FROM_LEFT_OF_PAGE - 1, BORDER_TOP_FROM_TOP_OF_PAGE - 1),
            (BORDER_RIGHT_FROM_LEFT_OF_PAGE + 1, BORDER_BOTTOM_FROM_TOP_OF_PAGE + 1),
        ),
    ]
    for page_index in range(1, reader.numPages):
        _paint_white(reader.getPage(page_index), later_page_rectangles)

    overlaid = bytesio_from_pdf(reader)

    # Rewind the caller's stream so it is left positioned at the start.
    src_pdf.seek(0)

    return overlaid