예제 #1
0
파일: main.py 프로젝트: pvalente/pdfyql
  def get(self):
    """Download a PDF and respond with its extracted text as an XML document.

    Query parameters (read through ``self.request.get``):

    * ``url`` -- URL-quoted address of the PDF to download.
    * ``pg``  -- optional 1-based page number; when absent every page is
      returned.

    The response body is a ``<document>`` element carrying the source URL
    and the PDF title as attributes, with one ``<page>`` child per page
    whose text is wrapped in a CDATA section.
    """
    # Function-scope import so this handler does not depend on edits to
    # the module's import block.
    from xml.sax.saxutils import escape

    url = self.request.get('url')
    if not url:
        url = ""

    page = self.request.get('pg')
    if not page:
        page = 'all'
    else:
        # Callers number pages from 1; the PDF reader numbers them from 0.
        page = int(page)-1

    output = StringIO.StringIO()
    try:
        u = urllib2.urlopen(urllib2.unquote(url))
        try:
            output.write(u.read())
        finally:
            # Release the connection even if the download fails midway.
            u.close()

        p = PdfFileReader(output)

        pages = p.getNumPages()
        # getDocumentInfo() returns None for PDFs without an /Info
        # dictionary, and .title may itself be None.
        info = p.getDocumentInfo()
        title = info.title if info is not None else None
        if title is None:
            title = ""

        if page == "all":
            page_indexes = range(0, pages)
        else:
            page_indexes = [page]

        page_xml = "<page number='%d'><![CDATA[%s]]></page>\n"
        # A literal "]]>" inside the text would terminate the CDATA
        # section early, so split it across two adjacent sections.
        content = "".join(
            page_xml % (
                i+1,
                p.getPage(i).extractText().replace("]]>", "]]]]><![CDATA[>"))
            for i in page_indexes)
    finally:
        output.close()

    # url and title come from the request / the remote document, so they
    # must be escaped before being placed in single-quoted attributes.
    attr_entities = {"'": "&apos;", '"': "&quot;"}
    result = "<?xml version='1.0' encoding='UTF-8'?>\n<document url='%s' title='%s'>%s</document>" % (
        escape(url, attr_entities), escape(title, attr_entities), content)

    self.response.headers['Content-type'] = 'application/xml'
    self.response.out.write(result)
예제 #2
0
 def get_pdf(self, text, use_design=False):
     """
     Render the given text into a one-page PDF, positioned at this
     record's ``left``/``top`` offsets (expressed in inches).

     At most ``nb_lines + 1`` wrapped lines are drawn; the lines that do
     not fit are joined with spaces and handed back to the caller.

     :param text: Text to put inside a translation box
     :param use_design: Set to true to merge the text onto the first page
                        of the configured design PDF
     :return: tuple ``(pdf, remaining)`` where ``remaining`` is the text
              that was not drawn ('' when everything fitted)
     :rtype: ``pdf`` is a PdfFileWriter when a design was merged,
             otherwise a PdfFileReader
     """
     self.ensure_one()
     buf = StringIO.StringIO()
     # bottomup=0: presumably makes y grow downwards, which is why the
     # vertical offset is incremented after each line.
     pdf_canvas = Canvas(buf, bottomup=0)
     wrapped = self._wrap_text(
         text, pdf_canvas._fontname, pdf_canvas._fontsize)
     y_pos = self.top * inch
     x_pos = self.left * inch
     first_overflow = self.nb_lines + 1
     for drawn_line in wrapped[:first_overflow]:
         pdf_canvas.drawString(x_pos, y_pos, drawn_line)
         y_pos += pdf_canvas._leading
     pdf_canvas.save()
     # Rewind so the reader sees the freshly written PDF from its start
     buf.seek(0)

     if len(wrapped) > self.nb_lines:
         remaining = ' '.join(wrapped[first_overflow:])
     else:
         remaining = ''

     text_pdf = PdfFileReader(buf)
     if not use_design:
         return text_pdf, remaining

     design_path = self.env['ir.config_parameter'].get_param(
         'sbc_compassion.composition_design')
     if not design_path:
         return text_pdf, remaining

     # Stamp the text page on top of the first page of the design
     background = PdfFileReader(file(design_path, 'rb')).getPage(0)
     background.mergePage(text_pdf.getPage(0))
     merged = PdfFileWriter()
     merged.addPage(background)
     return merged, remaining
    def create(self, vals):
        """ Create the correspondence and, for letters that are not
        "Beneficiary To Supporter", send it to the local translation
        platform when its original language is translatable and not
        spoken by the child's field office.

        Letters using layout 'CH-A-3S01-1' that do not come from source
        'compassion' get their first two PDF pages swapped afterwards.
        """
        to_translate = False
        if vals.get('direction') != "Beneficiary To Supporter":
            sponsorship = self.env['recurring.contract'].browse(
                vals['sponsorship_id'])
            original_lang = self.env['res.lang.compassion'].browse(
                vals.get('original_language_id'))
            child = sponsorship.child_id

            # TODO Remove this fix when HAITI case is resolved
            # For now, we switch French to Creole for avoiding translation
            # NOTE(review): only vals is switched; the check below still
            # uses the original language record -- confirm this is wanted.
            if 'HA' in child.local_id:
                french = self.env.ref(
                    'child_compassion.lang_compassion_french')
                creole = self.env.ref(
                    'child_compassion.lang_compassion_haitien_creole')
                if original_lang == french:
                    vals['original_language_id'] = creole.id

            to_translate = original_lang.translatable and \
                original_lang not in \
                child.project_id.field_office_id.spoken_language_ids

        if to_translate:
            correspondence = super(Correspondence, self.with_context(
                no_comm_kit=True)).create(vals)
            correspondence.send_local_translate()
        else:
            correspondence = super(Correspondence, self).create(vals)

        # Swap pages for L3 layouts as we scan in wrong order
        scanned_l3 = correspondence.template_id.layout == 'CH-A-3S01-1' \
            and correspondence.source != 'compassion'
        if not scanned_l3:
            return correspondence

        reader = PdfFileReader(BytesIO(base64.b64decode(
            correspondence.letter_image)))
        writer = PdfFileWriter()
        page_count = reader.numPages
        if page_count >= 2:
            # Second page first, then the first one, then the rest as is
            for index in [1, 0] + list(range(2, page_count)):
                writer.addPage(reader.getPage(index))
            swapped = BytesIO()
            writer.write(swapped)
            correspondence.write({
                'letter_image': base64.b64encode(swapped.getvalue())
            })

        return correspondence
예제 #4
0
    def create(self, vals):
        """ Create a correspondence record.

        Unless the direction is "Beneficiary To Supporter", the letter is
        created with ``no_comm_kit`` in context and sent to the local
        translation platform whenever its original language is
        translatable and is not one of the field office's spoken
        languages. Otherwise it is created normally.

        For layout 'CH-A-3S01-1' coming from a source other than
        'compassion', pages one and two of the stored PDF are exchanged.
        """
        local_translation = False
        if vals.get('direction') != "Beneficiary To Supporter":
            contract = self.env['recurring.contract'].browse(
                vals['sponsorship_id'])
            letter_lang = self.env['res.lang.compassion'].browse(
                vals.get('original_language_id'))

            # TODO Remove this fix when HAITI case is resolved
            # For now, we switch French to Creole for avoiding translation
            # NOTE(review): letter_lang itself is not switched, so the
            # translation test below still sees French -- please confirm.
            if 'HA' in contract.child_id.local_id:
                lang_french = self.env.ref(
                    'child_compassion.lang_compassion_french')
                lang_creole = self.env.ref(
                    'child_compassion.lang_compassion_haitien_creole')
                if letter_lang == lang_french:
                    vals['original_language_id'] = lang_creole.id

            local_translation = letter_lang.translatable and \
                letter_lang not in contract\
                .child_id.project_id.field_office_id.spoken_language_ids

        if local_translation:
            record_set = self.with_context(no_comm_kit=True)
            correspondence = super(Correspondence, record_set).create(vals)
            correspondence.send_local_translate()
        else:
            correspondence = super(Correspondence, self).create(vals)

        # Swap pages for L3 layouts as we scan in wrong order
        if correspondence.template_id.layout == 'CH-A-3S01-1' and \
                correspondence.source != 'compassion':
            raw_pdf = base64.b64decode(correspondence.letter_image)
            source_pdf = PdfFileReader(BytesIO(raw_pdf))
            reordered_pdf = PdfFileWriter()
            total = source_pdf.numPages
            if total >= 2:
                new_order = [1, 0]
                new_order.extend(range(2, total))
                for page_index in new_order:
                    reordered_pdf.addPage(source_pdf.getPage(page_index))
                target = BytesIO()
                reordered_pdf.write(target)
                encoded = base64.b64encode(target.getvalue())
                correspondence.write({'letter_image': encoded})

        return correspondence
예제 #5
0
	def parse(self, file_full, statdata):
		"""Extract the text of every page of a PDF file, in page order.

		The page count is also recorded in ``self._extra['pages']``.

		:param file_full: path of the PDF file to read
		:param statdata: not used by this method
		:return: the text of all pages, each page preceded by one space
		"""
		# open() replaces the Python 2-only file() builtin, and the
		# with-block closes the handle once all text has been extracted.
		with open(file_full, 'rb') as handle:
			pdf = PdfFileReader(handle)

			pages = pdf.getNumPages()
			self._extra['pages'] = pages

			# getPage() is zero-based: the previous ``pagenr-1`` asked for
			# page -1 first, which put the last page's text at the front.
			return ''.join(
				' ' + pdf.getPage(pagenr).extractText()
				for pagenr in range(pages))
 def get_pdf(self, text, use_design=False):
     """
     Build a one-page PDF containing the given text, drawn from the
     position given by this record's ``left`` and ``top`` (in inches).

     Only the first ``nb_lines + 1`` wrapped lines are drawn; whatever is
     left over is returned as a single space-joined string.

     :param text: Text to put inside a translation box
     :param use_design: Set to true to use a design in the background
     :return: ``(pdf, remaining)`` -- the rendered PDF and the text that
              could not be drawn ('' if none)
     :rtype: the pdf is a PdfFileReader, or a PdfFileWriter when the text
             was merged onto the design
     """
     self.ensure_one()
     stream = StringIO.StringIO()
     drawing = Canvas(stream, bottomup=0)
     lines = self._wrap_text(text, drawing._fontname, drawing._fontsize)
     vertical = self.top*inch
     horizontal = self.left*inch
     limit = self.nb_lines+1
     for current in lines[:limit]:
         drawing.drawString(horizontal, vertical, current)
         # Advance by one line height for the next line
         vertical += drawing._leading
     drawing.save()
     # Back to the start of the buffer before it is parsed
     stream.seek(0)

     leftover = ' '.join(lines[limit:]) if len(lines) > self.nb_lines else ''

     rendered = PdfFileReader(stream)
     design_location = None
     if use_design:
         design_location = self.env['ir.config_parameter'].get_param(
             'sbc_compassion.composition_design')
     if design_location:
         # Text is merged over the first page of the design document
         design_page = PdfFileReader(
             file(design_location, 'rb')).getPage(0)
         design_page.mergePage(rendered.getPage(0))
         rendered = PdfFileWriter()
         rendered.addPage(design_page)
     return rendered, leftover
    def preview(self):
        """ Render a JPEG preview of the letter generated for the first
        sponsorship, store it base64-encoded in ``preview_image`` and set
        the state to 'preview'.

        For layout 'CH-A-3S01-1' only the second page of the PDF is
        rendered.
        """
        pdf_data = self._get_pdf(self.sponsorship_ids[0], preview=True)
        if self.s2b_template_id.layout == 'CH-A-3S01-1':
            # Keep page 2 only (index 1)
            single_page = PdfFileWriter()
            buf = BytesIO()
            single_page.addPage(PdfFileReader(BytesIO(pdf_data)).getPage(1))
            single_page.write(buf)
            pdf_data = buf.getvalue()

        with Image(blob=pdf_data) as rendered:
            jpeg = rendered.make_blob(format='jpeg')

        values = {'state': 'preview', 'preview_image': base64.b64encode(jpeg)}
        return self.write(values)
예제 #8
0
    def preview(self):
        """ Produce the preview picture of the generated letter.

        The PDF built for the first sponsorship is converted to JPEG and
        written, base64-encoded, to ``preview_image`` together with the
        'preview' state. With layout 'CH-A-3S01-1' the picture shows the
        second page of the PDF instead of the whole document.
        """
        document = self._get_pdf(self.sponsorship_ids[0], preview=True)
        only_second_page = self.s2b_template_id.layout == 'CH-A-3S01-1'
        if only_second_page:
            extracted = PdfFileWriter()
            page_two = PdfFileReader(BytesIO(document)).getPage(1)
            extracted.addPage(page_two)
            page_stream = BytesIO()
            extracted.write(page_stream)
            document = page_stream.getvalue()

        with Image(blob=document) as picture:
            encoded_picture = base64.b64encode(
                picture.make_blob(format='jpeg'))

        return self.write({
            'state': 'preview',
            'preview_image': encoded_picture,
        })
예제 #9
0
def add_annotation_to_page(pdf_page, annotation, fontsize=24, margin=20):
  """Return a new, taller page with `annotation` drawn near its lower-left
  corner and `pdf_page` merged in above it.

  The new page is as wide as `pdf_page` and `fontsize + margin` taller;
  the original page content is shifted up by that same amount.
  """
  media_box = pdf_page.mediaBox
  page_width = media_box.getWidth()
  page_height = media_box.getHeight()

  # Draw the annotation on an otherwise empty, enlarged page
  buf = StringIO.StringIO()
  background_canvas = canvas.Canvas(buf)
  background_canvas.setPageSize([page_width, page_height + fontsize + margin])
  background_canvas.setFontSize(fontsize)
  background_canvas.drawString(margin, margin, annotation)
  background_canvas.save()

  # Rewind the buffer so it can be parsed as a PDF
  buf.seek(0)
  annotated_page = PdfFileReader(buf).getPage(0)

  # Place the original page above the annotation strip
  annotated_page.mergeTranslatedPage(pdf_page, ty=margin+fontsize, tx=0)
  return annotated_page
예제 #10
0
    def pair_pages(self):
        """
        Copy every page of the job's PDF file into a new PdfFileWriter and
        return that writer.

        NOTE: padding the result with blank pages up to a multiple of four
        is currently disabled, so the page count is returned unchanged.
        """
        paired_pages = PdfFileWriter()

        # open() replaces the Python 2-only file() builtin. The handle is
        # deliberately left open: the copied pages presumably still read
        # from it when the writer is eventually written out.
        for page in PdfFileReader(open(self.job.pdf_file, "rb")).pages:
            paired_pages.addPage(page)

        return paired_pages
예제 #11
0
    def mergePdf(self):
        """Stamp a header and a creation timestamp on every page of the
        temporary PDF, write the result to ``filePath + fileName`` and
        delete the temporary file.

        The header is made of the (possibly truncated) title followed by
        the lower-cased URL. When ``self.title`` is empty, a title is
        derived from the last path segment of the URL.

        If the temporary PDF does not exist, the work is delegated to
        ``printWebHtmlToPdf`` and its result is returned; otherwise the
        method returns None.
        """
        self.threadPdfWritingStatus.emit(
            '<b>Setting Title for</b> %s<b>. Please Wait...</b><br />' % self.url)

        packet = StringIO()
        # Build a one-page overlay PDF with Reportlab
        pdfCanvas = canvas.Canvas(packet, pagesize=A4)
        pdfCanvas.setFont('Helvetica', 8)
        if not self.title:
            # Fall back to the file name found at the end of the URL,
            # without its extension and with underscores turned to spaces.
            self.title = str(self.url).split('/')[-1]
            self.title = self.regex.getSearchedData(
                r'(?i)([a-zA-Z0-9-_ ]*?)\.[a-zA-Z0-9_]*$', self.title)
            self.title = self.regex.replaceData(r'(?i)_', ' ', self.title)

        # Keep both header parts to at most 60 characters each.
        if len(self.title) > 60:
            title = unicode(self.title[:57] + '...')
        else:
            title = unicode(self.title)
        # The URL is truncated according to its own length (it used to be
        # tested against the title's length).
        if len(self.url) > 60:
            url = self.url[:57] + '...'
        else:
            url = self.url
        pdfCanvas.drawString(5, 830, title + '                      ' + str(url).lower())

        d = datetime.datetime.now()
        strDate = str(d.strftime("%Y-%m-%d %H-%M-%S %p"))
        pdfCanvas.drawString(420, 5, 'Created Date Time: ' + strDate)
        pdfCanvas.save()
        packet.seek(0)
        newPdf = PdfFileReader(packet)

        if not os.path.exists(self.tempPdfFile):
            return self.printWebHtmlToPdf(self.url, self.filePath, self.fileName)

        writer = PdfFileWriter()
        # with-blocks make sure both handles are closed even when merging
        # or writing fails; the input must stay open until write() is done.
        with open(self.tempPdfFile, 'rb') as tmpPdfFile:
            reader = PdfFileReader(tmpPdfFile)
            overlay = newPdf.getPage(0)
            for i in range(reader.getNumPages()):
                page = reader.getPage(i)
                page.mergePage(overlay)
                writer.addPage(page)
            print('Filename: ' + self.fileName)
            with open(self.filePath + self.fileName, 'wb') as outputStream:
                writer.write(outputStream)
        os.remove(str(self.tempPdfFile))
예제 #12
0
def printMeta(filename):
    """Print every document-information entry of the PDF at `filename`.

    A header line naming the file is printed first, followed by one line
    per metadata key. Nothing but the header is printed when the PDF has
    no document-information dictionary.
    """
    # The with-block closes the file; entries are read inside it because
    # the reader may still need the stream to resolve their values.
    with open(filename, "rb") as handle:
        info = PdfFileReader(handle).getDocumentInfo()
        print("[*] PDF Metadata for:" + filename)
        # getDocumentInfo() returns None when the PDF carries no metadata.
        for item in info or {}:
            print("[+]", item, ":", info[item])
예제 #13
0
from pyPdf.pdf import PdfFileWriter, PdfFileReader
from



if __name__ == '__main__':

    output = PdfFileWriter()
    # pyPdf's PdfFileReader expects an open binary stream rather than a
    # path. The raw string keeps the Windows backslashes literal without
    # relying on unrecognised escape sequences.
    with open(r'D:\Anaconda\workspace\vtk_bucket\1.pdf', 'rb') as pdf_stream:
        input1 = PdfFileReader(pdf_stream)
        print('ok')
예제 #14
0
def readTheFile(fName):
    """Open the PDF at `fName` in binary mode and return a PdfFileReader
    for it.

    The file handle is intentionally not closed here: the returned reader
    keeps using it.
    """
    # open() is the documented way to open files; file() is Python 2-only.
    return PdfFileReader(open(fName, "rb"))
예제 #15
0
def extract_text_from_pdf_stream(stream):
    """Return the text of every page of the PDF in `stream`, with the
    pages separated by newlines."""
    pdf = PdfFileReader(stream)
    page_texts = []
    for page_index in range(pdf.getNumPages()):
        page_texts.append(pdf.getPage(page_index).extractText())
    return '\n'.join(page_texts)
예제 #16
0
파일: pdfdice.py 프로젝트: ffshr/prsannots
    for col in range(ncols):
        for row in range(nrows - 1, -1, -1):
            newpage = copy_page(page)
            bbox = (col * xspace + x0, row * yspace + y0,
                    col * xspace + x0 + width, row * yspace + y0 + height)
            newpage.cropBox = newpage.artBox = newpage.trimBox = newpage.mediaBox = RectangleObject(
                bbox)
            outpdf.addPage(newpage)
            bboxes.append(bbox)
    return bboxes


if __name__ == '__main__':
    # Command line: file.pdf ncols nrows [overlap [crop]]
    import sys
    try:
        ncols = int(sys.argv[2])
        nrows = int(sys.argv[3])
        # Opened last so that no handle is left behind on bad arguments.
        infile = open(sys.argv[1], 'rb')
    except (IndexError, IOError, ValueError):
        # ValueError: non-numeric ncols/nrows now also show the usage text.
        print("Usage: %s file.pdf ncols nrows [overlap [crop]]" % sys.argv[0])
        raise SystemExit

    # Lists (not map objects) so the emptiness tests also work on Python 3.
    overlap = [float(arg) for arg in sys.argv[4:6]]
    if not overlap:
        overlap = 0.05
    crop = [float(arg) for arg in sys.argv[6:10]]
    if not crop:
        crop = 0

    # Keep the input open until the output has been written, then close it.
    with infile:
        inpdf = PdfFileReader(infile)
        outpdf, _ = dice(inpdf, ncols, nrows, crop, overlap)
        write_pdf(outpdf, 'pdfdice.pdf')
예제 #17
0
def extractTextFromPdfStream(stream):
    """Read the PDF from `stream` and return the extracted text of all its
    pages joined with newline characters."""
    document = PdfFileReader(stream)
    pageCount = document.getNumPages()
    texts = [document.getPage(number).extractText()
             for number in range(pageCount)]
    return '\n'.join(texts)