예제 #1
0
파일: tests.py 프로젝트: cburgmer/pdfserver
    def test_two_on_one_page(self):
        """Upload a two-page document and combine it two pages per sheet.

        The uploaded document contains the same source page twice. The
        download requested with ``pages_sheet='2'`` is expected to consist
        of a single page on which the text 'Test' occurs twice.
        """
        # Build a document with two pages (the same source page twice)
        pdf = PdfFileReader(self.get_pdf_stream())
        output = PdfFileWriter()
        output.addPage(pdf.getPage(0))
        output.addPage(pdf.getPage(0))
        # unittest assertions rather than bare ``assert``: they survive
        # ``python -O`` and report both values when they fail.
        self.assertEqual(output.getNumPages(), 2)
        self.assertEqual(output.getPage(0).extractText().count('Test'), 1)
        buf = StringIO()
        output.write(buf)
        buf.seek(0)

        rv = self.app.get('/')
        self.assertEqual(rv.status_code, 200)

        # Upload the document; the response of the upload is not inspected.
        self.app.post('/handleform',
                      data={'file': (buf, 'test.pdf')})

        rv = self.combine_and_download(pages_sheet='2')

        pdf_download = PdfFileReader(StringIO(rv.data))
        # Check the page count before touching page 0 so that an empty
        # download fails with an assertion instead of a lookup error.
        self.assertEqual(pdf_download.getNumPages(), 1)
        self.assertEqual(
            pdf_download.getPage(0).extractText().count('Test'), 2)

        self.clean_up()
예제 #2
0
파일: crop.py 프로젝트: vogler/PdfTools
def crop(filein):
    """Emit every page of *filein* twice, each copy with its own media box.

    For each source page two output pages are produced: the first with the
    media box spanning (117, 462) to (117 + 359, 462 + 269), the second
    spanning (117, 111) to (117 + 359, 111 + 269). The result is written
    next to the input as ``<root>-cropped<ext>``.

    filein -- path of the PDF file to read.
    """
    # Fixed size (width, height) of both crop rectangles; it never changes,
    # so it is defined once instead of once per page.
    w, h = 359, 269

    def rect(x, y):
        # Rectangle of the fixed size spanning (x, y) to (x + w, y + h).
        return generic.RectangleObject([x, y, x + w, y + h])

    output = PdfFileWriter()
    fileout = "%s-cropped%s" % os.path.splitext(filein)

    # The input is opened twice so that each page is available as two
    # separate page objects that can carry different media boxes. Both
    # streams stay open until the output has been written (presumably the
    # writer still reads page data from them at that point) and are closed
    # afterwards, also on error.
    with open(filein, "rb") as stream1:
        with open(filein, "rb") as stream2:
            input1 = PdfFileReader(stream1)
            input2 = PdfFileReader(stream2)
            # print the title of the document
            print("title = %s" % (input1.getDocumentInfo().title))
            n = input1.getNumPages()
            # loop over pages
            for i in range(n):
                slide1 = input1.getPage(i)
                slide1.mediaBox = rect(117, 462)
                output.addPage(slide1)
                slide2 = input2.getPage(i)
                slide2.mediaBox = rect(117, 111)
                output.addPage(slide2)
            # write output
            with open(fileout, "wb") as outputStream:
                output.write(outputStream)

    # print some information
    n2 = output.getNumPages()
    print("%s has %s pages." % (filein, n))
    print("%s has %s pages." % (fileout, n2))
예제 #3
0
파일: tests.py 프로젝트: pombreda/pdfserver
    def test_two_on_one_page(self):
        """Upload a two-page document and combine it two pages per sheet.

        The uploaded document contains the same source page twice. The
        download requested with ``pages_sheet='2'`` is expected to consist
        of a single page on which the text 'Test' occurs twice.
        """
        # Build a document with two pages (the same source page twice)
        pdf = PdfFileReader(self.get_pdf_stream())
        output = PdfFileWriter()
        output.addPage(pdf.getPage(0))
        output.addPage(pdf.getPage(0))
        # unittest assertions rather than bare ``assert``: they survive
        # ``python -O`` and report both values when they fail.
        self.assertEqual(output.getNumPages(), 2)
        self.assertEqual(output.getPage(0).extractText().count('Test'), 1)
        buf = StringIO()
        output.write(buf)
        buf.seek(0)

        rv = self.app.get('/')
        self.assertEqual(rv.status_code, 200)

        # Upload the document; the response of the upload is not inspected.
        self.app.post('/handleform', data={'file': (buf, 'test.pdf')})

        rv = self.combine_and_download(pages_sheet='2')

        pdf_download = PdfFileReader(StringIO(rv.data))
        # Check the page count before touching page 0 so that an empty
        # download fails with an assertion instead of a lookup error.
        self.assertEqual(pdf_download.getNumPages(), 1)
        self.assertEqual(
            pdf_download.getPage(0).extractText().count('Test'), 2)

        self.clean_up()
예제 #4
0
def compact(filein):
    """Drop pages whose text is only a prefix of the following page's text.

    Consecutive pages are compared by their extracted text (ignoring the
    last three characters of each). A page is kept only when the page after
    it does not start with the same text; the last page is always kept.
    The result is written next to the input as ``<root>-compact<ext>``.

    filein -- path of the PDF file to read.
    """
    output = PdfFileWriter()
    fileout = "%s-compact%s" % os.path.splitext(filein)

    # The input stays open until the output has been written and is closed
    # afterwards, also on error.
    with open(filein, "rb") as inputStream:
        input1 = PdfFileReader(inputStream)
        # print the title of the document
        print("title = %s" % (input1.getDocumentInfo().title))
        n = input1.getNumPages()

        # Text of every page is extracted once and carried over to the next
        # iteration instead of being extracted again as "previous" page.
        # The trailing three characters are cut off before comparing
        # (presumably a page number -- TODO confirm).
        prev = input1.getPage(0)
        prevTxt = prev.extractText()[:-3]
        # loop over pages
        for i in range(1, n):
            curr = input1.getPage(i)
            currTxt = curr.extractText()[:-3]
            if not currTxt.startswith(prevTxt):
                # current page is something new -> save latest old one
                output.addPage(prev)
            # otherwise the current page extends the previous one -> skip it
            prev, prevTxt = curr, currTxt
        output.addPage(prev)  # add last page

        # write output
        with open(fileout, "wb") as outputStream:
            output.write(outputStream)

    # print some information
    n2 = output.getNumPages()
    print("%s has %s pages." % (filein, n))
    print("%s has %s pages." % (fileout, n2))
    print("-> removed %s pages\n" % (n - n2))
예제 #5
0
def consume_files(filelist):
    """Write numbered PDF files until *filelist* is empty.

    Each round creates a fresh writer, hands it to ``merge_pages_better``
    together with *filelist*, and stores the result as
    ``PREFIX_STR<i>.pdf`` with ``i`` counting up from 0.

    NOTE(review): the loop only terminates if ``merge_pages_better``
    removes entries from *filelist* -- confirm against that helper.

    filelist -- list of pending inputs; must support ``len()``.

    Returns 1.
    """
    i = 0
    while len(filelist) > 0:
        output = PdfFileWriter()
        merge_pages_better(output, filelist)
        print("%s%d.pdf has %d pages" % (PREFIX_STR, i, output.getNumPages()))
        # ``with`` closes the output even when writing fails.
        with open(PREFIX_STR + str(i) + '.pdf', 'wb') as outputstream:
            output.write(outputstream)
        i = i + 1
    return 1
def merge_pdf(url_list):
    """Download schedule PDFs, merge them, upload the result, store the link.

    Every entry is fetched with ``wget``, read from ``/tmp/<filename>`` and
    its first two pages (plus the third when the document has exactly three
    pages) are appended to one combined document. The combined document is
    written to ``/tmp/bus.pdf``, posted with ``curl`` and the content of
    the reply is saved to the scraperwiki database.

    url_list -- iterable of mutable ``[url, filename]`` pairs. Entries
        whose URL is one of the known zero-padded names are corrected
        in place before downloading.
    """
    import subprocess

    # Schedules that are requested with a zero-padded name but have to be
    # fetched without the leading zero: requested name -> actual name.
    base = "http://www.cota.com/assets/Riding-Cota/Schedules/Current/"
    corrected_names = {
        base + "083.pdf": "83.pdf",
        base + "039.pdf": "39.pdf",
        base + "021.pdf": "21.pdf",
        base + "016S.pdf": "16S.pdf",
        base + "015.pdf": "15.pdf",
    }

    output = PdfFileWriter()
    # Input streams stay open until the merged document has been written
    # and are closed afterwards, also on error.
    open_inputs = []
    try:
        for url in url_list:
            fixed_name = corrected_names.get(url[0])
            if fixed_name is not None:
                url[0] = base + fixed_name
                url[1] = fixed_name

            # Argument list instead of a shell string: the URL comes from
            # scraped data and must not be interpreted by a shell.
            # NOTE(review): wget saves into the current directory while the
            # file is read from /tmp below -- this assumes cwd is /tmp.
            subprocess.call(["wget", url[0]])
            pdf_stream = open('/tmp/%s' % url[1], "rb")
            open_inputs.append(pdf_stream)
            input1 = PdfFileReader(pdf_stream)
            numPages = input1.getNumPages()
            print("number of pages = %s" % (numPages))
            # NOTE(review): documents with fewer than two pages fail here
            # and pages beyond the third are never merged -- confirm intent.
            output.addPage(input1.getPage(0))
            output.addPage(input1.getPage(1))

            if numPages == 3:
                output.addPage(input1.getPage(2))

        final_page_count = output.getNumPages()
        print("Number of Pages in Final = %s" % (final_page_count))

        with open("/tmp/bus.pdf", "wb") as outputStream:
            output.write(outputStream)
    finally:
        for pdf_stream in open_inputs:
            pdf_stream.close()

    # NOTE(review): "[email protected]" looks like an e-mail obfuscation
    # artifact in place of the original form-field argument; it is kept
    # unchanged here -- restore the intended value before relying on the
    # upload. ``-k`` disables TLS certificate verification.
    subprocess.call(['curl', '-kF', '[email protected];filename=bus.pdf',
                     'https://anonfiles.com/api/hotlink', '-o', 'reply.txt'])

    with open('reply.txt', 'r') as f:
        read_data = f.read()

    data_dict = {
        'Title': 'Link to COTA Bus Schedule',
        'URL': read_data,
    }
    scraperwiki.sqlite.save(unique_keys=['Title', 'URL'], data=data_dict)
예제 #7
0
def splitPDF(inputpdf, output_fn, start, end):
    """ from the input pdf creates a new pdf file with the name output_fn
    containing only the pages of the pdf from start to end
    """    
    
    outputpdf = PdfFileWriter()
    print 'added pages ',
    for i in xrange(start, end):
        outputpdf.addPage(inputpdf.getPage(i))
        print str(i) + ',',
    print 'finished. printed', outputpdf.getNumPages(), 'pages'
    
    outstream = file(output_fn, 'wb')
    outputpdf.write(outstream)
    outstream.close()
예제 #8
0
def merge_pdfs(output_name, files):
    """Merge files in the order given. Make sure to sort first.

    Files that cannot be opened or parsed are reported with ``print`` and
    skipped. Nothing is written when no page could be collected.

    output_name -- path of the merged PDF file to create.
    files       -- iterable of paths of the PDF files to merge.
    """
    output = PdfFileWriter()
    # Input streams stay open until the merged document has been written
    # and are closed afterwards, also on error.
    open_inputs = []
    try:
        for f in files:
            try:
                stream = open(f, "rb")
            except IOError as e:
                print(e)
                continue
            # Registered before parsing so the handle is closed even when
            # the file turns out not to be a readable PDF.
            open_inputs.append(stream)
            try:
                reader = PdfFileReader(stream)
            except (IOError, PdfReadError) as e:
                print(e)
                continue
            for p in reader.pages:
                output.addPage(p)
        if output.getNumPages():
            with open(output_name, "wb") as ostream:
                output.write(ostream)
    finally:
        for stream in open_inputs:
            stream.close()
예제 #9
0
파일: tests.py 프로젝트: pombreda/pdfserver
    def test_page_ranges(self):
        """Upload a twenty-page document and download a page selection.

        Pages are produced with ``replace_text(page, 'Test', 'Test <n>')``
        for n = 1..20. The range expression '-5, 10, 12-14, 18-' is
        expected to yield the pages 1-5, 10, 12-14 and 18-20, in that order.
        """
        rv = self.app.get('/')
        self.assertEqual(rv.status_code, 200)

        pdf = PdfFileReader(self.get_pdf_stream())

        # Build a document with twenty pages
        output = PdfFileWriter()
        page = pdf.getPage(0)
        for i in range(1, 21):
            new_page = CombineTestCase.replace_text(page, 'Test',
                                                    'Test %d' % i)
            output.addPage(new_page)
        # unittest assertion rather than bare ``assert``: it survives
        # ``python -O`` and reports both values when it fails.
        self.assertEqual(output.getNumPages(), 20)

        buf = StringIO()
        output.write(buf)
        buf.seek(0)

        rv = self.app.post('/handleform',
                           data={'file': (buf, 'test.pdf')},
                           follow_redirects=True)

        ids = self.extract_ids_from_main_page(rv.data)
        pages_kw = 'pages_%d' % ids[0]
        rv = self.combine_and_download(**{pages_kw: '-5, 10, 12-14, 18-'})

        pdf_download = PdfFileReader(StringIO(rv.data))

        # Test ranges
        ranges = [1, 2, 3, 4, 5, 10, 12, 13, 14, 18, 19, 20]

        # Check the page count first so that a short download fails with an
        # assertion instead of a lookup error from getPage().
        self.assertEqual(pdf_download.getNumPages(), len(ranges))

        # One assertion per page so a failure names the offending page.
        for i, page_number in enumerate(ranges):
            expected = 'Test %d' % page_number
            self.assertTrue(
                expected in pdf_download.getPage(i).extractText(),
                '%r not found on downloaded page %d' % (expected, i))

        self.clean_up()
예제 #10
0
파일: tests.py 프로젝트: cburgmer/pdfserver
    def test_page_ranges(self):
        """Upload a twenty-page document and download a page selection.

        Pages are produced with ``replace_text(page, 'Test', 'Test <n>')``
        for n = 1..20. The range expression '-5, 10, 12-14, 18-' is
        expected to yield the pages 1-5, 10, 12-14 and 18-20, in that order.
        """
        rv = self.app.get('/')
        self.assertEqual(rv.status_code, 200)

        pdf = PdfFileReader(self.get_pdf_stream())

        # Build a document with twenty pages
        output = PdfFileWriter()
        page = pdf.getPage(0)
        for i in range(1, 21):
            new_page = CombineTestCase.replace_text(page, 'Test', 'Test %d' % i)
            output.addPage(new_page)
        # unittest assertion rather than bare ``assert``: it survives
        # ``python -O`` and reports both values when it fails.
        self.assertEqual(output.getNumPages(), 20)

        buf = StringIO()
        output.write(buf)
        buf.seek(0)

        rv = self.app.post('/handleform',
                           data={'file': (buf, 'test.pdf')},
                           follow_redirects=True)

        ids = self.extract_ids_from_main_page(rv.data)
        pages_kw = 'pages_%d' % ids[0]
        rv = self.combine_and_download(**{pages_kw: '-5, 10, 12-14, 18-'})

        pdf_download = PdfFileReader(StringIO(rv.data))

        # Test ranges
        ranges = [1, 2, 3, 4, 5, 10, 12, 13, 14, 18, 19, 20]

        # Check the page count first so that a short download fails with an
        # assertion instead of a lookup error from getPage().
        self.assertEqual(pdf_download.getNumPages(), len(ranges))

        # One assertion per page so a failure names the offending page.
        for i, page_number in enumerate(ranges):
            expected = 'Test %d' % page_number
            self.assertTrue(
                expected in pdf_download.getPage(i).extractText(),
                '%r not found on downloaded page %d' % (expected, i))

        self.clean_up()
예제 #11
0
   batchnames.append("batch"+str(i)+".pdf")


# Loop through HouseDistricts
# The district table does not change while the districts are processed, so
# it is read once instead of once per district and batch.
with open('csvs/district_data.csv', 'rb') as csvfile:
    district_rows = list(
        csv.reader(csvfile, delimiter=',', quotechar='"'))

# One output document per district number 1..99. A CSV row contributes a
# page when its first column names the batch file and its tenth column
# equals the district number; the second column is the 1-based page number
# inside that batch file.
for i in range(1, 100):
    output = PdfFileWriter()
    outputfilename = "pdfs/HD" + str(i) + ".pdf"
    district = str(i)
    # Batch streams stay open until the district document has been written
    # and are closed afterwards, also on error.
    open_batches = []
    try:
        for b in batchnames:
            batch_stream = open("pdfs/" + b, "rb")
            open_batches.append(batch_stream)
            input_pdf = PdfFileReader(batch_stream)
            for row in district_rows:
                if row[0] == b and row[9] == district:
                    output.addPage(input_pdf.getPage(int(row[1]) - 1))
        if output.getNumPages() > 0:
            with open(outputfilename, "wb") as outputStream:
                output.write(outputStream)
            print("There are %s form letters from %s " % (
                output.getNumPages(), outputfilename))
    finally:
        for batch_stream in open_batches:
            batch_stream.close()


# Loop through SenateDistricts
for i in range(1,34):
      output = PdfFileWriter()
      outputfilename = "pdfs/SD"+str(i)+".pdf"
      for b in batchnames:
         input_pdf = PdfFileReader(file("pdfs/"+b, "rb"))
         with open('csvs/district_data.csv', 'rb') as csvfile:
            district_reader = csv.reader(csvfile, delimiter=',', quotechar='"')
            for row in district_reader:
# This is the pyPdf library. Install with: $ pip install pyPdf
from pyPdf import PdfFileWriter, PdfFileReader

# Open a writer object
output = PdfFileWriter()

# Load the pdf file(s) you want to input pages from. The source stays open
# until the output has been written and is closed afterwards, also on error.
with open("test.pdf", "rb") as inputStream:
    input1 = PdfFileReader(inputStream)

    # Add some pages from your source/input files to the writer object
    # (the numbers are the indices handed to getPage)
    output.addPage(input1.getPage(8))
    output.addPage(input1.getPage(15))

    # For kicks, print the number of pages added to the console
    print("number of pages is: %s " % output.getNumPages())

    # Open the output file in binary mode, write the assembled document and
    # close the file again
    with open("assembled_pdf.pdf", "wb") as outputStream:
        output.write(outputStream)
예제 #13
0
# Step 1: merge the first page of every input document with the first page
# of its "hack" file and store the result as finalPage<n>.pdf.
for x, name in enumerate(INPUT):
    hackfile = 'editionHack/hack{0}.pdf'.format(x + OFFSET)
    goodfile = 'editionHack/' + name + ".pdf"
    output = PdfFileWriter()
    # Both sources stay open until the merged page has been written and
    # are closed afterwards, also on error.
    with open(goodfile, "rb") as goodStream:
        with open(hackfile, "rb") as hackStream:
            input1 = PdfFileReader(goodStream)
            page1 = input1.getPage(0)
            watermark = PdfFileReader(hackStream)
            page1.mergePage(watermark.getPage(0))
            output.addPage(page1)
            print("finalPage{0}.pdf".format(x + OFFSET))
            finalfile = "editionHack/finalPage{0}.pdf".format(x + OFFSET)
            with open(finalfile, "wb") as outputStream:
                output.write(outputStream)

# Step 2: assemble the interior from finalPage3..finalPage10, the main
# document and, for an odd page count, one trailing blank page.
output = PdfFileWriter()
# Source streams stay open until the interior has been written and are
# closed afterwards, also on error.
openInputs = []
try:
    for x in range(3, 11):
        stream = open('editionHack/finalPage{0}.pdf'.format(x), "rb")
        openInputs.append(stream)
        input1 = PdfFileReader(stream)
        output.addPage(input1.getPage(0))

    stream = open('sitOzfarsWysr_a4.pdf', "rb")
    openInputs.append(stream)
    input1 = PdfFileReader(stream)
    for x in range(input1.getNumPages()):
        output.addPage(input1.getPage(x))

    # Pad to an even number of pages
    if output.getNumPages() % 2 == 1:
        stream = open('editionHack/blankPage.pdf', "rb")
        openInputs.append(stream)
        input1 = PdfFileReader(stream)
        output.addPage(input1.getPage(0))

    # The file name has no placeholder, so no formatting is applied to it.
    with open("editionHack/sowInterior.pdf", "wb") as outputStream:
        output.write(outputStream)
finally:
    for stream in openInputs:
        stream.close()
예제 #14
0
  })
# Add It
output._addObject(properties)
# Add back the one we deleted first
output._addObject(item)

# Both sources stay open until the output has been written and are closed
# afterwards, also on error or early exit.
addressStream = open("input\\address\\" + fname, "rb")
try:
    locationStream = open("input\\location\\" + fname, "rb")
    try:
        address = PdfFileReader(addressStream)
        location = PdfFileReader(locationStream)

        print("Location Pages: %s" % location.numPages)
        print("Address Pages: %s" % address.numPages)

        if location.numPages > 1:
            print("Location has more than 1 page, exiting...")
            # Non-zero status so callers can tell the run was aborted.
            sys.exit(1)

        # Follow every address page with the single location page
        page = location.getPage(0)
        for i in xrange(address.numPages):
            output.addPage(address.getPage(i))
            output.addPage(page)

        # output should be double address
        print("Output Pages: %s" % output.getNumPages())

        if output.getNumPages() > 0:
            with open("output\\" + fname, "wb") as outStream:
                output.write(outStream)
            print("Generation Complete")
    finally:
        locationStream.close()
finally:
    addressStream.close()
예제 #15
0
# Collect, per position k, the distinct k-th parts of all file names in the
# order of their first appearance.
for fn in fnames:
    for k, part in enumerate(fn):
        if k >= len(out_name):
            out_name.append([])
        if part not in out_name[k]:
            out_name[k].append(part)

# Join the parts collected for every position; position 0 is replaced by
# today's date (yymmdd).
for k, parts in enumerate(out_name):
    if k == 0:
        parts = [datetime.date.today().strftime('%y%m%d')]
    out_name[k] = '_'.join(parts)

# assemble the final file name from the filtered parts
postfix = '_crop'
if backpage:
    postfix = '_backpage'
# The name without extension is cut to 251 characters, so the complete
# file name is at most 255 characters long.
out_name = ('_'.join(out_name) + postfix)[0:251] + '.pdf'

print('')
print(out_name)
print("has %s pages" % output.getNumPages())
print('Saving...')

# ``with`` closes the output even when writing fails.
with open(out_name, "wb") as outputStream:
    output.write(outputStream)

print('Done')
예제 #16
0
파일: delivery.py 프로젝트: suningwz/fdoo
    def _dpd_label_get(self, cr, uid, picking, test=False, context=None):
        """Request DPD labels for *picking* and store the result on it.

        One ``getLabel`` request is sent per package. The first page of
        every returned label PDF is collected into one combined PDF, which
        is written to the picking together with the tracking numbers, the
        collected error messages, the status and the package count.

        The package count is the sum of ``package_count`` over all pack
        operations with a done quantity; all done operations without a
        package count together add one package; the minimum is 1.

        picking -- the picking to create labels for.
        test    -- when true the carrier's test profile is used, and the
            tracking reference and status are left empty.

        Returns True when *test* is set and no test profile is defined;
        otherwise nothing is returned.

        Raises Warning when the carrier is missing or not a DPD carrier,
        when no DPD profile is defined or when no gram unit is found. Any
        other exception is passed to ``self._dpd_error`` and re-raised.
        """
        carrier = picking.carrier_id
        if not carrier or carrier.api != "dpd":
            raise Warning(_("Invalid carrier type!"))

        profile = carrier.dpd_profile_id
        if not profile:
            raise Warning(_("No DPD Profile defined!"))

        # check test profile
        if test:
            profile = carrier.dpd_test_profile_id
            if not profile:
                return True

        # check packages
        package_count = 0
        package_count_all = True

        # check operation for package count
        for operation in picking.pack_operation_ids:
            if operation.qty_done:
                # check if all operation have a package count
                # if not add one package for itself
                if not operation.package_count:
                    if package_count_all:
                        package_count_all = False
                        package_count += 1
                else:
                    # add packages
                    package_count += operation.package_count

        if not package_count:
            package_count = 1

        partner = picking.partner_id
        client = self._dpd_client_get(context)
        # Looked up once up front instead of inside the package loop, so
        # the final write does not depend on the loop having run.
        picking_obj = self.pool["stock.picking"]

        tracking_refs = []
        carrier_errors = []
        label_pdf = PdfFileWriter()
        carrier_label_name = None
        try:

            # NOTE(review): nothing in the request depends on packageNo;
            # the same request is built and sent once per package.
            for packageNo in range(0, package_count):

                parts = {}
                parts["username"] = profile.user
                parts["password"] = md5(profile.password).hexdigest()
                parts["mandant"] = profile.client
                parts["kdnr"] = partner.ref or ""

                name = partner.name.strip()
                bezugsp = partner.street2 and partner.street2.strip() or ""
                parent_partner = partner.parent_id
                if not partner.mail_without_company and parent_partner:
                    # address the parent and use the partner's own name
                    # as the reference field
                    name = parent_partner.name
                    bezugsp = partner.name.strip()

                # Names longer than 48 characters are split into name and
                # an addition of at most 32 characters, preferably at the
                # last space, provided it lies beyond position 30.
                zusatz = ""
                if len(name) > 48:
                    shortName = name[:48]
                    lastSpacePos = shortName.rfind(" ")
                    if lastSpacePos > 30:
                        lastSpacePos += 1
                        zusatz = name[lastSpacePos:lastSpacePos + 32]
                        name = shortName[:lastSpacePos]
                    else:
                        zusatz = name[48:80]
                        name = shortName

                parts["name"] = name
                parts["zusatz"] = zusatz

                parts["anschrift"] = (partner.street
                                      and partner.street.strip() or "")
                parts["plz"] = partner.zip and partner.zip.strip() or ""
                parts["ort"] = partner.city and partner.city.strip() or ""
                parts["land"] = (partner.country_id
                                 and partner.country_id.code or "AT")

                parts["bezugsp"] = bezugsp
                parts["tel"] = partner.phone or partner.mobile or ""
                parts["mail"] = partner.email or ""
                parts["liefernr"] = picking.name or ""
                parts["pakettyp"] = carrier.dpd_type or "DPD"

                # Default weight value; replaced by the picking weight
                # converted to the gram unit when a weight is set.
                parts["gewicht"] = "1000"
                weight = picking.carrier_weight or picking.weight or 0.0
                if weight:
                    uom_obj = self.pool["product.uom"]
                    uom_id = uom_obj.search_id(
                        cr, uid, [("category_id", "=",
                                   picking.weight_uom_id.category_id.id), '|',
                                  ("name", "=", "g"), ("code", "=", "g")])
                    uom = uom_obj.browse(cr, uid, uom_id, context=context)
                    if not uom:
                        raise Warning(_("No unit gramm found!"))
                    parts["gewicht"] = str(
                        int(
                            uom_obj._compute_qty(cr, uid,
                                                 picking.weight_uom_id.id,
                                                 weight, uom.id)))

                parts["vdat"] = ""

                # Without a configured product the product code is chosen
                # by weight. NOTE(review): the threshold is compared with
                # the unconverted picking weight -- confirm the unit.
                produkt1 = carrier.dpd_product1
                if not produkt1:
                    produkt1 = "KP"
                    if weight > 3:
                        produkt1 = "NP"

                parts["produkt1"] = produkt1
                parts["produkt2"] = []
                parts["produkt3"] = []
                parts["produkt4"] = []
                parts["produkt5"] = ""
                parts["produkt6"] = []
                parts["produkt7"] = ""

                msgSoapOut = client.service.getLabel(**parts)

                # save pdf
                label_url = msgSoapOut.label
                if label_url:
                    carrier_label_name = label_url.split("/")[-1]
                    label_file = urllib2.urlopen(label_url)
                    try:
                        # add page
                        label_pdf.addPage(
                            PdfFileReader(StringIO(
                                label_file.read())).getPage(0))
                    finally:
                        label_file.close()

                # evaluate error
                err_code = msgSoapOut.err_code
                if err_code:
                    carrier_error = err_code
                    foundError = False
                    # translate known error codes; the last match wins
                    for err, err_message in self._dpd_errors:
                        if err in err_code:
                            carrier_error = err_message
                            foundError = True

                    if not foundError:
                        # unknown code: report it with HTML entities
                        # decoded
                        h = HTMLParser()
                        carrier_error = h.unescape(carrier_error)

                    _logger.error(carrier_error)
                    carrier_errors.append(carrier_error)
                else:
                    # store ref
                    tracking_refs.append(msgSoapOut.paknr)

            # build label
            carrier_label = None
            if label_pdf.getNumPages() > 0:
                bufPdf = StringIO()
                try:
                    label_pdf.write(bufPdf)
                    carrier_label = base64.encodestring(bufPdf.getvalue())
                finally:
                    bufPdf.close()

            status = None
            carrier_tracking_ref = None

            if not carrier_errors and not test:
                status = "created"

            if not test:
                carrier_tracking_ref = ", ".join(tracking_refs)

            # write data
            picking_obj.write(cr,
                              uid,
                              picking.id, {
                                  "carrier_label_name": carrier_label_name,
                                  "carrier_label": carrier_label,
                                  "carrier_error": "\n".join(carrier_errors),
                                  "carrier_tracking_ref": carrier_tracking_ref,
                                  "carrier_status": status,
                                  "number_of_packages": package_count
                              },
                              context=context)

        except Exception as e:
            self._dpd_error(e)
            # bare ``raise`` keeps the original traceback
            raise
# This is the pyPdf library. Install with: $ pip install pyPdf
from pyPdf import PdfFileWriter, PdfFileReader

# Open a writer object
output = PdfFileWriter()

# Load the pdf file(s) you want to input pages from. The source stays open
# until the output has been written and is closed afterwards, also on error.
with open("test.pdf", "rb") as inputStream:
    input1 = PdfFileReader(inputStream)

    # Add some pages from your source/input files to the writer object
    # (the numbers are the indices handed to getPage)
    output.addPage(input1.getPage(8))
    output.addPage(input1.getPage(15))

    # For kicks, print the number of pages added to the console
    print("number of pages is: %s " % output.getNumPages())

    # Open the output file in binary mode, write the assembled document and
    # close the file again
    with open("assembled_pdf.pdf", "wb") as outputStream:
        output.write(outputStream)
예제 #18
0
파일: merge.py 프로젝트: alvonellos/sandbox
from pyPdf import PdfFileWriter, PdfFileReader
import glob
import os

# Collect the first page of every PDF in the current directory, in sorted
# file name order, into out.pdf.
output = PdfFileWriter()
# A previous run's out.pdf is skipped: it would otherwise be opened as an
# input and then truncated by opening it for writing below.
files = [path for path in glob.glob(r'./*.pdf')
         if os.path.basename(path) != "out.pdf"]
# Source streams stay open until the output has been written and are
# closed afterwards, also on error.
open_inputs = []
try:
    for stuff in sorted(files):
        stream = open(stuff, "rb")
        open_inputs.append(stream)
        # named ``reader`` so the builtin ``input`` is not shadowed
        reader = PdfFileReader(stream)
        print("processing %s " % (stuff))
        output.addPage(reader.getPage(0))

    print("output has %s pages." % output.getNumPages())

    with open("out.pdf", 'wb') as outputStream:
        output.write(outputStream)
finally:
    for stream in open_inputs:
        stream.close()

예제 #19
0
파일: split.py 프로젝트: jwheare/pdf-utils
        
        page2 = output_pdf.addBlankPage(x/2, y)
        page2.mergeTranslatedPage(p, -x/2, 0)
        
        split_count = split_count + 1
    else:
        # Portrait, fine on its own
        output_pdf.addPage(p)
    
    progress.animate(amount=i)
    
progress.animate(amount=pages)

print('Total pages: %d -> %d ' % (
    pages,
    output_pdf.getNumPages()
))
if split_count:
    print('/-{ %d }-\\ double spreads split apart' % split_count)

print('Writing... here is a bird to keep you company')

def write_file():
    """Write ``output_pdf`` to ``<input_base>.split<input_ext>``.

    Reads the module-level names ``output_pdf``, ``input_base`` and
    ``input_ext``.
    """
    # ``with`` closes the file even when writing fails.
    with open(input_base + '.split' + input_ext, "wb") as output_file:
        output_pdf.write(output_file)

# Write on a separate thread.
t = threading.Thread(target=write_file)
t.start()