def test_two_on_one_page(self):
    """Upload a two-page document and combine it two-pages-per-sheet."""
    # Assemble an upload consisting of the same sample page twice.
    source = PdfFileReader(self.get_pdf_stream())
    writer = PdfFileWriter()
    for _ in range(2):
        writer.addPage(source.getPage(0))
    assert writer.getNumPages() == 2
    assert writer.getPage(0).extractText().count('Test') == 1
    upload = StringIO()
    writer.write(upload)
    upload.seek(0)
    rv = self.app.get('/')
    self.assertEquals(rv.status_code, 200)
    rv = self.app.post('/handleform', data={'file': (upload, 'test.pdf')})
    rv = self.combine_and_download(pages_sheet='2')
    # Both source pages must now share a single combined sheet.
    pdf_download = PdfFileReader(StringIO(rv.data))
    self.assertEquals(pdf_download.getPage(0).extractText().count('Test'), 2)
    self.assertEquals(pdf_download.getNumPages(), 1)
    self.clean_up()
def crop(filein): output = PdfFileWriter() input1 = PdfFileReader(file(filein, "rb")) input2 = PdfFileReader(file(filein, "rb")) # print the title of the document print "title = %s" % (input1.getDocumentInfo().title) n = input1.getNumPages() # loop over pages for i in range(n): # p = input1.getPage(i) # print p.cropBox # p.mediaBox.upperRight = ( # p.mediaBox.getUpperRight_x() / 2, # p.mediaBox.getUpperRight_y() / 2 # ) w, h = 359, 269 slide1 = input1.getPage(i) rect = lambda x, y: generic.RectangleObject([x, y, x+w, y+h]) slide1.mediaBox = rect(117, 462) output.addPage(slide1) slide2 = input2.getPage(i) slide2.mediaBox = rect(117, 111) output.addPage(slide2) # write output fileout = "%s-cropped%s" % os.path.splitext(filein) outputStream = file(fileout, "wb") output.write(outputStream) outputStream.close() # print some information n2 = output.getNumPages() print "%s has %s pages." % (filein, n) print "%s has %s pages." % (fileout, n2)
def test_two_on_one_page(self):
    """Combining a two-page upload at 2 pages/sheet yields one sheet."""
    reader = PdfFileReader(self.get_pdf_stream())
    doc = PdfFileWriter()
    first_page = reader.getPage(0)
    doc.addPage(first_page)
    doc.addPage(first_page)
    assert doc.getNumPages() == 2
    assert doc.getPage(0).extractText().count('Test') == 1
    stream = StringIO()
    doc.write(stream)
    stream.seek(0)
    response = self.app.get('/')
    self.assertEquals(response.status_code, 200)
    response = self.app.post('/handleform',
                             data={'file': (stream, 'test.pdf')})
    response = self.combine_and_download(pages_sheet='2')
    combined = PdfFileReader(StringIO(response.data))
    # The two 'Test' pages were merged onto a single output sheet.
    self.assertEquals(combined.getPage(0).extractText().count('Test'), 2)
    self.assertEquals(combined.getNumPages(), 1)
    self.clean_up()
def compact(filein): output = PdfFileWriter() input1 = PdfFileReader(file(filein, "rb")) # print the title of the document print "title = %s" % (input1.getDocumentInfo().title) n = input1.getNumPages() # loop over pages for i in range(1, n): curr = input1.getPage(i) prev = input1.getPage(i - 1) currTxt = curr.extractText()[:-3] prevTxt = prev.extractText()[:-3] if currTxt.find(prevTxt) == 0: # prevTxt is prefix of currTxt pass # the current page is an extension to the previous one -> continue else: output.addPage( prev) # current page is something new -> save latest old one output.addPage(input1.getPage(n - 1)) # add last page # write output fileout = "%s-compact%s" % os.path.splitext(filein) outputStream = file(fileout, "wb") output.write(outputStream) outputStream.close() # print some information n2 = output.getNumPages() print "%s has %s pages." % (filein, n) print "%s has %s pages." % (fileout, n2) print "-> removed %s pages\n" % (n - n2)
def consume_files(filelist): i = 0 while (len(filelist) > 0): output = PdfFileWriter() merge_pages_better(output, filelist) outputstream = file(PREFIX_STR+str(i)+'.pdf', 'wb') print "%s%d.pdf has %d pages" % (PREFIX_STR, i, output.getNumPages()) i = i + 1 output.write(outputstream) outputstream.close() return 1
def merge_pdf(url_list): # Download each PDF and merge them into one giant PDF, post this giant PDF to anonfiles.com, add URL to scraperwiki database output = PdfFileWriter() for url in url_list: if url[0] == "http://www.cota.com/assets/Riding-Cota/Schedules/Current/083.pdf": url[0] = "http://www.cota.com/assets/Riding-Cota/Schedules/Current/83.pdf" url[1] = "83.pdf" if url[0] == "http://www.cota.com/assets/Riding-Cota/Schedules/Current/039.pdf": url[0] = "http://www.cota.com/assets/Riding-Cota/Schedules/Current/39.pdf" url[1] = "39.pdf" if url[0] == "http://www.cota.com/assets/Riding-Cota/Schedules/Current/021.pdf": url[0] = "http://www.cota.com/assets/Riding-Cota/Schedules/Current/21.pdf" url[1] = "21.pdf" if url[0] == "http://www.cota.com/assets/Riding-Cota/Schedules/Current/016S.pdf": url[0] = "http://www.cota.com/assets/Riding-Cota/Schedules/Current/16S.pdf" url[1] = "16S.pdf" if url[0] == "http://www.cota.com/assets/Riding-Cota/Schedules/Current/015.pdf": url[0] = "http://www.cota.com/assets/Riding-Cota/Schedules/Current/15.pdf" url[1] = "15.pdf" pdf_file = os.system("wget %s" % url[0]) input1 = PdfFileReader(file('/tmp/%s' % url[1], "rb")) numPages = input1.getNumPages() print "number of pages = %s" % (numPages) page1 = input1.getPage(0) page2 = input1.getPage(1) output.addPage(page1) output.addPage(page2) if numPages == 3: page3 = input1.getPage(2) output.addPage(page3) final_page_count = output.getNumPages() print "Number of Pages in Final = %s" % (final_page_count) outputStream = file("/tmp/bus.pdf", "wb") output.write(outputStream) outputStream.close() reply = os.system('curl -kF "[email protected];filename=bus.pdf" https://anonfiles.com/api/hotlink -o "reply.txt"') with open('reply.txt', 'r') as f: read_data = f.read() data_dict = { 'Title':'Link to COTA Bus Schedule', 'URL':read_data, } scraperwiki.sqlite.save(unique_keys=['Title', 'URL'], data=data_dict)
def splitPDF(inputpdf, output_fn, start, end): """ from the input pdf creates a new pdf file with the name output_fn containing only the pages of the pdf from start to end """ outputpdf = PdfFileWriter() print 'added pages ', for i in xrange(start, end): outputpdf.addPage(inputpdf.getPage(i)) print str(i) + ',', print 'finished. printed', outputpdf.getNumPages(), 'pages' outstream = file(output_fn, 'wb') outputpdf.write(outstream) outstream.close()
def merge_pdfs(output_name, files):
    """Merge *files* (in the order given) into *output_name*.

    Make sure to sort *files* first.  Files that cannot be opened or
    parsed are reported and skipped.  The output file is only created
    when at least one page was collected.
    """
    output = PdfFileWriter()
    # Input handles must stay open until output.write() has run,
    # because pyPdf reads page content lazily from the source streams.
    streams = []
    try:
        for f in files:
            try:
                stream = open(f, "rb")
            except IOError as e:
                print(e)
                continue
            streams.append(stream)
            try:
                i = PdfFileReader(stream)
            except PdfReadError as e:
                print(e)
            else:
                for p in i.pages:
                    output.addPage(p)
        if output.getNumPages():
            ostream = open(output_name, "wb")
            try:
                output.write(ostream)
            finally:
                ostream.close()
    finally:
        # Fix: the original leaked every input file handle.
        for stream in streams:
            stream.close()
def test_page_ranges(self):
    """Selecting '-5, 10, 12-14, 18-' keeps exactly those pages, in order."""
    response = self.app.get('/')
    self.assertEquals(response.status_code, 200)
    source = PdfFileReader(self.get_pdf_stream())
    # Build a 20-page document labelled 'Test 1' .. 'Test 20'.
    writer = PdfFileWriter()
    template = source.getPage(0)
    for number in range(1, 21):
        writer.addPage(
            CombineTestCase.replace_text(template, 'Test',
                                         'Test %d' % number))
    assert writer.getNumPages() == 20
    upload = StringIO()
    writer.write(upload)
    upload.seek(0)
    response = self.app.post('/handleform',
                             data={'file': (upload, 'test.pdf')},
                             follow_redirects=True)
    ids = self.extract_ids_from_main_page(response.data)
    pages_kw = 'pages_%d' % ids[0]
    response = self.combine_and_download(**{pages_kw: '-5, 10, 12-14, 18-'})
    combined = PdfFileReader(StringIO(response.data))
    # Open-ended and closed ranges expand to exactly this page list.
    expected = [1, 2, 3, 4, 5, 10, 12, 13, 14, 18, 19, 20]
    for index, label in enumerate(expected):
        self.assert_(
            ('Test %d' % label) in combined.getPage(index).extractText())
    self.assertEquals(combined.getNumPages(), len(expected))
    self.clean_up()
def test_page_ranges(self):
    """Page-range selection keeps only the requested pages, in order."""
    rv = self.app.get('/')
    self.assertEquals(rv.status_code, 200)
    reader = PdfFileReader(self.get_pdf_stream())
    # Twenty pages, each stamped with its own 'Test <n>' label.
    builder = PdfFileWriter()
    base_page = reader.getPage(0)
    for n in range(1, 21):
        stamped = CombineTestCase.replace_text(base_page, 'Test',
                                               'Test %d' % n)
        builder.addPage(stamped)
    assert builder.getNumPages() == 20
    payload = StringIO()
    builder.write(payload)
    payload.seek(0)
    rv = self.app.post('/handleform',
                       data={'file': (payload, 'test.pdf')},
                       follow_redirects=True)
    doc_ids = self.extract_ids_from_main_page(rv.data)
    kwargs = {'pages_%d' % doc_ids[0]: '-5, 10, 12-14, 18-'}
    rv = self.combine_and_download(**kwargs)
    result = PdfFileReader(StringIO(rv.data))
    # Verify both membership and ordering of the selected pages.
    wanted = [1, 2, 3, 4, 5, 10, 12, 13, 14, 18, 19, 20]
    self.assert_(all(
        ('Test %d' % label) in result.getPage(idx).extractText()
        for idx, label in enumerate(wanted)))
    self.assertEquals(result.getNumPages(), len(wanted))
    self.clean_up()
batchnames.append("batch"+str(i)+".pdf") # Loop through HouseDistricts for i in range(1,100): output = PdfFileWriter() outputfilename = "pdfs/HD"+str(i)+".pdf" for b in batchnames: input_pdf = PdfFileReader(file("pdfs/"+b, "rb")) with open('csvs/district_data.csv', 'rb') as csvfile: district_reader = csv.reader(csvfile, delimiter=',', quotechar='"') for row in district_reader: if row[0] == b: if row[9]== str(i): output.addPage(input_pdf.getPage(int(row[1])-1)) if output.getNumPages() >0: outputStream = file(outputfilename, "wb") output.write(outputStream) outputStream.close() print "There are %s form letters from %s " % (output.getNumPages(), outputfilename) # Loop through SenateDistricts for i in range(1,34): output = PdfFileWriter() outputfilename = "pdfs/SD"+str(i)+".pdf" for b in batchnames: input_pdf = PdfFileReader(file("pdfs/"+b, "rb")) with open('csvs/district_data.csv', 'rb') as csvfile: district_reader = csv.reader(csvfile, delimiter=',', quotechar='"') for row in district_reader:
# This is the pyPdf library. Install with: $ pip install pyPdf from pyPdf import PdfFileWriter, PdfFileReader # Open a writer object output = PdfFileWriter() # Load the pdf file(s) you want to input pages from input1 = PdfFileReader(file("test.pdf", "rb")) # Add some pages from your source/input files to the writer object output.addPage(input1.getPage(8)) output.addPage(input1.getPage(15)) # For kicks, print the number of pages added to the console print "number of pages is: %s " % output.getNumPages() # Explain the kind of file this will be when outputted outputStream = file("assembled_pdf.pdf", "wb") # Write the actual output file output.write(outputStream) # Close the output file outputStream.close()
# Stamp each "hack" overlay onto its matching source page, then assemble
# the book interior from the stamped pages plus the main document.
# NOTE(review): INPUT (list of base file names) and OFFSET (page-number
# offset) are defined elsewhere in this file -- confirm before editing.
for x in range(len(INPUT)) :
    hackfile = 'editionHack/hack{0}.pdf'.format(x+OFFSET)
    goodfile = 'editionHack/'+INPUT[x]+".pdf"
    output = PdfFileWriter()
    input1 = PdfFileReader(file(goodfile, "rb"))
    page1 = input1.getPage(0)
    # Merge the overlay page on top of the source page in place.
    watermark = PdfFileReader(file(hackfile, "rb"))
    page1.mergePage(watermark.getPage(0))
    output.addPage(page1)
    outputStream = file("editionHack/finalPage{0}.pdf".format(x+OFFSET), "wb")
    print "finalPage{0}.pdf".format(x+OFFSET)
    output.write(outputStream)
    outputStream.close()
# Collect stamped pages 3..10 as the front of the interior.
output = PdfFileWriter()
for x in range(3,11) :
    input1 = PdfFileReader(file('editionHack/finalPage{0}.pdf'.format(x), "rb"))
    output.addPage(input1.getPage(0))
# Append the full main document.
input1 = PdfFileReader(file('sitOzfarsWysr_a4.pdf', "rb"))
for x in range(input1.getNumPages()) :
    output.addPage(input1.getPage(x))
# Pad to an even page count so the interior prints as full sheets.
if output.getNumPages() % 2 == 1 :
    input1 = PdfFileReader(file('editionHack/blankPage.pdf', "rb"))
    output.addPage(input1.getPage(0))
# NOTE(review): .format(x) is a no-op here -- the string has no
# placeholder; the file name is always editionHack/sowInterior.pdf.
outputStream = file("editionHack/sowInterior.pdf".format(x), "wb")
output.write(outputStream)
outputStream.close()
}) # Add It output._addObject(properties) # Add back the one we deleted first output._addObject(item) address = PdfFileReader(file("input\\address\\"+fname, "rb")) location = PdfFileReader(file("input\\location\\"+fname, "rb")) print "Location Pages:", location.numPages print "Address Pages:", address.numPages if location.numPages > 1 : print "Location has more than 1 page, exiting..." sys.exit() page = location.getPage(0) for i in xrange(address.numPages): #print ".", output.addPage(address.getPage(i)) output.addPage(page) # output should be double address print "Output Pages:", output.getNumPages() #if output.getNumPages() == (address.numPages *2): if output.getNumPages() > 0: outStream = file("output\\"+fname, "wb") output.write(outStream) outStream.close() print "Generation Complete"
# Build the output file name from the distinct parts of every input file
# name, position by position.
# NOTE(review): assumes each fn in fnames is a sequence of name parts and
# that out_name starts as a list of lists -- defined earlier, confirm.
for fn in fnames:
    for k in range(len(fn)):
        if len(out_name) - 1 < k:
            out_name.append([])
        # collect each part only once per position
        if out_name[k].count(fn[k]) < 1:
            out_name[k].append(fn[k])
        #out_name[k].sort()
        #out_name[k].reverse();
# Join each position's variants; position 0 is replaced by today's date.
for k in range(len(out_name)):
    if k == 0:
        out_name[k] = [datetime.date.today().strftime('%y%m%d')]
    out_name[k] = '_'.join(out_name[k])
# assemble the final file name from the filtered parts
postfix = '_crop'
if backpage:
    postfix = '_backpage'
# Clamp to 251 chars so name + '.pdf' stays within common 255-char limits.
out_name = ('_'.join(out_name) + postfix)[0:251] + '.pdf'
print('')
print(out_name)
print("has %s pages" % output.getNumPages())
print('Saving...')
# Write the PDF assembled earlier (``output``) to the computed file name.
outputStream = file(out_name, "wb")
output.write(outputStream)
outputStream.close()
print('Done')
def _dpd_label_get(self, cr, uid, picking, test=False, context=None):
    """Request DPD parcel labels for *picking* via the DPD SOAP service.

    One label is fetched per package; the label PDFs are concatenated
    and stored base64-encoded on the picking together with tracking
    numbers, error texts, status and package count.  With ``test=True``
    the test profile is used and no status/tracking reference is set.

    Raises ``Warning`` for configuration problems; any other exception
    is passed to ``self._dpd_error`` and re-raised.
    """
    carrier = picking.carrier_id
    if not carrier or carrier.api != "dpd":
        raise Warning(_("Invalid carrier type!"))
    profile = carrier.dpd_profile_id
    if not profile:
        raise Warning(_("No DPD Profile defined!"))
    # check test profile
    if test:
        profile = carrier.dpd_test_profile_id
        if not profile:
            # no test profile configured: silently succeed in test mode
            return True
    # check packages
    pack_op_obj = self.pool["stock.pack.operation"]
    package_count = 0
    package_count_all = True
    # check operation for package count
    for operation in picking.pack_operation_ids:
        if operation.qty_done:
            # check if all operation have a package count
            # if not add one package for itself
            if not operation.package_count:
                if package_count_all:
                    package_count_all = False
                package_count += 1
            else:
                # add packages
                package_count += operation.package_count
    if not package_count:
        # always request at least one label
        package_count = 1
    partner = picking.partner_id
    client = self._dpd_client_get(context)
    tracking_refs = []    # parcel numbers returned by DPD
    carrier_errors = []   # human-readable error texts
    label_pdf = PdfFileWriter()  # collects one label page per package
    carrier_label_name = None
    try:
        for packageNo in range(0, package_count):
            # assemble the SOAP request parameters for this package
            parts = {}
            parts["username"] = profile.user
            parts["password"] = md5(profile.password).hexdigest()
            parts["mandant"] = profile.client
            parts["kdnr"] = partner.ref or ""
            name = partner.name.strip()
            bezugsp = partner.street2 and partner.street2.strip() or ""
            parent_partner = partner.parent_id
            # ship to the company, with the contact as reference person
            if not partner.mail_without_company and parent_partner:
                name = parent_partner.name
                bezugsp = partner.name.strip()
            zusatz = ""
            # the name field is limited to 48 chars; overflow goes into
            # the "zusatz" (addition) field, split at a word boundary
            # where one exists past position 30
            if len(name) > 48:
                shortName = name[:48]
                lastSpacePos = shortName.rfind(" ")
                if lastSpacePos > 30:
                    lastSpacePos += 1
                    zusatz = name[lastSpacePos:lastSpacePos + 32]
                    name = shortName[:lastSpacePos]
                else:
                    zusatz = name[48:80]
                    name = shortName
            parts["name"] = name
            parts["zusatz"] = zusatz
            parts["anschrift"] = partner.street and partner.street.strip(
            ) or ""
            parts["plz"] = partner.zip and partner.zip.strip() or ""
            parts["ort"] = partner.city and partner.city.strip() or ""
            parts[
                "land"] = partner.country_id and partner.country_id.code or "AT"
            parts["bezugsp"] = bezugsp
            parts["tel"] = partner.phone or partner.mobile or ""
            parts["mail"] = partner.email or ""
            parts["liefernr"] = picking.name or ""
            parts["pakettyp"] = carrier.dpd_type or "DPD"
            parts["gewicht"] = "1000"  # fallback weight (gramm)
            weight = picking.carrier_weight or picking.weight or 0.0
            if weight:
                # convert the picking weight to gramm for the request
                uom_obj = self.pool["product.uom"]
                uom_id = uom_obj.search_id(
                    cr, uid,
                    [("category_id", "=",
                      picking.weight_uom_id.category_id.id),
                     '|', ("name", "=", "g"), ("code", "=", "g")])
                uom = uom_obj.browse(cr, uid, uom_id, context=context)
                if not uom:
                    raise Warning(_("No unit gramm found!"))
                parts["gewicht"] = str(
                    int(
                        uom_obj._compute_qty(cr, uid,
                                             picking.weight_uom_id.id,
                                             weight, uom.id)))
            parts["vdat"] = ""
            produkt1 = carrier.dpd_product1
            if not produkt1:
                # NOTE(review): weight is still in the picking's own
                # unit here, so "> 3" presumably means > 3 kg -- confirm
                produkt1 = "KP"
                if weight > 3:
                    produkt1 = "NP"
            parts["produkt1"] = produkt1
            parts["produkt2"] = []
            parts["produkt3"] = []
            parts["produkt4"] = []
            parts["produkt5"] = ""
            parts["produkt6"] = []
            parts["produkt7"] = ""
            msgSoapOut = client.service.getLabel(**parts)
            picking_obj = self.pool["stock.picking"]
            # save pdf
            label_url = msgSoapOut.label
            if label_url:
                carrier_label_name = label_url.split("/")[-1]
                label_file = urllib2.urlopen(label_url)
                try:
                    # add page
                    label_pdf.addPage(
                        PdfFileReader(StringIO(
                            label_file.read())).getPage(0))
                finally:
                    label_file.close()
            # evaluate error
            err_code = msgSoapOut.err_code
            if err_code:
                carrier_error = err_code
                foundError = False
                # map known error codes to readable messages
                for err, err_message in self._dpd_errors:
                    if err in err_code:
                        carrier_error = err_message
                        foundError = True
                if not foundError:
                    # unknown code: at least unescape HTML entities
                    h = HTMLParser()
                    carrier_error = h.unescape(carrier_error)
                _logger.error(carrier_error)
                carrier_errors.append(carrier_error)
            else:
                # store ref
                tracking_refs.append(msgSoapOut.paknr)
        # build label
        carrier_label = None
        if label_pdf.getNumPages() > 0:
            bufPdf = StringIO()
            try:
                label_pdf.write(bufPdf)
                carrier_label = base64.encodestring(bufPdf.getvalue())
            finally:
                bufPdf.close()
        status = None
        carrier_tracking_ref = None
        if not carrier_errors and not test:
            status = "created"
        if not test:
            carrier_tracking_ref = ", ".join(tracking_refs)
        # write data
        picking_obj.write(cr, uid, picking.id, {
            "carrier_label_name": carrier_label_name,
            "carrier_label": carrier_label,
            "carrier_error": "\n".join(carrier_errors),
            "carrier_tracking_ref": carrier_tracking_ref,
            "carrier_status": status,
            "number_of_packages": package_count
        }, context=context)
    except Exception, e:
        self._dpd_error(e)
        raise e
from pyPdf import PdfFileWriter, PdfFileReader import glob output = PdfFileWriter() files = glob.glob(r'./*.pdf') for stuff in list(sorted(files)): input = PdfFileReader(file(str(stuff), "rb")) print "processing %s " % (stuff) output.addPage(input.getPage(0)) print "output has %s pages." % output.getNumPages() outputStream = file("out.pdf", 'wb') output.write(outputStream) outputStream.close()
page2 = output_pdf.addBlankPage(x/2, y) page2.mergeTranslatedPage(p, -x/2, 0) split_count = split_count + 1 else: # Portrait, fine on its own output_pdf.addPage(p) progress.animate(amount=i) progress.animate(amount=pages) print 'Total pages: %d -> %d ' % ( pages, output_pdf.getNumPages() ) if split_count: print '/-{ %d }-\\ double spreads split apart' % split_count print 'Writing... here is a bird to keep you company' def write_file(): # Write the file output_file = file(input_base + '.split' + input_ext, "wb") output_pdf.write(output_file) output_file.close() t = threading.Thread(target=write_file) t.start()