def highlights(files, options):
    """Print the notes attached to highlighted text in PDF files.

    Everything is written to the output or error stream (usually stdout
    and stderr).  Directories are skipped unless ``options.recursive`` is
    set, in which case their entries are processed recursively.

    Options:
    * options.recursive     Handle directories
    * options.use_title     Print filename/document title instead of full path
    * options.valid_types   PDF annotation types to process
    * options.filter_keys   Only print stated keys.
    * options.remove_key    Don't print key tags
    * options.with_path     Print the file path with each line
    * options.with_page     Print the page number with each line
    * options.buffered      Buffer output
    * options.list_keys     Print key only
    * options.stdout        Output stream
    * options.stderr        Error stream
    """
    for entry in files:
        if not os.path.isdir(entry):
            document = Pdf(entry, options, pgm=sys.argv[0])
            if options.list_keys:
                # Keys can only be listed while the key tags are still present.
                options.remove_key = False
                for annotation in document.annotations(options):
                    list_keys(annotation.note, options)
            else:
                for annotation in document.annotations(options):
                    print_note(annotation.note, annotation.page, options)
        elif options.recursive:
            children = [os.path.join(entry, name)
                        for name in os.listdir(entry)]
            highlights(children, options)
        # Directories are otherwise omitted.
def test_to_pdf(self):
    """Rendering one HTML snippet must match the stored PDF fixture."""
    first_html = "<h1>Hola Mundo</h1>"
    pdf = Pdf(first_html)
    # The context manager closes the fixture even if read() raises.
    with open('tests/fixtures/hola_mundo.pdf', 'rb') as fixture:
        data = fixture.read()
    output = pdf.to_pdf(first_html)
    self.assertPdfEquals(output, data)
def test_get_stream(self):
    """The stream of two appended HTML snippets must match the PDF fixture."""
    first_html = "<h1>Hola Mundo</h1>"
    second_html = "<h2>This is sparta</h2>"
    pdf = Pdf(first_html)
    pdf.append(second_html)
    # The context manager closes the fixture even if read() raises.
    with open('tests/fixtures/stream.pdf', 'rb') as fixture:
        data = fixture.read()
    output = pdf.get_stream()
    self.assertPdfEquals(output, data)
def main():
    """Crop the cover PDF for the requested cover type and convert it.

    Reads the positional ``cover_type`` argument from the command line,
    applies the crop box that the configuration returns for it to
    ``cover.pdf`` and converts the resulting cover once per configured
    output width, writing into the ``out`` folder.
    """
    cover_pdf = path.abspath('cover.pdf')
    destination = path.abspath('out')
    settings_file = path.abspath('config.json')

    cli = argparse.ArgumentParser()
    cli.add_argument('cover_type', help='Define the type of cover')
    cli_args = cli.parse_args()

    config = Config(settings_file, cover_type=cli_args.cover_type)

    document = Pdf(cover_pdf)
    geometry = config.get_cover_geometry()
    document.set_cropbox(geometry)

    image = Img(document.cover.name, output_folder=destination)
    for target_width in config.get_output_width():
        image.convert(target_width)
def execute(self):
    """Load the input PDF, apply every effector in order and save the result.

    Each effector receives the current document and its return value
    becomes the document handed to the next one.  Whichever document is
    current when the method leaves (normally or through an exception)
    is closed.
    """
    document = Pdf()
    try:
        document.load(self.config.getInputFilePath())
        for stage in self.effectorChain:
            document = stage.apply(document)
        document.save(self.config.getOutputFilePath())
    finally:
        document.close()
def confirmEmailSend(self):
    """Ask for confirmation and, if granted, build today's PDF and mail it.

    On any answer other than 'yes' the e-mail window is closed and a
    cancellation notice is shown.  On 'yes' today's rows are fetched from
    the database, turned into a PDF, the e-mail is sent to the address
    typed in the entry field, and the window is closed afterwards.
    """
    answer = messagebox.askquestion('Send Email',
                                    'Are you sure to send email',
                                    icon='warning')
    if answer != 'yes':
        self.emailWin.quit()
        self.emailWin.destroy()
        messagebox.showerror('Cancelled', 'Your email is not forwarded')
        return

    recipient = self.email_entry.get().strip()
    print(recipient)

    database = Database()
    todays_rows = database.fetch_today()
    database.db_close()

    report = Pdf()
    report.get_pdf(todays_rows)

    mailer = EmailSend(recipient)
    mailer.send_email()

    messagebox.showinfo('Done', 'Your email has been sent')
    self.emailWin.quit()
    self.emailWin.destroy()
def generar_pdf_concatenado():
    """Render every requested receipt into one PDF and return its name as JSON.

    The receipt ids come from the ``recibo`` field of the posted form.
    Each matching ``Retiro`` is rendered with ``recibo.html`` and appended
    to a single PDF, which is written into ``UPLOAD_FOLDER``.  The file
    name carries the date string of the last receipt processed.

    Aborts with 404 when no ids were posted or when the ids match no row.
    """
    id_retiros = request.form.to_dict(False).get('recibo')
    if not id_retiros:
        return abort(404)

    retiros = models.Retiro.select().where(id__in=id_retiros)
    to_text = Traductor().to_text
    pdf = Pdf()

    ultimo_retiro = None
    for retiro in retiros:
        html = render_template("recibo.html",
                               cooperativa=retiro.socio.cooperativa,
                               retiro=retiro,
                               monto_como_cadena=to_text(retiro.monto))
        pdf.append(html)
        ultimo_retiro = retiro

    if ultimo_retiro is None:
        # No row matched: the title below needs a receipt, and the loop
        # variable would be undefined (NameError) without this guard.
        return abort(404)

    titulo = "recibos_agrupados_%s.pdf" % (ultimo_retiro.fecha_como_string())
    nombre_archivo = os.path.join(app.config['UPLOAD_FOLDER'], titulo)

    # The context manager closes the file even if rendering or writing fails.
    with open(nombre_archivo, 'wb') as archivo_temporal:
        archivo_temporal.write(pdf.get_stream())

    return jsonify(name=titulo)
def generar_zip_contenedor(): archivos_pdf_generados = [] id_retiros = request.form.to_dict(False).get('recibo') if id_retiros: retiros = models.Retiro.select().where(id__in=id_retiros) to_text = Traductor().to_text for retiro in retiros: pdf = Pdf() html = render_template("recibo.html", cooperativa=retiro.socio.cooperativa, retiro=retiro, monto_como_cadena=to_text(retiro.monto)) pdf.append(html) titulo = models.Retiro.obtener_nombre_por_id(retiro.id) nombre_archivo = os.path.join(app.config['UPLOAD_FOLDER'], titulo + ".pdf") archivos_pdf_generados.append(nombre_archivo) archivo_temporal = open(nombre_archivo, 'wb') archivo_temporal.write(pdf.get_stream()) archivo_temporal.close() import zipfile nombre = "recibos_agrupados_%s.zip" % (retiro.fecha_como_string()) nombre_archivo = os.path.join(app.config['UPLOAD_FOLDER'], nombre) zip = zipfile.ZipFile(nombre_archivo, mode='w') for nombre_pdf in archivos_pdf_generados: print "zipeando", [nombre_pdf] zip.write(nombre_pdf) zip.close() return jsonify(name=nombre) else: abort(404)
def generar_pdf_concatenado():
    """Render every requested receipt into one PDF and return its name as JSON.

    The receipt ids come from the ``recibo`` field of the posted form.
    Each matching ``Retiro`` is rendered with ``recibo.html`` and appended
    to a single PDF, which is written into ``UPLOAD_FOLDER``.  The file
    name carries the date string of the last receipt processed.

    Aborts with 404 when no ids were posted or when the ids match no row.
    """
    id_retiros = request.form.to_dict(False).get('recibo')
    if not id_retiros:
        return abort(404)

    retiros = models.Retiro.select().where(id__in=id_retiros)
    to_text = Traductor().to_text
    pdf = Pdf()

    ultimo_retiro = None
    for retiro in retiros:
        html = render_template("recibo.html",
                               cooperativa=retiro.socio.cooperativa,
                               retiro=retiro,
                               monto_como_cadena=to_text(retiro.monto))
        pdf.append(html)
        ultimo_retiro = retiro

    if ultimo_retiro is None:
        # No row matched: the title below needs a receipt, and the loop
        # variable would be undefined (NameError) without this guard.
        return abort(404)

    titulo = "recibos_agrupados_%s.pdf" % (ultimo_retiro.fecha_como_string())
    nombre_archivo = os.path.join(app.config['UPLOAD_FOLDER'], titulo)

    # The context manager closes the file even if rendering or writing fails.
    with open(nombre_archivo, 'wb') as archivo_temporal:
        archivo_temporal.write(pdf.get_stream())

    return jsonify(name=titulo)
def generar_zip_contenedor(): archivos_pdf_generados = [] id_retiros = request.form.to_dict(False).get('recibo') if id_retiros: retiros = models.Retiro.select().where(id__in=id_retiros) to_text = Traductor().to_text for retiro in retiros: pdf = Pdf() html = render_template("recibo.html", cooperativa=retiro.socio.cooperativa, retiro=retiro, monto_como_cadena=to_text(retiro.monto)) pdf.append(html) titulo = models.Retiro.obtener_nombre_por_id(retiro.id) nombre_archivo = os.path.join(app.config['UPLOAD_FOLDER'], titulo + ".pdf") archivos_pdf_generados.append(nombre_archivo) archivo_temporal = open(nombre_archivo, 'wb') archivo_temporal.write(pdf.get_stream()) archivo_temporal.close() import zipfile nombre = "recibos_agrupados_%s.zip" %(retiro.fecha_como_string()) nombre_archivo = os.path.join(app.config['UPLOAD_FOLDER'], nombre) zip = zipfile.ZipFile(nombre_archivo, mode='w') for nombre_pdf in archivos_pdf_generados: print "zipeando", [nombre_pdf] zip.write(nombre_pdf) zip.close() return jsonify(name=nombre) else: abort(404)
def __init__(self, app, network_manager, wait_timeout=20, wait_callback=None,
             viewport_size=(800, 600),
             user_agent='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.2 '
                        '(KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2',
             log_level=30, download_images=True, plugins_enabled=False,
             java_enabled=False, create_page_callback=None, is_popup=False,
             max_resource_queued=None, *args, **kargs):
    """Set up the page: state, web settings, signal wiring and user agent.

    Stores the constructor arguments on the instance, configures the
    QWebSettings flags, connects the page and network-manager signals to
    their handlers, installs ``network_manager`` as the access manager
    and finally applies ``log_level`` to the module logger.  Extra
    positional/keyword arguments are accepted and ignored.
    """
    super(GhostWebPage, self).__init__(parent=app)

    self._app = app
    self.pdf_engine = Pdf()

    self.http_resources = []
    self.http_resource_page = None
    self.max_resource_queued = max_resource_queued

    self.wait_timeout = wait_timeout
    self.wait_callback = wait_callback
    self.loaded = True
    self.create_page_callback = create_page_callback
    self.is_popup = is_popup

    # Internal library object
    self.ghostInit = GhostInit()

    self.setForwardUnsupportedContent(True)

    # Web settings, applied in this order.
    web_flags = (
        (QtWebKit.QWebSettings.AutoLoadImages, download_images),
        (QtWebKit.QWebSettings.JavascriptEnabled, True),
        (QtWebKit.QWebSettings.JavascriptCanOpenWindows, True),
        (QtWebKit.QWebSettings.PluginsEnabled, plugins_enabled),
        (QtWebKit.QWebSettings.JavaEnabled, java_enabled),
    )
    for flag, enabled in web_flags:
        self.settings().setAttribute(flag, enabled)

    self.set_viewport_size(*viewport_size)

    # Page signals
    page_signals = (
        (self.loadFinished, self._page_loaded),
        (self.loadStarted, self._page_load_started),
        (self.loadProgress, self._page_load_progress),
        (self.unsupportedContent, self._unsupported_content),
    )
    for signal, handler in page_signals:
        signal.connect(handler)

    self.network_manager = network_manager
    self.setNetworkAccessManager(self.network_manager)
    self.network_manager.finished.connect(self._request_ended)

    # User Agent
    self.setUserAgent(user_agent)

    self.main_frame = self.mainFrame()
    self._unsupported_files = {}
    self.windowCloseRequested.connect(self._closeWindow)

    logger.setLevel(log_level)
def pdf_split():
    """Build a ``Pdf`` tool in 'split' mode and construct its GUI."""
    Pdf(title='PDF 文档分割', mode='split').construct_gui()
def test_append(self):
    """Appending HTML keeps the snippets in insertion order in ``htmls``."""
    first_html = "<h1>Hola Mundo</h1>"
    second_html = "<h2>This is sparta</h2>"
    pdf = Pdf(first_html)
    pdf.append(second_html)
    # assertEqual: the assertEquals alias is deprecated and was removed
    # from unittest in Python 3.12.
    self.assertEqual(pdf.htmls, [first_html, second_html])
def anedit(path, target, options): """Edit notes from highlights in PDF files. All is printed to standard input or standard error. Options: * options.valid_types PDF annotation types to process * options.use_title Print filename/document title instead of filename * options.filter_keys Only print stated keys. * options.remove_key Don't print key tags * options.verbose Print varnings """ # FIXME: Who guarantees this method is only executed on valid files? # Also check for pusher, hillieo, hilliep, ... # wordlist for normalization wordlist = Dictionary() # open document document = Pdf(path, options, pgm=sys.argv[0]) # fetch notes notes = [] for n_annot, item in enumerate(document.annotations(options)): sugg = annotation_fixes(item.note, wordlist, options.verbose) notes.append((item, sugg)) # Walk through notes has_changes = False while len(notes) > 0: item, sugg = notes.pop(0) print "" print "\033[94m> {}: page {}, ETA {}\033[0m".format( item.page[0], item.page[1], len(notes)) print "Original: ", item.note if item.note != sugg: print "Suggested:", sugg elif options.diffs: continue valid_answers = 'nyecisq?' 
prompt = '[{}]: '.format('/'.join(valid_answers.title())) ans = 'NEIN' while ans not in valid_answers: ans = raw_input(prompt) \ .strip() \ .lower() \ .replace('yes', 'y') \ .replace('no', 'n') \ .replace('quit', 'q') \ .replace('ignore', 'i') \ .replace('ign', 'i') \ .replace('edit', 'e') \ .replace('correct', 'c') \ .replace('skip', 's') if ans == '': ans = valid_answers[0] # default is 'n' if ans == '?': ans = 'NEIN' print '''Usage: n no Stick with the original text (the default) y yes Accept the suggested text e edit Edit the original text c change Edit the suggested text i ignore Ignore for now (again prompted later) s skip Save and exit q quit Abort and exit (changes are lost) ''' if ans == 'y': # Use suggestion has_changes = True if item.key is None: item.set_content(sugg) else: item.set_content('<{}>{}</{}>'.format(item.key, sugg, item.key)) elif ans == 'n': # Use original pass elif ans in ('e', 'c'): # Edit manually def hook(): curr = ans == 'e' and item.note or sugg curr = curr.replace('\n', '\\n') readline.insert_text(curr) readline.redisplay() readline.set_pre_input_hook(hook) sugg = raw_input().strip().replace('\\n', '\n') readline.set_pre_input_hook(None) notes.insert(0, (item, sugg)) elif ans == 'i': # Ignore note for now notes.append((item, sugg)) elif ans == 'q': # Quit immediately, don't save return elif ans == 's': # Skip the rest break # save changes if has_changes: document.save(target, options)
def get_json_from_file(file_path):
    """Return the decoded JSON document stored at ``file_path``."""
    with open(file_path) as f:
        return json.load(f)


if __name__ == '__main__':
    # Expected arguments: <current pdf> <previous JSON> <new JSON>
    if len(sys.argv) != 4:
        sys.stderr.write('Argument count is invalid!\n')
        # sys.exit rather than the site-provided exit(), which is meant
        # for the interactive shell and is absent under `python -S`.
        sys.exit(1)

    current_pdf_file_path = sys.argv[1]
    old_generated_JSON_file_path = sys.argv[2]
    new_generated_JSON_file_path = sys.argv[3]

    new_pdf = Pdf(current_pdf_file_path)
    old_json = get_json_from_file(old_generated_JSON_file_path)

    # Exit if PDF file is the same as the last time.
    new_pdf_hash = new_pdf.get_pdf_hash()
    old_pdf_hash = old_json["referenced_pdf_hash"]
    if new_pdf_hash == old_pdf_hash:
        print("PDF file hasn't changed since the last time. Exiting.")
        sys.exit(1)

    new_addresses = new_pdf.get_addresses()
    old_addresses = {Address(**address) for address in old_json["addresses"]}

    # Google Geocoding calls are pretty expensive, so we don't really want
    # to query every address every time.
    addresses_to_be_removed = old_addresses - new_addresses
def pdf_watermark():
    """Build a ``Pdf`` tool in 'watermark' mode and construct its GUI."""
    Pdf(title='PDF 文档加水印', mode='watermark').construct_gui()
def pdf_encrypt():
    """Build a ``Pdf`` tool in 'encrypt' mode and construct its GUI."""
    Pdf(title='PDF 文档加密', mode='encrypt').construct_gui()
# Options for the form "templatify" step, then the optional output path.
parser.add_argument('--templatify-forms-whitelist', '--tfw', default='')
parser.add_argument('--templatify-forms-uniquifier', '--tfu')
parser.add_argument('--templatify-forms-padding', '--tfp', type=int,
                    default=80)
parser.add_argument('--templatify-forms-custom-padding', '--tfcp',
                    default='{}')
parser.add_argument('--templatify-forms-remove-dv', '--tfrd',
                    action='store_true')
parser.add_argument('--save', '-s')
args = parser.parse_args()

pdf = Pdf().load(args.pdf)

if args.compare:
    # Report every object that differs between the two documents.
    other = Pdf().load(args.compare)
    for k, v in pdf.objects.items():
        if k not in other.objects:
            print('===== {} ===== missing from other'.format(k))
        # elif: a key missing from `other` cannot be indexed there; a plain
        # `if` raised KeyError right after reporting the missing key.
        elif other.objects[k] != v:
            print(('===== {} =====\n'
                   '{}\n'
                   '\n'
                   '===== other =====\n'
                   '{}').format(k, v, other.objects[k]))
    for k in other.objects.keys():
        if k not in pdf.objects:
            print('===== {} ===== missing'.format(k))
class GhostWebPage(QWebPage):
    """Overrides QtWebKit.QWebPage in order to intercept some graphical
    behaviours like alert(), confirm().
    Also intercepts client side console.log().

    :param app: a QApplication that is running Ghost.
    :param network_manager: a NetworkManager instance in charge of managing
        all the network requests.
    :param wait_timeout: Maximum step duration in seconds.
    :param wait_callback: An optional callable that is periodically
        executed until Ghost stops waiting.
    :param viewport_size: A tuple that sets the initial viewport size.
    :param user_agent: The default User-Agent header.
    :param log_level: The optional logging level.
    :param download_images: Indicates whether the browser downloads images.
    :param plugins_enabled: Enable plugins (like Flash).
    :param java_enabled: Enable Java JRE.
    :param create_page_callback: A method called when a popup is opened.
    :param is_popup: Boolean that indicates whether the page is a popup.
    :param max_resource_queued: The maximum number of resources that can be
        kept in memory.
        If None then no limit is applied.
        If 0 then no resources are kept.
        If the number is > 0 then the number of resources won't be more
        than max_resource_queued.
    """
    user_agent = ""
    removeWindowFromList = pyqtSignal(object)
    _alert = None
    _confirm_expected = None
    _prompt_expected = None
    _upload_file = None
    _app = None

    def __init__(
            self, app, network_manager, wait_timeout=20, wait_callback=None,
            viewport_size=(800, 600),
            user_agent='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.2 '
                       '(KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2',
            log_level=30, download_images=True, plugins_enabled=False,
            java_enabled=False, create_page_callback=None, is_popup=False,
            max_resource_queued=None, *args, **kargs):
        super(GhostWebPage, self).__init__(parent=app)

        self._app = app
        self.pdf_engine = Pdf()

        self.http_resources = []
        self.http_resource_page = None
        self.max_resource_queued = max_resource_queued

        self.wait_timeout = wait_timeout
        self.wait_callback = wait_callback
        self.loaded = True
        self.create_page_callback = create_page_callback
        self.is_popup = is_popup

        # Internal library object
        self.ghostInit = GhostInit()

        self.setForwardUnsupportedContent(True)
        self.settings().setAttribute(
            QtWebKit.QWebSettings.AutoLoadImages, download_images)
        self.settings().setAttribute(
            QtWebKit.QWebSettings.JavascriptEnabled, True)
        self.settings().setAttribute(
            QtWebKit.QWebSettings.JavascriptCanOpenWindows, True)
        self.settings().setAttribute(
            QtWebKit.QWebSettings.PluginsEnabled, plugins_enabled)
        self.settings().setAttribute(
            QtWebKit.QWebSettings.JavaEnabled, java_enabled)

        self.set_viewport_size(*viewport_size)

        # Page signals
        self.loadFinished.connect(self._page_loaded)
        self.loadStarted.connect(self._page_load_started)
        self.loadProgress.connect(self._page_load_progress)
        self.unsupportedContent.connect(self._unsupported_content)

        self.network_manager = network_manager
        self.setNetworkAccessManager(self.network_manager)
        self.network_manager.finished.connect(self._request_ended)

        # User Agent
        self.setUserAgent(user_agent)

        self.main_frame = self.mainFrame()
        self._unsupported_files = {}
        self.windowCloseRequested.connect(self._closeWindow)

        logger.setLevel(log_level)

    class confirm:
        """Statement that tells Ghost how to deal with javascript confirm().

        :param confirm: A boolean that confirms.
        :param callback: A callable that returns a boolean for confirmation.
        """
        def __init__(self, confirm=True, callback=None):
            self.confirm = confirm
            self.callback = callback

        def __enter__(self):
            GhostWebPage._confirm_expected = (self.confirm, self.callback)

        def __exit__(self, type, value, traceback):
            GhostWebPage._confirm_expected = None

    class prompt:
        """Statement that tells Ghost how to deal with javascript prompt().

        :param value: A string value to fill in prompt.
        :param callback: A callable that returns the value to fill in.
        """
        def __init__(self, value='', callback=None):
            self.value = value
            self.callback = callback

        def __enter__(self):
            GhostWebPage._prompt_expected = (self.value, self.callback)

        def __exit__(self, type, value, traceback):
            GhostWebPage._prompt_expected = None

    def chooseFile(self, frame, suggested_file=None):
        """Returns the file currently staged by set_field_value()."""
        return self._upload_file

    def javaScriptConsoleMessage(self, message, line, source):
        """Prints client console message in current output stream."""
        super(GhostWebPage, self).javaScriptConsoleMessage(message, line,
                                                           source)
        log_type = "error" if "Error" in message else "info"
        Logger.log("%s(%d): %s" % (source or '<unknown>', line, message),
                   sender="Frame", level=log_type)

    def javaScriptAlert(self, frame, message):
        """Notifies ghost for alert, then pass."""
        self._alert = message
        Logger.log("alert('%s')" % message, sender="Frame")

    def javaScriptConfirm(self, frame, message):
        """Checks if ghost is waiting for confirm, then returns the right
        value.

        :raises Exception: when no confirmation has been registered.
        """
        if GhostWebPage._confirm_expected is None:
            raise Exception('You must specified a value to confirm "%s"' %
                            message)
        confirmation, callback = GhostWebPage._confirm_expected
        GhostWebPage._confirm_expected = None
        Logger.log("confirm('%s')" % message, sender="Frame")
        if callback is not None:
            return callback()
        return confirmation

    def javaScriptPrompt(self, frame, message, defaultValue, result=None):
        """Checks if ghost is waiting for prompt, then enters the right
        value.

        :raises Exception: when no prompt value has been registered.
        """
        if GhostWebPage._prompt_expected is None:
            raise Exception('You must specified a value for prompt "%s"' %
                            message)
        result_value, callback = GhostWebPage._prompt_expected
        Logger.log("prompt('%s')" % message, sender="Frame")
        if callback is not None:
            result_value = callback()
        if result_value == '':
            Logger.log("'%s' prompt filled with empty string" % message,
                       level='warning')
        GhostWebPage._prompt_expected = None
        if result is None:
            # PySide
            return True, result_value
        result.append(result_value)
        return True

    def setUserAgent(self, user_agent):
        self.user_agent = user_agent

    def userAgentForUrl(self, url):
        return self.user_agent

    def acceptNavigationRequest(self, frame, request, ttype):
        # Remembered so createWindow() can open the same URL in a popup.
        self._lastUrl = request.url()
        return True

    def createWindow(self, ttype):
        """Creates a popup page through create_page_callback, if one is set,
        and opens the last requested URL in it.

        :return: The new page, or None when no callback is configured.
        """
        page = None
        if self.create_page_callback is not None:
            page, name = self.create_page_callback(is_popup=True)
            page.open(self._lastUrl)
        return page

    def _closeWindow(self):
        self.removeWindowFromList.emit(self)

    def switch_to_sub_window(self, index):
        """Change the focus to the sub window (popup)

        :param index: The index of the window, in the order that the window
            was opened
        """
        # NOTE(review): self._windows is not assigned anywhere in this
        # class; confirm who is expected to provide it.
        if len(self._windows) > index:
            self._windows[index].mainFrame().setFocus()
            return self._windows[index]
        return None

    def capture(self, region=None, selector=None,
                format=QImage.Format_ARGB32_Premultiplied):
        """Returns snapshot as QImage.

        :param region: An optional tuple containing region as pixel
            coordinates.
        :param selector: A selector targeting the element to crop on.
        :param format: The output image format.
        """
        if region is None and selector is not None:
            region = self.region_for_selector(selector)
        if region:
            x1, y1, x2, y2 = region
            w, h = (x2 - x1), (y2 - y1)
            image = QImage(QSize(x2, y2), format)
            painter = QPainter(image)
            self.currentFrame().render(painter)
            painter.end()
            image = image.copy(x1, y1, w, h)
        else:
            self.currentFrame().setScrollBarPolicy(
                QtCore.Qt.Vertical, QtCore.Qt.ScrollBarAlwaysOff)
            self.currentFrame().setScrollBarPolicy(
                QtCore.Qt.Horizontal, QtCore.Qt.ScrollBarAlwaysOff)
            self.setViewportSize(self.currentFrame().contentsSize())
            image = QImage(self.viewportSize(), format)
            painter = QPainter(image)
            self.currentFrame().render(painter)
            painter.end()
        return image

    def capture_to(self, path, region=None, selector=None,
                   format=QImage.Format_ARGB32_Premultiplied):
        """Saves snapshot as image.

        :param path: The destination path.
        :param region: An optional tuple containing region as pixel
            coordinates.
        :param selector: A selector targeting the element to crop on.
        :param format: The output image format. The available formats can
            be found here
            http://qt-project.org/doc/qt-4.8/qimage.html#Format-enum
            There is also a "pdf" format that will render the page into a
            pdf file
        """
        if str(format).startswith("pdf"):
            return self.pdf_engine.render_pdf(self, path)
        else:
            self.capture(region=region, format=format,
                         selector=selector).save(path)

    @client_utils_required
    def region_for_selector(self, selector):
        """Returns frame region for given selector as tuple.

        :param selector: The targeted element.
        :raises Exception: when the geometry cannot be read.
        """
        geo = self.currentFrame().findFirstElement(selector).geometry()
        try:
            region = (geo.left(), geo.top(), geo.right(), geo.bottom())
        except Exception:
            # Narrowed from a bare except so KeyboardInterrupt/SystemExit
            # are no longer swallowed and re-labelled.
            raise Exception("can't get region for selector '%s'" % selector)
        return region

    @client_utils_required
    @can_load_page
    def click(self, selector):
        """Click the targeted element.

        :param selector: A CSS3 selector to targeted element.
        """
        if not self.exists(selector):
            raise Exception("Can't find element to click")
        return self.evaluate('GhostUtils.click("%s");' % selector)

    @property
    def content(self):
        """Returns main_frame HTML as a string."""
        return unicode(self.main_frame.toHtml())

    def get_current_frame_content(self):
        """Returns current frame HTML as a string."""
        return unicode(self.currentFrame().toHtml())

    @can_load_page
    def evaluate(self, script):
        """Evaluates script in page frame.

        :param script: The script to evaluate.
        """
        return self.currentFrame().evaluateJavaScript("%s" % script)

    def evaluate_js_file(self, path, encoding='utf-8'):
        """Evaluates javascript file at given path in current frame.
        Raises native IOException in case of invalid file.

        :param path: The path of the file.
        :param encoding: The file's encoding.
        """
        # Read inside a context manager so the handle is always closed.
        with codecs.open(path, encoding=encoding) as script_file:
            script = script_file.read()
        self.evaluate(script)

    def exists(self, selector):
        """Checks if element exists for given selector.

        :param selector: The element selector.
        """
        return not self.currentFrame().findFirstElement(selector).isNull()

    @can_load_page
    def fill(self, selector, values):
        """Fills a form with provided values.

        :param selector: A CSS selector to the target form to fill.
        :param values: A dict containing the values.
        """
        if not self.exists(selector):
            raise Exception("Can't find form")
        for field in values:
            self.set_field_value("%s [name=%s]" % (selector, field),
                                 values[field])
        return True

    @client_utils_required
    @can_load_page
    def fire_on(self, selector, method):
        """Call method on element matching given selector.

        :param selector: A CSS selector to the target element.
        :param method: The name of the method to fire.
        """
        return self.evaluate('GhostUtils.fireOn("%s", "%s");' %
                             (selector, method))

    def global_exists(self, global_name):
        """Checks if javascript global exists.

        :param global_name: The name of the global.
        """
        return self.evaluate('!(typeof %s === "undefined");' % global_name)

    def _reset_for_loading(self):
        """Prepare GhostWebPage to load a new url into the Main Frame
        """
        self.http_resources = []
        self.http_resource_page = None
        self.loaded = False

    def open(self, address, method='get', headers=None, auth=None,
             wait_onload_event=True, wait_for_loading=True):
        """Opens a web page.

        :param address: The resource URL.
        :param method: The Http method.
        :param headers: An optional dict of extra request headers.
        :param auth: An optional tuple of HTTP auth (username, password).
        :param wait_onload_event: If it's set to True waits until the OnLoad
            event from the main page is fired. Otherwise wait until the Dom
            is ready.
        :param wait_for_loading: If True waits until the page is Loaded.
            Note that wait_onload_event isn't valid if wait_for_loading is
            False.
        :return: Page resource, All loaded resources.
        """
        # None instead of a shared mutable default dict.
        if headers is None:
            headers = {}

        if not wait_onload_event:
            if self.ghostInit.receivers(SIGNAL("dom_is_ready(bool)")) == 0:
                self.ghostInit.dom_is_ready.connect(self._page_loaded)
            Logger.log("Waiting until OnReady event is fired")
        else:
            if self.ghostInit.receivers(SIGNAL("dom_is_ready(bool)")) > 0:
                self.ghostInit.dom_is_ready.disconnect(self._page_loaded)

        body = QByteArray()

        try:
            method = getattr(QNetworkAccessManager,
                             "%sOperation" % method.capitalize())
        except AttributeError:
            raise Exception("Invalid http method %s" % method)
        request = QNetworkRequest(QUrl(address))
        # NOTE(review): this call's result is discarded; it looks like it
        # was meant to set the cache-load-control attribute on the request
        # -- confirm before changing.
        request.CacheLoadControl(QNetworkRequest.AlwaysNetwork)
        for header in headers:
            request.setRawHeader(header, headers[header])

        if auth is not None:
            self.network_manager.setAuthCredentials(auth[0], auth[1])

        self._reset_for_loading()
        self.main_frame.load(request, method, body)

        if not wait_for_loading:
            return self.get_loaded_page()
        return self.wait_for_page_loaded()

    def download(self, path, address, **kwards):
        """Opens ``address`` and writes the page content to ``path``.

        :param path: The destination file path.
        :param address: The resource URL.
        :return: The page resource returned by open().
        """
        page = self.open(address, **kwards)
        with open(path, "wb") as f:
            f.write(page.content)
        return page

    @can_load_page
    @client_utils_required
    def set_field_value(self, selector, value, blur=True):
        """Sets the value of the field matched by given selector.

        :param selector: A CSS selector that targets the field.
        :param value: The value to fill in.
        :param blur: An optional boolean that forces blur when filled in.
        """
        def _set_text_value(selector, value):
            return self.evaluate('document.querySelector("%s").value=%s;' %
                                 (selector, json.dumps(value)))

        res = None
        element = self.main_frame.findFirstElement(selector)
        if element.isNull():
            raise Exception('can\'t find element for %s"' % selector)
        self.fire_on(selector, 'focus')
        if element.tagName() in ["TEXTAREA", "SELECT"]:
            res = _set_text_value(selector, value)
        elif element.tagName() == "INPUT":
            if element.attribute('type') in [
                    "color", "date", "datetime", "datetime-local", "email",
                    "hidden", "month", "number", "password", "range",
                    "search", "tel", "text", "time", "url", "week"]:
                res = _set_text_value(selector, value)
            elif element.attribute('type') == "checkbox":
                res = self.evaluate(
                    'GhostUtils.setCheckboxValue("%s", %s);' %
                    (selector, json.dumps(value)))
            elif element.attribute('type') == "radio":
                res = self.evaluate(
                    'GhostUtils.setRadioValue("%s", %s);' %
                    (selector, json.dumps(value)))
            elif element.attribute('type') == "file":
                # chooseFile() hands this value over while the click runs.
                self._upload_file = value
                res = self.click(selector)
                self._upload_file = None
        else:
            raise Exception('unsuported field tag')
        if blur:
            self.fire_on(selector, 'blur')
        return res

    def set_viewport_size(self, width, height):
        """Sets the page viewport size.

        :param width: An integer that sets width pixel count.
        :param height: An integer that sets height pixel count.
        """
        self.setViewportSize(QSize(width, height))

    def wait_for(self, condition, timeout_message):
        """Waits until condition is True.

        :param condition: A callable that returns the condition.
        :param timeout_message: The exception message on timeout.
        """
        started_at = time.time()
        while not condition():
            if time.time() > (started_at + self.wait_timeout):
                raise Exception(timeout_message)
            time.sleep(0.01)
            self._app.processEvents()
            if self.wait_callback is not None:
                self.wait_callback()

    def wait_for_alert(self):
        """Waits for main frame alert().
        """
        self.wait_for(lambda: self._alert is not None,
                      'User has not been alerted.')
        msg = self._alert
        self._alert = None
        return msg

    def wait_for_page_loaded(self):
        """Waits until page is loaded, assumed that a page as been
        requested.
        """
        self.wait_for(
            lambda: self.loaded and len(self._unsupported_files.keys()) == 0,
            'Unable to load requested page')
        return self.get_loaded_page()

    def get_loaded_page(self):
        """Returns the page resource once loading has finished, else None."""
        if self.loaded and len(self._unsupported_files.keys()) == 0:
            return self.http_resource_page
        return None

    def wait_for_selector(self, selector):
        """Waits until selector match an element on the frame.

        :param selector: The selector to wait for.
        """
        self.wait_for(lambda: self.exists(selector),
                      'Can\'t find element matching "%s"' % selector)
        return True

    def wait_for_text(self, text):
        """Waits until given text appear on main frame.

        :param text: The text to wait for.
        """
        self.wait_for(lambda: text in self.currentFrame().toPlainText(),
                      'Can\'t find "%s" in current frame' % text)
        return True

    def _page_load_progress(self, progress):
        pass

    def _page_loaded(self, ok):
        """Called back when page is loaded.
        """
        # FIXME: Check why ok == False when we are trying to load
        # unsupported content
        self.loaded = True

    def _page_load_started(self):
        """Called back when page load started.
        """
        self.loaded = False

    def _release_last_resources(self):
        """Releases last loaded resources.

        :return: The released resources.
        """
        last_resources = self.http_resources
        self.http_resources = []
        return last_resources

    def release_last_resources(self):
        return self._release_last_resources()

    def _insert_dom_ready_code(self):
        self.mainFrame().addToJavaScriptWindowObject("GhostInit",
                                                     self.ghostInit)
        self.evaluate_js_file(
            os.path.join(os.path.dirname(__file__), 'domready.js'))

    def _request_ended(self, reply):
        """Adds an HttpResource object to http_resources.

        :param reply: The QNetworkReply object.
        """
        if reply.url() == self.currentFrame().url():
            Logger.log("Injecting DOMReady code")
            self._insert_dom_ready_code()

        content = None
        if unicode(reply.url()) in self._unsupported_files:
            del self._unsupported_files[unicode(reply.url())]
            content = reply.readAll()

        if reply.attribute(QNetworkRequest.HttpStatusCodeAttribute):
            cache = self.network_manager.cache()
            http_resource = HttpResource(reply, cache, content)

            # The first resource of a load is treated as the page itself.
            if self.http_resource_page is None:
                self.http_resource_page = http_resource

            if self.max_resource_queued is None or \
                    self.max_resource_queued > 0:
                self.http_resources.append(http_resource)

            # Drop the oldest entry once the configured cap is exceeded.
            if self.max_resource_queued is not None and \
                    len(self.http_resources) > self.max_resource_queued:
                self.http_resources.pop(0)

    def _unsupported_content(self, reply):
        """Registers a reply with unsupported content so _request_ended()
        can read its body when the request finishes.

        :param reply: The QNetworkReply object.
        """
        self._unsupported_files[unicode(reply.url())] = reply

    def switch_to_frame(self, frameName=None):
        """Change the focus to the indicated frame

        :param frameName: The name of the frame
        :return: True if the focus was changed, False otherwise.
        """
        if frameName is None:
            self.main_frame.setFocus()
            return True

        for frame in self.currentFrame().childFrames():
            if frame.frameName() == frameName:
                frame.setFocus()
                return True
        return False

    def switch_to_frame_nro(self, nro=-1):
        """Change the focus to the indicated frame

        :param nro: Number of the frame; -1 (or None) selects the main
            frame.
        :return: True if the focus was changed, False when ``nro`` does not
            designate an existing child frame.
        """
        if nro is None or nro == -1:
            self.main_frame.setFocus()
            return True

        frames = self.currentFrame().childFrames()
        # The old `len(frames) <= nro + 1` test was inverted: it indexed
        # exactly when the index was out of range, and the return value
        # reported success for out-of-range numbers.
        if 0 <= nro < len(frames):
            frames[nro].setFocus()
            return True
        return False

    @property
    def cookies(self):
        """Returns all cookies."""
        return self.network_manager.cookieJar().allCookies()

    def delete_cookies(self):
        """Deletes all cookies."""
        self.network_manager.cookieJar().setAllCookies([])

    def delete_cache(self):
        """Clears the network manager's cache."""
        self.network_manager.cache().clear()

    def load_cookies(self, cookie_storage, keep_old=False):
        """load from cookielib's CookieJar or Set-Cookie3 format text file.

        :param cookie_storage: file location string on disk or CookieJar
            instance.
        :param keep_old: Don't reset, keep cookies not overridden.
        :raises ValueError: for any other kind of ``cookie_storage``.
        """
        def toQtCookieJar(pyCookieJar, qtCookieJar):
            # allCookies() is the accessor used everywhere else in this
            # class; the old `qtCookieJar.cookies` attribute lookup broke
            # keep_old=True.
            all_cookies = qtCookieJar.allCookies() if keep_old else []
            for pc in pyCookieJar:
                qc = toQtCookie(pc)
                all_cookies.append(qc)
            qtCookieJar.setAllCookies(all_cookies)

        def toQtCookie(pyCookie):
            qc = QNetworkCookie(pyCookie.name, pyCookie.value)
            qc.setSecure(pyCookie.secure)
            if pyCookie.path_specified:
                qc.setPath(pyCookie.path)
            if pyCookie.domain != "":
                qc.setDomain(pyCookie.domain)
            if pyCookie.expires != 0:
                t = QDateTime()
                t.setTime_t(pyCookie.expires)
                qc.setExpirationDate(t)
            # not yet handled(maybe less useful):
            #   py cookie.rest / QNetworkCookie.setHttpOnly()
            return qc

        if cookie_storage.__class__.__name__ == 'str':
            cj = LWPCookieJar(cookie_storage)
            cj.load()
            toQtCookieJar(cj, self.network_manager.cookieJar())
        elif cookie_storage.__class__.__name__.endswith('CookieJar'):
            toQtCookieJar(cookie_storage, self.network_manager.cookieJar())
        else:
            raise ValueError('unsupported cookie_storage type.')

    def save_cookies(self, cookie_storage):
        """Save to cookielib's CookieJar or Set-Cookie3 format text file.

        :param cookie_storage: file location string or CookieJar instance.
        :raises ValueError: for any other kind of ``cookie_storage``.
        """
        def toPyCookieJar(qtCookieJar, pyCookieJar):
            for c in qtCookieJar.allCookies():
                pyCookieJar.set_cookie(toPyCookie(c))

        def toPyCookie(qtCookie):
            port = None
            port_specified = False
            secure = qtCookie.isSecure()
            name = str(qtCookie.name())
            value = str(qtCookie.value())
            v = str(qtCookie.path())
            path_specified = bool(v != "")
            path = v if path_specified else None
            v = str(qtCookie.domain())
            domain_specified = bool(v != "")
            domain = v
            domain_initial_dot = v.startswith('.') if domain_specified \
                else None
            v = long(qtCookie.expirationDate().toTime_t())
            # Long type boundary on 32bit platforms; avoid ValueError
            expires = 2147483647 if v > 2147483647 else v
            rest = {}
            discard = False
            return Cookie(0, name, value, port, port_specified, domain,
                          domain_specified, domain_initial_dot, path,
                          path_specified, secure, expires, discard, None,
                          None, rest)

        if cookie_storage.__class__.__name__ == 'str':
            cj = LWPCookieJar(cookie_storage)
            toPyCookieJar(self.network_manager.cookieJar(), cj)
            cj.save()
        elif cookie_storage.__class__.__name__.endswith('CookieJar'):
            toPyCookieJar(self.network_manager.cookieJar(), cookie_storage)
        else:
            raise ValueError('unsupported cookie_storage type.')
def __init__(
        self, app, network_manager, wait_timeout=20, wait_callback=None,
        viewport_size=(800, 600),
        user_agent=('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.2 '
                    '(KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2'),
        log_level=30, download_images=True, plugins_enabled=False,
        java_enabled=False, create_page_callback=None, is_popup=False,
        max_resource_queued=None, *args, **kargs):
    """Set up the page: state, web settings, signals and network manager.

    :param app: Object passed to the parent constructor as ``parent`` and
        kept as ``self._app``.
    :param network_manager: Installed as the page's network access manager;
        its ``finished`` signal is routed to ``self._request_ended``.
    :param wait_timeout: Stored as ``self.wait_timeout``.
    :param wait_callback: Stored as ``self.wait_callback``.
    :param viewport_size: ``(width, height)`` forwarded to
        ``set_viewport_size``.
    :param user_agent: Forwarded to ``setUserAgent``.
    :param log_level: Forwarded to ``logger.setLevel``.
    :param download_images: Value for the ``AutoLoadImages`` web setting.
    :param plugins_enabled: Value for the ``PluginsEnabled`` web setting.
    :param java_enabled: Value for the ``JavaEnabled`` web setting.
    :param create_page_callback: Stored as ``self.create_page_callback``.
    :param is_popup: Stored as ``self.is_popup``.
    :param max_resource_queued: Stored as ``self.max_resource_queued``.

    Extra positional and keyword arguments are accepted but not used.
    """
    super(GhostWebPage, self).__init__(parent=app)

    # Plain instance state.
    self._app = app
    self.pdf_engine = Pdf()
    self.http_resources = []
    self.http_resource_page = None
    self.max_resource_queued = max_resource_queued
    self.wait_timeout = wait_timeout
    self.wait_callback = wait_callback
    self.loaded = True
    self.create_page_callback = create_page_callback
    self.is_popup = is_popup

    # Internal library object
    self.ghostInit = GhostInit()

    self.setForwardUnsupportedContent(True)

    # Web settings, applied in the same order as a hand-written sequence.
    web_settings = (
        (QtWebKit.QWebSettings.AutoLoadImages, download_images),
        (QtWebKit.QWebSettings.JavascriptEnabled, True),
        (QtWebKit.QWebSettings.JavascriptCanOpenWindows, True),
        (QtWebKit.QWebSettings.PluginsEnabled, plugins_enabled),
        (QtWebKit.QWebSettings.JavaEnabled, java_enabled),
    )
    for attribute, enabled in web_settings:
        self.settings().setAttribute(attribute, enabled)

    width, height = viewport_size
    self.set_viewport_size(width, height)

    # Page signals
    page_signals = (
        (self.loadFinished, self._page_loaded),
        (self.loadStarted, self._page_load_started),
        (self.loadProgress, self._page_load_progress),
        (self.unsupportedContent, self._unsupported_content),
    )
    for signal, handler in page_signals:
        signal.connect(handler)

    # Network layer.
    self.network_manager = network_manager
    self.setNetworkAccessManager(self.network_manager)
    self.network_manager.finished.connect(self._request_ended)

    # User Agent
    self.setUserAgent(user_agent)

    self.main_frame = self.mainFrame()
    self._unsupported_files = {}
    self.windowCloseRequested.connect(self._closeWindow)

    logger.setLevel(log_level)
def pdf_merge():
    """Create a ``Pdf`` object in ``'merge'`` mode and build its GUI."""
    # The window title is a user-facing string and is kept verbatim.
    Pdf(title='PDF 文档合并', mode='merge').construct_gui()
class GhostWebPage(QWebPage):
    """Overrides QtWebKit.QWebPage in order to intercept some graphical
    behaviours like alert(), confirm().
    Also intercepts client side console.log().

    :param app: a QApplication that is running Ghost.
    :param network_manager: a NetworkManager instance in charge of managing
        all the network requests.
    :param wait_timeout: Maximum step duration in seconds.
    :param wait_callback: An optional callable that is periodically
        executed until Ghost stops waiting.
    :param viewport_size: A tuple that sets the initial viewport size.
    :param user_agent: The default User-Agent header.
    :param log_level: The optional logging level.
    :param download_images: Indicates whether the browser downloads images.
    :param plugins_enabled: Enable plugins (like Flash).
    :param java_enabled: Enable Java JRE.
    :param create_page_callback: A callable invoked when a popup is opened.
    :param is_popup: Boolean that indicates whether the page is a popup.
    :param max_resource_queued: The maximum number of resources that can be
        kept in memory.
        If None then no limits are applied.
        If 0 then no resources are kept.
        If the number is > 0 then the number of resources won't be more
        than max_resource_queued.
    """
    user_agent = ""
    removeWindowFromList = pyqtSignal(object)
    _alert = None
    # NOTE: the two "expected" slots below are written on the class (not the
    # instance) by the ``confirm`` / ``prompt`` context managers, so they are
    # shared by every GhostWebPage.
    _confirm_expected = None
    _prompt_expected = None
    _upload_file = None
    _app = None

    def __init__(self, app, network_manager, wait_timeout=20,
                 wait_callback=None, viewport_size=(800, 600),
                 user_agent=('Mozilla/5.0 (X11; Linux x86_64) '
                             'AppleWebKit/535.2 (KHTML, like Gecko) '
                             'Chrome/15.0.874.121 Safari/535.2'),
                 log_level=30, download_images=True, plugins_enabled=False,
                 java_enabled=False, create_page_callback=None,
                 is_popup=False, max_resource_queued=None, *args, **kargs):
        """Initialise state, web settings, signal wiring and networking.

        See the class docstring for the meaning of each parameter. Extra
        positional/keyword arguments are accepted but not used.
        """
        super(GhostWebPage, self).__init__(parent=app)

        self._app = app
        self.pdf_engine = Pdf()
        self.http_resources = []
        self.http_resource_page = None
        self.max_resource_queued = max_resource_queued
        self.wait_timeout = wait_timeout
        self.wait_callback = wait_callback
        self.loaded = True
        self.create_page_callback = create_page_callback
        self.is_popup = is_popup

        # Internal library object
        self.ghostInit = GhostInit()

        self.setForwardUnsupportedContent(True)
        self.settings().setAttribute(
            QtWebKit.QWebSettings.AutoLoadImages, download_images)
        self.settings().setAttribute(
            QtWebKit.QWebSettings.JavascriptEnabled, True)
        self.settings().setAttribute(
            QtWebKit.QWebSettings.JavascriptCanOpenWindows, True)
        self.settings().setAttribute(
            QtWebKit.QWebSettings.PluginsEnabled, plugins_enabled)
        self.settings().setAttribute(
            QtWebKit.QWebSettings.JavaEnabled, java_enabled)
        self.set_viewport_size(*viewport_size)

        # Page signals
        self.loadFinished.connect(self._page_loaded)
        self.loadStarted.connect(self._page_load_started)
        self.loadProgress.connect(self._page_load_progress)
        self.unsupportedContent.connect(self._unsupported_content)

        self.network_manager = network_manager
        self.setNetworkAccessManager(self.network_manager)
        self.network_manager.finished.connect(self._request_ended)

        # User Agent
        self.setUserAgent(user_agent)

        self.main_frame = self.mainFrame()
        self._unsupported_files = {}
        self.windowCloseRequested.connect(self._closeWindow)

        logger.setLevel(log_level)

    class confirm:
        """Statement that tells Ghost how to deal with javascript confirm().

        :param confirm: A boolean answer for the confirmation.
        :param callback: A callable that returns a boolean for confirmation.
        """
        def __init__(self, confirm=True, callback=None):
            self.confirm = confirm
            self.callback = callback

        def __enter__(self):
            GhostWebPage._confirm_expected = (self.confirm, self.callback)

        def __exit__(self, type, value, traceback):
            GhostWebPage._confirm_expected = None

    class prompt:
        """Statement that tells Ghost how to deal with javascript prompt().

        :param value: A string value to fill in prompt.
        :param callback: A callable that returns the value to fill in.
        """
        def __init__(self, value='', callback=None):
            self.value = value
            self.callback = callback

        def __enter__(self):
            GhostWebPage._prompt_expected = (self.value, self.callback)

        def __exit__(self, type, value, traceback):
            GhostWebPage._prompt_expected = None

    def chooseFile(self, frame, suggested_file=None):
        """Return the file path staged in ``_upload_file`` (may be None)."""
        return self._upload_file

    def javaScriptConsoleMessage(self, message, line, source):
        """Prints client console message in current output stream."""
        super(GhostWebPage, self).javaScriptConsoleMessage(
            message, line, source)
        log_type = "error" if "Error" in message else "info"
        Logger.log("%s(%d): %s" % (source or '<unknown>', line, message),
                   sender="Frame", level=log_type)

    def javaScriptAlert(self, frame, message):
        """Notifies ghost for alert, then pass."""
        self._alert = message
        Logger.log("alert('%s')" % message, sender="Frame")

    def javaScriptConfirm(self, frame, message):
        """Checks if ghost is waiting for confirm, then returns the right
        value.

        :raises Exception: if no answer was registered via ``confirm``.
        """
        if GhostWebPage._confirm_expected is None:
            raise Exception('You must specified a value to confirm "%s"' %
                            message)
        confirmation, callback = GhostWebPage._confirm_expected
        # The registered answer is single-use.
        GhostWebPage._confirm_expected = None
        Logger.log("confirm('%s')" % message, sender="Frame")
        if callback is not None:
            return callback()
        return confirmation

    def javaScriptPrompt(self, frame, message, defaultValue, result=None):
        """Checks if ghost is waiting for prompt, then enters the right
        value.

        :raises Exception: if no value was registered via ``prompt``.
        """
        if GhostWebPage._prompt_expected is None:
            raise Exception('You must specified a value for prompt "%s"' %
                            message)
        result_value, callback = GhostWebPage._prompt_expected
        Logger.log("prompt('%s')" % message, sender="Frame")
        if callback is not None:
            result_value = callback()
        if result_value == '':
            Logger.log("'%s' prompt filled with empty string" % message,
                       level='warning')
        GhostWebPage._prompt_expected = None
        if result is None:
            # PySide
            return True, result_value
        result.append(result_value)
        return True

    def setUserAgent(self, user_agent):
        """Store the User-Agent string returned by ``userAgentForUrl``."""
        self.user_agent = user_agent

    def userAgentForUrl(self, url):
        """Return the configured User-Agent regardless of ``url``."""
        return self.user_agent

    def acceptNavigationRequest(self, frame, request, ttype):
        """Remember the requested URL and accept every navigation."""
        self._lastUrl = request.url()
        return True

    def createWindow(self, ttype):
        """Create a popup page through ``create_page_callback``.

        :return: The new page after it has been asked to open the last
            requested URL, or None when no callback is configured.
        """
        if self.create_page_callback is None:
            # Without a factory there is no page to open anything on.
            return None
        page, name = self.create_page_callback(is_popup=True)
        page.open(self._lastUrl)
        return page

    def _closeWindow(self):
        """Emit ``removeWindowFromList`` with this page as payload."""
        self.removeWindowFromList.emit(self)

    def switch_to_sub_window(self, index):
        """Change the focus to the sub window (popup).

        :param index: The index of the window, in the order that the window
            was opened.
        :return: The window at ``index`` or None if the index is too large.
        """
        # NOTE(review): ``_windows`` is not assigned anywhere in this class;
        # presumably the owner sets it -- confirm.
        if len(self._windows) > index:
            self._windows[index].mainFrame().setFocus()
            return self._windows[index]
        return None

    def capture(self, region=None, selector=None,
                format=QImage.Format_ARGB32_Premultiplied):
        """Returns snapshot as QImage.

        :param region: An optional tuple containing region as pixel
            coordinates.
        :param selector: A selector targeting the element to crop on.
        :param format: The output image format.
        """
        if region is None and selector is not None:
            region = self.region_for_selector(selector)
        if region:
            x1, y1, x2, y2 = region
            w, h = (x2 - x1), (y2 - y1)
            # Render up to the region's far corner, then crop.
            image = QImage(QSize(x2, y2), format)
            painter = QPainter(image)
            self.currentFrame().render(painter)
            painter.end()
            image = image.copy(x1, y1, w, h)
        else:
            self.currentFrame().setScrollBarPolicy(
                QtCore.Qt.Vertical, QtCore.Qt.ScrollBarAlwaysOff)
            self.currentFrame().setScrollBarPolicy(
                QtCore.Qt.Horizontal, QtCore.Qt.ScrollBarAlwaysOff)
            # Grow the viewport to the content size so the whole frame is
            # rendered.
            self.setViewportSize(self.currentFrame().contentsSize())
            image = QImage(self.viewportSize(), format)
            painter = QPainter(image)
            self.currentFrame().render(painter)
            painter.end()
        return image

    def capture_to(self, path, region=None, selector=None,
                   format=QImage.Format_ARGB32_Premultiplied):
        """Saves snapshot as image.

        :param path: The destination path.
        :param region: An optional tuple containing region as pixel
            coordinates.
        :param selector: A selector targeting the element to crop on.
        :param format: The output image format. The available formats can
            be found here
            http://qt-project.org/doc/qt-4.8/qimage.html#Format-enum
            There is also a "pdf" format that will render the page into a
            pdf file.
        """
        if str(format).startswith("pdf"):
            return self.pdf_engine.render_pdf(self, path)
        else:
            self.capture(region=region, format=format,
                         selector=selector).save(path)

    @client_utils_required
    def region_for_selector(self, selector):
        """Returns frame region for given selector as tuple.

        :param selector: The targeted element.
        :raises Exception: if the element geometry cannot be read.
        """
        geo = self.currentFrame().findFirstElement(selector).geometry()
        try:
            region = (geo.left(), geo.top(), geo.right(), geo.bottom())
        except Exception:
            raise Exception("can't get region for selector '%s'" % selector)
        return region

    @client_utils_required
    @can_load_page
    def click(self, selector):
        """Click the targeted element.

        :param selector: A CSS3 selector to targeted element.
        :raises Exception: if no element matches ``selector``.
        """
        if not self.exists(selector):
            raise Exception("Can't find element to click")
        return self.evaluate('GhostUtils.click("%s");' % selector)

    @property
    def content(self):
        """Returns main_frame HTML as a string."""
        return unicode(self.main_frame.toHtml())

    def get_current_frame_content(self):
        """Returns current frame HTML as a string."""
        return unicode(self.currentFrame().toHtml())

    @can_load_page
    def evaluate(self, script):
        """Evaluates script in page frame.

        :param script: The script to evaluate.
        """
        return self.currentFrame().evaluateJavaScript("%s" % script)

    def evaluate_js_file(self, path, encoding='utf-8'):
        """Evaluates javascript file at given path in current frame.
        Raises native IOException in case of invalid file.

        :param path: The path of the file.
        :param encoding: The file's encoding.
        """
        # Close the file deterministically before the script runs.
        with codecs.open(path, encoding=encoding) as script_file:
            script = script_file.read()
        self.evaluate(script)

    def exists(self, selector):
        """Checks if element exists for given selector.

        :param selector: The element selector.
        """
        return not self.currentFrame().findFirstElement(selector).isNull()

    @can_load_page
    def fill(self, selector, values):
        """Fills a form with provided values.

        :param selector: A CSS selector to the target form to fill.
        :param values: A dict containing the values.
        :raises Exception: if no element matches ``selector``.
        """
        if not self.exists(selector):
            raise Exception("Can't find form")
        for field in values:
            self.set_field_value("%s [name=%s]" % (selector, field),
                                 values[field])
        return True

    @client_utils_required
    @can_load_page
    def fire_on(self, selector, method):
        """Call method on element matching given selector.

        :param selector: A CSS selector to the target element.
        :param method: The name of the method to fire.
        """
        return self.evaluate('GhostUtils.fireOn("%s", "%s");' % (
            selector, method))

    def global_exists(self, global_name):
        """Checks if javascript global exists.

        :param global_name: The name of the global.
        """
        return self.evaluate('!(typeof %s === "undefined");' % global_name)

    def _reset_for_loading(self):
        """Prepare GhostWebPage to load a new url into the Main Frame."""
        self.http_resources = []
        self.http_resource_page = None
        self.loaded = False

    def open(self, address, method='get', headers=None, auth=None,
             wait_onload_event=True, wait_for_loading=True):
        """Opens a web page.

        :param address: The resource URL.
        :param method: The Http method.
        :param headers: An optional dict of extra request headers.
        :param auth: An optional tuple of HTTP auth (username, password).
        :param wait_onload_event: If it's set to True waits until the OnLoad
            event from the main page is fired. Otherwise wait until the Dom
            is ready.
        :param wait_for_loading: If True waits until the page is Loaded.
            Note that wait_onload_event isn't valid if wait_for_loading is
            False.
        :return: The value of ``wait_for_page_loaded()``, or of
            ``get_loaded_page()`` when ``wait_for_loading`` is False.
        :raises Exception: if ``method`` is not a known HTTP operation.
        """
        if headers is None:
            # Avoid a shared mutable default argument.
            headers = {}

        if not wait_onload_event:
            if self.ghostInit.receivers(SIGNAL("dom_is_ready(bool)")) == 0:
                self.ghostInit.dom_is_ready.connect(self._page_loaded)
            Logger.log("Waiting until OnReady event is fired")
        else:
            if self.ghostInit.receivers(SIGNAL("dom_is_ready(bool)")) > 0:
                self.ghostInit.dom_is_ready.disconnect(self._page_loaded)

        body = QByteArray()
        try:
            method = getattr(QNetworkAccessManager,
                             "%sOperation" % method.capitalize())
        except AttributeError:
            raise Exception("Invalid http method %s" % method)
        request = QNetworkRequest(QUrl(address))
        # NOTE(review): this only builds an enum value and discards it; it
        # does not appear to change the request's cache policy. Presumably
        # setAttribute(CacheLoadControlAttribute, AlwaysNetwork) was meant
        # -- confirm before changing, since it alters caching behaviour.
        request.CacheLoadControl(QNetworkRequest.AlwaysNetwork)
        for header in headers:
            request.setRawHeader(header, headers[header])

        if auth is not None:
            self.network_manager.setAuthCredentials(auth[0], auth[1])

        self._reset_for_loading()
        self.main_frame.load(request, method, body)

        if not wait_for_loading:
            return self.get_loaded_page()
        return self.wait_for_page_loaded()

    def download(self, path, address, **kwards):
        """Open ``address`` and write the resulting page content to ``path``.

        :param path: Destination file, opened in binary write mode.
        :param address: The resource URL, forwarded to ``open``.
        :param kwards: Extra keyword arguments forwarded to ``open``.
        :return: The page object returned by ``open``.
        """
        page = self.open(address, **kwards)
        with open(path, "wb") as f:
            f.write(page.content)
        return page

    @can_load_page
    @client_utils_required
    def set_field_value(self, selector, value, blur=True):
        """Sets the value of the field matched by given selector.

        :param selector: A CSS selector that target the field.
        :param value: The value to fill in.
        :param blur: An optional boolean that force blur when filled in.
        :raises Exception: if the element is missing or its tag is not
            TEXTAREA, SELECT or INPUT.
        """
        def _set_text_value(selector, value):
            return self.evaluate(
                'document.querySelector("%s").value=%s;' %
                (selector, json.dumps(value)))

        res = None
        element = self.main_frame.findFirstElement(selector)
        if element.isNull():
            raise Exception('can\'t find element for %s"' % selector)
        self.fire_on(selector, 'focus')
        if element.tagName() in ["TEXTAREA", "SELECT"]:
            res = _set_text_value(selector, value)
        elif element.tagName() == "INPUT":
            if element.attribute('type') in [
                    "color", "date", "datetime", "datetime-local", "email",
                    "hidden", "month", "number", "password", "range",
                    "search", "tel", "text", "time", "url", "week"]:
                res = _set_text_value(selector, value)
            elif element.attribute('type') == "checkbox":
                res = self.evaluate(
                    'GhostUtils.setCheckboxValue("%s", %s);' %
                    (selector, json.dumps(value)))
            elif element.attribute('type') == "radio":
                res = self.evaluate(
                    'GhostUtils.setRadioValue("%s", %s);' %
                    (selector, json.dumps(value)))
            elif element.attribute('type') == "file":
                # chooseFile() hands this path back while the click runs.
                self._upload_file = value
                res = self.click(selector)
                self._upload_file = None
        else:
            raise Exception('unsuported field tag')
        if blur:
            self.fire_on(selector, 'blur')
        return res

    def set_viewport_size(self, width, height):
        """Sets the page viewport size.

        :param width: An integer that sets width pixel count.
        :param height: An integer that sets height pixel count.
        """
        self.setViewportSize(QSize(width, height))

    def wait_for(self, condition, timeout_message):
        """Waits until condition is True.

        :param condition: A callable that returns the condition.
        :param timeout_message: The exception message on timeout.
        :raises Exception: when ``wait_timeout`` seconds have elapsed.
        """
        started_at = time.time()
        while not condition():
            if time.time() > (started_at + self.wait_timeout):
                raise Exception(timeout_message)
            time.sleep(0.01)
            # Let Qt deliver pending events so the condition can change.
            self._app.processEvents()
            if self.wait_callback is not None:
                self.wait_callback()

    def wait_for_alert(self):
        """Waits for main frame alert() and returns its message."""
        self.wait_for(lambda: self._alert is not None,
                      'User has not been alerted.')
        msg = self._alert
        self._alert = None
        return msg

    def wait_for_page_loaded(self):
        """Waits until page is loaded, assumed that a page as been
        requested.
        """
        self.wait_for(
            lambda: self.loaded and not self._unsupported_files,
            'Unable to load requested page')
        return self.get_loaded_page()

    def get_loaded_page(self):
        """Return the page resource if loading is complete, else None."""
        if self.loaded and not self._unsupported_files:
            return self.http_resource_page
        return None

    def wait_for_selector(self, selector):
        """Waits until selector match an element on the frame.

        :param selector: The selector to wait for.
        """
        self.wait_for(lambda: self.exists(selector),
                      'Can\'t find element matching "%s"' % selector)
        return True

    def wait_for_text(self, text):
        """Waits until given text appear on main frame.

        :param text: The text to wait for.
        """
        self.wait_for(lambda: text in self.currentFrame().toPlainText(),
                      'Can\'t find "%s" in current frame' % text)
        return True

    def _page_load_progress(self, progress):
        """Load-progress slot; intentionally does nothing."""
        pass

    def _page_loaded(self, ok):
        """Called back when page is loaded."""
        # FIXME: Check why ok == False when we are trying to load
        # unsupported content
        self.loaded = True

    def _page_load_started(self):
        """Called back when page load started."""
        self.loaded = False

    def _release_last_resources(self):
        """Releases last loaded resources.

        :return: The released resources.
        """
        last_resources = self.http_resources
        self.http_resources = []
        return last_resources

    def release_last_resources(self):
        """Public alias of ``_release_last_resources``."""
        return self._release_last_resources()

    def _insert_dom_ready_code(self):
        """Expose ``ghostInit`` to page JavaScript as ``GhostInit`` and run
        the bundled ``domready.js`` script.
        """
        self.mainFrame().addToJavaScriptWindowObject(
            "GhostInit", self.ghostInit)
        self.evaluate_js_file(
            os.path.join(os.path.dirname(__file__), 'domready.js'))

    def _request_ended(self, reply):
        """Adds an HttpResource object to http_resources.

        :param reply: The QNetworkReply object.
        """
        if reply.url() == self.currentFrame().url():
            Logger.log("Injecting DOMReady code")
            self._insert_dom_ready_code()

        content = None
        if unicode(reply.url()) in self._unsupported_files:
            del self._unsupported_files[unicode(reply.url())]
            content = reply.readAll()

        if reply.attribute(QNetworkRequest.HttpStatusCodeAttribute):
            cache = self.network_manager.cache()
            http_resource = HttpResource(reply, cache, content)
            # The first resource of a load is treated as the page itself.
            if self.http_resource_page is None:
                self.http_resource_page = http_resource

            if self.max_resource_queued is None or \
                    self.max_resource_queued > 0:
                self.http_resources.append(http_resource)

            # Drop the oldest entry once the configured cap is exceeded.
            if self.max_resource_queued is not None and \
                    len(self.http_resources) > self.max_resource_queued:
                self.http_resources.pop(0)

    def _unsupported_content(self, reply):
        """Records a reply with unsupported content, keyed by its URL, so
        that ``_request_ended`` can read its body when it finishes.

        :param reply: The QNetworkReply object.
        """
        self._unsupported_files[unicode(reply.url())] = reply

    def switch_to_frame(self, frameName=None):
        """Change the focus to the indicated frame.

        :param frameName: The name of the frame; None selects the main
            frame.
        :return: True if a frame received focus, False otherwise.
        """
        if frameName is None:
            self.main_frame.setFocus()
            return True

        for frame in self.currentFrame().childFrames():
            if frame.frameName() == frameName:
                frame.setFocus()
                return True
        return False

    def switch_to_frame_nro(self, nro=-1):
        """Change the focus to the indicated frame.

        :param nro: Index of the child frame of the current frame; -1 (the
            default) selects the main frame.
        :return: True if a frame received focus, False when ``nro`` does
            not address an existing child frame.
        """
        if nro == -1:
            self.main_frame.setFocus()
            return True

        frames = self.currentFrame().childFrames()
        if nro is not None and 0 <= nro < len(frames):
            frames[nro].setFocus()
            return True
        return False

    @property
    def cookies(self):
        """Returns all cookies."""
        return self.network_manager.cookieJar().allCookies()

    def delete_cookies(self):
        """Deletes all cookies."""
        self.network_manager.cookieJar().setAllCookies([])

    def delete_cache(self):
        """Clears the network manager's cache."""
        self.network_manager.cache().clear()

    def load_cookies(self, cookie_storage, keep_old=False):
        """Load from cookielib's CookieJar or Set-Cookie3 format text file.

        :param cookie_storage: file location string on disk or CookieJar
            instance.
        :param keep_old: Don't reset, keep cookies not overridden.
        :raises ValueError: if ``cookie_storage`` is neither a ``str`` nor
            an object whose class name ends with ``CookieJar``.
        """
        def toQtCookieJar(pyCookieJar, qtCookieJar):
            # Start from the jar's current cookies only when asked to keep
            # them; copy so the appends below work on our own list.
            all_cookies = list(qtCookieJar.allCookies()) if keep_old else []
            for pc in pyCookieJar:
                all_cookies.append(toQtCookie(pc))
            qtCookieJar.setAllCookies(all_cookies)

        def toQtCookie(pyCookie):
            qc = QNetworkCookie(pyCookie.name, pyCookie.value)
            qc.setSecure(pyCookie.secure)
            if pyCookie.path_specified:
                qc.setPath(pyCookie.path)
            if pyCookie.domain != "":
                qc.setDomain(pyCookie.domain)
            # Skip both 0 and None: a cookie without an expiry has nothing
            # to convert.
            if pyCookie.expires:
                t = QDateTime()
                t.setTime_t(pyCookie.expires)
                qc.setExpirationDate(t)
            # not yet handled(maybe less useful):
            #   py cookie.rest / QNetworkCookie.setHttpOnly()
            return qc

        if cookie_storage.__class__.__name__ == 'str':
            cj = LWPCookieJar(cookie_storage)
            cj.load()
            toQtCookieJar(cj, self.network_manager.cookieJar())
        elif cookie_storage.__class__.__name__.endswith('CookieJar'):
            toQtCookieJar(cookie_storage, self.network_manager.cookieJar())
        else:
            raise ValueError('unsupported cookie_storage type.')

    def save_cookies(self, cookie_storage):
        """Save to cookielib's CookieJar or Set-Cookie3 format text file.

        :param cookie_storage: file location string or CookieJar instance.
        :raises ValueError: if ``cookie_storage`` is neither a ``str`` nor
            an object whose class name ends with ``CookieJar``.
        """
        def toPyCookieJar(qtCookieJar, pyCookieJar):
            for c in qtCookieJar.allCookies():
                pyCookieJar.set_cookie(toPyCookie(c))

        def toPyCookie(qtCookie):
            port = None
            port_specified = False
            secure = qtCookie.isSecure()
            name = str(qtCookie.name())
            value = str(qtCookie.value())
            v = str(qtCookie.path())
            path_specified = bool(v != "")
            path = v if path_specified else None
            v = str(qtCookie.domain())
            domain_specified = bool(v != "")
            domain = v
            domain_initial_dot = v.startswith('.') if domain_specified \
                else None
            v = long(qtCookie.expirationDate().toTime_t())
            # Long type boundary on 32bit platforms; avoid ValueError
            expires = 2147483647 if v > 2147483647 else v
            rest = {}
            discard = False
            return Cookie(0, name, value, port, port_specified, domain,
                          domain_specified, domain_initial_dot, path,
                          path_specified, secure, expires, discard, None,
                          None, rest)

        if cookie_storage.__class__.__name__ == 'str':
            cj = LWPCookieJar(cookie_storage)
            toPyCookieJar(self.network_manager.cookieJar(), cj)
            cj.save()
        elif cookie_storage.__class__.__name__.endswith('CookieJar'):
            toPyCookieJar(self.network_manager.cookieJar(), cookie_storage)
        else:
            raise ValueError('unsupported cookie_storage type.')
def main():
    """Command-line entry point: turn a Pivotal Tracker CSV into a PDF.

    Parses the command line, reads the CSV file, draws up to four stories
    per landscape page, writes the PDF and hands the result to
    ``open_file``.

    :return: The path of the generated PDF file.
    """
    parser = argparse.ArgumentParser(
        description=
        'Create a pdf document from a exported csv of Pivotal Tracker',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('csv', help='the file path to the csv file')
    parser.add_argument('-m', '--margin', type=int, default=5,
                        help='margin of the page in mm')
    parser.add_argument('-o', '--output',
                        help='file path to the generated pdf')
    parser.add_argument('-n', '--show-number', action='store_true',
                        help='shows the story number on the bottom left')
    parser.add_argument('-t', '--show-tasks', action='store_true',
                        help='shows the tasks for each story')
    parser.add_argument(
        '-c', '--collate', action='store_true',
        help='collate stories for easier sorting after cutting all pages')
    parser.add_argument(
        '-s', '--strict', action='store_true',
        help='fails if the csv file does not contain all required columns')
    options = parser.parse_args()

    # Default the output path to the CSV path with a ".pdf" extension.
    if options.output is not None:
        target = options.output
    else:
        target = os.path.splitext(options.csv)[0] + '.pdf'

    # Each page holds a 2x2 grid of stories inside the margins.
    margin = options.margin
    story_width = (297 - (margin * 2)) / 2
    story_height = (210 - (margin * 2)) / 2

    with open(options.csv, 'r') as handle:
        rows = list(csv.reader(handle, delimiter=','))
        header = rows[0]
        if options.strict:
            validate_columns(header)
        parsed = map(partial(make_pivotal_story, header),
                     enumerate(rows[1:], 1))

    pdf = Pdf()
    pdf.set_auto_page_break(False)

    # Row-major slots: top-left, top-right, bottom-left, bottom-right.
    slots = [(left, top)
             for top in (margin, margin + story_height)
             for left in (margin, margin + story_width)]

    cards = list(iterstories(parsed, include_tasks=options.show_tasks))

    if options.collate:
        split = stacked_chunks
    else:
        split = chunks

    for batch in split(cards, 4):
        pdf.add_page('Landscape')
        for card, (left, top) in zip(batch, slots):
            card.draw(pdf, left, top, story_width, story_height,
                      options.show_number)

    pdf.output(target)
    open_file(target)
    return target