def createRequest(self, op, req, outgoingData):
    """Log an outgoing request and delegate to QNetworkAccessManager.

    The operation type and the request URL are written to the Qt debug
    log, the 'NetworkAccessManagerCreateRequestPre' and
    'NetworkAccessManagerCreateRequestPost' hooks are fired around the
    base-class call, and SSL errors are ignored on the reply when the
    manager was configured to do so.

    Returns the reply produced by QNetworkAccessManager.createRequest.
    """
    if op == QNetworkAccessManager.GetOperation:
        qDebug('HTTP/1.1 GET Request')
    elif op == QNetworkAccessManager.PostOperation:
        qDebug('HTTP/1.1 POST Request')
    elif op == QNetworkAccessManager.HeadOperation:
        qDebug('HTTP/1.1 HEAD Request')
    elif op == QNetworkAccessManager.PutOperation:
        qDebug('HTTP/1.1 PUT Request')
    elif op == QNetworkAccessManager.DeleteOperation:
        qDebug('HTTP/1.1 DELETE Request')
    elif op == QNetworkAccessManager.CustomOperation:
        qDebug('HTTP/1.1 CUSTOM Request')
    else:
        qWarning('Unexpected HTTP Operation Type')

    qDebug('URL %s' % req.url().toString())

    # The hooks receive a snapshot of this function's locals, so the
    # names bound here (op, req, outgoingData, reply) are visible to them.
    do_action('NetworkAccessManagerCreateRequestPre', Bunch(locals()))

    reply = QNetworkAccessManager.createRequest(self, op, req, outgoingData)

    # The flag may be the raw command-line string ('yes'/'no') or an
    # already-converted boolean.  Comparing against 'yes' alone silently
    # disables the option whenever a bool is supplied.
    if self.m_ignoreSslErrors in (True, 'yes'):
        reply.ignoreSslErrors()

    do_action('NetworkAccessManagerCreateRequestPost', Bunch(locals()))

    return reply
def createRequest(self, op, req, outgoingData):
    """Create the reply via the base class and announce the request.

    Fires the 'NetworkAccessManagerCreateRequestPre' and
    'NetworkAccessManagerCreateRequestPost' hooks, assigns the reply a
    fresh numeric id, connects its readyRead signal to handleStarted and
    emits resourceRequested with a description of the request.

    Returns the reply produced by QNetworkAccessManager.createRequest.
    """
    do_action('NetworkAccessManagerCreateRequestPre', Bunch(locals()))

    reply = QNetworkAccessManager.createRequest(self, op, req, outgoingData)

    # NOTE(review): this is a plain truthiness test, so a non-empty string
    # such as 'no' would also enable it -- confirm the flag is a bool here.
    if self.m_ignoreSslErrors:
        reply.ignoreSslErrors()

    # Collect the request's raw headers as a list of name/value dicts.
    headers = []
    for header in req.rawHeaderList():
        # 'header' is rebound from the raw header name to the dict entry;
        # the right-hand side is evaluated before the rebinding.
        header = {'name': str(header), 'value': str(req.rawHeader(header))}
        headers.append(header)

    # Remember which id belongs to this reply so later stages can refer
    # to the same request.
    self.m_idCounter += 1
    self.m_ids[reply] = self.m_idCounter

    data = {
        'id': self.m_idCounter,
        'url': req.url().toString(),
        'method': toString(op),
        'headers': headers,
        'time': QDateTime.currentDateTime()
    }

    reply.readyRead.connect(self.handleStarted)

    # The hook sees every local bound above (reply, headers, header, data).
    do_action('NetworkAccessManagerCreateRequestPost', Bunch(locals()))

    self.resourceRequested.emit(data)
    return reply
def __init__(self, args, parent=None):
    """Build the 'phantom' object from the parsed command-line arguments.

    Creates the main WebPage, configures the proxy and the shared network
    access manager, applies the default page settings, exposes this
    object to JavaScript as 'phantom' and evaluates the bundled
    bootstrap script.  Exits the process if the bootstrap resource cannot
    be read.
    """
    QObject.__init__(self, parent)

    # Instance state.
    self.m_defaultPageSettings = {}
    self.m_verbose = args.verbose
    self.m_page = WebPage(self)
    self.m_returnValue = 0
    self.m_terminated = False

    # Values taken straight from the command line.
    self.m_scriptFile = args.script
    self.m_args = args.script_args

    # Hooks receive a snapshot of the locals; keep the local names stable.
    do_action('PhantomInitPre', Bunch(locals()))

    if args.proxy:
        proxy = QNetworkProxy(QNetworkProxy.HttpProxy,
                              args.proxy[0], int(args.proxy[1]))
        QNetworkProxy.setApplicationProxy(proxy)
    else:
        QNetworkProxyFactory.setUseSystemConfiguration(True)

    # Provide WebPage with a non-standard Network Access Manager
    self.m_netAccessMan = NetworkAccessManager(
        args.disk_cache, args.ignore_ssl_errors, self)
    self.m_page.setNetworkAccessManager(self.m_netAccessMan)

    self.m_page.javaScriptConsoleMessageSent.connect(
        self.printConsoleMessage)

    # Defaults that every page starts from.
    self.m_defaultPageSettings.update({
        'loadImages': args.load_images,
        'loadPlugins': args.load_plugins,
        'userAgent': self.m_page.userAgent(),
        'localAccessRemote': args.local_access_remote,
    })
    self.m_page.applySettings(self.m_defaultPageSettings)

    script_path = os.path.abspath(self.m_scriptFile)
    self.libraryPath = os.path.dirname(script_path)
    # Do not leave an extra name in the snapshot taken by the Post hook.
    del script_path

    # inject our properties and slots into javascript
    self.m_page.mainFrame().addToJavaScriptWindowObject('phantom', self)

    bootstrap = QFile(':/bootstrap.js')
    if not bootstrap.open(QFile.ReadOnly):
        sys.exit('Can not bootstrap!')
    bootstrapper = str(bootstrap.readAll())
    bootstrap.close()
    if not bootstrapper:
        sys.exit('Can not bootstrap!')
    self.m_page.mainFrame().evaluateJavaScript(bootstrapper)

    do_action('PhantomInitPost', Bunch(locals()))
class CustomPage(QWebPage):
    # QWebPage subclass that forwards JavaScript alerts and console
    # messages to signals on its parent and serves a configurable user
    # agent and upload file name.
    #
    # Documented with comments rather than a class docstring because the
    # class body's locals() are handed to the 'CustomPage' hook below.

    def __init__(self, parent=None):
        """Initialise the page and record its parent, user agent and upload file."""
        QWebPage.__init__(self, parent)

        # The parent object's signals are used by the javaScript* overrides.
        self.parent = parent
        # Start from the user agent the base class reports for an empty URL.
        self.m_userAgent = QWebPage.userAgentForUrl(self, QUrl())
        self.m_uploadFile = ''

        do_action('CustomPageInit', Bunch(locals()))

    def chooseFile(self, originatingFrame, oldFile):
        """Return the stored upload file name; both arguments are ignored."""
        return self.m_uploadFile

    def shouldInterruptJavaScript(self):
        """Process pending events (42 is the processEvents time argument) and never interrupt."""
        QApplication.processEvents(QEventLoop.AllEvents, 42)
        return False

    def javaScriptAlert(self, originatingFrame, msg):
        """Forward the alert text through the parent's javaScriptAlertSent signal."""
        self.parent.javaScriptAlertSent.emit(msg)

    def javaScriptConsoleMessage(self, message, lineNumber, sourceID):
        """Forward a console message through the parent's javaScriptConsoleMessageSent signal."""
        self.parent.javaScriptConsoleMessageSent.emit(message, lineNumber, sourceID)

    def userAgentForUrl(self, url):
        """Return the stored user agent regardless of the URL."""
        return self.m_userAgent

    # Runs once, while the class body executes; the hook receives the
    # class namespace built so far.
    do_action('CustomPage', Bunch(locals()))
def parseArgs(args):
    """Parse the command line and normalise the option values.

    args is the list of command-line arguments (without the program
    name).  The yes/no options are converted to booleans, '--proxy' is
    split into a [host, port] list of strings, and the script path is
    checked for existence.

    Returns the argparse namespace, extended with 'script_args' (the
    arguments argparse did not recognise).

    Exits the process (after printing the help text where appropriate)
    when the proxy specification is malformed, no script was given, or
    the script does not exist.
    """
    # Handle all command-line options
    p = argParser()
    arg_data = p.parse_known_args(args)
    args = arg_data[0]
    args.script_args = arg_data[1]

    # Convert the 'yes'/'no' choices to booleans.  Each comparison keeps
    # the same "anything else" fallback the option had before.
    args.disk_cache = args.disk_cache != 'no'
    args.ignore_ssl_errors = args.ignore_ssl_errors != 'no'
    args.load_images = args.load_images == 'yes'
    args.load_plugins = args.load_plugins != 'no'
    args.local_access_remote = args.local_access_remote != 'no'

    if args.proxy:
        item = args.proxy.split(':')
        if len(item) < 2 or not item[1]:
            p.print_help()
            sys.exit(1)
        # The port is later passed through int(); reject a non-numeric
        # port here instead of crashing with a traceback afterwards.
        try:
            int(item[1])
        except ValueError:
            p.print_help()
            sys.exit(1)
        args.proxy = item

    # The hook receives a snapshot of the locals (p, arg_data, args, item).
    do_action('ParseArgs', Bunch(locals()))

    if not args.script:
        p.print_help()
        sys.exit(1)

    if not os.path.exists(args.script):
        sys.exit('No such file or directory: \'%s\'' % args.script)

    return args
class NetworkAccessManager(QNetworkAccessManager):
    """QNetworkAccessManager that logs traffic and applies network options.

    Requests are logged through qDebug/qWarning, responses are logged
    when the parent is verbose, and the disk-cache and ignore-SSL-errors
    options are honoured.
    """

    def __init__(self, diskCacheEnabled, ignoreSslErrors, parent=None):
        """Configure the manager.

        diskCacheEnabled and ignoreSslErrors may each be given either as
        the raw command-line string ('yes'/'no') or as a boolean.
        parent, when given, is consulted for its m_verbose flag.
        """
        QNetworkAccessManager.__init__(self, parent)
        self.m_ignoreSslErrors = ignoreSslErrors

        # parent defaults to None, so do not dereference it blindly.
        if parent is not None and parent.m_verbose:
            self.finished.connect(self.handleFinished)

        # Comparing against 'yes' alone silently disables the cache when
        # the option has already been converted to a bool.
        if diskCacheEnabled in (True, 'yes'):
            m_networkDiskCache = QNetworkDiskCache()
            m_networkDiskCache.setCacheDirectory(
                QDesktopServices.storageLocation(QDesktopServices.CacheLocation))
            self.setCache(m_networkDiskCache)

        # The hook receives a snapshot of the locals bound above.
        do_action('NetworkAccessManagerInit', Bunch(locals()))

    def createRequest(self, op, req, outgoingData):
        """Log the request, fire the Pre/Post hooks and return the reply."""
        if op == QNetworkAccessManager.GetOperation:
            qDebug('HTTP/1.1 GET Request')
        elif op == QNetworkAccessManager.PostOperation:
            qDebug('HTTP/1.1 POST Request')
        elif op == QNetworkAccessManager.HeadOperation:
            qDebug('HTTP/1.1 HEAD Request')
        elif op == QNetworkAccessManager.PutOperation:
            qDebug('HTTP/1.1 PUT Request')
        elif op == QNetworkAccessManager.DeleteOperation:
            qDebug('HTTP/1.1 DELETE Request')
        elif op == QNetworkAccessManager.CustomOperation:
            qDebug('HTTP/1.1 CUSTOM Request')
        else:
            qWarning('Unexpected HTTP Operation Type')

        qDebug('URL %s' % req.url().toString())

        do_action('NetworkAccessManagerCreateRequestPre', Bunch(locals()))

        reply = QNetworkAccessManager.createRequest(self, op, req, outgoingData)

        # Accept the flag as either 'yes' or True (see __init__).
        if self.m_ignoreSslErrors in (True, 'yes'):
            reply.ignoreSslErrors()

        do_action('NetworkAccessManagerCreateRequestPost', Bunch(locals()))

        return reply

    def handleFinished(self, reply):
        """Log the URL, status code (when present) and raw headers of a reply."""
        qDebug('HTTP/1.1 Response')
        qDebug('URL %s' % reply.url().toString())
        code = reply.attribute(QNetworkRequest.HttpStatusCodeAttribute)
        if code:
            qDebug('Status code: %d' % code)

        do_action('NetworkAccessManagerHandleFinished', Bunch(locals()))

        headerPairs = reply.rawHeaderPairs()
        for pair in headerPairs:
            qDebug('"%s" = "%s"' % (pair[0], pair[1]))

    # Runs once, while the class body executes; the hook receives the
    # class namespace built so far.
    do_action('NetworkAccessManager', Bunch(locals()))
def __init__(self, parent=None):
    """Initialise the page and record its parent, user agent and upload file."""
    QWebPage.__init__(self, parent)

    # Keep a direct reference to the parent object on the instance.
    self.parent = parent
    # Start from the user agent the base class reports for an empty URL.
    self.m_userAgent = QWebPage.userAgentForUrl(self, QUrl())
    self.m_uploadFile = ''

    # The hook receives a snapshot of the locals (self, parent).
    do_action('CustomPageInit', Bunch(locals()))
def handleFinished(self, reply):
    """Write a finished reply's URL, status code and raw headers to the debug log.

    The status line is only logged when the reply carries a truthy
    HTTP status code attribute.  The
    'NetworkAccessManagerHandleFinished' hook fires before the headers
    are listed.
    """
    qDebug('HTTP/1.1 Response')
    qDebug('URL %s' % reply.url().toString())

    code = reply.attribute(QNetworkRequest.HttpStatusCodeAttribute)
    if code:
        qDebug('Status code: %d' % code)

    # The hook receives a snapshot of the locals (self, reply, code).
    do_action('NetworkAccessManagerHandleFinished', Bunch(locals()))

    # One log line per raw header, in the order the reply reports them.
    for name, value in reply.rawHeaderPairs():
        qDebug('"%s" = "%s"' % (name, value))
def __init__(self, parent=None):
    """Initialise the page and, when the parent is verbose, log navigation."""
    QWebPage.__init__(self, parent)

    # Keep a direct reference to the parent object on the instance.
    self.parent = parent
    self.m_nextFileTag = ''
    # Start from the user agent the base class reports for an empty URL.
    self.m_userAgent = QWebPage.userAgentForUrl(self, QUrl())

    # NOTE(review): parent defaults to None but is dereferenced here, so a
    # parent exposing m_verbose appears to be required -- confirm callers.
    if self.parent.m_verbose:
        self.currentFrame().urlChanged.connect(self.handleFrameUrlChanged)
        self.linkClicked.connect(self.handleLinkClicked)

    # The hook receives a snapshot of the locals (self, parent).
    do_action('WebPageInit', Bunch(locals()))
def argParser():
    """Build and return the argparse parser for the command line.

    Defines the optional positional 'script', the yes/no options
    (--disk-cache, --ignore-ssl-errors, --load-images, --load-plugins,
    --local-access-remote), --proxy, -v/--verbose and --version.  The
    'ArgParser' hook is fired with the finished parser before it is
    returned.
    """
    parser = argparse.ArgumentParser(
        description='Minimalistic headless WebKit-based JavaScript-driven tool',
        usage=
        '%(prog)s [options] script.[js|coffee] [script argument [script argument ...]]',
        formatter_class=argparse.RawTextHelpFormatter)

    # nargs='?' makes the script optional at parse time.
    parser.add_argument(
        'script',
        metavar='script.[js|coffee]',
        nargs='?',
        help='The script to execute, and any args to pass to it')

    # The yes/no options are stored as the strings 'yes' or 'no'.
    parser.add_argument('--disk-cache',
                        default='no',
                        choices=['yes', 'no'],
                        help='Enable disk cache (default: %(default)s)')
    parser.add_argument('--ignore-ssl-errors',
                        default='no',
                        choices=['yes', 'no'],
                        help='Ignore SSL errors (default: %(default)s)')
    parser.add_argument('--load-images',
                        default='yes',
                        choices=['yes', 'no'],
                        help='Load all inlined images (default: %(default)s)')
    parser.add_argument(
        '--load-plugins',
        default='no',
        choices=['yes', 'no'],
        help=
        'Load all plugins (i.e. Flash, Silverlight, ...) (default: %(default)s)'
    )
    parser.add_argument(
        '--local-access-remote',
        default='no',
        choices=['yes', 'no'],
        help='Local content can access remote URL (default: %(default)s)')
    parser.add_argument('--proxy',
                        metavar='address:port',
                        help='Set the network proxy')
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help='Show verbose debug messages')
    # 'license' is looked up by name when this function runs; it is not
    # defined in this function.
    parser.add_argument('--version',
                        action='version',
                        version=license,
                        help='show this program\'s version and license')

    # The hook receives a snapshot of the locals (parser).
    do_action('ArgParser', Bunch(locals()))

    return parser
def __init__(self, diskCacheEnabled, ignoreSslErrors, parent=None):
    """Configure the network access manager.

    diskCacheEnabled may be given either as the raw command-line string
    ('yes'/'no') or as a boolean; a QNetworkDiskCache is installed when
    it is enabled.  ignoreSslErrors is stored unchanged on the instance.
    When parent is given and verbose, finished replies are routed to
    handleFinished.
    """
    QNetworkAccessManager.__init__(self, parent)
    self.m_ignoreSslErrors = ignoreSslErrors

    # parent defaults to None, so do not dereference it blindly.
    if parent is not None and parent.m_verbose:
        self.finished.connect(self.handleFinished)

    # Comparing against 'yes' alone silently disables the cache when the
    # option has already been converted to a bool.
    if diskCacheEnabled in (True, 'yes'):
        m_networkDiskCache = QNetworkDiskCache()
        m_networkDiskCache.setCacheDirectory(
            QDesktopServices.storageLocation(QDesktopServices.CacheLocation))
        self.setCache(m_networkDiskCache)

    # The hook receives a snapshot of the locals bound above.
    do_action('NetworkAccessManagerInit', Bunch(locals()))
def __init__(self, parent=None): QObject.__init__(self, parent) # variable declarations self.m_paperSize = {} self.m_clipRect = QRect() self.m_libraryPath = '' self.setObjectName('WebPage') self.m_webPage = CustomPage(self) self.m_mainFrame = self.m_webPage.mainFrame() self.m_webPage.loadStarted.connect(self.loadStarted) self.m_webPage.loadFinished.connect(self.finish) # Start with transparent background palette = self.m_webPage.palette() palette.setBrush(QPalette.Base, Qt.transparent) self.m_webPage.setPalette(palette) # Page size does not need to take scrollbars into account self.m_webPage.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff) self.m_webPage.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff) self.m_webPage.settings().setAttribute( QWebSettings.OfflineStorageDatabaseEnabled, True) self.m_webPage.settings().setOfflineStoragePath( QDesktopServices.storageLocation(QDesktopServices.DataLocation)) self.m_webPage.settings().setAttribute( QWebSettings.LocalStorageDatabaseEnabled, True) self.m_webPage.settings().setAttribute( QWebSettings.OfflineWebApplicationCacheEnabled, True) self.m_webPage.settings().setOfflineWebApplicationCachePath( QDesktopServices.storageLocation(QDesktopServices.DataLocation)) self.m_webPage.settings().setAttribute( QWebSettings.FrameFlatteningEnabled, True) self.m_webPage.settings().setAttribute( QWebSettings.LocalStorageEnabled, True) self.m_webPage.settings().setLocalStoragePath( QDesktopServices.storageLocation(QDesktopServices.DataLocation)) # Ensure we have a document.body. self.m_webPage.mainFrame().setHtml('<html><body></body></html>') self.m_webPage.setViewportSize(QSize(400, 300)) do_action('WebPageInit', Bunch(locals()))
def handleStarted(self):
    """Emit resourceReceived (stage 'start') the first time a reply reports in.

    The reply is taken from self.sender().  Nothing happens when there
    is no sender or when the reply is already listed in self.m_started.
    """
    reply = self.sender()
    if not reply:
        return
    # Only the first notification per reply is reported.
    if reply in self.m_started:
        return

    self.m_started.append(reply)

    # Collect the reply's raw headers as a list of name/value dicts.
    headers = []
    for header in reply.rawHeaderList():
        # 'header' is rebound from the raw header name to the dict entry;
        # the right-hand side is evaluated before the rebinding.
        header = {
            'name': str(header),
            'value': str(reply.rawHeader(header))
        }
        headers.append(header)

    data = {
        'stage': 'start',
        # Id stored for this reply in self.m_ids.
        'id': self.m_ids[reply],
        'url': reply.url().toString(),
        'status': reply.attribute(QNetworkRequest.HttpStatusCodeAttribute),
        'statusText': reply.attribute(QNetworkRequest.HttpReasonPhraseAttribute),
        'contentType': reply.header(QNetworkRequest.ContentTypeHeader),
        'bodySize': reply.size(),
        'redirectURL': reply.header(QNetworkRequest.LocationHeader),
        'headers': headers,
        'time': QDateTime.currentDateTime()
    }

    # The hook receives a snapshot of the locals (reply, headers, header, data).
    do_action('NetworkAccessManagerHandleStarted', Bunch(locals()))

    self.resourceReceived.emit(data)
class WebPage(QWebPage):
    # QWebPage subclass that prints JavaScript alerts and console output,
    # serves a configurable user agent and resolves file uploads through
    # the parent's tag -> path mapping.
    #
    # Documented with comments rather than a class docstring because the
    # class body's locals() are handed to the 'WebPage' hook below.

    def __init__(self, parent=None):
        """Set up the page; log URL changes and clicks when the parent is verbose."""
        QWebPage.__init__(self, parent)

        self.parent = parent
        self.m_nextFileTag = ''
        self.m_userAgent = QWebPage.userAgentForUrl(self, QUrl())

        if parent.m_verbose:
            self.currentFrame().urlChanged.connect(self.handleFrameUrlChanged)
            self.linkClicked.connect(self.handleLinkClicked)

        # The hook receives a snapshot of the locals (self, parent).
        do_action('WebPageInit', Bunch(locals()))

    def handleFrameUrlChanged(self, url):
        """Log the frame's new URL."""
        qDebug('URL Changed: %s' % url.toString())

    def handleLinkClicked(self, url):
        """Log the URL of a clicked link."""
        qDebug('URL Clicked: %s' % url.toString())

    def javaScriptAlert(self, webframe, msg):
        """Print the alert text to stdout."""
        # Parenthesised single-argument form: same output as the
        # statement form, and also valid as a function call.
        print('JavaScript alert: %s' % msg)

    def javaScriptConsoleMessage(self, message, lineNumber, sourceID):
        """Print a console message, prefixed with its source and line when known."""
        print('%s:%d %s' % (sourceID, lineNumber, message)
              if sourceID else message)

    def shouldInterruptJavaScript(self):
        """Process pending events and never interrupt the running script."""
        QApplication.processEvents(QEventLoop.AllEvents, 42)
        return False

    def userAgentForUrl(self, url):
        """Return the stored user agent regardless of the URL."""
        return self.m_userAgent

    def chooseFile(self, webframe, suggestedFile):
        """Return the upload path registered for the pending file tag, or ''."""
        uploads = self.parent.m_upload_file
        tag = self.m_nextFileTag
        return uploads[tag] if tag in uploads else ''

    # Runs once, while the class body executes; the hook receives the
    # class namespace built so far.
    do_action('WebPage', Bunch(locals()))
def main():
    """Program entry point: parse arguments, run the script, exit with its code.

    Installs the custom Qt message handler, creates the QApplication and
    the Phantom object, fires the 'Main' hook, executes the script and
    runs the Qt event loop.  The process exit status is whatever
    phantom.returnValue() reports.
    """
    args = parseArgs(sys.argv[1:])

    # register an alternative Message Handler
    messageHandler = MessageHandler(args.verbose)
    qInstallMsgHandler(messageHandler.process)

    app = QApplication(sys.argv)

    app.setWindowIcon(QIcon(':/resources/pyphantomjs-icon.png'))
    app.setApplicationName('PyPhantomJS')
    app.setOrganizationName('Umaclan Development')
    app.setOrganizationDomain('www.umaclan.com')
    app.setApplicationVersion(version)

    phantom = Phantom(args, app)

    # The hook receives a snapshot of the locals
    # (args, messageHandler, app, phantom).
    do_action('Main', Bunch(locals()))

    # An exit request issued while the script is being executed happens
    # before the event loop runs and is therefore lost; entering the loop
    # afterwards would block forever.  An execute() that reports False
    # ("already terminated") must skip the loop.  An execute() that
    # returns None (no status) keeps the previous behaviour.
    if phantom.execute() is not False:
        app.exec_()

    sys.exit(phantom.returnValue())
def __init__(self, args, parent=None):
    """Build the 'phantom' object from the parsed command-line arguments.

    Reads the script from the already-open file object in args.script
    (and closes it), creates the WebPage, configures proxy, web settings
    and the network access manager, and arranges for this object to be
    injected into JavaScript and for finish() to run after each load.
    """
    QObject.__init__(self, parent)

    # variable declarations
    self.m_loadStatus = self.m_state = ''
    # Three independent dicts.  A chained assignment to a single {} would
    # make the context store, the paper size and the script cache share
    # one object, so writing to any of them would corrupt the others.
    self.m_var = {}
    self.m_paperSize = {}
    self.m_loadScript_cache = {}
    # Defined up front so returnValue() works even if exit() never ran.
    self.m_returnValue = 0
    self.m_verbose = args.verbose
    self.m_page = WebPage(self)
    self.m_clipRect = QRect()

    # setup the values from args
    self.m_script = args.script.read()
    self.m_scriptFile = args.script.name
    self.m_scriptDir = os.path.dirname(args.script.name) + '/'
    self.m_args = args.script_args
    self.m_upload_file = args.upload_file

    autoLoadImages = False if args.load_images == 'no' else True
    pluginsEnabled = True if args.load_plugins == 'yes' else False

    args.script.close()

    # The hook receives a snapshot of the locals bound so far.
    do_action('PhantomInitPre', Bunch(locals()))

    # Start with a transparent page background.
    palette = self.m_page.palette()
    palette.setBrush(QPalette.Base, Qt.transparent)
    self.m_page.setPalette(palette)

    if not args.proxy:
        QNetworkProxyFactory.setUseSystemConfiguration(True)
    else:
        proxy = QNetworkProxy(QNetworkProxy.HttpProxy, args.proxy[0], int(args.proxy[1]))
        QNetworkProxy.setApplicationProxy(proxy)

    self.m_page.settings().setAttribute(QWebSettings.AutoLoadImages, autoLoadImages)
    self.m_page.settings().setAttribute(QWebSettings.PluginsEnabled, pluginsEnabled)
    self.m_page.settings().setAttribute(QWebSettings.FrameFlatteningEnabled, True)
    self.m_page.settings().setAttribute(QWebSettings.OfflineStorageDatabaseEnabled, True)
    self.m_page.settings().setAttribute(QWebSettings.LocalStorageEnabled, True)
    self.m_page.settings().setLocalStoragePath(
        QDesktopServices.storageLocation(QDesktopServices.DataLocation))
    self.m_page.settings().setOfflineStoragePath(
        QDesktopServices.storageLocation(QDesktopServices.DataLocation))

    # Ensure we have a document.body.
    self.m_page.mainFrame().setHtml('<html><body></body></html>')

    self.m_page.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
    self.m_page.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)

    m_netAccessMan = NetworkAccessManager(args.disk_cache, args.ignore_ssl_errors, self)
    self.m_page.setNetworkAccessManager(m_netAccessMan)

    # inject our properties and slots into javascript
    self.m_page.mainFrame().javaScriptWindowObjectCleared.connect(self.inject)

    self.m_page.loadFinished.connect(self.finish)

    do_action('PhantomInitPost', Bunch(locals()))
class Phantom(QObject):
    """The 'phantom' object exposed to the executed script.

    Owns the WebPage, evaluates the user script (again after every page
    load), and exposes rendering, navigation and state helpers to
    JavaScript through Qt properties and slots.
    """

    def __init__(self, args, parent=None):
        """Read the script from args.script and configure the page and network."""
        QObject.__init__(self, parent)

        # variable declarations
        self.m_loadStatus = self.m_state = ''
        # Three independent dicts.  A chained assignment to a single {}
        # would make the context store, the paper size and the script
        # cache share one object.
        self.m_var = {}
        self.m_paperSize = {}
        self.m_loadScript_cache = {}
        # Defined up front so returnValue() works even if exit() never ran.
        self.m_returnValue = 0
        self.m_verbose = args.verbose
        self.m_page = WebPage(self)
        self.m_clipRect = QRect()

        # setup the values from args
        self.m_script = args.script.read()
        self.m_scriptFile = args.script.name
        self.m_scriptDir = os.path.dirname(args.script.name) + '/'
        self.m_args = args.script_args
        self.m_upload_file = args.upload_file

        autoLoadImages = False if args.load_images == 'no' else True
        pluginsEnabled = True if args.load_plugins == 'yes' else False

        args.script.close()

        # The hook receives a snapshot of the locals bound so far.
        do_action('PhantomInitPre', Bunch(locals()))

        # Start with a transparent page background.
        palette = self.m_page.palette()
        palette.setBrush(QPalette.Base, Qt.transparent)
        self.m_page.setPalette(palette)

        if not args.proxy:
            QNetworkProxyFactory.setUseSystemConfiguration(True)
        else:
            proxy = QNetworkProxy(QNetworkProxy.HttpProxy, args.proxy[0], int(args.proxy[1]))
            QNetworkProxy.setApplicationProxy(proxy)

        self.m_page.settings().setAttribute(QWebSettings.AutoLoadImages, autoLoadImages)
        self.m_page.settings().setAttribute(QWebSettings.PluginsEnabled, pluginsEnabled)
        self.m_page.settings().setAttribute(QWebSettings.FrameFlatteningEnabled, True)
        self.m_page.settings().setAttribute(QWebSettings.OfflineStorageDatabaseEnabled, True)
        self.m_page.settings().setAttribute(QWebSettings.LocalStorageEnabled, True)
        self.m_page.settings().setLocalStoragePath(
            QDesktopServices.storageLocation(QDesktopServices.DataLocation))
        self.m_page.settings().setOfflineStoragePath(
            QDesktopServices.storageLocation(QDesktopServices.DataLocation))

        # Ensure we have a document.body.
        self.m_page.mainFrame().setHtml('<html><body></body></html>')

        self.m_page.mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
        self.m_page.mainFrame().setScrollBarPolicy(Qt.Vertical, Qt.ScrollBarAlwaysOff)

        m_netAccessMan = NetworkAccessManager(args.disk_cache, args.ignore_ssl_errors, self)
        self.m_page.setNetworkAccessManager(m_netAccessMan)

        # inject our properties and slots into javascript
        self.m_page.mainFrame().javaScriptWindowObjectCleared.connect(self.inject)

        self.m_page.loadFinished.connect(self.finish)

        do_action('PhantomInitPost', Bunch(locals()))

    def execute(self):
        """Evaluate the user script in the main frame.

        A leading '#!' line is commented out and '.coffee' scripts are
        converted through CSConverter first.
        """
        if self.m_script.startswith('#!'):
            self.m_script = '//' + self.m_script

        if self.m_scriptFile.lower().endswith('.coffee'):
            coffee = CSConverter(self)
            self.m_script = coffee.convert(self.m_script)

        self.m_page.mainFrame().evaluateJavaScript(self.m_script)

    def finish(self, success):
        """Record the load status and evaluate the script again."""
        self.m_loadStatus = 'success' if success else 'fail'
        self.m_page.mainFrame().evaluateJavaScript(self.m_script)

    def inject(self):
        """Expose this object to JavaScript as 'phantom'."""
        self.m_page.mainFrame().addToJavaScriptWindowObject('phantom', self)

    def renderPdf(self, fileName):
        """Print the main frame to fileName as a PDF.

        Returns True on success, or False when the paper size specifies
        neither a width/height pair nor a 'format'.
        """
        p = QPrinter()
        p.setOutputFormat(QPrinter.PdfFormat)
        p.setOutputFileName(fileName)
        p.setResolution(pdf_dpi)

        # Work on a copy: the defaults derived below from the current
        # page must not be written back into the stored paper size,
        # otherwise they would stick for every later render.
        paperSize = dict(self.m_paperSize)

        if not len(paperSize):
            pageSize = QSize(self.m_page.mainFrame().contentsSize())
            paperSize['width'] = str(pageSize.width()) + 'px'
            paperSize['height'] = str(pageSize.height()) + 'px'
            paperSize['border'] = '0px'

        if paperSize.get('width') and paperSize.get('height'):
            sizePt = QSizeF(ceil(self.stringToPointSize(paperSize['width'])),
                            ceil(self.stringToPointSize(paperSize['height'])))
            p.setPaperSize(sizePt, QPrinter.Point)
        elif 'format' in paperSize:
            orientation = QPrinter.Landscape if paperSize.get('orientation') and paperSize['orientation'].lower() == 'landscape' else QPrinter.Portrait
            orientation = QPrinter.Orientation(orientation)
            p.setOrientation(orientation)

            formats = {
                'A3': QPrinter.A3,
                'A4': QPrinter.A4,
                'A5': QPrinter.A5,
                'Legal': QPrinter.Legal,
                'Letter': QPrinter.Letter,
                'Tabloid': QPrinter.Tabloid
            }

            p.setPaperSize(QPrinter.A4)  # fallback
            # Case-insensitive match of the requested format name.
            for name, size in formats.items():
                if name.lower() == paperSize['format'].lower():
                    p.setPaperSize(size)
                    break
        else:
            return False

        border = floor(self.stringToPointSize(paperSize['border'])) if paperSize.get('border') else 0
        p.setPageMargins(border, border, border, border, QPrinter.Point)

        self.m_page.mainFrame().print_(p)
        return True

    def returnValue(self):
        """Return the exit code recorded by exit() (0 until then)."""
        return self.m_returnValue

    def stringToPointSize(self, string):
        """Convert a length such as '10mm' or '3in' to points.

        The first unit suffix in the table that the string ends with
        selects the multiplier; the empty suffix is the last entry.
        """
        units = (
            ('mm', 72 / 25.4),
            ('cm', 72 / 2.54),
            ('in', 72.0),
            ('px', 72.0 / pdf_dpi / 2.54),
            ('', 72.0 / pdf_dpi / 2.54)
        )

        for unit, factor in units:
            if string.endswith(unit):
                value = string.rstrip(unit)
                return float(value) * factor
        return 0

    ##
    # Properties and methods exposed to JavaScript
    ##

    @pyqtProperty('QStringList')
    def args(self):
        return self.m_args

    @pyqtProperty('QVariantMap')
    def clipRect(self):
        result = {
            'width': self.m_clipRect.width(),
            'height': self.m_clipRect.height(),
            'top': self.m_clipRect.top(),
            'left': self.m_clipRect.left()
        }
        return result

    @clipRect.setter
    def clipRect(self, size):
        # Collect the four components in a local mapping instead of
        # writing them into the module's globals.
        rect = {}
        for item in ('width', 'height', 'top', 'left'):
            try:
                value = int(size[item])
            except KeyError:
                # Component not supplied: keep the current one.
                value = getattr(self.m_clipRect, item)()
            else:
                # Negative sizes are clamped; negative offsets are allowed.
                if value < 0 and item not in ('top', 'left'):
                    value = 0
            rect[item] = value

        self.m_clipRect = QRect(rect['left'], rect['top'],
                                rect['width'], rect['height'])

    @pyqtProperty(str)
    def content(self):
        return self.m_page.mainFrame().toHtml()

    @content.setter
    def content(self, content):
        self.m_page.mainFrame().setHtml(content)

    @pyqtSlot()
    @pyqtSlot(int)
    def exit(self, code=0):
        """Record the exit code, stop re-running the script and quit the app."""
        self.m_returnValue = code
        self.m_page.loadFinished.disconnect(self.finish)
        QTimer.singleShot(0, qApp, SLOT('quit()'))

    @pyqtProperty(str)
    def loadStatus(self):
        return self.m_loadStatus

    @pyqtSlot(str, result=bool)
    def loadScript(self, script):
        """Evaluate a script file located relative to the main script.

        Results are cached by file name.  Returns False when the file
        cannot be read, True otherwise.
        """
        if script in self.m_loadScript_cache:
            self.m_page.mainFrame().evaluateJavaScript(self.m_loadScript_cache[script])
            return True

        scriptFile = script
        try:
            # 'with' closes the file even if reading fails.
            with codecs.open(self.m_scriptDir + script, encoding='utf-8') as handle:
                script = handle.read()
        except IOError:
            return False

        if script.startswith('#!'):
            script = '//' + script

        if scriptFile.lower().endswith('.coffee'):
            coffee = CSConverter(self)
            script = coffee.convert(script)

        self.m_loadScript_cache[scriptFile] = script
        self.m_page.mainFrame().evaluateJavaScript(script)
        return True

    @pyqtSlot(str, name='open')
    def open_(self, address):
        """Stop any current load and navigate the main frame to address."""
        qDebug('Opening address %s' % address)
        self.m_page.triggerAction(QWebPage.Stop)
        self.m_loadStatus = 'loading'
        self.m_page.mainFrame().setUrl(QUrl(address))

    @pyqtProperty('QVariantMap')
    def paperSize(self):
        return self.m_paperSize

    @paperSize.setter
    def paperSize(self, size):
        self.m_paperSize = size

    @pyqtSlot(str, result=bool)
    def render(self, fileName):
        """Render the page to fileName (PDF when the name ends in '.pdf').

        The target directory is created if needed.  Returns the result
        of renderPdf() or of saving the image, or False when the page
        has no content size.
        """
        fileInfo = QFileInfo(fileName)
        path = QDir()
        path.mkpath(fileInfo.absolutePath())

        if fileName.lower().endswith('.pdf'):
            return self.renderPdf(fileName)

        viewportSize = QSize(self.m_page.viewportSize())
        pageSize = QSize(self.m_page.mainFrame().contentsSize())

        bufferSize = QSize()
        if not self.m_clipRect.isEmpty():
            bufferSize = self.m_clipRect.size()
        else:
            bufferSize = self.m_page.mainFrame().contentsSize()

        # A QSize never compares equal to '', so test emptiness directly.
        if pageSize.isEmpty():
            return False

        image = QImage(bufferSize, QImage.Format_ARGB32)
        image.fill(qRgba(255, 255, 255, 0))
        p = QPainter(image)

        p.setRenderHint(QPainter.Antialiasing, True)
        p.setRenderHint(QPainter.TextAntialiasing, True)
        p.setRenderHint(QPainter.SmoothPixmapTransform, True)

        # Temporarily grow the viewport to the full page for rendering.
        self.m_page.setViewportSize(pageSize)

        if not self.m_clipRect.isEmpty():
            p.translate(-self.m_clipRect.left(), -self.m_clipRect.top())
            self.m_page.mainFrame().render(p, QRegion(self.m_clipRect))
        else:
            self.m_page.mainFrame().render(p)

        p.end()
        self.m_page.setViewportSize(viewportSize)
        return image.save(fileName)

    @pyqtSlot('QWebElement', str)
    def setFormInputFile(self, el, fileTag):
        """Select fileTag for the next file chooser and click the element."""
        self.m_page.m_nextFileTag = fileTag
        el.evaluateJavaScript('''(function(target){ var evt = document.createEvent('MouseEvents'); evt.initMouseEvent("click", true, true, window, 0, 0, 0, 0, 0, false, false, false, false, 0, null); target.dispatchEvent(evt);})(this);''')

    @pyqtSlot(int)
    def sleep(self, ms):
        """Keep processing Qt events until more than ms milliseconds have passed."""
        startTime = QTime.currentTime()

        while True:
            QApplication.processEvents(QEventLoop.AllEvents, 25)
            if startTime.msecsTo(QTime.currentTime()) > ms:
                break
            usleep(0.005)

    @pyqtProperty(str)
    def state(self):
        return self.m_state

    @state.setter
    def state(self, value):
        self.m_state = value

    @pyqtProperty(str)
    def userAgent(self):
        return self.m_page.m_userAgent

    @userAgent.setter
    def userAgent(self, ua):
        self.m_page.m_userAgent = ua

    @pyqtSlot(str, result='QVariant')
    @pyqtSlot(int, result='QVariant')
    @pyqtSlot(str, 'QVariant')
    @pyqtSlot(int, 'QVariant')
    def ctx(self, name, value=None):
        """Read (no value given) or store a named value in the context store."""
        # NOTE(review): any falsy value (0, '') is treated as a read.
        if not value:
            return self.m_var.get(name)
        self.m_var[name] = value

    @pyqtProperty('QVariantMap')
    def version(self):
        version = {
            'major': version_major,
            'minor': version_minor,
            'patch': version_patch
        }
        return version

    @pyqtProperty('QVariantMap')
    def viewportSize(self):
        size = self.m_page.viewportSize()
        result = {'width': size.width(), 'height': size.height()}
        return result

    @viewportSize.setter
    def viewportSize(self, size):
        # Collect both dimensions in a local mapping instead of writing
        # them into the module's globals.
        dims = {}
        for item in ('width', 'height'):
            try:
                value = int(size[item])
            except KeyError:
                # Dimension not supplied: keep the current one.
                value = getattr(self.m_page.viewportSize(), item)()
            else:
                if value < 0:
                    value = 0
            dims[item] = value

        self.m_page.setViewportSize(QSize(dims['width'], dims['height']))

    # Runs once, while the class body executes; the hook receives the
    # class namespace built so far.
    do_action('Phantom', Bunch(locals()))
def parseArgs(args):
    """Parse the command line, resolve upload tags and open the script.

    args is the list of command-line arguments (without the program
    name).  '--upload-file' values of the form 'tag=path' are collected
    into a dict stored back on args.upload_file, '--proxy' is split into
    a [host, port] list, and args.script is replaced by the opened
    (UTF-8) script file.

    Returns the argparse namespace, extended with 'script_args'.

    Exits the process when a tag is malformed or duplicated, an upload
    file does not exist, the proxy specification is malformed, no script
    was given, or the script cannot be opened.
    """
    # Handle all command-line options
    p = argParser()
    arg_data = p.parse_known_args(args)
    args = arg_data[0]
    args.script_args = arg_data[1]

    if args.upload_file:
        # process the tags
        item_buffer = {}
        for i in range(len(args.upload_file)):
            item = args.upload_file[i].split('=')
            if len(item) < 2 or not len(item[1]):
                # if buffer is empty, or tag has no
                # value 'tag=', print help and exit
                if not len(item_buffer) or \
                   item[1:] and not item[1:][0]:
                    p.print_help()
                    sys.exit(1)

                # this is a bug workaround for argparse.
                # if you call parse_known_args, and you
                # have an --option script arg, the args
                # get jumbled up, and it's inconsistent
                #
                # we're just going to check for -- and
                # swap it all back to the right order
                if args.script_args:
                    for i in range(len(args.upload_file)):
                        if not args.upload_file[i].count('='):
                            # insert the arg after --option (make sure it's not None)
                            if args.script:
                                args.script_args.insert(1, args.script)
                            # insert value args before --option
                            if args.upload_file[i + 1:]:
                                arg_buffer = args.upload_file[i + 1:]
                                arg_buffer.reverse()
                                for val in arg_buffer:
                                    args.script_args.insert(0, val)
                            args.script = args.upload_file[i]
                            break
                else:
                    args.script = args.upload_file[i]
                    args.script_args = args.upload_file[i + 1:]
                # The first non-tag value ends tag processing.
                break

            # duplicate tag checking
            if item[0] in item_buffer:
                sys.exit('Multiple tags named \'%s\' were found' % item[0])

            item_buffer[item[0]] = item[1]

        # make sure files exist
        for tag in item_buffer:
            if not os.path.exists(item_buffer[tag]):
                sys.exit('No such file or directory: \'%s\'' % item_buffer[tag])

        args.upload_file = item_buffer

    if args.proxy:
        item = args.proxy.split(':')
        if len(item) < 2 or not len(item[1]):
            p.print_help()
            sys.exit(1)
        args.proxy = item

    # The hook receives a snapshot of the locals bound above.
    do_action('ParseArgs', Bunch(locals()))

    if not args.script:
        p.print_help()
        sys.exit(1)

    try:
        args.script = codecs.open(args.script, encoding='utf-8')
    except IOError as e:
        # e.strerror is the message part of the error; reading it avoids
        # the tuple-unpacking 'except' form.
        sys.exit('%s: \'%s\'' % (e.strerror, args.script))

    # Callers use the parsed namespace, so it must be handed back.
    return args
def main():
    """Program entry point: parse arguments, run the script, exit with its code.

    Installs the custom Qt message handler, creates the QApplication and
    the Phantom object, fires the 'Main' hook, executes the script and
    runs the Qt event loop.  The process exit status is whatever
    phantom.returnValue() reports.
    """
    args = parseArgs(sys.argv[1:])

    # register an alternative Message Handler
    messageHandler = MessageHandler(args.verbose)
    qInstallMsgHandler(messageHandler.process)

    app = QApplication(sys.argv)

    app.setWindowIcon(QIcon(':/resources/pyphantomjs-icon.png'))
    app.setApplicationName('PyPhantomJS')
    app.setOrganizationName('Umaclan Development')
    app.setOrganizationDomain('www.umaclan.com')
    app.setApplicationVersion(version)

    phantom = Phantom(args, app)

    # The hook receives a snapshot of the locals
    # (args, messageHandler, app, phantom).
    do_action('Main', Bunch(locals()))

    # An exit request issued while the script is being executed happens
    # before the event loop runs and is therefore lost; entering the loop
    # afterwards would block forever.  An execute() that reports False
    # ("already terminated") must skip the loop.  An execute() that
    # returns None (no status) keeps the previous behaviour.
    if phantom.execute() is not False:
        app.exec_()

    sys.exit(phantom.returnValue())


# Module-level hook: at this scope locals() is the module namespace.
do_action('PyPhantomJS', Bunch(locals()))


if __name__ == '__main__':
    main()
class Phantom(QObject):
    # The 'phantom' object exposed to the executed script: owns the main
    # WebPage and the shared network access manager, injects the script
    # and exposes page creation, exit and library-path helpers to
    # JavaScript.
    #
    # Documented with comments rather than a class docstring because the
    # class body's locals() are handed to the 'Phantom' hook below.

    def __init__(self, args, parent=None):
        """Build the object from the parsed arguments and run the bootstrap script."""
        QObject.__init__(self, parent)

        # Instance state.
        self.m_defaultPageSettings = {}
        self.m_verbose = args.verbose
        self.m_page = WebPage(self)
        self.m_returnValue = 0
        self.m_terminated = False

        # Values taken straight from the command line.
        self.m_scriptFile = args.script
        self.m_args = args.script_args

        # Hooks receive a snapshot of the locals; keep the names stable.
        do_action('PhantomInitPre', Bunch(locals()))

        if args.proxy:
            proxy = QNetworkProxy(QNetworkProxy.HttpProxy,
                                  args.proxy[0], int(args.proxy[1]))
            QNetworkProxy.setApplicationProxy(proxy)
        else:
            QNetworkProxyFactory.setUseSystemConfiguration(True)

        # Provide WebPage with a non-standard Network Access Manager
        self.m_netAccessMan = NetworkAccessManager(
            args.disk_cache, args.ignore_ssl_errors, self)
        self.m_page.setNetworkAccessManager(self.m_netAccessMan)

        self.m_page.javaScriptConsoleMessageSent.connect(
            self.printConsoleMessage)

        # Defaults that every page starts from.
        self.m_defaultPageSettings.update({
            'loadImages': args.load_images,
            'loadPlugins': args.load_plugins,
            'userAgent': self.m_page.userAgent(),
            'localAccessRemote': args.local_access_remote,
        })
        self.m_page.applySettings(self.m_defaultPageSettings)

        self.libraryPath = os.path.dirname(
            os.path.abspath(self.m_scriptFile))

        # inject our properties and slots into javascript
        self.m_page.mainFrame().addToJavaScriptWindowObject('phantom', self)

        bootstrap = QFile(':/bootstrap.js')
        if not bootstrap.open(QFile.ReadOnly):
            sys.exit('Can not bootstrap!')
        bootstrapper = str(bootstrap.readAll())
        bootstrap.close()
        if not bootstrapper:
            sys.exit('Can not bootstrap!')
        self.m_page.mainFrame().evaluateJavaScript(bootstrapper)

        do_action('PhantomInitPost', Bunch(locals()))

    def execute(self):
        """Inject the user script; return False if exit() was called meanwhile."""
        injectJsInFrame(self.m_scriptFile,
                        os.path.dirname(os.path.abspath(__file__)),
                        self.m_page.mainFrame(),
                        True)
        return not self.m_terminated

    def printConsoleMessage(self, message, lineNumber, source):
        """Print a console message, prefixed with its source and line when known."""
        # Parenthesised single-argument form: same output as the
        # statement form, and also valid as a function call.
        if source:
            print('%s:%d %s' % (source, lineNumber, message))
        else:
            print(message)

    def returnValue(self):
        """Return the exit code recorded by exit() (0 until then)."""
        return self.m_returnValue

    ##
    # Properties and methods exposed to JavaScript
    ##

    @pyqtProperty('QStringList')
    def args(self):
        return self.m_args

    @pyqtSlot(result=WebPage)
    def createWebPage(self):
        """Create a WebPage with the default settings and the shared network manager."""
        page = WebPage(self)
        page.applySettings(self.m_defaultPageSettings)
        page.setNetworkAccessManager(self.m_netAccessMan)
        page.libraryPath = os.path.dirname(
            os.path.abspath(self.m_scriptFile))
        return page

    @pyqtProperty('QVariantMap')
    def defaultPageSettings(self):
        return self.m_defaultPageSettings

    @pyqtSlot()
    @pyqtSlot(int)
    def exit(self, code=0):
        """Mark the run as terminated, drop the main page and exit the application."""
        self.m_terminated = True
        self.m_returnValue = code

        # stop javascript execution; delete C++ object first,
        # then delete the Python reference
        sip.delete(self.m_page)
        del self.m_page

        QApplication.instance().exit(code)

    @pyqtSlot(str, result=bool)
    def injectJs(self, filePath):
        """Inject filePath (resolved against libraryPath) into the main frame."""
        return injectJsInFrame(filePath,
                               self.libraryPath,
                               self.m_page.mainFrame())

    @pyqtProperty(str)
    def libraryPath(self):
        return self.m_page.libraryPath

    @libraryPath.setter
    def libraryPath(self, dirPath):
        self.m_page.libraryPath = dirPath

    @pyqtProperty(str)
    def scriptName(self):
        return os.path.basename(self.m_scriptFile)

    @pyqtProperty('QVariantMap')
    def version(self):
        version = {
            'major': version_major,
            'minor': version_minor,
            'patch': version_patch
        }
        return version

    # Runs once, while the class body executes; the hook receives the
    # class namespace built so far.
    do_action('Phantom', Bunch(locals()))
class WebPage(QObject):
    """QObject facade around a CustomPage that is exposed to JavaScript.

    Wraps page loading, rendering to image/PDF, script injection and the
    clip-rectangle / viewport / paper-size properties, and re-emits the
    network manager's resource signals.
    """

    javaScriptAlertSent = pyqtSignal(str)
    javaScriptConsoleMessageSent = pyqtSignal(str, int, str)
    loadStarted = pyqtSignal()
    loadFinished = pyqtSignal(str)
    resourceReceived = pyqtSignal('QVariantMap')
    resourceRequested = pyqtSignal('QVariantMap')

    def __init__(self, parent=None):
        """Create the underlying CustomPage and configure its web settings.

        NOTE: the 'WebPageInit' hook receives this function's locals().
        """
        QObject.__init__(self, parent)

        # variable declarations
        self.m_paperSize = {}
        self.m_clipRect = QRect()
        self.m_libraryPath = ''

        self.setObjectName('WebPage')
        self.m_webPage = CustomPage(self)
        self.m_mainFrame = self.m_webPage.mainFrame()

        self.m_webPage.loadStarted.connect(self.loadStarted)
        self.m_webPage.loadFinished.connect(self.finish)

        # Start with transparent background
        palette = self.m_webPage.palette()
        palette.setBrush(QPalette.Base, Qt.transparent)
        self.m_webPage.setPalette(palette)

        # Page size does not need to take scrollbars into account
        self.m_webPage.mainFrame().setScrollBarPolicy(Qt.Horizontal,
                                                      Qt.ScrollBarAlwaysOff)
        self.m_webPage.mainFrame().setScrollBarPolicy(Qt.Vertical,
                                                      Qt.ScrollBarAlwaysOff)

        self.m_webPage.settings().setAttribute(
            QWebSettings.OfflineStorageDatabaseEnabled, True)
        self.m_webPage.settings().setOfflineStoragePath(
            QDesktopServices.storageLocation(QDesktopServices.DataLocation))
        self.m_webPage.settings().setAttribute(
            QWebSettings.LocalStorageDatabaseEnabled, True)
        self.m_webPage.settings().setAttribute(
            QWebSettings.OfflineWebApplicationCacheEnabled, True)
        self.m_webPage.settings().setOfflineWebApplicationCachePath(
            QDesktopServices.storageLocation(QDesktopServices.DataLocation))
        self.m_webPage.settings().setAttribute(
            QWebSettings.FrameFlatteningEnabled, True)
        self.m_webPage.settings().setAttribute(
            QWebSettings.LocalStorageEnabled, True)
        self.m_webPage.settings().setLocalStoragePath(
            QDesktopServices.storageLocation(QDesktopServices.DataLocation))

        # Ensure we have a document.body.
        self.m_webPage.mainFrame().setHtml('<html><body></body></html>')

        self.m_webPage.setViewportSize(QSize(400, 300))

        do_action('WebPageInit', Bunch(locals()))

    def applySettings(self, defaults):
        """Apply a settings map to the page.

        'loadImages', 'loadPlugins' and 'localAccessRemote' are required
        keys (a missing one raises KeyError); 'userAgent' is optional.
        """
        opt = self.m_webPage.settings()

        opt.setAttribute(QWebSettings.AutoLoadImages, defaults['loadImages'])
        opt.setAttribute(QWebSettings.PluginsEnabled, defaults['loadPlugins'])
        opt.setAttribute(QWebSettings.LocalContentCanAccessRemoteUrls,
                         defaults['localAccessRemote'])

        if 'userAgent' in defaults:
            self.m_webPage.m_userAgent = defaults['userAgent']

    def finish(self, ok):
        """Translate the page's boolean load result into 'success' / 'fail'."""
        status = 'success' if ok else 'fail'
        self.loadFinished.emit(status)

    def mainFrame(self):
        """Return the main frame of the wrapped page."""
        return self.m_mainFrame

    def renderImage(self):
        """Render the page (or the clip rectangle, if set) into a QImage.

        The viewport is temporarily resized to the full contents size and
        restored before returning.
        """
        frameRect = QRect(QPoint(0, 0), self.m_mainFrame.contentsSize())
        if not self.m_clipRect.isEmpty():
            frameRect = self.m_clipRect

        viewportSize = self.m_webPage.viewportSize()
        self.m_webPage.setViewportSize(self.m_mainFrame.contentsSize())

        image = QImage(frameRect.size(), QImage.Format_ARGB32)
        image.fill(qRgba(255, 255, 255, 0))

        painter = QPainter()

        # We use tiling approach to work-around Qt software rasterizer bug
        # when dealing with very large paint device.
        # See http://code.google.com/p/phantomjs/issues/detail?id=54.
        tileSize = 4096
        # Ceiling division. Floor division (//) keeps the counts integral
        # regardless of how '/' behaves, so range() always accepts them.
        htiles = (image.width() + tileSize - 1) // tileSize
        vtiles = (image.height() + tileSize - 1) // tileSize
        for x in range(htiles):
            for y in range(vtiles):
                tileBuffer = QImage(tileSize, tileSize, QImage.Format_ARGB32)
                tileBuffer.fill(qRgba(255, 255, 255, 0))

                # Render the web page onto the small tile first
                painter.begin(tileBuffer)
                painter.setRenderHint(QPainter.Antialiasing, True)
                painter.setRenderHint(QPainter.TextAntialiasing, True)
                painter.setRenderHint(QPainter.SmoothPixmapTransform, True)
                painter.translate(-frameRect.left(), -frameRect.top())
                painter.translate(-x * tileSize, -y * tileSize)
                self.m_mainFrame.render(painter, QRegion(frameRect))
                painter.end()

                # Copy the tile to the main buffer
                painter.begin(image)
                painter.setCompositionMode(QPainter.CompositionMode_Source)
                painter.drawImage(x * tileSize, y * tileSize, tileBuffer)
                painter.end()

        self.m_webPage.setViewportSize(viewportSize)
        return image

    def renderPdf(self, fileName):
        """Print the main frame to a PDF file.

        The paper is taken from the paperSize property: explicit
        'width'/'height', or a named 'format' (unknown names fall back to
        A4) with optional 'orientation'. With no paper size configured the
        current contents size is used.

        Returns True after printing, or False when the paper size map has
        neither width/height nor a format.
        """
        p = QPrinter()
        p.setOutputFormat(QPrinter.PdfFormat)
        p.setOutputFileName(fileName)
        p.setResolution(pdf_dpi)

        # Work on a copy: the defaults computed below describe only the
        # page being rendered right now and must not be written back into
        # the paperSize property.
        paperSize = dict(self.m_paperSize)

        if not paperSize:
            pageSize = QSize(self.m_webPage.mainFrame().contentsSize())
            paperSize['width'] = str(pageSize.width()) + 'px'
            paperSize['height'] = str(pageSize.height()) + 'px'
            paperSize['border'] = '0px'

        if paperSize.get('width') and paperSize.get('height'):
            sizePt = QSizeF(ceil(self.stringToPointSize(paperSize['width'])),
                            ceil(self.stringToPointSize(paperSize['height'])))
            p.setPaperSize(sizePt, QPrinter.Point)
        elif 'format' in paperSize:
            orientation = QPrinter.Landscape if paperSize.get(
                'orientation') and paperSize['orientation'].lower(
                ) == 'landscape' else QPrinter.Portrait
            orientation = QPrinter.Orientation(orientation)
            p.setOrientation(orientation)

            # Keys are lower-cased so the lookup is case-insensitive.
            formats = {
                'a0': QPrinter.A0,
                'a1': QPrinter.A1,
                'a2': QPrinter.A2,
                'a3': QPrinter.A3,
                'a4': QPrinter.A4,
                'a5': QPrinter.A5,
                'a6': QPrinter.A6,
                'a7': QPrinter.A7,
                'a8': QPrinter.A8,
                'a9': QPrinter.A9,
                'b0': QPrinter.B0,
                'b1': QPrinter.B1,
                'b2': QPrinter.B2,
                'b3': QPrinter.B3,
                'b4': QPrinter.B4,
                'b5': QPrinter.B5,
                'b6': QPrinter.B6,
                'b7': QPrinter.B7,
                'b8': QPrinter.B8,
                'b9': QPrinter.B9,
                'b10': QPrinter.B10,
                'c5e': QPrinter.C5E,
                'comm10e': QPrinter.Comm10E,
                'dle': QPrinter.DLE,
                'executive': QPrinter.Executive,
                'folio': QPrinter.Folio,
                'ledger': QPrinter.Ledger,
                'legal': QPrinter.Legal,
                'letter': QPrinter.Letter,
                'tabloid': QPrinter.Tabloid
            }

            # Unknown format names fall back to A4.
            p.setPaperSize(formats.get(paperSize['format'].lower(),
                                       QPrinter.A4))
        else:
            return False

        border = floor(self.stringToPointSize(
            paperSize['border'])) if paperSize.get('border') else 0
        p.setPageMargins(border, border, border, border, QPrinter.Point)

        self.m_webPage.mainFrame().print_(p)
        return True

    def setNetworkAccessManager(self, networkAccessManager):
        """Install the network manager and forward its resource signals."""
        self.m_webPage.setNetworkAccessManager(networkAccessManager)
        networkAccessManager.resourceRequested.connect(self.resourceRequested)
        networkAccessManager.resourceReceived.connect(self.resourceReceived)

    def stringToPointSize(self, string):
        """Convert a size string such as '10mm', '2in' or '300px' to points.

        A string without a recognised unit suffix is treated like 'px'.
        Raises ValueError when the numeric part cannot be parsed as a float.
        """
        units = (
            ('mm', 72 / 25.4),
            ('cm', 72 / 2.54),
            ('in', 72.0),
            ('px', 72.0 / pdf_dpi / 2.54),
            ('', 72.0 / pdf_dpi / 2.54)
        )

        for unit, factor in units:
            if string.endswith(unit):
                # Slice off exactly the suffix. str.rstrip(unit) would
                # instead strip any trailing run of the unit's characters.
                value = string[:len(string) - len(unit)]
                return float(value) * factor
        return 0

    def userAgent(self):
        """Return the user agent string stored on the wrapped page."""
        return self.m_webPage.m_userAgent

    ##
    # Properties and methods exposed to JavaScript
    ##

    @pyqtSlot(str)
    def _appendScriptElement(self, scriptUrl):
        """Append a <script src=scriptUrl> to the body; alert(scriptUrl) on load."""
        # NOTE(review): scriptUrl is interpolated unescaped into JavaScript
        # string literals; a URL containing a single quote would break out
        # of the literal. Confirm callers only pass trusted URLs.
        self.m_mainFrame.evaluateJavaScript('''
            var el = document.createElement('script');
            el.onload = function() { alert('%(scriptUrl)s'); };
            el.src = '%(scriptUrl)s';
            document.body.appendChild(el);
        ''' % {'scriptUrl': scriptUrl})

    @pyqtProperty('QVariantMap')
    def clipRect(self):
        """Clip rectangle as a map with 'width', 'height', 'top', 'left'."""
        result = {
            'width': self.m_clipRect.width(),
            'height': self.m_clipRect.height(),
            'top': self.m_clipRect.top(),
            'left': self.m_clipRect.left()
        }
        return result

    @clipRect.setter
    def clipRect(self, size):
        # Values are collected in a local dict (never in module globals).
        # Missing keys keep the current value; negative width/height are
        # clamped to 0, while top/left may be negative.
        rect = {}
        for item in ('width', 'height', 'top', 'left'):
            try:
                value = int(size[item])
            except KeyError:
                value = getattr(self.m_clipRect, item)()
            else:
                if value < 0 and item not in ('top', 'left'):
                    value = 0
            rect[item] = value

        self.m_clipRect = QRect(rect['left'], rect['top'],
                                rect['width'], rect['height'])

    @pyqtProperty(str)
    def content(self):
        """HTML of the main frame."""
        return self.m_mainFrame.toHtml()

    @content.setter
    def content(self, content):
        self.m_mainFrame.setHtml(content)

    @pyqtSlot(str, result='QVariant')
    def evaluate(self, code):
        """Call ``code`` (JavaScript function source) in the main frame."""
        function = '(%s)()' % code
        return self.m_mainFrame.evaluateJavaScript(function)

    @pyqtSlot(str, result=bool)
    def injectJs(self, filePath):
        """Inject the script at ``filePath`` into the main frame."""
        return injectJsInFrame(filePath, self.m_libraryPath, self.m_mainFrame)

    @pyqtSlot(str, str, 'QVariantMap')
    @pyqtSlot(str, 'QVariantMap', 'QVariantMap')
    def openUrl(self, address, op, settings):
        """Load ``address`` using the requested HTTP operation.

        ``op`` is either the operation name or a map with 'operation' and
        optional 'data' (request body). A missing or empty operation means
        'get'. Unsupported operations are reported through console.error
        in the page and nothing is loaded.
        """
        operation = op
        body = QByteArray()

        self.applySettings(settings)
        self.m_webPage.triggerAction(QWebPage.Stop)

        if isinstance(op, dict):
            operation = op.get('operation')
            body = QByteArray(op.get('data', ''))

        # Covers both '' and a map without an 'operation' key (None).
        if not operation:
            operation = 'get'

        operation = operation.lower()
        supported = {
            'get': QNetworkAccessManager.GetOperation,
            'head': QNetworkAccessManager.HeadOperation,
            'put': QNetworkAccessManager.PutOperation,
            'post': QNetworkAccessManager.PostOperation,
            'delete': QNetworkAccessManager.DeleteOperation
        }

        networkOp = supported.get(operation)
        if networkOp is None:
            self.m_mainFrame.evaluateJavaScript(
                'console.error("Unknown network operation: %s");' % operation)
            return

        self.m_mainFrame.load(QNetworkRequest(QUrl(address)), networkOp, body)

    @pyqtProperty('QVariantMap')
    def paperSize(self):
        """Paper size map consumed by renderPdf()."""
        return self.m_paperSize

    @paperSize.setter
    def paperSize(self, size):
        self.m_paperSize = size

    @pyqtSlot(str, result=bool)
    def render(self, fileName):
        """Render the page to ``fileName``; '.pdf' selects PDF output.

        Creates the target directory if needed. Returns False when the
        frame has no contents size or when saving/printing fails.
        """
        # contentsSize() is a QSize; comparing it with '' can never be
        # true, so ask the size itself whether it is empty.
        if self.m_mainFrame.contentsSize().isEmpty():
            return False

        fileInfo = QFileInfo(fileName)
        path = QDir()
        path.mkpath(fileInfo.absolutePath())

        if fileName.lower().endswith('.pdf'):
            return self.renderPdf(fileName)

        image = self.renderImage()
        return image.save(fileName)

    @pyqtProperty(str)
    def libraryPath(self):
        """Directory passed to injectJsInFrame() by injectJs()."""
        return self.m_libraryPath

    @libraryPath.setter
    def libraryPath(self, dirPath):
        self.m_libraryPath = dirPath

    @pyqtSlot(str, str)
    def uploadFile(self, selector, fileName):
        """Store ``fileName`` on the page and click the element matching ``selector``.

        Does nothing when no element matches.
        """
        el = self.m_mainFrame.findFirstElement(selector)
        if el.isNull():
            return

        self.m_webPage.m_uploadFile = fileName
        el.evaluateJavaScript('''
            (function (el) {
                var ev = document.createEvent('MouseEvents');
                ev.initEvent('click', true, true);
                el.dispatchEvent(ev);
            })(this)
        ''')

    @pyqtProperty('QVariantMap')
    def viewportSize(self):
        """Viewport size as a map with 'width' and 'height'."""
        size = self.m_webPage.viewportSize()
        result = {'width': size.width(), 'height': size.height()}
        return result

    @viewportSize.setter
    def viewportSize(self, size):
        # Values are collected in a local dict (never in module globals).
        # Missing keys keep the current value; negatives are clamped to 0.
        dims = {}
        for item in ('width', 'height'):
            try:
                value = int(size[item])
            except KeyError:
                value = getattr(self.m_webPage.viewportSize(), item)()
            else:
                if value < 0:
                    value = 0
            dims[item] = value

        self.m_webPage.setViewportSize(QSize(dims['width'], dims['height']))

    # Plugin hook: runs once while the class body executes and receives the
    # class namespace built so far.
    do_action('WebPage', Bunch(locals()))
class NetworkAccessManager(QNetworkAccessManager):
    """Network access manager that reports every request and response.

    Each request gets a sequential id. ``resourceRequested`` is emitted
    when a request is created; ``resourceReceived`` is emitted with stage
    'start' on a reply's first readyRead and with stage 'end' when the
    reply finishes.
    """

    resourceReceived = pyqtSignal('QVariantMap')
    resourceRequested = pyqtSignal('QVariantMap')

    def __init__(self, diskCacheEnabled, ignoreSslErrors, parent=None):
        """Set up request tracking and, if requested, an on-disk cache.

        NOTE: the 'NetworkAccessManagerInit' hook receives this function's
        locals().
        """
        QNetworkAccessManager.__init__(self, parent)
        self.m_ignoreSslErrors = ignoreSslErrors
        self.m_idCounter = 0
        self.m_ids = {}       # reply -> request id, until the reply finishes
        self.m_started = []   # replies for which stage 'start' was reported

        self.finished.connect(self.handleFinished)

        if diskCacheEnabled:
            m_networkDiskCache = QNetworkDiskCache()
            m_networkDiskCache.setCacheDirectory(
                QDesktopServices.storageLocation(
                    QDesktopServices.CacheLocation))
            self.setCache(m_networkDiskCache)

        do_action('NetworkAccessManagerInit', Bunch(locals()))

    @staticmethod
    def _rawHeaders(source):
        """Return the raw headers of a request or reply as name/value dicts."""
        return [{'name': str(name), 'value': str(source.rawHeader(name))}
                for name in source.rawHeaderList()]

    def createRequest(self, op, req, outgoingData):
        """Create the reply, register an id for it and announce the request.

        NOTE: the 'NetworkAccessManagerCreateRequestPre' / '...Post' hooks
        receive this function's locals().
        """
        do_action('NetworkAccessManagerCreateRequestPre', Bunch(locals()))

        reply = QNetworkAccessManager.createRequest(self, op, req,
                                                    outgoingData)

        if self.m_ignoreSslErrors:
            reply.ignoreSslErrors()

        headers = self._rawHeaders(req)

        self.m_idCounter += 1
        self.m_ids[reply] = self.m_idCounter

        data = {
            'id': self.m_idCounter,
            'url': req.url().toString(),
            'method': toString(op),
            'headers': headers,
            'time': QDateTime.currentDateTime()
        }

        reply.readyRead.connect(self.handleStarted)

        do_action('NetworkAccessManagerCreateRequestPost', Bunch(locals()))

        self.resourceRequested.emit(data)
        return reply

    def handleFinished(self, reply):
        """Report stage 'end' for ``reply`` and drop its tracking state.

        NOTE: the 'NetworkAccessManagerHandleFinished' hook receives this
        function's locals().
        """
        headers = self._rawHeaders(reply)

        data = {
            'stage': 'end',
            'id': self.m_ids[reply],
            'url': reply.url().toString(),
            'status': reply.attribute(
                QNetworkRequest.HttpStatusCodeAttribute),
            'statusText': reply.attribute(
                QNetworkRequest.HttpReasonPhraseAttribute),
            'contentType': reply.header(QNetworkRequest.ContentTypeHeader),
            'redirectURL': reply.header(QNetworkRequest.LocationHeader),
            'headers': headers,
            'time': QDateTime.currentDateTime()
        }

        del self.m_ids[reply]
        # A reply that never emitted readyRead is not in m_started.
        if reply in self.m_started:
            self.m_started.remove(reply)

        do_action('NetworkAccessManagerHandleFinished', Bunch(locals()))

        self.resourceReceived.emit(data)

    def handleStarted(self):
        """Report stage 'start' the first time the sending reply has data.

        Connected to each reply's readyRead signal; later readyRead
        emissions from the same reply are ignored.

        NOTE: the 'NetworkAccessManagerHandleStarted' hook receives this
        function's locals().
        """
        reply = self.sender()
        if not reply:
            return
        if reply in self.m_started:
            return

        self.m_started.append(reply)

        headers = self._rawHeaders(reply)

        data = {
            'stage': 'start',
            'id': self.m_ids[reply],
            'url': reply.url().toString(),
            'status': reply.attribute(
                QNetworkRequest.HttpStatusCodeAttribute),
            'statusText': reply.attribute(
                QNetworkRequest.HttpReasonPhraseAttribute),
            'contentType': reply.header(QNetworkRequest.ContentTypeHeader),
            'bodySize': reply.size(),
            'redirectURL': reply.header(QNetworkRequest.LocationHeader),
            'headers': headers,
            'time': QDateTime.currentDateTime()
        }

        do_action('NetworkAccessManagerHandleStarted', Bunch(locals()))

        self.resourceReceived.emit(data)

    # Plugin hook: runs once while the class body executes and receives the
    # class namespace built so far.
    do_action('NetworkAccessManager', Bunch(locals()))