def __init__(self, client: CDPSession, target: 'Target', frameTree: Dict,
             ignoreHTTPSErrors: bool = False,
             screenshotTaskQueue: list = None) -> None:
    """Build the page's helper objects and wire up event forwarding.

    Creates the input devices, the frame/network/emulation managers,
    tracing and coverage on top of ``client``, then re-emits frame and
    network manager events as ``Page.Events`` and subscribes the page's
    own handlers to protocol events coming from ``client``.

    :arg client: Session object; passed to every helper and listened on.
    :arg target: Stored as ``self._target``.
    :arg frameTree: Passed through to ``FrameManager``.
    :arg ignoreHTTPSErrors: Stored as ``self._ignoreHTTPSErrors``.
    :arg screenshotTaskQueue: Queue to store; a new list when ``None``.
    """
    super().__init__()
    self._client = client
    self._target = target

    # Helpers are created in dependency order: mouse and touchscreen need
    # the keyboard, the network manager needs the frame manager.
    self._keyboard = Keyboard(client)
    self._mouse = Mouse(client, self._keyboard)
    self._touchscreen = Touchscreen(client, self._keyboard)
    self._frameManager = FrameManager(client, frameTree, self)
    self._networkManager = NetworkManager(client, self._frameManager)
    self._emulationManager = EmulationManager(client)
    self._tracing = Tracing(client)
    self._pageBindings: Dict[str, Callable] = {}
    self._ignoreHTTPSErrors = ignoreHTTPSErrors
    self._defaultNavigationTimeout = 30000  # milliseconds
    self._coverage = Coverage(client)
    self._screenshotTaskQueue = (
        list() if screenshotTaskQueue is None else screenshotTaskQueue)

    # Frame manager events are re-emitted under the page's event names.
    frameRelays = (
        (FrameManager.Events.FrameAttached,
         lambda event: self.emit(Page.Events.FrameAttached, event)),
        (FrameManager.Events.FrameDetached,
         lambda event: self.emit(Page.Events.FrameDetached, event)),
        (FrameManager.Events.FrameNavigated,
         lambda event: self.emit(Page.Events.FrameNavigated, event)),
    )
    for sourceEvent, relay in frameRelays:
        self._frameManager.on(sourceEvent, relay)

    # Network manager events are re-emitted the same way.
    networkRelays = (
        (NetworkManager.Events.Request,
         lambda event: self.emit(Page.Events.Request, event)),
        (NetworkManager.Events.Response,
         lambda event: self.emit(Page.Events.Response, event)),
        (NetworkManager.Events.RequestFailed,
         lambda event: self.emit(Page.Events.RequestFailed, event)),
        (NetworkManager.Events.RequestFinished,
         lambda event: self.emit(Page.Events.RequestFinished, event)),
    )
    for sourceEvent, relay in networkRelays:
        self._networkManager.on(sourceEvent, relay)

    # Protocol events handled by the page itself.
    protocolHandlers = (
        ('Page.loadEventFired',
         lambda event: self.emit(Page.Events.Load)),
        ('Runtime.consoleAPICalled',
         lambda event: self._onConsoleAPI(event)),
        ('Page.javascriptDialogOpening',
         lambda event: self._onDialog(event)),
        ('Runtime.exceptionThrown',
         lambda exception: self._handleException(
             exception.get('exceptionDetails'))),
        ('Security.certificateError',
         lambda event: self._onCertificateError(event)),
        ('Inspector.targetCrashed',
         lambda event: self._onTargetCrashed()),
        ('Performance.metrics',
         lambda event: self._emitMetrics(event)),
    )
    for protocolEvent, handler in protocolHandlers:
        client.on(protocolEvent, handler)
def __init__(self, client, ignore_https_errors=True,
             screenshot_task_queue=None):
    """Create the page's helper objects and subscribe to events.

    Builds keyboard, mouse and the frame/network/emulation managers on
    top of ``client``, re-emits manager events under ``Page.Events`` and
    registers the certificate-error and target-crash handlers.

    :arg client: Session object; passed to every helper and listened on.
    :arg ignore_https_errors: Stored as ``self._ignore_https_errors``.
    :arg screenshot_task_queue: Stored as given (``None`` is not replaced).
    """
    super().__init__()
    self._client = client
    # NOTE(review): attribute name is spelled ``_keyborad``; kept as-is
    # because other code may read it under this name.
    self._keyborad = Keyboard(client)
    self._mouse = Mouse(client, self._keyborad)
    self._frame_manager = FrameManager(client, self._mouse)
    self._network_manager = NetworkManager(client)
    self._emulation_manager = EmulationManager(client)
    self._tracing = None
    self._page_bindings = {}
    self._ignore_https_errors = ignore_https_errors
    self._screenshot_task_queue = screenshot_task_queue

    # Frame manager events are re-emitted under the page's event names.
    frame_relays = (
        (FrameManager.Events['FrameAttached'],
         lambda ev: self.emit(Page.Events['FrameAttached'], ev)),
        (FrameManager.Events['FrameDetached'],
         lambda ev: self.emit(Page.Events['FrameDetached'], ev)),
        (FrameManager.Events['FrameNavigated'],
         lambda ev: self.emit(Page.Events['FrameNavigated'], ev)),
    )
    for source_event, relay in frame_relays:
        self._frame_manager.on(source_event, relay)

    # Network manager events are re-emitted the same way.
    network_relays = (
        (NetworkManager.Events['Request'],
         lambda ev: self.emit(Page.Events['Request'], ev)),
        (NetworkManager.Events['Response'],
         lambda ev: self.emit(Page.Events['Response'], ev)),
        (NetworkManager.Events['RequestFailed'],
         lambda ev: self.emit(Page.Events['RequestFailed'], ev)),
        (NetworkManager.Events['RequestFinished'],
         lambda ev: self.emit(Page.Events['RequestFinished'], ev)),
    )
    for source_event, relay in network_relays:
        self._network_manager.on(source_event, relay)

    # Handlers for 'Runtime.consoleAPICalled',
    # 'Page.javascriptDialogOpening' and 'Runtime.exceptionThrown' are
    # not registered in this version.
    protocol_handlers = (
        ('Page.loadEventFired',
         lambda event: self.emit(Page.Events['Load'])),
        ('Security.certificateError', self._on_certificate_error),
        ('Inspector.targetCrashed', self._on_target_crashed),
    )
    for protocol_event, handler in protocol_handlers:
        client.on(protocol_event, handler)
def __init__(self, client: Session, ignoreHTTPSErrors: bool = True,
             screenshotTaskQueue: list = None) -> None:
    """Make new page object.

    Creates the input devices, the frame/network/emulation managers and
    tracing on top of ``client``, re-emits manager events as
    ``Page.Events`` and subscribes the page's handlers to protocol events.

    :arg client: Session object; passed to every helper and listened on.
    :arg ignoreHTTPSErrors: Stored as ``self._ignoreHTTPSErrors``.
    :arg screenshotTaskQueue: Queue to store; a new list when ``None``.
    """
    super().__init__()
    self._client = client

    # Input devices first: the frame manager is built from them.
    self._keyboard = Keyboard(client)
    self._mouse = Mouse(client, self._keyboard)
    self._touchscreen = Touchscreen(client, self._keyboard)
    self._frameManager = FrameManager(
        client, self._mouse, self._touchscreen)
    self._networkManager = NetworkManager(client)
    self._emulationManager = EmulationManager(client)
    self._tracing = Tracing(client)
    self._pageBindings: Dict[str, Callable] = {}
    self._ignoreHTTPSErrors = ignoreHTTPSErrors
    self._screenshotTaskQueue = (
        list() if screenshotTaskQueue is None else screenshotTaskQueue)

    # Frame manager events are re-emitted under the page's event names.
    frameRelays = (
        (FrameManager.Events.FrameAttached,
         lambda event: self.emit(Page.Events.FrameAttached, event)),
        (FrameManager.Events.FrameDetached,
         lambda event: self.emit(Page.Events.FrameDetached, event)),
        (FrameManager.Events.FrameNavigated,
         lambda event: self.emit(Page.Events.FrameNavigated, event)),
    )
    for sourceEvent, relay in frameRelays:
        self._frameManager.on(sourceEvent, relay)

    # Network manager events are re-emitted the same way.
    networkRelays = (
        (NetworkManager.Events.Request,
         lambda event: self.emit(Page.Events.Request, event)),
        (NetworkManager.Events.Response,
         lambda event: self.emit(Page.Events.Response, event)),
        (NetworkManager.Events.RequestFailed,
         lambda event: self.emit(Page.Events.RequestFailed, event)),
        (NetworkManager.Events.RequestFinished,
         lambda event: self.emit(Page.Events.RequestFinished, event)),
    )
    for sourceEvent, relay in networkRelays:
        self._networkManager.on(sourceEvent, relay)

    # Protocol events handled by the page itself.
    protocolHandlers = (
        ('Page.loadEventFired',
         lambda event: self.emit(Page.Events.Load)),
        ('Runtime.consoleAPICalled',
         lambda event: self._onConsoleAPI(event)),
        ('Page.javascriptDialogOpening',
         lambda event: self._onDialog(event)),
        ('Runtime.exceptionThrown',
         lambda exception: self._handleException(
             exception.get('exceptionDetails'))),
        ('Security.certificateError',
         lambda event: self._onCertificateError(event)),
        ('Inspector.targetCrashed',
         lambda event: self._onTargetCrashed()),
    )
    for protocolEvent, handler in protocolHandlers:
        client.on(protocolEvent, handler)
def __init__(self, client: CDPSession, target: 'Target', frameTree: Dict,
             ignoreHTTPSErrors: bool = False,
             screenshotTaskQueue: list = None) -> None:
    """Set up a page object on top of ``client``.

    Instantiates input devices, managers, tracing and coverage, then
    connects three groups of listeners: frame manager events and network
    manager events (both re-emitted as ``Page.Events``), and protocol
    events from ``client`` that the page handles itself.

    :arg client: Session object; passed to every helper and listened on.
    :arg target: Stored as ``self._target``.
    :arg frameTree: Passed through to ``FrameManager``.
    :arg ignoreHTTPSErrors: Stored as ``self._ignoreHTTPSErrors``.
    :arg screenshotTaskQueue: Queue to store; a new list when ``None``.
    """
    super().__init__()
    self._client = client
    self._target = target
    self._keyboard = Keyboard(client)
    self._mouse = Mouse(client, self._keyboard)
    self._touchscreen = Touchscreen(client, self._keyboard)
    self._frameManager = FrameManager(client, frameTree, self)
    self._networkManager = NetworkManager(client, self._frameManager)
    self._emulationManager = EmulationManager(client)
    self._tracing = Tracing(client)
    self._pageBindings: Dict[str, Callable] = {}
    self._ignoreHTTPSErrors = ignoreHTTPSErrors
    self._defaultNavigationTimeout = 30000  # milliseconds
    self._coverage = Coverage(client)

    queue = screenshotTaskQueue
    if queue is None:
        queue = list()
    self._screenshotTaskQueue = queue

    # (emitter, source event, handler) rows, registered top to bottom.
    frames = self._frameManager
    network = self._networkManager
    subscriptions = (
        (frames, FrameManager.Events.FrameAttached,
         lambda event: self.emit(Page.Events.FrameAttached, event)),
        (frames, FrameManager.Events.FrameDetached,
         lambda event: self.emit(Page.Events.FrameDetached, event)),
        (frames, FrameManager.Events.FrameNavigated,
         lambda event: self.emit(Page.Events.FrameNavigated, event)),
        (network, NetworkManager.Events.Request,
         lambda event: self.emit(Page.Events.Request, event)),
        (network, NetworkManager.Events.Response,
         lambda event: self.emit(Page.Events.Response, event)),
        (network, NetworkManager.Events.RequestFailed,
         lambda event: self.emit(Page.Events.RequestFailed, event)),
        (network, NetworkManager.Events.RequestFinished,
         lambda event: self.emit(Page.Events.RequestFinished, event)),
        (client, 'Page.loadEventFired',
         lambda event: self.emit(Page.Events.Load)),
        (client, 'Runtime.consoleAPICalled',
         lambda event: self._onConsoleAPI(event)),
        (client, 'Page.javascriptDialogOpening',
         lambda event: self._onDialog(event)),
        (client, 'Runtime.exceptionThrown',
         lambda exception: self._handleException(
             exception.get('exceptionDetails'))),
        (client, 'Security.certificateError',
         lambda event: self._onCertificateError(event)),
        (client, 'Inspector.targetCrashed',
         lambda event: self._onTargetCrashed()),
        (client, 'Performance.metrics',
         lambda event: self._emitMetrics(event)),
    )
    for emitter, eventName, handler in subscriptions:
        emitter.on(eventName, handler)
def __init__(self, client: CDPSession, page: Any, ignoreHTTPSErrors) -> None:
    """Make new frame manager.

    Stores ``client`` and ``page``, creates a ``NetworkManager`` bound to
    this frame manager, initialises the empty frame/context bookkeeping
    and subscribes to the frame and execution-context protocol events.

    :arg client: Session object that events are received from.
    :arg page: Stored as ``self._page``.
    :arg ignoreHTTPSErrors: Passed through to ``NetworkManager``.
    """
    super().__init__()
    self._client = client
    self._page = page
    self._networkmanager = NetworkManager(client, self, ignoreHTTPSErrors)

    # Bookkeeping starts empty.
    self._frames: OrderedDict[str, Frame] = OrderedDict()
    self._mainFrame: Optional[Frame] = None
    self._contextIdToContext: Dict[str, ExecutionContext] = {}
    self._isolatedWorlds = set()

    # Each handler unpacks the fields it needs from the event payload.
    protocolHandlers = (
        ('Page.frameAttached',
         lambda event: self._onFrameAttached(
             event.get('frameId', ''), event.get('parentFrameId', ''))),
        ('Page.frameNavigated',
         lambda event: self._onFrameNavigated(event.get('frame'))),
        ('Page.navigatedWithinDocument',
         lambda event: self._onFrameNavigatedWithinDocument(
             event.get('frameId'), event.get('url'))),
        ('Page.frameDetached',
         lambda event: self._onFrameDetached(event.get('frameId'))),
        ('Page.frameStoppedLoading',
         lambda event: self._onFrameStoppedLoading(event.get('frameId'))),
        ('Runtime.executionContextCreated',
         lambda event: self._onExecutionContextCreated(
             event.get('context'))),
        ('Runtime.executionContextDestroyed',
         lambda event: self._onExecutionContextDestroyed(
             event.get('executionContextId'))),
        ('Runtime.executionContextsCleared',
         lambda event: self._onExecutionContextsCleared()),
        ('Page.lifecycleEvent',
         lambda event: self._onLifecycleEvent(event)),
    )
    for protocolEvent, handler in protocolHandlers:
        client.on(protocolEvent, handler)
class Page(EventEmitter):
    """Page class.

    Wraps one session object (``client``) and exposes navigation, DOM
    querying, evaluation, cookie, emulation, screenshot and PDF helpers.
    Events listed in :attr:`Events` are emitted through the inherited
    event-emitter interface.
    """

    #: Names of the events emitted by a page.
    Events = SimpleNamespace(
        Console='console',
        Dialog='dialog',
        Error='error',
        PageError='pageerror',
        Request='request',
        Response='response',
        RequestFailed='requestfailed',
        RequestFinished='requestfinished',
        FrameAttached='frameattached',
        FrameDetached='framedetached',
        FrameNavigated='framenavigated',
        Load='load',
        # Needed by ``_emitMetrics``; without it the 'Performance.metrics'
        # handler fails with AttributeError.
        Metrics='metrics',
    )

    #: Paper sizes accepted by ``pdf(format=...)``; values are passed as
    #: ``paperWidth`` / ``paperHeight`` to 'Page.printToPDF'.
    PaperFormats: Dict[str, Dict[str, float]] = dict(
        letter={'width': 8.5, 'height': 11},
        legal={'width': 8.5, 'height': 14},
        tabloid={'width': 11, 'height': 17},
        ledger={'width': 17, 'height': 11},
        a0={'width': 33.1, 'height': 46.8},
        a1={'width': 23.4, 'height': 33.1},
        a2={'width': 16.5, 'height': 23.4},
        a3={'width': 11.7, 'height': 16.5},
        a4={'width': 8.27, 'height': 11.7},
        a5={'width': 5.83, 'height': 8.27},
    )

    @staticmethod
    async def create(client: Session, ignoreHTTPSErrors: bool = False,
                     appMode: bool = False,
                     screenshotTaskQueue: list = None) -> 'Page':
        """Async function which makes a new page.

        Enables the Network, Page, Runtime, Security and Performance
        domains on ``client``, builds the page, navigates it to
        ``about:blank`` and, unless ``appMode`` is set, applies an 800x600
        viewport.
        """
        await client.send('Network.enable', {})
        await client.send('Page.enable', {})
        await client.send('Runtime.enable', {})
        await client.send('Security.enable', {})
        await client.send('Performance.enable', {})
        if ignoreHTTPSErrors:
            await client.send('Security.setOverrideCertificateErrors',
                              {'override': True})
        page = Page(client, ignoreHTTPSErrors, screenshotTaskQueue)
        await page.goto('about:blank')
        if not appMode:
            await page.setViewport({'width': 800, 'height': 600})
        return page

    def __init__(
        self,
        client: Session,
        ignoreHTTPSErrors: bool = True,
        screenshotTaskQueue: list = None,
    ) -> None:
        """Make new page object.

        Creates the helper objects on top of ``client``, re-emits frame
        and network manager events as ``Page.Events`` and subscribes the
        page's handlers to protocol events coming from ``client``.
        """
        super().__init__()
        self._client = client
        self._keyboard = Keyboard(client)
        self._mouse = Mouse(client, self._keyboard)
        self._touchscreen = Touchscreen(client, self._keyboard)
        self._frameManager = FrameManager(client, self)
        self._networkManager = NetworkManager(client)
        self._emulationManager = EmulationManager(client)
        self._tracing = Tracing(client)
        self._pageBindings: Dict[str, Callable] = dict()
        self._ignoreHTTPSErrors = ignoreHTTPSErrors

        if screenshotTaskQueue is None:
            screenshotTaskQueue = list()
        self._screenshotTaskQueue = screenshotTaskQueue

        _fm = self._frameManager
        _fm.on(FrameManager.Events.FrameAttached,
               lambda event: self.emit(Page.Events.FrameAttached, event))
        _fm.on(FrameManager.Events.FrameDetached,
               lambda event: self.emit(Page.Events.FrameDetached, event))
        _fm.on(FrameManager.Events.FrameNavigated,
               lambda event: self.emit(Page.Events.FrameNavigated, event))

        _nm = self._networkManager
        _nm.on(NetworkManager.Events.Request,
               lambda event: self.emit(Page.Events.Request, event))
        _nm.on(NetworkManager.Events.Response,
               lambda event: self.emit(Page.Events.Response, event))
        _nm.on(NetworkManager.Events.RequestFailed,
               lambda event: self.emit(Page.Events.RequestFailed, event))
        _nm.on(NetworkManager.Events.RequestFinished,
               lambda event: self.emit(Page.Events.RequestFinished, event))

        client.on('Page.loadEventFired',
                  lambda event: self.emit(Page.Events.Load))
        client.on('Runtime.consoleAPICalled',
                  lambda event: self._onConsoleAPI(event))
        client.on('Page.javascriptDialogOpening',
                  lambda event: self._onDialog(event))
        client.on(
            'Runtime.exceptionThrown',
            lambda exception: self._handleException(
                exception.get('exceptionDetails')))
        client.on('Security.certificateError',
                  lambda event: self._onCertificateError(event))
        client.on('Inspector.targetCrashed',
                  lambda event: self._onTargetCrashed())
        client.on('Performance.metrics',
                  lambda event: self._emitMetrics(event))

    def _onTargetCrashed(self, *args: Any, **kwargs: Any) -> None:
        """Emit an ``error`` event carrying a ``PageError``."""
        self.emit('error', PageError('Page crashed!'))

    @property
    def mainFrame(self) -> Optional['Frame']:
        """Get main frame."""
        return self._frameManager._mainFrame

    @property
    def keyboard(self) -> Keyboard:
        """Get keyboard object."""
        return self._keyboard

    @property
    def touchscreen(self) -> Touchscreen:
        """Get touchscreen object."""
        return self._touchscreen

    async def tap(self, selector: str) -> None:
        """Tap the element which matches selector.

        :raises PageError: If no element matches ``selector``.
        """
        handle = await self.J(selector)
        if not handle:
            raise PageError('No node found for selector: ' + selector)
        await handle.tap()
        await handle.dispose()

    @property
    def tracing(self) -> 'Tracing':
        """Get tracing object."""
        return self._tracing

    @property
    def frames(self) -> List['Frame']:
        """Get frames."""
        # The page never sets ``self._frames``; read the frame table from
        # the frame manager, as ``mainFrame`` does for ``_mainFrame``.
        # NOTE(review): assumes FrameManager keeps its frames in a
        # ``_frames`` mapping -- confirm against the FrameManager in use.
        return list(self._frameManager._frames.values())

    async def setRequestInterceptionEnabled(self, value: bool) -> None:
        """Enable request interception."""
        return await self._networkManager.setRequestInterceptionEnabled(value)

    def setOfflineMode(self, enabled: bool) -> Awaitable[None]:
        """Set offline mode enable/disable."""
        return self._networkManager.setOfflineMode(enabled)

    def _onCertificateError(self, event: Any) -> None:
        """Tell the browser to continue when HTTPS errors are ignored."""
        if not self._ignoreHTTPSErrors:
            return
        asyncio.ensure_future(
            self._client.send('Security.handleCertificateError', {
                'eventId': event.get('eventId'),
                'action': 'continue'
            }))

    async def querySelector(self, selector: str) -> Optional['ElementHandle']:
        """Get Element which matches `selector`.

        :raises PageError: If the page has no main frame.
        """
        frame = self.mainFrame
        if not frame:
            raise PageError('no main frame.')
        return await frame.querySelector(selector)

    async def evaluateHandle(self, pageFunction: str, *args: Any) -> JSHandle:
        """Execute function on this page.

        :raises PageError: If there is no main frame or execution context.
        """
        if not self.mainFrame:
            raise PageError('no main frame.')
        if not self.mainFrame.executionContext:
            raise PageError('No context.')
        return await self.mainFrame.executionContext.evaluateHandle(
            pageFunction, *args)

    async def queryObject(self, prototypeHandle: JSHandle) -> JSHandle:
        """Send query to the object.

        :raises PageError: If there is no main frame or execution context.
        """
        if not self.mainFrame:
            raise PageError('no main frame.')
        if not self.mainFrame.executionContext:
            raise PageError('No context.')
        return await self.mainFrame.executionContext.queryObject(
            prototypeHandle)

    async def querySelectorEval(self, selector: str, pageFunction: str,
                                *args: Any) -> Optional[Any]:
        """Execute function on element which matches selector.

        :raises PageError: If the page has no main frame.
        """
        frame = self.mainFrame
        if not frame:
            raise PageError('no main frame.')
        return await frame.querySelectorEval(selector, pageFunction, *args)

    async def querySelectorAllEval(self, selector: str, pageFunction: str,
                                   *args: Any) -> Optional[Any]:
        """Execute function on all elements which match selector.

        :raises PageError: If the page has no main frame.
        """
        frame = self.mainFrame
        if not frame:
            raise PageError('no main frame.')
        return await frame.querySelectorAllEval(selector, pageFunction, *args)

    async def querySelectorAll(self, selector: str) -> List['ElementHandle']:
        """Get all elements which match `selector`.

        :raises PageError: If the page has no main frame.
        """
        frame = self.mainFrame
        if not frame:
            raise PageError('no main frame.')
        return await frame.querySelectorAll(selector)

    #: alias to querySelector
    J = querySelector
    #: alias to querySelectorEval
    Jeval = querySelectorEval
    #: alias to querySelectorAll
    JJ = querySelectorAll
    #: alias to querySelectorAllEval
    JJeval = querySelectorAllEval

    async def cookies(self, *urls: str) -> dict:
        """Get cookies for ``urls`` (defaults to the current page URL)."""
        if not urls:
            urls = (self.url, )
        resp = await self._client.send('Network.getCookies', {
            'urls': urls,
        })
        return resp.get('cookies', {})

    async def deleteCookie(self, *cookies: dict) -> None:
        """Delete cookie.

        A cookie without ``url`` gets the current page URL when that URL
        starts with ``http``.
        """
        pageURL = self.url
        for cookie in cookies:
            item = dict(**cookie)
            if not cookie.get('url') and pageURL.startswith('http'):
                item['url'] = pageURL
            await self._client.send('Network.deleteCookies', item)

    async def setCookie(self, *cookies: dict) -> None:
        """Set cookies.

        Existing cookies matching the given ones are deleted first.
        """
        items = []
        for cookie in cookies:
            item = dict(**cookie)
            pageURL = self.url
            if 'url' not in item and pageURL.startswith('http'):
                item['url'] = pageURL
            items.append(item)
        await self.deleteCookie(*items)
        if items:
            await self._client.send('Network.setCookies', {
                'cookies': items,
            })

    async def addScriptTag(self, options: Dict = None, **kwargs: str) -> str:
        """Add script tag to this page.

        :raises PageError: If the page has no main frame.
        """
        frame = self.mainFrame
        if not frame:
            raise PageError('no main frame.')
        if options is None:
            options = {}
        options.update(kwargs)
        return await frame.addScriptTag(options)

    async def addStyleTag(self, options: Dict = None, **kwargs: str) -> str:
        """Add style tag to this page.

        :raises PageError: If the page has no main frame.
        """
        frame = self.mainFrame
        if not frame:
            raise PageError('no main frame.')
        if options is None:
            options = {}
        options.update(kwargs)
        return await frame.addStyleTag(options)

    async def injectFile(self, filePath: str) -> str:
        """[Deprecated] Inject file to this page."""
        warnings.warn('Page.injectFile is deprecated.', DeprecationWarning)
        frame = self.mainFrame
        if not frame:
            raise PageError('no main frame.')
        return await frame.injectFile(filePath)

    async def exposeFunction(self, name: str, puppeteerFunction: Callable
                             ) -> None:
        """Register ``puppeteerFunction`` as a page binding named ``name``.

        Installs a ``window[name]`` function in the page; its calls are
        reported through ``console.debug`` and answered by
        ``_onConsoleAPI``.

        :raises PageError: If a binding called ``name`` is already
            registered.
        """
        # ``.get`` instead of indexing: a new name is not in the dict yet,
        # so indexing raised KeyError on every first registration.
        if self._pageBindings.get(name):
            raise PageError(f'Failed to add page binding with name {name}: '
                            f'window["{name}"] already exists!')
        self._pageBindings[name] = puppeteerFunction

        addPageBinding = '''
function addPageBinding(bindingName) {
  window[bindingName] = async(...args) => {
    const me = window[bindingName];
    let callbacks = me['callbacks'];
    if (!callbacks) {
      callbacks = new Map();
      me['callbacks'] = callbacks;
    }
    const seq = (me['lastSeq'] || 0) + 1;
    me['lastSeq'] = seq;
    const promise = new Promise(fulfill => callbacks.set(seq, fulfill));
    // eslint-disable-next-line no-console
    console.debug('driver:page-binding', JSON.stringify({name: bindingName, seq, args}));
    return promise;
  };
}
        '''  # noqa: E501
        expression = helper.evaluationString(addPageBinding, name)
        # Install for future documents and for the current one.
        await self._client.send('Page.addScriptToEvaluateOnNewDocument',
                                {'source': expression})
        await self._client.send('Runtime.evaluate', {
            'expression': expression,
            'returnByValue': True
        })

    async def authenticate(self, credentials: Dict[str, str]) -> Any:
        """Provide credentials for http authentication.

        `credentials` should be `None` or dict which has `username` and
        `password` in its keys.
        """
        return await self._networkManager.authenticate(credentials)

    async def setExtraHTTPHeaders(self, headers: Dict[str, str]) -> None:
        """Set extra http headers."""
        return await self._networkManager.setExtraHTTPHeaders(headers)

    async def setUserAgent(self, userAgent: str) -> None:
        """Set user agent."""
        return await self._networkManager.setUserAgent(userAgent)

    async def getMetrics(self) -> Dict[str, Any]:
        """Get metrics."""
        response = await self._client.send('Performance.getMetrics')
        return self._buildMetricsObject(response['metrics'])

    def _emitMetrics(self, event: Dict) -> None:
        """Emit a ``Metrics`` event built from a 'Performance.metrics' one."""
        self.emit(
            Page.Events.Metrics, {
                'title': event['title'],
                'metrics': self._buildMetricsObject(event['metrics']),
            })

    def _buildMetricsObject(self, metrics: List) -> Dict[str, Any]:
        """Map metric names to values, keeping only ``supportedMetrics``."""
        result = {}
        for metric in metrics or []:
            if metric['name'] in supportedMetrics:
                result[metric['name']] = metric['value']
        return result

    def _handleException(self, exceptionDetails: Dict) -> None:
        """Emit a ``PageError`` event for an exception thrown in the page."""
        message = helper.getExceptionMessage(exceptionDetails)
        self.emit(Page.Events.PageError, PageError(message))

    async def _onConsoleAPI(self, event: dict) -> None:
        """Handle a console call: serve page bindings or emit ``Console``."""
        _args = event.get('args', [])
        # ``.get('value')``: a first argument without a 'value' key must
        # not raise KeyError here; it simply is not a binding call.
        if (event.get('type') == 'debug' and _args and
                _args[0].get('value') == 'driver:page-binding'):
            obj = json.loads(_args[1]['value'])
            name = obj.get('name')
            seq = obj.get('seq')
            args = obj.get('args')
            result = await self._pageBindings[name](*args)

            deliverResult = '''
function deliverResult(name, seq, result) {
  window[name]['callbacks'].get(seq)(result)
  window[name]['callbacks'].delete(seq)
}
            '''
            expression = helper.evaluationString(deliverResult, name, seq,
                                                 result)
            await self._client.send('Runtime.evaluate',
                                    {'expression': expression})
            return

        # Nobody listens: release the remote objects instead of resolving.
        if not self.listeners(Page.Events.Console):
            for arg in _args:
                await helper.releaseObject(self._client, arg)
            return

        _values = []
        for arg in _args:
            _values.append(
                asyncio.ensure_future(helper.valueFromRemoteObject(arg)))
        values = await asyncio.gather(*_values)
        self.emit(Page.Events.Console, *values)

    def _onDialog(self, event: Any) -> None:
        """Wrap a dialog-opening event in ``Dialog`` and emit it."""
        dialogType = ''
        _type = event.get('type')
        if _type == 'alert':
            dialogType = Dialog.Type.Alert
        elif (_type == 'confirm'):
            dialogType = Dialog.Type.Confirm
        elif (_type == 'prompt'):
            dialogType = Dialog.Type.Prompt
        elif (_type == 'beforeunload'):
            dialogType = Dialog.Type.BeforeUnload
        dialog = Dialog(self._client, dialogType, event.get('message'),
                        event.get('defaultPrompt'))
        self.emit(Page.Events.Dialog, dialog)

    @property
    def url(self) -> str:
        """Get url of this page.

        :raises PageError: If the page has no main frame.
        """
        frame = self.mainFrame
        if not frame:
            raise PageError('no main frame.')
        return frame.url

    async def content(self) -> str:
        """Get the whole HTML contents of the page."""
        return await self.evaluate('''
() => {
  let retVal = '';
  if (document.doctype)
    retVal = new XMLSerializer().serializeToString(document.doctype);
  if (document.documentElement)
    retVal += document.documentElement.outerHTML;
  return retVal;
}
        '''.strip())

    async def setContent(self, html: str) -> None:
        """Set content."""
        func = '''
function(html) {
  document.open();
  document.write(html);
  document.close();
}
        '''
        await self.evaluate(func, html)

    async def goto(self, url: str, options: dict = None, **kwargs: Any
                   ) -> Optional[Response]:
        """Go to url.

        :return: The response recorded for the page's final URL, if any.
        :raises PageError: If the main frame failed to load.
        """
        if options is None:
            options = dict()
        options.update(kwargs)
        watcher = NavigatorWatcher(self._client, self._ignoreHTTPSErrors,
                                   options)
        # Collect responses by URL while the navigation is in flight.
        responses: Dict[str, Response] = dict()
        listener = helper.addEventListener(
            self._networkManager, NetworkManager.Events.Response,
            lambda response: responses.__setitem__(response.url, response))
        navigationPromise = watcher.waitForNavigation()
        referrer = self._networkManager.extraHTTPHeaders().get('referer', '')
        try:
            await self._client.send('Page.navigate',
                                    dict(url=url, referrer=referrer))
        except Exception:
            watcher.cancel()
            helper.removeEventListeners([listener])
            raise
        error = await navigationPromise
        helper.removeEventListeners([listener])
        if error:
            raise error

        if self._frameManager.isMainFrameLoadingFailed():
            raise PageError('Failed to navigate: ' + url)
        return responses.get(self.url)

    async def reload(self, options: dict = None, **kwargs: Any
                     ) -> Optional[Response]:
        """Reload this page."""
        if options is None:
            options = dict()
        options.update(kwargs)
        response = (await asyncio.gather(
            self.waitForNavigation(options),
            self._client.send('Page.reload'),
        ))[0]
        return response

    async def waitForNavigation(self, options: dict = None, **kwargs: Any
                                ) -> Optional[Response]:
        """Wait until navigation completes."""
        if options is None:
            options = dict()
        options.update(kwargs)
        watcher = NavigatorWatcher(self._client, self._ignoreHTTPSErrors,
                                   options)
        responses: Dict[str, Response] = dict()
        listener = helper.addEventListener(
            self._networkManager, NetworkManager.Events.Response,
            lambda response: responses.__setitem__(response.url, response))
        error = await watcher.waitForNavigation()
        helper.removeEventListeners([listener])
        if error:
            raise error

        response = responses.get(self.url, None)
        return response

    async def goBack(self, options: dict = None, **kwargs: Any
                     ) -> Optional[Response]:
        """Go back in history; ``None`` when there is no previous entry."""
        if options is None:
            options = dict()
        options.update(kwargs)
        return await self._go(-1, options)

    async def goForward(self, options: dict = None, **kwargs: Any
                        ) -> Optional[Response]:
        """Go forward in history; ``None`` when there is no next entry."""
        if options is None:
            options = dict()
        options.update(kwargs)
        return await self._go(+1, options)

    async def _go(self, delta: int, options: dict) -> Optional[Response]:
        """Navigate ``delta`` entries through the navigation history."""
        history = await self._client.send('Page.getNavigationHistory')
        _count = history.get('currentIndex', 0) + delta
        entries = history.get('entries', [])
        # Reject both ends: a negative index would wrap to the last entry
        # and ``_count == len(entries)`` would raise IndexError.
        if not 0 <= _count < len(entries):
            return None
        entry = entries[_count]
        response = (await asyncio.gather(
            self.waitForNavigation(options),
            self._client.send('Page.navigateToHistoryEntry',
                              {'entryId': entry.get('id')})))[0]
        return response

    async def emulate(self, options: dict = None, **kwargs: Any) -> None:
        """Emulate viewport and user agent."""
        if options is None:
            options = dict()
        options.update(kwargs)
        await self.setViewport(options.get('viewport', {}))
        await self.setUserAgent(options.get('userAgent', ''))

    async def setJavaScriptEnabled(self, enabled: bool) -> None:
        """Set JavaScript enabled/disabled."""
        await self._client.send('Emulation.setScriptExecutionDisabled', {
            'value': not enabled,
        })

    async def emulateMedia(self, mediaType: str = None) -> None:
        """Emulate css media type of the page.

        :raises ValueError: If ``mediaType`` is not ``'screen'``,
            ``'print'``, ``None`` or ``''``.
        """
        if mediaType not in ['screen', 'print', None, '']:
            raise ValueError(f'Unsupported media type: {mediaType}')
        await self._client.send('Emulation.setEmulatedMedia', {
            'media': mediaType or '',
        })

    async def setViewport(self, viewport: dict) -> None:
        """Set viewport; reloads the page when the emulation requires it."""
        needsReload = await self._emulationManager.emulateViewport(
            self._client, viewport,
        )
        self._viewport = viewport
        if needsReload:
            await self.reload()

    @property
    def viewport(self) -> dict:
        """Get viewport."""
        return self._viewport

    async def evaluate(self, pageFunction: str, *args: Any) -> str:
        """Execute js-function on this page and get result.

        :raises PageError: If the page has no main frame.
        """
        frame = self._frameManager.mainFrame
        if frame is None:
            raise PageError('No main frame.')
        return await frame.evaluate(pageFunction, *args)

    async def evaluateOnNewDocument(self, pageFunction: str, *args: str
                                    ) -> None:
        """Evaluate js-function on new document."""
        source = helper.evaluationString(pageFunction, *args)
        await self._client.send('Page.addScriptToEvaluateOnNewDocument', {
            'source': source,
        })

    async def screenshot(self, options: dict = None, **kwargs: Any) -> bytes:
        """Take screen shot.

        The image type comes from ``options['type']`` if given, else from
        the mime type guessed from ``options['path']``, else ``'png'``.

        :raises PageError: If ``path`` has an unsupported mime type.
        """
        options = options or dict()
        options.update(kwargs)
        screenshotType = None
        if 'path' in options:
            mimeType, _ = mimetypes.guess_type(options['path'])
            if mimeType == 'image/png':
                screenshotType = 'png'
            elif mimeType == 'image/jpeg':
                screenshotType = 'jpeg'
            else:
                raise PageError('Unsupported screenshot '
                                f'mime type: {mimeType}')
        if 'type' in options:
            screenshotType = options['type']
        if not screenshotType:
            screenshotType = 'png'
        return await self._screenshotTask(screenshotType, options)

    async def _screenshotTask(self, format: str, options: dict) -> bytes:  # noqa: C901,E501
        """Capture the screenshot and optionally write it to ``path``."""
        await self._client.send('Target.activateTarget', {
            'targetId': self._client.targetId,
        })
        clip = options.get('clip')
        if clip:
            clip['scale'] = 1

        if options.get('fullPage'):
            metrics = await self._client.send('Page.getLayoutMetrics')
            width = math.ceil(metrics['contentSize']['width'])
            height = math.ceil(metrics['contentSize']['height'])

            # Overwrite clip for full page at all times.
            clip = dict(x=0, y=0, width=width, height=height, scale=1)
            mobile = self._viewport.get('isMobile', False)
            deviceScaleFactor = self._viewport.get('deviceScaleFactor', 1)
            landscape = self._viewport.get('isLandscape', False)
            if landscape:
                screenOrientation = dict(angle=90, type='landscapePrimary')
            else:
                screenOrientation = dict(angle=0, type='portraitPrimary')
            await self._client.send(
                'Emulation.setDeviceMetricsOverride', {
                    'mobile': mobile,
                    'width': width,
                    'height': height,
                    'deviceScaleFactor': deviceScaleFactor,
                    'screenOrientation': screenOrientation,
                })

        if options.get('omitBackground'):
            await self._client.send(
                'Emulation.setDefaultBackgroundColorOverride',
                {'color': {
                    'r': 0,
                    'g': 0,
                    'b': 0,
                    'a': 0
                }},
            )
        opt = {'format': format}
        if clip:
            opt['clip'] = clip
        result = await self._client.send('Page.captureScreenshot', opt)

        # Undo the temporary overrides applied above.
        if options.get('omitBackground'):
            await self._client.send(
                'Emulation.setDefaultBackgroundColorOverride')

        if options.get('fullPage'):
            await self.setViewport(self._viewport)

        buffer = base64.b64decode(result.get('data', b''))
        _path = options.get('path')
        if _path:
            with open(_path, 'wb') as f:
                f.write(buffer)
        return buffer

    async def pdf(self, options: dict = None, **kwargs: Any) -> bytes:
        """Print the page via 'Page.printToPDF' and return the PDF bytes.

        Paper size comes from ``format`` (a key of :attr:`PaperFormats`)
        or from ``width`` / ``height``; it defaults to 8.5 x 11. The
        result is also written to ``options['path']`` when given.

        :raises ValueError: If ``format`` is not a known paper format.
        """
        if options is None:
            options = dict()
        options.update(kwargs)
        scale = options.get('scale', 1)
        displayHeaderFooter = bool(options.get('displayHeaderFooter'))
        headerTemplate = options.get('headerTemplate', '')
        footerTemplate = options.get('footerTemplate', '')
        printBackground = bool(options.get('printBackground'))
        landscape = bool(options.get('landscape'))
        pageRanges = options.get('pageRanges', '')

        paperWidth = 8.5
        paperHeight = 11.0
        if 'format' in options:
            fmt = Page.PaperFormats.get(options['format'].lower())
            if not fmt:
                raise ValueError('Unknown paper format: ' + options['format'])
            paperWidth = fmt['width']
            paperHeight = fmt['height']
        else:
            paperWidth = convertPrintParameterToInches(
                options.get('width')) or paperWidth  # noqa: E501
            paperHeight = convertPrintParameterToInches(
                options.get('height')) or paperHeight  # noqa: E501

        marginOptions = options.get('margin', {})
        marginTop = convertPrintParameterToInches(
            marginOptions.get('top')) or 0  # noqa: E501
        marginLeft = convertPrintParameterToInches(
            marginOptions.get('left')) or 0  # noqa: E501
        marginBottom = convertPrintParameterToInches(
            marginOptions.get('bottom')) or 0  # noqa: E501
        marginRight = convertPrintParameterToInches(
            marginOptions.get('right')) or 0  # noqa: E501

        result = await self._client.send(
            'Page.printToPDF',
            dict(landscape=landscape,
                 displayHeaderFooter=displayHeaderFooter,
                 headerTemplate=headerTemplate,
                 footerTemplate=footerTemplate,
                 printBackground=printBackground,
                 scale=scale,
                 paperWidth=paperWidth,
                 paperHeight=paperHeight,
                 marginTop=marginTop,
                 marginBottom=marginBottom,
                 marginLeft=marginLeft,
                 marginRight=marginRight,
                 pageRanges=pageRanges))
        buffer = base64.b64decode(result.get('data', b''))
        if 'path' in options:
            with open(options['path'], 'wb') as f:
                f.write(buffer)
        return buffer

    async def plainText(self) -> str:
        """[Deprecated] Get page content as plain text."""
        warnings.warn('page.plainText is deprecated.', DeprecationWarning)
        return await self.evaluate('() => document.body.innerText')

    async def title(self) -> str:
        """Get page title.

        :raises PageError: If the page has no main frame.
        """
        frame = self.mainFrame
        if not frame:
            raise PageError('no main frame.')
        return await frame.title()

    async def close(self) -> None:
        """Close connection."""
        await self._client.dispose()

    @property
    def mouse(self) -> Mouse:
        """Get mouse object."""
        return self._mouse

    async def click(self, selector: str, options: dict = None, **kwargs: Any
                    ) -> None:
        """Click element which matches `selector`.

        :raises PageError: If no element matches ``selector``.
        """
        if options is None:
            options = dict()
        options.update(kwargs)
        handle = await self.J(selector)
        if not handle:
            raise PageError('No node found for selector: ' + selector)
        await handle.click(options)
        await handle.dispose()

    async def hover(self, selector: str) -> None:
        """Mouse hover the element which matches `selector`.

        :raises PageError: If no element matches ``selector``.
        """
        handle = await self.J(selector)
        if not handle:
            raise PageError('No node found for selector: ' + selector)
        await handle.hover()
        await handle.dispose()

    async def focus(self, selector: str) -> None:
        """Focus the element which matches `selector`.

        :raises PageError: If no element matches ``selector``.
        """
        handle = await self.J(selector)
        if not handle:
            raise PageError('No node found for selector: ' + selector)
        await self.evaluate('element => element.focus()', handle)
        await handle.dispose()

    async def select(self, selector: str, *values: Any) -> None:
        """Select option(s) on the ``<select>`` matching ``selector``."""
        await self.querySelectorEval(
            selector, '''
(element, values) => {
    if (element.nodeName.toLowerCase() !== 'select')
        throw new Error('Element is not a <select> element.');

    const options = Array.from(element.options);
    if (element.multiple) {
        for (const option of options)
            option.selected = values.includes(option.value);
    } else {
        element.value = values.shift();
    }
    element.dispatchEvent(new Event('input', { 'bubbles': true }));
    element.dispatchEvent(new Event('change', { 'bubbles': true }));
}
        ''', values)

    async def type(self, selector: str, text: str, options: dict = None,
                   **kwargs: Any) -> None:
        """Type text on the selected element.

        :raises PageError: If no element matches ``selector``.
        """
        options = options or dict()
        options.update(kwargs)
        handle = await self.querySelector(selector)
        if handle is None:
            raise PageError('Cannot find {} on this page'.format(selector))
        await handle.type(text, options)
        await handle.dispose()

    def waitFor(self, selectorOrFunctionOrTimeout: Union[str, int, float],
                options: dict = None, *args: Any, **kwargs: Any) -> Awaitable:
        """Wait for function, timeout, or element which matches on page.

        :raises PageError: If the page has no main frame.
        """
        frame = self.mainFrame
        if not frame:
            raise PageError('no main frame.')
        return frame.waitFor(selectorOrFunctionOrTimeout, options, *args,
                             **kwargs)

    def waitForSelector(self, selector: str, options: dict = None,
                        **kwargs: Any) -> Awaitable:
        """Wait until element which matches selector appears on page.

        :raises PageError: If the page has no main frame.
        """
        frame = self.mainFrame
        if not frame:
            raise PageError('no main frame.')
        return frame.waitForSelector(selector, options, **kwargs)

    def waitForFunction(self, pageFunction: str, options: dict = None,
                        *args: str, **kwargs: Any) -> Awaitable:
        """Wait for function.

        :raises PageError: If the page has no main frame.
        """
        frame = self.mainFrame
        if not frame:
            raise PageError('no main frame.')
        return frame.waitForFunction(pageFunction, options, *args, **kwargs)
class Page(EventEmitter): """Page class. This class provides methods to interact with a single tab of chrome. One :class:`~pyppeteer.browser.Browser` object might have multiple Page object. The :class:`Page` class emits various :attr:`~Page.Events` which can be handled by using ``on`` or ``once`` method, which is inherited from `pyee <https://pyee.readthedocs.io/en/latest/>`_'s ``EventEmitter`` class. """ #: Available events. Events = SimpleNamespace( Console='console', Dialog='dialog', Error='error', PageError='pageerror', Request='request', Response='response', RequestFailed='requestfailed', RequestFinished='requestfinished', FrameAttached='frameattached', FrameDetached='framedetached', FrameNavigated='framenavigated', Load='load', Metrics='metrics', ) PaperFormats: Dict[str, Dict[str, float]] = dict( letter={'width': 8.5, 'height': 11}, legal={'width': 8.5, 'height': 14}, tabloid={'width': 11, 'height': 17}, ledger={'width': 17, 'height': 11}, a0={'width': 33.1, 'height': 46.8}, a1={'width': 23.4, 'height': 33.1}, a2={'width': 16.5, 'height': 23.4}, a3={'width': 11.7, 'height': 16.5}, a4={'width': 8.27, 'height': 11.7}, a5={'width': 5.83, 'height': 8.27}, ) @staticmethod async def create(client: CDPSession, target: 'Target', ignoreHTTPSErrors: bool = False, appMode: bool = False, screenshotTaskQueue: list = None) -> 'Page': """Async function which makes new page object.""" await client.send('Page.enable'), frameTree = (await client.send('Page.getFrameTree'))['frameTree'] page = Page(client, target, frameTree, ignoreHTTPSErrors, screenshotTaskQueue) await asyncio.wait([ client.send('Page.setLifecycleEventsEnabled', {'enabled': True}), client.send('Network.enable', {}), client.send('Runtime.enable', {}), client.send('Security.enable', {}), client.send('Performance.enable', {}), ]) if ignoreHTTPSErrors: await client.send('Security.setOverrideCertificateErrors', {'override': True}) if not appMode: await page.setViewport({'width': 800, 'height': 600}) return page def 
__init__(self, client: CDPSession, target: 'Target', frameTree: Dict, ignoreHTTPSErrors: bool = False, screenshotTaskQueue: list = None, ) -> None: super().__init__() self._client = client self._target = target self._keyboard = Keyboard(client) self._mouse = Mouse(client, self._keyboard) self._touchscreen = Touchscreen(client, self._keyboard) self._frameManager = FrameManager(client, frameTree, self) self._networkManager = NetworkManager(client, self._frameManager) self._emulationManager = EmulationManager(client) self._tracing = Tracing(client) self._pageBindings: Dict[str, Callable] = dict() self._ignoreHTTPSErrors = ignoreHTTPSErrors self._defaultNavigationTimeout = 30000 # milliseconds self._coverage = Coverage(client) if screenshotTaskQueue is None: screenshotTaskQueue = list() self._screenshotTaskQueue = screenshotTaskQueue _fm = self._frameManager _fm.on(FrameManager.Events.FrameAttached, lambda event: self.emit(Page.Events.FrameAttached, event)) _fm.on(FrameManager.Events.FrameDetached, lambda event: self.emit(Page.Events.FrameDetached, event)) _fm.on(FrameManager.Events.FrameNavigated, lambda event: self.emit(Page.Events.FrameNavigated, event)) _nm = self._networkManager _nm.on(NetworkManager.Events.Request, lambda event: self.emit(Page.Events.Request, event)) _nm.on(NetworkManager.Events.Response, lambda event: self.emit(Page.Events.Response, event)) _nm.on(NetworkManager.Events.RequestFailed, lambda event: self.emit(Page.Events.RequestFailed, event)) _nm.on(NetworkManager.Events.RequestFinished, lambda event: self.emit(Page.Events.RequestFinished, event)) client.on('Page.loadEventFired', lambda event: self.emit(Page.Events.Load)) client.on('Runtime.consoleAPICalled', lambda event: self._onConsoleAPI(event)) client.on('Page.javascriptDialogOpening', lambda event: self._onDialog(event)) client.on('Runtime.exceptionThrown', lambda exception: self._handleException( exception.get('exceptionDetails'))) client.on('Security.certificateError', lambda event: 
self._onCertificateError(event)) client.on('Inspector.targetCrashed', lambda event: self._onTargetCrashed()) client.on('Performance.metrics', lambda event: self._emitMetrics(event)) @property def target(self) -> 'Target': """Return a target this page created from.""" return self._target def _onTargetCrashed(self, *args: Any, **kwargs: Any) -> None: self.emit('error', PageError('Page crashed!')) @property def mainFrame(self) -> Optional['Frame']: """Get main :class:`~pyppeteer.frame_manager.Frame` of this page.""" return self._frameManager._mainFrame @property def keyboard(self) -> Keyboard: """Get :class:`~pyppeteer.input.Keyboard` object.""" return self._keyboard @property def touchscreen(self) -> Touchscreen: """Get :class:`~pyppeteer.input.Touchscreen` object.""" return self._touchscreen @property def coverage(self) -> Coverage: """Return :class:`~pyppeteer.coverage.Coverage`.""" return self._coverage async def tap(self, selector: str) -> None: """Tap the element which matches the ``selector``. :arg str selector: A selector to search element to touch. """ handle = await self.J(selector) if not handle: raise PageError('No node found for selector: ' + selector) await handle.tap() await handle.dispose() @property def tracing(self) -> 'Tracing': """Get tracing object.""" return self._tracing @property def frames(self) -> List['Frame']: """Get all frames of this page.""" return list(self._frameManager.frames()) async def setRequestInterception(self, value: bool) -> None: """Enable/disable request interception.""" return await self._networkManager.setRequestInterception(value) async def setOfflineMode(self, enabled: bool) -> None: """Set offline mode enable/disable.""" await self._networkManager.setOfflineMode(enabled) def setDefaultNavigationTimeout(self, timeout: int) -> None: """Change the default maximum navigation timeout. 
This method changes the default timeout of 30 seconds for the following methods: * :meth:`goto` * :meth:`goBack` * :meth:`goForward` * :meth:`reload` * :meth:`waitForNavigation` :arg int timeout: Maximum navigation time in milliseconds. """ self._defaultNavigationTimeout = timeout def _onCertificateError(self, event: Any) -> None: if not self._ignoreHTTPSErrors: return asyncio.ensure_future( self._client.send('Security.handleCertificateError', { 'eventId': event.get('eventId'), 'action': 'continue' }) ) async def querySelector(self, selector: str) -> Optional['ElementHandle']: """Get an Element which matches ``selector``. :arg str selector: A selector to search element. :return Optional[ElementHandle]: If element which matches the ``selector`` is found, return its :class:`~pyppeteer.element_handle.ElementHandle`. If not found, returns ``None``. """ frame = self.mainFrame if not frame: raise PageError('no main frame.') return await frame.querySelector(selector) async def evaluateHandle(self, pageFunction: str, *args: Any ) -> JSHandle: """Execute function on this page. Difference between :meth:`~pyppeteer.page.Page.evaluate` and :meth:`~pyppeteer.page.Page.evaluateHandle` is that ``evaluateHandle`` returns JSHandle object (not value). :arg str pageFunction: JavaScript function to be executed. """ if not self.mainFrame: raise PageError('no main frame.') context = await self.mainFrame.executionContext() if not context: raise PageError('No context.') return await context.evaluateHandle(pageFunction, *args) async def queryObjects(self, prototypeHandle: JSHandle) -> JSHandle: """Iterate js heap and finds all the objects with the handle. :arg JSHandle prototypeHandle: JSHandle of prototype object. 
""" if not self.mainFrame: raise PageError('no main frame.') context = await self.mainFrame.executionContext() if not context: raise PageError('No context.') return await context.queryObjects(prototypeHandle) async def querySelectorEval(self, selector: str, pageFunction: str, *args: Any) -> Optional[Any]: """Execute function with an element which matches ``selector``. :arg str selector: A selector to query page for. :arg str pageFunction: String of JavaScript function to be evaluated on browser. This function takes an element which matches the selector as a first argument. :arg Any args: Arguments to pass to ``pageFunction``. This method raises error if no element matched the ``selector``. """ frame = self.mainFrame if not frame: raise PageError('no main frame.') return await frame.querySelectorEval(selector, pageFunction, *args) async def querySelectorAllEval(self, selector: str, pageFunction: str, *args: Any) -> Optional[Any]: """Execute function with all elements which matches ``selector``. :arg str selector: A selector to query page for. :arg str pageFunction: String of JavaScript function to be evaluated on browser. This function takes Array of the matched elements as the first argument. :arg Any args: Arguments to pass to ``pageFunction``. """ frame = self.mainFrame if not frame: raise PageError('no main frame.') return await frame.querySelectorAllEval(selector, pageFunction, *args) async def querySelectorAll(self, selector: str) -> List['ElementHandle']: """Get all element which matches `selector` as a list. :arg str selector: A selector to search element. :return List[ElementHandle]: List of :class:`~pyppeteer.element_handle.ElementHandle` which matches the ``selector``. If no element is matched to the ``selector``, return empty list. """ frame = self.mainFrame if not frame: raise PageError('no main frame.') return await frame.querySelectorAll(selector) async def xpath(self, expression: str) -> List[ElementHandle]: """Evaluate XPath expression. 
If there is no such element in this page, return None. :arg str expression: XPath string to be evaluated. """ frame = self.mainFrame if not frame: raise PageError('no main frame.') return await frame.xpath(expression) #: alias to :meth:`querySelector` J = querySelector #: alias to :meth:`querySelectorEval` Jeval = querySelectorEval #: alias to :meth:`querySelectorAll` JJ = querySelectorAll #: alias to :meth:`querySelectorAllEval` JJeval = querySelectorAllEval #: alias to :meth:`xpath` Jx = xpath async def cookies(self, *urls: str) -> dict: """Get cookies.""" if not urls: urls = (self.url, ) resp = await self._client.send('Network.getCookies', { 'urls': urls, }) return resp.get('cookies', {}) async def deleteCookie(self, *cookies: dict) -> None: """Delete cookie.""" pageURL = self.url for cookie in cookies: item = dict(**cookie) if not cookie.get('url') and pageURL.startswith('http'): item['url'] = pageURL await self._client.send('Network.deleteCookies', item) async def setCookie(self, *cookies: dict) -> None: """Set cookies.""" pageURL = self.url startsWithHTTP = pageURL.startswith('http') items = [] for cookie in cookies: item = dict(**cookie) if 'url' not in item and startsWithHTTP: item['url'] = pageURL if item.get('url') == 'about:blank': name = item.get('name', '') raise PageError(f'Blank page can not have cookie "{name}"') if item.get('url', '').startswith('data:'): name = item.get('name', '') raise PageError(f'Data URL page can not have cookie "{name}"') items.append(item) await self.deleteCookie(*items) if items: await self._client.send('Network.setCookies', { 'cookies': items, }) async def addScriptTag(self, options: Dict = None, **kwargs: str ) -> ElementHandle: """Add script tag to this page. One of ``url``, ``path`` or ``content`` option is necessary. * ``url`` (string): URL of a script to add. * ``path`` (string): Path to the local JavaScript file to add. * ``content`` (string): JavaScript string to add. 
:return ElementHandle: :class:`~pyppeteer.element_handle.ElementHandle` of added tag. """ frame = self.mainFrame if not frame: raise PageError('no main frame.') options = merge_dict(options, kwargs) return await frame.addScriptTag(options) async def addStyleTag(self, options: Dict = None, **kwargs: str ) -> ElementHandle: """Add style or link tag to this page. One of ``url``, ``path`` or ``content`` option is necessary. * ``url`` (string): URL of the link tag to add. * ``path`` (string): Path to the local CSS file to add. * ``content`` (string): CSS string to add. :return ElementHandle: :class:`~pyppeteer.element_handle.ElementHandle` of added tag. """ frame = self.mainFrame if not frame: raise PageError('no main frame.') options = merge_dict(options, kwargs) return await frame.addStyleTag(options) async def injectFile(self, filePath: str) -> str: """[Deprecated] Inject file to this page. This method is deprecated. Use :meth:`addScriptTag` instead. """ frame = self.mainFrame if not frame: raise PageError('no main frame.') return await frame.injectFile(filePath) async def exposeFunction(self, name: str, pyppeteerFunction: Callable ) -> None: """Add python function to the browser's ``window`` object as ``name``. Registered function can be called from chrome process. :arg string name: Name of the function on the window object. :arg Callable pyppeteerFunction: Function which will be called on python process. 
""" if self._pageBindings.get(name): raise PageError(f'Failed to add page binding with name {name}: ' f'window["{name}"] already exists!') self._pageBindings[name] = pyppeteerFunction addPageBinding = ''' function addPageBinding(bindingName) { window[bindingName] = async(...args) => { const me = window[bindingName]; let callbacks = me['callbacks']; if (!callbacks) { callbacks = new Map(); me['callbacks'] = callbacks; } const seq = (me['lastSeq'] || 0) + 1; me['lastSeq'] = seq; const promise = new Promise(fulfill => callbacks.set(seq, fulfill)); // eslint-disable-next-line no-console console.debug('driver:page-binding', JSON.stringify({name: bindingName, seq, args})); return promise; }; } ''' # noqa: E501 expression = helper.evaluationString(addPageBinding, name) await self._client.send('Page.addScriptToEvaluateOnNewDocument', {'source': expression}) await asyncio.wait([ frame.evaluate(expression, force_expr=True) for frame in self.frames ]) async def authenticate(self, credentials: Dict[str, str]) -> Any: """Provide credentials for http authentication. ``credentials`` should be ``None`` or dict which has ``username`` and ``password`` field. 
""" return await self._networkManager.authenticate(credentials) async def setExtraHTTPHeaders(self, headers: Dict[str, str]) -> None: """Set extra http headers.""" return await self._networkManager.setExtraHTTPHeaders(headers) async def setUserAgent(self, userAgent: str) -> None: """Set user agent to use in this page.""" return await self._networkManager.setUserAgent(userAgent) async def metrics(self) -> Dict[str, Any]: """Get metrics.""" response = await self._client.send('Performance.getMetrics') return self._buildMetricsObject(response['metrics']) def _emitMetrics(self, event: Dict) -> None: self.emit(Page.Events.Metrics, { 'title': event['title'], 'metrics': self._buildMetricsObject(event['metrics']), }) def _buildMetricsObject(self, metrics: List) -> Dict[str, Any]: result = {} for metric in metrics or []: if metric['name'] in supportedMetrics: result[metric['name']] = metric['value'] return result def _handleException(self, exceptionDetails: Dict) -> None: message = helper.getExceptionMessage(exceptionDetails) self.emit(Page.Events.PageError, PageError(message)) def _onConsoleAPI(self, event: dict) -> None: _args = event.get('args', []) if (event.get('type') == 'debug' and _args and _args[0]['value'] == 'driver:page-binding'): obj = json.loads(_args[1]['value']) name = obj.get('name') seq = obj.get('seq') args = obj.get('args') result = self._pageBindings[name](*args) deliverResult = ''' function deliverResult(name, seq, result) { window[name]['callbacks'].get(seq)(result); window[name]['callbacks'].delete(seq); } ''' expression = helper.evaluationString( deliverResult, name, seq, result) asyncio.ensure_future(self._client.send('Runtime.evaluate', { 'expression': expression, 'contextId': event['executionContextId'], })) return if not self.listeners(Page.Events.Console): for arg in _args: asyncio.ensure_future(helper.releaseObject(self._client, arg)) return _id = event['executionContextId'] values = [] for arg in _args: 
values.append(self._frameManager.createJSHandle(_id, arg)) textTokens = [] for arg, value in zip(_args, values): if arg.get('objectId'): textTokens.append(value.toString()) else: textTokens.append(str(helper.valueFromRemoteObject(arg))) message = ConsoleMessage(event['type'], ' '.join(textTokens), values) self.emit(Page.Events.Console, message) def _onDialog(self, event: Any) -> None: dialogType = '' _type = event.get('type') if _type == 'alert': dialogType = Dialog.Type.Alert elif (_type == 'confirm'): dialogType = Dialog.Type.Confirm elif (_type == 'prompt'): dialogType = Dialog.Type.Prompt elif (_type == 'beforeunload'): dialogType = Dialog.Type.BeforeUnload dialog = Dialog(self._client, dialogType, event.get('message'), event.get('defaultPrompt')) self.emit(Page.Events.Dialog, dialog) @property def url(self) -> str: """Get url of this page.""" frame = self.mainFrame if not frame: raise PageError('no main frame.') return frame.url async def content(self) -> str: """Get the whole HTML contents of the page.""" frame = self._frameManager.mainFrame if frame is None: raise PageError('No main frame.') return await frame.content() async def setContent(self, html: str) -> None: """Set content to this page.""" frame = self._frameManager.mainFrame if frame is None: raise PageError('No main frame.') await frame.setContent(html) async def goto(self, url: str, options: dict = None, **kwargs: Any ) -> Optional[Response]: """Go to the ``url``. :arg string url: URL to navigate page to. The url should include scheme, e.g. ``https://``. Available options are: * ``timeout`` (int): Maximum navigation time in milliseconds, defaults to 30 seconds, pass ``0`` to desable timeout. The default value can be changed by using the :meth:`setDefaultNavigationTimeout` method. * ``waitUntil`` (str|List[str]): When to consider navigation succeeded, defaults to ``load``. Given a list of event strings, navigation is considered to be successful after all events have been fired. 
Events can be either: * ``load``: when ``load`` event is fired. * ``documentloaded``: when the ``DOMContentLoaded`` event is fired. * ``networkidle0``: when there are no more than 0 network connections for at least 500 ms. * ``networkidle2``: when there are no more than 2 network connections for at least 500 ms. """ options = merge_dict(options, kwargs) referrer = self._networkManager.extraHTTPHeaders().get('referer', '') requests: Dict[str, Request] = dict() eventListeners = [helper.addEventListener( self._networkManager, NetworkManager.Events.Request, lambda request: requests.__setitem__(request.url, request) )] mainFrame = self._frameManager.mainFrame if mainFrame is None: raise PageError('No main frame.') timeout = options.get('timeout', self._defaultNavigationTimeout) watcher = NavigatorWatcher(self._frameManager, mainFrame, timeout, options) result = await self._navigate(url, referrer) if result is not None: raise PageError(result) result = await watcher.navigationPromise() watcher.cancel() helper.removeEventListeners(eventListeners) error = result[0].pop().exception() # type: ignore if error: raise error request = requests.get(mainFrame.url) return request.response if request else None async def _navigate(self, url: str, referrer: str) -> Optional[str]: response = await self._client.send( 'Page.navigate', {'url': url, 'referrer': referrer}) if response.get('errorText'): return response['errorText'] return None async def reload(self, options: dict = None, **kwargs: Any ) -> Optional[Response]: """Reload this page. Available options are same as :meth:`goto` method. """ options = merge_dict(options, kwargs) response = (await asyncio.gather( self.waitForNavigation(options), self._client.send('Page.reload'), ))[0] return response async def waitForNavigation(self, options: dict = None, **kwargs: Any ) -> Optional[Response]: """Wait for navigation. Available options are same as :meth:`goto` method. 
""" options = merge_dict(options, kwargs) mainFrame = self._frameManager.mainFrame if mainFrame is None: raise PageError('No main frame.') timeout = options.get('timeout', self._defaultNavigationTimeout) watcher = NavigatorWatcher(self._frameManager, mainFrame, timeout, options) responses: Dict[str, Response] = dict() listener = helper.addEventListener( self._networkManager, NetworkManager.Events.Response, lambda response: responses.__setitem__(response.url, response) ) result = await watcher.navigationPromise() helper.removeEventListeners([listener]) error = result[0].pop().exception() if error: raise error response = responses.get(self.url, None) return response async def goBack(self, options: dict = None, **kwargs: Any ) -> Optional[Response]: """Navigate to the previous page in history. Available options are same as :meth:`goto` method. """ options = merge_dict(options, kwargs) return await self._go(-1, options) async def goForward(self, options: dict = None, **kwargs: Any ) -> Optional[Response]: """Navigate to the next page in history. Available options are same as :meth:`goto` method. """ options = merge_dict(options, kwargs) return await self._go(+1, options) async def _go(self, delta: int, options: dict) -> Optional[Response]: history = await self._client.send('Page.getNavigationHistory') _count = history.get('currentIndex', 0) + delta entries = history.get('entries', []) if len(entries) <= _count: return None entry = entries[_count] response = (await asyncio.gather( self.waitForNavigation(options), self._client.send('Page.navigateToHistoryEntry', { 'entryId': entry.get('id') }) ))[0] return response async def bringToFront(self) -> None: """Bring page to front (activate tab).""" await self._client.send('Page.bringToFront') async def emulate(self, options: dict = None, **kwargs: Any) -> None: """Emulate viewport and user agent.""" options = merge_dict(options, kwargs) # TODO: if options does not have viewport or userAgent, # skip its setting. 
await self.setViewport(options.get('viewport', {})) await self.setUserAgent(options.get('userAgent', '')) async def setJavaScriptEnabled(self, enabled: bool) -> None: """Set JavaScript enable/disable.""" await self._client.send('Emulation.setScriptExecutionDisabled', { 'value': not enabled, }) async def emulateMedia(self, mediaType: str = None) -> None: """Emulate css media type of the page.""" if mediaType not in ['screen', 'print', None, '']: raise ValueError(f'Unsupported media type: {mediaType}') await self._client.send('Emulation.setEmulatedMedia', { 'media': mediaType or '', }) async def setViewport(self, viewport: dict) -> None: """Set viewport. Available options are: * ``width`` (int): page width in pixel. * ``height`` (int): page height in pixel. * ``deviceScaleFactor`` (float): Default to 1.0. * ``isMobile`` (bool): Default to ``False``. * ``hasTouch`` (bool): Default to ``False``. * ``isLandscape`` (bool): Default to ``False``. """ needsReload = await self._emulationManager.emulateViewport( self._client, viewport, ) self._viewport = viewport if needsReload: await self.reload() @property def viewport(self) -> dict: """Get viewport dict. Field of returned dict is same as :meth:`setViewport`. """ return self._viewport async def evaluate(self, pageFunction: str, *args: Any, force_expr: bool = False) -> Any: """Execute js-function or js-expression on browser and get result. :arg str pageFunction: String of js-function/expression to be executed on the browser. :arg bool force_expr: If True, evaluate `pageFunction` as expression. If False (default), try to automatically detect function or expression. note: ``force_expr`` option is a keyword only argument. """ frame = self._frameManager.mainFrame if frame is None: raise PageError('No main frame.') return await frame.evaluate(pageFunction, *args, force_expr=force_expr) async def evaluateOnNewDocument(self, pageFunction: str, *args: str ) -> None: """Add a JavaScript function to the document. 
This function would be invoked in one of the following scenarios: * whenever the page is navigated * whenever the child frame is attached or navigated. Inthis case, the function is invoked in the context of the newly attached frame. """ source = helper.evaluationString(pageFunction, *args) await self._client.send('Page.addScriptToEvaluateOnNewDocument', { 'source': source, }) async def screenshot(self, options: dict = None, **kwargs: Any) -> bytes: """Take a screen shot. The following options are available: * ``path`` (str): The file path to save the image to. The screenshot type will be inferred from the file extension. * ``type`` (str): Specify screenshot type, can be either ``jpeg`` or ``png``. Defaults to ``png``. * ``quality`` (int): The quality of the image, between 0-100. Not applicable to ``png`` image. * ``fullPage`` (bool): When true, take a screenshot of the full scrollable page. Defaults to ``False``. * ``clip`` (dict): An object which specifies clipping region of the page. This option should have the following fields: * ``x`` (int): x-coordinate of top-left corner of clip area. * ``y`` (int): y-coordinate of top-left corner of clip area. * ``width`` (int): width of clipping area. * ``height`` (int): height of clipping area. * ``omitBackground`` (bool): Hide default white background and allow capturing screenshot with transparency. 
""" options = merge_dict(options, kwargs) screenshotType = None if 'type' in options: screenshotType = options['type'] if screenshotType not in ['png', 'jpeg']: raise ValueError(f'Unknown type value: {screenshotType}') elif 'path' in options: mimeType, _ = mimetypes.guess_type(options['path']) if mimeType == 'image/png': screenshotType = 'png' elif mimeType == 'image/jpeg': screenshotType = 'jpeg' else: raise ValueError('Unsupported screenshot ' f'mime type: {mimeType}') if not screenshotType: screenshotType = 'png' return await self._screenshotTask(screenshotType, options) async def _screenshotTask(self, format: str, options: dict) -> bytes: # noqa: C901,E501 await self._client.send('Target.activateTarget', { 'targetId': self._target._targetId, }) clip = options.get('clip') if clip: clip['scale'] = 1 if options.get('fullPage'): metrics = await self._client.send('Page.getLayoutMetrics') width = math.ceil(metrics['contentSize']['width']) height = math.ceil(metrics['contentSize']['height']) # Overwrite clip for full page at all times. 
clip = dict(x=0, y=0, width=width, height=height, scale=1) mobile = self._viewport.get('isMobile', False) deviceScaleFactor = self._viewport.get('deviceScaleFactor', 1) landscape = self._viewport.get('isLandscape', False) if landscape: screenOrientation = dict(angle=90, type='landscapePrimary') else: screenOrientation = dict(angle=0, type='portraitPrimary') await self._client.send('Emulation.setDeviceMetricsOverride', { 'mobile': mobile, 'width': width, 'height': height, 'deviceScaleFactor': deviceScaleFactor, 'screenOrientation': screenOrientation, }) if options.get('omitBackground'): await self._client.send( 'Emulation.setDefaultBackgroundColorOverride', {'color': {'r': 0, 'g': 0, 'b': 0, 'a': 0}}, ) opt = {'format': format} if clip: opt['clip'] = clip result = await self._client.send('Page.captureScreenshot', opt) if options.get('omitBackground'): await self._client.send( 'Emulation.setDefaultBackgroundColorOverride') if options.get('fullPage'): await self.setViewport(self._viewport) buffer = base64.b64decode(result.get('data', b'')) _path = options.get('path') if _path: with open(_path, 'wb') as f: f.write(buffer) return buffer async def pdf(self, options: dict = None, **kwargs: Any) -> bytes: """Generate a pdf of the page. Options: * ``path`` (str): The file path to save the PDF. * ``scale`` (float): Scale of the webpage rendering, defaults to ``1``. * ``displayHeaderFooter`` (bool): Display header and footer. Defaults to ``False``. * ``headerTemplate`` (str): HTML template for the print header. Should be valid HTML markup with following classes. * ``data``: formatted print date * ``title``: document title * ``url``: document location * ``pageNumber``: current page number * ``totalPages``: total pages in the document * ``footerTemplate`` (str): HTML template for the print footer. Should use the same template as ``headerTemplate``. * ``printBackground`` (bool): Print background graphics. Defaults to ``False``. * ``landscape`` (bool): Paper orientation. 
Defaults to ``False``.
        * ``pageRanges`` (string): Paper ranges to print, e.g., '1-5,8,11-13'.
          Defaults to empty string, which means all pages.
        * ``format`` (str): Paper format. If set, takes priority over
          ``width`` or ``height``. Defaults to ``Letter``.
        * ``width`` (str): Paper width, accepts values labeled with units.
        * ``height`` (str): Paper height, accepts values labeled with units.
        * ``margin`` (dict): Paper margins, defaults to ``None``.

          * ``top`` (str): Top margin, accepts values labeled with units.
          * ``right`` (str): Right margin, accepts values labeled with units.
          * ``bottom`` (str): Bottom margin, accepts values labeled with units.
          * ``left`` (str): Left margin, accepts values labeled with units.

        :return bytes: Return generated PDF ``bytes`` object.
        """
        options = merge_dict(options, kwargs)
        scale = options.get('scale', 1)
        displayHeaderFooter = bool(options.get('displayHeaderFooter'))
        headerTemplate = options.get('headerTemplate', '')
        footerTemplate = options.get('footerTemplate', '')
        printBackground = bool(options.get('printBackground'))
        landscape = bool(options.get('landscape'))
        pageRanges = options.get('pageRanges', '')

        # Fallback paper size (8.5 x 11), used when neither ``format`` nor
        # ``width``/``height`` yields a value.
        paperWidth = 8.5
        paperHeight = 11.0
        if 'format' in options:
            # A named format wins over explicit width/height.
            fmt = Page.PaperFormats.get(options['format'].lower())
            if not fmt:
                raise ValueError('Unknown paper format: ' + options['format'])
            paperWidth = fmt['width']
            paperHeight = fmt['height']
        else:
            paperWidth = convertPrintParameterToInches(options.get('width')) or paperWidth  # noqa: E501
            paperHeight = convertPrintParameterToInches(options.get('height')) or paperHeight  # noqa: E501

        marginOptions = options.get('margin', {})
        marginTop = convertPrintParameterToInches(marginOptions.get('top')) or 0  # noqa: E501
        marginLeft = convertPrintParameterToInches(marginOptions.get('left')) or 0  # noqa: E501
        marginBottom = convertPrintParameterToInches(marginOptions.get('bottom')) or 0  # noqa: E501
        marginRight = convertPrintParameterToInches(marginOptions.get('right')) or 0  # noqa: E501

        result = await self._client.send('Page.printToPDF', dict(
            landscape=landscape,
            displayHeaderFooter=displayHeaderFooter,
            headerTemplate=headerTemplate,
            footerTemplate=footerTemplate,
            printBackground=printBackground,
            scale=scale,
            paperWidth=paperWidth,
            paperHeight=paperHeight,
            marginTop=marginTop,
            marginBottom=marginBottom,
            marginLeft=marginLeft,
            marginRight=marginRight,
            pageRanges=pageRanges
        ))
        # The protocol response carries the document base64-encoded in
        # ``data``.
        buffer = base64.b64decode(result.get('data', b''))
        if 'path' in options:
            with open(options['path'], 'wb') as f:
                f.write(buffer)
        return buffer

    async def plainText(self) -> str:
        """[Deprecated] Get page content as plain text.

        Logs a deprecation warning and returns ``document.body.innerText``
        evaluated on the page.
        """
        logger.warning('`Page.plainText` is deprecated.')
        return await self.evaluate('() => document.body.innerText')

    async def title(self) -> str:
        """Get page title.

        Raises ``PageError`` if the page has no main frame.
        """
        frame = self.mainFrame
        if not frame:
            raise PageError('no main frame.')
        return await frame.title()

    async def close(self) -> None:
        """Close connection.

        Sends ``Target.closeTarget`` for this page's target over the
        connection held by the client session. Raises ``PageError`` if that
        connection is ``None``.
        """
        conn = self._client._connection
        if conn is None:
            raise PageError('Protocol Error: Connectoin Closed. '
                            'Most likely the page has been closed.')
        await conn.send('Target.closeTarget',
                        {'targetId': self._target._targetId})

    @property
    def mouse(self) -> Mouse:
        """Get :class:`~pyppeteer.input.Mouse` object."""
        return self._mouse

    async def click(self, selector: str, options: dict = None, **kwargs: Any
                    ) -> None:
        """Click element which matches ``selector``.

        This method fetches an element with ``selector``, scrolls it into view
        if needed, and then uses :attr:`mouse` to click in the center of the
        element. If there's no element matching ``selector``, the method raises
        ``PageError``.

        Available options are:

        * ``button`` (str): ``left``, ``right``, or ``middle``, defaults to
          ``left``.
        * ``clickCount`` (int): defaults to 1.
        * ``delay`` (int|float): Time to wait between ``mousedown`` and
          ``mouseup`` in milliseconds. defaults to 0.

        .. note:: If this method triggers a navigation event and there's a
            separate :meth:`waitForNavigation`, you may end up with a race
            condition that yields unexpected results. The correct pattern for
            click and wait for navigation is the following::

                await asyncio.wait([
                    page.waitForNavigation(waitOptions),
                    page.click(selector, clickOptions),
                ])
        """
        options = merge_dict(options, kwargs)
        handle = await self.J(selector)
        if not handle:
            raise PageError('No node found for selector: ' + selector)
        await handle.click(options)
        await handle.dispose()

    async def hover(self, selector: str) -> None:
        """Mouse hover the element which matches ``selector``.

        If no element matched the ``selector``, raise ``PageError``.
        """
        handle = await self.J(selector)
        if not handle:
            raise PageError('No node found for selector: ' + selector)
        await handle.hover()
        await handle.dispose()

    async def focus(self, selector: str) -> None:
        """Focus the element which matches ``selector``.

        If no element matched the ``selector``, raise ``PageError``.
        """
        handle = await self.J(selector)
        if not handle:
            raise PageError('No node found for selector: ' + selector)
        await self.evaluate('element => element.focus()', handle)
        await handle.dispose()

    async def select(self, selector: str, *values: str) -> List[str]:
        """Select options and return selected values.

        If no element matched the ``selector``, raise ``ElementHandleError``.
        Raises ``PageError`` if the page has no main frame.
        """
        frame = self.mainFrame
        if not frame:
            raise PageError('no main frame.')
        return await frame.select(selector, *values)

    async def type(self, selector: str, text: str, options: dict = None,
                   **kwargs: Any) -> None:
        """Type ``text`` on the element which matches ``selector``.

        If no element matched the ``selector``, raise ``PageError``.

        Details see :meth:`pyppeteer.input.Keyboard.type`.
        """
        options = merge_dict(options, kwargs)
        handle = await self.querySelector(selector)
        if handle is None:
            raise PageError('Cannot find {} on this page'.format(selector))
        await handle.type(text, options)
        await handle.dispose()

    def waitFor(self, selectorOrFunctionOrTimeout: Union[str, int, float],
                options: dict = None, *args: Any, **kwargs: Any) -> Awaitable:
        """Wait for function, timeout, or element which matches on page.

        This method behaves differently with respect to the first argument:

        * If ``selectorOrFunctionOrTimeout`` is number (int or float), then it
          is treated as a timeout in milliseconds and this returns future which
          will be done after the timeout.
        * If ``selectorOrFunctionOrTimeout`` is a string of JavaScript
          function, this method is a shortcut to :meth:`waitForFunction`.
        * If ``selectorOrFunctionOrTimeout`` is a selector string, this method
          is a shortcut to :meth:`waitForSelector`.

        Pyppeteer tries to automatically detect function or selector, but
        sometimes misdetects. If it does not work as you expected, use
        :meth:`waitForFunction` or :meth:`waitForSelector` directly.

        :arg selectorOrFunctionOrTimeout: A selector or function string, or
                                          timeout (milliseconds).
        :arg Any args: Arguments to pass the function.
        :return: Return awaitable object which resolves to a JSHandle of the
                 success value.

        Available options: see :meth:`waitForFunction` or
        :meth:`waitForSelector`
        """
        frame = self.mainFrame
        if not frame:
            raise PageError('no main frame.')
        return frame.waitFor(
            selectorOrFunctionOrTimeout, options, *args, **kwargs)

    def waitForSelector(self, selector: str, options: dict = None,
                        **kwargs: Any) -> Awaitable:
        """Wait until element which matches ``selector`` appears on page.

        Wait for the ``selector`` to appear in page. If at the moment of
        calling the method the ``selector`` already exists, the method will
        return immediately. If the selector doesn't appear after the
        ``timeout`` milliseconds of waiting, the function will raise error.

        :arg str selector: A selector of an element to wait for.
        :return: Return awaitable object which resolves when element specified
                 by selector string is added to DOM.

        This method accepts the following options:

        * ``visible`` (bool): Wait for element to be present in DOM and to be
          visible; i.e. to not have ``display: none`` or ``visibility: hidden``
          CSS properties. Defaults to ``False``.
        * ``hidden`` (bool): Wait for element to not be found in the DOM or to
          be hidden, i.e. have ``display: none`` or ``visibility: hidden`` CSS
          properties. Defaults to ``False``.
        * ``timeout`` (int|float): Maximum time to wait for in milliseconds.
          Defaults to 30000 (30 seconds).
        """
        frame = self.mainFrame
        if not frame:
            raise PageError('no main frame.')
        return frame.waitForSelector(selector, options, **kwargs)

    def waitForFunction(self, pageFunction: str, options: dict = None,
                        *args: str, **kwargs: Any) -> Awaitable:
        """Wait until the function completes and returns a truthy value.

        :arg Any args: Arguments to pass to ``pageFunction``.
        :return: Return awaitable object which resolves when the
                 ``pageFunction`` returns a truthy value. It resolves to a
                 :class:`~pyppeteer.execution_context.JSHandle` of the truthy
                 value.

        This method accepts the following options:

        * ``polling`` (str|number): An interval at which the ``pageFunction``
          is executed, defaults to ``raf``. If ``polling`` is a number, then
          it is treated as an interval in milliseconds at which the function
          would be executed. If ``polling`` is a string, then it can be one of
          the following values:

          * ``raf``: to constantly execute ``pageFunction`` in
            ``requestAnimationFrame`` callback. This is the tightest polling
            mode which is suitable to observe styling changes.
          * ``mutation``: to execute ``pageFunction`` on every DOM mutation.

        * ``timeout`` (int|float): maximum time to wait for in milliseconds.
        """
        frame = self.mainFrame
        if not frame:
            raise PageError('no main frame.')
        return frame.waitForFunction(pageFunction, options, *args, **kwargs)
class Page(EventEmitter): """Page class. This class provides methods to interact with a single tab of chrome. One :class:`~pyppeteer.browser.Browser` object might have multiple Page object. The :class:`Page` class emits various :attr:`~Page.Events` which can be handled by using ``on`` or ``once`` method, which is inherited from `pyee <https://pyee.readthedocs.io/en/latest/>`_'s ``EventEmitter`` class. """ #: Available events. Events = SimpleNamespace( Console='console', Dialog='dialog', Error='error', PageError='pageerror', Request='request', Response='response', RequestFailed='requestfailed', RequestFinished='requestfinished', FrameAttached='frameattached', FrameDetached='framedetached', FrameNavigated='framenavigated', Load='load', Metrics='metrics', ) PaperFormats: Dict[str, Dict[str, float]] = dict( letter={ 'width': 8.5, 'height': 11 }, legal={ 'width': 8.5, 'height': 14 }, tabloid={ 'width': 11, 'height': 17 }, ledger={ 'width': 17, 'height': 11 }, a0={ 'width': 33.1, 'height': 46.8 }, a1={ 'width': 23.4, 'height': 33.1 }, a2={ 'width': 16.5, 'height': 23.4 }, a3={ 'width': 11.7, 'height': 16.5 }, a4={ 'width': 8.27, 'height': 11.7 }, a5={ 'width': 5.83, 'height': 8.27 }, ) @staticmethod async def create(client: Session, ignoreHTTPSErrors: bool = False, appMode: bool = False, screenshotTaskQueue: list = None) -> 'Page': """Async function which makes new page object.""" await client.send('Page.enable'), frameTree = (await client.send('Page.getFrameTree'))['frameTree'] page = Page(client, frameTree, ignoreHTTPSErrors, screenshotTaskQueue) await asyncio.wait([ client.send('Page.setLifecycleEventsEnabled', {'enabled': True}), client.send('Network.enable', {}), client.send('Runtime.enable', {}), client.send('Security.enable', {}), client.send('Performance.enable', {}), ]) if ignoreHTTPSErrors: await client.send('Security.setOverrideCertificateErrors', {'override': True}) if not appMode: await page.setViewport({'width': 800, 'height': 600}) return page def 
__init__( self, client: Session, frameTree: Dict, ignoreHTTPSErrors: bool = False, screenshotTaskQueue: list = None, ) -> None: super().__init__() self._client = client self._keyboard = Keyboard(client) self._mouse = Mouse(client, self._keyboard) self._touchscreen = Touchscreen(client, self._keyboard) self._frameManager = FrameManager(client, frameTree, self) self._networkManager = NetworkManager(client) self._emulationManager = EmulationManager(client) self._tracing = Tracing(client) self._pageBindings: Dict[str, Callable] = dict() self._ignoreHTTPSErrors = ignoreHTTPSErrors if screenshotTaskQueue is None: screenshotTaskQueue = list() self._screenshotTaskQueue = screenshotTaskQueue _fm = self._frameManager _fm.on(FrameManager.Events.FrameAttached, lambda event: self.emit(Page.Events.FrameAttached, event)) _fm.on(FrameManager.Events.FrameDetached, lambda event: self.emit(Page.Events.FrameDetached, event)) _fm.on(FrameManager.Events.FrameNavigated, lambda event: self.emit(Page.Events.FrameNavigated, event)) _nm = self._networkManager _nm.on(NetworkManager.Events.Request, lambda event: self.emit(Page.Events.Request, event)) _nm.on(NetworkManager.Events.Response, lambda event: self.emit(Page.Events.Response, event)) _nm.on(NetworkManager.Events.RequestFailed, lambda event: self.emit(Page.Events.RequestFailed, event)) _nm.on(NetworkManager.Events.RequestFinished, lambda event: self.emit(Page.Events.RequestFinished, event)) client.on('Page.loadEventFired', lambda event: self.emit(Page.Events.Load)) client.on('Runtime.consoleAPICalled', lambda event: self._onConsoleAPI(event)) client.on('Page.javascriptDialogOpening', lambda event: self._onDialog(event)) client.on( 'Runtime.exceptionThrown', lambda exception: self._handleException( exception.get('exceptionDetails'))) client.on('Security.certificateError', lambda event: self._onCertificateError(event)) client.on('Inspector.targetCrashed', lambda event: self._onTargetCrashed()) client.on('Performance.metrics', lambda 
event: self._emitMetrics(event)) def _onTargetCrashed(self, *args: Any, **kwargs: Any) -> None: self.emit('error', PageError('Page crashed!')) @property def mainFrame(self) -> Optional['Frame']: """Get main :class:`~pyppeteer.frame_manager.Frame` of this page.""" return self._frameManager._mainFrame @property def keyboard(self) -> Keyboard: """Get :class:`~pyppeteer.input.Keyboard` object.""" return self._keyboard @property def touchscreen(self) -> Touchscreen: """Get :class:`~pyppeteer.input.Touchscreen` object.""" return self._touchscreen async def tap(self, selector: str) -> None: """Tap the element which matches the ``selector``. :arg str selector: A selector to search element to touch. """ handle = await self.J(selector) if not handle: raise PageError('No node found for selector: ' + selector) await handle.tap() await handle.dispose() @property def tracing(self) -> 'Tracing': """Get tracing object.""" return self._tracing @property def frames(self) -> List['Frame']: """Get all frames of this page.""" return list(self._frameManager.frames()) async def setRequestInterception(self, value: bool) -> None: """Enable/disable request interception.""" return await self._networkManager.setRequestInterception(value) async def setOfflineMode(self, enabled: bool) -> None: """Set offline mode enable/disable.""" await self._networkManager.setOfflineMode(enabled) def _onCertificateError(self, event: Any) -> None: if not self._ignoreHTTPSErrors: return asyncio.ensure_future( self._client.send('Security.handleCertificateError', { 'eventId': event.get('eventId'), 'action': 'continue' })) async def querySelector(self, selector: str) -> Optional['ElementHandle']: """Get an Element which matches ``selector``. :arg str selector: A selector to search element. :return Optional[ElementHandle]: If element which matches the ``selector`` is found, return its :class:`~pyppeteer.element_handle.ElementHandle`. If not found, returns ``None``. 
""" frame = self.mainFrame if not frame: raise PageError('no main frame.') return await frame.querySelector(selector) async def evaluateHandle(self, pageFunction: str, *args: Any) -> JSHandle: """Execute function on this page. Difference between :meth:`~pyppeteer.page.Page.evaluate` and :meth:`~pyppeteer.page.Page.evaluateHandle` is that ``evaluateHandle`` returns JSHandle object (not value). :arg str pageFunction: JavaScript function to be executed. """ if not self.mainFrame: raise PageError('no main frame.') if not self.mainFrame.executionContext: raise PageError('No context.') return await self.mainFrame.executionContext.evaluateHandle( pageFunction, *args) async def queryObjects(self, prototypeHandle: JSHandle) -> JSHandle: """Iterate js heap and finds all the objects with the handle. :arg JSHandle prototypeHandle: JSHandle of prototype object. """ if not self.mainFrame: raise PageError('no main frame.') if not self.mainFrame.executionContext: raise PageError('No context.') return await self.mainFrame.executionContext.queryObjects( prototypeHandle) async def querySelectorEval(self, selector: str, pageFunction: str, *args: Any) -> Optional[Any]: """Execute function with an element which matches ``selector``. :arg str selector: A selector to query page for. :arg str pageFunction: String of JavaScript function to be evaluated on browser. This function takes an element which matches the selector as a first argument. :arg Any args: Arguments to pass to ``pageFunction``. This method raises error if no element matched the ``selector``. """ frame = self.mainFrame if not frame: raise PageError('no main frame.') return await frame.querySelectorEval(selector, pageFunction, *args) async def querySelectorAllEval(self, selector: str, pageFunction: str, *args: Any) -> Optional[Any]: """Execute function with all elements which matches ``selector``. :arg str selector: A selector to query page for. :arg str pageFunction: String of JavaScript function to be evaluated on browser. 
This function takes Array of the matched elements as the first argument. :arg Any args: Arguments to pass to ``pageFunction``. """ frame = self.mainFrame if not frame: raise PageError('no main frame.') return await frame.querySelectorAllEval(selector, pageFunction, *args) async def querySelectorAll(self, selector: str) -> List['ElementHandle']: """Get all element which matches `selector` as a list. :arg str selector: A selector to search element. :return List[ElementHandle]: List of :class:`~pyppeteer.element_handle.ElementHandle` which matches the ``selector``. If no element is matched to the ``selector``, return empty list. """ frame = self.mainFrame if not frame: raise PageError('no main frame.') return await frame.querySelectorAll(selector) #: alias to :meth:`Page.querySelector` J = querySelector #: alias to :meth:`Page.querySelectorEval` Jeval = querySelectorEval #: alias to :meth:`Page.querySelectorAll` JJ = querySelectorAll #: alias to :meth:`Page.querySelectorAllEval` JJeval = querySelectorAllEval async def cookies(self, *urls: str) -> dict: """Get cookies.""" if not urls: urls = (self.url, ) resp = await self._client.send('Network.getCookies', { 'urls': urls, }) return resp.get('cookies', {}) async def deleteCookie(self, *cookies: dict) -> None: """Delete cookie.""" pageURL = self.url for cookie in cookies: item = dict(**cookie) if not cookie.get('url') and pageURL.startswith('http'): item['url'] = pageURL await self._client.send('Network.deleteCookies', item) async def setCookie(self, *cookies: dict) -> None: """Set cookies.""" items = [] for cookie in cookies: item = dict(**cookie) pageURL = self.url if 'url' not in item and pageURL.startswith('http'): item['url'] = pageURL items.append(item) await self.deleteCookie(*items) if items: await self._client.send('Network.setCookies', { 'cookies': items, }) async def addScriptTag(self, options: Dict = None, **kwargs: str) -> ElementHandle: """Add script tag to this page. 
One of ``url``, ``path`` or ``content`` option is necessary. * ``url`` (string): URL of a script to add. * ``path`` (string): Path to the local JavaScript file to add. * ``content`` (string): JavaScript string to add. :return ElementHandle: :class:`~pyppeteer.element_handle.ElementHandle` of added tag. """ frame = self.mainFrame if not frame: raise PageError('no main frame.') options = merge_dict(options, kwargs) return await frame.addScriptTag(options) async def addStyleTag(self, options: Dict = None, **kwargs: str) -> ElementHandle: """Add style or link tag to this page. One of ``url``, ``path`` or ``content`` option is necessary. * ``url`` (string): URL of the link tag to add. * ``path`` (string): Path to the local CSS file to add. * ``content`` (string): CSS string to add. :return ElementHandle: :class:`~pyppeteer.element_handle.ElementHandle` of added tag. """ frame = self.mainFrame if not frame: raise PageError('no main frame.') options = merge_dict(options, kwargs) return await frame.addStyleTag(options) async def injectFile(self, filePath: str) -> str: """[Deprecated] Inject file to this page. This method is deprecated. Use :meth:`addScriptTag` instead. """ warnings.warn('Page.injectFile is deprecated.', DeprecationWarning) frame = self.mainFrame if not frame: raise PageError('no main frame.') return await frame.injectFile(filePath) async def exposeFunction(self, name: str, pyppeteerFunction: Callable) -> None: """Add python function to the browser's ``window`` object as ``name``. Registered function can be called from chrome process. :arg string name: Name of the function on the window object. :arg Callable pyppeteerFunction: Function which will be called on python process. 
""" if self._pageBindings.get(name): raise PageError(f'Failed to add page binding with name {name}: ' f'window["{name}"] already exists!') self._pageBindings[name] = pyppeteerFunction addPageBinding = ''' function addPageBinding(bindingName) { window[bindingName] = async(...args) => { const me = window[bindingName]; let callbacks = me['callbacks']; if (!callbacks) { callbacks = new Map(); me['callbacks'] = callbacks; } const seq = (me['lastSeq'] || 0) + 1; me['lastSeq'] = seq; const promise = new Promise(fulfill => callbacks.set(seq, fulfill)); // eslint-disable-next-line no-console console.debug('driver:page-binding', JSON.stringify({name: bindingName, seq, args})); return promise; }; } ''' # noqa: E501 expression = helper.evaluationString(addPageBinding, name) await self._client.send('Page.addScriptToEvaluateOnNewDocument', {'source': expression}) await asyncio.wait([ frame.evaluate(expression, force_expr=True) for frame in self.frames ]) async def authenticate(self, credentials: Dict[str, str]) -> Any: """Provide credentials for http authentication. ``credentials`` should be ``None`` or dict which has ``username`` and ``password`` field. 
""" return await self._networkManager.authenticate(credentials) async def setExtraHTTPHeaders(self, headers: Dict[str, str]) -> None: """Set extra http headers.""" return await self._networkManager.setExtraHTTPHeaders(headers) async def setUserAgent(self, userAgent: str) -> None: """Set user agent.""" return await self._networkManager.setUserAgent(userAgent) async def metrics(self) -> Dict[str, Any]: """Get metrics.""" response = await self._client.send('Performance.getMetrics') return self._buildMetricsObject(response['metrics']) def _emitMetrics(self, event: Dict) -> None: self.emit( Page.Events.Metrics, { 'title': event['title'], 'metrics': self._buildMetricsObject(event['metrics']), }) def _buildMetricsObject(self, metrics: List) -> Dict[str, Any]: result = {} for metric in metrics or []: if metric['name'] in supportedMetrics: result[metric['name']] = metric['value'] return result def _handleException(self, exceptionDetails: Dict) -> None: message = helper.getExceptionMessage(exceptionDetails) self.emit(Page.Events.PageError, PageError(message)) def _onConsoleAPI(self, event: dict) -> None: _args = event.get('args', []) if (event.get('type') == 'debug' and _args and _args[0]['value'] == 'driver:page-binding'): obj = json.loads(_args[1]['value']) name = obj.get('name') seq = obj.get('seq') args = obj.get('args') result = self._pageBindings[name](*args) deliverResult = ''' function deliverResult(name, seq, result) { window[name]['callbacks'].get(seq)(result); window[name]['callbacks'].delete(seq); } ''' expression = helper.evaluationString(deliverResult, name, seq, result) asyncio.ensure_future( self._client.send( 'Runtime.evaluate', { 'expression': expression, 'contextId': event['executionContextId'], })) return if not self.listeners(Page.Events.Console): for arg in _args: asyncio.ensure_future(helper.releaseObject(self._client, arg)) return _id = event['executionContextId'] values = [] for arg in _args: values.append(self._frameManager.createJSHandle(_id, arg)) 
textTokens = [] for arg, value in zip(_args, values): if arg.get('objectId'): textTokens.append(value.toString()) else: textTokens.append(str(helper.valueFromRemoteObject(arg))) message = ConsoleMessage(event['type'], ' '.join(textTokens), values) self.emit(Page.Events.Console, message) def _onDialog(self, event: Any) -> None: dialogType = '' _type = event.get('type') if _type == 'alert': dialogType = Dialog.Type.Alert elif (_type == 'confirm'): dialogType = Dialog.Type.Confirm elif (_type == 'prompt'): dialogType = Dialog.Type.Prompt elif (_type == 'beforeunload'): dialogType = Dialog.Type.BeforeUnload dialog = Dialog(self._client, dialogType, event.get('message'), event.get('defaultPrompt')) self.emit(Page.Events.Dialog, dialog) @property def url(self) -> str: """Get url of this page.""" frame = self.mainFrame if not frame: raise PageError('no main frame.') return frame.url async def content(self) -> str: """Get the whole HTML contents of the page.""" return await self.evaluate(''' () => { let retVal = ''; if (document.doctype) retVal = new XMLSerializer().serializeToString(document.doctype); if (document.documentElement) retVal += document.documentElement.outerHTML; return retVal; } '''.strip()) async def setContent(self, html: str) -> None: """Set content to this page.""" func = ''' function(html) { document.open(); document.write(html); document.close(); } ''' await self.evaluate(func, html) async def goto(self, url: str, options: dict = None, **kwargs: Any) -> Optional[Response]: """Go to the url. :arg string url: URL to go. 
""" options = merge_dict(options, kwargs) referrer = self._networkManager.extraHTTPHeaders().get('referer', '') requests: Dict[str, Request] = dict() eventListeners = [ helper.addEventListener( self._networkManager, NetworkManager.Events.Request, lambda request: requests.__setitem__(request.url, request)) ] mainFrame = self._frameManager.mainFrame if mainFrame is None: raise PageError('No main frame.') watcher = NavigatorWatcher(self._frameManager, mainFrame, options) result = await self._navigate(url, referrer) if result is not None: raise PageError(result) result = await watcher.navigationPromise() watcher.cancel() helper.removeEventListeners(eventListeners) error = result[0].pop().exception() # type: ignore if error: raise error request = requests.get(mainFrame.url) return request.response if request else None async def _navigate(self, url: str, referrer: str) -> Optional[str]: response = await self._client.send('Page.navigate', { 'url': url, 'referrer': referrer }) if response.get('errorText'): return response['errorText'] return None async def reload(self, options: dict = None, **kwargs: Any) -> Optional[Response]: """Reload this page.""" options = merge_dict(options, kwargs) response = (await asyncio.gather( self.waitForNavigation(options), self._client.send('Page.reload'), ))[0] return response async def waitForNavigation(self, options: dict = None, **kwargs: Any) -> Optional[Response]: """Wait for navigation completes.""" options = merge_dict(options, kwargs) mainFrame = self._frameManager.mainFrame if mainFrame is None: raise PageError('No main frame.') watcher = NavigatorWatcher(self._frameManager, mainFrame, options) responses: Dict[str, Response] = dict() listener = helper.addEventListener( self._networkManager, NetworkManager.Events.Response, lambda response: responses.__setitem__(response.url, response)) result = await watcher.navigationPromise() helper.removeEventListeners([listener]) error = result[0].pop().exception() if error: raise error response 
= responses.get(self.url, None) return response async def goBack(self, options: dict = None, **kwargs: Any) -> Optional[Response]: """Go back history.""" options = merge_dict(options, kwargs) return await self._go(-1, options) async def goForward(self, options: dict = None, **kwargs: Any) -> Optional[Response]: """Go forward history.""" options = merge_dict(options, kwargs) return await self._go(+1, options) async def _go(self, delta: int, options: dict) -> Optional[Response]: history = await self._client.send('Page.getNavigationHistory') _count = history.get('currentIndex', 0) + delta entries = history.get('entries', []) if len(entries) < _count: return None entry = entries[_count] response = (await asyncio.gather( self.waitForNavigation(options), self._client.send('Page.navigateToHistoryEntry', {'entryId': entry.get('id')})))[0] return response async def bringToFront(self) -> None: """Bring page to front (activate tab).""" await self._client.send('Page.bringToFront') async def emulate(self, options: dict = None, **kwargs: Any) -> None: """Emulate viewport and user agent.""" options = merge_dict(options, kwargs) # TODO: if options does not have viewport or userAgent, # skip its setting. await self.setViewport(options.get('viewport', {})) await self.setUserAgent(options.get('userAgent', '')) async def setJavaScriptEnabled(self, enabled: bool) -> None: """Set JavaScript enable/disable.""" await self._client.send('Emulation.setScriptExecutionDisabled', { 'value': not enabled, }) async def emulateMedia(self, mediaType: str = None) -> None: """Emulate css media type of the page.""" if mediaType not in ['screen', 'print', None, '']: raise ValueError(f'Unsupported media type: {mediaType}') await self._client.send('Emulation.setEmulatedMedia', { 'media': mediaType or '', }) async def setViewport(self, viewport: dict) -> None: """Set viewport. Available options are: * ``width`` (int): page width in pixel. * ``height`` (int): page height in pixel. 
* ``deviceScaleFactor`` (float): Default to 1.0. * ``isMobile`` (bool): Default to ``False``. * ``hasTouch`` (bool): Default to ``False``. * ``isLandscape`` (bool): Default to ``False``. """ needsReload = await self._emulationManager.emulateViewport( self._client, viewport, ) self._viewport = viewport if needsReload: await self.reload() @property def viewport(self) -> dict: """Get viewport dict. Field of returned dict is same as :meth:`setViewport`. """ return self._viewport async def evaluate(self, pageFunction: str, *args: Any, force_expr: bool = False) -> Any: """Execute js-function or js-expression on browser and get result. :arg str pageFunction: String of js-function/expression to be executed on the browser. :arg bool force_expr: If True, evaluate `pageFunction` as expression. If False (default), try to automatically detect function or expression. note: ``force_expr`` option is a keyword only argument. """ frame = self._frameManager.mainFrame if frame is None: raise PageError('No main frame.') return await frame.evaluate(pageFunction, *args, force_expr=force_expr) async def evaluateOnNewDocument(self, pageFunction: str, *args: str) -> None: """Add a JavaScript function to the document. This function would be invoked in one of the following scenarios: * whenever the page is navigated * whenever the child frame is attached or navigated. Inthis case, the function is invoked in the context of the newly attached frame. """ source = helper.evaluationString(pageFunction, *args) await self._client.send('Page.addScriptToEvaluateOnNewDocument', { 'source': source, }) async def screenshot(self, options: dict = None, **kwargs: Any) -> bytes: """Take a screen shot. The following options are available: * ``path`` (str): The file path to save the image to. The screenshot type will be inferred from the file extension. * ``type`` (str): Specify screenshot type, can be either ``jpeg`` or ``png``. Defaults to ``png``. 
* ``quality`` (int): The quality of the image, between 0-100. Not applicable to ``png`` image. * ``fullpage`` (bool): When true, take a screenshot of the full scrollable page. Defaults to ``False``. * ``clip`` (dict): An object which specifies clipping region of the page. This option should have the following fields: * ``x`` (int): x-coordinate of top-left corner of clip area. * ``y`` (int): y-coordinate of top-left corner of clip area. * ``width`` (int): width of clipping area. * ``height`` (int): height of clipping area. * ``omitBackground`` (bool): Hide default white background and allow capturing screenshot with transparency. """ options = merge_dict(options, kwargs) screenshotType = None if 'path' in options: mimeType, _ = mimetypes.guess_type(options['path']) if mimeType == 'image/png': screenshotType = 'png' elif mimeType == 'image/jpeg': screenshotType = 'jpeg' else: raise PageError('Unsupported screenshot ' f'mime type: {mimeType}') if 'type' in options: screenshotType = options['type'] if not screenshotType: screenshotType = 'png' return await self._screenshotTask(screenshotType, options) async def _screenshotTask(self, format: str, options: dict) -> bytes: # noqa: C901,E501 await self._client.send('Target.activateTarget', { 'targetId': self._client.targetId, }) clip = options.get('clip') if clip: clip['scale'] = 1 if options.get('fullPage'): metrics = await self._client.send('Page.getLayoutMetrics') width = math.ceil(metrics['contentSize']['width']) height = math.ceil(metrics['contentSize']['height']) # Overwrite clip for full page at all times. 
clip = dict(x=0, y=0, width=width, height=height, scale=1) mobile = self._viewport.get('isMobile', False) deviceScaleFactor = self._viewport.get('deviceScaleFactor', 1) landscape = self._viewport.get('isLandscape', False) if landscape: screenOrientation = dict(angle=90, type='landscapePrimary') else: screenOrientation = dict(angle=0, type='portraitPrimary') await self._client.send( 'Emulation.setDeviceMetricsOverride', { 'mobile': mobile, 'width': width, 'height': height, 'deviceScaleFactor': deviceScaleFactor, 'screenOrientation': screenOrientation, }) if options.get('omitBackground'): await self._client.send( 'Emulation.setDefaultBackgroundColorOverride', {'color': { 'r': 0, 'g': 0, 'b': 0, 'a': 0 }}, ) opt = {'format': format} if clip: opt['clip'] = clip result = await self._client.send('Page.captureScreenshot', opt) if options.get('omitBackground'): await self._client.send( 'Emulation.setDefaultBackgroundColorOverride') if options.get('fullPage'): await self.setViewport(self._viewport) buffer = base64.b64decode(result.get('data', b'')) _path = options.get('path') if _path: with open(_path, 'wb') as f: f.write(buffer) return buffer async def pdf(self, options: dict = None, **kwargs: Any) -> bytes: """Generate a pdf of the page. Options: * ``path`` (str): The file path to save the PDF. * ``scale`` (float): Scale of the webpage rendering, defaults to ``1``. * ``displayHeaderFooter`` (bool): Display header and footer. Defaults to ``False``. * ``headerTemplate`` (str): HTML template for the print header. Should be valid HTML markup with following classes. * ``data``: formatted print date * ``title``: document title * ``url``: document location * ``pageNumber``: current page number * ``totalPages``: total pages in the document * ``footerTemplate`` (str): HTML template for the print footer. Should use the same template as ``headerTemplate``. * ``printBackground`` (bool): Print background graphics. Defaults to ``False``. * ``landscape`` (bool): Paper orientation. 
Defaults to ``False``. * ``pageRanges`` (string): Paper ranges to print, e.g., '1-5,8,11-13'. Defaults to empty string, which means all pages. * ``foramt`` (str): Paper format. If set, takes prioprity over ``width`` or ``height``. Defaults to ``Letter``. * ``width`` (str): Paper width, accepts values labeled with units. * ``height`` (str): Paper height, accepts values labeled with units. * ``margin`` (dict): Paper margins, defaults to ``None``. * ``top`` (str): Top margin, accepts values labeled with units. * ``right`` (str): Right margin, accepts values labeled with units. * ``bottom`` (str): Bottom margin, accepts values labeled with units. * ``left`` (str): Left margin, accepts values labeled with units. :return bytes: Return generated PDF ``bytes`` object. """ options = merge_dict(options, kwargs) scale = options.get('scale', 1) displayHeaderFooter = bool(options.get('displayHeaderFooter')) headerTemplate = options.get('headerTemplate', '') footerTemplate = options.get('footerTemplate', '') printBackground = bool(options.get('printBackground')) landscape = bool(options.get('landscape')) pageRanges = options.get('pageRanges', '') paperWidth = 8.5 paperHeight = 11.0 if 'format' in options: fmt = Page.PaperFormats.get(options['format'].lower()) if not fmt: raise ValueError('Unknown paper format: ' + options['format']) paperWidth = fmt['width'] paperHeight = fmt['height'] else: paperWidth = convertPrintParameterToInches( options.get('width')) or paperWidth # noqa: E501 paperHeight = convertPrintParameterToInches( options.get('height')) or paperHeight # noqa: E501 marginOptions = options.get('margin', {}) marginTop = convertPrintParameterToInches( marginOptions.get('top')) or 0 # noqa: E501 marginLeft = convertPrintParameterToInches( marginOptions.get('left')) or 0 # noqa: E501 marginBottom = convertPrintParameterToInches( marginOptions.get('bottom')) or 0 # noqa: E501 marginRight = convertPrintParameterToInches( marginOptions.get('right')) or 0 # noqa: E501 result 
= await self._client.send( 'Page.printToPDF', dict(landscape=landscape, displayHeaderFooter=displayHeaderFooter, headerTemplate=headerTemplate, footerTemplate=footerTemplate, printBackground=printBackground, scale=scale, paperWidth=paperWidth, paperHeight=paperHeight, marginTop=marginTop, marginBottom=marginBottom, marginLeft=marginLeft, marginRight=marginRight, pageRanges=pageRanges)) buffer = base64.b64decode(result.get('data', b'')) if 'path' in options: with open(options['path'], 'wb') as f: f.write(buffer) return buffer async def plainText(self) -> str: """[Deprecated] Get page content as plain text.""" warnings.warn('page.plainText is deprecated.', DeprecationWarning) return await self.evaluate('() => document.body.innerText') async def title(self) -> str: """Get page title.""" frame = self.mainFrame if not frame: raise PageError('no main frame.') return await frame.title() async def close(self) -> None: """Close connection.""" await self._client.dispose() @property def mouse(self) -> Mouse: """Get :class:`~pyppeteer.input.Mouse` object.""" return self._mouse async def click(self, selector: str, options: dict = None, **kwargs: Any) -> None: """Click element which matches `selector`.""" options = merge_dict(options, kwargs) handle = await self.J(selector) if not handle: raise PageError('No node found for selector: ' + selector) await handle.click(options) await handle.dispose() async def hover(self, selector: str) -> None: """Mouse hover the element which matches ``selector``. If no element matched the ``selector``, raise ``PageError``. """ handle = await self.J(selector) if not handle: raise PageError('No node found for selector: ' + selector) await handle.hover() await handle.dispose() async def focus(self, selector: str) -> None: """Focus the element which matches ``selector``. If no element matched the ``selector``, raise ``PageError``. 
""" handle = await self.J(selector) if not handle: raise PageError('No node found for selector: ' + selector) await self.evaluate('element => element.focus()', handle) await handle.dispose() async def select(self, selector: str, *values: str) -> List[str]: """Select options and return selected values. If no element matched the ``selector``, raise ``ElementHandleError``. """ frame = self.mainFrame if not frame: raise PageError('no main frame.') return await frame.select(selector, *values) async def type(self, selector: str, text: str, options: dict = None, **kwargs: Any) -> None: """Type ``text`` on the element which matches ``selector``. If no element matched the ``selector``, raise ``PageError``. Details see :meth:`pyppeteer.input.Keyboard.type`. """ options = merge_dict(options, kwargs) handle = await self.querySelector(selector) if handle is None: raise PageError('Cannot find {} on this page'.format(selector)) await handle.type(text, options) await handle.dispose() def waitFor(self, selectorOrFunctionOrTimeout: Union[str, int, float], options: dict = None, *args: Any, **kwargs: Any) -> Awaitable: """Wait for function, timeout, or element which matches on page. This method behaves differently with respect to the first argument: * If ``selectorOrFunctionOrTimeout`` is number (int or float), then it is treated as a timeout in milliseconds and this returns future which will be done after the timeout. * If ``selectorOrFunctionOrTimeout`` is a string of JavaScript function, this method is a shortcut to :meth:`waitForFunction`. * If ``selectorOrFunctionOrTimeout`` is a selector string, this method is a shortcut to :meth:`waitForSelector`. Pyppeteer tries to automatically detect function or selector, but sometimes miss-detects. If not work as you expected, use :meth:`waitForFunction` or :meth:`waitForSelector` dilectly. 
""" frame = self.mainFrame if not frame: raise PageError('no main frame.') return frame.waitFor(selectorOrFunctionOrTimeout, options, *args, **kwargs) def waitForSelector(self, selector: str, options: dict = None, **kwargs: Any) -> Awaitable: """Wait until element which matches ``selector`` appears on page. This method accepts the following options: * ``visible`` (bool): Wait for element to be present in DOM and to be visible; i.e. to not have ``display: none`` or ``visibility: hidden`` CSS properties. Defaults to ``False``. * ``hidden`` (bool): Wait for eleemnt to not be found in the DOM or to be hidden, i.e. have ``display: none`` or ``visibility: hidden`` CSS properties. Defaults to ``False``. * ``timeout`` (int|float): Maximum time to wait for in milliseconds. Defaults to 30000 (30 seconds). """ frame = self.mainFrame if not frame: raise PageError('no main frame.') return frame.waitForSelector(selector, options, **kwargs) def waitForFunction(self, pageFunction: str, options: dict = None, *args: str, **kwargs: Any) -> Awaitable: """Wait until the function completes. This method accepts the following options: * ``polling`` (str|number): **Not Implemented Yet** * ``timeout`` (int|float): maximum time to wait for in milliseconds. """ frame = self.mainFrame if not frame: raise PageError('no main frame.') return frame.waitForFunction(pageFunction, options, *args, **kwargs)
class FrameManager(AsyncIOEventEmitter):
    """Keep the frame tree and execution contexts of a page up to date.

    The manager subscribes to ``Page.*`` and ``Runtime.*`` events of the
    given client, maintains a ``frameId -> Frame`` mapping and a
    ``contextId -> ExecutionContext`` mapping, and re-emits frame changes
    under the event names listed in :attr:`Events`.
    """

    # Names of the events emitted by this manager.
    Events = SimpleNamespace(
        FrameAttached='frameattached',
        FrameNavigated='framenavigated',
        FrameDetached='framedetached',
        LifecycleEvent='lifecycleevent',
        FrameNavigatedWithinDocument='framenavigatedwithindocument',
    )

    def __init__(self, client: CDPSession, page: Any,
                 ignoreHTTPSErrors) -> None:
        """Make new frame manager.

        :arg client: Session whose ``on`` is used to subscribe to protocol
            events and whose ``send`` is used to issue commands.
        :arg page: Owning page object; only stored on the instance here.
        :arg ignoreHTTPSErrors: Forwarded unchanged to the
            ``NetworkManager`` created for this frame manager.
        """
        super().__init__()
        self._client = client
        self._page = page
        self._networkmanager = NetworkManager(client, self, ignoreHTTPSErrors)
        # Insertion-ordered map of frame id -> Frame for every known frame.
        self._frames: OrderedDict[str, Frame] = OrderedDict()
        self._mainFrame: Optional[Frame] = None
        self._contextIdToContext: Dict[str, ExecutionContext] = dict()
        # Names of isolated worlds that were already requested.
        self._isolatedWorlds = set()

        client.on(
            'Page.frameAttached',
            lambda event: self._onFrameAttached(
                event.get('frameId', ''), event.get('parentFrameId', '')))
        client.on(
            'Page.frameNavigated',
            lambda event: self._onFrameNavigated(event.get('frame')))
        client.on(
            'Page.navigatedWithinDocument',
            lambda event: self._onFrameNavigatedWithinDocument(
                event.get('frameId'), event.get('url')))
        client.on(
            'Page.frameDetached',
            lambda event: self._onFrameDetached(event.get('frameId')))
        client.on(
            'Page.frameStoppedLoading',
            lambda event: self._onFrameStoppedLoading(event.get('frameId')))
        client.on(
            'Runtime.executionContextCreated',
            lambda event: self._onExecutionContextCreated(
                event.get('context')))
        client.on(
            'Runtime.executionContextDestroyed',
            lambda event: self._onExecutionContextDestroyed(
                event.get('executionContextId')))
        client.on(
            'Runtime.executionContextsCleared',
            lambda event: self._onExecutionContextsCleared())
        client.on(
            'Page.lifecycleEvent',
            lambda event: self._onLifecycleEvent(event))

    async def initialize(self) -> None:
        """Enable the protocol domains and load the current frame tree.

        Sends ``Page.enable`` and ``Page.getFrameTree`` together, registers
        the returned frames, then enables lifecycle events, the ``Runtime``
        domain and the network manager concurrently.
        """
        _, treeResult = await asyncio.gather(
            self._client.send('Page.enable'),
            self._client.send('Page.getFrameTree'),
        )
        self._handleFrameTree(treeResult['frameTree'])

        runtime = asyncio.ensure_future(self._client.send('Runtime.enable'))
        # Once the Runtime.enable future is done, schedule creation of the
        # utility world on the client's loop.
        runtime.add_done_callback(lambda x: self._client._loop.create_task(
            self._ensureIsolatedWorld(UTILITY_WORLD_NAME)))
        await asyncio.gather(
            self._client.send('Page.setLifecycleEventsEnabled',
                              {"enabled": True}),
            runtime,
            self._networkmanager.initialize(),
        )

    async def _ensureIsolatedWorld(self, name: str) -> None:
        """Request the isolated world ``name`` once for all known frames.

        Repeated calls with the same ``name`` return immediately.
        """
        if name in self._isolatedWorlds:
            return
        self._isolatedWorlds.add(name)
        await self._client.send('Page.addScriptToEvaluateOnNewDocument', {
            "source": f'//# sourceURL={EVALUATION_SCRIPT_URL}',
            "worldName": name
        })
        # NOTE: "grantUniveralAccess" is sent verbatim as a protocol
        # parameter key; do not "fix" its spelling here.
        await asyncio.gather(*(
            self._client.send('Page.createIsolatedWorld', {
                "frameId": frame._id,
                "grantUniveralAccess": True,
                "worldName": name
            })
            for frame in self.frames()
        ))

    def _onLifecycleEvent(self, event: Dict) -> None:
        """Forward a lifecycle event to its frame; ignore unknown frames."""
        frame = self._frames.get(event['frameId'])
        if not frame:
            return
        frame._onLifecycleEvent(event['loaderId'], event['name'])
        self.emit(FrameManager.Events.LifecycleEvent, frame)

    def _onFrameStoppedLoading(self, frameId: str) -> None:
        """Tell the frame that loading stopped; ignore unknown frames."""
        frame = self._frames.get(frameId)
        if not frame:
            return
        frame._onLoadingStopped()
        self.emit(FrameManager.Events.LifecycleEvent, frame)

    def _handleFrameTree(self, frameTree: Dict) -> None:
        """Register ``frameTree['frame']`` and, recursively, its children."""
        frame = frameTree['frame']
        if 'parentId' in frame:
            self._onFrameAttached(
                frame['id'],
                frame['parentId'],
            )
        self._onFrameNavigated(frame)
        if 'childFrames' not in frameTree:
            return
        for child in frameTree['childFrames']:
            self._handleFrameTree(child)

    @property
    def mainFrame(self) -> Optional['Frame']:
        """Return main frame."""
        return self._mainFrame

    def frames(self) -> List['Frame']:
        """Return all frames."""
        return list(self._frames.values())

    def frame(self, frameId: str) -> Optional['Frame']:
        """Return :class:`Frame` of ``frameId``."""
        return self._frames.get(frameId)

    def _onFrameAttached(self, frameId: str, parentFrameId: str) -> None:
        """Create and announce a frame unless ``frameId`` is already known."""
        if frameId in self._frames:
            return
        parentFrame = self._frames.get(parentFrameId)
        frame = Frame(self, self._client, parentFrame, frameId)
        self._frames[frameId] = frame
        self.emit(FrameManager.Events.FrameAttached, frame)

    def _onFrameNavigated(self, framePayload: dict) -> None:
        """Apply a navigation payload to the matching frame.

        A payload without ``parentId`` is treated as the main frame.

        :raises PageError: If a non-main frame is navigated that this
            manager does not know about.
        """
        isMainFrame = not framePayload.get('parentId')
        _id = framePayload.get('id', '')
        if isMainFrame:
            frame = self._mainFrame
        else:
            frame = self._frames.get(_id)
        if not (isMainFrame or frame):
            raise PageError('We either navigate top level or have old version '
                            'of the navigated frame')

        # Detach all child frames first. Iterate over a snapshot in case
        # detaching alters the parent's child collection.
        if frame:
            for child in list(frame.childFrames):
                self._removeFramesRecursively(child)

        # Update or create main frame.
        if isMainFrame:
            if frame:
                # Update frame id to retain frame identity on cross-process navigation.  # noqa: E501
                self._frames.pop(frame._id, None)
                frame._id = _id
            else:
                # Initial main frame navigation.
                frame = Frame(self, self._client, None, _id)
            self._frames[_id] = frame
            self._mainFrame = frame

        # Update frame payload.
        frame._navigated(framePayload)  # type: ignore
        self.emit(FrameManager.Events.FrameNavigated, frame)

    def _onFrameNavigatedWithinDocument(self, frameId: str, url: str
                                        ) -> None:
        """Record a same-document navigation; ignore unknown frames."""
        frame = self._frames.get(frameId)
        if not frame:
            return
        frame._navigatedWithinDocument(url)
        self.emit(FrameManager.Events.FrameNavigatedWithinDocument, frame)
        self.emit(FrameManager.Events.FrameNavigated, frame)

    def _onFrameDetached(self, frameId: str) -> None:
        """Remove the frame ``frameId`` and its descendants, if known."""
        frame = self._frames.get(frameId)
        if frame:
            self._removeFramesRecursively(frame)

    def _onExecutionContextCreated(self, contextPayload: Dict) -> None:
        """Register a new execution context.

        The context is attached to a frame (and becomes that frame's default
        context) only when the payload's ``auxData`` marks it as default and
        the referenced frame is known.
        """
        auxData = contextPayload.get('auxData')
        if auxData and auxData['isDefault']:
            frameId = auxData['frameId']
        else:
            frameId = None

        frame = self._frames.get(frameId) if frameId else None
        context = ExecutionContext(
            self._client,
            contextPayload,
            frame,
        )
        self._contextIdToContext[contextPayload['id']] = context
        if frame:
            frame._setDefaultContext(context)

    def _removeContext(self, context: ExecutionContext) -> None:
        """Clear the owning frame's default context, if the frame remains."""
        # The frame may already have been dropped from ``_frames`` by
        # ``_removeFramesRecursively``; a plain subscript would then raise
        # KeyError, so look it up tolerantly.
        if context._frameId:
            frame = self._frames.get(context._frameId)
        else:
            frame = None
        if frame and context._isDefault:
            frame._setDefaultContext(None)

    def _onExecutionContextDestroyed(self, executionContextId: str) -> None:
        """Forget a destroyed context; unknown ids are ignored."""
        context = self._contextIdToContext.pop(executionContextId, None)
        if not context:
            return
        self._removeContext(context)

    def _onExecutionContextsCleared(self) -> None:
        """Forget every registered execution context."""
        for context in self._contextIdToContext.values():
            self._removeContext(context)
        self._contextIdToContext.clear()

    def createJSHandle(self, contextId: str, remoteObject: Dict = None
                       ) -> JSHandle:
        """Create JS handle associated to the context id and remote object.

        :raises ElementHandleError: If ``contextId`` is not registered.
        """
        if remoteObject is None:
            remoteObject = dict()
        context = self._contextIdToContext.get(contextId)
        if not context:
            raise ElementHandleError(f'missing context with id = {contextId}')
        return createJSHandle(context, remoteObject)

    def _removeFramesRecursively(self, frame: 'Frame') -> None:
        """Detach ``frame`` after its descendants and announce each one."""
        # Snapshot the children in case detaching alters the collection.
        for child in list(frame.childFrames):
            self._removeFramesRecursively(child)
        frame._detach()
        self._frames.pop(frame._id, None)
        self.emit(FrameManager.Events.FrameDetached, frame)

    @property
    def NetworkManager(self):
        """Return the network manager created by this frame manager."""
        return self._networkmanager
class Page(EventEmitter):
    """Browser page driven through a DevTools-protocol client session.

    The page owns a keyboard, mouse, frame manager, network manager and
    emulation manager built on the same client, and re-emits their events
    (plus the client's ``Page.loadEventFired``) under the names in
    :attr:`Events`.

    NOTE(review): this class calls a snake_case manager API
    (``FrameManager.Events[...]``, ``main_frame()``,
    ``is_main_frame_loading_failed()``) -- confirm which manager classes
    are actually bound to these names at import time.
    """

    # Logical event name -> name of the event emitted by the page.
    Events = {
        'Console': 'console',
        'Dialog': 'dialog',
        'Error': 'error',
        'PageError': 'pageerror',
        'Request': 'request',
        'Response': 'response',
        'RequestFailed': 'requestfailed',
        'RequestFinished': 'requestfinished',
        'FrameAttached': 'frameattached',
        'FrameDetached': 'framedetached',
        'FrameNavigated': 'framenavigated',
        'Load': 'load',
    }

    @staticmethod
    async def create(client, ignore_https_errors, screenshot_task_queue):
        """Build a ready-to-use page on ``client``.

        Enables the Network, Page, Runtime and Security domains, optionally
        overrides certificate errors, navigates to ``about:blank`` and sets
        an 800x600 viewport.

        :return: The initialised :class:`Page`.
        """
        await asyncio.gather(
            client.send('Network.enable', {}),
            client.send('Page.enable', {}),
            client.send('Runtime.enable', {}),
            client.send('Security.enable', {}),
        )
        if ignore_https_errors:
            await client.send('Security.setOverrideCertificateErrors',
                              {'override': True})
        page = Page(client, ignore_https_errors, screenshot_task_queue)
        await page.goto('about:blank')
        await page.set_viewport({'width': 800, 'height': 600})
        return page

    def __init__(self, client, ignore_https_errors=True,
                 screenshot_task_queue=None):
        """Wire the input devices, managers and event forwarding.

        :arg client: Protocol session used for ``send`` and ``on``.
        :arg ignore_https_errors: Passed to the navigation watcher created
            by :meth:`goto` and :meth:`wait_for_navigation`.
        :arg screenshot_task_queue: Stored on the instance; not used by the
            methods of this class.
        """
        super().__init__()
        self._client = client
        self._keyborad = Keyboard(client)
        self._mouse = Mouse(client, self._keyborad)
        self._frame_manager = FrameManager(client, self._mouse)
        self._network_manager = NetworkManager(client)
        self._emulation_manager = EmulationManager(client)
        self._tracing = None
        self._page_bindings = {}
        self._ignore_https_errors = ignore_https_errors
        self._screenshot_task_queue = screenshot_task_queue
        # Last viewport handed to set_viewport(); empty until then.
        self._viewport = {}

        for name in ('FrameAttached', 'FrameDetached', 'FrameNavigated'):
            self._frame_manager.on(FrameManager.Events[name],
                                   self._forwarder(Page.Events[name]))
        for name in ('Request', 'Response', 'RequestFailed',
                     'RequestFinished'):
            self._network_manager.on(NetworkManager.Events[name],
                                     self._forwarder(Page.Events[name]))

        client.on('Page.loadEventFired',
                  lambda event: self.emit(Page.Events['Load']))
        # TODO: 'Runtime.consoleAPICalled', 'Page.javascriptDialogOpening'
        # and 'Runtime.exceptionThrown' are not handled by this class yet.
        client.on('Security.certificateError', self._on_certificate_error)
        client.on('Inspector.targetCrashed', self._on_target_crashed)

    def _forwarder(self, event_name):
        """Return a one-argument callback re-emitting as ``event_name``."""
        # A factory (rather than a lambda in the loop) binds the name now.
        return lambda ev: self.emit(event_name, ev)

    @staticmethod
    async def _settle(value):
        """Await ``value`` if it is a coroutine or future, else return it."""
        if asyncio.iscoroutine(value) or asyncio.isfuture(value):
            return await value
        return value

    def _get_scope(self, responses):
        """Return a callback storing each response under ``response.url``."""
        def _tmp(response):
            responses[response.url] = response

        return _tmp

    def _on_target_crashed(self, event=None):
        """Emit ``'error'`` with a "Page crashed!" exception.

        ``event`` is accepted (and ignored) so the handler works whether or
        not the emitter passes the event payload.
        """
        self.emit('error', Exception('Page crashed!'))

    def main_frame(self):
        """Return the frame manager's main frame."""
        return self._frame_manager.main_frame()

    @property
    def keyboard(self):
        """Return the keyboard bound to this page."""
        return self._keyborad

    @property
    def tracing(self):
        """Return the tracing object (``None`` in this implementation)."""
        return self._tracing

    def frames(self):
        """Return the frames reported by the frame manager."""
        return self._frame_manager.frames()

    async def set_request_interception_enabled(self, value):
        """Delegate request-interception switching to the network manager."""
        return await self._network_manager.set_request_interception_enabled(
            value)

    def _on_certificate_error(self, event):
        """Log a ``Security.certificateError`` event."""
        import logging  # local import: the file's import block is not visible

        logging.getLogger(__name__).warning('Certificate error: %s', event)

    async def S(self, selector):
        """Query the main frame with ``selector`` via its ``S`` method."""
        # presumably Frame.S is a coroutine function -- resolve it if so.
        return await self._settle(self.main_frame().S(selector))

    async def SS(self, selector):
        """Query the main frame with ``selector`` via its ``SS`` method."""
        return await self._settle(self.main_frame().SS(selector))

    async def goto(self, url, options=None):
        """Navigate to ``url`` and wait for the navigation to finish.

        :arg url: Address passed to ``Page.navigate``.
        :arg options: Passed to the navigation watcher (``{}`` if omitted).
        :return: The response recorded for the main frame's final URL, or
            ``None`` if none was seen.
        :raises Exception: If the frame manager reports that loading the
            main frame failed.
        """
        options = {} if options is None else options
        watcher = NavigatorWatcher(self._client, self._ignore_https_errors,
                                   options)
        responses = {}
        listener = Helper.add_event_listener(
            self._network_manager, NetworkManager.Events['Response'],
            self._get_scope(responses))
        try:
            result = watcher.wait_for_navigation()
            referrer = self._network_manager.extra_http_headers().get(
                'referer', '')
            try:
                await self._client.send('Page.navigate', {
                    'url': url,
                    'referrer': referrer
                })
            except Exception:
                watcher.cancel()
                raise
            await result
        finally:
            # Always detach the listener, also when navigation fails.
            Helper.remove_event_listeners([listener])

        if self._frame_manager.is_main_frame_loading_failed():
            raise Exception('Failed to navigate: {}'.format(url))
        return responses.get(self.main_frame().url(), None)

    async def set_viewport(self, viewport=None):
        """Emulate ``viewport`` and reload if the emulation requires it."""
        viewport = {} if viewport is None else viewport
        needs_reload = await self._emulation_manager.emulate_viewport(
            self._client, viewport)
        self._viewport = viewport
        if needs_reload:
            await self.reload()

    def viewport(self):
        """Return the viewport last passed to :meth:`set_viewport`."""
        return self._viewport

    async def reload(self, options=None):
        """Reload the page and wait for the resulting navigation."""
        await self._client.send('Page.reload')
        return await self.wait_for_navigation(options)

    async def wait_for_navigation(self, options=None):
        """Wait for a navigation to finish.

        :arg options: Passed to the navigation watcher (``{}`` if omitted).
        :return: The response recorded for the main frame's final URL, or
            ``None`` if none was seen.
        """
        options = {} if options is None else options
        watcher = NavigatorWatcher(self._client, self._ignore_https_errors,
                                   options)
        responses = {}
        listener = Helper.add_event_listener(
            self._network_manager, NetworkManager.Events['Response'],
            self._get_scope(responses))
        try:
            await watcher.wait_for_navigation()
        finally:
            Helper.remove_event_listeners([listener])
        return responses.get(self.main_frame().url(), None)

    async def screenshot(self, options=None):
        """Validate ``options`` and capture a screenshot.

        Recognised keys: ``path``, ``type`` (``'png'``/``'jpeg'``),
        ``quality`` (int 0-100, jpeg only), ``clip`` (numeric ``x``, ``y``,
        ``width``, ``height``), ``fullPage`` and ``omitBackground``. The
        type defaults to the one implied by ``path``, else ``'png'``.

        :return: The decoded image bytes.
        :raises AssertionError: If an option fails validation.
        """
        options = {} if options is None else options
        screenshot_type = None

        path = options.get('path')
        if path:
            mime_type, _ = mimetypes.guess_type(path)
            if mime_type == 'image/png':
                screenshot_type = 'png'
            elif mime_type == 'image/jpeg':
                screenshot_type = 'jpeg'
            assert screenshot_type, \
                'Unsupported screenshot mime type: {}'.format(mime_type)

        requested = options.get('type')
        if requested:
            assert not screenshot_type or requested == screenshot_type, \
                'Screenshot type does not match the file extension'
            assert requested in ['png', 'jpeg'], \
                'Unknown screenshot type: {}'.format(requested)
            screenshot_type = requested

        if not screenshot_type:
            screenshot_type = 'png'

        quality = options.get('quality')
        if quality:
            assert screenshot_type == 'jpeg', \
                'quality is only supported for jpeg screenshots'
            assert isinstance(quality, int), 'quality must be an int'
            assert 0 <= quality <= 100, 'quality must be within 0..100'

        clip = options.get('clip')
        if clip:
            for key in ('x', 'y', 'height', 'width'):
                assert isinstance(clip[key], (int, float)), \
                    'clip.{} must be a number'.format(key)

        # TODO: route through self._screenshot_task_queue once it is ported.
        return await self._screenshot_task(screenshot_type, options)

    async def _screenshot_task(self, _format, options=None):
        """Capture the screenshot described by ``options``.

        :arg _format: Image format sent to ``Page.captureScreenshot``.
        :arg options: Same mapping as accepted by :meth:`screenshot`.
        :return: The decoded image bytes (also written to ``path`` if set).
        """
        options = {} if options is None else options
        await self._client.send('Target.activateTarget',
                                {'targetId': self._client.target_id()})

        # Work on a copy so the caller's clip dict is not modified.
        clip = dict(options['clip'], scale=1) if options.get('clip') else None
        full_page = bool(options.get('fullPage'))
        omit_background = bool(options.get('omitBackground'))

        if full_page:
            metrics = await self._client.send('Page.getLayoutMetrics')
            width = math.ceil(metrics['contentSize']['width'])
            height = math.ceil(metrics['contentSize']['height'])
            # A full-page capture always replaces any requested clip.
            clip = {
                'x': 0,
                'y': 0,
                'width': width,
                'height': height,
                'scale': 1
            }
            landscape = self._viewport.get('isLandscape', False)
            if landscape:
                screen_orientation = {'angle': 90, 'type': 'landscapePrimary'}
            else:
                screen_orientation = {'angle': 0, 'type': 'portraitPrimary'}
            # width/height only exist on this path, so the metrics override
            # belongs to the full-page branch.
            await self._client.send(
                'Emulation.setDeviceMetricsOverride', {
                    'mobile': self._viewport.get('isMobile', False),
                    'width': width,
                    'height': height,
                    'deviceScaleFactor': self._viewport.get(
                        'deviceScaleFactor', 1),
                    'screenOrientation': screen_orientation
                })

        if omit_background:
            await self._client.send(
                'Emulation.setDefaultBackgroundColorOverride',
                {'color': {
                    'r': 0,
                    'g': 0,
                    'b': 0,
                    'a': 0
                }})

        screenshot_data = {'format': _format}
        if 'quality' in options:
            screenshot_data['quality'] = options['quality']
        if clip:
            screenshot_data['clip'] = clip

        try:
            result = await self._client.send('Page.captureScreenshot',
                                             screenshot_data)
        finally:
            # Undo the temporary overrides even if the capture failed.
            if omit_background:
                await self._client.send(
                    'Emulation.setDefaultBackgroundColorOverride')
            if full_page:
                await self.set_viewport(self._viewport)

        buffr = base64.b64decode(result['data'])
        path = options.get('path')
        if path:
            with open(path, 'wb') as fl:
                fl.write(buffr)
        return buffr

    async def title(self):
        """Return the main frame's title."""
        return await self.main_frame().title()

    @property
    def mouse(self):
        """Return the mouse bound to this page."""
        return self._mouse

    async def close(self):
        """Dispose of the underlying client session."""
        await self._client.dispose()