def launch(self) -> Browser:
    """Start the chromium process and return a connected ``Browser``.

    The child is spawned from ``self.cmd`` with stderr merged into stdout and
    ``self.killChrome`` is registered to run at interpreter exit. The child's
    output is then read line by line, for at most 100 attempts spaced 0.1 sec
    apart, looking for the ``DevTools listening on ws://...`` banner.

    :raises BrowserError: if the process exits while waiting, or if no
        DevTools endpoint was announced after all attempts.
    """
    import time

    self.proc = subprocess.Popen(
        self.cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    atexit.register(self.killChrome)

    found = None
    for _attempt in range(100):
        time.sleep(0.1)
        if self.proc.poll() is not None:
            # The child already terminated; there is nothing to connect to.
            raise BrowserError('Unexpectedly chrome process closed with '
                               f'return code: {self.proc.returncode}')
        line = self.proc.stdout.readline().decode()
        if line:
            found = re.match(r'DevTools listening on (ws://.*)$', line)
            if found is not None:
                break
    if found is None:
        raise BrowserError('Failed to connect DevTools port.')

    logger.debug(found.group(0))
    endpoint = found.group(1).strip()
    delay = self.options.get('slowMo', 0)
    return Browser(
        Connection(endpoint, delay),
        self.options.get('ignoreHTTPSErrors', False),
        self.killChrome,
    )
async def newPage(self) -> Page:
    """Make new page on this browser and return its object.

    Sends ``Target.createTarget`` for ``about:blank``, looks the returned
    target id up in ``self._targets``, waits for the target's initialization
    promise and finally asks the target for its page.

    :raises BrowserError: if the target is unknown, fails to initialize, or
        yields no page.
    """
    response = await self._connection.send(
        'Target.createTarget', {'url': 'about:blank'})
    target = self._targets.get(response.get('targetId'))
    if target is None or not await target._initializedPromise:
        raise BrowserError('Failed to create target for page.')
    page = await target.page()
    if page is None:
        raise BrowserError('Failed to create page.')
    return page
async def _rawEvaluate(self, pageFunction: str, *args: Any) -> dict:
    """Evaluate ``pageFunction`` in the default context; return the raw result.

    With no ``args`` the function text is turned into an expression and sent
    through ``Runtime.evaluate``; otherwise it is sent as a function
    declaration through ``Runtime.callFunctionOn`` with converted arguments.

    :return: the ``result`` entry of the protocol response (empty dict when
        absent).
    :raises BrowserError: if the response carries ``exceptionDetails``.
    """
    if args:
        method = 'Runtime.callFunctionOn'
        params = {
            'functionDeclaration': pageFunction,
            'executionContextId': self._defaultContextId,
            'arguments': [self._convertArgument(arg) for arg in args],
            'returnByValue': False,
            'awaitPromise': True,
        }
    else:
        method = 'Runtime.evaluate'
        params = {
            'expression': helper.evaluationString(pageFunction),
            'contextId': self._defaultContextId,
            'returnByValue': False,
            'awaitPromise': True,
        }
    response = await self._client.send(method, params)

    details = response.get('exceptionDetails', dict())
    if details:
        raise BrowserError('Evaluation failed: ' +
                           helper.getExceptionMessage(details) +
                           f'\npageFunction:\n{pageFunction}')
    return response.get('result', dict())
def __init__(self, frame: Frame, _type: str, expr: str, timeout: float,
             *args: Any, interval: float = 0) -> None:
    """Make new wait task.

    :arg Frame frame: frame whose ``_waitTasks`` set this task joins.
    :arg str _type: either ``'function'`` or ``'selector'``.
    :arg str expr: stored as ``self.expr``.
    :arg float timeout: msec after which the task is terminated with a
        ``BrowserError``.
    :arg float interval: msec stored as the task interval; when ``0`` (the
        default) one hundredth of ``timeout`` is used instead.
    :raises ValueError: if ``_type`` is not a supported type.
    """
    if _type not in ('function', 'selector'):
        raise ValueError('Unsupported type for WaitTask: ' + _type)
    super().__init__()

    timeoutSec = timeout / 1000
    intervalSec = interval / 1000
    if not intervalSec:
        intervalSec = timeoutSec / 100

    self.__frame: Frame = frame
    self.__type = _type
    self.expr = expr
    self.__timeout = timeoutSec  # sec
    self.__interval = intervalSec  # sec
    self.__runCount: int = 0
    self.__terminated = False
    self.__done = False
    frame._waitTasks.add(self)

    def _onTimeout() -> None:
        self.terminate(
            BrowserError(f'waiting failed: timeout {timeout}ms exceeded'))

    # Since page navigation requires us to re-install the pageScript,
    # the timeout is tracked on this side with an event-loop timer.
    self.__loop = asyncio.get_event_loop()
    self.__timeoutTimer = self.__loop.call_later(self.__timeout, _onTimeout)
    asyncio.ensure_future(self.rerun(True))
async def _createPageInContext(self, contextId: Optional[str]) -> Page:
    """Create a blank page, optionally inside the given browser context.

    ``Target.createTarget`` is sent for ``about:blank``; when ``contextId``
    is truthy it is passed along as ``browserContextId``. The created target
    is looked up in ``self._targets``, awaited for initialization and asked
    for its page.

    :raises BrowserError: if the target is unknown, fails to initialize, or
        yields no page.
    """
    params = {'url': 'about:blank'}
    if contextId:
        params['browserContextId'] = contextId

    response = await self._connection.send('Target.createTarget', params)
    target = self._targets.get(response.get('targetId'))
    if target is None or not await target._initializedPromise:
        raise BrowserError('Failed to create target for page.')

    page = await target.page()
    if page is None:
        raise BrowserError('Failed to create page.')
    return page
async def _targetCreated(self, event: Dict) -> None:
    """Register the target described by ``event`` and announce it.

    A ``Target`` is built from ``event['targetInfo']`` and stored in
    ``self._targets`` under its target id. ``TargetCreated`` is emitted only
    when the target's initialization promise resolves to a truthy value.

    :raises BrowserError: if a target with the same id is already registered.
    """
    info = event['targetInfo']
    target = Target(self, info)
    targetId = info['targetId']
    if targetId in self._targets:
        raise BrowserError('target should not exist before create.')
    self._targets[targetId] = target

    initialized = await target._initializedPromise
    if initialized:
        self.emit(Browser.Events.TargetCreated, target)
async def connect(options: dict = None, **kwargs: Any) -> Browser:
    """Connect to the existing chrome.

    ``browserWSEndpoint`` option is necessary to connect to the chrome. The
    format is ``ws://${host}:${port}/devtools/browser/<id>``. This value can
    be obtained from :attr:`~pyppeteer.browser.Browser.wsEndpoint`.

    Available options are:

    * ``browserWSEndpoint`` (str): A browser websocket endpoint to connect to.
      (**required**)
    * ``ignoreHTTPSErrors`` (bool): Whether to ignore HTTPS errors. Defaults to
      ``False``.
    * ``slowMo`` (int|float): Slow down pyppeteer's by the specified amount of
      milliseconds.
    * ``logLevel`` (int|str): Log level to print logs. Defaults to same as the
      root logger.
    * ``loop`` (asyncio.AbstractEventLoop): Event loop (**experimental**).

    :raises BrowserError: if ``browserWSEndpoint`` is missing or empty.
    """
    options = merge_dict(options, kwargs)

    level = options.get('logLevel')
    if level:
        logging.getLogger('pyppeteer').setLevel(level)

    endpoint = options.get('browserWSEndpoint')
    if not endpoint:
        raise BrowserError('Need `browserWSEndpoint` option.')

    delay = options.get('slowMo', 0)
    loop = options.get('loop', asyncio.get_event_loop())
    connection = Connection(endpoint, loop, delay)

    def _closeBrowser() -> Any:
        # Close callback handed to the browser: asks the remote chrome to quit.
        return connection.send('Browser.close')

    return await Browser.create(connection, options, None, _closeBrowser)
async def evaluate(self, pageFunction: str, *args: Any) -> Any:
    """[Deprecated] Evaluate the pageFunction on browser.

    Logs and raises a deprecation warning, then calls ``pageFunction`` in the
    browser with ``this`` (the element) as first argument followed by the
    JSON-encoded ``args``.

    :raises ElementHandleError: if this handle has been disposed.
    :raises BrowserError: if the evaluation reports exception details.
    """
    deprecation_msg = ('ElementHandle.evaluate is dropped in puppeteer. '
                       'Use Page.evaluate(..., ElementHandle) instead.')
    logger.warning('[DEPRECATED] ' + deprecation_msg)
    warnings.warn(deprecation_msg, DeprecationWarning)

    if self._disposed:
        raise ElementHandleError('ElementHandle is disposed!')

    stringifiedArgs = ','.join(['this'] + [json.dumps(x) for x in args])
    functionDeclaration = f''' function() {{ return ({pageFunction})({stringifiedArgs}) }} '''
    params = {
        'objectId': self._remoteObject.get('objectId'),
        'functionDeclaration': functionDeclaration,
        'returnByValue': False,
        'awaitPromise': True,
    }
    response = await self._client.send('Runtime.callFunctionOn', params)

    details = response.get('exceptionDetails', dict())
    result = response.get('result', dict())
    if details:
        raise BrowserError(
            'Evaluation failed: ' + helper.getExceptionMessage(details))
    return await helper.serializeRemoteObject(self._client, result)
async def connect(options: dict = None, **kwargs: Any) -> Browser:
    """Connect to the existing chrome.

    ``browserWSEndpoint`` option is necessary to connect to the chrome. The
    format is ``ws://${host}:${port}/devtools/browser/<id>``. This value can
    be obtained from :attr:`~pyppeteer.browser.Browser.wsEndpoint`.

    Available options are:

    * ``browserWSEndpoint`` (str): A browser websocket endpoint to connect to.
      (**required**)
    * ``ignoreHTTPSErrors`` (bool): Whether to ignore HTTPS errors. Defaults to
      ``False``.
    * ``slowMo`` (int|float): Slow down pyppeteer's by the specified amount of
      milliseconds.

    :raises BrowserError: if ``browserWSEndpoint`` is missing or empty.
    """
    options = merge_dict(options, kwargs)
    endpoint = options.get('browserWSEndpoint')
    if not endpoint:
        raise BrowserError('Need `browserWSEndpoint` option.')
    connection = Connection(endpoint)

    def _closeBrowser() -> Any:
        # Close callback handed to the browser: asks the remote chrome to quit.
        return connection.send('Browser.close')

    return await Browser.create(connection, options, None, _closeBrowser)
async def connect(options: dict = None, **kwargs: Any) -> Browser:
    """Connect to the existing chrome.

    ``browserWSEndpoint`` or ``browserURL`` option is necessary to connect to
    the chrome. The format of ``browserWSEndpoint`` is
    ``ws://${host}:${port}/devtools/browser/<id>`` and format of
    ``browserURL`` is ``http://127.0.0.1:9222``. The value of
    ``browserWSEndpoint`` can get by
    :attr:`~pyppeteer.browser.Browser.wsEndpoint`.

    Available options are:

    * ``browserWSEndpoint`` (str): A browser websocket endpoint to connect to.
    * ``browserURL`` (str): A browser URL to connect to. Only consulted when
      ``browserWSEndpoint`` is not given.
    * ``ignoreHTTPSErrors`` (bool): Whether to ignore HTTPS errors. Defaults to
      ``False``.
    * ``defaultViewport`` (dict): Set a consistent viewport for each page.
      Defaults to an 800x600 viewport. ``None`` disables default viewport.

      * ``width`` (int): page width in pixels.
      * ``height`` (int): page height in pixels.
      * ``deviceScaleFactor`` (int|float): Specify device scale factor (can be
        thought as dpr). Defaults to ``1``.
      * ``isMobile`` (bool): Whether the ``meta viewport`` tag is taken into
        account. Defaults to ``False``.
      * ``hasTouch`` (bool): Specify if viewport supports touch events.
        Defaults to ``False``.
      * ``isLandscape`` (bool): Specify if viewport is in landscape mode.
        Defaults to ``False``.

    * ``slowMo`` (int|float): Slow down pyppeteer's by the specified amount of
      milliseconds.
    * ``logLevel`` (int|str): Log level to print logs. Defaults to same as the
      root logger.
    * ``loop`` (asyncio.AbstractEventLoop): Event loop (**experimental**).

    :raises BrowserError: if neither ``browserWSEndpoint`` nor ``browserURL``
        is given.
    """
    options = merge_dict(options, kwargs)
    logLevel = options.get('logLevel')
    if logLevel:
        logging.getLogger('pyppeteer').setLevel(logLevel)

    browserWSEndpoint = options.get('browserWSEndpoint')
    if not browserWSEndpoint:
        browserURL = options.get('browserURL')
        if not browserURL:
            raise BrowserError(
                'Need `browserWSEndpoint` or `browserURL` option.')
        # get_ws_endpoint() is a synchronous call; running it directly here
        # would stall every other task on the event loop until it returns,
        # so it is delegated to the default executor instead.
        browserWSEndpoint = await asyncio.get_event_loop().run_in_executor(
            None, get_ws_endpoint, browserURL)

    connectionDelay = options.get('slowMo', 0)
    connection = Connection(browserWSEndpoint,
                            options.get('loop', asyncio.get_event_loop()),
                            connectionDelay)
    browserContextIds = (await connection.send('Target.getBrowserContexts')
                         ).get('browserContextIds', [])
    ignoreHTTPSErrors = bool(options.get('ignoreHTTPSErrors', False))
    defaultViewport = options.get('defaultViewport',
                                  {'width': 800, 'height': 600})
    return await Browser.create(connection, browserContextIds,
                                ignoreHTTPSErrors, defaultViewport, None,
                                lambda: connection.send('Browser.close'))
async def _targetInfoChanged(self, event: Dict) -> None:
    """Apply updated target info and announce URL changes.

    The target registered under the event's target id receives the new info.
    ``TargetChanged`` is emitted only when the target was already initialized
    before the update and its URL differs afterwards.

    :raises BrowserError: if no target is registered for the id.
    """
    info = event['targetInfo']
    target = self._targets.get(info['targetId'])
    if not target:
        raise BrowserError('target should exist before targetInfoChanged')

    # Snapshot the state before the update so the change can be detected.
    urlBefore = target.url
    initializedBefore = target._isInitialized
    target._targetInfoChanged(info)

    if initializedBefore and urlBefore != target.url:
        self.emit(Browser.Events.TargetChanged, target)
async def launch(self) -> Browser:
    """Start chrome process and return `Browser` object.

    The process is spawned from ``self.cmd`` with stderr merged into stdout,
    and an ``atexit`` hook is registered that kills chrome unless
    ``self.chromeClosed`` is already set. The child's output is then read
    line by line, for at most 100 attempts spaced 0.1 sec apart, looking for
    the ``DevTools listening on ws://...`` banner.

    The ``env`` option, when given, is passed to the child as its
    environment; when omitted the child inherits this process's environment.

    :raises BrowserError: if chrome exits while waiting (the temporary user
        data dir is cleaned up first), or if no DevTools endpoint was
        announced after all attempts.
    """
    import time

    # ``env=None`` makes Popen inherit the parent's environment. The previous
    # default of ``{}`` started chrome with a completely *empty* environment
    # (no PATH, DISPLAY, HOME, ...) whenever the option was not supplied.
    env = self.options.get('env')
    self.chromeClosed = False
    self.connection: Optional[Connection] = None
    self.proc = subprocess.Popen(
        self.cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        env=env,
    )

    def _close_process() -> None:
        if not self.chromeClosed:
            asyncio.get_event_loop().run_until_complete(self.killChrome())

    # don't forget to close browser process
    atexit.register(_close_process)

    # NOTE(review): ``time.sleep`` and ``readline`` below are blocking calls
    # inside a coroutine; a timeout timer would be better.
    for _ in range(100):
        # wait for DevTools port to open for at least 10sec
        time.sleep(0.1)
        if self.proc.poll() is not None:
            self._cleanup_tmp_user_data_dir()
            raise BrowserError('Unexpectedly chrome process closed with '
                               f'return code: {self.proc.returncode}')
        msg = self.proc.stdout.readline().decode()
        if not msg:
            continue
        m = re.match(r'DevTools listening on (ws://.*)$', msg)
        if m is not None:
            break
    else:
        # This block called only when `for`-loop does not `break`
        raise BrowserError('Failed to connect DevTools port.')

    logger.debug(m.group(0))
    connectionDelay = self.options.get('slowMo', 0)
    self.browserWSEndpoint = m.group(1).strip()
    self.connection = Connection(self.browserWSEndpoint, connectionDelay)
    return await Browser.create(
        self.connection, self.options, self.killChrome)
async def close(self) -> None:
    """Close the browser context.

    All the targets that belong to the browser context will be closed.

    .. note::
        Only incognito browser context can be closed.

    :raises BrowserError: if this context has no id (``self._id is None``).
    """
    contextId = self._id
    if contextId is None:
        raise BrowserError('Non-incognito profile cannot be closed')
    await self._browser._disposeContext(contextId)
def _get_ws_endpoint(self) -> str:
    """Poll the browser's ``/json/version`` URL for its websocket endpoint.

    While the browser process is alive, the URL is fetched every 0.1 sec
    until a JSON document can be read from it.

    :return: the ``webSocketDebuggerUrl`` value of that document.
    :raises BrowserError: if the process exits before the URL answers; the
        process's remaining output is appended to the message.
    """
    from http.client import HTTPException

    url = self.url + '/json/version'
    while self.proc.poll() is None:
        time.sleep(0.1)
        try:
            with urlopen(url) as f:
                data = json.loads(f.read().decode())
            break
        except (URLError, HTTPException):
            # The port may accept connections before the endpoint is ready
            # to answer; such responses surface as HTTPException rather than
            # URLError, so both mean "not ready yet - retry".
            continue
    else:
        # Loop ended without `break`: the process is no longer running.
        raise BrowserError('Browser closed unexpectedly:\n{}'.format(
            self.proc.stdout.read().decode()))
    return data['webSocketDebuggerUrl']
def _get_ws_endpoint(self) -> str:
    """Poll the browser's ``/json/version`` URL for its websocket endpoint.

    The URL is fetched up to 100 times, 0.1 sec apart, until a JSON document
    can be read from it.

    :return: the ``webSocketDebuggerUrl`` value of that document.
    :raises BrowserError: if every attempt fails.
    """
    from http.client import HTTPException

    url = self.url + '/json/version'
    for _ in range(100):
        time.sleep(0.1)
        try:
            with urlopen(url) as f:
                data = json.loads(f.read().decode())
            break
        except (URLError, HTTPException):
            # The port may accept connections before the endpoint is ready
            # to answer; such responses surface as HTTPException rather than
            # URLError, so both mean "not ready yet - retry".
            continue
    else:
        # Loop ended without `break`: no attempt succeeded.
        raise BrowserError(
            'Failed to connect to browser port: {}'.format(url))
    return data['webSocketDebuggerUrl']
async def _targetCreated(self, event: Dict) -> None:
    """Register the target described by ``event`` and announce it.

    A ``Target`` is built from ``event['targetInfo']`` together with a
    session factory bound to the target id, then stored in ``self._targets``.
    ``TargetCreated`` is emitted only when the target's initialization
    promise resolves to a truthy value.

    :raises BrowserError: if a target with the same id is already registered.
    """
    info = event['targetInfo']

    def _openSession() -> Any:
        # Deferred so the session is only created when the target asks.
        return self._connection.createSession(info['targetId'])

    target = Target(
        info,
        _openSession,
        self._ignoreHTTPSErrors,
        self._appMode,
        self._screenshotTaskQueue,
    )
    targetId = info['targetId']
    if targetId in self._targets:
        raise BrowserError('target should not exist before create.')
    self._targets[targetId] = target

    initialized = await target._initializedPromise
    if initialized:
        self.emit(Browser.Events.TargetCreated, target)
def _get_ws_endpoint(self) -> str:
    """Poll the browser's ``/json/version`` URL for its websocket endpoint.

    The URL is fetched up to 100 times, 0.1 sec apart, until a JSON document
    can be read from it.

    :return: the ``webSocketDebuggerUrl`` value of that document.
    :raises BrowserError: if every attempt fails.
    """
    from http.client import HTTPException

    url = self.url + '/json/version'
    for _ in range(100):
        time.sleep(0.1)
        try:
            with urlopen(url) as f:
                data = json.loads(f.read().decode())
            break
        except (URLError, HTTPException):
            # The port may accept connections before the endpoint is ready
            # to answer; such responses surface as HTTPException rather than
            # URLError, so both mean "not ready yet - retry".
            continue
    else:
        # cannot connect to browser for 10 seconds
        raise BrowserError(f'Failed to connect to browser port: {url}')
    return data['webSocketDebuggerUrl']
def get_ws_endpoint(url: str) -> str:
    """Poll ``<url>/json/version`` until it answers; return the ws endpoint.

    The URL is fetched repeatedly, sleeping 0.1 sec after each failed
    attempt, for up to 30 seconds.

    :arg str url: base URL of the browser, without the ``/json/version``
        suffix.
    :return: the ``webSocketDebuggerUrl`` value of the returned document.
    :raises BrowserError: if no document could be read before the deadline.
    """
    url = url + '/json/version'
    # A monotonic clock keeps the 30 sec budget immune to wall-clock jumps.
    deadline = time.monotonic() + 30
    while True:
        if time.monotonic() > deadline:
            raise BrowserError(
                'Failed to connect to browser: no response from {} '
                'within 30 seconds.'.format(url))
        try:
            with urlopen(url) as f:
                data = json.loads(f.read().decode())
            break
        except (URLError, HTTPException):
            # Endpoint not ready yet; retry after a short pause.
            pass
        time.sleep(0.1)
    return data['webSocketDebuggerUrl']
async def _visibleCenter(self) -> Dict[str, int]:
    """Scroll the element into view and return the center of its visible part.

    The evaluated script returns ``null`` when the element is no longer
    contained in its owner document; otherwise it scrolls the element into
    view and averages the bounding rect edges clamped to the window.

    :return: dict with ``x`` and ``y`` keys. The values are halves of sums,
        so they may be fractional.
    :raises BrowserError: if the element is detached from the document.
    """
    center = await self.evaluate(
        ' element => {'
        ' if (!element.ownerDocument.contains(element)) return null;'
        ' element.scrollIntoViewIfNeeded();'
        ' let rect = element.getBoundingClientRect();'
        ' return {'
        ' x: (Math.max(rect.left, 0) + Math.min(rect.right, window.innerWidth)) / 2,'  # noqa: E501
        ' y: (Math.max(rect.top, 0) + Math.min(rect.bottom, window.innerHeight)) / 2'  # noqa: E501
        ' };'
        ' } '
    )
    if not center:
        # The script only yields null for an element outside its document;
        # no selector is involved here, so say what actually happened.
        raise BrowserError('Node is detached from document')
    return center
async def _targetCreated(self, event: Dict) -> None:
    """Register the target described by ``event`` and announce it.

    The target is attached to the browser context named by the info's
    ``browserContextId`` when that context is known, otherwise to the default
    context. After registration in ``self._targets``, ``TargetCreated`` is
    emitted on both the browser and the context, but only when the target's
    initialization promise resolves to a truthy value.

    :raises BrowserError: if a target with the same id is already registered.
    """
    targetInfo = event['targetInfo']
    contextId = targetInfo.get('browserContextId')
    context = (
        self._contexts[contextId]
        if contextId and contextId in self._contexts
        else self._defaultContext
    )

    target = Target(
        targetInfo,
        context,
        lambda: self._connection.createSession(targetInfo),
        self._ignoreHTTPSErrors,
        self._setDefaultViewport,
        self._screenshotTaskQueue,
        self._connection._loop,
    )
    targetId = targetInfo['targetId']
    if targetId in self._targets:
        raise BrowserError('target should not exist before create.')
    self._targets[targetId] = target

    initialized = await target._initializedPromise
    if initialized:
        self.emit(Browser.Events.TargetCreated, target)
        context.emit(BrowserContext.Events.TargetCreated, target)
async def _rawEvaluate(self, pageFunction: str, *args: str) -> dict:
    """Evaluate ``pageFunction`` in the default context; return the raw result.

    The function text and ``args`` are turned into one expression that is
    sent through ``Runtime.evaluate``.

    :return: the ``result`` entry of the protocol response (empty dict when
        absent).
    :raises BrowserError: if the response carries ``exceptionDetails``.
    """
    params = {
        'expression': helper.evaluationString(pageFunction, *args),
        'contextId': self._defaultContextId,
        'returnByValue': False,
        'awaitPromise': True,
    }
    response = await self._client.send('Runtime.evaluate', params)

    details = response.get('exceptionDetails', dict())
    if details:
        raise BrowserError(
            'Evaluation failed: ' +
            helper.getExceptionMessage(details) +
            f'\npageFunction:\n{pageFunction}'
        )
    return response.get('result', dict())
async def evaluate(self, pageFunction: str, *args: Any) -> Any:
    """Evaluate the pageFunction on browser.

    ``pageFunction`` is called in the browser with ``this`` (the element) as
    first argument followed by the JSON-encoded ``args``.

    :raises ElementHandleError: if this handle has been disposed.
    :raises BrowserError: if the evaluation reports exception details.
    """
    if self._disposed:
        raise ElementHandleError('ElementHandle is disposed!')

    stringifiedArgs = ','.join(['this'] + [json.dumps(x) for x in args])
    functionDeclaration = f''' function() {{ return ({pageFunction})({stringifiedArgs}) }} '''
    params = {
        'objectId': self._remoteObject.get('objectId'),
        'functionDeclaration': functionDeclaration,
        'returnByValue': False,
        'awaitPromise': True,
    }
    response = await self._client.send('Runtime.callFunctionOn', params)

    details = response.get('exceptionDetails', dict())
    result = response.get('result', dict())
    if details:
        raise BrowserError('Evaluation failed: ' +
                           helper.getExceptionMessage(details))
    return await helper.serializeRemoteObject(self._client, result)
async def _targetInfoChanged(self, event: Dict) -> None:
    """Forward updated target info to the registered target.

    :raises BrowserError: if no target is registered for the event's id.
    """
    info = event['targetInfo']
    target = self._targets.get(info['targetId'])
    if target:
        target._targetInfoChanged(info)
        return
    raise BrowserError('target should exist before targetInfoChanged')
async def close(self) -> None:
    """Close the browser context.

    All the targets that belong to the browser context will be closed.

    .. note::
        Only incognito browser context can be closed.

    :raises BrowserError: if this context has no id (``self._id is None``).
    """
    if self._id is not None:
        await self._browser._disposeContext(self._id)
        return
    raise BrowserError('Non-incognito profile cannot be closed')