async def screenshot_driver(printer: Printer, tasks=[]) -> Union[List, Tuple[str, dict]]:
    """Render ``printer.link`` in a (possibly shared) browser and return the result.

    Args:
        printer: Job description. This function reads ``PID``, ``resolution``,
            ``link``, ``type`` and ``arguments_to_print`` from it.
        tasks: List holding the live browser object. NOTE: the mutable default
            is deliberate here -- when callers omit ``tasks`` the same list is
            reused across calls, which is what lets a later call pick up the
            browser launched by an earlier one. Do not "fix" it to ``None``
            without providing another shared store.

    Returns:
        * ``printer.type == "pdf"``: whatever ``page.pdf(...)`` returns.
        * ``printer.type == "statics"``: ``(page title, page metrics)``.
        * otherwise: whatever ``page.screenshot(...)`` returns.

    Raises:
        ResponseNotReady: if the browser cannot be launched, or if navigation
            raises ``errors.PageError``.
    """
    if tasks:
        LOGGER.info(
            f'WEB_SCRS:{printer.PID} --> browser object >> yielded from existing task list'
        )
        browser = tasks[0]
    else:
        LOGGER.info(
            f'WEB_SCRS:{printer.PID} --> no browser object exists >> creating new'
        )
        try:
            browser = await launch_chrome()
            tasks.append(browser)
        except Exception as e:
            LOGGER.critical(e)
            # Chain the cause so the original traceback is not lost.
            raise ResponseNotReady(e) from e

    page = await browser.newPage()
    LOGGER.debug(
        f'WEB_SCRS:{printer.PID} --> created new page object >> now setting viewport'
    )
    await page.setViewport(printer.resolution)
    LOGGER.debug(f'WEB_SCRS:{printer.PID} --> fetching received link')
    try:
        await page.goto(printer.link)
        LOGGER.debug(
            f'WEB_SCRS:{printer.PID} --> link fetched successfully >> now rendering page'
        )
        if printer.type == "pdf":
            end_file = await page.pdf(printer.arguments_to_print)
        elif printer.type == "statics":
            LOGGER.debug(
                f'WEB_SCRS:{printer.PID} --> site metrics detected >> now rendering image'
            )
            end_file = (await page.title(), await page.metrics())
        else:
            end_file = await page.screenshot(printer.arguments_to_print)
        return end_file
    except errors.PageError as err:
        LOGGER.info(
            f'WEB_SCRS:{printer.PID} --> request failed -> Excepted PageError >> invalid link'
        )
        raise ResponseNotReady("Not a valid link 😓🤔") from err
    finally:
        # Cleanup runs on success and on failure alike.
        await asyncio.sleep(2)
        LOGGER.debug(
            f'WEB_SCRS:{printer.PID} --> page rendered successfully >> now closing page object'
        )
        await page.close()
        # Query the page list once; the original asked twice and then tested
        # ``< 2`` in the second branch, so the "task pending" message could
        # never be logged while other pages were actually open.
        remaining_pages = len(await browser.pages())
        if remaining_pages == 1:
            # Presumably the single survivor is the browser's initial blank
            # tab, i.e. no other job is using this browser -- TODO confirm.
            LOGGER.info(
                f'WEB_SCRS:{printer.PID} --> no task pending >> closing browser object'
            )
            if browser in tasks:
                tasks.remove(browser)
            await browser.close()
        else:
            LOGGER.info(
                f'WEB_SCRS:{printer.PID} --> task pending >> leaving browser intact'
            )
async def launch_chrome(retry=False) -> Browser:
    """Launch a headless Chrome instance, retrying once on ``BadStatusLine``.

    Args:
        retry: ``False`` for a first attempt (one more attempt is made after a
            1.5 s pause if the launch raises ``BadStatusLine``); ``True`` marks
            the call as the final attempt.

    Returns:
        The browser object returned by ``launch``.

    Raises:
        ResponseNotReady: if the final attempt also raises ``BadStatusLine``.
    """
    try:
        browser = await launch(
            headless=True,
            logLevel=50,
            executablePath=EXEC_PATH,
            # Previously the literal string "ignoreHTTPSErrors=True" sat inside
            # ``args``. It is not a Chrome switch, so it was passed to the
            # browser as a positional argument and never disabled certificate
            # checks. Assuming ``launch`` is pyppeteer's, the option belongs
            # here as a keyword -- NOTE(review): confirm against the import.
            ignoreHTTPSErrors=True,
            args=[
                "--no-sandbox",
                "--single-process",
                "--disable-dev-shm-usage",
                "--disable-gpu",
                "--no-zygote",
            ],
        )
        return browser
    except BadStatusLine as err:
        if retry:
            LOGGER.info(
                "WEB_SCRS --> request failed -> Excepted BadStatusLine >> max retry exceeded"
            )
            raise ResponseNotReady("Soory the site is not responding") from err
        LOGGER.info(
            "WEB_SCRS --> request failed -> Excepted BadStatusLine >> retrying..."
        )
        await asyncio.sleep(1.5)
        return await launch_chrome(True)
def download(self, task_uuid, output=Output.JSON):
    """Retrieve the result of a bulk-search task.

    The URL comes from the ``'retrieve-bulk-search'`` endpoint template with
    ``task_uuid`` substituted in, and is requested with GET using headers
    built for ``output``.

    Raises:
        ResponseNotReady: when the server answers 202; the exception carries
            the ``message`` field of the JSON body (empty string if absent).

    Returns:
        ``parse_response(response)`` for any other status code.
    """
    endpoint_template = self._build_url_for_endpoint('retrieve-bulk-search')
    task_url = endpoint_template.format(task_uuid=task_uuid)
    request_headers = self._get_headers(output=output)
    response: Response = self.datalake_requests(
        task_url, 'get', headers=request_headers)

    if response.status_code != 202:
        return parse_response(response)

    not_ready_message = response.json().get('message', '')
    raise ResponseNotReady(not_ready_message)
def get_header(self, key, default=''):
    """Return the value of header ``key`` as a single object.

    All values stored under ``key`` are fetched with ``headers.get_all``; if
    that yields nothing truthy, ``default`` is used instead. A string or a
    non-iterable result is returned untouched, any other iterable is joined
    with ``', '``.

    Raises:
        ResponseNotReady: if ``self.headers`` is ``None``.
    """
    if self.headers is None:
        raise ResponseNotReady()

    values = self.headers.get_all(key) or default
    is_single_value = isinstance(values, str) or not hasattr(values, '__iter__')
    return values if is_single_value else ', '.join(values)
async def screenshot_engine(browser: Browser, printer: Printer, user_lock: asyncio.Event):
    """Open ``printer.link`` in a new page of ``browser`` and render it.

    Args:
        browser: Browser in which a fresh page is opened (and always closed
            again before returning).
        printer: Job description. Read here: ``resolution``, ``link``,
            ``type``, ``scroll_control``, ``fullpage``, ``arguments_to_print``
            and ``file``; ``slugify`` and ``set_location`` are called on it.
        user_lock: Event that, once set, ends the progressive scroll early.

    Behaviour by ``printer.type``:
        * ``"statics"``: page height/width and ``page.metrics()`` are drawn by
          ``draw_statics`` in the default executor and the result is handed to
          ``printer.set_location``.
        * ``"pdf"``: ``page.pdf`` writes to ``printer.file``.
        * anything else: ``page.screenshot`` writes to ``printer.file``.

    Raises:
        ResponseNotReady: on ``errors.PageError`` or when the coroutine is
            cancelled (``asyncio.CancelledError``).
    """
    page = await browser.newPage()
    await page.setViewport(printer.resolution)
    try:
        await page.goto(printer.link, dict(timeout=60000))
        # The injected script is expected to provide the JS helpers evaluated
        # below (get_height, progressive_scroll, ...) -- presumably; the script
        # itself is not visible from here.
        title, _ = await asyncio.gather(
            page.title(), page.addScriptTag(dict(path="assets/inject.js")))
        printer.slugify(title[:14])
        if printer.type == "statics":
            (height, width), metrics = await asyncio.gather(
                page.evaluate("[get_height(), get_width()]"),
                page.metrics(),
            )
            page_data = dict(Height=height, Width=width)
            page_data.update(metrics)
            # draw_statics is a plain function; run it off the event loop.
            byteio_file = await asyncio.get_running_loop().run_in_executor(
                None, draw_statics, title[:25], page_data)
            printer.set_location(byteio_file)
        else:
            if printer.scroll_control is not None and printer.fullpage is True:
                if printer.scroll_control is False:
                    await page.evaluate("scroll(get_height());")
                elif printer.scroll_control is True:
                    scroll_task = asyncio.create_task(
                        page.evaluate("progressive_scroll();"))
                    # asyncio.wait() no longer accepts bare coroutines
                    # (deprecated in 3.8, TypeError from 3.11), so the event
                    # wait must be wrapped in a task explicitly.
                    lock_task = asyncio.create_task(user_lock.wait())
                    try:
                        await asyncio.wait(
                            {scroll_task, lock_task},
                            return_when=asyncio.FIRST_COMPLETED,
                        )
                    finally:
                        # If the scroll finished first the waiter would
                        # otherwise linger until the event is set; cancelling
                        # an already-finished task is a no-op.
                        lock_task.cancel()
                    await page.evaluate("cancel_scroll()")
            if printer.type == "pdf":
                await page.pdf(printer.arguments_to_print, path=printer.file)
            else:
                await page.screenshot(printer.arguments_to_print, path=printer.file)
    except errors.PageError as err:
        raise ResponseNotReady("This is not a valid link 🤔") from err
    except asyncio.CancelledError as err:
        raise ResponseNotReady(
            "server got interuppted, please try again later") from err
    finally:
        await page.close()
async def launch_chrome(retry=False) -> Browser:
    """Start a headless Chrome process and hand back the browser object.

    A launch that raises ``BadStatusLine`` is attempted one more time after a
    1.5 second pause. ``retry=True`` marks the call as that second attempt; if
    it fails as well, ``ResponseNotReady`` is raised.
    """
    chrome_flags = [
        '--no-sandbox',
        '--single-process',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--no-zygote',
    ]
    try:
        return await launch(
            headless=True,
            logLevel=50,
            executablePath=EXEC_PATH,
            args=chrome_flags,
        )
    except BadStatusLine:
        if retry:
            # Second failure in a row: give up.
            LOGGER.info(
                'WEB_SCRS --> request failed -> Excepted BadStatusLine >> max retry exceeded'
            )
            raise ResponseNotReady("Sorry, the site is not responding!")
        LOGGER.info(
            'WEB_SCRS --> request failed -> Excepted BadStatusLine >> retrying...'
        )
        await asyncio.sleep(1.5)
        return await launch_chrome(True)
async def screenshot_driver(
    printer: Printer, tasks=[]
) -> Optional[tuple[str, dict]]:  # pylint: disable=unsubscriptable-object
    """Render ``printer.link`` to ``printer.filename`` in a (possibly shared) browser.

    Args:
        printer: Job description. Read here: ``PID``, ``resolution``, ``link``,
            ``type``, ``arguments_to_print`` and ``filename``; ``slugify`` is
            awaited with the first 14 characters of the page title.
        tasks: List holding the live browser object. NOTE: the mutable default
            is deliberate here -- when callers omit ``tasks`` the same list is
            reused across calls, which is what lets a later call pick up the
            browser launched by an earlier one. Do not "fix" it to ``None``
            without providing another shared store.

    Returns:
        ``(page title, page metrics)`` when ``printer.type == "statics"``;
        otherwise ``None`` (the pdf/screenshot is written to
        ``printer.filename``).

    Raises:
        ResponseNotReady: if the browser cannot be launched, or if navigation
            raises ``errors.PageError``.
    """
    if tasks:
        LOGGER.info(
            f"WEB_SCRS:{printer.PID} --> browser object >> yielded from existing task list"
        )
        browser = tasks[0]
    else:
        LOGGER.info(
            f"WEB_SCRS:{printer.PID} --> no browser object exists >> creating new"
        )
        try:
            browser = await launch_chrome()
            tasks.append(browser)
        except Exception as e:
            LOGGER.critical(e)
            # Chain the cause so the original traceback is not lost.
            raise ResponseNotReady(e) from e

    page = await browser.newPage()
    LOGGER.debug(
        f"WEB_SCRS:{printer.PID} --> created new page object >> now setting viewport"
    )
    await page.setViewport(printer.resolution)
    LOGGER.debug(f"WEB_SCRS:{printer.PID} --> fetching received link")
    try:
        await page.goto(printer.link)
        title = await page.title()
        await printer.slugify(title[:14])
        LOGGER.debug(
            f"WEB_SCRS:{printer.PID} --> link fetched successfully -> set filename({printer.filename}) >> now rendering page"
        )
        if printer.type == "pdf":
            await page.pdf(printer.arguments_to_print, path=printer.filename)
        elif printer.type == "statics":
            LOGGER.debug(
                f"WEB_SCRS:{printer.PID} --> site metrics detected >> now rendering image"
            )
            return (title, await page.metrics())
        else:
            await page.screenshot(printer.arguments_to_print, path=printer.filename)
    except errors.PageError as err:
        LOGGER.info(
            f"WEB_SCRS:{printer.PID} --> request failed -> Excepted PageError >> invalid link"
        )
        raise ResponseNotReady("Not 🚫 A valid link 😓🤔") from err
    finally:
        # Cleanup runs on success and on failure alike.
        await asyncio.sleep(2)
        LOGGER.debug(
            f"WEB_SCRS:{printer.PID} --> page rendered successfully >> now closing page object"
        )
        await page.close()
        # Query the page list once; the original asked twice and then tested
        # ``< 2`` in the second branch, so the "task pending" message could
        # never be logged while other pages were actually open.
        remaining_pages = len(await browser.pages())
        if remaining_pages == 1:
            # Presumably the single survivor is the browser's initial blank
            # tab, i.e. no other job is using this browser -- TODO confirm.
            LOGGER.info(
                f"WEB_SCRS:{printer.PID} --> no task pending >> closing browser object"
            )
            if browser in tasks:
                tasks.remove(browser)
            await browser.close()
        else:
            LOGGER.info(
                f"WEB_SCRS:{printer.PID} --> task pending >> leaving browser intact"
            )