Ejemplo n.º 1
0
async def screenshot_driver(printer: Printer,
                            tasks=[]) -> Union[List, Tuple[str, dict]]:
    """Open ``printer.link`` in a browser page and render it.

    Args:
        printer: Job description. This function reads ``PID`` (used only in
            log messages), ``resolution`` (passed to ``page.setViewport``),
            ``link``, ``type`` and ``arguments_to_print``.
        tasks: List holding the browser to reuse. NOTE: the mutable default
            is relied upon here -- the same default list persists across
            calls, so a browser appended by one call is picked up by the
            next call that does not pass its own list.

    Returns:
        The result of ``page.pdf(...)`` when ``printer.type == "pdf"``, a
        ``(title, metrics)`` tuple when it is ``"statics"``, otherwise the
        result of ``page.screenshot(...)``.

    Raises:
        ResponseNotReady: If the browser cannot be launched or the page
            raises ``errors.PageError`` while loading/rendering.
    """
    if tasks:
        LOGGER.info(
            f'WEB_SCRS:{printer.PID} --> browser object >> yielded from existing task list'
        )
        browser = tasks[0]
    else:
        LOGGER.info(
            f'WEB_SCRS:{printer.PID} --> no browser object exists >> creating new'
        )
        try:
            browser = await launch_chrome()
            tasks.append(browser)
        except Exception as e:
            LOGGER.critical(e)
            # Chain the original error so the root cause stays in the traceback.
            raise ResponseNotReady(e) from e
    page = await browser.newPage()
    LOGGER.debug(
        f'WEB_SCRS:{printer.PID} --> created new page object >> now setting viewport'
    )
    await page.setViewport(printer.resolution)
    LOGGER.debug(f'WEB_SCRS:{printer.PID} --> fetching received link')
    try:
        await page.goto(printer.link)
        LOGGER.debug(
            f'WEB_SCRS:{printer.PID} --> link fetched successfully >> now rendering page'
        )
        if printer.type == "pdf":
            end_file = await page.pdf(printer.arguments_to_print)
        elif printer.type == "statics":
            LOGGER.debug(
                f'WEB_SCRS:{printer.PID} --> site metrics detected >> now rendering image'
            )
            end_file = (await page.title(), await page.metrics())
        else:
            end_file = await page.screenshot(printer.arguments_to_print)
        return end_file
    except errors.PageError as err:
        LOGGER.info(
            f'WEB_SCRS:{printer.PID} --> request failed -> Excepted PageError >> invalid link'
        )
        raise ResponseNotReady("Not a valid link 😓🤔") from err
    finally:
        # Runs on both success and failure: always release the page.
        await asyncio.sleep(2)
        LOGGER.debug(
            f'WEB_SCRS:{printer.PID} --> page rendered successfully >> now closing page object'
        )
        await page.close()
        # Take a single snapshot of the page count so both branches below
        # decide on the same value.
        open_pages = len(await browser.pages())
        if open_pages == 1:
            # Only one page is left open, so no other job is using the
            # browser (presumably the remaining page is the browser's
            # initial blank tab -- TODO confirm).
            LOGGER.info(
                f'WEB_SCRS:{printer.PID} --> no task pending >> closing browser object'
            )
            if browser in tasks:
                tasks.remove(browser)
            await browser.close()
        elif open_pages > 1:
            # The original tested ``< 2`` here, which could never be true
            # when other pages were actually still open.
            LOGGER.info(
                f'WEB_SCRS:{printer.PID} --> task pending >> leaving browser intact'
            )
Ejemplo n.º 2
0
async def launch_chrome(retry=False) -> Browser:
    """Launch a headless Chrome instance, retrying once on ``BadStatusLine``.

    Args:
        retry: ``True`` when this call is already the retry attempt; a
            second ``BadStatusLine`` then aborts instead of retrying again.

    Returns:
        The browser object returned by ``launch``.

    Raises:
        ResponseNotReady: If ``launch`` raises ``BadStatusLine`` on the
            retry attempt as well.
    """
    try:
        browser = await launch(
            headless=True,
            logLevel=50,  # numeric value of logging.CRITICAL
            executablePath=EXEC_PATH,
            # This is a launcher option, not a Chrome command-line switch;
            # it was previously passed inside ``args`` where it had no
            # effect as an option.
            ignoreHTTPSErrors=True,
            args=[
                "--no-sandbox",
                "--single-process",
                "--disable-dev-shm-usage",
                "--disable-gpu",
                "--no-zygote",
            ],
        )
        return browser
    except BadStatusLine as err:
        if retry:
            LOGGER.info(
                "WEB_SCRS --> request failed -> Excepted BadStatusLine >> max retry exceeded"
            )
            raise ResponseNotReady("Soory the site is not responding") from err
        LOGGER.info(
            "WEB_SCRS --> request failed -> Excepted BadStatusLine >> retrying..."
        )
        # Brief pause before the single retry.
        await asyncio.sleep(1.5)
        return await launch_chrome(True)
Ejemplo n.º 3
0
 def download(self, task_uuid, output=Output.JSON):
     """Query the 'retrieve-bulk-search' endpoint for ``task_uuid``.

     The endpoint URL template is filled in with ``task_uuid`` and requested
     with headers built for the given ``output`` format.

     Returns:
         The value of ``parse_response`` applied to the HTTP response.

     Raises:
         ResponseNotReady: If the server answers with status 202; the
             exception carries the ``message`` field of the JSON body
             (empty string when the field is absent).
     """
     endpoint_template = self._build_url_for_endpoint('retrieve-bulk-search')
     endpoint = endpoint_template.format(task_uuid=task_uuid)
     request_headers = self._get_headers(output=output)
     response = self.datalake_requests(
         endpoint, 'get', headers=request_headers)
     if response.status_code != 202:
         return parse_response(response)
     raise ResponseNotReady(response.json().get('message', ''))
Ejemplo n.º 4
0
    def get_header(self, key, default=''):
        """Return the value(s) of header ``key`` as a single value.

        All values found via ``self.headers.get_all(key)`` are joined with
        ``', '``. When nothing (or an empty result) is found, ``default`` is
        used instead; a string or non-iterable default is returned as-is,
        while an iterable default is joined the same way.

        Raises:
            ResponseNotReady: If ``self.headers`` is ``None``.
        """
        if self.headers is None:
            raise ResponseNotReady()

        values = self.headers.get_all(key)
        if not values:
            values = default

        # Strings are iterable too, so they are checked explicitly to avoid
        # joining them character by character.
        is_scalar = isinstance(values, str) or not hasattr(values, '__iter__')
        return values if is_scalar else ', '.join(values)
Ejemplo n.º 5
0
async def screenshot_engine(browser: Browser, printer: Printer,
                            user_lock: asyncio.Event):
    """Render ``printer.link`` in a new page of ``browser``.

    Depending on ``printer.type`` the page is turned into a statistics image
    (``"statics"``), a PDF (``"pdf"``) or a screenshot (anything else). The
    page is always closed before returning.

    Args:
        browser: Browser in which a new page is opened.
        printer: Job description; this function reads ``resolution``,
            ``link``, ``type``, ``scroll_control``, ``fullpage``,
            ``arguments_to_print`` and ``file``, and calls ``slugify`` and
            ``set_location`` on it.
        user_lock: Event that, once set, ends the wait on the progressive
            scroll early.

    Raises:
        ResponseNotReady: On ``errors.PageError`` or when the coroutine is
            cancelled while rendering.
    """
    page = await browser.newPage()
    await page.setViewport(printer.resolution)
    try:
        await page.goto(printer.link, dict(timeout=60000))
        # The JS helpers called below (get_height, get_width, scroll,
        # progressive_scroll, cancel_scroll) are presumably defined by
        # assets/inject.js -- verify against that file.
        title, _ = await asyncio.gather(
            page.title(), page.addScriptTag(dict(path="assets/inject.js")))
        printer.slugify(title[:14])
        if printer.type == "statics":
            (height, width), metrics = await asyncio.gather(
                page.evaluate("[get_height(), get_width()]"),
                page.metrics(),
            )
            page_data = dict(Height=height, Width=width)
            page_data.update(metrics)
            # draw_statics runs in the default executor so it does not
            # block the event loop.
            byteio_file = await asyncio.get_running_loop().run_in_executor(
                None, draw_statics, title[:25], page_data)
            printer.set_location(byteio_file)
        else:
            if printer.scroll_control is not None and printer.fullpage is True:
                if printer.scroll_control is False:
                    await page.evaluate("scroll(get_height());")
                elif printer.scroll_control is True:
                    scroll_task = asyncio.create_task(
                        page.evaluate("progressive_scroll();"))
                    # asyncio.wait must be given tasks/futures: bare
                    # coroutines are deprecated since Python 3.8 and
                    # rejected from 3.11 on.
                    lock_task = asyncio.create_task(user_lock.wait())
                    try:
                        await asyncio.wait(
                            {scroll_task, lock_task},
                            return_when=asyncio.FIRST_COMPLETED,
                        )
                    finally:
                        # Do not leave the event waiter dangling when the
                        # scroll finished first (no-op if already done).
                        lock_task.cancel()
                    await page.evaluate("cancel_scroll()")
            if printer.type == "pdf":
                await page.pdf(printer.arguments_to_print, path=printer.file)
            else:
                await page.screenshot(printer.arguments_to_print,
                                      path=printer.file)
    except errors.PageError as err:
        raise ResponseNotReady("This is not a valid link 🤔") from err
    except asyncio.CancelledError as err:
        raise ResponseNotReady(
            "server got interuppted, please try again later") from err
    finally:
        await page.close()
Ejemplo n.º 6
0
async def launch_chrome(retry=False) -> Browser:
    """Start a headless Chrome browser, with one retry on ``BadStatusLine``.

    Args:
        retry: ``True`` if this invocation is itself the retry; a further
            ``BadStatusLine`` then gives up.

    Returns:
        The browser object produced by ``launch``.

    Raises:
        ResponseNotReady: If ``launch`` fails with ``BadStatusLine`` on the
            retry attempt too.
    """
    chrome_flags = [
        '--no-sandbox',
        '--single-process',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--no-zygote',
    ]
    try:
        return await launch(
            headless=True,
            logLevel=50,
            executablePath=EXEC_PATH,
            args=chrome_flags,
        )
    except BadStatusLine:
        if retry:
            # Second failure in a row: stop retrying.
            LOGGER.info(
                'WEB_SCRS --> request failed -> Excepted BadStatusLine >> max retry exceeded'
            )
            raise ResponseNotReady("Sorry, the site is not responding!")
        LOGGER.info(
            'WEB_SCRS --> request failed -> Excepted BadStatusLine >> retrying...'
        )
        await asyncio.sleep(1.5)
        return await launch_chrome(True)
Ejemplo n.º 7
0
async def screenshot_driver(
    printer: Printer,
    tasks=[]
) -> Optional[tuple[str, dict]]:  # pylint: disable=unsubscriptable-object
    """Open ``printer.link`` in a browser page and render it to a file.

    Args:
        printer: Job description. This function reads ``PID`` (used only in
            log messages), ``resolution``, ``link``, ``type``,
            ``arguments_to_print`` and ``filename``, and awaits
            ``printer.slugify`` with the first 14 characters of the page
            title before reading ``filename``.
        tasks: List holding the browser to reuse. NOTE: the mutable default
            is relied upon here -- the same default list persists across
            calls, so a browser appended by one call is picked up by the
            next call that does not pass its own list.

    Returns:
        ``(title, metrics)`` when ``printer.type == "statics"``; otherwise
        ``None`` (the PDF or screenshot is written to ``printer.filename``).

    Raises:
        ResponseNotReady: If the browser cannot be launched or the page
            raises ``errors.PageError`` while loading/rendering.
    """
    if tasks:
        LOGGER.info(
            f"WEB_SCRS:{printer.PID} --> browser object >> yielded from existing task list"
        )
        browser = tasks[0]
    else:
        LOGGER.info(
            f"WEB_SCRS:{printer.PID} --> no browser object exists >> creating new"
        )
        try:
            browser = await launch_chrome()
            tasks.append(browser)
        except Exception as e:
            LOGGER.critical(e)
            # Chain the original error so the root cause stays in the traceback.
            raise ResponseNotReady(e) from e
    page = await browser.newPage()
    LOGGER.debug(
        f"WEB_SCRS:{printer.PID} --> created new page object >> now setting viewport"
    )
    await page.setViewport(printer.resolution)
    LOGGER.debug(f"WEB_SCRS:{printer.PID} --> fetching received link")
    try:
        await page.goto(printer.link)
        title = await page.title()
        await printer.slugify(title[:14])
        LOGGER.debug(
            f"WEB_SCRS:{printer.PID} --> link fetched successfully -> set filename({printer.filename}) >> now rendering page"
        )
        if printer.type == "pdf":
            await page.pdf(printer.arguments_to_print, path=printer.filename)
        elif printer.type == "statics":
            LOGGER.debug(
                f"WEB_SCRS:{printer.PID} --> site metrics detected >> now rendering image"
            )
            return (title, await page.metrics())
        else:
            await page.screenshot(printer.arguments_to_print,
                                  path=printer.filename)
    except errors.PageError as err:
        LOGGER.info(
            f"WEB_SCRS:{printer.PID} --> request failed -> Excepted PageError >> invalid link"
        )
        raise ResponseNotReady("Not 🚫 A valid link 😓🤔") from err
    finally:
        # Runs on both success and failure: always release the page.
        await asyncio.sleep(2)
        LOGGER.debug(
            f"WEB_SCRS:{printer.PID} --> page rendered successfully >> now closing page object"
        )
        await page.close()
        # Take a single snapshot of the page count so both branches below
        # decide on the same value.
        open_pages = len(await browser.pages())
        if open_pages == 1:
            # Only one page is left open, so no other job is using the
            # browser (presumably the remaining page is the browser's
            # initial blank tab -- TODO confirm).
            LOGGER.info(
                f"WEB_SCRS:{printer.PID} --> no task pending >> closing browser object"
            )
            if browser in tasks:
                tasks.remove(browser)
            await browser.close()
        elif open_pages > 1:
            # The original tested ``< 2`` here, which could never be true
            # when other pages were actually still open.
            LOGGER.info(
                f"WEB_SCRS:{printer.PID} --> task pending >> leaving browser intact"
            )