async def download_offer(url: str, semaphore: asyncio.Semaphore) -> None:
    """Save an offer page as a PDF and screenshot every picture of its gallery.

    Files are written to a directory named after ``url`` with every ``/``
    replaced by ``.``: ``offer.pdf`` plus one ``<index>.png`` per picture.

    Args:
        url: Address of the offer page.
        semaphore: Held for the whole download, so it bounds how many
            offers are processed at the same time.
    """
    async with semaphore:
        # create folder for output files
        _path = pathlib.Path(f"{url.replace('/','.')}/")
        _path.mkdir(parents=True, exist_ok=True)

        # First pass: export the page to PDF with the default launch options.
        async with async_playwright() as p:
            browser = await p.chromium.launch()
            try:
                page = await browser.newPage()
                await page.goto(url, timeout=0)

                # Playwright creates the file itself; pre-opening it (as the
                # previous version did) only leaked an open file handle.
                await page.pdf(format="A4", path=_path / "offer.pdf")
            finally:
                await browser.close()

        # Second pass: walk the picture gallery in a visible browser window.
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=False)
            try:
                page = await browser.newPage()
                await page.goto(url, timeout=0)

                # click on the main picture
                await page.click(
                    "//div[contains(@class,'download-cover')][contains(@ng-click,'showEntity(SHOW_ENTITY.FULLSCREEN)')]",
                    timeout=0,
                )

                # get current location in pictures
                current, num_of_pictures = await get_current_image_order(page)

                # shift to the beginning of the album
                while current != 1:
                    await page.click(
                        "//button[contains(@class,'icon-arr-left')]", timeout=0
                    )
                    current, _ = await get_current_image_order(page)

                # make a screenshot of every picture
                for i in range(num_of_pictures):
                    await page.waitForTimeout(time_in_ms_between_every_screenshot)

                    # screenshot() writes the PNG itself, no file handle needed
                    await page.screenshot(path=_path / f"{i}.png")

                    await page.click(
                        "//button[contains(@class,'icon-arr-right')]", timeout=0
                    )
            finally:
                await browser.close()
Exemple #2
0
    async def parse_links_task(self, link):
        """Parse ``link`` in a freshly launched browser and return the result.

        A browser of the type chosen by ``self.browser_by_type`` is started
        with a random user agent and a 1600x900 viewport. The task sleeps for
        a random delay between ``self.min_delay`` and ``self.max_delay``
        before the parser built from ``self.parser_class`` is run.

        Args:
            link: Value handed to the parser's ``parse`` coroutine.

        Returns:
            Whatever ``parse(link)`` returned (``None`` is reported as
            "Probably captcha"), or ``None`` when parsing raised.
        """
        async with async_playwright() as p:
            browser_type = self.browser_by_type(p)

            useragent = UserAgent().random

            browser = await browser_type.launch(
                headless=self.config.get('headless', True))
            # try/finally so the browser is closed on the success path too;
            # the early ``return`` used to skip the close call.
            try:
                page = await browser.newPage(userAgent=useragent,
                                             viewport={
                                                 "width": 1600,
                                                 "height": 900
                                             })

                delay = random.uniform(self.min_delay, self.max_delay)
                await asyncio.sleep(delay)

                try:
                    parser_obj = self.parser_class(page, config=self.config)
                    parser = await parser_obj.parse(link)
                except Exception as e:
                    # Exception rather than BaseException: task cancellation
                    # and KeyboardInterrupt must propagate, not be reported
                    # as a parse failure.
                    print('except', link, e)
                    print('Fail')
                    return None

                if parser is not None:
                    print('Success')
                else:
                    print('Probably captcha')
                return parser
            finally:
                await browser.close()
async def main():
    """Screenshot whatsmyuseragent.org once with each browser engine.

    Chromium, Firefox and WebKit are launched one after another; each run
    saves ``example-<engine name>.png`` in the working directory.
    """
    async with async_playwright() as playwright_api:
        engines = (
            playwright_api.chromium,
            playwright_api.firefox,
            playwright_api.webkit,
        )
        for browser_type in engines:
            instance = await browser_type.launch()
            tab = await instance.newPage()
            await tab.goto('http://whatsmyuseragent.org/')
            await tab.screenshot(path=f'example-{browser_type.name}.png')
            await instance.close()
async def main():
    """Run ten ``open_test`` coroutines concurrently against one browser.

    Each coroutine receives the shared browser, a page picked at random from
    ``PAGES`` and its index (0-9). The browser is closed once all of them
    have finished.
    """
    print('start main')
    async with async_playwright() as playwright_api:
        shared_browser = await playwright_api.chromium.launch(headless=False)
        jobs = [
            open_test(shared_browser, random.choice(PAGES), index)
            for index in range(10)
        ]
        await asyncio.gather(*jobs)
        await shared_browser.close()
Exemple #5
0
async def test_launch_browser(monkeypatch):
    """Check that ``launch_browser`` honours ``BROWSER_WS_ENDPOINT``.

    The setting is patched to the browserless.io endpoint, then a browser is
    launched and its type and websocket endpoint are verified. Errors while
    launching or connecting are tolerated; failed assertions are not.
    """
    monkeypatch.setattr("uzen.core.settings.BROWSER_WS_ENDPOINT",
                        "wss://chrome.browserless.io")
    assert settings.BROWSER_WS_ENDPOINT == "wss://chrome.browserless.io"

    try:
        async with async_playwright() as p:
            browser = await launch_browser(p)
            assert isinstance(browser, playwright.browser.Browser)
            assert browser.wsEndpoint == "wss://chrome.browserless.io"
            await browser.close()
    except AssertionError:
        # AssertionError is a subclass of Exception: without this clause the
        # handler below would swallow failed assertions and the test could
        # never fail.
        raise
    except Exception:
        # Best effort: launching or connecting to the remote endpoint may fail
        # for reasons unrelated to the code under test.
        pass
Exemple #6
0
    async def get_all_pdfs(specs, max_concurrent):
        """Run ``get_pdf(browser, spec)`` for every spec with bounded concurrency.

        One Chromium instance is shared by all tasks. A failing task does not
        stop the others; its exception is printed as ``Error: ...``.

        Args:
            specs: Iterable of values passed to ``get_pdf`` one by one.
            max_concurrent: Upper bound on simultaneously running tasks;
                values below 1 are treated as 1.
        """

        def report_failures(finished):
            # Print the exception of every finished task that failed.
            for t in finished:
                if t.cancelled():
                    # exception() raises CancelledError for a cancelled task
                    continue
                if t.exception():
                    print('Error: {}'.format(t.exception()))

        # asyncio.wait() rejects an empty set, so the limit must be >= 1.
        limit = max(1, max_concurrent)

        async with async_playwright() as p:
            browser = await p.chromium.launch()
            try:
                running = set()
                for spec in specs:
                    if len(running) >= limit:
                        done, running = await asyncio.wait(
                            running, return_when=asyncio.FIRST_COMPLETED)
                        report_failures(done)
                    running.add(asyncio.create_task(get_pdf(browser, spec)))

                # Guarded because asyncio.wait() raises ValueError when
                # ``specs`` was empty; the last batch is reported as well.
                if running:
                    done, _ = await asyncio.wait(running)
                    report_failures(done)
            finally:
                await browser.close()
Exemple #7
0
async def main():
    """Fetch http://httpbin.org/get with WebKit and print the page content.

    The browser binary comes from the ``browsers`` mapping, keyed by engine
    name, and launch is given a timeout of 5000.
    """
    async with async_playwright() as playwright_api:
        # Only WebKit is exercised; chromium and firefox are not in the loop.
        for browser_type in (playwright_api.webkit,):
            engine_name = browser_type.name
            print("*" * 100)
            print(engine_name)
            # Original author's note: --no-sandbox is not recognized by
            # WebKit, so no extra ``args`` are passed.
            launch_options = {
                "executablePath": browsers[engine_name],
                "timeout": 5000,
            }
            instance = await browser_type.launch(**launch_options)
            tab = await instance.newPage()
            await tab.goto("http://httpbin.org/get")
            html = await tab.content()
            print(html)
            await instance.close()
Exemple #8
0
        async def _preview(hostname: str, protocol="http") -> Screenshot:
            """Open ``<protocol>://<hostname>`` and return a screenshot of it.

            Args:
                hostname: Host to open.
                protocol: URL scheme to use (default ``"http"``).

            Returns:
                A ``Screenshot`` whose ``data`` is the base64-encoded image.

            Raises:
                Any error raised while launching, navigating or capturing
                propagates unchanged.
            """
            async with async_playwright() as p:
                browser = await launch_browser(p)
                # try/finally so a failed navigation does not leave the
                # browser open; the former ``except Error: raise`` was a
                # no-op and has been dropped.
                try:
                    page = await browser.newPage()
                    await page.goto(
                        f"{protocol}://{hostname}",
                        waitUntil=settings.BROWSER_WAIT_UNTIL,
                    )
                    screenshot_data = await page.screenshot()
                finally:
                    await browser.close()

            screenshot = Screenshot()
            screenshot.data = base64.b64encode(screenshot_data).decode()
            return screenshot
        async def playweight_demo():
            """Open ``response.url`` in a visible Chromium window with devtools.

            The page is sent ``self.headers`` as extra HTTP headers and sized
            to 1366x768. The coroutine then waits up to 5000 for a
            ``feed-list`` element to be attached before closing the browser.
            """
            width, height = 1366, 768
            chromium_flags = [
                '--disable-infobars', f'--window-size={width},{height}'
            ]
            async with async_playwright() as playwright_api:
                instance = await playwright_api.chromium.launch(
                    headless=False,
                    devtools=True,
                    args=chromium_flags,
                )
                tab = await instance.newPage()
                await tab.setExtraHTTPHeaders(headers=self.headers)
                await tab.setViewportSize(width, height)
                await tab.goto(response.url)

                await tab.waitForSelector(
                    selector='feed-list',
                    timeout=5000,
                    state='attached',
                )

                await instance.close()
Exemple #10
0
async def main():
    """Run one Baidu search in each browser engine and screenshot the result.

    For Chromium, Firefox and WebKit in turn: open baidu.com, search for
    "AirPython", wait for the hot-list text to appear and save
    ``example-<engine name>.png``.
    """
    search_box = "input[name=\"wd\"]"
    async with async_playwright() as playwright_api:
        engines = (
            playwright_api.chromium,
            playwright_api.firefox,
            playwright_api.webkit,
        )
        for browser_type in engines:
            # Headed mode, so the run can be watched
            instance = await browser_type.launch(headless=False)
            tab = await instance.newPage()

            await tab.goto('http://baidu.com')

            # Perform a single search
            await tab.fill(search_box, "AirPython")
            await tab.press(search_box, "Enter")

            # Wait until the result page has rendered the hot-list text
            await tab.waitForSelector("text=百度热榜")

            # Take the screenshot
            await tab.screenshot(path=f'example-{browser_type.name}.png')

            await instance.close()
Exemple #11
0
async def generate_image_batch(
    targets: List[Union[str, Path, HtmlDoc]],
    *,
    resolution=None,
    query_parameters_list: Optional[List[Optional[Dict]]] = None,
    output_files: Optional[List[Optional[Union[Path, str]]]] = None,
) -> List[bytes]:
    """Render a batch of targets in one Chromium context.

    Each target may be a URL string, a ``Path`` to a local file or an
    ``HtmlDoc``. An ``HtmlDoc`` is written to a temporary ``.html`` file,
    which is removed again once that target has been processed.

    Args:
        targets: Items to render, in order.
        resolution: Viewport passed to the browser context; defaults to
            ``{"width": 1920, "height": 1080}``.
        query_parameters_list: Per-target query parameters, parallel to
            ``targets``. ``None`` means no parameters for any target.
        output_files: Per-target output file, parallel to ``targets``.
            ``None`` means no output file for any target.

    Returns:
        The ``screenshots`` list handed to ``_generate`` for every target.

    Note:
        The three lists are combined with ``zip``, so iteration stops at the
        shortest one.
    """
    if resolution is None:
        resolution = {"width": 1920, "height": 1080}
    # zip() cannot iterate None, so the documented defaults used to raise
    # TypeError; expand them to one None per target instead.
    if query_parameters_list is None:
        query_parameters_list = [None] * len(targets)
    if output_files is None:
        output_files = [None] * len(targets)

    async with async_playwright() as p:
        browser = await p.chromium.launch()
        try:
            context = await browser.newContext(viewport=resolution)
            screenshots: List[bytes] = []
            for target, query_parameters, output_file in zip(
                    targets, query_parameters_list, output_files):
                temp_html = None

                if isinstance(target, Path):
                    url_address = f"file://{target.absolute()}"

                elif isinstance(target, HtmlDoc):
                    with tempfile.NamedTemporaryFile(mode="w",
                                                     suffix=".html",
                                                     delete=False) as tf:
                        tf.write(target.html)
                        url_address = f"file://{tf.name}"
                    temp_html = Path(tf.name)

                else:
                    url_address = target

                try:
                    await _generate(context, output_file, query_parameters,
                                    screenshots, url_address)
                finally:
                    # delete=False leaves cleanup to us. Assumes _generate is
                    # done with the page once it returns (review: confirm).
                    if temp_html is not None:
                        temp_html.unlink()
        finally:
            await browser.close()
        return screenshots
async def main():
    """Open antispider1.scrape.center with ``navigator.webdriver`` removed.

    A visible Chromium window (slowMo=200) gets an init script that deletes
    ``webdriver`` from the navigator prototype. The site is then loaded
    until the network is idle and a screenshot is saved as
    ``example-firefox.png``.
    """
    async with async_playwright() as playwright_api:
        instance = await playwright_api.chromium.launch(headless=False,
                                                        slowMo=200)
        tab = await instance.newPage()
        # The init script is registered before navigation.
        await tab.addInitScript(source='''
                                const newProto = navigator.__proto__;
                                delete newProto.webdriver;
                                navigator.__proto__ = newProto;
        ''')
        # An earlier experiment showed navigator.webdriver in an alert()
        # dialog to verify the patch; it is disabled.
        await tab.goto('https://antispider1.scrape.center/',
                       waitUntil='networkidle')
        await tab.screenshot(path='example-firefox.png')
        await instance.close()
async def main():
    """Log in with credentials from the environment using a visible Firefox.

    ``EMAIL``, ``SENHA`` and ``PRODUTO`` are read from the environment and
    passed to ``login``. If the result's ``'status'`` is falsy, its
    ``'mensagem'`` is printed and no further task is awaited.
    """
    email = os.getenv("EMAIL")
    senha = os.getenv("SENHA")
    produto = os.getenv("PRODUTO")
    async with async_playwright() as p:

        browser = await p.firefox.launch(headless=False)
        # The browser was never closed explicitly; do it even when a task
        # raises.
        try:
            page = await browser.newPage()
            page.setDefaultTimeout(0)

            access = asyncio.create_task(
                login(browser, page, email, senha, produto))
            # Further steps (order, pay) are meant to be appended here.
            tasks = [access]

            for task in tasks:
                done, _ = await asyncio.wait(
                    {task}, return_when=asyncio.FIRST_EXCEPTION)
                if task in done:
                    # result() re-raises whatever the task raised
                    resultado = task.result()
                    if not resultado['status']:
                        print(resultado['mensagem'])
                        break
        finally:
            await browser.close()
Exemple #14
0
    async def take_snapshot(
        url: str,
        accept_language: Optional[str] = None,
        ignore_https_errors: bool = False,
        referer: Optional[str] = None,
        timeout: Optional[int] = None,
        user_agent: Optional[str] = None,
    ) -> SnapshotResult:
        """Take a snapshot of a website with Playwright

        Arguments:
            url {str} -- A URL of a website

        Keyword Arguments:
            accept_language {Optional[str]} -- Accept-language header to use (default: {None})
            ignore_https_errors {bool} -- Whether to ignore HTTPS errors (default: {False})
            referer {Optional[str]} -- Referer header to use (default: {None})
            timeout {Optional[int]} -- Maximum navigation time in milliseconds; None or 0 falls back to 30000 (default: {None})
            user_agent {Optional[str]} -- User-agent header to use (default: {None})

        Returns:
            SnapshotResult -- screenshot, snapshot record and the JavaScript
            responses captured while the page loaded
        """
        submitted_url: str = url

        # intercept responses on page to get scripts
        scripts: List[Script] = []
        pending_handlers: List[asyncio.Task] = []

        async def handle_response(response: Response) -> None:
            content_type: str = response.headers.get("content-type", "")
            if response.ok and is_js_content_type(content_type):
                content = await response.text()
                scripts.append(
                    Script(
                        url=response.url,
                        content=content,
                        sha256=calculate_sha256(content),
                    ))

        def on_response(response: Response) -> None:
            # Keep a handle on every handler so it can be awaited before the
            # browser goes away.
            pending_handlers.append(
                asyncio.create_task(handle_response(response)))

        async with async_playwright() as p:
            browser: playwright.browser.Browser = await launch_browser(p)
            # try/finally closes the browser on errors as well; the former
            # ``except Error: raise`` added nothing and has been dropped.
            try:
                page: Page = await browser.newPage(
                    ignoreHTTPSErrors=ignore_https_errors,
                    userAgent=user_agent)

                extra_headers = {}
                if accept_language is not None:
                    extra_headers["Accept-Language"] = accept_language
                await page.setExtraHTTPHeaders(extra_headers)

                page.on("response", on_response)

                # default timeout = 30 seconds
                timeout = timeout or 30 * 1000
                res: Response = await page.goto(
                    url,
                    referer=referer,
                    timeout=timeout,
                    waitUntil=settings.BROWSER_WAIT_UNTIL,
                )

                request = {
                    "accept_language": accept_language,
                    "browser": browser.version,
                    "ignore_https_errors": ignore_https_errors,
                    "referer": referer,
                    "timeout": timeout,
                    "user_agent": await
                    page.evaluate("() => navigator.userAgent"),
                }

                # From here on ``url`` is the final page URL; the requested
                # one is kept in ``submitted_url``.
                url = page.url
                status = res.status
                screenshot_data = await page.screenshot()
                body = await page.content()
                sha256 = calculate_sha256(body)
                headers = res.headers

                # The handlers used to be fire-and-forget, so ``scripts``
                # could be incomplete when the browser closed. Handler errors
                # stay non-fatal, as before.
                await asyncio.gather(*pending_handlers,
                                     return_exceptions=True)
            finally:
                await browser.close()

        server = headers.get("server")
        content_type = headers.get("content-type")
        content_length = headers.get("content-length")

        hostname = cast(str, get_hostname_from_url(url))
        certificate = Certificate.load_and_dump_from_url(url)
        ip_address = cast(str, get_ip_address_by_hostname(hostname))
        asn = get_asn_by_ip_address(ip_address) or ""
        whois = Whois.whois(hostname)

        snapshot = Snapshot(
            url=url,
            submitted_url=submitted_url,
            status=status,
            body=body,
            sha256=sha256,
            headers=headers,
            hostname=hostname,
            ip_address=ip_address,
            asn=asn,
            server=server,
            content_length=content_length,
            content_type=content_type,
            whois=whois,
            certificate=certificate,
            request=request,
        )
        screenshot = Screenshot()
        screenshot.data = base64.b64encode(screenshot_data).decode()

        return SnapshotResult(
            screenshot=screenshot,
            snapshot=snapshot,
            scripts=scripts,
        )
Exemple #15
0
async def get_car_list(url=None, is_all_pages=False):
    """Collect the rows of the results table found at ``url``.

    The table is switched to 100 rows per page. With ``is_all_pages`` set
    and a "next" button that is not disabled, the following pages are
    walked as well; otherwise only the current page is read.

    Returns:
        The list built up by ``get_row_data``, or ``404`` when the browser
        ended up on the not-found page or any step inside the scraping
        section raised.
    """
    table_idle_js = "document.querySelector('#serverSideDataTable_processing').style.cssText == 'display: none;'"
    next_class_js = """document.querySelector('#serverSideDataTable_next').getAttribute('class')"""
    last_index_js = """document.querySelector('#serverSideDataTable_last>a').getAttribute('data-dt-idx')"""
    next_disabled = "paginate_button next disabled"

    async with async_playwright() as playwright_api:
        chromium_flags = [
            "--no-sandbox",
            "--disable-setuid-sandbox",
            "--disable-dev-shm-usage",
            "--disable-accelerated-2d-canvas",
            "--no-first-run",
            "--no-zygote",
            "--single-process",
            "--disable-gpu",
        ]
        browser = await playwright_api.chromium.launch(
            headless=True,
            args=chromium_flags,
        )

        agent_source = shadow_useragent.ShadowUserAgent()
        context = await browser.newContext(
            userAgent=agent_source.random,
            ignoreHTTPSErrors=True,
        )

        page = await context.newPage()
        await page.goto(url)

        try:
            await page.selectOption(".top [name='serverSideDataTable_length']",
                                    "100")
            await page.waitForFunction(table_idle_js)

            if page.url == "https://www.copart.com/notfound-error":
                return 404

            next_status = await page.evaluate(next_class_js)
            rows = []

            # Single-page case: either not requested or nothing to page to.
            if not (is_all_pages and next_status != next_disabled):
                return await get_row_data(page, rows)

            page_numbers = await page.evaluate(last_index_js)
            for _ in range(int(page_numbers) - 3):
                rows = await get_row_data(page, rows)
                current_next = await page.evaluate(next_class_js)
                if current_next != next_disabled:
                    await page.click("#serverSideDataTable_next>a")
                    await page.waitForFunction(table_idle_js)
            return rows
        except Exception:
            return 404
        finally:
            await browser.close()
Exemple #16
0
async def get_car_info(lot_id, member=False):
    """Scrape the detail page of a Copart lot.

    Args:
        lot_id: Lot identifier, inserted into
            ``https://www.copart.com/lot/<lot_id>``.
        member: When true, log in before opening the lot page.

    Returns:
        A dict mapping the lot-detail labels to their values, plus
        ``"Bid Price"``, ``"Sale Location"`` and ``"Sale Date"``; or ``404``
        when the browser ended up on the not-found page.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=True,
            # proxy={"server": "", "username": "", "password": "",},
            args=[
                "--no-sandbox",
                "--disable-setuid-sandbox",
                "--disable-dev-shm-usage",
                "--disable-accelerated-2d-canvas",
                "--no-first-run",
                "--no-zygote",
                "--single-process",
                "--disable-gpu",
            ],
        )

        # try/finally so the browser is closed even when a step below raises.
        try:
            user_agent = shadow_useragent.ShadowUserAgent()
            context = await browser.newContext(
                userAgent=user_agent.random,
                ignoreHTTPSErrors=True,
                # viewport={"width": 1920, "height": 1080},
            )

            page = await context.newPage()

            async def login(page):
                # Credentials are empty strings here.
                await page.goto("https://www.copart.com/login/")
                await page.type("input#username", "")
                await page.type("input#password", "")
                await page.click(".loginfloatright.margin15")
                await page.waitForSelector(".welcomeMsg")

            if member:
                await login(page)

            url = f"https://www.copart.com/lot/{lot_id}"
            await page.goto(url)

            if page.url == "https://www.copart.com/notfound-error":
                return 404

            # NOTE(review): the first ':' has already been stripped from each
            # label when it is compared with "Notes:", so this filter looks
            # like it can never match. Confirm whether "Notes" was intended
            # before changing it, because it affects key/value pairing below.
            keys = []
            for label in await page.querySelectorAll(
                    ".lot-detail-section label"):
                text = await page.evaluate(
                    "(elem) => elem.innerText.replace(':', '')", label)
                if text != "Notes:":
                    keys.append(text)

            values = []
            for span in await page.querySelectorAll(
                    ".lot-detail-section label+span"):
                values.append(
                    await page.evaluate("(elem) => elem.innerText", span))

            # Labels and values are paired purely by position.
            car_info = dict(zip(keys, values))
            car_info["Bid Price"] = await get_text(page, ".bid-price")
            car_info["Sale Location"] = await get_text(
                page,
                ".panel.clr [data-uname='lotdetailSaleinformationlocationvalue']",
            )
            car_info["Sale Date"] = await get_text(
                page,
                "[data-uname='lotdetailSaleinformationsaledatevalue'] [ng-if^='validateDate']",
            )
            return car_info
        finally:
            await browser.close()
Exemple #17
0
async def capture_pdf(url, filename):
    """Capture ``url`` into ``filename`` using the ``Browser`` helpers.

    A Chromium instance is launched, ``Browser.init`` prepares a page for
    ``url``, ``Browser.capture`` receives that page and ``filename``, and
    the instance is closed afterwards.
    """
    async with async_playwright() as playwright_api:
        chromium = await playwright_api.chromium.launch()
        prepared_page = await Browser.init(chromium, url)
        await Browser.capture(prepared_page, filename)
        await chromium.close()
Exemple #18
0
async def playwright():
    """Async generator that yields one started Playwright object.

    The object comes from ``async_playwright()`` and stays inside its
    ``async with`` block until the generator is resumed or closed.
    """
    async with async_playwright() as started:
        yield started