async def download_offer(url: str, semaphore: asyncio.Semaphore) -> None:
    """Save an offer page as a PDF plus a PNG screenshot of every picture
    in its photo album.

    Output goes into a directory named after the URL (slashes replaced by
    dots). Concurrency across calls is bounded by *semaphore*.

    Args:
        url: Offer page URL.
        semaphore: Limits how many offers download concurrently.
    """
    async with semaphore:
        # Create the folder that holds the PDF and all screenshots.
        _path = pathlib.Path(f"{url.replace('/','.')}/")
        _path.mkdir(parents=True, exist_ok=True)

        # First pass: render the offer itself to a PDF.
        async with async_playwright() as p:
            browser = await p.chromium.launch()
            page = await browser.newPage()
            await page.goto(url, timeout=0)
            # page.pdf() creates/overwrites the file itself — the original
            # pre-opened it with open("w") and leaked the handle.
            await page.pdf(format="A4", path=_path / "offer.pdf")
            await browser.close()

        # Second pass: walk the picture album and screenshot each image.
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=False)
            page = await browser.newPage()
            await page.goto(url, timeout=0)
            # Click the main picture to open the fullscreen viewer.
            await page.click(
                "//div[contains(@class,'download-cover')][contains(@ng-click,'showEntity(SHOW_ENTITY.FULLSCREEN)')]",
                timeout=0,
            )
            # Find out where we are within the album and how big it is.
            current, num_of_pictures = await get_current_image_order(page)
            # Step back to the first picture of the album.
            while current != 1:
                await page.click(
                    "//button[contains(@class,'icon-arr-left')]", timeout=0
                )
                current, _ = await get_current_image_order(page)
            # Screenshot every picture, advancing right after each shot.
            for i in range(num_of_pictures):
                await page.waitForTimeout(time_in_ms_between_every_screenshot)
                # page.screenshot() writes the file itself; no need to open
                # (and truncate) it beforehand as the original did.
                await page.screenshot(path=_path / f"{i}.png")
                await page.click(
                    "//button[contains(@class,'icon-arr-right')]", timeout=0
                )
            # Close the browser explicitly (the original relied on the
            # async_playwright context for cleanup).
            await browser.close()
async def parse_links_task(self, link):
    """Open *link* in a fresh browser with a random user agent and parse it.

    Returns:
        Whatever ``self.parser_class(...).parse(link)`` produced, or None
        on failure (a None parse result usually indicates a captcha page).
    """
    async with async_playwright() as p:
        browser_type = self.browser_by_type(p)
        useragent = UserAgent().random
        browser = await browser_type.launch(
            headless=self.config.get('headless', True))
        try:
            page = await browser.newPage(userAgent=useragent,
                                         viewport={
                                             "width": 1600,
                                             "height": 900
                                         })
            # Random delay so requests don't arrive at a fixed cadence.
            delay = random.uniform(self.min_delay, self.max_delay)
            await asyncio.sleep(delay)
            parser_obj = self.parser_class(page, config=self.config)
            parser = await parser_obj.parse(link)
            if parser is not None:
                print('Success')
            else:
                print('Probably captcha')
            return parser
        except Exception as e:
            # Exception, not BaseException: swallowing CancelledError /
            # KeyboardInterrupt would break task cancellation.
            print('except', link, e)
            print('Fail')
        finally:
            # The original only closed the browser on the failure path;
            # close it on every path.
            await browser.close()
async def main():
    """Visit whatsmyuseragent.org with each engine and save a screenshot."""
    async with async_playwright() as p:
        engines = (p.chromium, p.firefox, p.webkit)
        for engine in engines:
            browser = await engine.launch()
            page = await browser.newPage()
            await page.goto('http://whatsmyuseragent.org/')
            await page.screenshot(path=f'example-{engine.name}.png')
            await browser.close()
async def main():
    """Launch one Chromium and run ten concurrent page tests against it."""
    print('start main')
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)
        # Fan out ten workers, each hitting a randomly chosen page.
        jobs = [
            open_test(browser, random.choice(PAGES), worker_id)
            for worker_id in range(10)
        ]
        await asyncio.gather(*jobs)
        await browser.close()
async def test_launch_browser(monkeypatch):
    """launch_browser() should honour the configured remote WS endpoint.

    The browser connection itself is best-effort (the browserless.io
    service may be unreachable), but assertion failures must propagate —
    the original `except Exception: pass` swallowed AssertionError too,
    so the test could never fail.
    """
    monkeypatch.setattr("uzen.core.settings.BROWSER_WS_ENDPOINT",
                        "wss://chrome.browserless.io")
    assert settings.BROWSER_WS_ENDPOINT == "wss://chrome.browserless.io"
    try:
        async with async_playwright() as p:
            browser = await launch_browser(p)
            assert isinstance(browser, playwright.browser.Browser)
            assert browser.wsEndpoint == "wss://chrome.browserless.io"
            await browser.close()
    except AssertionError:
        # A failed expectation must fail the test.
        raise
    except Exception:
        # Tolerate network / remote-service unavailability only.
        pass
async def get_all_pdfs(specs, max_concurrent):
    """Render a PDF for every spec, with at most *max_concurrent* tasks running.

    Per-task errors are printed, not raised, so one bad spec does not
    abort the whole batch.

    Args:
        specs: Iterable of specs understood by ``get_pdf``.
        max_concurrent: Upper bound on simultaneously running tasks.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        running = set()

        def _report(done_tasks):
            # Surface individual failures without aborting the batch.
            for t in done_tasks:
                if t.exception():
                    print('Error: {}'.format(t.exception()))

        for spec in specs:
            if len(running) >= max_concurrent:
                done, running = await asyncio.wait(
                    running, return_when=asyncio.FIRST_COMPLETED)
                _report(done)
            running.add(asyncio.create_task(get_pdf(browser, spec)))
        # asyncio.wait() raises ValueError on an empty set (e.g. when
        # specs was empty), so guard the final drain — and report the
        # last wave's errors too (the original silently dropped them).
        if running:
            done, _ = await asyncio.wait(running)
            _report(done)
        await browser.close()
async def main():
    """Launch webkit from an explicit executable path and dump httpbin's response."""
    async with async_playwright() as pw:
        # for browser_type in [pw.chromium, pw.firefox, pw.webkit]:
        for engine in [pw.webkit]:
            print("*" * 100)
            print(engine.name)
            browser = await engine.launch(
                executablePath=browsers[engine.name],
                timeout=5000,
                # args=["--no-sandbox"],  # --no-sandbox is not recognized in webkit
            )
            page = await browser.newPage()
            await page.goto("http://httpbin.org/get")
            print(await page.content())
            await browser.close()
async def _preview(hostname: str, protocol="http") -> Screenshot:
    """Take a screenshot of ``protocol://hostname`` and return it base64-encoded.

    Args:
        hostname: Host to preview.
        protocol: URL scheme to use (default ``"http"``).

    Raises:
        Error: any Playwright error raised while loading the page.
    """
    try:
        async with async_playwright() as p:
            browser = await launch_browser(p)
            page = await browser.newPage()
            await page.goto(
                f"{protocol}://{hostname}",
                waitUntil=settings.BROWSER_WAIT_UNTIL,
            )
            screenshot_data = await page.screenshot()
            await browser.close()

            screenshot = Screenshot()
            screenshot.data = base64.b64encode(screenshot_data).decode()
            return screenshot
    except Error:
        # Bare re-raise preserves the original traceback; the original
        # `raise (e)` just re-raised with an extra, useless frame.
        raise
async def playweight_demo(url=None, headers=None):
    """Open *url* in a headed Chromium (devtools on), wait for the
    'feed-list' selector, then close.

    The original body referenced the undefined names ``self.headers`` and
    ``response.url`` inside a top-level function — a guaranteed NameError.
    Both are now parameters; calling with no arguments raises a clear
    ValueError instead of crashing mid-run.

    Args:
        url: Page to open (required).
        headers: Optional extra HTTP headers dict.
    """
    if url is None:
        raise ValueError("url is required")
    async with async_playwright() as p:
        width, height = 1366, 768
        browser = await p.chromium.launch(
            headless=False,
            devtools=True,
            args=[
                '--disable-infobars',
                f'--window-size={width},{height}'
            ])
        page = await browser.newPage()
        if headers:
            await page.setExtraHTTPHeaders(headers=headers)
        await page.setViewportSize(width, height)
        await page.goto(url)
        # Wait for the feed list to be attached to the DOM (not
        # necessarily visible).
        await page.waitForSelector(selector='feed-list', timeout=5000,
                                   state='attached')
        await browser.close()
async def main():
    """Run a Baidu search in each of the three engines and screenshot the result."""
    async with async_playwright() as p:
        for engine in (p.chromium, p.firefox, p.webkit):
            # Headed mode so the run can be watched.
            browser = await engine.launch(headless=False)
            page = await browser.newPage()
            await page.goto('http://baidu.com')
            # Type the query and submit it.
            await page.fill("input[name=\"wd\"]", "AirPython")
            await page.press("input[name=\"wd\"]", "Enter")
            # Wait for the results page to finish loading.
            await page.waitForSelector("text=百度热榜")
            # Capture the rendered results.
            await page.screenshot(path=f'example-{engine.name}.png')
            await browser.close()
async def generate_image_batch(
        targets: List[Union[str, Path, HtmlDoc]],
        *,
        resolution=None,
        query_parameters_list: Optional[List[Optional[Dict]]] = None,
        output_files: Optional[List[Optional[Union[Path, str]]]] = None,
) -> List[bytes]:
    """Screenshot each target and return the image bytes, one per target.

    A target may be a URL string, a filesystem Path, or an HtmlDoc
    (rendered via a temporary file).

    Args:
        targets: What to render.
        resolution: Viewport dict; defaults to 1920x1080.
        query_parameters_list: Optional per-target query parameters.
        output_files: Optional per-target output paths.

    Returns:
        The collected screenshot bytes.
    """
    if resolution is None:
        resolution = {"width": 1920, "height": 1080}
    # The signature allows None for these, but the original passed them
    # straight into zip(), raising TypeError — pad to match targets.
    if query_parameters_list is None:
        query_parameters_list = [None] * len(targets)
    if output_files is None:
        output_files = [None] * len(targets)
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        context = await browser.newContext(viewport=resolution)
        screenshots: List[bytes] = []
        for target, query_parameters, output_file in zip(
                targets, query_parameters_list, output_files):
            if isinstance(target, Path):
                url_address = f"file://{target.absolute()}"
            elif isinstance(target, HtmlDoc):
                # delete=False because the browser reads the file after
                # this with-block closes it; the temp file is leaked —
                # kept as-is to preserve behaviour.
                with tempfile.NamedTemporaryFile(mode="w",
                                                 suffix=".html",
                                                 delete=False) as tf:
                    tf.write(target.html)
                url_address = f"file://{tf.name}"
            else:
                url_address = target
            await _generate(context, output_file, query_parameters,
                            screenshots, url_address)
        await browser.close()
        return screenshots
async def main():
    """Open an anti-bot demo page with navigator.webdriver stripped, screenshot it."""
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False, slowMo=200)
        page = await browser.newPage()
        # Remove navigator.webdriver before any page script runs so the
        # site's bot detection does not trigger.
        await page.addInitScript(source='''
            const newProto = navigator.__proto__;
            delete newProto.webdriver;
            navigator.__proto__ = newProto;
        ''')
        await page.goto('https://antispider1.scrape.center/',
                        waitUntil='networkidle')
        await page.screenshot(path='example-firefox.png')
        await browser.close()
async def main():
    """Log into the shop with credentials from env vars and report failures.

    Reads EMAIL, SENHA and PRODUTO from the environment, runs the login
    task and prints its error message if it reports failure.
    """
    email = os.getenv("EMAIL")
    senha = os.getenv("SENHA")
    produto = os.getenv("PRODUTO")
    async with async_playwright() as p:
        browser = await p.firefox.launch(headless=False)
        page = await browser.newPage()
        # Disable Playwright's per-action timeout entirely.
        page.setDefaultTimeout(0)
        access = asyncio.create_task(
            login(browser, page, email, senha, produto))
        # order/pay tasks were planned but are not implemented yet.
        tasks = [access]
        for task in tasks:
            # Use the asyncio constant instead of the bare string literal
            # "FIRST_EXCEPTION" the original passed.
            done, pending = await asyncio.wait(
                {task}, return_when=asyncio.FIRST_EXCEPTION)
            if task in done:
                resultado = task.result()
                if not resultado['status']:
                    print(resultado['mensagem'])
                    break
async def take_snapshot(
    url: str,
    accept_language: Optional[str] = None,
    ignore_https_errors: bool = False,
    referer: Optional[str] = None,
    timeout: Optional[int] = None,
    user_agent: Optional[str] = None,
) -> SnapshotResult:
    """Take a snapshot of a website by puppeteer

    Arguments:
        url {str} -- A URL of a website

    Keyword Arguments:
        accept_language {Optional[str]} -- Accept-language header to use (default: {None})
        ignore_https_errors {bool} -- Whether to ignore HTTPS errors (default: {False})
        referer {Optional[str]} -- Referer header to use (default: {None})
        timeout {Optional[int]} -- Maximum time to wait for in seconds (default: {None})
        user_agent {Optional[str]} -- User-agent header to use (default: {None})

    Returns:
        SnapshotResult
    """
    # Remember the URL as submitted; `url` is later rebound to the final
    # (possibly redirected) page URL.
    submitted_url: str = url
    try:
        async with async_playwright() as p:
            browser: playwright.browser.Browser = await launch_browser(p)
            page: Page = await browser.newPage(
                ignoreHTTPSErrors=ignore_https_errors, userAgent=user_agent)
            headers = {}
            if accept_language is not None:
                headers["Accept-Language"] = accept_language
            await page.setExtraHTTPHeaders(headers)

            # intercept responses on page to get scripts
            scripts: List[Script] = []

            async def handle_response(response: Response) -> None:
                # Collect only JavaScript responses, keyed by content-type.
                content_type: str = response.headers.get(
                    "content-type", "")
                if response.ok and is_js_content_type(content_type):
                    content = await response.text()
                    scripts.append(
                        Script(
                            url=response.url,
                            content=content,
                            sha256=calculate_sha256(content),
                        ))

            # The event callback is synchronous, so wrap the async handler
            # in a task.
            page.on(
                "response",
                lambda response: asyncio.create_task(
                    handle_response(response)),
            )

            # default timeout = 30 seconds
            # NOTE(review): Playwright timeouts are milliseconds, but the
            # docstring documents *seconds* — a caller-supplied value is
            # forwarded unscaled. Confirm the intended units.
            timeout = timeout or 30 * 1000
            res: Response = await page.goto(
                url,
                referer=referer,
                timeout=timeout,
                waitUntil=settings.BROWSER_WAIT_UNTIL,
            )
            # Record the effective request parameters for the snapshot.
            request = {
                "accept_language": accept_language,
                "browser": browser.version,
                "ignore_https_errors": ignore_https_errors,
                "referer": referer,
                "timeout": timeout,
                "user_agent": await page.evaluate("() => navigator.userAgent"),
            }
            # Capture final URL / status / rendered page state before the
            # browser is torn down.
            url = page.url
            status = res.status
            screenshot_data = await page.screenshot()
            body = await page.content()
            sha256 = calculate_sha256(body)
            headers = res.headers
            await browser.close()
    except Error as e:
        raise (e)

    server = headers.get("server")
    content_type = headers.get("content-type")
    content_length = headers.get("content-length")

    # Enrich the snapshot with network / DNS / TLS / WHOIS metadata,
    # resolved from the final URL.
    hostname = cast(str, get_hostname_from_url(url))
    certificate = Certificate.load_and_dump_from_url(url)
    ip_address = cast(str, get_ip_address_by_hostname(hostname))
    asn = get_asn_by_ip_address(ip_address) or ""
    whois = Whois.whois(hostname)

    snapshot = Snapshot(
        url=url,
        submitted_url=submitted_url,
        status=status,
        body=body,
        sha256=sha256,
        headers=headers,
        hostname=hostname,
        ip_address=ip_address,
        asn=asn,
        server=server,
        content_length=content_length,
        content_type=content_type,
        whois=whois,
        certificate=certificate,
        request=request,
    )
    screenshot = Screenshot()
    screenshot.data = base64.b64encode(screenshot_data).decode()

    return SnapshotResult(
        screenshot=screenshot,
        snapshot=snapshot,
        scripts=scripts,
    )
async def get_car_list(url=None, is_all_pages=False):
    """Scrape the Copart search-results table at *url*.

    With is_all_pages=True, pages through the whole result set via the
    "next" pagination button; otherwise scrapes only the current page.
    Returns the accumulated row data, or 404 on a not-found redirect or
    any scraping error.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=True,
            # proxy={"server": "", "username": "", "password": "",},
            args=[
                "--no-sandbox",
                "--disable-setuid-sandbox",
                "--disable-dev-shm-usage",
                "--disable-accelerated-2d-canvas",
                "--no-first-run",
                "--no-zygote",
                "--single-process",
                "--disable-gpu",
            ],
        )
        # Randomized user agent to reduce bot detection.
        user_agent = shadow_useragent.ShadowUserAgent()
        context = await browser.newContext(
            userAgent=user_agent.random,
            ignoreHTTPSErrors=True,
            # viewport={"width": 1920, "height": 1080},
        )
        page = await context.newPage()
        await page.goto(url)
        try:
            # Show 100 rows per page, then wait for the table's AJAX
            # "processing" overlay to disappear.
            await page.selectOption(".top [name='serverSideDataTable_length']", "100")
            await page.waitForFunction(
                "document.querySelector('#serverSideDataTable_processing').style.cssText == 'display: none;'"
            )
            # Copart redirects unknown searches to a not-found page.
            if page.url != "https://www.copart.com/notfound-error":
                # CSS class of the "next" button tells us whether more
                # pages exist.
                next_status = await page.evaluate(
                    """document.querySelector('#serverSideDataTable_next').getAttribute('class')"""
                )
                car_list = []
                if is_all_pages and next_status != "paginate_button next disabled":
                    # Last pagination button's data-dt-idx gives the page
                    # count (offset by 3 for the nav buttons).
                    page_numbers = await page.evaluate(
                        """document.querySelector('#serverSideDataTable_last>a').getAttribute('data-dt-idx')"""
                    )
                    for i in range(int(page_numbers) - 3):
                        car_list = await get_row_data(page, car_list)
                        # Advance to the next page unless "next" is disabled.
                        if (await page.evaluate(
                                """document.querySelector('#serverSideDataTable_next').getAttribute('class')"""
                        ) != "paginate_button next disabled"):
                            await page.click("#serverSideDataTable_next>a")
                            await page.waitForFunction(
                                "document.querySelector('#serverSideDataTable_processing').style.cssText == 'display: none;'"
                            )
                    return car_list
                else:
                    # Single-page scrape.
                    car_list = await get_row_data(page, car_list)
                    return car_list
            else:
                return 404
        except Exception:
            # Any scraping failure is reported as a plain 404 sentinel.
            return 404
        finally:
            await browser.close()
async def get_car_info(lot_id, member=False):
    """Scrape the Copart lot-detail page for *lot_id*.

    With member=True, logs in first (credentials are currently empty
    strings — presumably meant to be filled in; verify before use).
    Returns a dict of label -> value pairs plus bid/sale fields, or 404
    if the lot does not exist.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=True,
            # proxy={"server": "", "username": "", "password": "",},
            args=[
                "--no-sandbox",
                "--disable-setuid-sandbox",
                "--disable-dev-shm-usage",
                "--disable-accelerated-2d-canvas",
                "--no-first-run",
                "--no-zygote",
                "--single-process",
                "--disable-gpu",
            ],
        )
        # Randomized user agent to reduce bot detection.
        user_agent = shadow_useragent.ShadowUserAgent()
        context = await browser.newContext(
            userAgent=user_agent.random,
            ignoreHTTPSErrors=True,
            # viewport={"width": 1920, "height": 1080},
        )
        page = await context.newPage()

        async def login(page):
            # Submit the login form and wait for the welcome banner.
            await page.goto("https://www.copart.com/login/")
            await page.type("input#username", "")
            await page.type("input#password", "")
            await page.click(".loginfloatright.margin15")
            await page.waitForSelector(".welcomeMsg")

        if member:
            await login(page)
        url = f"https://www.copart.com/lot/{lot_id}"
        await page.goto(url)
        # Copart redirects unknown lots to a not-found page.
        if page.url != "https://www.copart.com/notfound-error":
            # Detail labels (minus the "Notes:" row), colons stripped.
            keys = list(
                filter(
                    lambda x: x != "Notes:",
                    [
                        await page.evaluate(
                            "(elem) => elem.innerText.replace(':', '')", v)
                        for v in await page.querySelectorAll(
                            ".lot-detail-section label")
                    ],
                ))
            # Values sit in the span immediately following each label.
            values = [
                await page.evaluate("(elem) => elem.innerText", v)
                for v in await page.querySelectorAll(".lot-detail-section label+span")
            ]
            car_info = {}
            car_info = dict(zip(keys, values))
            # Extra fields from other sections of the page.
            car_info["Bid Price"] = await get_text(page, ".bid-price")
            car_info["Sale Location"] = await get_text(
                page,
                ".panel.clr [data-uname='lotdetailSaleinformationlocationvalue']",
            )
            car_info["Sale Date"] = await get_text(
                page,
                "[data-uname='lotdetailSaleinformationsaledatevalue'] [ng-if^='validateDate']",
            )
            response = car_info
        else:
            response = 404
        await browser.close()
        return response
async def capture_pdf(url, filename):
    """Render *url* to a PDF saved as *filename* via the Browser helper."""
    async with async_playwright() as pw:
        chromium_browser = await pw.chromium.launch()
        page = await Browser.init(chromium_browser, url)
        await Browser.capture(page, filename)
        await chromium_browser.close()
async def playwright():
    """Async fixture: yield a running Playwright instance, stopped on teardown."""
    async with async_playwright() as pw:
        yield pw