Exemple #1
0
async def convert_page(session, path, reduction_code):
    """Open a local HTML file, run a script in it and extract two HTML parts.

    The session is navigated to the file at ``path``; once the
    ``page.LoadEventFired`` event has been received, ``reduction_code`` is
    evaluated in the page. The outer HTML of the ``#main`` element and of
    the ``h1`` found inside it are then read back.

    Args:
        session: Object providing ``execute`` and ``wait_for`` for CDP
            commands and events. Presumably page events are already enabled
            on it by the caller -- this function does not call
            ``page.enable()``.
        path: Filesystem path of the page to open; it is made absolute
            before being turned into a ``file://`` URL.
        reduction_code: Script source passed to ``runtime.evaluate`` after
            the page has loaded.

    Returns:
        A ``(target_item, content_html)`` tuple: the outer HTML of the
        ``h1`` inside ``#main`` and the outer HTML of ``#main`` itself.
    """
    # Function-scope import keeps this block self-contained.
    import pathlib

    # as_uri() percent-encodes characters such as spaces, '#' and '?' and
    # produces a valid file URL on Windows too; plain string concatenation
    # does neither.
    urlpath = pathlib.Path(os.path.abspath(path)).as_uri()
    logger.info('Navigating to %s', urlpath)
    async with session.wait_for(page.LoadEventFired):
        await session.execute(page.navigate(url=urlpath))

    _, exc = await session.execute(runtime.evaluate(reduction_code))
    if exc is not None:
        # Do not abort: extraction below is still attempted, but a failing
        # script should not go unnoticed.
        logger.warning('Evaluating the reduction code failed: %s', exc)

    root_id = (await session.execute(dom.get_document())).node_id
    main_id = await session.execute(dom.query_selector(root_id, '#main'))

    target_item_id = await session.execute(dom.query_selector(main_id, 'h1'))
    target_item = await session.execute(dom.get_outer_html(target_item_id))
    content_html = await session.execute(dom.get_outer_html(main_id))
    return (target_item, content_html)
Exemple #2
0
async def read_page(browserurl, targeturl):
    """Load a page in the browser's first target and return its title HTML.

    Originally
    https://github.com/HyperionGray/trio-chrome-devtools-protocol/blob/master/examples/get_title.py

    Args:
        browserurl: Address passed to ``open_cdp`` to reach the browser.
        targeturl: URL the page is navigated to.

    Returns:
        The outer HTML of the document's ``title`` element.
    """
    logger.info("Connecting to browser: %s", browserurl)
    async with open_cdp(browserurl) as conn:
        logger.info("Listing targets")
        targets = await conn.execute(target.get_targets())
        target_id = targets[0].target_id

        logger.info("Attaching to target id=%s", target_id)
        # NOTE(review): some trio_cdp versions expose open_session as an
        # async context manager rather than an awaitable -- confirm which
        # form the installed version expects.
        session = await conn.open_session(target_id)

        logger.info("Navigating to %s", targeturl)
        await session.execute(page.enable())
        async with session.wait_for(page.LoadEventFired):
            await session.execute(page.navigate(targeturl))

        # Print only after the load event has been received; issuing the
        # command inside the wait_for block runs it before the wait ends.
        # The first tuple element is the PDF data.
        pdf_data, _ = await session.execute(page.print_to_pdf())
        print(f"pdf is: {type(pdf_data)}")

        logger.info("Extracting page title")
        root_node = await session.execute(dom.get_document())
        title_node_id = await session.execute(
            dom.query_selector(root_node.node_id, "title")
        )
        html = await session.execute(dom.get_outer_html(title_node_id))
        return html
Exemple #3
0
async def main():
    """Print the outer HTML of the ``title`` element of a page.

    ``sys.argv[1]`` is the address given to ``open_cdp_connection`` and
    ``sys.argv[2]`` is the URL to navigate to. The first target that is of
    type ``page``, is not a ``devtools://`` page and is not already
    attached is used.

    Raises:
        RuntimeError: If the browser reports no target matching the
            criteria above.
    """
    logger.info('Connecting to browser: %s', sys.argv[1])
    async with open_cdp_connection(sys.argv[1]) as conn:
        logger.info('Listing targets')
        targets = await conn.execute(target.get_targets())

        # next() with a default avoids leaving target_id unbound when no
        # target qualifies.
        target_id = next(
            (t.target_id for t in targets
             if t.type == 'page'
             and not t.url.startswith('devtools://')
             and not t.attached),
            None)
        if target_id is None:
            raise RuntimeError(
                'No unattached page target found in the browser')

        logger.info('Attaching to target id=%s', target_id)
        session = await conn.open_session(target_id)

        logger.info('Navigating to %s', sys.argv[2])
        await session.execute(page.enable())
        # Register for the load event before navigating so it cannot be
        # missed; leaving the block waits for the event.
        async with session.wait_for(page.LoadEventFired):
            await session.execute(page.navigate(sys.argv[2]))

        logger.info('Extracting page title')
        root_node = await session.execute(dom.get_document())
        title_node_id = await session.execute(
            dom.query_selector(root_node.node_id, 'title'))
        html = await session.execute(dom.get_outer_html(title_node_id))
        print(html)
Exemple #4
0
async def save_pdf(browserurl, targeturl, pdfpath, sleeptime):
    """
    make a pdf from a webpage
    originally https://github.com/HyperionGray/trio-chrome-devtools-protocol/blob/master/examples/screenshot.py

    Parameters

    browserurl: str
        ws address for chrome developer protocol commands

    targeturl: str
        url of page to print to pdf

    pdfpath: str
        filename the pdf is written to

    sleeptime: float
        seconds to wait after the load event before reading the page and
        printing it
    """
    logger.info("Connecting to browser: %s", browserurl)
    async with open_cdp(browserurl) as conn:
        logger.info("Listing targets")
        targets = await conn.execute(target.get_targets())
        target_id = targets[0].target_id

        logger.info("Attaching to target id=%s", target_id)
        async with conn.open_session(target_id) as session:
            logger.info("Setting device emulation")
            await session.execute(
                emulation.set_device_metrics_override(
                    width=1200, height=2000, device_scale_factor=1, mobile=False
                )
            )

            logger.info("Enabling page events")
            await session.execute(page.enable())

            logger.info("Navigating to %s", targeturl)
            async with session.wait_for(page.LoadEventFired):
                await session.execute(page.navigate(url=targeturl))

            # Use trio's sleep: time.sleep() would block the whole event
            # loop, including any other task running in it.
            await trio.sleep(sleeptime)
            root_node = await session.execute(dom.get_document())
            body_node_id = await session.execute(
                dom.query_selector(root_node.node_id, "body")
            )
            body_html = await session.execute(dom.get_outer_html(body_node_id))

            logger.debug(body_html)

            logger.info("Saving a pdf")
            # TODO: make sure that javascript finishes rendering
            pdf_data, _ = await session.execute(page.print_to_pdf())

            # The pdf data is base64 text; decode it before writing.
            async with await trio.open_file(pdfpath, "wb") as pdf_file:
                await pdf_file.write(b64decode(pdf_data))
            logger.info("wrote %s", pdfpath)
Exemple #5
0
async def main():
    """Print the outer HTML of the ``title`` element of a page.

    ``sys.argv[1]`` is the address given to ``open_cdp_connection`` and
    ``sys.argv[2]`` is the URL to navigate to. The first target in the list
    returned by the browser is used.
    """
    browser_url = sys.argv[1]
    logger.info('Connecting to browser: %s', browser_url)
    async with open_cdp_connection(browser_url) as connection:
        logger.info('Listing targets')
        available = await connection.execute(target.get_targets())
        first_target_id = available[0].target_id

        logger.info('Attaching to target id=%s', first_target_id)
        cdp_session = await connection.open_session(first_target_id)

        page_url = sys.argv[2]
        logger.info('Navigating to %s', page_url)
        await cdp_session.execute(page.enable())
        await cdp_session.execute(page.navigate(page_url))
        # NOTE(review): the wait starts only after navigate() has returned;
        # if the load event can fire before this line it may be missed --
        # confirm against the installed trio_cdp version.
        await cdp_session.wait_for(page.LoadEventFired)

        logger.info('Extracting page title')
        document = await cdp_session.execute(dom.get_document())
        title_query = dom.query_selector(document.node_id, 'title')
        title_id = await cdp_session.execute(title_query)
        title_html = await cdp_session.execute(dom.get_outer_html(title_id))
        print(title_html)