Ejemplo n.º 1
0
async def convert_page(session, path, reduction_code):
    """Load a local HTML file, run ``reduction_code`` in it and extract content.

    Args:
        session: CDP session object providing ``execute`` and ``wait_for``.
        path: Filesystem path of the HTML page; it is made absolute first.
        reduction_code: JavaScript source evaluated in the page once the
            load event has been received.

    Returns:
        A ``(target_item, content_html)`` tuple: the outer HTML of the ``h1``
        found inside ``#main`` and the outer HTML of ``#main`` itself.
    """
    import pathlib  # local import: only needed to build the file URL

    # as_uri() percent-encodes characters such as spaces, '#' and '?', which
    # plain 'file://' + path concatenation would leave to be misread as URL
    # syntax; it also produces a valid URL for Windows drive paths.
    urlpath = pathlib.Path(os.path.abspath(path)).as_uri()
    logger.info('Navigating to %s', urlpath)
    async with session.wait_for(page.LoadEventFired):
        await session.execute(page.navigate(url=urlpath))

    (_, exc) = await session.execute(runtime.evaluate(reduction_code))
    if exc is not None:
        # Keep going (best effort), but do not hide a failing reduction
        # script. NOTE(review): assumes the second element is None when the
        # script did not throw - confirm against the cdp runtime module.
        logger.warning('Reduction code raised in %s: %r', urlpath, exc)

    root_id = (await session.execute(dom.get_document())).node_id
    main_id = await session.execute(dom.query_selector(root_id, '#main'))

    target_item_id = await session.execute(dom.query_selector(main_id, 'h1'))
    target_item = await session.execute(dom.get_outer_html(target_item_id))
    content_html = await session.execute(dom.get_outer_html(main_id))
    return (target_item, content_html)
Ejemplo n.º 2
0
async def read_page(browserurl, targeturl):
    """
    read a page and get the title
    originally https://github.com/HyperionGray/trio-chrome-devtools-protocol/blob/master/examples/get_title.py

    Parameters

    browserurl: str
        ws address for chrome developer protocol commands

    targeturl: str
        url of the page to load

    Returns

    The outer HTML of the page's ``title`` node, as produced by
    ``dom.get_outer_html``.
    """
    logger.info("Connecting to browser: %s", browserurl)
    async with open_cdp(browserurl) as conn:
        logger.info("Listing targets")
        targets = await conn.execute(target.get_targets())
        target_id = targets[0].target_id

        logger.info("Attaching to target id=%s", target_id)
        session = await conn.open_session(target_id)

        logger.info("Navigating to %s", targeturl)
        await session.execute(page.enable())
        async with session.wait_for(page.LoadEventFired):
            await session.execute(page.navigate(targeturl))

        # Printing happens after the wait_for block so the load event has
        # been awaited first; inside the block it ran right after navigate().
        # NOTE(review): assumes wait_for blocks on exit - confirm.
        # The PDF data is the first element of the result (the second is
        # discarded), matching how save_pdf unpacks the same call.
        pdf_data, _ = await session.execute(page.print_to_pdf())
        print(f"pdf is: {type(pdf_data)}")

        logger.info("Extracting page title")
        root_node = await session.execute(dom.get_document())
        title_node_id = await session.execute(
            dom.query_selector(root_node.node_id, "title")
        )
        html = await session.execute(dom.get_outer_html(title_node_id))
        # Hand the title back to the caller; it used to be computed and then
        # dropped.
        return html
Ejemplo n.º 3
0
async def main():
    """Load a URL in an unattached page target and print its title element.

    The browser websocket URL is read from ``sys.argv[1]`` and the URL to
    navigate to from ``sys.argv[2]``.

    Raises:
        RuntimeError: If the browser reports no target that is a page, is
            not a devtools page and is not already attached.
    """
    logger.info('Connecting to browser: %s', sys.argv[1])
    async with open_cdp_connection(sys.argv[1]) as conn:
        logger.info('Listing targets')
        targets = await conn.execute(target.get_targets())

        for t in targets:
            if (t.type == 'page' and not t.url.startswith('devtools://')
                    and not t.attached):
                target_id = t.target_id
                break
        else:
            # Without this branch target_id would be unbound below and the
            # failure would surface as an unrelated UnboundLocalError.
            raise RuntimeError('No unattached page target found in browser')

        logger.info('Attaching to target id=%s', target_id)
        session = await conn.open_session(target_id)

        logger.info('Navigating to %s', sys.argv[2])
        await session.execute(page.enable())
        async with session.wait_for(page.LoadEventFired):
            await session.execute(page.navigate(sys.argv[2]))

        logger.info('Extracting page title')
        root_node = await session.execute(dom.get_document())
        title_node_id = await session.execute(
            dom.query_selector(root_node.node_id, 'title'))
        html = await session.execute(dom.get_outer_html(title_node_id))
        print(html)
Ejemplo n.º 4
0
async def save_pdf(browserurl, targeturl, pdfpath, sleeptime):
    """
    make a pdf from a webpage
    originally https://github.com/HyperionGray/trio-chrome-devtools-protocol/blob/master/examples/screenshot.py

    Parameters

    browserurl: str
        ws address for chrome developer protocol commands

    targeturl: str
        url of page to print to pdf

    pdfpath: str
        filename the pdf is written to

    sleeptime: float
        seconds to wait after the load event before reading and printing the
        page
    """
    logger.info("Connecting to browser: %s", browserurl)
    async with open_cdp(browserurl) as conn:
        logger.info("Listing targets")
        targets = await conn.execute(target.get_targets())
        target_id = targets[0].target_id

        logger.info("Attaching to target id=%s", target_id)
        async with conn.open_session(target_id) as session:
            logger.info("Setting device emulation")
            await session.execute(
                emulation.set_device_metrics_override(
                    width=1200, height=2000, device_scale_factor=1, mobile=False
                )
            )

            logger.info("Enabling page events")
            await session.execute(page.enable())

            logger.info("Navigating to %s", targeturl)
            async with session.wait_for(page.LoadEventFired):
                await session.execute(page.navigate(url=targeturl))

            # Use trio's sleep so other tasks keep running while we wait;
            # time.sleep would block the whole event loop for the duration.
            await trio.sleep(sleeptime)
            root_node = await session.execute(dom.get_document())
            body_node_id = await session.execute(
                dom.query_selector(root_node.node_id, "body")
            )
            body_html = await session.execute(dom.get_outer_html(body_node_id))

            logger.debug(body_html)

            logger.info("Saving a pdf")
            # TODO: make sure that javascript finishes rendering
            pdf_data, _ = await session.execute(page.print_to_pdf())

            # pdf_data is base64 text; decode it to raw bytes before writing.
            async with await trio.open_file(pdfpath, "wb") as pdf_file:
                await pdf_file.write(b64decode(pdf_data))
            logger.info("wrote %s", pdfpath)
Ejemplo n.º 5
0
async def test_session_execute(nursery, session_handler):
    ''' A connected session can run a command and hand back its result. '''
    server_addr = await start_server(nursery, session_handler)

    async with open_cdp(server_addr) as connection:
        cdp_session = await connection.connect_session(
            target.TargetID('target1'))
        assert cdp_session.session_id == 'session1'

        # Query the root node (id 0) for the selector and check the reply.
        found_id = await cdp_session.execute(
            dom.query_selector(dom.NodeId(0), 'p.foo'))
        assert found_id == 1
Ejemplo n.º 6
0
async def merge_pages_in(ethox_doc, ws_addr, reduction_code):
    """Merge the HTML pages below ``ethox_doc`` into one page and print it.

    Every ``*.html`` file found recursively under ``ethox_doc`` is passed to
    ``convert_page``; the returned main contents are then inserted into the
    loaded ``index.html`` and the session is handed to ``print_page`` with
    the output name ``'cargo_doc.pdf'``.

    Args:
        ethox_doc: Root directory of the generated documentation.
        ws_addr: Address passed to ``open_cdp_connection``.
        reduction_code: JavaScript source forwarded to ``convert_page``.
    """
    async with open_cdp_connection(ws_addr) as conn:
        logger.info('Listing targets')
        targets = await conn.execute(target.get_targets())
        target_id = targets[0].target_id

        logger.info('Attaching to target id=%s', target_id)
        session = await conn.open_session(target_id)

        logger.info('Setting device emulation')
        await session.execute(
            emulation.set_device_metrics_override(width=800,
                                                  height=600,
                                                  device_scale_factor=1,
                                                  mobile=False))

        logger.info('Enabling page events')
        await session.execute(page.enable())

        logger.info('Starting to crawl documentation')

        # Keyed by the heading HTML returned from convert_page, so two pages
        # with an identical heading keep only the later one.
        # NOTE(review): confirm this de-duplication is intended.
        sections = {}
        for doc_page in glob.iglob(os.path.join(ethox_doc, '**', '*.html'),
                                   recursive=True):
            (target_item,
             main_contents) = await convert_page(session, doc_page,
                                                 reduction_code)
            sections[target_item] = main_contents

        # Load the index page last so the node ids below refer to it.
        await convert_page(session, os.path.join(ethox_doc, 'index.html'),
                           reduction_code)
        root_id = (await session.execute(dom.get_document())).node_id
        body_id = await session.execute(dom.query_selector(root_id, 'body'))
        footer_id = await session.execute(dom.query_selector(
            root_id, 'footer'))
        main_id = await session.execute(dom.query_selector(root_id, '#main'))

        # Only the stored HTML is needed here. Iterating values() also avoids
        # rebinding the dict's own name as the loop variable.
        for section_html in sections.values():
            # Clone '#main' into the body (the footer id is passed as the
            # third argument), then overwrite the clone with stored content.
            cloned_id = await session.execute(
                dom.copy_to(main_id, body_id, footer_id))
            await session.execute(dom.set_outer_html(cloned_id, section_html))

        await print_page(session, 'cargo_doc.pdf')
Ejemplo n.º 7
0
async def test_session_execute(nursery):
    ''' A session opened on a connection can execute a command. '''
    async def handler(request):
        # Errors raised inside the server task are hard to surface in the
        # test itself, so they are written to the log instead.
        try:
            socket = await request.accept()

            # First exchange: the client attaches to the target.
            attach_cmd = json.loads(await socket.get_message())
            assert attach_cmd['method'] == 'Target.attachToTarget'
            assert attach_cmd['params']['targetId'] == 'target1'
            logging.info('Server received:  %r', attach_cmd)
            attach_reply = {
                'id': attach_cmd['id'],
                'result': {'sessionId': 'session1'},
            }
            logging.info('Server sending:  %r', attach_reply)
            await socket.send_message(json.dumps(attach_reply))

            # Second exchange: the client runs querySelector in the session.
            query_cmd = json.loads(await socket.get_message())
            assert query_cmd['method'] == 'DOM.querySelector'
            assert query_cmd['sessionId'] == 'session1'
            assert query_cmd['params']['nodeId'] == 0
            assert query_cmd['params']['selector'] == 'p.foo'
            logging.info('Server received:  %r', query_cmd)
            query_reply = {
                'id': query_cmd['id'],
                'sessionId': query_cmd['sessionId'],
                'result': {'nodeId': 1},
            }
            logging.info('Server sending:  %r', query_reply)
            await socket.send_message(json.dumps(query_reply))
        except Exception:
            logging.exception('Server exception')

    server_addr = await start_server(nursery, handler)

    async with open_cdp_connection(server_addr) as connection:
        cdp_session = await connection.open_session(
            target.TargetID('target1'))
        assert cdp_session.session_id == 'session1'

        found_id = await cdp_session.execute(
            dom.query_selector(dom.NodeId(0), 'p.foo'))
        assert found_id == 1
Ejemplo n.º 8
0
async def main():
    """Load a URL in the browser's first target and print its title element.

    The browser websocket URL is read from ``sys.argv[1]`` and the URL to
    navigate to from ``sys.argv[2]``.
    """
    logger.info('Connecting to browser: %s', sys.argv[1])
    async with open_cdp_connection(sys.argv[1]) as conn:
        logger.info('Listing targets')
        targets = await conn.execute(target.get_targets())
        target_id = targets[0].target_id

        logger.info('Attaching to target id=%s', target_id)
        session = await conn.open_session(target_id)

        logger.info('Navigating to %s', sys.argv[2])
        await session.execute(page.enable())
        await session.execute(page.navigate(sys.argv[2]))
        # The received event is not needed, so it is no longer bound to an
        # unused local.
        # NOTE(review): the wait starts only after navigate() has returned;
        # if the load event can fire before that, this may wait forever -
        # confirm against the wait_for implementation in use.
        await session.wait_for(page.LoadEventFired)

        logger.info('Extracting page title')
        root_node = await session.execute(dom.get_document())
        title_node_id = await session.execute(
            dom.query_selector(root_node.node_id, 'title'))
        html = await session.execute(dom.get_outer_html(title_node_id))
        print(html)