Example #1
0
def test(page_graph, html, tab, headless):
    """Verify the provenance chain of the page's only <div>.

    The div must have exactly one 'create node' predecessor, a script node.
    That script must have exactly one 'execute' predecessor, a <script> HTML
    element, whose single incoming 'set attribute' edge sets ``src`` to a
    ``data:`` URI and originates from a script node.
    """
    matching_divs = pg_find_html_element_node(
        page_graph, 'div', lambda page_graph, node: True)
    assert len(matching_divs) == 1
    the_div = matching_divs[0]

    # Which node created the div?
    creators = pg_nodes_directly_leading_to_with_edge_type(
        page_graph, the_div, 'create node')
    assert len(creators) == 1
    creator_script = creators[0]
    assert page_graph.nodes[creator_script]['node type'] == 'script'

    # Which node executed that script?
    executors = pg_nodes_directly_leading_to_with_edge_type(
        page_graph, creator_script, 'execute')
    assert len(executors) == 1
    script_element = executors[0]
    script_element_data = page_graph.nodes[script_element]
    assert script_element_data['node type'] == 'HTML element'
    assert page_graph.nodes[script_element]['tag name'] == 'script'

    # The script element must have had its src set exactly once.
    attr_edges = pg_edges_directly_leading_to_with_edge_type(
        page_graph, script_element, 'set attribute')
    assert len(attr_edges) == 1
    src_edge = attr_edges[0]
    src_edge_data = src_edge['data']
    assert src_edge_data['key'] == 'src'
    assert src_edge['data']['value'].startswith('data:')
    # ... and the attribute must have been set by a script.
    assert page_graph.nodes[src_edge['from']]['node type'] == 'script'
Example #2
0
def test(page_graph, html, tab, headless):
    """Verify the frame history recorded for two iframes whose srcs are swapped.

    Each iframe is expected to have three successors, in order: the initial
    ``about:blank`` DOM root, the content it loads first, and the content it
    gets after the swap.
    """
    node_data = page_graph.nodes(data=True)

    def find_single_iframe(element_id):
        # Exactly one <iframe> with the given id must exist.
        found = pg_find_html_element_node(
            page_graph, 'iframe',
            generate_html_element_id_selector(element_id))
        assert len(found) == 1
        return found[0]

    def expect_blank_dom_root(node):
        # The empty-frame DOM root an iframe is initialized with.
        assert node_data[node]['node type'] == 'DOM root'
        assert node_data[node]['url'] == 'about:blank'

    def expect_local_dom_root(node):
        # The DOM root produced when the local page loads.
        assert node_data[node]['node type'] == 'DOM root'
        assert node_data[node]['url'].endswith('/static_page.html')

    def expect_remote_frame(node):
        # The remote frame produced when the remote page loads.
        assert node_data[node]['node type'] == 'remote frame'
        assert node_data[node]['url'] == 'https://whatwg.org/'

    local_iframe_node = find_single_iframe('local_iframe')
    remote_iframe_node = find_single_iframe('remote_iframe')

    # |local_iframe_node|: blank -> local page -> remote page (after swap).
    frames = list(page_graph.successors(local_iframe_node))
    assert len(frames) == 3
    expect_blank_dom_root(frames[0])
    expect_local_dom_root(frames[1])
    expect_remote_frame(frames[2])

    # |remote_iframe_node|: blank -> remote page -> local page (after swap).
    frames = list(page_graph.successors(remote_iframe_node))
    assert len(frames) == 3
    expect_blank_dom_root(frames[0])
    expect_remote_frame(frames[1])
    expect_local_dom_root(frames[2])
Example #3
0
def test(page_graph, html, tab, headless):
    """Verify the edges between a cookie-using script and the cookie jar.

    Expects three script->cookie edges (two sets followed by one read call)
    and a single cookie->script edge carrying the combined cookie string.
    """
    page_url = 'http://localhost:8080/storage_cookie.html'

    candidates = pg_find_html_element_node(
        page_graph, 'script', generate_script_text_selector('document.cookie')
    )
    # Exactly one script element mentions document.cookie.
    assert len(candidates) == 1
    script_element = candidates[0]

    cookie_jar = pg_find_static_node(page_graph, 'cookie jar')

    # The script element has two successors: the text node holding the code
    # and the actual script; the latter is taken from index 1.
    children = list(page_graph.successors(script_element))
    assert len(children) == 2
    running_script = children[1]

    outgoing = pg_edges_data_from_to(page_graph, running_script, cookie_jar)
    assert len(outgoing) == 3

    expected_outgoing = [
        {'key': 'awesomeKey', 'value': 'awesomeValue'},
        {'key': 'anotherKey', 'value': 'anotherValue'},
        {'key': page_url},
    ]

    # The first two edges are cookie sets: both key and value must match.
    for position, wanted in enumerate(expected_outgoing[:2]):
        assert outgoing[position]['key'] == wanted['key']
        assert outgoing[position]['value'] == wanted['value']

    # The third edge is the read call: only the key is compared.
    assert outgoing[2]['key'] == expected_outgoing[2]['key']

    incoming = pg_edges_data_from_to(page_graph, cookie_jar, running_script)
    # Exactly one edge flows back from the cookie jar to the script.
    assert len(incoming) == 1

    expected_incoming = {
        'key': page_url,
        'value': 'awesomeKey=awesomeValue; anotherKey=anotherValue',
    }
    read_result = incoming[0]
    assert read_result['key'] == expected_incoming['key']
    assert read_result['value'] == expected_incoming['value']
Example #4
0
def test(page_graph, html, tab, headless):
    """Verify the event-listener edges around the page's <form> element.

    The form must have one incoming 'add event listener' edge coming from the
    first node of the graph (treated as the parser) and one outgoing
    'event listener' edge; both edges must share the same event listener id.
    """
    def accept_any(page_graph, node):
        return True

    form = pg_find_html_element_node(page_graph, 'form', accept_any)[0]

    # Incoming side: who registered the listener on the form?
    registrations = []
    for candidate in page_graph.in_edges(form, data=True):
        if pg_edge_data(candidate, 'edge type') == 'add event listener':
            registrations.append(candidate)
    assert len(registrations) == 1
    registration_edge = registrations[0]

    parser_node = list(page_graph.nodes)[0]
    assert parser_node == pg_edge_out(registration_edge)
    assert form == pg_edge_in(registration_edge)

    # Outgoing side: the edge from the form to the event handler.
    handler_edges = []
    for candidate in page_graph.out_edges(form, data=True):
        if pg_edge_data(candidate, 'edge type') == 'event listener':
            handler_edges.append(candidate)
    assert len(handler_edges) == 1
    handler_edge = handler_edges[0]

    script_element = pg_find_html_element_node(
        page_graph, 'script', accept_any)[0]
    script_children = list(page_graph.successors(script_element))
    # The text node, and a script node.
    assert len(script_children) == 2

    # The form is the source of the edge to the event handler.
    # NOTE: checking that the edge's target is script_children[1] was
    # disabled in the original test and remains unchecked here.
    assert form == pg_edge_out(handler_edge)

    # Both edges must refer to the same event listener id.
    registered_id = pg_edge_data(registration_edge, 'event listener id')
    assert registered_id == pg_edge_data(handler_edge, 'event listener id')
Example #5
0
def test(page_graph, html, tab, headless):
    """Verify the request edges produced by a script that calls fetch.

    The executed script must have a single outgoing 'request start' edge to a
    resource node, and two incoming edges ('execute' and 'request complete'),
    with any edge coming from that resource being 'request complete'.
    """
    fetch_elements = pg_find_html_element_node(
        page_graph, 'script', generate_script_text_selector('fetch'))
    assert len(fetch_elements) == 1
    script_element = fetch_elements[0]

    # An execute edge leads from the element to the actual script.
    executed = pg_nodes_directly_reachable_from_with_edge_type(
        page_graph, script_element, 'execute')
    assert len(executed) == 1
    actual_script = executed[0]

    # Split every edge touching the script into outgoing and incoming ones.
    outgoing = []
    incoming = []
    for candidate in page_graph.edges(data=True):
        if pg_edge_out(candidate) == actual_script:
            outgoing.append(candidate)
            continue
        if pg_edge_in(candidate) == actual_script:
            incoming.append(candidate)

    # One edge leaves the script: a request start aimed at a resource node.
    assert len(outgoing) == 1
    request_edge = outgoing[0]
    assert pg_edge_data(request_edge, 'edge type') == 'request start'
    target_data = pg_get_node_data(page_graph, outgoing[0][1])
    assert target_data['node type'] == 'resource'

    # Two edges arrive at the script: execute and request complete.
    assert len(incoming) == 2
    for candidate in incoming:
        assert pg_edge_data(candidate, 'edge type') in (
            'execute', 'request complete')

    # Whatever comes back from the resource must be a request complete.
    resource_node = outgoing[0][1]
    for candidate in incoming:
        if pg_edge_out(candidate) != resource_node:
            continue
        assert pg_edge_data(candidate, 'edge type') == 'request complete'
Example #6
0
def test(page_graph, html, tab, headless):
    """Verify the call/result edges between a script and the screen nodes.

    The executing script must reach eight nodes; each must receive one or two
    'js call' edges and answer with one or two 'js result' edges whose value
    is the canonical string form of an integer.
    """
    found = pg_find_html_element_node(
        page_graph, 'script', generate_script_text_selector('screen.'))
    assert len(found) == 1

    children = list(page_graph.successors(found[0]))
    # Inline script tag: two successors, the second is the executing script.
    assert len(children) == 2
    executing_node = children[1]

    # Every node directly reachable from the script is a screen node.
    screen_nodes = pg_nodes_directly_reachable_from(page_graph, executing_node)
    assert len(screen_nodes) == 8

    # Call edges: script -> screen node.
    for screen_node in screen_nodes:
        calls = pg_edges_data_from_to(page_graph, executing_node, screen_node)
        # At most two calls per node (`colorDepth` and `pixelDepth` are
        # synonyms), and at least one.
        assert 1 <= len(calls) <= 2
        first_call = calls[0]
        # Exactly 3 keys: type, id, timestamp.
        assert len(first_call) == 3
        assert first_call['edge type'] == 'js call'
        assert 'id' in first_call and 'timestamp' in first_call

    # Result edges: screen node -> script.
    for screen_node in screen_nodes:
        results = pg_edges_data_from_to(page_graph, screen_node, executing_node)
        assert 1 <= len(results) <= 2
        first_result = results[0]
        # Exactly 4 keys: type, id, timestamp, value.
        assert len(first_result) == 4
        assert first_result['edge type'] == 'js result'
        # The value must round-trip through int unchanged.
        assert first_result['value'] == str(int(first_result['value']))
        assert 'id' in first_result and 'timestamp' in first_result
def test(page_graph, html, tab, headless):
    """Verify how a stylesheet <link> and the image it references are recorded.

    The single <link> element must have exactly one successor, a resource node
    whose URL ends in ``css_image.css``.  The resource node whose URL ends in
    ``css_image.png`` must have the parser among its predecessors.
    """
    page_graph_nodes = page_graph.nodes(data=True)

    link_nodes = pg_find_html_element_node(page_graph, 'link')
    assert len(link_nodes) == 1
    html_link_node = link_nodes[0]

    # Check the successors of |html_link_node|.
    successors = list(page_graph.successors(html_link_node))
    assert len(successors) == 1
    html_link_node_checks = {
        'css': [
            lambda x: 'node type' in page_graph_nodes[x] and page_graph_nodes[
                x]['node type'] == 'resource' and 'url' in page_graph_nodes[x]
            and page_graph_nodes[x]['url'].endswith('css_image.css'),
            None,
        ]
    }
    pg_node_check_successors(page_graph, html_link_node, html_link_node_checks)
    # Without this assertion the check above can never fail the test.
    # NOTE(review): assumes the helper stores the matching node in slot 1 of
    # each check entry, as other tests using it rely on -- confirm.
    assert html_link_node_checks['css'][1] is not None

    # Check the image request resulting from CSS.
    res_nodes = pg_find_node(
        page_graph,
        'resource',
        selector=lambda pg, n: 'url' in page_graph_nodes[n] and
        page_graph_nodes[n]['url'].endswith('css_image.png'),
    )
    assert len(res_nodes) == 1
    img_res_node = res_nodes[0]

    # Check predecessors of |img_res_node| (the parser must be one of them).
    img_res_node_checks = {
        'parser':
        [lambda x: page_graph_nodes[x]['node type'] == 'parser', None]
    }
    pg_node_check_predecessors(page_graph, img_res_node, img_res_node_checks)
    assert img_res_node_checks['parser'][1] is not None
Example #8
0
def test(page_graph, html, tab, headless):
    """Verify the 'js call' / 'js result' edges of a WebGL-probing script.

    The executing script must reach four distinct nodes.  Each node is
    identified by how many edges connect it to the script: 1 (getContext),
    2 (getExtension), 26 (getParameter) or 36 (getShaderPrecisionFormat).
    Every call's ``args`` must match one still-unmatched expected entry.
    """
    found = pg_find_html_element_node(
        page_graph, 'script', generate_script_text_selector('webgl'))
    assert len(found) == 1

    children = list(page_graph.successors(found[0]))
    # Inline script tag: two successors, the second is the executing script.
    assert len(children) == 2
    executing_node = children[1]

    reachable = pg_nodes_directly_reachable_from(page_graph, executing_node)
    webgl_nodes = sorted(set(reachable))
    # 1 node for getContext plus the 3 different webgl functions.
    assert len(webgl_nodes) == 4

    # Each shader/precision combination is expected three times.
    distinct_shader_args = [
        'gl.VERTEX_SHADER, gl.HIGH_FLOAT',
        'gl.VERTEX_SHADER, gl.MEDIUM_FLOAT',
        'gl.VERTEX_SHADER, gl.LOW_FLOAT',
        'gl.FRAGMENT_SHADER, gl.HIGH_FLOAT',
        'gl.FRAGMENT_SHADER, gl.MEDIUM_FLOAT',
        'gl.FRAGMENT_SHADER, gl.LOW_FLOAT',
        'gl.VERTEX_SHADER, gl.HIGH_INT',
        'gl.VERTEX_SHADER, gl.MEDIUM_INT',
        'gl.VERTEX_SHADER, gl.LOW_INT',
        'gl.FRAGMENT_SHADER, gl.HIGH_INT',
        'gl.FRAGMENT_SHADER, gl.MEDIUM_INT',
        'gl.FRAGMENT_SHADER, gl.LOW_INT',
    ]
    shader_args = [
        combo for combo in distinct_shader_args for _ in range(3)
    ]

    get_parameter_args = [
        'gl.ALIASED_LINE_WIDTH_RANGE',
        'gl.ALIASED_POINT_SIZE_RANGE',
        'gl.ALPHA_BITS',
        'gl.BLUE_BITS',
        'gl.DEPTH_BITS',
        'gl.GREEN_BITS',
        'gl.MAX_COMBINED_TEXTURE_IMAGE_UNITS',
        'gl.MAX_CUBE_MAP_TEXTURE_SIZE',
        'gl.MAX_FRAGMENT_UNIFORM_VECTORS',
        'gl.MAX_RENDERBUFFER_SIZE',
        'gl.MAX_TEXTURE_IMAGE_UNITS',
        'gl.MAX_TEXTURE_SIZE',
        'gl.MAX_VARYING_VECTORS',
        'gl.MAX_VERTEX_ATTRIBS',
        'gl.MAX_VERTEX_TEXTURE_IMAGE_UNITS',
        'gl.MAX_VERTEX_UNIFORM_VECTORS',
        'gl.MAX_VIEWPORT_DIMS',
        'gl.RED_BITS',
        'gl.RENDERER',
        'gl.SHADING_LANGUAGE_VERSION',
        'gl.STENCIL_BITS',
        'gl.VENDOR',
        'gl.VERSION',
        'ext.UNMASKED_VENDOR_WEBGL',
        'ext.UNMASKED_RENDERER_WEBGL',
        'ext.MAX_TEXTURE_MAX_ANISOTROPY_EXT',
    ]

    get_extension_args = [
        'WEBGL_debug_renderer_info', 'EXT_texture_filter_anisotropic'
    ]

    # Edge count -> expected args not yet matched by a call edge.
    unmatched_args_by_count = {
        2: get_extension_args,        # getExtension
        36: shader_args,              # getShaderPrecisionFormat
        26: get_parameter_args,       # getParameter
    }

    # Call edges: script -> webgl node.
    for webgl_node in webgl_nodes:
        calls = pg_edges_data_from_to(page_graph, executing_node, webgl_node)
        if len(calls) == 1:
            # getContext
            assert calls[0]['edge type'] == 'js call'
            assert calls[0]['args'].startswith('webgl')
            continue

        # Any other edge count means something went wrong.
        assert len(calls) in unmatched_args_by_count
        remaining = unmatched_args_by_count[len(calls)]
        for call in calls:
            assert call['edge type'] == 'js call'
            # Each expected entry may be consumed only once.
            assert call['args'] in remaining
            remaining.remove(call['args'])

    # Result edges: webgl node -> script.
    for webgl_node in webgl_nodes:
        results = pg_edges_data_from_to(page_graph, webgl_node, executing_node)
        # Same edge counts as on the call side; anything else is a failure.
        assert len(results) in (1, 2, 26, 36)
        for result in results:
            assert result['edge type'] == 'js result'
Example #9
0
def test(page_graph, html, tab, headless):
    """Verify the edges between a script and the session storage node.

    Expects five script->storage edges (two sets, two further keyed
    operations on 'myCat', and a final storage clear without a key) and one
    storage->script edge returning ``myCat`` = ``Tom``.
    """
    found = pg_find_html_element_node(
        page_graph, 'script', generate_script_text_selector('sessionStorage'))
    # Exactly one script element mentions sessionStorage.
    assert len(found) == 1
    script_element = found[0]

    storage_node = pg_find_static_node(page_graph, 'session storage')

    # Two successors: the text node with the code, then the actual script.
    children = list(page_graph.successors(script_element))
    assert len(children) == 2
    running_script = children[1]

    outgoing = pg_edges_data_from_to(
        page_graph, running_script, storage_node)
    assert len(outgoing) == 5

    expected_outgoing = [
        {'key': 'myCat', 'value': 'Tom'},
        {'key': 'myMouse', 'value': 'Jerry'},
        {'key': 'myCat'},
        {'key': 'myCat'},
        {},
    ]

    clear_position = len(outgoing) - 1
    for position, wanted in enumerate(expected_outgoing):
        actual = outgoing[position]
        # Every edge except the trailing storage clear carries a key.
        if position != clear_position:
            assert actual['key'] == wanted['key']
        # Only the two leading set edges also carry a value.
        if position < 2:
            assert actual['value'] == wanted['value']

    incoming = pg_edges_data_from_to(
        page_graph, storage_node, running_script)
    # Exactly one edge flows back from storage to the script.
    assert len(incoming) == 1

    expected_read = {'key': 'myCat', 'value': 'Tom'}
    read_result = incoming[0]
    assert read_result['key'] == expected_read['key']
    assert read_result['value'] == expected_read['value']
Example #10
0
def test(page_graph, html, tab, headless):
    """Verify the add/remove event listener edges pointing at the page's <div>.

    The incoming listener edges are consumed one at a time in graph order.
    Each step checks the edge type, the event type ('click'), the script node
    the edge comes from and the script id recorded on the edge, while a
    bookkeeping dict tracks which listener ids are currently registered.
    """
    div_node = pg_find_html_element_node(
        page_graph, 'div', lambda page_graph, node: True)[0]

    ADD = 'add event listener'
    REMOVE = 'remove event listener'
    listener_edge_types = {ADD, REMOVE}

    # Reversed so that pop() hands the edges out in their original order.
    pending_edges = [
        edge for edge in page_graph.in_edges(div_node, data=True)
        if pg_edge_data(edge, 'edge type') in listener_edge_types
    ]
    pending_edges.reverse()

    script_nodes = []
    script_ids = []

    # listener id -> script id of every listener currently registered.
    live_listeners = {}

    def take_edge(edge_type, event_type):
        # Consume the next edge and update the listener bookkeeping.
        assert len(pending_edges) > 0

        edge = pending_edges.pop()
        assert pg_edge_data(edge, 'edge type') == edge_type
        assert pg_edge_data(edge, 'key') == event_type

        listener_id = pg_edge_data(edge, 'event listener id')
        listener_script_id = pg_edge_data(edge, 'script id')
        if edge_type == ADD:
            assert listener_id not in live_listeners
            live_listeners[listener_id] = listener_script_id
        else:
            assert listener_id in live_listeners
            assert live_listeners[listener_id] == listener_script_id
            del live_listeners[listener_id]

        return edge

    def expect_from_new_script(edge_type, id_index):
        # The next edge comes from a script node not seen before; record the
        # node and its script id, both of which must be unique so far.
        edge = take_edge(edge_type, 'click')
        script_nodes.append(pg_edge_out(edge))
        assert script_nodes.count(script_nodes[-1]) == 1
        script_ids.append(page_graph.nodes[script_nodes[-1]]['script id'])
        assert script_ids.count(script_ids[-1]) == 1
        assert script_ids[id_index] == pg_edge_data(edge, 'script id')

    def expect_from_known_script(edge_type, node_index, id_index):
        # The next edge comes from an already recorded script node.
        edge = take_edge(edge_type, 'click')
        assert script_nodes[node_index] == pg_edge_out(edge)
        assert script_ids[id_index] == pg_edge_data(edge, 'script id')

    # First script node.
    # addEventListener('click', ...) for foo, boundFoo, boundBoundFoo, bar,
    # baz; a duplicate add of foo is expected to leave no edge.
    expect_from_new_script(ADD, 0)
    for _ in range(4):
        expect_from_known_script(ADD, 0, 0)
    assert len(live_listeners) == 5

    # removeEventListener('click', ...) for the same five handlers; a
    # duplicate remove of foo is expected to leave no edge.
    for _ in range(5):
        expect_from_known_script(REMOVE, 0, 0)
    assert len(live_listeners) == 0

    # Second script node.  The edges come from the new node but still carry
    # the first script's id.
    # addEventListener('click', ...) for foo, boundFoo, boundBoundFoo,
    # boundBoundBoundFoo, bar, baz.
    expect_from_new_script(ADD, 0)
    for _ in range(5):
        expect_from_known_script(ADD, 1, 0)
    assert len(live_listeners) == 6

    # removeEventListener('click', ...) for the same six handlers.
    for _ in range(6):
        expect_from_known_script(REMOVE, 1, 0)
    assert len(live_listeners) == 0

    # Third script node: attribute event listener behavior.
    # onclick = quux
    expect_from_new_script(ADD, 2)
    # onclick = foo (removing quux)
    expect_from_known_script(REMOVE, 2, 2)
    # onclick = foo (adding foo)
    expect_from_known_script(ADD, 2, 0)

    # Fourth script node: attribute event listener behavior.
    # onclick = quux (removing foo)
    expect_from_new_script(REMOVE, 0)
    # onclick = quux (adding quux)
    expect_from_known_script(ADD, 3, 2)
Example #11
0
def test(page_graph, html, tab, headless):
    """Verify the call/result edges between a script and the navigator nodes.

    The executing script must reach exactly the navigator nodes listed in
    ``node_order``, in that order.  Each node gets one 'js call' edge and
    answers with one 'js result' edge whose value, where a validator is
    defined, satisfies that validator.

    ``headless`` disables the plugins validator, so that result is expected
    to carry no value in headless mode.
    """
    script_nodes = pg_find_html_element_node(
        page_graph, 'script', generate_script_text_selector('navigator.'))
    assert len(script_nodes) == 1

    script_node = script_nodes[0]
    successors = list(page_graph.successors(script_node))
    assert len(successors) == 2  # since we are an inline script tag

    executing_node = successors[1]

    # Every node directly reachable from the script is a navigator node.
    all_navigator_nodes = pg_nodes_directly_reachable_from(
        page_graph, executing_node)
    node_order = [
        'NavigatorID.userAgent',
        'NavigatorLanguage.language',
        'NavigatorLanguage.languages',
        'NavigatorPlugins.plugins',
        'Navigator.doNotTrack',
        'Navigator.cookieEnabled',
        'NavigatorID.platform',
    ]
    # Without this, missing nodes (even an empty list) would pass silently
    # because the loops below only visit the nodes that were found.
    assert len(all_navigator_nodes) == len(node_order)
    for navigator_node, expected_method in zip(all_navigator_nodes,
                                               node_order):
        node_data = pg_get_node_data(page_graph, navigator_node)
        assert node_data['method'] == expected_method

    # check the call edges
    for navigator_node in all_navigator_nodes:
        edges = pg_edges_data_from_to(page_graph, executing_node,
                                      navigator_node)
        # should only be one call edge to each navigator node
        assert len(edges) == 1
        edge = edges[0]
        # should be exactly 3 keys (edge type, id and timestamp)
        assert len(edge) == 3
        assert edge['edge type'] == 'js call'
        assert 'id' in edge and 'timestamp' in edge

    # check the result edges; a None validator means no value is returned
    result_validators = [
        lambda x: x.startswith('Mozilla/5.0 '),
        lambda x: x == 'en-US',
        lambda x: x.startswith('en-US'),
        None if headless else (lambda x: 'plugin' in x),
        None,
        lambda x: x == 'true',
        lambda x: x in ['MacIntel', 'Win32', 'Linux x86_64'],
    ]
    for navigator_node, validator in zip(all_navigator_nodes,
                                         result_validators):
        edges = pg_edges_data_from_to(page_graph, navigator_node,
                                      executing_node)
        # should only be one result edge from each navigator node
        assert len(edges) == 1
        edge = edges[0]
        assert edge['edge type'] == 'js result'
        if validator:
            # should be exactly 4 keys (type, id, timestamp and value)
            assert len(edge) == 4
            assert validator(edge['value'])
        else:
            # should be exactly 3 keys (type, id, timestamp), since we
            # didn't return a value
            assert len(edge) == 3
        assert 'id' in edge and 'timestamp' in edge
def test(page_graph, html, tab, headless):
    """Verify how a script element created through eval() is attributed.

    Two <script> elements are located: the one whose text calls ``eval`` and
    the one produced by that eval.  The test checks the predecessors of both,
    the successors of the eval'd element (a text node and a script node), and
    that the eval'd script node's single successor has the text 'Big Title'.
    """
    page_graph_nodes = page_graph.nodes(data=True)
    id_mapping = pg_node_id_mapping(page_graph)

    script_nodes = pg_find_html_element_node(
        page_graph, 'script', generate_script_text_selector('eval("var script = ')
    )

    assert len(script_nodes) == 1
    html_script_node = script_nodes[0]

    script_nodes = pg_find_html_element_node(
        page_graph,
        'script',
        generate_script_text_selector('var title = document.getElementById', exclude_text='eval'),
    )
    assert len(script_nodes) == 1
    eval_script_node = script_nodes[0]

    # Check predecessors of |html_script_node|.
    predecessors = list(page_graph.predecessors(html_script_node))
    assert len(predecessors) == 2
    html_script_node_checks = {
        'parser': [lambda x: page_graph_nodes[x]['node type'] == 'parser', None],
        'body': [
            lambda x: page_graph_nodes[x]['node type'] == 'HTML element'
            and page_graph_nodes[x]['tag name'] == 'body',
            None,
        ],
    }
    pg_node_check_predecessors(page_graph, html_script_node, html_script_node_checks)
    assert html_script_node_checks['parser'][1] is not None
    assert html_script_node_checks['body'][1] is not None

    # Check predecessors of |eval_script_node|

    # TODO: Check other edges of |html_script_node|'s script node.
    def check_script_actor(pn):
        """Return True if |pn| is the script that inserted the eval'd text once.

        |pn| must be a script node whose first predecessor is
        |html_script_node|, and it must have exactly one 'insert node' edge
        to |eval_script_node|'s text node whose parent is not a
        '#document-fragment' element.
        """
        if (
            page_graph_nodes[pn]['node type'] != 'script'
            or list(page_graph.predecessors(pn))[0] != html_script_node
        ):
            return False

        # Find |eval_script_node|'s text node.  If there is none, or |pn| has
        # no edge to it, |pn| cannot be the actor: report that instead of
        # failing with an unbound local or a KeyError.
        eval_script_text_node = next(
            (
                n for n in page_graph.successors(eval_script_node)
                if page_graph_nodes[n]['node type'] == 'text node'
            ),
            None,
        )
        if eval_script_text_node is None:
            return False
        if not page_graph.has_edge(pn, eval_script_text_node):
            return False

        # Check that |eval_script_node|'s text node was only inserted once.
        insert_edges = []
        for e, d in page_graph[pn][eval_script_text_node].items():
            if d['edge type'] != 'insert node':
                continue
            parent_id = d['parent']
            assert parent_id in id_mapping
            p = id_mapping[parent_id]
            # Insertions into a document fragment are not counted.
            if (
                page_graph_nodes[p]['node type'] == 'HTML element'
                and page_graph_nodes[p]['tag name'] == '#document-fragment'
            ):
                continue
            insert_edges.append(e)
        return len(insert_edges) == 1

    predecessors = list(page_graph.predecessors(eval_script_node))
    assert len(predecessors) == 2
    eval_script_node_checks = {
        'body': [
            lambda x: page_graph_nodes[x]['node type'] == 'HTML element'
            and page_graph_nodes[x]['tag name'] == 'body',
            None,
        ],
        'script_actor': [check_script_actor, None],
    }
    pg_node_check_predecessors(page_graph, eval_script_node, eval_script_node_checks)
    assert eval_script_node_checks['body'][1] is not None
    assert eval_script_node_checks['script_actor'][1] is not None

    # Check successors of |eval_script_node|.
    assert len(page_graph[eval_script_node]) == 2
    eval_script_node_checks = {
        'text node': [lambda x: page_graph_nodes[x]['node type'] == 'text node', None],
        'script': [lambda x: page_graph_nodes[x]['node type'] == 'script', None],
    }
    pg_node_check_successors(page_graph, eval_script_node, eval_script_node_checks)
    script_text_node = eval_script_node_checks['text node'][1]
    script_node = eval_script_node_checks['script'][1]
    assert script_text_node is not None
    assert script_node is not None

    # Check the successors of |script_node|, i.e., |eval_script_node|'s
    # script node.
    assert len(page_graph[script_node]) == 1
    script_node_checks = {'heading': [lambda x: page_graph_nodes[x]['text'] == 'Big Title', None]}
    pg_node_check_successors(page_graph, script_node, script_node_checks)
    assert script_node_checks['heading'][1] is not None