Example #1
0
def test(page_graph, html, tab, headless):
    """Check the edges between the ``document.cookie`` script and the cookie jar.

    The inline script is expected to produce three script->cookie-jar edges
    (two cookie writes followed by one read call) and a single
    cookie-jar->script edge carrying the value that was read back.
    ``html``, ``tab`` and ``headless`` are not used by this test.
    """
    cookie_selector = generate_script_text_selector('document.cookie')
    matching_elements = pg_find_html_element_node(page_graph, 'script', cookie_selector)

    # Only one <script> element on the page mentions document.cookie.
    assert len(matching_elements) == 1
    script_element = matching_elements[0]

    cookie_jar = pg_find_static_node(page_graph, 'cookie jar')

    # The script element has two successors: the text node holding the
    # code, followed by the script that actually runs.
    element_successors = list(page_graph.successors(script_element))
    assert len(element_successors) == 2
    _, running_script = element_successors

    # --- script -> cookie jar -------------------------------------------
    outgoing = pg_edges_data_from_to(page_graph, running_script, cookie_jar)
    assert len(outgoing) == 3

    # The first two edges are cookie writes and carry both key and value.
    expected_writes = (
        ('awesomeKey', 'awesomeValue'),
        ('anotherKey', 'anotherValue'),
    )
    for position, (cookie_name, cookie_value) in enumerate(expected_writes):
        assert outgoing[position]['key'] == cookie_name
        assert outgoing[position]['value'] == cookie_value

    # The third edge is the read call; only its key is checked.
    assert outgoing[2]['key'] == 'http://localhost:8080/storage_cookie.html'

    # --- cookie jar -> script -------------------------------------------
    incoming = pg_edges_data_from_to(page_graph, cookie_jar, running_script)
    assert len(incoming) == 1

    read_result = incoming[0]
    assert read_result['key'] == 'http://localhost:8080/storage_cookie.html'
    assert read_result['value'] == 'awesomeKey=awesomeValue; anotherKey=anotherValue'
Example #2
0
def test(page_graph, html, tab, headless):
    """Check the request edges recorded for the inline ``fetch`` script.

    The executing script must have exactly one outgoing edge (a
    'request start' edge to a resource node) and exactly two incoming edges
    (an 'execute' edge and a 'request complete' edge).
    ``html``, ``tab`` and ``headless`` are not used by this test.
    """
    fetch_selector = generate_script_text_selector('fetch')
    script_elements = pg_find_html_element_node(page_graph, 'script', fetch_selector)
    assert len(script_elements) == 1
    script_element = script_elements[0]

    # The HTML element must have an execute edge to the actual script.
    executed = pg_nodes_directly_reachable_from_with_edge_type(
        page_graph, script_element, 'execute')
    assert len(executed) == 1
    running_script = executed[0]

    # Partition every edge touching the script into outgoing / incoming.
    # An edge that leaves the script is never also counted as incoming.
    outgoing = []
    incoming = []
    for candidate in page_graph.edges(data=True):
        if pg_edge_out(candidate) == running_script:
            outgoing.append(candidate)
            continue
        if pg_edge_in(candidate) == running_script:
            incoming.append(candidate)

    # We have one edge from the script: a request start edge whose target
    # is a resource node.
    assert len(outgoing) == 1
    request_edge = outgoing[0]
    assert pg_edge_data(request_edge, 'edge type') == 'request start'
    resource = request_edge[1]
    assert pg_get_node_data(page_graph, resource)['node type'] == 'resource'

    # Two edges arrive at the script: execute and request complete.
    assert len(incoming) == 2
    for candidate in incoming:
        edge_kind = pg_edge_data(candidate, 'edge type')
        assert edge_kind == 'execute' or edge_kind == 'request complete'

    # Any incoming edge that originates at the resource must be the
    # request complete edge.
    from_resource = [
        candidate for candidate in incoming
        if pg_edge_out(candidate) == resource
    ]
    for candidate in from_resource:
        assert pg_edge_data(candidate, 'edge type') == 'request complete'
Example #3
0
def test(page_graph, html, tab, headless):
    """Check the call/result edges recorded for the inline ``screen.`` script.

    Eight nodes must be directly reachable from the executing script. For
    each of them the first 'js call' edge and the first 'js result' edge are
    validated; result values must be canonical integer strings.
    ``html``, ``tab`` and ``headless`` are not used by this test.
    """
    screen_selector = generate_script_text_selector('screen.')
    script_elements = pg_find_html_element_node(page_graph, 'script', screen_selector)
    assert len(script_elements) == 1

    # An inline script tag has two successors; the second one is the
    # script that executes.
    element_successors = list(page_graph.successors(script_elements[0]))
    assert len(element_successors) == 2
    running_script = element_successors[1]

    screen_nodes = pg_nodes_directly_reachable_from(page_graph, running_script)
    assert len(screen_nodes) == 8

    # Call edges: script -> screen node.
    for screen_node in screen_nodes:
        call_edges = pg_edges_data_from_to(page_graph, running_script, screen_node)
        # One or two call edges per node (`colorDepth` and `pixelDepth`
        # are synonyms).
        assert 1 <= len(call_edges) <= 2
        first_call = call_edges[0]
        # Exactly three keys: edge type, id, timestamp.
        assert len(first_call) == 3
        assert first_call['edge type'] == 'js call'
        assert 'id' in first_call
        assert 'timestamp' in first_call

    # Result edges: screen node -> script.
    for screen_node in screen_nodes:
        result_edges = pg_edges_data_from_to(page_graph, screen_node, running_script)
        assert 1 <= len(result_edges) <= 2
        first_result = result_edges[0]
        # Exactly four keys: edge type, id, timestamp, value.
        assert len(first_result) == 4
        assert first_result['edge type'] == 'js result'
        # The value must round-trip through int() unchanged.
        reported = first_result['value']
        assert reported == str(int(reported))
        assert 'id' in first_result
        assert 'timestamp' in first_result
Example #4
0
def test(page_graph, html, tab, headless):
    """Check the call/result edges recorded for the inline ``webgl`` script.

    Four distinct nodes must be reachable from the executing script. Each is
    identified by the number of edges it shares with the script:
    1 (getContext), 2 (getExtension), 26 (getParameter) or
    36 (getShaderPrecisionFormat). Call-edge arguments are matched against
    the expected argument lists, each expected entry being consumed once.
    ``html``, ``tab`` and ``headless`` are not used by this test.
    """
    webgl_selector = generate_script_text_selector('webgl')
    script_elements = pg_find_html_element_node(page_graph, 'script', webgl_selector)
    assert len(script_elements) == 1

    # An inline script tag has two successors; the second one executes.
    element_successors = list(page_graph.successors(script_elements[0]))
    assert len(element_successors) == 2
    running_script = element_successors[1]

    reachable = pg_nodes_directly_reachable_from(page_graph, running_script)
    unique_targets = sorted(set(reachable))
    # getContext plus the three different webgl functions.
    assert len(unique_targets) == 4

    # Every shader/precision combination is expected three times:
    # FLOAT precisions first (vertex, then fragment), then INT precisions.
    shader_args = []
    for numeric_kind in ('FLOAT', 'INT'):
        for shader_stage in ('VERTEX', 'FRAGMENT'):
            for precision in ('HIGH', 'MEDIUM', 'LOW'):
                label = ('gl.' + shader_stage + '_SHADER, gl.'
                         + precision + '_' + numeric_kind)
                shader_args.extend([label] * 3)

    get_parameter_args = [
        'gl.ALIASED_LINE_WIDTH_RANGE',
        'gl.ALIASED_POINT_SIZE_RANGE',
        'gl.ALPHA_BITS',
        'gl.BLUE_BITS',
        'gl.DEPTH_BITS',
        'gl.GREEN_BITS',
        'gl.MAX_COMBINED_TEXTURE_IMAGE_UNITS',
        'gl.MAX_CUBE_MAP_TEXTURE_SIZE',
        'gl.MAX_FRAGMENT_UNIFORM_VECTORS',
        'gl.MAX_RENDERBUFFER_SIZE',
        'gl.MAX_TEXTURE_IMAGE_UNITS',
        'gl.MAX_TEXTURE_SIZE',
        'gl.MAX_VARYING_VECTORS',
        'gl.MAX_VERTEX_ATTRIBS',
        'gl.MAX_VERTEX_TEXTURE_IMAGE_UNITS',
        'gl.MAX_VERTEX_UNIFORM_VECTORS',
        'gl.MAX_VIEWPORT_DIMS',
        'gl.RED_BITS',
        'gl.RENDERER',
        'gl.SHADING_LANGUAGE_VERSION',
        'gl.STENCIL_BITS',
        'gl.VENDOR',
        'gl.VERSION',
        'ext.UNMASKED_VENDOR_WEBGL',
        'ext.UNMASKED_RENDERER_WEBGL',
        'ext.MAX_TEXTURE_MAX_ANISOTROPY_EXT',
    ]

    get_extension_args = ['WEBGL_debug_renderer_info', 'EXT_texture_filter_anisotropic']

    # Edge count -> expected arguments still waiting to be matched.
    pending_by_count = {
        2: get_extension_args,     # getExtension (or getParameter to webgl2)
        36: shader_args,           # getShaderPrecisionFormat
        26: get_parameter_args,    # getParameter to webgl
    }

    # Call edges: script -> webgl node.
    for target in unique_targets:
        call_edges = pg_edges_data_from_to(page_graph, running_script, target)
        call_count = len(call_edges)

        if call_count == 1:
            # getContext
            only_call = call_edges[0]
            assert only_call['edge type'] == 'js call'
            assert only_call['args'].startswith('webgl')
            continue

        # Any other edge count means something went bad.
        assert call_count in pending_by_count
        pending = pending_by_count[call_count]
        for call_edge in call_edges:
            assert call_edge['edge type'] == 'js call'
            call_args = call_edge['args']
            # Each expected argument may be matched only once.
            assert call_args in pending
            pending.remove(call_args)

    # Result edges: webgl node -> script.
    for target in unique_targets:
        result_edges = pg_edges_data_from_to(page_graph, target, running_script)
        # Same four edge counts as for the call edges.
        assert len(result_edges) in (1, 2, 26, 36)
        for result_edge in result_edges:
            assert result_edge['edge type'] == 'js result'
Example #5
0
def test(page_graph, html, tab, headless):
    """Check the edges between the ``sessionStorage`` script and session storage.

    Five script->storage edges are expected (two sets, two keyed accesses
    and a final clear), plus a single storage->script edge that returns the
    value stored under 'myCat'.
    ``html``, ``tab`` and ``headless`` are not used by this test.
    """
    storage_selector = generate_script_text_selector('sessionStorage')
    script_elements = pg_find_html_element_node(page_graph, 'script', storage_selector)

    # Only one <script> element on the page mentions sessionStorage.
    assert len(script_elements) == 1
    script_element = script_elements[0]

    session_storage = pg_find_static_node(page_graph, 'session storage')

    # Two successors: the text node with the code, then the actual script.
    element_successors = list(page_graph.successors(script_element))
    assert len(element_successors) == 2
    running_script = element_successors[1]

    # --- script -> session storage --------------------------------------
    to_storage = pg_edges_data_from_to(page_graph, running_script, session_storage)
    assert len(to_storage) == 5

    expected_to_storage = [
        {'key': 'myCat', 'value': 'Tom'},
        {'key': 'myMouse', 'value': 'Jerry'},
        {'key': 'myCat'},
        {'key': 'myCat'},
        {},
    ]

    # The last edge is the storage clear, which has no key.
    clear_position = len(to_storage) - 1
    for position, wanted in enumerate(expected_to_storage):
        if position != clear_position:
            assert to_storage[position]['key'] == wanted['key']
        # Only the two leading set edges carry a value.
        if position < 2:
            assert to_storage[position]['value'] == wanted['value']

    # --- session storage -> script --------------------------------------
    from_storage = pg_edges_data_from_to(page_graph, session_storage, running_script)
    assert len(from_storage) == 1

    read_result = from_storage[0]
    assert read_result['key'] == 'myCat'
    assert read_result['value'] == 'Tom'
Example #6
0
def test(page_graph, html, tab, headless):
    """Check the call/result edges recorded for the inline ``navigator.`` script.

    The nodes directly reachable from the executing script must be exactly
    the expected navigator methods, in order. Each must have one 'js call'
    edge from the script and one 'js result' edge back to it.

    Args:
        page_graph: graph under test; queried through the ``pg_*`` helpers
            and its ``successors`` method.
        html: unused by this test.
        tab: unused by this test.
        headless: when true, no value is expected on the
            ``NavigatorPlugins.plugins`` result edge.
    """
    script_nodes = pg_find_html_element_node(
        page_graph, 'script', generate_script_text_selector('navigator.'))
    assert len(script_nodes) == 1

    script_node = script_nodes[0]
    successors = list(page_graph.successors(script_node))
    assert len(successors) == 2  # since we are an inline script tag

    executing_node = successors[1]

    all_navigator_nodes = pg_nodes_directly_reachable_from(
        page_graph, executing_node)

    # (expected method, validator for the result value). A validator of
    # None means the result edge is expected to carry no value at all.
    expectations = [
        ('NavigatorID.userAgent', lambda x: x.startswith('Mozilla/5.0 ')),
        ('NavigatorLanguage.language', lambda x: x == 'en-US'),
        ('NavigatorLanguage.languages', lambda x: x.startswith('en-US')),
        ('NavigatorPlugins.plugins',
         None if headless else (lambda x: 'plugin' in x)),
        ('Navigator.doNotTrack', None),
        ('Navigator.cookieEnabled', lambda x: x == 'true'),
        ('NavigatorID.platform',
         lambda x: x in ['MacIntel', 'Win32', 'Linux x86_64']),
    ]

    # Without this check a graph with fewer navigator nodes (even none)
    # would pass, and one with more would fail with an IndexError.
    assert len(all_navigator_nodes) == len(expectations), (
        'expected %d navigator nodes, found %d'
        % (len(expectations), len(all_navigator_nodes)))

    # check that the reachable nodes are the expected navigator methods,
    # in the expected order
    for navigator_node, (method, _) in zip(all_navigator_nodes, expectations):
        assert pg_get_node_data(page_graph, navigator_node)['method'] == method

    # check the call edges
    for navigator_node in all_navigator_nodes:
        edges = pg_edges_data_from_to(page_graph, executing_node,
                                      navigator_node)
        # should only be one call edge to each navigator node
        assert len(edges) == 1
        edge = edges[0]
        # should be exactly 3 keys (edge type, id and timestamp)
        assert len(edge) == 3
        assert edge['edge type'] == 'js call'
        assert 'id' in edge and 'timestamp' in edge

    # check the result edges
    for navigator_node, (_, validator) in zip(all_navigator_nodes,
                                              expectations):
        edges = pg_edges_data_from_to(page_graph, navigator_node,
                                      executing_node)
        # should only be one result edge from each navigator node
        assert len(edges) == 1
        edge = edges[0]
        assert edge['edge type'] == 'js result'
        assert 'id' in edge and 'timestamp' in edge
        if validator is None:
            # exactly 3 keys (type, id, timestamp), since no value is returned
            assert len(edge) == 3
        else:
            # exactly 4 keys (type, id, timestamp and value)
            assert len(edge) == 4
            assert validator(edge['value'])
def test(page_graph, html, tab, headless):
    """Check the graph structure around a script element created through ``eval``.

    Two <script> HTML elements are located by their text: the one containing
    the ``eval("var script = `` call (|html_script_node|) and the one whose
    text contains ``var title = document.getElementById`` but not ``eval``
    (|eval_script_node|). The test then validates the predecessors of both
    elements, the successors of |eval_script_node|, and the single successor
    of the script that |eval_script_node| executes.

    ``html``, ``tab`` and ``headless`` are not used in the visible body.
    """
    # Node view with attributes; indexed below as page_graph_nodes[node][attr].
    page_graph_nodes = page_graph.nodes(data=True)
    # Used below to translate an edge's 'parent' id into a graph node.
    id_mapping = pg_node_id_mapping(page_graph)

    script_nodes = pg_find_html_element_node(
        page_graph, 'script', generate_script_text_selector('eval("var script = ')
    )

    assert len(script_nodes) == 1
    html_script_node = script_nodes[0]

    script_nodes = pg_find_html_element_node(
        page_graph,
        'script',
        generate_script_text_selector('var title = document.getElementById', exclude_text='eval'),
    )
    assert len(script_nodes) == 1
    eval_script_node = script_nodes[0]

    # Check predecessors of |html_script_node|.
    predecessors = list(page_graph.predecessors(html_script_node))
    assert len(predecessors) == 2
    # Each entry is [predicate, result slot]. The slot starts as None and is
    # asserted to be non-None after the helper runs; presumably the helper
    # stores the matching node there -- confirm against
    # pg_node_check_predecessors.
    html_script_node_checks = {
        'parser': [lambda x: page_graph_nodes[x]['node type'] == 'parser', None],
        'body': [
            lambda x: page_graph_nodes[x]['node type'] == 'HTML element'
            and page_graph_nodes[x]['tag name'] == 'body',
            None,
        ],
    }
    pg_node_check_predecessors(page_graph, html_script_node, html_script_node_checks)
    assert html_script_node_checks['parser'][1] is not None
    assert html_script_node_checks['body'][1] is not None

    # Check predecessors of |eval_script_node|

    # TODO: Check other edges of |html_script_node|'s script node.
    def check_script_actor(pn):
        """Return True if |pn| is the script run by |html_script_node| and it
        inserted |eval_script_node|'s text node exactly once (insertions
        whose parent is a '#document-fragment' element are not counted)."""
        if (
            page_graph_nodes[pn]['node type'] == 'script'
            and list(page_graph.predecessors(pn))[0] == html_script_node
        ):
            # Check that |eval_script_node|'s text node was
            # only inserted once.
            # NOTE(review): if |eval_script_node| has no 'text node'
            # successor, eval_script_text_node is never bound and the loop
            # over page_graph[pn][...] below raises UnboundLocalError.
            for n in page_graph.successors(eval_script_node):
                if page_graph_nodes[n]['node type'] == 'text node':
                    eval_script_text_node = n
                    break
            insert_edges = []
            # Iterate every parallel edge pn -> text node (edge key, edge data).
            for e, d in page_graph[pn][eval_script_text_node].items():
                if d['edge type'] != 'insert node':
                    continue
                parent_id = d['parent']
                assert parent_id in id_mapping
                p = id_mapping[parent_id]
                # Skip insertions into a document fragment.
                if (
                    page_graph_nodes[p]['node type'] == 'HTML element'
                    and page_graph_nodes[p]['tag name'] == '#document-fragment'
                ):
                    continue
                insert_edges.append(e)
            if len(insert_edges) == 1:
                return True
        return False

    predecessors = list(page_graph.predecessors(eval_script_node))
    assert len(predecessors) == 2
    eval_script_node_checks = {
        'body': [
            lambda x: page_graph_nodes[x]['node type'] == 'HTML element'
            and page_graph_nodes[x]['tag name'] == 'body',
            None,
        ],
        'script_actor': [check_script_actor, None],
    }
    pg_node_check_predecessors(page_graph, eval_script_node, eval_script_node_checks)
    assert eval_script_node_checks['body'][1] is not None
    assert eval_script_node_checks['script_actor'][1] is not None

    # Check successors of |eval_script_node|.
    assert len(page_graph[eval_script_node]) == 2
    # The name eval_script_node_checks is reused here for the successor checks.
    eval_script_node_checks = {
        'text node': [lambda x: page_graph_nodes[x]['node type'] == 'text node', None],
        'script': [lambda x: page_graph_nodes[x]['node type'] == 'script', None],
    }
    pg_node_check_successors(page_graph, eval_script_node, eval_script_node_checks)
    # The result slots are read back as the matched nodes.
    script_text_node = eval_script_node_checks['text node'][1]
    script_node = eval_script_node_checks['script'][1]
    assert script_text_node is not None
    assert script_node is not None

    # Check the successors of |script_node|, i.e., |eval_script_node|'s
    # script node.
    assert len(page_graph[script_node]) == 1
    script_node_checks = {'heading': [lambda x: page_graph_nodes[x]['text'] == 'Big Title', None]}
    pg_node_check_successors(page_graph, script_node, script_node_checks)
    assert script_node_checks['heading'][1] is not None