Exemple #1
0
    def from_dict(policy_dict: dict) -> "Policy":
        """
        Build a Policy from a plain dictionary. The original author notes that the result is only
        meant for display purposes; it is created with a bare `Policy()` call.

        Only the top-level keys "push" and "preload" are read; every other key is skipped. Each of
        them maps a source URL to an iterable of dependency entries, and every entry is indexed by
        "url" and "type", where "type" must be the name of a ResourceType member.

        All Resources created here have size 0, and source Resources use ResourceType.NONE. Both the
        forward maps (source -> set of dependencies) and the reverse maps (dependency -> source) are
        filled in, and steps_taken is increased by the number of dependencies stored in the
        forward maps.
        """
        policy = Policy()
        # policy type -> (source-to-dependencies map, dependency-to-source map)
        maps_by_kind = {
            "push": (policy.source_to_push, policy.push_to_source),
            "preload": (policy.source_to_preload, policy.preload_to_source),
        }

        for kind, sources in policy_dict.items():
            if kind not in maps_by_kind:
                continue
            forward, backward = maps_by_kind[kind]

            for source_url, deps in sources.items():
                source_res = Resource(url=source_url, size=0, type=ResourceType.NONE)
                # the source is registered even when it has no dependencies
                forward[source_res] = {
                    Resource(url=dep["url"], size=0, type=ResourceType[dep["type"]]) for dep in deps
                }
                for dep in deps:
                    dep_res = Resource(url=dep["url"], size=0, type=ResourceType[dep["type"]])
                    backward[dep_res] = source_res

        # one step per dependency stored in the forward maps
        for forward in (policy.source_to_push, policy.source_to_preload):
            policy.steps_taken += sum(len(dep_set) for dep_set in forward.values())
        return policy
Exemple #2
0
def har_entries_to_resources(har: Har) -> List[Resource]:
    """
    Turn the entries of a HAR capture into a list of Resources.

    Entries are kept only if their request URL starts with "http" and their response status is
    non-zero. The survivors are sorted by start time and de-duplicated by request URL through
    `ordered_uniq`. Each Resource gets its position in that sequence as both `order` and
    `source_id`, and its size is the body size plus the header size, with negative values counted
    as 0. The list is finally passed through `compute_parent_child_relationships` together with
    the HAR's timings.
    """

    def is_completed_http_request(entry) -> bool:
        # http(s) requests only, and only those whose response carries a non-zero status
        return entry.request.url.startswith("http") and entry.response.status != 0

    completed = filter(is_completed_http_request, har.log.entries)
    chronological = sorted(completed, key=lambda entry: entry.started_date_time)
    # the same URL may show up more than once; keep a single entry per URL
    unique_entries = ordered_uniq(chronological, key=lambda entry: entry.request.url)

    resources = [
        Resource(
            url=entry.request.url,
            size=max(entry.response.body_size, 0) + max(entry.response.headers_size, 0),
            type=get_har_entry_type(entry),
            order=position,
            source_id=position,
            critical=entry.critical,
        )
        for (position, entry) in enumerate(unique_entries)
    ]
    return compute_parent_child_relationships(resources, har.timings)
Exemple #3
0
def compute_parent_child_relationships(res_list: List[Resource], timings: Dict[str, Timing]) -> List[Resource]:
    """
    Build a new list of Resources, in the same sequence as `res_list`, whose `initiator` and
    timing fields are filled in from `timings` (looked up by resource URL).

    A resource's initiator is the `order` of the resource in `res_list` whose URL equals the
    timing's `initiator`; it is 0 when the resource has no (truthy) timing entry or when the
    initiator URL is not in `res_list`. Resources without a timing entry also get 0 for
    `execution_ms`, `fetch_delay_ms` and `time_to_first_byte_ms`.
    """
    # URL -> order, so an initiator URL can be translated to the order of its resource
    url_to_order = {}
    for res in res_list:
        url_to_order[res.url] = res.order

    linked = []
    for res in res_list:
        timing = timings.get(res.url)
        if timing:
            initiator = url_to_order.get(timing.initiator, 0)
            execution_ms = timing.execution_ms
            fetch_delay_ms = timing.fetch_delay_ms
            time_to_first_byte_ms = timing.time_to_first_byte_ms
        else:
            initiator = execution_ms = fetch_delay_ms = time_to_first_byte_ms = 0

        linked.append(
            Resource(
                url=res.url,
                type=res.type,
                size=res.size,
                order=res.order,
                group_id=res.group_id,
                source_id=res.source_id,
                initiator=initiator,
                execution_ms=execution_ms,
                fetch_delay_ms=fetch_delay_ms,
                time_to_first_byte_ms=time_to_first_byte_ms,
                critical=res.critical,
            )
        )

    return linked
Exemple #4
0
def resource_list_to_push_groups(res_list: List[Resource],
                                 train_domain_globs=None) -> List[PushGroup]:
    """
    Group an ordered list of resources into one PushGroup per domain.

    Domains are sorted and a group's id is the position of its domain in that sorted sequence.
    A group is trainable when `train_domain_globs` is empty/None or when its domain matches at
    least one of the globs (via `pathlib.PurePath.match`).

    Every resource is re-created with `order` set to its position in `res_list`, `source_id` set
    to its position inside its group, and `initiator` translated from the old order values to
    the new positions. A KeyError is raised if a resource's initiator is not the `order` of any
    resource in `res_list`.
    """

    def is_trainable(domain) -> bool:
        if not train_domain_globs:
            return True
        domain_path = pathlib.PurePath(domain)
        return any(domain_path.match(glob) for glob in train_domain_globs)

    # one group per distinct domain, ids assigned in sorted-domain sequence
    domains = sorted({Url.parse(res.url).domain for res in res_list})
    group_id_of_domain = {}
    push_groups = []
    for (gid, domain) in enumerate(domains):
        group_id_of_domain[domain] = gid
        push_groups.append(
            PushGroup(id=gid,
                      name=domain,
                      resources=[],
                      trainable=is_trainable(domain)))

    # old order -> new position, used to rewrite the initiators
    new_position_of = {}
    for (position, res) in enumerate(res_list):
        new_position_of[res.order] = position

    for (position, res) in enumerate(res_list):
        gid = group_id_of_domain[Url.parse(res.url).domain]
        members = push_groups[gid].resources
        members.append(
            Resource(
                url=res.url,
                size=res.size,
                type=res.type,
                order=position,
                group_id=gid,
                # evaluated before the append, so this is the index the resource will occupy
                source_id=len(members),
                initiator=new_position_of[res.initiator],
                execution_ms=res.execution_ms,
                fetch_delay_ms=res.fetch_delay_ms,
                time_to_first_byte_ms=res.time_to_first_byte_ms,
                critical=res.critical,
            ))

    return push_groups
Exemple #5
0
def find_url_stable_set(url: str, config: Config) -> List[Resource]:
    """
    Captures a HAR for the given URL `STABLE_SET_NUM_RUNS` times back-to-back in the replay
    server, keeps the resources that are common to every run that returned resources, orders
    them by how often one was seen before the other, and returns them as a list of Resources
    with parent-child relationships computed from the timings of the first successful run.
    Returns an empty list when no run produced any resources.
    """
    log = logger.with_namespace("find_url_stable_set")
    hars: List[Har] = []
    resource_sets: List[Set[Resource]] = []
    # seen_before[a][b] == number of runs in which URL a was listed before URL b
    seen_before = collections.defaultdict(lambda: collections.defaultdict(int))

    for run in range(1, STABLE_SET_NUM_RUNS + 1):
        log.debug("capturing HAR...", run=run, url=url)
        har = capture_har_in_replay_server(url, config,
                                           get_default_client_environment())
        resource_list = har_entries_to_resources(har)
        if not resource_list:
            log.warn("no response received", run=run)
            continue
        log.debug("received resources", total=len(resource_list))

        # count every (earlier, later) pair of this run
        for (idx, earlier) in enumerate(resource_list):
            for later in resource_list[idx + 1:]:
                seen_before[earlier.url][later.url] += 1

        resource_sets.append(set(resource_list))
        hars.append(har)

    log.debug("resource set lengths",
              resource_lens=[len(res_set) for res_set in resource_sets])
    if not resource_sets:
        return []

    half_of_runs = len(resource_sets) // 2

    def compare(a, b):
        # negative (a sorts first) when a was seen before b in more than half of the runs
        return half_of_runs - seen_before[a.url][b.url]

    common_res = sorted(set.intersection(*resource_sets),
                        key=functools.cmp_to_key(compare))

    # Hackily renumber the combined resources so that compute_parent_child_relationships works
    renumbered = []
    for (position, res) in enumerate(common_res):
        fields = res._asdict()
        fields["order"] = position
        renumbered.append(Resource(**fields))
    return compute_parent_child_relationships(renumbered, hars[0].timings)
Exemple #6
0
def create_resource(url):
    """ Returns an HTML Resource for `url` with fixed size 1024, order 1, group 0 and source 0 """
    fixed_fields = dict(size=1024, order=1, group_id=0, source_id=0, type=ResourceType.HTML)
    return Resource(url=url, **fixed_fields)
Exemple #7
0
def get_push_groups() -> List[PushGroup]:
    """
    Returns a fixed set of four PushGroups (example.com, img.example.com,
    serve.ads.googleads.com and static.example.com) holding 14 resources of size 1024 whose
    orders run from 0 to 13. Only the serve.ads.googleads.com group is not trainable.

    A group's id is its position in the returned list, and a resource's `group_id` and
    `source_id` are the id of its group and its position inside that group.
    """
    # (group name, trainable, [(url, order, initiator, type), ...])
    layout = [
        ("example.com", True, [
            ("http://example.com/", 0, 0, ResourceType.HTML),
            ("http://example.com/A", 1, 0, ResourceType.IMAGE),
            ("http://example.com/B", 5, 0, ResourceType.IMAGE),
            ("http://example.com/C", 8, 0, ResourceType.IMAGE),
            ("http://example.com/F", 12, 0, ResourceType.IMAGE),
        ]),
        ("img.example.com", True, [
            ("http://img.example.com/D", 9, 0, ResourceType.IMAGE),
            ("http://img.example.com/E", 11, 0, ResourceType.IMAGE),
            ("http://img.example.com/G", 13, 0, ResourceType.IMAGE),
        ]),
        ("serve.ads.googleads.com", False, [
            ("http://serve.ads.googleads.com/script.1.js", 4, 0, ResourceType.SCRIPT),
            ("http://serve.ads.googleads.com/script.2.js", 7, 4, ResourceType.SCRIPT),
            ("http://serve.ads.googleads.com/script.3.js", 10, 7, ResourceType.SCRIPT),
        ]),
        ("static.example.com", True, [
            ("http://static.example.com/script.js", 2, 1, ResourceType.SCRIPT),
            ("http://static.example.com/font.woff", 3, 2, ResourceType.FONT),
            ("http://static.example.com/image.jpg", 6, 0, ResourceType.IMAGE),
        ]),
    ]

    push_groups = []
    for (group_id, (name, trainable, rows)) in enumerate(layout):
        members = [
            Resource(
                url=res_url,
                size=1024,
                order=order,
                group_id=group_id,
                source_id=source_id,
                initiator=initiator,
                type=res_type,
            )
            for (source_id, (res_url, order, initiator, res_type)) in enumerate(rows)
        ]
        push_groups.append(
            PushGroup(id=group_id, name=name, trainable=trainable, resources=members))
    return push_groups