def replay(args):
    """
    Starts a replay environment for the given replay directory, including setting up
    interfaces, running a DNS server, and configuring and running an nginx server to
    serve the requests
    """
    policy = None
    cert_path = os.path.abspath(args.cert_path) if args.cert_path else None
    key_path = os.path.abspath(args.key_path) if args.key_path else None
    per_resource_latency = os.path.abspath(args.per_resource_latency) if args.per_resource_latency else None
    if args.policy:
        log.debug("reading policy", push_policy=args.policy)
        with open(args.policy, "r") as policy_file:
            policy_dict = json.load(policy_file)
            policy = Policy.from_dict(policy_dict)

    # handle sigterm gracefully
    signal.signal(signal.SIGTERM, sigterm_handler)

    with start_server(
        args.replay_dir,
        cert_path,
        key_path,
        policy,
        per_resource_latency,
        cache_time=args.cache_time,
        extract_critical_requests=args.extract_critical_requests,
        enable_http2=args.enable_http2,
    ):
        while True:
            time.sleep(86400)

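# A minimal sketch of the SIGTERM handler registered in replay() above. The real
# sigterm_handler is defined elsewhere in this package; this hypothetical version assumes
# it only needs to turn SIGTERM into a clean interpreter exit so that the enclosing
# `with start_server(...)` block can unwind and stop the DNS and nginx servers.
def _example_sigterm_handler(signum, frame):
    # Raising SystemExit (via sys.exit) lets context managers run their cleanup code.
    sys.exit(0)
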
def record(args):
    """
    Record a website using Mahimahi. Stores the recorded files in the specified directory.
    In order to use it with blaze, you must preprocess it using `blaze preprocess` to
    generate a training manifest.
    """
    log.info("recording website", website=args.website, record_dir=args.record_dir)
    config = get_config()
    log.debug("using configuration", **config._asdict())
    record_webpage(args.website, args.record_dir, config)

def _generator(env_config: EnvironmentConfig) -> Policy:
    # Note: `dist`, `cached_urls`, and `push_weight` are not parameters of this function;
    # they are expected to be supplied by the enclosing scope (this generator is used as
    # a closure).
    push_groups = env_config.push_groups
    # Collect all resources and group them by type
    all_resources = sorted([res for group in push_groups for res in group.resources], key=lambda res: res.order)
    res_by_type = collections.defaultdict(list)
    for res in all_resources:
        # Only consider non-cached objects in the push resource type distribution
        if res.type in dist and res.url not in cached_urls:
            res_by_type[res.type].append(res)

    # Choose the number of resources to push/preload
    total = sum(map(len, res_by_type.values()))
    if total <= 1:
        return Policy()
    n = random.randint(1, total)

    # Choose the weight factor between push and preload
    weight = push_weight if push_weight is not None else random.random()

    # Choose n resources based on the resource type distribution without replacement
    log.debug("generating push-preload policy", num_resources=len(all_resources), total_size=n, push_weight=weight)
    res = []
    for _ in range(n):
        g, r, s = _choose_with_dist(res_by_type, dist)
        res_by_type[g].pop(r)
        res.append(s)

    policy = Policy()
    for r in res:
        if r.source_id == 0 or r.order == 0:
            continue
        push = random.random() < weight
        policy.steps_taken += 1
        if push:
            source = random.randint(0, r.source_id - 1)
            policy.add_default_push_action(push_groups[r.group_id].resources[source], r)
        else:
            source = random.randint(0, r.order - 1)
            policy.add_default_preload_action(all_resources[source], r)
    return policy

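# _choose_with_dist() is called above but defined elsewhere. A plausible sketch is given
# below under the assumption that `dist` maps resource types to selection weights and that
# the helper returns the chosen type, the index within that type's list, and the resource
# itself (so the caller can pop the entry and sample without replacement).
def _example_choose_with_dist(res_by_type, dist):
    # Restrict the draw to types that still have resources left
    available = [t for t in res_by_type if res_by_type[t]]
    weights = [dist[t] for t in available]
    group = random.choices(available, weights=weights, k=1)[0]
    index = random.randrange(len(res_by_type[group]))
    return group, index, res_by_type[group][index]
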
def preprocess(args):
    """
    Preprocesses a website for training. Automatically discovers linked pages up to a
    certain depth and finds the stable set of page dependencies. The page load is recorded
    and stored, and a training manifest is written out.
    """
    domain = Url.parse(args.website).domain
    train_domain_globs = args.train_domain_globs or ["*{}*".format(domain)]
    log.info("preprocessing website", website=args.website, record_dir=args.record_dir, train_domain_globs=train_domain_globs)

    config = get_config(env_config=EnvironmentConfig(replay_dir=args.record_dir, request_url=args.website))
    client_env = get_default_client_environment()
    log.debug("using configuration", **config._asdict())

    log.info("capturing execution")
    har_resources = har_entries_to_resources(capture_har_in_replay_server(args.website, config, client_env))

    log.info("finding dependency stable set...")
    res_list = find_url_stable_set(args.website, config)

    log.info("found total dependencies", total=len(res_list))
    push_groups = resource_list_to_push_groups(res_list, train_domain_globs=train_domain_globs)

    if args.extract_critical_requests:
        log.info("extracting critical requests")
        push_groups = annotate_critical_requests(args.website, config, client_env, push_groups)
        critical_resources = set(res.url for group in push_groups for res in group.resources if res.critical)
        log.debug("critical resources", resources=critical_resources)

    log.info("finding cacheable objects")
    push_groups = annotate_cacheable_objects(args.record_dir, push_groups)

    log.info("generating configuration...")
    env_config = EnvironmentConfig(
        replay_dir=args.record_dir, request_url=args.website, push_groups=push_groups, har_resources=har_resources
    )
    env_config.save_file(args.output)
    log.info("successfully prepared website for training", output=args.output)

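# The record() and preprocess() commands above are intended to be run back to back. A
# hedged sketch of that workflow using argparse.Namespace objects follows; the field names
# mirror the attributes accessed above (args.website, args.record_dir, ...), while the
# concrete values and the helper itself are purely illustrative.
def _example_record_then_preprocess():
    import argparse

    website = "https://example.com"     # hypothetical target site
    record_dir = "/tmp/example_record"  # hypothetical Mahimahi record directory
    record(argparse.Namespace(website=website, record_dir=record_dir))
    preprocess(
        argparse.Namespace(
            website=website,
            record_dir=record_dir,
            train_domain_globs=None,
            extract_critical_requests=False,
            output="/tmp/example.manifest",
        )
    )
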
def page_load_time(args):
    """
    Captures a webpage and calculates the median page load time for a given website in a
    fast, no-latency Mahimahi shell. Then simulates the load based on profiling the page
    in the same Mahimahi shell.
    """
    # Validate the arguments
    if args.latency is not None and args.latency < 0:
        log.critical("provided latency must be greater or equal to 0")
        sys.exit(1)
    if args.bandwidth is not None and args.bandwidth <= 0:
        log.critical("provided bandwidth must be greater than 0")
        sys.exit(1)
    if args.cpu_slowdown is not None and args.cpu_slowdown not in {1, 2, 4}:
        log.critical("provided cpu slowdown must be 1, 2, or 4")
        sys.exit(1)

    # Setup the client environment
    default_client_env = get_default_client_environment()
    client_env = get_client_environment_from_parameters(
        args.bandwidth or default_client_env.bandwidth,
        args.latency or default_client_env.latency,
        args.cpu_slowdown or default_client_env.cpu_slowdown,
    )

    # If a push/preload policy was specified, read it
    policy = None
    if args.policy:
        log.debug("reading policy", push_policy=args.policy)
        with open(args.policy, "r") as policy_file:
            policy_dict = json.load(policy_file)
            policy = Policy.from_dict(policy_dict)

    env_config = EnvironmentConfig.load_file(args.from_manifest)
    config = get_config(env_config)

    log.info("calculating page load time", manifest=args.from_manifest, url=env_config.request_url)
    plt, orig_plt = 0, 0
    if not args.only_simulator:
        if not args.speed_index:
            orig_plt, *_ = get_page_load_time_in_replay_server(
                request_url=config.env_config.request_url,
                client_env=client_env,
                config=config,
                cache_time=args.cache_time,
                user_data_dir=args.user_data_dir,
            )
            if policy:
                plt, *_ = get_page_load_time_in_replay_server(
                    request_url=config.env_config.request_url,
                    client_env=client_env,
                    config=config,
                    policy=policy,
                    cache_time=args.cache_time,
                    user_data_dir=args.user_data_dir,
                )
        else:
            orig_plt = get_speed_index_in_replay_server(
                request_url=config.env_config.request_url,
                client_env=client_env,
                config=config,
                cache_time=args.cache_time,
                user_data_dir=args.user_data_dir,
            )
            if policy:
                plt = get_speed_index_in_replay_server(
                    request_url=config.env_config.request_url,
                    client_env=client_env,
                    config=config,
                    policy=policy,
                    cache_time=args.cache_time,
                    user_data_dir=args.user_data_dir,
                )

    log.debug("running simulator...")
    sim = Simulator(env_config)
    orig_sim_plt = sim.simulate_load_time(client_env)
    sim_plt = sim.simulate_load_time(client_env, policy)

    print(
        json.dumps(
            {
                "client_env": client_env._asdict(),
                "metric": "speed_index" if args.speed_index else "plt",
                "cache": "warm" if args.user_data_dir else "cold",
                "cache_time": args.cache_time,
                "replay_server": {"with_policy": plt, "without_policy": orig_plt},
                "simulator": {"with_policy": sim_plt, "without_policy": orig_sim_plt},
            },
            indent=4,
        )
    )

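# The JSON printed by page_load_time() can be post-processed to compare the measured load
# times with and without the policy. A small sketch is below, assuming the output above was
# redirected to a file; the path and helper name are hypothetical.
def _example_policy_speedup(results_path="/tmp/plt_results.json"):
    with open(results_path, "r") as results_file:
        results = json.load(results_file)
    replay_results = results["replay_server"]
    # Relative improvement of the policy over the unmodified page load
    return (replay_results["without_policy"] - replay_results["with_policy"]) / replay_results["without_policy"]
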
def _get_results_in_replay_server(
    config: Config,
    client_env: ClientEnvironment,
    iterations: int,
    max_retries: int,
    policy_generator: Callable[[EnvironmentConfig], Policy],
    cache_time: Optional[int] = None,
    user_data_dir: Optional[str] = None,
    speed_index: Optional[bool] = False,
) -> Tuple[float, List[float], List[Policy]]:
    log.debug("capturing median PLT in mahimahi with given environment")
    if not speed_index:
        orig_plt, *_ = get_page_load_time_in_replay_server(
            request_url=config.env_config.request_url,
            client_env=client_env,
            config=config,
            cache_time=cache_time,
            user_data_dir=user_data_dir,
        )
    else:
        orig_plt = get_speed_index_in_replay_server(
            request_url=config.env_config.request_url,
            client_env=client_env,
            config=config,
            cache_time=cache_time,
            user_data_dir=user_data_dir,
        )

    plts = []
    policies = []
    retries = 0
    while retries <= max_retries and len(plts) < iterations:
        policy = policy_generator(config.env_config)
        log.debug("getting HAR in mahimahi with policy:")
        log.debug(json.dumps(policy.as_dict, indent=4))
        try:
            if not speed_index:
                plt, *_ = get_page_load_time_in_replay_server(
                    request_url=config.env_config.request_url,
                    client_env=client_env,
                    config=config,
                    policy=policy,
                    cache_time=cache_time,
                    user_data_dir=user_data_dir,
                )
            else:
                plt = get_speed_index_in_replay_server(
                    request_url=config.env_config.request_url,
                    client_env=client_env,
                    config=config,
                    policy=policy,
                    cache_time=cache_time,
                    user_data_dir=user_data_dir,
                )
            plts.append(plt)
            policies.append(policy)
        except (subprocess.CalledProcessError, subprocess.TimeoutExpired, ValueError, FileNotFoundError) as e:
            log.warn("replay_server failed:", i=len(plts), retries=retries, error=repr(e))
            traceback.print_exc()
            retries += 1
    return orig_plt, plts, policies

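# A sketch of driving _get_results_in_replay_server() with a random policy generator such
# as _generator() above. The generator only receives the EnvironmentConfig, so any extra
# state (dist, cached_urls, push_weight) is assumed to be captured by a closure; the
# iteration and retry counts here are illustrative.
def _example_collect_random_policy_results(config, client_env):
    orig_plt, plts, policies = _get_results_in_replay_server(
        config=config,
        client_env=client_env,
        iterations=10,
        max_retries=3,
        policy_generator=_generator,
    )
    # Pair each sampled policy with its measured load time, best (lowest) first
    return orig_plt, sorted(zip(plts, policies), key=lambda pair: pair[0])
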
def evaluate(args):
    """
    Instantiate the given model and checkpoint and query it for the policy corresponding
    to the given client and network conditions. Also allows running the generated policy
    through the simulator and replay server to get the PLTs and compare them under
    different conditions.
    """
    log.info("evaluating model...", model=args.model, location=args.location, manifest=args.manifest)
    client_env = get_client_environment_from_parameters(args.bandwidth, args.latency, args.cpu_slowdown)
    manifest = EnvironmentConfig.load_file(args.manifest)
    cached_urls = set(
        res.url
        for group in manifest.push_groups
        for res in group.resources
        if args.cache_time is not None and res.cache_time > args.cache_time
    )
    log.debug("using cached resources", cached_urls=cached_urls)
    config = get_config(manifest, client_env, args.reward_func).with_mutations(
        cached_urls=cached_urls, use_aft=args.use_aft
    )

    if args.model == "A3C":
        from blaze.model import a3c as model
    elif args.model == "APEX":
        from blaze.model import apex as model
    elif args.model == "PPO":
        from blaze.model import ppo as model

    import ray

    ray.init(num_cpus=2, log_to_driver=False)

    saved_model = model.get_model(args.location)
    instance = saved_model.instantiate(config)
    policy = instance.policy
    data = policy.as_dict

    if args.verbose or args.run_simulator or args.run_replay_server:
        data = {
            "manifest": args.manifest,
            "location": args.location,
            "client_env": client_env._asdict(),
            "policy": policy.as_dict,
        }

        if args.run_simulator:
            sim = Simulator(manifest)
            sim_plt = sim.simulate_load_time(client_env)
            push_plt = sim.simulate_load_time(client_env, policy)
            data["simulator"] = {"without_policy": sim_plt, "with_policy": push_plt}

        if args.run_replay_server:
            *_, plts = get_page_load_time_in_replay_server(config.env_config.request_url, client_env, config)
            *_, push_plts = get_page_load_time_in_replay_server(
                config.env_config.request_url, client_env, config, policy=policy
            )
            data["replay_server"] = {"without_policy": plts, "with_policy": push_plts}

    print(json.dumps(data, indent=4))

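# A sketch of sweeping evaluate() across several client environments. The bandwidth,
# latency, and cpu_slowdown triples below are hypothetical values in whatever units
# get_client_environment_from_parameters() expects; only cpu_slowdown in {1, 2, 4} is
# implied by the validation in page_load_time() above.
def _example_evaluate_sweep(base_args):
    for bandwidth, latency, cpu_slowdown in [(12000, 20, 1), (24000, 10, 1), (1600, 150, 4)]:
        base_args.bandwidth = bandwidth
        base_args.latency = latency
        base_args.cpu_slowdown = cpu_slowdown
        evaluate(base_args)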