def main(xls_file, start_cmd, end_cmd, cleanmr):
    # start_cmd and end_cmd must be provided together.
    if start_cmd:
        assert end_cmd is not None, "start_cmd requires end_cmd"
    if end_cmd:
        assert start_cmd is not None, "end_cmd requires start_cmd"
    if cleanmr:
        os.system("cleanmr")
    if start_cmd:
        os.system(start_cmd)

    srtml.init()
    df = pd.read_excel(xls_file, sheet_name="Model Raw Profile")
    for index, row in df.iterrows():
        pgraph = create_pgraph(df.loc[index, "Model Name"])
        profile_dict = convert_profiles_to_regression_models(
            json.loads(df.loc[index, "raw profile"]))
        dinfo = DATASET_INFORMATION(
            **json.loads(df.loc[index, "Dataset Information"]))
        pgraph.submit(
            df.loc[index, "Accuracy"],
            dinfo,
            df.loc[index, "feature"],
            profile_dict,
        )
    shutdown()
    if end_cmd:
        os.system(end_cmd)
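# The block below is a hypothetical command-line entry point for the submit
# script above; the original driver may use a different CLI library (e.g. click).
# The flag names (--xls-file, --start-cmd, --end-cmd, --cleanmr) are assumptions
# introduced here for illustration only.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Submit profiled models from an Excel workbook.")
    parser.add_argument("--xls-file", required=True,
                        help="Workbook containing the 'Model Raw Profile' sheet")
    parser.add_argument("--start-cmd", default=None,
                        help="Shell command run before submission (e.g. start the cluster)")
    parser.add_argument("--end-cmd", default=None,
                        help="Shell command run after submission (e.g. stop the cluster)")
    parser.add_argument("--cleanmr", action="store_true",
                        help="Run 'cleanmr' before anything else")
    args = parser.parse_args()
    main(args.xls_file, args.start_cmd, args.end_cmd, args.cleanmr)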
def main(xls_file, start_cmd, end_cmd):
    if start_cmd:
        os.system(start_cmd)

    srtml.init()
    df = pd.read_excel(xls_file, sheet_name="Arrival Information")
    columns = [
        "mu (qps)",
        "cv",
        "# requests",
        "Latency Constraint (ms)",
        "Planner",
        "configuration",
        "Estimated Latency (ms)",
        "Cost",
        "Accuracy",
    ]
    new_df = pd.DataFrame(columns=columns)
    for index, row in df.iterrows():
        row_df = {columns[i]: df.loc[index, columns[i]] for i in range(5)}
        with VGraph(name="classifier") as graph:
            classifier = Classifier(
                feature=IMAGE_CLASSIFICATION_FEATURE,
                dataset_information=dinfo,  # NOTE: dinfo is assumed to be defined at module scope
                name="VClassifier",
            )
        arrival_curve = generate_fixed_arrival_process(
            mean_qps=df.loc[index, columns[0]],
            cv=df.loc[index, columns[1]],
            num_requests=df.loc[index, columns[2]],
        ).tolist()
        config = graph.configure(
            throughput_qps_constraint=df.loc[index, columns[0]],
            latency_ms_constraint=df.loc[index, columns[3]],
            planner_kwargs={"inter_arrival_process": arrival_curve},
            planner_cls=PLANNER_CLS.get(df.loc[index, columns[4]], None),
            print_state=True,
            materialize=False,
        )
        estimated_values = graph.state.get_estimate_values()._asdict()
        row_df["configuration"] = json.dumps(config)
        row_df["Estimated Latency (ms)"] = estimated_values["latency"]
        row_df["Cost"] = estimated_values["cost"]
        row_df["Accuracy"] = estimated_values["accuracy"]
        new_df = new_df.append(row_df, ignore_index=True)
    shutdown()
    if end_cmd:
        os.system(end_cmd)

    with pd.ExcelWriter(xls_file, mode="a") as writer:
        new_df.to_excel(writer, sheet_name="Planner Configuration")
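# generate_fixed_arrival_process comes from the srtml experiment utilities.
# The helper below is only a sketch of how a comparable inter-arrival trace
# could be produced, assuming gamma-distributed inter-arrival gaps with the
# given mean rate (mu, in qps) and coefficient of variation (cv). It is not
# the library's implementation.
import numpy as np


def sketch_arrival_process(mean_qps, cv, num_requests, seed=0):
    """Return num_requests inter-arrival gaps (seconds) with mean 1/mean_qps and the given cv."""
    rng = np.random.default_rng(seed)
    mean_gap = 1.0 / mean_qps
    if cv == 0:
        # Deterministic arrivals: every gap equals the mean.
        return np.full(num_requests, mean_gap)
    shape = 1.0 / cv ** 2        # gamma shape k, since cv = 1/sqrt(k)
    scale = mean_gap / shape     # gamma scale theta, since mean = k * theta
    return rng.gamma(shape, scale, size=num_requests)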
def main(xls_file, start_cmd, end_cmd):
    # start_cmd and end_cmd must be provided together.
    if start_cmd:
        assert end_cmd is not None, "start_cmd requires end_cmd"
    if end_cmd:
        assert start_cmd is not None, "end_cmd requires start_cmd"

    df = pd.read_excel(xls_file, sheet_name="Model Information")
    clean_profile_df = pd.DataFrame()
    raw_profile_df = pd.DataFrame(columns=[
        "pgraph",
        "raw profile",
        "Dataset Information",
        "feature",
        "Model Name",
        "Accuracy",
        "sysinfo",
    ])
    for index, row in df.iterrows():
        if start_cmd:
            os.system(start_cmd)
        srtml.init()
        # profile_df = pd.DataFrame(columns=["pgraph"])
        pgraph = create_pgraph(df.loc[index, "Model Name"])
        profile_dict = profile_pgraph(
            pgraph, **json.loads(df.loc[index, "profile configuration"]))
        pprint(profile_dict)
        clean_profile_df = pd.concat([
            clean_profile_df,
            get_dataframe_from_profile(pgraph.ppu_identifier, profile_dict),
        ])
        raw_profile_df = raw_profile_df.append(
            {
                "pgraph": pgraph.ppu_identifier,
                "raw profile": json.dumps(profile_dict),
                "Dataset Information": df.loc[index, "Dataset Information"],
                "feature": df.loc[index, "feature"],
                "Model Name": df.loc[index, "Model Name"],
                "Accuracy": df.loc[index, "Accuracy"],
                "sysinfo": get_sysinfo(),
            },
            ignore_index=True,
        )
        shutdown()
        if end_cmd:
            os.system(end_cmd)

    with pd.ExcelWriter(xls_file, mode="a") as writer:
        clean_profile_df.to_excel(writer, sheet_name="Model Profile")
        raw_profile_df.to_excel(writer, sheet_name="Model Raw Profile")
def main(xls_file, start_cmd, end_cmd, cleanmr):
    # start_cmd and end_cmd must be provided together.
    if start_cmd:
        assert end_cmd is not None, "start_cmd requires end_cmd"
    if end_cmd:
        assert start_cmd is not None, "end_cmd requires start_cmd"
    if cleanmr:
        os.system("cleanmr")
    if start_cmd:
        os.system(start_cmd)

    # NOTE: ray_serve_kwargs is constructed here but never passed to
    # srtml.init() below.
    ray_serve_kwargs = {
        "ray_init_kwargs": {
            "object_store_memory": int(5e10),
            "num_cpus": 24,
            "_internal_config": json.dumps({
                "max_direct_call_object_size": 1000 * 1024 * 1024,  # ~1 GB
                "max_grpc_message_size": 10000 * 1024 * 1024,  # ~10 GB
            }),
            # "resources": resources,
        },
        "start_server": False,
    }

    srtml.init()
    df = pd.read_excel(xls_file, sheet_name="Model Raw Profile")
    for index, row in df.iterrows():
        pgraph = create_pgraph(df.loc[index, "Model Name"])
        profile_dict = convert_profiles_to_regression_models(
            json.loads(df.loc[index, "raw profile"]))
        dinfo = DATASET_INFORMATION(
            **json.loads(df.loc[index, "Dataset Information"]))
        pgraph.submit(
            df.loc[index, "Accuracy"],
            dinfo,
            df.loc[index, "feature"],
            profile_dict,
        )
    shutdown()
    if end_cmd:
        os.system(end_cmd)
def main(xls_file, start_cmd, end_cmd): df = pd.read_excel(xls_file, sheet_name="Model Information") columns = [ "mu (qps)", "cv", "# requests", "Latency Constraint (ms)", "Ingest mu Observed (qps)", "Throughput (qps)", "p95 (ms)", "p99 (ms)", ] raw_columns = [*columns[:3], "Latency (ms)"] new_df_clean = pd.DataFrame(columns=columns) new_df_raw = pd.DataFrame(columns=raw_columns) for index, row in df.iterrows(): row_df = {columns[i]: df.loc[index, columns[i]] for i in range(3)} raw_row_df = dict(row_df) graph = create_pgraph(df.loc[index, "Model Name"], df.loc[index, "Number of Conditionals"]) graph.configure(SERVE_MODE) if start_cmd: os.system(start_cmd) srtml.init(start_server=False) # pgraph_metadata = json.loads(df.loc[index, "configuration"]) # graph.materialize(pgraph_metadata=pgraph_metadata) arrival_curve = generate_fixed_arrival_process( mean_qps=df.loc[index, columns[0]], cv=df.loc[index, columns[1]], num_requests=df.loc[index, columns[2]], ).tolist() graph.provision(SERVE_MODE) # img_path = os.path.join(IMAGE_CLASSIFICATION_DIR, "elephant.jpg") # data = base64.b64encode(open(img_path, "rb").read()) # Warm-up and throughput calculation WARMUP = 200 NUM_REQUESTS = 1000 ppu = graph.handle futures = [ppu.remote(data=1) for _ in range(WARMUP)] ray.get(futures) start_time = time.time() futures = [ppu.remote(data=1) for _ in range(NUM_REQUESTS)] ray.wait(futures, num_returns=len(futures)) end_time = time.time() time_taken = end_time - start_time throughput_qps = NUM_REQUESTS / time_taken row_df[columns[5]] = throughput_qps # latency calculation http_actor = HTTPProxyActor.remote(host="127.0.0.1", port=8000) ray.get(http_actor.register_route.remote("/noop", ppu)) ray.get(http_actor.init_latency.remote()) client_path = os.path.join("noop_client.go", ) row_df[columns[3]] = df.loc[index, columns[3]] ls_output = subprocess.Popen([ "go", "run", client_path, str(df.loc[index, columns[3]]), str(1), *[str(val) for val in arrival_curve], ]) ls_output.communicate() latency_list = ray.get(http_actor.get_latency.remote()) ingest_mu, latency_ms, p95_ms, p99_ms = get_latency_stats( collected_latency=latency_list) row_df[columns[4]] = ingest_mu row_df[columns[6]] = p95_ms row_df[columns[7]] = p99_ms raw_row_df["Latency (ms)"] = latency_ms shutdown() if end_cmd: os.system(end_cmd) new_df_clean = new_df_clean.append(row_df, ignore_index=True) new_df_raw = new_df_raw.append(raw_row_df, ignore_index=True) with pd.ExcelWriter(xls_file, mode="a") as writer: new_df_clean.to_excel(writer, sheet_name="Results") new_df_raw.to_excel(writer, sheet_name="Results(Raw)")
def main(xls_file, start_cmd, end_cmd): df = pd.read_excel(xls_file, sheet_name="Planner Configuration") columns = [ "mu (qps)", "cv", "# requests", "Latency Constraint (ms)", "Planner", "Estimated Latency (ms)", "Cost", "Accuracy", "Ingest mu Observed (qps)", "Throughput (qps)", "p95 (ms)", "p99 (ms)", ] raw_columns = [*columns[:8], "Latency (ms)"] new_df_clean = pd.DataFrame(columns=columns) new_df_raw = pd.DataFrame(columns=raw_columns) for index, row in df.iterrows(): row_df = {columns[i]: df.loc[index, columns[i]] for i in range(8)} raw_row_df = dict(row_df) if start_cmd: os.system(start_cmd) ray_serve_kwargs = { "ray_init_kwargs": { "object_store_memory": int(5e10), "num_cpus": 24, "_internal_config": json.dumps({ "max_direct_call_object_size": 10 * 1024 * 1024, # 10Mb "max_grpc_message_size": 100 * 1024 * 1024, # 100Mb }), # "resources": resources, }, "start_server": False, } srtml.init(ray_serve_kwargs=ray_serve_kwargs) with VGraph(name="bert") as graph: classifier = Classifier( feature=IMAGE_CLASSIFICATION_FEATURE, dataset_information=dinfo, name="VClassifier", ) pgraph_metadata = json.loads(df.loc[index, "configuration"]) # pgraph_metadata['bert/VClassifier']['ppu_state']['Sentimental-bert24-2/bert24_p2_stage0']['resources']['Tesla P40'] = 0.2 # pgraph_metadata['bert/VClassifier']['ppu_state']['Sentimental-bert24-2/bert24_p2_stage0']['num_gpus'] = 0.2 graph.materialize(pgraph_metadata=pgraph_metadata) arrival_curve = generate_fixed_arrival_process( mean_qps=df.loc[index, columns[0]], cv=df.loc[index, columns[1]], num_requests=df.loc[index, columns[2]], ).tolist() graph.provision(SERVE_MODE) # img_path = os.path.join(IMAGE_CLASSIFICATION_DIR, "elephant.jpg") # data = base64.b64encode(open(img_path, "rb").read()) data = 'I am hot and sexy' # Warm-up and throughput calculation WARMUP = 200 NUM_REQUESTS = 1000 vpu = graph.handle futures = [vpu.remote(text=data) for _ in range(WARMUP)] ray.get(futures) start_time = time.time() futures = [vpu.remote(text=data) for _ in range(NUM_REQUESTS)] ray.wait(futures, num_returns=len(futures)) end_time = time.time() time_taken = end_time - start_time throughput_qps = NUM_REQUESTS / time_taken row_df[columns[9]] = throughput_qps print('#######################################') # latency calculation http_actor = HTTPProxyActor.remote(host="127.0.0.1", port=8001) ray.get(http_actor.register_route.remote("/bert", vpu)) ray.get(http_actor.init_latency.remote()) client_path = os.path.join( IMAGE_CLASSIFICATION_DIR_TWO_VERTEX, "accuracy_degradation", "virtual", "image_prepoc_client.go", ) ls_output = subprocess.Popen([ "go", "run", client_path, str(df.loc[index, columns[3]]), 'lol', *[str(val) for val in arrival_curve], ]) ls_output.communicate() latency_list = ray.get(http_actor.get_latency.remote()) ingest_mu, latency_ms, p95_ms, p99_ms = get_latency_stats( collected_latency=latency_list) row_df[columns[8]] = ingest_mu row_df[columns[10]] = p95_ms row_df[columns[11]] = p99_ms raw_row_df["Latency (ms)"] = latency_ms elapsed_latency_list = [ latency_dict["end"] - latency_dict["start"] for latency_dict in latency_list ] elapsed_latency_list.sort() f = open( "/nethome/gkakkar7/srtml-experiments/experimental_results/qps_latency_tradeoff/125q_200l_p2/latencies.txt", "w") for entry in elapsed_latency_list: f.write(str(entry)) f.write("\n") f.close() shutdown() if end_cmd: os.system(end_cmd) new_df_clean = new_df_clean.append(row_df, ignore_index=True) new_df_raw = new_df_raw.append(raw_row_df, ignore_index=True) with pd.ExcelWriter(xls_file, mode="a") as 
writer: new_df_clean.to_excel(writer, sheet_name="Planner Config Run Results") new_df_raw.to_excel(writer, sheet_name="Planner Config Run Results(Raw)")