def run(config, lt_config, log_dir, run_func=run_single_data_point.run, throughput_key="ops/sec(cum)", concurrency_key="concurrency"):
    """Sweep concurrency levels to map the latency/throughput curve.

    Runs one benchmark trial per concurrency level over a range, then
    repeatedly homes in on the max-throughput concurrency with a halved
    step size each round, checkpointing results to a CSV as it goes.

    Args:
        config: mutable benchmark config dict; its "concurrency" key is
            overwritten for each trial.
        lt_config: dict with "concurrency" -> (start, end) bounds and
            "step_size" -> initial sweep step.
        log_dir: directory under which latency_throughput artifacts go.
        run_func: callable(config, logs_dir) returning the path of the
            trial's results CSV.
        throughput_key: results column holding cumulative throughput.
        concurrency_key: results column holding the concurrency level.

    Returns:
        Path of the checkpoint CSV containing all collected data points.
    """
    # create latency throughput dir, if not running recovery
    lt_dir = os.path.join(log_dir, "latency_throughput")
    lt_logs_dir = os.path.join(lt_dir, "logs")
    checkpoint_csv_fpath = os.path.join(lt_dir, "lt.csv")
    if not os.path.exists(lt_logs_dir):
        # not running recovery
        os.makedirs(lt_logs_dir)

    # read lt config file
    start, end = lt_config["concurrency"]
    step_size = lt_config["step_size"]

    # honing in on increasingly smaller ranges
    data = []
    while step_size > 0:
        # rows gathered in this round only; checkpointing just these fixes
        # the duplicate-row bug where the full history was re-appended to
        # the CSV (on top of the rows already on disk) every round
        round_data = []
        for concurrency in range(start, end + step_size, step_size):
            # run trial for this concurrency
            config["concurrency"] = concurrency

            # make directory for this specific concurrency, unique by timestamp
            specific_logs_dir = os.path.join(
                lt_logs_dir, "{0}_{1}".format(
                    str(concurrency),
                    datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")))

            # run trial
            os.makedirs(specific_logs_dir)
            results_fpath_csv = run_func(config, specific_logs_dir)

            # gather data from this run
            # NOTE(review): dict.update takes at most one positional mapping,
            # so this assumes read_in_data returns a single-row list — confirm
            datum = {"concurrency": concurrency}
            more_data = csv_utils.read_in_data(results_fpath_csv)
            datum.update(*more_data)
            round_data.append(datum)
        data.extend(round_data)

        # find max throughput over all rounds so far and hone in on it
        max_throughput_concurrency = max(
            data, key=operator.itemgetter(throughput_key))[concurrency_key]
        concurrency = last_adjustments(max_throughput_concurrency)
        start = int(concurrency - step_size)
        end = int(concurrency + step_size)
        step_size = int(step_size / 2)

        # checkpoint: merge only this round's new rows into the CSV
        # (insert_csv_data reads back what is already on disk)
        insert_csv_data(round_data, checkpoint_csv_fpath)

    # plot the latency throughput graphs
    plot_utils.gnuplot(LT_GNUPLOT_EXE, checkpoint_csv_fpath,
                       os.path.join(lt_dir, "p50_lt.png"),
                       os.path.join(lt_dir, "p95_lt.png"),
                       os.path.join(lt_dir, "p99_lt.png"))

    return checkpoint_csv_fpath
def insert_csv_data(data, csv_fpath):
    """Merge *data* rows into the CSV at *csv_fpath*, sorted by concurrency.

    Rows already present in the file are kept; the new rows are appended
    and the combined set is rewritten in ascending "concurrency" order.

    Args:
        data: list of row dicts, each carrying a "concurrency" key.
        csv_fpath: path of the checkpoint CSV to read back and rewrite.

    Returns:
        csv_fpath after writing, or None when *data* is empty (no-op).
    """
    # idiomatic emptiness test (was: len(data) <= 0)
    if not data:
        return None
    existing_rows = csv_utils.read_in_data(csv_fpath)
    all_data = existing_rows + data
    all_data = sorted(all_data, key=lambda row: row["concurrency"])
    _ = csv_utils.write_out_data(all_data, csv_fpath)
    return csv_fpath
def find_optimal_concurrency(lt_fpath_csv, throughput_key="ops/sec(cum)", concurrency_key="concurrency"):
    """Return the concurrency with the highest throughput in a lt CSV.

    Args:
        lt_fpath_csv: path of a latency/throughput CSV (as written by run()).
        throughput_key: column holding cumulative throughput.
        concurrency_key: column holding the concurrency level.

    Returns:
        The best concurrency as an int, after last_adjustments().
    """
    rows = csv_utils.read_in_data(lt_fpath_csv)
    best_row = max(rows, key=lambda row: row[throughput_key])
    return int(last_adjustments(best_row[concurrency_key]))