def _parse_bool(text):
    """Convert a command-line token into a real boolean.

    ``argparse`` with ``type=bool`` calls ``bool(text)``, which is True for
    every non-empty string -- including "False" and "0".  This converter
    accepts the usual spellings instead and rejects anything else.

    The empty string maps to False because that was the only way to obtain
    False from the previous ``type=bool`` behaviour.
    """
    token = text.strip().lower()
    if token in ("1", "true", "t", "yes", "y", "on"):
        return True
    if token in ("", "0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {text!r}")


def main():
    """Command-line entry point: load quote CSVs and optimize ``run``.

    Steps, in order:

    1. Parse the command line.  Every strategy parameter given explicitly is
       held fixed; every parameter left out becomes a free variable for the
       optimizer, bounded by ``bounds_dict``.
    2. Read every CSV matching the ``glob`` argument and stitch the files into
       one continuous series, stored in the global ``DF``: each file after the
       first is shifted so that its first ``time``/``mark`` coincide with the
       last ``time``/``mark`` of the data before it (``ask`` and ``bid`` get
       the same price shift).
    3. Build the global progress table ``PTABLE`` and pre-compute its column
       widths from the extreme values each column can take, then print the
       header.
    4. Dispatch on ``--method`` and print the optimizer result, the glob and
       the ratio of the last to the first ``mark``.

    Globals written: ``DF``, ``PTABLE``, ``OPTIONS``.
    """
    global DF, PTABLE, OPTIONS

    param_names = (
        "period",
        "bb_low",
        "bb_high",
        "lo_zone",
        "hi_zone",
        "lo_sigma",
        "hi_sigma",
        "protect_loss",
    )

    parser = argparse.ArgumentParser()
    parser.add_argument("glob")
    parser.add_argument("--period", type=int)
    parser.add_argument("--bb-low", type=float)
    parser.add_argument("--bb-high", type=float)
    parser.add_argument("--lo-zone", type=float)
    parser.add_argument("--hi-zone", type=float)
    parser.add_argument("--lo-sigma", type=float)
    parser.add_argument("--hi-sigma", type=float)
    # type=bool would turn "--protect-loss False" into True.
    parser.add_argument("--protect-loss", type=_parse_bool)
    parser.add_argument("--method", default="dual_annealing")
    parser.add_argument("--finish", default=None)
    args = parser.parse_args()

    # Collect the per-file frames and concatenate once at the end:
    # DataFrame.append no longer exists in pandas 2.x and re-copied the whole
    # accumulated frame for every file.
    frames = []
    # glob.glob returns matches in arbitrary order; sort so the stitched
    # series (and therefore the optimization) is reproducible.
    for csvfile in sorted(glob.glob(args.glob)):
        csvdf = pd.read_csv(csvfile, index_col=0)
        if csvdf.empty:
            # Nothing to stitch, and iloc[0] below would fail on it.
            continue
        csvdf["time"] = csvdf.apply(timefunc, axis=1)
        csvdf["mark"] = pd.to_numeric(csvdf["mark"])
        csvdf["ask"] = pd.to_numeric(csvdf["ask"])
        csvdf["bid"] = pd.to_numeric(csvdf["bid"])
        if frames:
            # Shift this file so it continues exactly where the previous
            # (already shifted) data ended, in both time and price.
            last_row = frames[-1].iloc[-1]
            first_row = csvdf.iloc[0]
            dt = first_row["time"] - last_row["time"]
            scale = first_row["mark"] - last_row["mark"]
            csvdf["time"] = csvdf["time"] - dt
            csvdf["mark"] = csvdf["mark"] - scale
            csvdf["ask"] = csvdf["ask"] - scale
            csvdf["bid"] = csvdf["bid"] - scale
        frames.append(csvdf)

    if not frames:
        # Fail early with a readable message instead of an IndexError later.
        parser.error(f"no CSV data found for pattern {args.glob!r}")
    DF = pd.concat(frames, ignore_index=True)

    # Search interval of each parameter when it is left free.
    bounds_dict = {
        "period": (12, 48 * 3600 / 5),
        "bb_low": (0.25, 4),
        "bb_high": (0.25, 4),
        "lo_zone": (-0.1, 0.5),
        "hi_zone": (0.5, 1.1),
        "lo_sigma": (0, 4),
        "hi_sigma": (0, 4),
        "protect_loss": (0, 1),
    }
    # Per-parameter step, passed to shgo's local minimizer as ``eps``.
    abs_dict = {
        "period": 1,
        "bb_low": 0.1,
        "bb_high": 0.1,
        "lo_zone": 0.01,
        "hi_zone": 0.01,
        "lo_sigma": 0.1,
        "hi_sigma": 0.1,
        "protect_loss": 1,
    }

    PTABLE = PrettyTable([
        "Iteration",
        "Time",
        "Period",
        "BB Low",
        "BB High",
        "Low Zone",
        "High Zone",
        "Low Sigma",
        "High Sigma",
        "Protect",
        "Return",
    ])
    PTABLE.float_format = ".4"

    # Extreme (narrowest, widest) sample values for each table column, used
    # only to size the columns before any real row exists.
    bounds = []
    bounds.append((0, 100))
    bounds.append((
        f"{datetime(2021, 1, 1, 0, 0, 0):%X}",
        f"{datetime(2021, 1, 1, 23, 59, 59):%X}",
    ))
    bounds.append([int(v) for v in bounds_dict["period"]])
    bounds.append([float(v) for v in bounds_dict["bb_low"]])
    bounds.append([float(v) for v in bounds_dict["bb_high"]])
    bounds.append([float(v) for v in bounds_dict["lo_zone"]])
    bounds.append([float(v) for v in bounds_dict["hi_zone"]])
    bounds.append([float(v) for v in bounds_dict["lo_sigma"]])
    bounds.append([float(v) for v in bounds_dict["hi_sigma"]])
    bounds.append((False, True))
    bounds.append((-99.0, 99.0))
    # One row per combination of extremes.
    for i in product([0, 1], repeat=len(bounds)):
        PTABLE.add_row([bounds[j][i[j]] for j in range(len(bounds))])
    # NOTE(review): these are PrettyTable private methods; their signatures
    # may differ between prettytable releases -- confirm against the pinned
    # version.
    OPTIONS = PTABLE._get_options({})
    frows = PTABLE._format_rows(PTABLE._get_rows(OPTIONS), OPTIONS)
    PTABLE._compute_widths(frows, OPTIONS)
    PTABLE._hrule = PTABLE._stringify_hrule(OPTIONS)
    print(PTABLE._stringify_header(OPTIONS))

    # fixed[k] is the command-line value of parameter k, or None when the
    # optimizer must choose it.  bounds/abs_diff hold one entry per *free*
    # parameter, in the same order as the optimizer's x vector, and
    # free_index maps a free parameter's name to its position in x.
    fixed = []
    bounds = []
    abs_diff = []
    free_index = {}
    for arg in param_names:
        value = getattr(args, arg)
        if value is not None:
            fixed.append(value)
        else:
            fixed.append(None)
            free_index[arg] = len(bounds)
            bounds.append(bounds_dict[arg])
            abs_diff.append(abs_dict[arg])

    res = None
    if args.method == "brute":
        # --finish arrives as a string; a non-callable ``finish`` is not used
        # by brute, so look the name up in scipy.optimize to make the option
        # take effect.
        finish = getattr(optimize, args.finish) if args.finish else None
        x0, fval, grid, Jout = optimize.brute(
            func=run,
            args=tuple(fixed),
            ranges=bounds,
            full_output=True,
            finish=finish,
        )
        if grid.ndim == 1:
            plt.plot(grid, -np.log(Jout))
            plt.title(args.glob)
            plt.show()
        elif grid.ndim == 3:
            fig = plt.figure(figsize=(10, 6))
            ax1 = fig.add_subplot(111, projection="3d")
            mycmap = plt.get_cmap("gist_earth")
            surf1 = ax1.plot_surface(grid[0, :],
                                     grid[1, :],
                                     -np.log(Jout),
                                     cmap=mycmap)
            fig.colorbar(surf1, ax=ax1, shrink=0.5, aspect=5)
            plt.title(args.glob)
            plt.show()
    elif args.method == "basinhopping":
        # NOTE(review): x0 is built from ``fixed``, which holds None for every
        # parameter not given on the command line, and the extra-args tuple
        # has 7 entries although 8 parameters are listed above -- confirm
        # against run()'s signature.
        res = optimize.basinhopping(
            func=run,
            x0=tuple(fixed),
            minimizer_kwargs={"args": tuple(7 * [None])},
        )
    elif args.method == "shgo-sobol":
        # Equality constraints x[i] - int(x[i]) == 0 for the two parameters
        # that are integers/flags.  The index must be the parameter's position
        # among the *free* variables, not its position in the full list: with
        # any earlier parameter fixed, protect_loss is no longer x[7].
        constraints = []
        for name in ("period", "protect_loss"):
            if name in free_index:
                constraints.append({
                    "type": "eq",
                    "fun": lambda x, i=free_index[name]:
                        np.array([x[i] - int(x[i])]),
                })
        res = optimize.shgo(
            func=run,
            args=tuple(fixed),
            bounds=bounds,
            constraints=constraints,
            options={"disp": True},
            sampling_method="sobol",
            minimizer_kwargs={"options": {
                "eps": np.array(abs_diff)
            }},
        )
        tbl = PrettyTable([
            "Period",
            "BB Low",
            "BB High",
            "Low Zone",
            "High Zone",
            "Low Sigma",
            "High Sigma",
            "Protect",
            "Return",
        ])
        tbl.float_format = ".4"
        # One table row per local minimum: merge the optimizer's free values
        # back into the full parameter list, then re-evaluate its score.
        for minim in res.xl:
            free_values = iter(minim)
            row = [
                next(free_values) if val is None else val for val in fixed
            ]
            score = run(minim, *fixed)
            row.append(-np.log(score))
            tbl.add_row(row)
        print(PTABLE._hrule)
        print()
        print(tbl)
    elif args.method == "hyperopt":
        space = [
            hp.quniform("period", 12, 48 * 3600 / 5, 1),
            hp.uniform("bb_low", 0.25, 4),
            hp.uniform("bb_high", 0.25, 4),
            hp.uniform("lo_zone", -0.1, 0.5),
            hp.uniform("hi_zone", 0.5, 1.1),
            hp.uniform("lo_sigma", 0, 4),
            hp.uniform("hi_sigma", 0, 4),
            hp.quniform("protect_loss", 0, 1, 1),
        ]
        res = fmin(run, space, algo=tpe.suggest, max_evals=200)
        print(run(space_eval(space, res)))
    elif len(bounds) == 0:
        # Everything is fixed: a single evaluation, nothing to optimize.
        run([], *fixed)
    elif len(bounds) == 1:
        # Exactly one free parameter: start from the middle of its interval.
        x0 = [(bounds[0][0] + bounds[0][1]) / 2]
        constraints = ()
        options = {"disp": True}
        if args.period is None:
            # The single free variable is the (integer) period.
            constraints = [{
                "type": "eq",
                "fun": lambda x: np.array([x[0] - int(x[0])])
            }]
            options["finite_diff_rel_step"] = (1 / x0[0], )
        res = optimize.minimize(
            fun=run,
            x0=x0,
            method="trust-constr",
            args=tuple(fixed),
            bounds=Bounds(bounds[0][0], bounds[0][1]),
            constraints=constraints,
            options=options,
        )
    else:
        # NOTE(review): these keyword arguments are passed unchanged to
        # whichever scipy.optimize function --method names -- confirm they are
        # accepted by the installed SciPy for the methods in use.
        res = getattr(optimize, args.method)(
            func=run,
            args=tuple(fixed),
            bounds=bounds,
            maxiter=1000000,
            local_search_options={
                "options": {
                    "disp": True
                }
            },
        )

    if res is not None:
        print(res)
    print(f"Glob = {args.glob}")
    print(f"Default = {DF.iloc[DF.shape[0] - 1]['mark']/DF.iloc[0]['mark']}")
def _remove_dir(path):
    """Delete the directory tree at *path* if it is set and exists."""
    if path is not None and os.path.isdir(path):
        shutil.rmtree(path)


def _record_error(errors, suite_name, test_name, iteration, ret_code):
    """Append one failure record under errors[suite_name][test_name]."""
    errors.setdefault(suite_name, {}).setdefault(test_name, []).append({
        "iteration": iteration,
        "ret_code": ret_code
    })


def run_benchmark(config):
    """Benchmark script for tiledb-vcf.

    Reads the YAML file at path *config*, which must provide
    ``base_command``, ``iterations``, ``ingestion_files`` and ``suites``.
    Each suite needs ``array_uri`` and ``tests`` and may give ``group_uri``;
    each test needs ``name`` and ``args`` and may set ``check_array_size``.

    For every suite and iteration the target directory is wiped, then each
    test's command is run and timed.  Runs that cannot be started or that
    exit with a non-zero status are recorded in an error report and excluded
    from the timings.  Two tables are printed and logged as CSV: per-suite
    timings (only for suites with a successful ``store`` test) and per-suite
    average file sizes reported for the ``store`` test.

    Returns None.  A YAML syntax error is printed and ends the run.
    """
    benchmarking_start = time.time()

    # Keep the file open (and the try body) only for the parse itself.
    with open(config, 'r') as stream:
        try:
            # safe_load instead of yaml.load: load() without an explicit
            # Loader can construct arbitrary Python objects and is rejected
            # outright by PyYAML >= 6.
            cfg = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            print(exc)
            return

    base_cmd = cfg['base_command']
    iterations = cfg['iterations']
    ingestion_files = cfg['ingestion_files']
    suites = cfg['suites']

    results = []
    attribute_results = {}
    suite_names = []
    errors = {}

    # Total size of the files being ingested, in MB
    ingestion_size = sum(
        os.path.getsize(ingestion_file) / (1024 * 1024)
        for ingestion_file in ingestion_files)

    # Loop through each test suite; suite_index is the suite's row in the
    # per-file size table and always lines up with suite_names.
    for suite_index, (suite_name, test_set) in enumerate(suites.items()):
        suite_names.append(suite_name)
        test_results = {}

        array_uri = test_set['array_uri']
        # group_uri is optional: when absent, the array itself is what gets
        # wiped between iterations.
        group_uri = test_set.get('group_uri')
        dir_to_rm = group_uri if group_uri is not None else array_uri

        # Run each suite the given number of iterations
        for i in range(iterations):
            # Start every iteration from a clean directory
            _remove_dir(dir_to_rm)
            if group_uri is not None:
                pathlib.Path(group_uri).mkdir(parents=True, exist_ok=True)

            # Run each test in the suite
            for test in test_set['tests']:
                # Flush caches
                flush_caches()

                test_name = test["name"]
                logger.info("Starting test %s - %s iteration %d", suite_name,
                            test_name, i)

                # Specified arguments, then the array uri argument
                cmd = [base_cmd] + test['args']
                cmd.extend(["-a", array_uri])

                # If store or register add ingestion files
                if test_name in ("store", "register"):
                    cmd.append("-f")
                    cmd.extend(ingestion_files)

                # The export directory is created but not passed to the
                # command (the "-p" argument is disabled).
                if test_name == "export" and group_uri is not None:
                    os.makedirs(os.path.join(group_uri, "export"),
                                exist_ok=True)

                logger.info("Running: %s", list2cmdline(cmd))

                # Time and run test command.  ret is pre-set so the error
                # record can be written even when call() itself raises.
                ret = None
                t0 = time.time()
                try:
                    ret = call(cmd)
                except Exception:
                    logger.exception("Failed to run: %s", list2cmdline(cmd))
                    _record_error(errors, suite_name, test_name, i, ret)
                    continue
                elapsed = time.time() - t0

                if ret != 0:
                    # A failed command's timing is meaningless; report it
                    # instead of folding it into the averages.
                    logger.error("Command exited with status %s: %s", ret,
                                 list2cmdline(cmd))
                    _record_error(errors, suite_name, test_name, i, ret)
                    continue

                array_size = 0
                tiledb_file_sizes = None
                if test.get('check_array_size'):
                    array_size = get_folder_size(array_uri)
                    tiledb_file_sizes = get_tiledb_file_sizes(array_uri)

                # Save results
                entry = test_results.setdefault(test_name, {
                    "time": [],
                    "size": [],
                    "file_sizes": {}
                })
                entry["time"].append(elapsed)
                entry["size"].append(array_size)
                if tiledb_file_sizes is not None:
                    for file_name, size in tiledb_file_sizes.items():
                        entry["file_sizes"].setdefault(file_name,
                                                       []).append(size)

        # If there was a store test we should save results for printing
        # table at the end
        if 'store' in test_results:
            store = test_results["store"]
            ingestion_times = store["time"]
            ingestion_time_avg = numpy.average(ingestion_times)
            ingestion_time_std = numpy.std(ingestion_times)
            size_avg = numpy.average(store["size"]) / (1024 * 1024)

            export_time_avg = 'N/A'
            export_time_std = 'N/A'
            if 'export' in test_results:
                export_times = test_results["export"]["time"]
                export_time_avg = numpy.average(export_times)
                export_time_std = numpy.std(export_times)

            results.append([
                suite_name, iterations, ingestion_time_avg,
                ingestion_time_std, size_avg, ingestion_size,
                export_time_avg, export_time_std
            ])

            # Average size (MB) of every reported file, one slot per suite;
            # suites that did not report a file keep None.
            for file_name, file_sizes in store["file_sizes"].items():
                if file_name not in attribute_results:
                    attribute_results[file_name] = [None] * len(suites)
                attribute_results[file_name][suite_index] = (
                    numpy.average(file_sizes) / (1024 * 1024))

        # Remove directory to save space again
        _remove_dir(dir_to_rm)

    header = [
        'Test', 'Iterations', 'Ingestion Time (seconds)',
        'Ingestion Time (seconds) STDDEV', 'Array Size (MB)',
        'Ingestion Size (MB)', 'Export Time (seconds)',
        'Export Time STDDEV (seconds)'
    ]
    timing_table = PrettyTable(header)
    for result in results:
        timing_table.add_row(result)

    # Same content as the table, logged as CSV
    data = "".join(",".join(map(str, row)) + "\n"
                   for row in [header] + results)
    logger.info(data)

    print("")
    print(timing_table)

    # Second table: one row per suite, one column per reported file
    size_table = PrettyTable()
    size_table.add_column("Test", suite_names)
    for file_name, sizes in attribute_results.items():
        size_table.add_column(file_name, sizes)

    print("")
    print(size_table)

    # NOTE(review): _get_rows/_get_options are PrettyTable private methods.
    size_rows = size_table._get_rows(size_table._get_options({}))
    data = "".join(",".join(map(str, row)) + "\n"
                   for row in [size_table.field_names] + list(size_rows))
    logger.info(data)

    # NOTE(review): `date.compress` is resolved from a name defined outside
    # this function -- confirm it exists and accepts a duration in seconds.
    logger.info("Total time taken to run benchmark was: %s",
                date.compress(time.time() - benchmarking_start))

    if errors:
        logger.error("Errors detected in run, dumping details:")
        logger.error(errors)