def benchmark_table(self):
    """Benchmark table creation from each supported input format.

    Registers one ``Benchmark`` per format (``df``, ``dict``,
    ``records``) on ``self`` as ``table_<format>``.
    """
    for name in ("df", "dict", "records"):
        data = getattr(self, name)
        test_meta = make_meta("table", name)
        # Bind `data` as a default argument: a bare `lambda: Table(data)`
        # would capture the loop variable by reference, so every benchmark
        # would construct a Table from the *last* format only.
        func = Benchmark(lambda data=data: Table(data), meta=test_meta)
        setattr(self, "table_{0}".format(name), func)
def benchmark_to_format_zero(self):
    """Benchmark each `to_format` method on the zero-sided view.

    Registers one ``Benchmark`` per output format on ``self`` as
    ``to_format_<format>``.
    """
    for name in ("numpy", "dict", "records", "df", "arrow"):
        test_meta = make_meta("to_format", "to_{}".format(name))
        # Bind `name` as a default argument to avoid the late-binding
        # closure pitfall: otherwise every lambda would call the last
        # `to_*` method of the loop.
        func = Benchmark(
            lambda name=name: getattr(self._view, "to_{0}".format(name))(),
            meta=test_meta,
        )
        setattr(self, "to_format_{0}".format(name), func)
def benchmark_table_arrow(self):
    """Benchmark Table construction from a raw Arrow binary.

    Handled separately from the other formats because the Arrow data
    must first be read from the filesystem.
    """
    with open(SUPERSTORE_ARROW, "rb") as arrow:
        arrow_bytes = arrow.read()
    meta = make_meta("table", "arrow")
    bench = Benchmark(lambda: Table(arrow_bytes), meta=meta)
    setattr(self, "table_arrow", bench)
def benchmark_view_one(self):
    """Benchmark one-sided view creation for each row-pivot option.

    Registers one ``Benchmark`` per non-empty pivot list on ``self``.
    """
    for pivot in PerspectiveBenchmark.ROW_PIVOT_OPTIONS:
        if len(pivot) == 0:
            continue
        test_meta = make_meta("view", "one_{0}_pivot".format(len(pivot)))
        view_constructor = partial(self._table.view, row_pivots=pivot)
        # Pass the partial itself instead of wrapping it in a lambda:
        # `lambda: view_constructor()` would capture the loop variable by
        # reference, so every benchmark would build the *last* pivot's view.
        func = Benchmark(view_constructor, meta=test_meta)
        setattr(self, "view_{0}".format(test_meta["name"]), func)
def benchmark_view_two_column_only(self):
    """Benchmark column-only view creation for each column-pivot option."""
    for pivot in PerspectiveBenchmark.COLUMN_PIVOT_OPTIONS:
        if len(pivot) == 0:
            continue
        test_meta = make_meta(
            "view", "two_column_only_{0}_pivot".format(len(pivot)))
        view_constructor = partial(self._table.view, column_pivots=pivot)
        # Hand the partial straight to Benchmark: wrapping it in
        # `lambda: view_constructor()` would late-bind the loop variable,
        # making every benchmark construct the final pivot's view.
        func = Benchmark(view_constructor, meta=test_meta)
        setattr(self, "view_{0}".format(test_meta["name"]), func)
def benchmark_view_two(self):
    """Benchmark view creation with paired row and column pivots."""
    for RP, CP in zip(
        PerspectiveBenchmark.ROW_PIVOT_OPTIONS,
        PerspectiveBenchmark.COLUMN_PIVOT_OPTIONS,
    ):
        if len(RP) == 0 and len(CP) == 0:
            continue
        test_meta = make_meta(
            "view", "two_{0}x{1}_pivot".format(len(RP), len(CP)))
        view_constructor = partial(
            self._table.view, row_pivots=RP, column_pivots=CP
        )
        # Pass the partial directly — a `lambda: view_constructor()`
        # wrapper would late-bind the loop variable and every benchmark
        # would build only the last pivot combination's view.
        func = Benchmark(view_constructor, meta=test_meta)
        setattr(self, "view_{0}".format(test_meta["name"]), func)
def benchmark_to_format_one(self):
    """Benchmark each `to_format` method for one-sided contexts."""
    for name in ("numpy", "dict", "records", "df", "arrow"):
        for pivot in PerspectiveBenchmark.ROW_PIVOT_OPTIONS:
            if len(pivot) == 0:
                continue
            test_meta = make_meta(
                "to_format", "to_{0}_r{1}".format(name, len(pivot)))
            view = self._table.view(row_pivots=pivot)
            # Bind `view` and `name` as default arguments: a bare lambda
            # would capture both loop variables by reference, so every
            # benchmark would serialize the last view with the last method.
            func = Benchmark(
                lambda view=view, name=name: getattr(
                    view, "to_{0}".format(name))(),
                meta=test_meta,
            )
            setattr(self, "to_format_{0}".format(test_meta["name"]), func)
def benchmark_view_two(self):
    """Benchmark view creation with paired Group By and Split By options."""
    for RP, CP in zip(
        PerspectiveBenchmark.group_by_OPTIONS,
        PerspectiveBenchmark.split_by_OPTIONS,
    ):
        if len(RP) == 0 and len(CP) == 0:
            continue
        test_meta = make_meta(
            "view", "two_{0}x{1}_pivot".format(len(RP), len(CP)))
        view_constructor = partial(self._table.view, group_by=RP, split_by=CP)
        # Pass the partial directly: wrapping it in a
        # `lambda: view_constructor()` would late-bind the loop variable
        # and benchmark only the last group_by/split_by combination.
        func = Benchmark(view_constructor, meta=test_meta)
        setattr(self, "view_{0}".format(test_meta["name"]), func)
def benchmark_to_format_two_column_only(self):
    """Benchmark each `to_format` method for two-sided column-only contexts."""
    for name in ("dict", "records", "df", "arrow"):
        for pivot in PerspectiveBenchmark.COLUMN_PIVOT_OPTIONS:
            if len(pivot) == 0:
                continue
            test_meta = make_meta(
                "to_format", "{0}_{1}_column".format(name, len(pivot)))
            view = self._table.view(column_pivots=pivot)
            # Default-argument binding avoids the late-binding closure
            # bug: otherwise every Benchmark would call the last
            # view/method pair created by the loops.
            func = Benchmark(
                lambda view=view, name=name: getattr(
                    view, "to_{0}".format(name))(),
                meta=test_meta,
            )
            setattr(self, "to_format_{0}".format(test_meta["name"]), func)
def benchmark_view_one_df_updates(self):
    """Benchmark dataframe updates against one-sided views."""
    table = Table(self._df_schema)
    # Keep 25 live views subscribed so each update must propagate to all.
    live_views = [table.view(row_pivots=["State", "City"]) for _ in range(25)]
    for view in live_views:
        view.on_update(empty_callback)
    update_data = pd.DataFrame(self._get_update_data(1000))

    def resolve_update():
        table.update(update_data)
        table.size()

    meta = make_meta("update", "one_df")
    setattr(self, "update_one_df", Benchmark(resolve_update, meta=meta))
def benchmark_to_format_two(self):
    """Benchmark each `to_format` method for two-sided contexts."""
    for name in ("numpy", "dict", "records", "df", "arrow"):
        for RP, CP in zip(
            PerspectiveBenchmark.ROW_PIVOT_OPTIONS,
            PerspectiveBenchmark.COLUMN_PIVOT_OPTIONS,
        ):
            if len(RP) == 0 and len(CP) == 0:
                continue
            test_meta = make_meta(
                "to_format", "{0}_{1}x{2}".format(name, len(RP), len(CP)))
            view = self._table.view(row_pivots=RP, column_pivots=CP)
            # Default-argument binding avoids the late-binding closure
            # bug: a bare lambda would capture `view` and `name` by
            # reference and call the last view/method pair every time.
            func = Benchmark(
                lambda view=view, name=name: getattr(
                    view, "to_{0}".format(name))(),
                meta=test_meta,
            )
            setattr(self, "to_format_{0}".format(test_meta["name"]), func)
def benchmark_view_two_column_only_df_updates(self):
    """Benchmark dataframe updates for two-sided column-only views."""
    table = Table(self._df_schema)
    # Subscribe 25 live views so each update is resolved everywhere.
    live_views = []
    for _ in range(25):
        view = table.view(column_pivots=["Category", "Sub-Category"])
        view.on_update(empty_callback)
        live_views.append(view)
    update_data = pd.DataFrame(self._get_update_data(1000))

    def resolve_update():
        table.update(update_data)
        table.size()

    meta = make_meta("update", "two_column_only_df")
    setattr(self, "update_two_column_only_df", Benchmark(resolve_update, meta=meta))
def benchmark_view_one_updates(self):
    """Benchmark how long each update takes to resolve fully, using the
    on_update callback to force resolution across 25 one-sided views."""
    table = Table(self._schema)
    live_views = []
    for _ in range(25):
        view = table.view(row_pivots=["State", "City"])
        view.on_update(empty_callback)
        live_views.append(view)
    update_data = self._get_update_data(1000)

    def resolve_update():
        table.update(update_data)
        table.size()

    meta = make_meta("update", "one")
    setattr(self, "update_one", Benchmark(resolve_update, meta=meta))
def benchmark_view_zero_df_updates(self):
    """Benchmark how long each update takes to resolve fully across 25
    zero-sided views, loading dataframes rather than plain data
    structures so the dataframe-loading overhead can be compared
    against regular data-structure loading."""
    table = Table(self._df_schema)
    live_views = [table.view() for _ in range(25)]
    for view in live_views:
        view.on_update(empty_callback)
    update_data = pd.DataFrame(self._get_update_data(1000))

    def resolve_update():
        table.update(update_data)
        table.size()

    meta = make_meta("update", "zero_df")
    setattr(self, "update_zero_df", Benchmark(resolve_update, meta=meta))
def benchmark_view_two_column_only_updates(self):
    """Benchmark how long each update takes to resolve fully, using the
    on_update callback to force resolution across 25 column-only views."""
    table = Table(self._schema)
    live_views = []
    for _ in range(25):
        view = table.view(split_by=["Category", "Sub-Category"])
        view.on_update(empty_callback)
        live_views.append(view)
    update_data = self._get_update_data(1000)

    def resolve_update():
        table.update(update_data)
        table.size()

    meta = make_meta("update", "two_column_only")
    setattr(self, "update_two_column_only", Benchmark(resolve_update, meta=meta))
def collect(self, exe_model, bin_rel_path):
    # Build a Benchmark for `exe_model` rooted at `bin_rel_path`, keep it
    # on `self.bench`, and have it discover its binaries.
    # NOTE(review): `self.bench` is assigned before collect_binaries()
    # runs, so it remains set even if collection raises — presumably
    # intentional; confirm callers rely on this.
    self.bench = Benchmark(exe_model, bin_rel_path)
    self.bench.collect_binaries()
def benchmark_view_zero(self):
    """Benchmark creation of a zero-sided (unpivoted) view."""
    meta = make_meta("view", "zero")
    func = Benchmark(lambda: self._table.view(), meta=meta)
    setattr(self, "view_zero", func)
class BenchRunner():
    """Drive a benchmarking session: collect executables, prompt the
    user, run each binary, and mirror all console output to a log file.

    Intended for use as a context manager; subclasses must implement
    :meth:`run`.
    """

    def __init__(self, out_rel_path):
        """Create the runner with output rooted at *out_rel_path*
        (relative to the current working directory).

        Raises ``FileExistsError`` if a log already exists there.
        """
        self.out_abs_path = getcwd() + '/' + out_rel_path
        self.log_path = self.out_abs_path + '/benchlog.txt'
        # 'x' mode fails if the file exists, so a previous run's log is
        # never overwritten.
        open(self.log_path, 'x').close()

    def __enter__(self):
        self.put()
        self.put("Benchmark started", ' >> ')
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.put('Finished', ' >> ')
        self.put()

    def collect(self, exe_model, bin_rel_path):
        """Build the Benchmark for *exe_model* and discover its binaries."""
        self.bench = Benchmark(exe_model, bin_rel_path)
        self.bench.collect_binaries()

    def put(self, message='', prefix=' ', end='\n'):
        """Print *message* with *prefix* and append the same text to the log.

        The context manager guarantees the log handle is closed even if
        the write raises (the previous open/write/close sequence leaked
        the handle on error).
        """
        print(prefix + message, end=end)
        with open(self.log_path, 'a') as logfile:
            logfile.write(prefix + message + end)

    def input(self, message, prompt='', message_prefix=' ', message_end='\n'):
        """Show *message* and *prompt*, read a line from stdin, and log it."""
        self.put(message, message_prefix, message_end)
        self.put(prompt, ' > ', '')
        text = input()
        self.put()
        return text

    def list_exes(self):
        """Report every discovered executable (or the lack thereof)."""
        if len(self.bench.exes) > 0:
            self.put('Discovered executables:', ' > ')
            self.put('<name>\t<check>\t<bytes>')
            for exe in self.bench.exes:
                exe_repr = ' ' + str(exe) + '\t '
                if exe.ok:
                    exe_repr += 'ok\t '
                else:
                    exe_repr += 'error\t '
                exe_repr += str(exe.statinfo.st_size)
                self.put(exe_repr)
        else:
            self.put('No executables found in ' + self.bench.bin_abs_path, ' !! ')

    def start(self):
        """Filter binaries and, after user confirmation, run the benchmark."""
        self.bench.filter_binaries()
        if len(self.bench.exes) == 0:
            self.put('No valid executable, terminating', ' !! ')
            return
        prompt = 'Start benchmarking these executables?'
        # NOTE(review): the '[Y/n]' prompt implies Yes is the default,
        # but an empty answer does NOT run the benchmark — only an
        # explicit 'y'/'Y' does. Confirm this is intended.
        if self.input(prompt, '[Y/n] ').strip() in ['y', 'Y']:
            self.run()

    def run(self):
        """Subclass hook: perform the actual benchmark run."""
        raise NotImplementedError('A subclass should implement this method')

    def run_case(self, arguments, friendly_name):
        """Run every executable with *arguments*, logging and saving results."""
        self.put('Starting for ' + friendly_name, ' >> ')
        self.put('<name>\t<time>\t\t<sec>\t<cpu%>\t<hash>\t\t<maxmem> <len>')
        for exe_run in self.bench.benchmark(arguments):
            self.put(exe_run.__repr__('\t '), ' ')
            self.save_result(exe_run, friendly_name)

    def save_result(self, exe_run, filename):
        """Append *exe_run*'s output to ``<name>_<filename>.txt``.

        Uses a context manager so the handle is released even on a
        failed write.
        """
        out = '_'.join([exe_run.name, filename]) + '.txt'
        with open(self.out_abs_path + '/' + out, 'a') as out_file:
            out_file.write(exe_run.output)
(1, 2**10), (1, 2**13), (1, 2**16), (1, 2**19), #~ (1, 2**18), #~ (1, 2**20), ] """ # ----------------------------------- if type(Wname) is str: Wname = [Wname] bench = Benchmark() #thetitle = str("%s (%s)" % (what_to_params[what]["name"], wname)) thetitle = str("%s" % what_to_params[what]["name"]) bench.new_figure(thetitle, xlabel="Number of points", ylabel="Time (ms)", xlog=True, ylog=True, xlims=(1.1e-2, 1), ylims=(1e-2, 1)) markers = ["o--", "o-."] for i,wname in enumerate(Wname): bench.new_curve("pywt: " + wname, marker=markers[i]) bench.new_curve("PDWT: " + wname, marker=markers[i]) leg = bench.legend() leg.draggable() results_pywt = [] results_pypwt = []