def create_testcase(self, test, zoom, layers):
    """Build the benchmark SQL for one test/zoom/layer set and wrap it in a test case.

    A fresh ``MvtGenerator`` is created for the requested layers, and its
    query is embedded into a statement that measures the byte length of each
    generated tile over an x/y grid produced by ``generate_series``.

    SQL parameters of the resulting statement: ``$1`` is the zoom, ``$2..$3``
    bound the x series and ``$4..$5`` bound the y series.

    :param test: key into ``TEST_CASES`` selecting the test definition
    :param zoom: zoom value handed to ``make_test``
    :param layers: a single layer id, or a list of layer ids
    :return: whatever ``TEST_CASES[test].make_test(...)`` returns
    """
    # A single layer id is accepted for convenience; normalize to a list.
    if isinstance(layers, str):
        layers = [layers]

    generator = MvtGenerator(self.tileset, layer_ids=layers,
                             key_column=self.key_column)
    tile_sql = generator.generate_query('TileBBox($1, xval.x, yval.y)', '$1')
    if self.key_column:
        # With a key column the inner query is narrowed to the `mvt` column,
        # so that LENGTH() below receives a single value.
        tile_sql = f"SELECT mvt FROM ({tile_sql}) AS perfdata"

    # Summary mode aggregates all tile sizes into one number; otherwise each
    # row carries its own z/x/y coordinates next to the size.
    if self.summary:
        prefix = 'sum'
    else:
        prefix = 'CAST($1 as int) as z, xval.x as x, yval.y as y,'

    query = f"""\
SELECT {prefix}(COALESCE(LENGTH(({tile_sql})), 0)) AS len FROM
generate_series(CAST($2 as int), CAST($3 as int)) AS xval(x),
generate_series(CAST($4 as int), CAST($5 as int)) AS yval(y);
"""
    return TEST_CASES[test].make_test(zoom, layers, query)
class PerfTester:
    """Run tile-generation performance tests against PostgreSQL and report results.

    For every selected test / zoom / layer combination a SQL statement is
    executed that measures the byte length of each generated tile.  The sizes
    and timings are printed as graphs, optionally compared with a previously
    saved run, and optionally saved as JSON.
    """

    mvt: MvtGenerator
    test_cases: List[TestCase]

    def __init__(self, tileset: str, tests: List[str], test_all, layers: List[str],
                 zooms: List[int], dbname: str, pghost, pgport: str, user: str,
                 password: str, summary: bool, per_layer: bool, buckets: int,
                 save_to: Union[None, str, Path],
                 compare_with: Union[None, str, Path], key_column: bool,
                 gzip: bool, disable_feature_ids: bool = None,
                 disable_tile_envelope: bool = None,
                 exclude_layers: bool = False, verbose: bool = None):
        """Store the configuration and resolve which tests, layers and zooms to run.

        :param tileset: tileset reference handed to ``Tileset.parse``
        :param tests: test ids; each must be a key of ``TEST_CASES``
        :param test_all: if truthy, run every test in ``TEST_CASES`` except
            ``'null'`` (``tests`` is still validated, then ignored)
        :param layers: layer ids to test; ``None`` is treated as empty
        :param zooms: zoom levels to test
        :param dbname, pghost, pgport, user, password: connection settings
            passed to ``asyncpg.create_pool``
        :param summary: if truthy, only the total size per test is queried
        :param per_layer: if truthy, each layer is tested separately
        :param buckets: maximum number of lines in the size distribution graph
        :param save_to: optional path of the JSON file to write results to
        :param compare_with: optional path of a previous JSON result to
            compare against
        :param key_column: forwarded to ``MvtGenerator``
        :param gzip: forwarded to ``MvtGenerator``
        :param disable_feature_ids: do not use feature ids even if supported
        :param disable_tile_envelope: do not use tile envelope even if supported
        :param exclude_layers: if truthy, ``layers`` lists layers to skip
        :param verbose: if truthy, print each SQL query before running it
        :raises DocoptExit: if a requested test id is not in ``TEST_CASES``
        """
        self.tileset = Tileset.parse(tileset)
        self.dbname = dbname
        self.pghost = pghost
        self.pgport = pgport
        self.user = user
        self.password = password
        self.summary = summary
        self.buckets = buckets
        self.key_column = key_column
        self.gzip = gzip
        self.disable_feature_ids = disable_feature_ids
        self.disable_tile_envelope = disable_tile_envelope
        self.verbose = verbose
        self.per_layer = per_layer
        self.save_to = Path(save_to) if save_to else None
        self.results = PerfRoot()

        if compare_with:
            path = Path(compare_with).resolve()
            with path.open('r', encoding='utf-8') as fp:
                self.old_run: PerfRoot = PerfRoot.from_dict(json.load(fp))
            since = round_td(dt.utcnow() - dt.fromisoformat(self.old_run.created))
            print(f"Comparing results with a previous run created {since} ago: {path}")
        else:
            self.old_run = None

        for test in tests:
            if test not in TEST_CASES:
                cases = '\n'.join(map(TestCase.fmt_table, TEST_CASES.values()))
                raise DocoptExit(f"Test '{test}' is not defined. "
                                 f"Available tests are:\n{cases}\n")
        if test_all:
            # Do this after validating individual tests, they are ignored but validated
            tests = [v for v in TEST_CASES if v != 'null']

        all_layers = [lyr["layer"]['id'] for lyr in self.tileset.layers]
        if layers and exclude_layers:
            # inverse layers list
            excluded = set(layers)
            layers = [lyr for lyr in all_layers if lyr not in excluded]
        elif not layers and per_layer:
            layers = all_layers

        # Keep the order, but ensure no duplicates.
        # `layers` may be None when nothing was requested; treat it as empty.
        self.layers = list(dict.fromkeys(layers or []))
        self.tests = list(dict.fromkeys(tests))
        self.zooms = list(dict.fromkeys(zooms))

    async def run(self):
        """Connect to PostgreSQL, run all tests, and save the results."""
        print(f'Connecting to PostgreSQL at {self.pghost}:{self.pgport}, '
              f'db={self.dbname}, user={self.user}...')
        # The pool is limited to a single connection (min_size=max_size=1).
        async with asyncpg.create_pool(
                database=self.dbname, host=self.pghost, port=self.pgport,
                user=self.user, password=self.password,
                min_size=1, max_size=1,
        ) as pool:
            async with pool.acquire() as conn:
                self.results.created = dt.utcnow().isoformat()
                self.results.tileset = str(Path(self.tileset.filename).resolve())
                await self._run(conn)
                self.results.tests = [v.result for v in self.test_cases]
                self.save_results()

    async def _run(self, conn: Connection):
        """Validate the tileset, build all test cases, run them and print summaries.

        :raises ValueError: if PostGIS is older than 2.5 and any test other
            than ``'null'`` was requested
        """
        self.results.pg_settings, postgis_ver = await show_settings(conn)
        print("\nValidating SQL fields in all layers of the tileset")

        # Both features are only used when the reported PostGIS version is 3+
        # and the caller did not disable them.
        use_feature_id = postgis_ver >= 3 and not self.disable_feature_ids
        use_tile_envelope = postgis_ver >= 3 and not self.disable_tile_envelope
        self.results.settings['use_feature_ids'] = use_feature_id
        self.results.settings['use_tile_envelope'] = use_tile_envelope
        self.mvt = MvtGenerator(self.tileset,
                                use_feature_id=use_feature_id,
                                use_tile_envelope=use_tile_envelope,
                                gzip=self.gzip,
                                key_column=self.key_column)

        self.results.layer_fields = {}
        for layer_id, layer_def in self.mvt.get_layers():
            fields = await self.mvt.validate_layer_fields(conn, layer_id, layer_def)
            self.results.layer_fields[layer_id] = list(fields.keys())

        self.test_cases = []
        if postgis_ver < 2.5:
            if self.tests != ['null']:
                raise ValueError('Requires PostGIS version 2.5 or later')
            print('WARN: No PostGIS v2.5+ found, performance tests will not be run.')
            return

        old_tests = self.old_run.tests if self.old_run else None
        # In per-layer mode each layer gets its own set of test cases;
        # otherwise a single pass (layer=None) tests all selected layers at once.
        for layer in (self.layers if self.per_layer else [None]):
            for test in self.tests:
                for z in self.zooms:
                    tc = self.create_testcase(test, z, layer or self.layers)
                    if old_tests:
                        # Find the matching result of the previous run, if any
                        tc.old_result = next(
                            (v for v in old_tests
                             if v.id == tc.id and v.layers == tc.layers_id
                             and v.zoom == tc.zoom),
                            None)
                    self.test_cases.append(tc)

        for testcase in self.test_cases:
            await self.run_test(conn, testcase)

        print("\n\n================ SUMMARY ================")
        self.print_summary_graphs('test_summary', lambda t: t.id,
                                  lambda t: f"in test {t.id}", 'Per-test')
        self.print_summary_graphs('zoom_summary', lambda tc: str(tc.zoom),
                                  lambda t: f"at z{t.zoom}", 'Per-zoom')
        if self.per_layer:
            self.print_summary_graphs('layer_summary', lambda t: t.layers_id,
                                      lambda t: f"at {t.fmt_layers()}", 'Per-layer')

        self.results.summary = PerfSummary(
            duration=sum((v.result.duration for v in self.test_cases), timedelta()),
            tiles=sum(v.size() for v in self.test_cases),
            bytes=sum(v.result.bytes for v in self.test_cases),
        )
        old_summary = self.old_run.summary if self.old_run else None
        print(self.results.summary.perf_format(old_summary))

    def create_testcase(self, test, zoom, layers) -> TestCase:
        """Build the benchmark SQL for one test/zoom/layer set and wrap it in a test case.

        SQL parameters of the resulting statement: ``$1`` is the zoom,
        ``$2..$3`` bound the x series and ``$4..$5`` bound the y series.

        Note: this changes the layer selection of the shared ``self.mvt``.

        :param test: key into ``TEST_CASES`` selecting the test definition
        :param zoom: zoom value handed to ``make_test``
        :param layers: a single layer id, or a list of layer ids
        """
        layers = [layers] if isinstance(layers, str) else layers
        self.mvt.set_layer_ids(layers)
        query = self.mvt.generate_query(
            f'{self.mvt.tile_envelope}($1, xval.x, yval.y)', '$1')
        if self.key_column:
            # Narrow the inner query to the `mvt` column so that LENGTH()
            # below receives a single value.
            query = f"SELECT mvt FROM ({query}) AS perfdata"

        # Summary mode aggregates all tile sizes into one number; otherwise
        # each row carries its own z/x/y coordinates next to the size.
        prefix = 'CAST($1 as int) as z, xval.x as x, yval.y as y,' \
            if not self.summary else 'sum'
        query = f"""\
SELECT {prefix}(COALESCE(LENGTH(({query})), 0)) AS len FROM
generate_series(CAST($2 as int), CAST($3 as int)) AS xval(x),
generate_series(CAST($4 as int), CAST($5 as int)) AS yval(y);
"""
        return TEST_CASES[test].make_test(zoom, layers, query)

    async def run_test(self, conn: Connection, test: TestCase):
        """Execute one test case, record its size/duration and print the result.

        In summary mode only the total byte count is fetched and printed.
        Otherwise per-tile sizes are fetched, sorted by size, split into at
        most ``self.buckets`` equally sized groups, and printed as a graph.
        """
        results = []
        print(f"\nRunning {test.format()}...")
        if self.verbose:
            print(f'Using SQL query:\n\n-------\n\n{test.query}\n\n-------\n\n')

        # The series upper bounds are inclusive, hence the `- 1`
        args = [
            test.query,
            test.zoom,
            test.start[0],
            test.before[0] - 1,
            test.start[1],
            test.before[1] - 1,
        ]
        start = dt.utcnow()
        if self.summary:
            # SQL sum() over zero rows yields NULL (None); record it as 0 so
            # that later totals can still be added up.
            test.result.bytes = (await conn.fetchval(*args)) or 0
        else:
            for row in await conn.fetch(*args):
                results.append(((row['z'], row['x'], row['y']), row['len']))
                test.result.bytes += row['len']
        test.result.duration = dt.utcnow() - start
        # Re-run the result's post-init hook now that bytes/duration are set
        test.result.__post_init__()
        old = test.old_result

        if self.summary:
            print(test.result.perf_format(old))
            return

        if test.size() != len(results):
            print(f"WARNING: Requested {test.size():,} tiles != got {len(results):,}")
        if not results:
            print(f"Query returned no data after {test.result.duration}")
            return

        test.tiles = len(results)
        results.sort(key=lambda v: v[1])

        # Never more buckets than tiles, and always at least one: the bucket
        # count is used both as a list size and as a divisor below.
        buckets = max(1, min(test.tiles, self.buckets))
        sums = [0] * buckets
        first = [buckets + 1] * buckets
        last = [buckets + 1] * buckets
        last_ind = -1
        for ind, val in enumerate(results):
            # Exact integer arithmetic: the float form `ind / tiles * buckets`
            # can round just below an integer (e.g. 1/49*49), leaving a bucket
            # without any tiles and its first/last index unset.
            i = ind * buckets // test.tiles
            sums[i] += val[1]
            last[i] = ind
            if last_ind != i:
                first[i] = ind
                last_ind = i

        test.result.buckets = []
        for i in range(buckets):
            # Results are sorted by size, so a bucket's first entry is its
            # smallest tile and its last entry is its largest.
            smallest = results[first[i]]
            largest = results[last[i]]
            test.result.buckets.append(
                PerfBucket(
                    smallest_id='/'.join(map(str, smallest[0])),
                    smallest_size=smallest[1],
                    largest_id='/'.join(map(str, largest[0])),
                    largest_size=largest[1],
                    bytes=sums[i],
                    tiles=(last[i] - first[i] + 1),
                ))

        old_buckets = old.buckets if old and old.buckets else []
        print_graph(
            f"Tile size distribution for {test.tiles:,} tiles "
            f"(~{test.tiles / buckets:.0f}/line) generated in "
            f"{round_td(test.result.duration)} "
            f"({test.result.gen_speed:,.1f} tiles/s"
            f"{change(old.gen_speed, test.result.gen_speed, True) if old else ''})",
            [
                v.graph_msg(old_buckets[ind] if ind < len(old_buckets) else None)
                for ind, v in enumerate(test.result.buckets)
            ],
            is_bytes=True)

    def print_summary_graphs(self, kind, key: Callable[[TestCase], Any],
                             key_fmt: Callable[[TestCase], Any], long_msg):
        """Group test cases by ``key`` and print speed and size graphs per group.

        The per-group statistics are also stored on ``self.results`` under
        the attribute named ``kind``.  Nothing is printed or stored when
        there is at most one group.

        :param kind: attribute name on the results (and on the old run)
        :param key: returns the grouping key of a test case
        :param key_fmt: returns the human-readable description of the group
        :param long_msg: prefix of the graph titles
        """
        groups = {key(v): key_fmt(v) for v in self.test_cases}
        if len(groups) <= 1:
            return  # do not print one-liner graphs

        durations = defaultdict(timedelta)
        tile_sizes = defaultdict(int)
        tile_counts = defaultdict(int)
        for res in self.test_cases:
            group = key(res)
            durations[group] += res.result.duration
            tile_sizes[group] += res.result.bytes
            tile_counts[group] += res.size()

        stats = {
            g: PerfSummary(duration=durations[g],
                           tiles=tile_counts[g],
                           bytes=tile_sizes[g])
            for g in groups
        }
        setattr(self.results, kind, stats)
        old_stats = getattr(self.old_run, kind, None) if self.old_run else None

        speed_data = []
        size_data = []
        for grp, grp_desc in groups.items():
            old = old_stats[grp] if old_stats and grp in old_stats else None
            speed_data.append(stats[grp].graph_msg(True, grp_desc, old))
            size_data.append(stats[grp].graph_msg(False, grp_desc, old))

        print_graph(f"{long_msg} generation speed (longer is better)", speed_data)
        print_graph(f"{long_msg} average tile sizes (shorter is better)",
                    size_data, is_bytes=True)

    def save_results(self):
        """Write the collected results as JSON to ``self.save_to``, if it is set."""
        if self.save_to:
            print(f"Saving results to {self.save_to}")
            with self.save_to.open('w', encoding='utf-8') as fp:
                json.dump(self.results.to_dict(), fp, indent=2)