def get_active_partitions(
    clickhouse: ClickhousePool,
    storage: WritableTableStorage,
    database: str,
    table: str,
) -> Sequence[util.Part]:
    """Return every partition of ``database.table`` that has at least one
    active part, decoded using the partition format declared by the
    storage's table schema.
    """
    rows = clickhouse.execute(
        """
        SELECT DISTINCT partition
        FROM system.parts
        WHERE database = %(database)s
        AND table = %(table)s
        AND active = 1
        """,
        {"database": database, "table": table},
    )

    schema = storage.get_schema()
    assert isinstance(schema, TableSchema)
    fmt = schema.get_part_format()
    assert fmt is not None

    # Each result row is a single-column tuple holding the partition string.
    decoded = []
    for (partition,) in rows.results:
        decoded.append(util.decode_part_str(partition, fmt))
    return decoded
def get_partitions_to_optimize(clickhouse, database, table, before=None) -> Sequence[util.Part]:
    """Return partitions of ``database.table`` that have more than one active
    part and are therefore candidates for an OPTIMIZE (merge) pass.

    On Replicated* tables only the leader replica reports candidates, so a
    merge is scheduled exactly once per partition across the cluster. When
    ``before`` is given, a partition is returned only if its week (the Sunday
    following the partition's date) ended before that datetime.

    Returns an empty list when the table does not exist on this host or when
    this replica is not the leader.
    """
    engine = clickhouse.execute(
        """
        SELECT engine
        FROM system.tables
        WHERE (database = %(database)s) AND (name = %(table)s)
        """,
        {
            'database': database,
            'table': table,
        })

    if not engine:
        # Lazy %-style arguments: the message is only formatted if this
        # record is actually emitted (standard logging idiom, replaces the
        # previous eager "..." % (...) formatting).
        logger.warning(
            "Table %s.%s doesn't exist on %s:%s",
            database, table, clickhouse.host, clickhouse.port,
        )
        return []

    if engine[0][0].startswith('Replicated'):
        is_leader = clickhouse.execute(
            """
            SELECT is_leader
            FROM system.replicas
            WHERE (database = %(database)s) AND (table = %(table)s)
            """,
            {
                'database': database,
                'table': table,
            })

        # response: [(0,)] for non-leader or [(1,)] for leader
        if not (len(is_leader) == 1 and is_leader[0][0]):
            return []

    active_parts = clickhouse.execute(
        """
        SELECT
            partition,
            count() AS c
        FROM system.parts
        WHERE active
        AND database = %(database)s
        AND table = %(table)s
        GROUP BY partition
        HAVING c > 1
        ORDER BY c DESC, partition
        """,
        {
            'database': database,
            'table': table,
        })

    parts = [util.decode_part_str(part) for part, count in active_parts]

    if before:
        # p[0] is the partition's date; adding (6 - weekday) days lands on the
        # Sunday that closes its week. Only fully-elapsed weeks are optimized.
        parts = [
            p for p in parts if (p[0] + timedelta(days=6 - p[0].weekday())) < before
        ]

    return parts
def get_active_partitions(
    clickhouse: ClickhousePool, database: str, table: str
) -> Sequence[util.Part]:
    """Fetch the distinct active partitions of ``database.table`` from
    ``system.parts`` and decode each partition string."""
    sql = """
        SELECT DISTINCT partition
        FROM system.parts
        WHERE database = %(database)s
        AND table = %(table)s
        AND active = 1
    """
    params = {"database": database, "table": table}

    # Rows come back as single-column tuples; unpack and decode each one.
    partitions = []
    for (partition,) in clickhouse.execute(sql, params):
        partitions.append(util.decode_part_str(partition))
    return partitions
def get_active_partitions(
    clickhouse: ClickhousePool, database: str, table: str
) -> Sequence[util.Part]:
    # Return the distinct partitions of `database.table` that currently hold
    # at least one active part, each decoded into a util.Part.
    response = clickhouse.execute(
        """
        SELECT DISTINCT partition
        FROM system.parts
        WHERE database = %(database)s
        AND table = %(table)s
        AND active = 1
        """,
        {
            'database': database,
            'table': table,
        }
    )
    # Each row is a one-element tuple; `for part, in ...` unpacks the single
    # partition-string column before decoding.
    return [util.decode_part_str(part) for part, in response]
def get_partitions_to_optimize(
    clickhouse: ClickhousePool,
    storage: ReadableTableStorage,
    database: str,
    table: str,
    before: Optional[datetime] = None,
) -> Sequence[util.Part]:
    """Return partitions of ``database.table`` that have more than one active
    part and are therefore candidates for an OPTIMIZE (merge) pass.

    On Replicated* tables only the leader replica reports candidates, so a
    merge is scheduled exactly once per partition across the cluster. When
    ``before`` is given, a partition is returned only if its week (the Sunday
    following the partition's date) ended before that datetime.

    Returns an empty list when the table does not exist on this host or when
    this replica is not the leader.
    """
    engine = clickhouse.execute(
        """
        SELECT engine
        FROM system.tables
        WHERE (database = %(database)s) AND (name = %(table)s)
        """,
        {"database": database, "table": table},
    )

    if not engine:
        # Lazy %-style arguments: the message is only formatted if this
        # record is actually emitted (standard logging idiom, replaces the
        # previous eager "..." % (...) formatting).
        logger.warning(
            "Table %s.%s doesn't exist on %s:%s",
            database, table, clickhouse.host, clickhouse.port,
        )
        return []

    if engine[0][0].startswith("Replicated"):
        is_leader = clickhouse.execute(
            """
            SELECT is_leader
            FROM system.replicas
            WHERE (database = %(database)s) AND (table = %(table)s)
            """,
            {"database": database, "table": table},
        )
        # response: [(0,)] for non-leader or [(1,)] for leader
        if not (len(is_leader) == 1 and is_leader[0][0]):
            return []

    active_parts = clickhouse.execute(
        """
        SELECT
            partition,
            count() AS c
        FROM system.parts
        WHERE active
        AND database = %(database)s
        AND table = %(table)s
        GROUP BY partition
        HAVING c > 1
        ORDER BY c DESC, partition
        """,
        {"database": database, "table": table},
    )

    # Decode partitions with the format declared on this storage's schema.
    schema = storage.get_schema()
    assert isinstance(schema, TableSchema)
    part_format = schema.get_part_format()
    assert part_format is not None

    parts = [
        util.decode_part_str(part, part_format) for part, count in active_parts
    ]

    if before:
        # p.date plus (6 - weekday) days lands on the Sunday that closes its
        # week. Only fully-elapsed weeks are optimized.
        parts = [
            p for p in parts
            if (p.date + timedelta(days=6 - p.date.weekday())) < before
        ]

    return parts