Example #1
0
def get_active_partitions(clickhouse: ClickhousePool,
                          storage: WritableTableStorage, database: str,
                          table: str) -> Sequence[util.Part]:
    """List the distinct partitions of a table that have active parts.

    Runs a ``SELECT DISTINCT partition`` against ``system.parts`` for the
    given database and table (``active = 1`` only), then decodes each
    partition string with the part format taken from the storage's schema.

    Args:
        clickhouse: Connection providing ``execute(query, params)``; the
            returned object must expose its rows as ``.results``.
        storage: Storage whose schema supplies the part format.
        database: Database name bound to the ``%(database)s`` placeholder.
        table: Table name bound to the ``%(table)s`` placeholder.

    Returns:
        One decoded part per row returned by the query.

    Raises:
        AssertionError: If the schema is not a ``TableSchema`` or it
            reports no part format.
    """
    query = """
        SELECT DISTINCT partition
        FROM system.parts
        WHERE database = %(database)s
        AND table = %(table)s
        AND active = 1
        """
    params = {"database": database, "table": table}
    response = clickhouse.execute(query, params)

    # The schema is only consulted after the query has run, matching the
    # order in which failures would surface to the caller.
    schema = storage.get_schema()
    assert isinstance(schema, TableSchema)
    part_format = schema.get_part_format()
    assert part_format is not None

    decoded = []
    # Each row is a one-element tuple holding the partition string.
    for (partition,) in response.results:
        decoded.append(util.decode_part_str(partition, part_format))
    return decoded
Example #2
0
def get_partitions_to_optimize(clickhouse,
                               database,
                               table,
                               before=None) -> Sequence[util.Part]:
    """Return the partitions of a table that hold more than one active part.

    The lookup proceeds in three steps:

    1. Read the table's engine from ``system.tables``; when no row comes
       back, log a warning and return an empty list.
    2. For engines whose name starts with ``Replicated``, read ``is_leader``
       from ``system.replicas`` and return an empty list unless exactly one
       row comes back and its value is truthy.
    3. Read from ``system.parts`` every partition with more than one active
       part, ordered by part count (descending) and then by partition.

    Args:
        clickhouse: Connection providing ``execute(query, params)`` plus the
            ``host`` and ``port`` attributes used in the warning message.
        database: Database name bound into each query.
        table: Table name bound into each query.
        before: Optional cutoff. When truthy, a part ``p`` is kept only if
            ``p[0]`` advanced by ``6 - p[0].weekday()`` days (the Sunday of
            its week, since ``weekday()`` is 0 for Monday) is strictly
            earlier than this value. Presumably ``p[0]`` is the partition
            date -- confirm against ``util.decode_part_str``.

    Returns:
        The decoded parts in query order, after the optional filter.
    """
    query_args = {
        'database': database,
        'table': table,
    }

    engine = clickhouse.execute(
        """
        SELECT engine
        FROM system.tables
        WHERE (database = %(database)s) AND (name = %(table)s)
        """, query_args)

    if not engine:
        # Pass the values as logging arguments so interpolation is deferred
        # until the record is actually emitted and the message template
        # stays constant across calls.
        logger.warning("Table %s.%s doesn't exist on %s:%s",
                       database, table, clickhouse.host, clickhouse.port)
        return []

    if engine[0][0].startswith('Replicated'):
        is_leader = clickhouse.execute(
            """
            SELECT is_leader
            FROM system.replicas
            WHERE (database = %(database)s) AND (table = %(table)s)
            """, query_args)

        # response: [(0,)] for non-leader or [(1,)] for leader
        if not (len(is_leader) == 1 and is_leader[0][0]):
            return []

    active_parts = clickhouse.execute(
        """
        SELECT
            partition,
            count() AS c
        FROM system.parts
        WHERE active
        AND database = %(database)s
        AND table = %(table)s
        GROUP BY partition
        HAVING c > 1
        ORDER BY c DESC, partition
        """, query_args)

    # The per-partition count only drives the SQL ordering; it is not
    # needed once the rows are back.
    parts = [util.decode_part_str(part) for part, _count in active_parts]

    if before:
        parts = [
            p for p in parts
            if (p[0] + timedelta(days=6 - p[0].weekday())) < before
        ]

    return parts
Example #3
0
def get_active_partitions(
    clickhouse: ClickhousePool, database: str, table: str
) -> Sequence[util.Part]:
    """List the distinct partitions of a table that have active parts.

    Args:
        clickhouse: Connection providing ``execute(query, params)``; the
            result is iterated directly as rows.
        database: Database name bound to the ``%(database)s`` placeholder.
        table: Table name bound to the ``%(table)s`` placeholder.

    Returns:
        One ``util.decode_part_str`` result per row returned by the query.
    """
    query = """
        SELECT DISTINCT partition
        FROM system.parts
        WHERE database = %(database)s
        AND table = %(table)s
        AND active = 1
        """
    rows = clickhouse.execute(query, {"database": database, "table": table})

    partitions = []
    # Each row is a one-element tuple holding the partition string.
    for (partition_name,) in rows:
        partitions.append(util.decode_part_str(partition_name))
    return partitions
Example #4
0
def get_active_partitions(clickhouse, database, table):
    """Return the decoded distinct partitions that have active parts.

    Queries ``system.parts`` for rows matching ``database`` and ``table``
    with ``active = 1`` and decodes each returned partition string with
    ``util.decode_part_str``.

    Args:
        clickhouse: Connection providing ``execute(query, params)``; the
            result is iterated directly as rows.
        database: Database name bound to the ``%(database)s`` placeholder.
        table: Table name bound to the ``%(table)s`` placeholder.

    Returns:
        A list with one decoded part per returned row.
    """
    bindings = {
        'database': database,
        'table': table,
    }
    sql = """
        SELECT DISTINCT partition
        FROM system.parts
        WHERE database = %(database)s
        AND table = %(table)s
        AND active = 1
        """

    decoded_parts = []
    # Rows come back as one-element tuples; unpack to get the bare string.
    for (raw_partition,) in clickhouse.execute(sql, bindings):
        decoded_parts.append(util.decode_part_str(raw_partition))
    return decoded_parts
Example #5
0
def get_partitions_to_optimize(
    clickhouse: ClickhousePool,
    storage: ReadableTableStorage,
    database: str,
    table: str,
    before: Optional[datetime] = None,
) -> Sequence[util.Part]:
    """Return the partitions of a table that hold more than one active part.

    The lookup proceeds in three steps:

    1. Read the table's engine from ``system.tables``; when no row comes
       back, log a warning and return an empty list.
    2. For engines whose name starts with ``Replicated``, read ``is_leader``
       from ``system.replicas`` and return an empty list unless exactly one
       row comes back and its value is truthy.
    3. Read from ``system.parts`` every partition with more than one active
       part, ordered by part count (descending) and then by partition, and
       decode each one with the part format from the storage's schema.

    Args:
        clickhouse: Connection providing ``execute(query, params)`` plus the
            ``host`` and ``port`` attributes used in the warning message.
        storage: Storage whose schema supplies the part format.
        database: Database name bound into each query.
        table: Table name bound into each query.
        before: Optional cutoff. When truthy, a part is kept only if its
            ``date`` advanced by ``6 - date.weekday()`` days (the Sunday of
            its week, since ``weekday()`` is 0 for Monday) is strictly
            earlier than this value.

    Returns:
        The decoded parts in query order, after the optional filter.

    Raises:
        AssertionError: If the schema is not a ``TableSchema`` or it
            reports no part format.
    """
    query_args = {"database": database, "table": table}

    engine = clickhouse.execute(
        """
        SELECT engine
        FROM system.tables
        WHERE (database = %(database)s) AND (name = %(table)s)
        """,
        query_args,
    )

    if not engine:
        # Pass the values as logging arguments so interpolation is deferred
        # until the record is actually emitted and the message template
        # stays constant across calls.
        logger.warning("Table %s.%s doesn't exist on %s:%s",
                       database, table, clickhouse.host, clickhouse.port)
        return []

    if engine[0][0].startswith("Replicated"):
        is_leader = clickhouse.execute(
            """
            SELECT is_leader
            FROM system.replicas
            WHERE (database = %(database)s) AND (table = %(table)s)
            """,
            query_args,
        )

        # response: [(0,)] for non-leader or [(1,)] for leader
        if not (len(is_leader) == 1 and is_leader[0][0]):
            return []

    active_parts = clickhouse.execute(
        """
        SELECT
            partition,
            count() AS c
        FROM system.parts
        WHERE active
        AND database = %(database)s
        AND table = %(table)s
        GROUP BY partition
        HAVING c > 1
        ORDER BY c DESC, partition
        """,
        query_args,
    )

    schema = storage.get_schema()
    assert isinstance(schema, TableSchema)
    part_format = schema.get_part_format()
    assert part_format is not None

    # The per-partition count only drives the SQL ordering; it is not
    # needed once the rows are back.
    parts = [
        util.decode_part_str(part, part_format)
        for part, _count in active_parts
    ]

    if before:
        parts = [
            p for p in parts
            if (p.date + timedelta(days=6 - p.date.weekday())) < before
        ]

    return parts