Example #1
def test_reconnect(FakeClient: Client) -> None:
    # If the connection NetworkErrors a first time, make sure we call it a second time.
    FakeClient.return_value.execute.side_effect = [
        errors.NetworkError,
        '{"data": "to my face"}',
    ]
    cp = ClickhousePool("0:0:0:0", 9000, "default", "", "default")
    cp.execute("SHOW TABLES")
    assert FakeClient.return_value.execute.mock_calls == [
        call(
            "SHOW TABLES",
            params=None,
            with_column_types=False,
            query_id=None,
            settings=None,
            types_check=False,
            columnar=False,
        ),
        call(
            "SHOW TABLES",
            params=None,
            with_column_types=False,
            query_id=None,
            settings=None,
            types_check=False,
            columnar=False,
        ),
    ]
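Examples #1, #6, and #8 receive FakeClient from a test fixture that is not shown; they presumably patch the driver Client used inside snuba.clickhouse.native. A minimal sketch of that setup, with the patch target and the exact retry count treated as assumptions:

from unittest import mock

from clickhouse_driver import errors

from snuba.clickhouse.native import ClickhousePool


# Assumed patch target: the Client class as referenced from within
# snuba.clickhouse.native, so ClickhousePool builds mock connections.
@mock.patch("snuba.clickhouse.native.Client")
def test_reconnect_sketch(FakeClient: mock.MagicMock) -> None:
    # First execute raises a network error, the retry succeeds.
    FakeClient.return_value.execute.side_effect = [
        errors.NetworkError,
        '{"data": "to my face"}',
    ]
    pool = ClickhousePool("0:0:0:0", 9000, "default", "", "default")
    pool.execute("SHOW TABLES")
    # The pool retried once, so the underlying client ran the query twice.
    assert FakeClient.return_value.execute.call_count == 2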
Example #2
def test_add_node() -> None:
    host_name = os.environ.get("CLICKHOUSE_HOST", "localhost")
    port = int(os.environ.get("CLICKHOUSE_PORT", 9000))
    user = "******"
    password = ""
    database = os.environ.get("CLICKHOUSE_DATABASE", "default")

    client = ClickhousePool(
        host_name,
        port,
        user,
        password,
        database,
    )

    assert set(client.execute("SHOW TABLES")) == set()

    runner.Runner.add_node(
        node_type=cluster.ClickhouseNodeType.LOCAL,
        storage_sets=[StorageSetKey.OUTCOMES],
        host_name=host_name,
        port=port,
        user=user,
        password=password,
        database=database,
    )

    assert set(client.execute("SHOW TABLES")) == {
        ("outcomes_raw_local", ),
        ("outcomes_hourly_local", ),
        ("outcomes_mv_hourly_local", ),
    }
Example #3
def drop_partitions(
    clickhouse: ClickhousePool,
    database: str,
    table: str,
    parts: Sequence[Tuple[datetime, int]],
    dry_run: bool = True,
) -> None:
    query_template = """\
        ALTER TABLE %(database)s.%(table)s DROP PARTITION ('%(date_str)s', %(retention_days)s)
    """

    for part_date, retention_days in parts:
        args = {
            "database": database,
            "table": table,
            "date_str": part_date.strftime("%Y-%m-%d"),
            "retention_days": retention_days,
        }

        query = (query_template % args).strip()
        if dry_run:
            logger.info("Dry run: " + query)
        else:
            logger.info("Dropping partition: " + query)
            clickhouse.execute(query)
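A hypothetical call site for this variant, assuming clickhouse is an already-connected ClickhousePool and that the table is partitioned by (date, retention_days) pairs as the query template implies; the table name is only illustrative:

from datetime import datetime

# Illustrative partitions: two daily parts kept at 90-day retention.
parts = [
    (datetime(2020, 1, 1), 90),
    (datetime(2020, 1, 2), 90),
]

# dry_run=True (the default) only logs the ALTER TABLE ... DROP PARTITION
# statements; pass dry_run=False to actually execute them.
drop_partitions(clickhouse, "default", "outcomes_raw_local", parts, dry_run=True)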
Example #4
    def load(self, writer: BufferedWriterWrapper) -> None:
        logger = logging.getLogger('snuba.bulk-loader')

        clickhouse_ro = ClickhousePool(client_settings={
            'readonly': True,
        })
        clickhouse_tables = clickhouse_ro.execute('show tables')
        if (self.__dest_table, ) not in clickhouse_tables:
            raise ValueError("Destination table %s does not exists" %
                             self.__dest_table)

        table_content = clickhouse_ro.execute("select count(*) from %s" %
                                              self.__dest_table)
        if table_content != [(0, )]:
            raise ValueError("Destination Table is not empty")

        descriptor = self.__source.get_descriptor()
        logger.info("Loading snapshot %s", descriptor.id)

        with self.__source.get_table_file(self.__source_table) as table:
            logger.info("Loading table %s from file", self.__source_table)
            row_count = 0
            with writer as buffer_writer:
                for row in table:
                    clickhouse_data = self.__row_processor(row)
                    buffer_writer.write(clickhouse_data)
                    row_count += 1
            logger.info("Load complete %d records loaded", row_count)
Example #5
def run_query(
    connection: ClickhousePool,
    query: str,
    records_count: int,
    metrics: MetricsBackend,
) -> None:
    connection.execute_robust(query)
    queries[connection.host].append(query)
Example #6
    def test_reconnect(self, FakeClient):
        # If the connection NetworkErrors a first time, make sure we call it a second time.
        FakeClient.return_value.execute.side_effect = [
            errors.NetworkError, '{"data": "to my face"}'
        ]
        cp = ClickhousePool()
        cp.execute("SHOW TABLES")
        assert FakeClient.return_value.execute.mock_calls == [
            call("SHOW TABLES"), call("SHOW TABLES")
        ]
Example #7
def test_get_fallback_host() -> None:
    FALLBACK_HOSTS_CONFIG_VAL = "host1:100,host2:100,host3:100"
    FALLBACK_HOSTS = [("host1", 100), ("host2", 100), ("host3", 100)]

    state.set_config(f"fallback_hosts:{CLUSTER_HOST}:{CLUSTER_PORT}",
                     FALLBACK_HOSTS_CONFIG_VAL)

    pool = ClickhousePool(CLUSTER_HOST, CLUSTER_PORT, "test", "test",
                          TEST_DB_NAME)

    assert pool.get_fallback_host() in FALLBACK_HOSTS
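The config value above encodes fallback hosts as a comma-separated list of "host:port" entries, and the test expects them back as (host, port) tuples. A standalone sketch of that parsing, not necessarily the parsing ClickhousePool itself performs:

from typing import List, Tuple


def parse_fallback_hosts(config_val: str) -> List[Tuple[str, int]]:
    # "host1:100,host2:100" -> [("host1", 100), ("host2", 100)]
    hosts = []
    for entry in config_val.split(","):
        host, port = entry.strip().rsplit(":", 1)
        hosts.append((host, int(port)))
    return hosts


assert parse_fallback_hosts("host1:100,host2:100,host3:100") == [
    ("host1", 100),
    ("host2", 100),
    ("host3", 100),
]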
Example #8
def test_reconnect(FakeClient) -> None:
    # If the connection NetworkErrors a first time, make sure we call it a second time.
    FakeClient.return_value.execute.side_effect = [
        errors.NetworkError,
        '{"data": "to my face"}',
    ]
    cp = ClickhousePool("0:0:0:0", 9000, "default", "", "default")
    cp.execute("SHOW TABLES")
    assert FakeClient.return_value.execute.mock_calls == [
        call("SHOW TABLES"),
        call("SHOW TABLES"),
    ]
Example #9
def optimize(
    *,
    clickhouse_host: str,
    clickhouse_port: int,
    database: str,
    dataset_name: str,
    timeout: int,
    log_level: Optional[str] = None,
) -> None:
    from datetime import datetime
    from snuba.clickhouse.native import ClickhousePool
    from snuba.optimize import run_optimize, logger

    setup_logging(log_level)

    dataset = get_dataset(dataset_name)
    table = enforce_table_writer(dataset).get_schema().get_local_table_name()

    today = datetime.utcnow().replace(hour=0,
                                      minute=0,
                                      second=0,
                                      microsecond=0)
    clickhouse = ClickhousePool(clickhouse_host,
                                clickhouse_port,
                                send_receive_timeout=timeout)
    num_dropped = run_optimize(clickhouse, database, table, before=today)
    logger.info("Optimized %s partitions on %s" %
                (num_dropped, clickhouse_host))
Example #10
def get_active_partitions(clickhouse: ClickhousePool,
                          storage: WritableTableStorage, database: str,
                          table: str) -> Sequence[util.Part]:

    response = clickhouse.execute(
        """
        SELECT DISTINCT partition
        FROM system.parts
        WHERE database = %(database)s
        AND table = %(table)s
        AND active = 1
        """,
        {
            "database": database,
            "table": table
        },
    )

    schema = storage.get_schema()
    assert isinstance(schema, TableSchema)
    part_format = schema.get_part_format()
    assert part_format is not None
    return [
        util.decode_part_str(part, part_format) for part, in response.results
    ]
Example #11
    def get_node_connection(
        self,
        client_settings: ClickhouseClientSettings,
        node: ClickhouseNode,
    ) -> ClickhousePool:
        """
        Get a Clickhouse connection using the client settings provided. Reuse any
        connection to the same node with the same settings otherwise establish a new
        connection.
        """

        settings, timeout = client_settings.value
        cache_key = (node, client_settings)
        if cache_key not in self.__connection_cache:
            self.__connection_cache[cache_key] = ClickhousePool(
                node.host_name,
                node.port,
                self.__user,
                self.__password,
                self.__database,
                client_settings=settings,
                send_receive_timeout=timeout,
            )

        return self.__connection_cache[cache_key]
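A short usage sketch for the caching behaviour described in the docstring, assuming cluster is an instance of the class this method belongs to and node is one of its ClickhouseNode objects:

# Two requests with the same node and client settings hand back the very
# same ClickhousePool instance instead of opening a new connection.
pool_a = cluster.get_node_connection(ClickhouseClientSettings.QUERY, node)
pool_b = cluster.get_node_connection(ClickhouseClientSettings.QUERY, node)
assert pool_a is pool_b

# A different settings profile gets its own cached connection.
tracing_pool = cluster.get_node_connection(ClickhouseClientSettings.TRACING, node)
assert tracing_pool is not pool_a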
Example #12
def perform_select_query(
    columns: Sequence[str],
    table: str,
    where: Optional[Dict[str, str]],
    limit: Optional[str],
    connection: ClickhousePool,
) -> Sequence[Any]:
    """ Performs a SELECT query, with optional WHERE and LIMIT clauses

    Arguments:
    columns -- a list of columns to be SELECTed
    table -- the name of the table, upon which query is being run
    where -- a dict of WHERE conditions, (str, str) key-value pairs
    limit -- LIMIT argument, passed in as str
    connection -- ClickHouse connection object for query execution
    """

    select_clause = "SELECT " + (", ".join(columns))
    from_clause = " FROM " + table
    where_clause = ""

    if where:
        where_elems = [(key + " = " + "'" + where[key] + "'") for key in where]
        where_clause = " WHERE " + (" AND ".join(where_elems))

    limit_clause = (" LIMIT " + limit) if limit else ""
    full_query = select_clause + from_clause + where_clause + limit_clause

    return connection.execute(full_query)
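A hypothetical call, assuming connection is an established ClickhousePool; it mirrors the system.parts query used elsewhere in these examples:

# Builds and runs:
# SELECT partition, rows FROM system.parts
# WHERE database = 'default' AND table = 'outcomes_raw_local' LIMIT 10
rows = perform_select_query(
    columns=["partition", "rows"],
    table="system.parts",
    where={"database": "default", "table": "outcomes_raw_local"},
    limit="10",
    connection=connection,
)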
Example #13
def optimize_partitions(
    clickhouse: ClickhousePool,
    database: str,
    table: str,
    parts: Sequence[util.Part],
    ignore_cutoff: bool,
    clickhouse_host: Optional[str] = None,
) -> None:
    query_template = """\
        OPTIMIZE TABLE %(database)s.%(table)s
        PARTITION %(partition)s FINAL
    """

    # Adding 10 minutes to the current time before finding the midnight time
    # to ensure this keeps working even if the system clock of the host that
    # starts the pod is slightly ahead of the system clock of the host running
    # the job. This prevents us from getting the wrong midnight.
    last_midnight = (datetime.now() + timedelta(minutes=10)).replace(
        hour=0, minute=0, second=0, microsecond=0)
    if not ignore_cutoff:
        cutoff_time: Optional[datetime] = (last_midnight +
                                           settings.OPTIMIZE_JOB_CUTOFF_TIME)
        logger.info("Cutoff time: %s", str(cutoff_time))
    else:
        cutoff_time = None
        logger.info("Ignoring cutoff time")

    for part in parts:
        if cutoff_time is not None and datetime.now() > cutoff_time:
            raise JobTimeoutException(
                "Optimize job is running past the cutoff time. Abandoning.")

        args = {
            "database": database,
            "table": table,
            "partition": part.name,
        }

        query = (query_template % args).strip()
        logger.info(f"Optimizing partition: {part.name}")
        start = time.time()
        clickhouse.execute(query)
        metrics.timing(
            "optimized_part",
            time.time() - start,
            tags=_get_metrics_tags(table, clickhouse_host),
        )
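The cutoff computation above can be read in isolation: advance the clock slightly, truncate to midnight, then add a configured offset. A standalone sketch, where the 23-hour offset merely stands in for settings.OPTIMIZE_JOB_CUTOFF_TIME:

from datetime import datetime, timedelta

# Placeholder for snuba's settings.OPTIMIZE_JOB_CUTOFF_TIME value.
OPTIMIZE_JOB_CUTOFF_TIME = timedelta(hours=23)

# Adding 10 minutes before truncating guards against a pod whose clock is
# slightly ahead rolling back to the previous midnight.
last_midnight = (datetime.now() + timedelta(minutes=10)).replace(
    hour=0, minute=0, second=0, microsecond=0
)
cutoff_time = last_midnight + OPTIMIZE_JOB_CUTOFF_TIME
print("optimize must finish before", cutoff_time)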
Example #14
    def add_node(
        self,
        node_type: ClickhouseNodeType,
        storage_sets: Sequence[StorageSetKey],
        host_name: str,
        port: int,
        user: str,
        password: str,
        database: str,
    ) -> None:
        client_settings = ClickhouseClientSettings.MIGRATE.value
        clickhouse = ClickhousePool(
            host_name,
            port,
            user,
            password,
            database,
            client_settings=client_settings.settings,
            send_receive_timeout=client_settings.timeout,
        )

        migrations: List[Migration] = []

        for group in get_active_migration_groups():
            group_loader = get_group_loader(group)

            for migration_id in group_loader.get_migrations():
                migration = group_loader.load_migration(migration_id)
                migrations.append(migration)

        for migration in migrations:
            if isinstance(migration, ClickhouseNodeMigration):
                operations = (
                    migration.forwards_local()
                    if node_type == ClickhouseNodeType.LOCAL
                    else migration.forwards_dist()
                )

                for sql_op in operations:
                    if isinstance(sql_op, SqlOperation):
                        if sql_op._storage_set in storage_sets:
                            sql = sql_op.format_sql()
                            print(f"Executing {sql}")
                            clickhouse.execute(sql)
            elif isinstance(migration, CodeMigration):
                for python_op in migration.forwards_global():
                    python_op.execute_new_node(storage_sets)
Example #15
    def test_messages(self):
        processor = GroupAssigneeProcessor('sentry_groupasignee')

        metadata = KafkaMessageMetadata(
            offset=42,
            partition=0,
        )

        begin_msg = json.loads(self.BEGIN_MSG)
        ret = processor.process_message(begin_msg, metadata)
        assert ret is None

        commit_msg = json.loads(self.COMMIT_MSG)
        ret = processor.process_message(commit_msg, metadata)
        assert ret is None

        insert_msg = json.loads(self.INSERT_MSG)
        ret = processor.process_message(insert_msg, metadata)
        assert ret.data == [self.PROCESSED]
        self.write_processed_records(ret.data)
        cp = ClickhousePool()
        ret = cp.execute("SELECT * FROM test_groupassignee_local;")
        assert ret[0] == (
            42,  # offset
            0,  # deleted
            2,  # project_id
            1359,  # group_id
            datetime(2019, 9, 19, 0, 17, 55),
            1,  # user_id
            None,  # team_id
        )

        update_msg = json.loads(self.UPDATE_MSG_NO_KEY_CHANGE)
        ret = processor.process_message(update_msg, metadata)
        assert ret.data == [self.PROCESSED]

        # Tests an update with key change which becomes a two inserts:
        # one deletion and the insertion of the new row.
        update_msg = json.loads(self.UPDATE_MSG_WITH_KEY_CHANGE)
        ret = processor.process_message(update_msg, metadata)
        assert ret.data == [self.DELETED, self.PROCESSED_UPDATE]

        delete_msg = json.loads(self.DELETE_MSG)
        ret = processor.process_message(delete_msg, metadata)
        assert ret.data == [self.DELETED]
Example #16
File: optimize.py Project: getsentry/snuba
def optimize(
    *,
    clickhouse_host: Optional[str],
    clickhouse_port: Optional[int],
    storage_name: str,
    parallel: int,
    log_level: Optional[str] = None,
) -> None:
    from datetime import datetime

    from snuba.clickhouse.native import ClickhousePool
    from snuba.optimize import logger, run_optimize

    setup_logging(log_level)
    setup_sentry()

    storage: ReadableTableStorage

    storage_key = StorageKey(storage_name)
    storage = get_storage(storage_key)

    (clickhouse_user, clickhouse_password) = storage.get_cluster().get_credentials()

    today = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)

    database = storage.get_cluster().get_database()

    # TODO: In distributed mode, optimize currently must be run once for each node
    # with the host and port of that node provided via the CLI. In the future,
    # passing this information won't be necessary, and running this command once
    # will ensure that optimize is performed on all of the individual nodes for
    # that cluster.
    if clickhouse_host and clickhouse_port:
        connection = ClickhousePool(
            clickhouse_host,
            clickhouse_port,
            clickhouse_user,
            clickhouse_password,
            database,
            send_receive_timeout=ClickhouseClientSettings.OPTIMIZE.value.timeout,
        )
    elif not storage.get_cluster().is_single_node():
        raise click.ClickException("Provide Clickhouse host and port for optimize")
    else:
        connection = storage.get_cluster().get_query_connection(
            ClickhouseClientSettings.OPTIMIZE
        )

    num_dropped = run_optimize(
        connection,
        storage,
        database,
        before=today,
        parallel=parallel,
        clickhouse_host=clickhouse_host,
    )
    logger.info("Optimized %s partitions on %s" % (num_dropped, clickhouse_host))
Example #17
File: base.py Project: forkkit/snuba
    def setup_method(self, test_method, dataset_name=None):
        assert settings.TESTING, "settings.TESTING is False, try `SNUBA_SETTINGS=test` or `make test`"

        self.database = 'default'
        self.dataset_name = dataset_name

        if self.dataset_name:
            self.dataset = get_dataset(self.dataset_name)
            self.clickhouse = ClickhousePool()

            for statement in self.dataset.get_dataset_schemas(
            ).get_drop_statements():
                self.clickhouse.execute(statement)

            for statement in self.dataset.get_dataset_schemas(
            ).get_create_statements():
                self.clickhouse.execute(statement)

            redis_client.flushdb()
Example #18
File: connect.py Project: getsentry/snuba
def check_clickhouse(clickhouse: ClickhousePool) -> None:
    ver = clickhouse.execute("SELECT version()").results[0][0]
    # The newer versions of altinity on arm add this to the version
    # and it breaks this check
    ver = ver.replace(".testingarm", "")
    ver = ver.replace(".altinitystable", "")
    if version.parse(ver) < version.parse(CLICKHOUSE_SERVER_MIN_VERSION):
        raise InvalidClickhouseVersion(
            f"Snuba requires Clickhouse version {CLICKHOUSE_SERVER_MIN_VERSION} ({clickhouse.host}:{clickhouse.port} - {ver})"
        )
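A standalone sketch of the comparison performed above, using packaging.version; the minimum-version constant here is only a placeholder for snuba's CLICKHOUSE_SERVER_MIN_VERSION:

from packaging import version

# Placeholder value; the real constant lives in snuba.
CLICKHOUSE_SERVER_MIN_VERSION = "20.3"

# Altinity builds append suffixes that break the version comparison,
# so they are stripped before parsing.
ver = "21.8.13.1.altinitystable".replace(".testingarm", "").replace(".altinitystable", "")
assert version.parse(ver) >= version.parse(CLICKHOUSE_SERVER_MIN_VERSION)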
Example #19
    def test_bulk_load(self):
        row = GroupAssigneeRow.from_bulk({
            'project_id': '2',
            'group_id': '1359',
            'date_added': '2019-09-19 00:17:55+00',
            'user_id': '1',
            'team_id': '',
        })
        self.write_processed_records(row.to_clickhouse())
        cp = ClickhousePool()
        ret = cp.execute("SELECT * FROM test_groupassignee_local;")
        assert ret[0] == (
            0,  # offset
            0,  # deleted
            2,  # project_id
            1359,  # group_id
            datetime(2019, 9, 19, 0, 17, 55),
            1,  # user_id
            None,  # team_id
        )
Example #20
    def test_messages(self):
        processor = GroupedMessageProcessor('sentry_groupedmessage')

        metadata = KafkaMessageMetadata(
            offset=42,
            partition=0,
        )

        begin_msg = json.loads(self.BEGIN_MSG)
        ret = processor.process_message(begin_msg, metadata)
        assert ret is None

        commit_msg = json.loads(self.COMMIT_MSG)
        ret = processor.process_message(commit_msg, metadata)
        assert ret is None

        insert_msg = json.loads(self.INSERT_MSG)
        ret = processor.process_message(insert_msg, metadata)
        assert ret[1] == self.PROCESSED
        self.write_processed_records(ret[1])
        cp = ClickhousePool()
        ret = cp.execute("SELECT * FROM test_groupedmessage_local;")
        assert ret[0] == (
            42,  # offset
            0,  # deleted
            2,  # project_id
            74,  # id
            0,  # status
            datetime(2019, 6, 19, 6, 46, 28),
            datetime(2019, 6, 19, 6, 45, 32),
            datetime(2019, 6, 19, 6, 45, 32),
            None,
        )

        update_msg = json.loads(self.UPDATE_MSG)
        ret = processor.process_message(update_msg, metadata)
        assert ret[1] == self.PROCESSED

        delete_msg = json.loads(self.DELETE_MSG)
        ret = processor.process_message(delete_msg, metadata)
        assert ret[1] == self.DELETED
Example #21
File: common.py Project: getsentry/snuba
def get_ro_node_connection(
    clickhouse_host: str,
    clickhouse_port: int,
    storage_name: str,
    client_settings: ClickhouseClientSettings,
) -> ClickhousePool:
    storage_key = None
    try:
        storage_key = StorageKey(storage_name)
    except ValueError:
        raise InvalidStorageError(
            f"storage {storage_name} is not a valid storage name",
            extra_data={"storage_name": storage_name},
        )

    key = f"{storage_key}-{clickhouse_host}"
    if key in NODE_CONNECTIONS:
        return NODE_CONNECTIONS[key]

    storage = get_storage(storage_key)
    cluster = storage.get_cluster()

    if not is_valid_node(clickhouse_host, clickhouse_port, cluster):
        raise InvalidNodeError(
            f"host {clickhouse_host} and port {clickhouse_port} are not valid",
            extra_data={"host": clickhouse_host, "port": clickhouse_port},
        )

    database = cluster.get_database()

    assert client_settings in {
        ClickhouseClientSettings.QUERY,
        ClickhouseClientSettings.TRACING,
    }, "admin can only use QUERY or TRACING ClickhouseClientSettings"

    if client_settings == ClickhouseClientSettings.QUERY:
        username = settings.CLICKHOUSE_READONLY_USER
        password = settings.CLICKHOUSE_READONLY_PASSWORD
    else:
        username = settings.CLICKHOUSE_TRACE_USER
        password = settings.CLICKHOUSE_TRACE_PASSWORD

    connection = ClickhousePool(
        clickhouse_host,
        clickhouse_port,
        username,
        password,
        database,
        max_pool_size=2,
        client_settings=client_settings.value.settings,
    )
    NODE_CONNECTIONS[key] = connection
    return connection
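A hypothetical admin-side call, assuming "errors" is a valid storage name and that localhost:9000 is a node of its cluster; the QUERY settings select the read-only credentials branch above:

conn = get_ro_node_connection(
    "localhost", 9000, "errors", ClickhouseClientSettings.QUERY
)
# Subsequent calls for the same storage and host reuse the cached pool.
assert get_ro_node_connection(
    "localhost", 9000, "errors", ClickhouseClientSettings.QUERY
) is conn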
Example #22
def test_fallback_logic() -> None:
    state.set_config("use_fallback_host_in_native_connection_pool", 1)

    network_failure_connection = mock.Mock()
    network_failure_connection.execute.side_effect = EOFError()

    verification_connection = mock.Mock()
    verification_connection.execute.return_value = []

    pool = ClickhousePool(CLUSTER_HOST, CLUSTER_PORT, "test", "test",
                          TEST_DB_NAME)

    # The execute method will try to reuse a single slot in the connection
    # pool but reestablish new connections with _create_conn if a connection
    # fails with a network-related error. It may be cleaner to move connection
    # negotiation/establishment into another class for separation of concerns.
    with mock.patch.object(pool,
                           "_create_conn",
                           lambda x, y=False: network_failure_connection):
        pool.pool = queue.LifoQueue(1)
        pool.pool.put(network_failure_connection, block=False)
        pool.fallback_pool = queue.LifoQueue(1)
        pool.fallback_pool.put(verification_connection, block=False)
        pool.execute("SELECT something")

    assert (network_failure_connection.execute.call_count == 3
            ), "Expected three (failed) attempts with main connection pool"
    assert (
        verification_connection.execute.call_count == 1
    ), "Expected one (successful) attempt with fallback connection pool"
Example #23
def optimize_partitions(
    clickhouse: ClickhousePool,
    database: str,
    table: str,
    parts: Sequence[util.Part],
) -> None:

    query_template = """\
        OPTIMIZE TABLE %(database)s.%(table)s
        PARTITION %(partition)s FINAL
    """

    for part in parts:
        args = {
            "database": database,
            "table": table,
            "partition": part.name,
        }

        query = (query_template % args).strip()
        logger.info(f"Optimizing partition: {part.name}")
        clickhouse.execute(query)
Example #24
    def test_bulk_load(self):
        row = GroupedMessageRow.from_bulk({
            'project_id': '2',
            'id': '10',
            'status': '0',
            'last_seen': '2019-06-28 17:57:32+00',
            'first_seen': '2019-06-28 06:40:17+00',
            'active_at': '2019-06-28 06:40:17+00',
            'first_release_id': '26',
        })
        self.write_processed_records(row.to_clickhouse())
        cp = ClickhousePool()
        ret = cp.execute("SELECT * FROM test_groupedmessage_local;")
        assert ret[0] == (
            0,  # offset
            0,  # deleted
            2,  # project_id
            10,  # id
            0,  # status
            datetime(2019, 6, 28, 17, 57, 32),
            datetime(2019, 6, 28, 6, 40, 17),
            datetime(2019, 6, 28, 6, 40, 17),
            26,
        )
Example #25
def drop_partitions(
    clickhouse: ClickhousePool,
    database: str,
    table: str,
    parts: Sequence[util.Part],
    dry_run: bool = True,
) -> None:
    query_template = """\
        ALTER TABLE %(database)s.%(table)s DROP PARTITION %(partition)s
    """

    for part in parts:
        args = {
            "database": database,
            "table": table,
            "partition": part.name,
        }

        query = (query_template % args).strip()
        if dry_run:
            logger.info("Dry run: " + query)
        else:
            logger.info("Dropping partition: " + query)
            clickhouse.execute(query)
Example #26
        def run_query(
            connection: ClickhousePool,
            query: str,
            records_count: int,
            metrics: MetricsBackend,
        ) -> None:
            t = time.time()

            logger.debug("Executing replace query: %s" % query)
            connection.execute_robust(query)
            duration = int((time.time() - t) * 1000)

            logger.info("Replacing %s rows took %sms" %
                        (records_count, duration))
            metrics.timing(
                "replacements.count",
                records_count,
                tags={"host": connection.host},
            )
            metrics.timing(
                "replacements.duration",
                duration,
                tags={"host": connection.host},
            )
Example #27
def get_active_partitions(
    clickhouse: ClickhousePool, database: str, table: str
) -> Sequence[util.Part]:
    response = clickhouse.execute(
        """
        SELECT DISTINCT partition
        FROM system.parts
        WHERE database = %(database)s
        AND table = %(table)s
        AND active = 1
        """,
        {"database": database, "table": table},
    )

    return [util.decode_part_str(part) for part, in response]
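This helper pairs naturally with the drop_partitions variant in example #25; a hedged sketch of the composition, assuming clickhouse is a connected ClickhousePool and the table name is illustrative:

# List the active parts, then log (but do not run) the DROP PARTITION
# statements thanks to dry_run=True.
parts = get_active_partitions(clickhouse, "default", "outcomes_raw_local")
drop_partitions(clickhouse, "default", "outcomes_raw_local", parts, dry_run=True)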
Example #28
def test_cache_partition() -> None:
    pool = ClickhousePool("localhost", 9000, "", "", "")
    reader1 = NativeDriverReader(None, pool)
    reader2 = NativeDriverReader(None, pool)

    default_cache = _get_cache_partition(reader1)
    another_default_cache = _get_cache_partition(reader2)

    assert id(default_cache) == id(another_default_cache)

    reader3 = NativeDriverReader("non_default", pool)
    reader4 = NativeDriverReader("non_default", pool)
    nondefault_cache = _get_cache_partition(reader3)
    another_nondefault_cache = _get_cache_partition(reader4)

    assert id(nondefault_cache) == id(another_nondefault_cache)
    assert id(default_cache) != id(nondefault_cache)
Example #29
File: cleanup.py Project: fpacifici/snuba
def cleanup(
    *,
    clickhouse_host: Optional[str],
    clickhouse_port: Optional[int],
    dry_run: bool,
    database: str,
    storage_name: str,
    log_level: Optional[str] = None,
) -> None:
    """
    Deletes stale partitions for ClickHouse tables
    """

    setup_logging(log_level)

    from snuba.cleanup import run_cleanup, logger
    from snuba.clickhouse.native import ClickhousePool

    writable_storage = get_writable_storage(StorageKey(storage_name))

    (
        clickhouse_user,
        clickhouse_password,
    ) = writable_storage.get_cluster().get_credentials()

    table = writable_storage.get_table_writer().get_schema(
    ).get_local_table_name()

    if clickhouse_host and clickhouse_port and database:
        connection = ClickhousePool(
            clickhouse_host,
            clickhouse_port,
            clickhouse_user,
            clickhouse_password,
            database,
        )
    elif not writable_storage.get_cluster().is_single_node():
        raise click.ClickException(
            "Provide ClickHouse host and port for cleanup")
    else:
        connection = writable_storage.get_cluster().get_query_connection(
            ClickhouseClientSettings.CLEANUP)

    num_dropped = run_cleanup(connection, database, table, dry_run=dry_run)
    logger.info("Dropped %s partitions on %s" % (num_dropped, clickhouse_host))
Example #30
def cleanup(clickhouse_host, clickhouse_port, dry_run, database, dataset,
            log_level):
    """
    Deletes stale partitions for ClickHouse tables
    """

    from snuba.cleanup import run_cleanup, logger
    from snuba.clickhouse.native import ClickhousePool

    dataset = get_dataset(dataset)
    table = enforce_table_writer(dataset).get_schema().get_local_table_name()

    logging.basicConfig(level=getattr(logging, log_level.upper()),
                        format='%(asctime)s %(message)s')

    clickhouse = ClickhousePool(clickhouse_host, clickhouse_port)
    num_dropped = run_cleanup(clickhouse, database, table, dry_run=dry_run)
    logger.info("Dropped %s partitions on %s" % (num_dropped, clickhouse_host))