Example 1
def optimize(
    *,
    clickhouse_host: str,
    clickhouse_port: int,
    database: str,
    dataset_name: str,
    timeout: int,
    log_level: Optional[str] = None,
) -> None:
    from datetime import datetime
    from snuba.clickhouse.native import ClickhousePool
    from snuba.optimize import run_optimize, logger

    setup_logging(log_level)

    dataset = get_dataset(dataset_name)
    table = enforce_table_writer(dataset).get_schema().get_local_table_name()

    today = datetime.utcnow().replace(hour=0,
                                      minute=0,
                                      second=0,
                                      microsecond=0)
    clickhouse = ClickhousePool(clickhouse_host,
                                clickhouse_port,
                                send_receive_timeout=timeout)
    num_dropped = run_optimize(clickhouse, database, table, before=today)
    logger.info("Optimized %s partitions on %s" %
                (num_dropped, clickhouse_host))
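
The before=today argument is built by truncating the current UTC time to midnight, so run_optimize is presumably asked to consider only data older than the current day. A small illustration of the truncation (the timestamp is invented):

from datetime import datetime

now = datetime(2024, 3, 5, 14, 30, 27, 123456)  # pretend this is the current UTC time
today = now.replace(hour=0, minute=0, second=0, microsecond=0)
assert today == datetime(2024, 3, 5)  # truncated to midnight of the same day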

Example 2
def optimize(
    *,
    clickhouse_host: Optional[str],
    clickhouse_port: Optional[int],
    storage_name: str,
    parallel: int,
    log_level: Optional[str] = None,
) -> None:
    from datetime import datetime

    from snuba.clickhouse.native import ClickhousePool
    from snuba.optimize import logger, run_optimize

    setup_logging(log_level)
    setup_sentry()

    storage: ReadableTableStorage

    storage_key = StorageKey(storage_name)
    storage = get_storage(storage_key)

    (clickhouse_user, clickhouse_password) = storage.get_cluster().get_credentials()

    today = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)

    database = storage.get_cluster().get_database()

    # TODO: In distributed mode, optimize currently must be run once for each node
    # with the host and port of that node provided via the CLI. In the future,
    # passing this information won't be necessary, and running this command once
    # will ensure that optimize is performed on all of the individual nodes for
    # that cluster.
    if clickhouse_host and clickhouse_port:
        connection = ClickhousePool(
            clickhouse_host,
            clickhouse_port,
            clickhouse_user,
            clickhouse_password,
            database,
            send_receive_timeout=ClickhouseClientSettings.OPTIMIZE.value.timeout,
        )
    elif not storage.get_cluster().is_single_node():
        raise click.ClickException("Provide Clickhouse host and port for optimize")
    else:
        connection = storage.get_cluster().get_query_connection(
            ClickhouseClientSettings.OPTIMIZE
        )

    num_dropped = run_optimize(
        connection,
        storage,
        database,
        before=today,
        parallel=parallel,
        clickhouse_host=clickhouse_host,
    )
    logger.info("Optimized %s partitions on %s" % (num_dropped, clickhouse_host))

Example 3
def optimize(clickhouse_server, database, table, timeout, log_level):
    from datetime import datetime
    from snuba.clickhouse import ClickhousePool
    from snuba.optimize import run_optimize, logger

    logging.basicConfig(level=getattr(logging, log_level.upper()),
                        format='%(asctime)s %(message)s')

    if not clickhouse_server:
        logger.error("Must provide at least one Clickhouse server.")
        sys.exit(1)

    today = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
    for server in clickhouse_server:
        clickhouse = ClickhousePool(
            server.split(':')[0], port=int(server.split(':')[1]), send_receive_timeout=timeout
        )
        num_dropped = run_optimize(clickhouse, database, table, before=today)
        logger.info("Optimized %s partitions on %s" % (num_dropped, server))

Example 4
def optimize(clickhouse_host, clickhouse_port, database, dataset, timeout,
             log_level):
    from datetime import datetime
    from snuba.clickhouse.native import ClickhousePool
    from snuba.optimize import run_optimize, logger

    logging.basicConfig(level=getattr(logging, log_level.upper()),
                        format='%(asctime)s %(message)s')

    dataset = get_dataset(dataset)
    table = enforce_table_writer(dataset).get_schema().get_local_table_name()

    today = datetime.utcnow().replace(hour=0,
                                      minute=0,
                                      second=0,
                                      microsecond=0)
    clickhouse = ClickhousePool(clickhouse_host,
                                clickhouse_port,
                                send_receive_timeout=timeout)
    num_dropped = run_optimize(clickhouse, database, table, before=today)
    logger.info("Optimized %s partitions on %s" %
                (num_dropped, clickhouse_host))