Example #1
    def setup_method(self, test_method):
        assert settings.TESTING, "settings.TESTING is False, try `SNUBA_SETTINGS=test` or `make test`"

        from fixtures import raw_event

        timestamp = datetime.utcnow()
        raw_event['datetime'] = (
            timestamp - timedelta(seconds=2)).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
        raw_event['received'] = int(
            calendar.timegm((timestamp - timedelta(seconds=1)).timetuple()))
        self.event = self.wrap_raw_event(raw_event)

        self.database = 'default'
        self.table = settings.CLICKHOUSE_TABLE

        self.clickhouse = ClickhousePool()

        self.clickhouse.execute("DROP TABLE IF EXISTS %s" % self.table)
        self.clickhouse.execute(
            get_table_definition(
                name=self.table,
                engine=get_test_engine(),
            ))

        redis_client.flushdb()
Example #2
def bootstrap(bootstrap_server, force):
    """
    Warning: Not intended to be used in production yet.
    """
    if not force:
        raise click.ClickException('Must use --force to run')

    from confluent_kafka.admin import AdminClient, NewTopic

    client = AdminClient({
        'bootstrap.servers': ','.join(bootstrap_server),
        'socket.timeout.ms': 1000,
    })

    topics = [
        NewTopic(o.pop('topic'), **o) for o in settings.KAFKA_TOPICS.values()
    ]

    for topic, future in client.create_topics(topics).items():
        try:
            future.result()
            print("Topic %s created" % topic)
        except Exception as e:
            print("Failed to create topic %s: %s" % (topic, e))

    from snuba.clickhouse import ClickhousePool, get_table_definition, get_test_engine

    # Need to better figure out if we are configured to use replicated
    # tables or distributed tables, etc.
    ClickhousePool().execute(
        get_table_definition(
            settings.DEFAULT_LOCAL_TABLE,
            get_test_engine(),
        ))
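The snippet shows only the command body; the click decorators that bind bootstrap_server and force are not shown. A minimal sketch of how such a command could be wired up, with hypothetical option names and defaults:

import click

from snuba import settings


@click.command()
@click.option('--bootstrap-server', multiple=True, default=['localhost:9092'],
              help='Kafka bootstrap servers; may be passed more than once.')
@click.option('--force', is_flag=True,
              help='Required safeguard, since this command is not production-ready.')
def bootstrap(bootstrap_server, force):
    ...  # body as shown above

Because --bootstrap-server is declared with multiple=True, the value arrives as a tuple, which is why the body joins it with ','.join(bootstrap_server).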
Example #3
def replacer(replacements_topic, consumer_group, bootstrap_server,
             clickhouse_server, distributed_table_name, max_batch_size,
             max_batch_time_ms, auto_offset_reset, queued_max_messages_kbytes,
             queued_min_messages, log_level, dogstatsd_host, dogstatsd_port):

    import sentry_sdk
    from snuba import util
    from snuba.clickhouse import ClickhousePool
    from batching_kafka_consumer import BatchingKafkaConsumer
    from snuba.replacer import ReplacerWorker

    sentry_sdk.init(dsn=settings.SENTRY_DSN)

    logging.basicConfig(level=getattr(logging, log_level.upper()),
                        format='%(asctime)s %(message)s')
    metrics = util.create_metrics(dogstatsd_host,
                                  dogstatsd_port,
                                  'snuba.replacer',
                                  tags=["group:%s" % consumer_group])

    client_settings = {
        # Replacing existing rows requires reconstructing the entire tuple for each
        # event (via a SELECT), which is a Hard Thing (TM) for columnstores to do. With
        # the default settings it's common for ClickHouse to go over the default max_memory_usage
        # of 10GB per query. Lowering the max_block_size reduces memory usage, and increasing the
        # max_memory_usage gives the query more breathing room.
        'max_block_size': settings.REPLACER_MAX_BLOCK_SIZE,
        'max_memory_usage': settings.REPLACER_MAX_MEMORY_USAGE,
        # Don't use up production cache for the count() queries.
        'use_uncompressed_cache': 0,
    }

    clickhouse = ClickhousePool(
        host=clickhouse_server.split(':')[0],
        port=int(clickhouse_server.split(':')[1]),
        client_settings=client_settings,
    )

    replacer = BatchingKafkaConsumer(
        replacements_topic,
        worker=ReplacerWorker(clickhouse,
                              distributed_table_name,
                              metrics=metrics),
        max_batch_size=max_batch_size,
        max_batch_time=max_batch_time_ms,
        metrics=metrics,
        bootstrap_servers=bootstrap_server,
        group_id=consumer_group,
        producer=None,
        commit_log_topic=None,
        auto_offset_reset=auto_offset_reset,
    )

    def handler(signum, frame):
        replacer.signal_shutdown()

    signal.signal(signal.SIGINT, handler)

    replacer.run()
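Only SIGINT is handled above. As an aside (not part of the original command), the same handler could be registered for SIGTERM as well, so that container stops trigger the same graceful shutdown:

    # Register the shutdown handler for both Ctrl-C and e.g. `docker stop`.
    for sig in (signal.SIGINT, signal.SIGTERM):
        signal.signal(sig, handler)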
Example #4
def consumer(raw_events_topic, replacements_topic, commit_log_topic, consumer_group,
             bootstrap_server, clickhouse_server, distributed_table_name, max_batch_size, max_batch_time_ms,
             auto_offset_reset, queued_max_messages_kbytes, queued_min_messages, log_level,
             dogstatsd_host, dogstatsd_port):

    import sentry_sdk
    from confluent_kafka import Producer
    from snuba import util
    from snuba.clickhouse import ClickhousePool
    from batching_kafka_consumer import BatchingKafkaConsumer
    from snuba.consumer import ConsumerWorker

    sentry_sdk.init(dsn=settings.SENTRY_DSN)

    logging.basicConfig(level=getattr(logging, log_level.upper()), format='%(asctime)s %(message)s')
    metrics = util.create_metrics(
        dogstatsd_host, dogstatsd_port, 'snuba.consumer', tags=["group:%s" % consumer_group]
    )

    clickhouse = ClickhousePool(
        host=clickhouse_server.split(':')[0],
        port=int(clickhouse_server.split(':')[1]),
        client_settings={
            'load_balancing': 'in_order',
            'insert_distributed_sync': True,
        },
        metrics=metrics
    )

    producer = Producer({
        'bootstrap.servers': ','.join(bootstrap_server),
        'partitioner': 'consistent',
        'message.max.bytes': 50000000,  # 50MB, default is 1MB
    })

    consumer = BatchingKafkaConsumer(
        raw_events_topic,
        worker=ConsumerWorker(
            clickhouse, distributed_table_name,
            producer=producer, replacements_topic=replacements_topic, metrics=metrics
        ),
        max_batch_size=max_batch_size,
        max_batch_time=max_batch_time_ms,
        metrics=metrics,
        bootstrap_servers=bootstrap_server,
        group_id=consumer_group,
        producer=producer,
        commit_log_topic=commit_log_topic,
        auto_offset_reset=auto_offset_reset,
    )

    def handler(signum, frame):
        consumer.signal_shutdown()

    signal.signal(signal.SIGINT, handler)

    consumer.run()
Example #5
def cleanup(clickhouse_server, dry_run, database, table, log_level):
    from snuba.cleanup import run_cleanup, logger
    from snuba.clickhouse import ClickhousePool

    logging.basicConfig(level=getattr(logging, log_level.upper()),
                        format='%(asctime)s %(message)s')

    if not clickhouse_server:
        logger.error("Must provide at least one Clickhouse server.")
        sys.exit(1)

    for server in clickhouse_server:
        clickhouse = ClickhousePool(server.split(':')[0],
                                    port=int(server.split(':')[1]))
        num_dropped = run_cleanup(clickhouse, database, table, dry_run=dry_run)
        logger.info("Dropped %s partitions on %s" % (num_dropped, server))
Example #6
def optimize(clickhouse_server, database, table, timeout, log_level):
    from datetime import datetime
    from snuba.clickhouse import ClickhousePool
    from snuba.optimize import run_optimize, logger

    logging.basicConfig(level=getattr(logging, log_level.upper()), format='%(asctime)s %(message)s')

    if not clickhouse_server:
        logger.error("Must provide at least one Clickhouse server.")
        sys.exit(1)

    today = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
    for server in clickhouse_server:
        clickhouse = ClickhousePool(
            server.split(':')[0], port=int(server.split(':')[1]), send_receive_timeout=timeout
        )
        num_dropped = run_optimize(clickhouse, database, table, before=today)
        logger.info("Optimized %s partitions on %s" % (num_dropped, server))
Example #7
def perf(events_file, repeat, profile_process, profile_write,
         clickhouse_server, table_name, log_level):
    from snuba.clickhouse import ClickhousePool
    from snuba.perf import run, logger

    logging.basicConfig(level=getattr(logging, log_level.upper()),
                        format='%(asctime)s %(message)s')

    if settings.CLICKHOUSE_TABLE != 'dev':
        logger.error(
            "The migration tool is only intended for local development environment."
        )
        sys.exit(1)

    clickhouse = ClickhousePool(clickhouse_server.split(':')[0],
                                port=int(clickhouse_server.split(':')[1]))
    run(events_file,
        clickhouse,
        table_name,
        repeat=repeat,
        profile_process=profile_process,
        profile_write=profile_write)
Example #8
    def test_reconnect(self, FakeClient):
        # If the connection raises a NetworkError on the first call, make sure we retry it a second time.
        FakeClient.return_value.execute.side_effect = [errors.NetworkError, '{"data": "to my face"}']
        cp = ClickhousePool()
        cp.execute("SHOW TABLES")
        assert FakeClient.return_value.execute.mock_calls == [call("SHOW TABLES"), call("SHOW TABLES")]
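The decorator that injects FakeClient is not shown above. A minimal reconstruction of the surrounding test, assuming the native driver client is patched at a hypothetical snuba.clickhouse.Client path and that errors comes from clickhouse_driver:

from unittest.mock import call, patch

from clickhouse_driver import errors

from snuba.clickhouse import ClickhousePool


class TestClickhousePool(object):
    @patch('snuba.clickhouse.Client')  # hypothetical patch target
    def test_reconnect(self, FakeClient):
        # First execute raises NetworkError, the second succeeds; the pool
        # should transparently retry the same query once.
        FakeClient.return_value.execute.side_effect = [
            errors.NetworkError, '{"data": "to my face"}']
        cp = ClickhousePool()
        cp.execute("SHOW TABLES")
        assert FakeClient.return_value.execute.mock_calls == [
            call("SHOW TABLES"), call("SHOW TABLES")]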
Example #9
import logging
import os

from flask import Flask, render_template, request
from markdown import markdown
import sentry_sdk
from sentry_sdk.integrations.flask import FlaskIntegration
import simplejson as json

from snuba import schemas, settings, state, util
from snuba.clickhouse import ClickhousePool
from snuba.replacer import get_projects_query_flags
from snuba.split import split_query


logger = logging.getLogger('snuba.api')
logging.basicConfig(level=getattr(logging, settings.LOG_LEVEL.upper()), format='%(asctime)s %(message)s')

clickhouse_rw = ClickhousePool()
clickhouse_ro = ClickhousePool(client_settings={
    'readonly': True,
})


try:
    import uwsgi
except ImportError:
    def check_down_file_exists():
        return False
else:
    def check_down_file_exists():
        try:
            return os.stat('/tmp/snuba.down').st_mtime > uwsgi.started_on
        except OSError:
            return False
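A short sketch (a hypothetical route, not the original API code) of how check_down_file_exists could gate a health check on a Flask app built from these imports:

application = Flask('snuba.api')


@application.route('/health')
def health():
    # Report unhealthy while the down file is newer than the uwsgi start time.
    down = check_down_file_exists()
    body = json.dumps({'down': down})
    return (body, 503 if down else 200, {'Content-Type': 'application/json'})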
Example #10
class BaseTest(object):
    def setup_method(self, test_method):
        assert settings.TESTING, "settings.TESTING is False, try `SNUBA_SETTINGS=test` or `make test`"

        from fixtures import raw_event

        timestamp = datetime.utcnow()
        raw_event['datetime'] = (
            timestamp - timedelta(seconds=2)).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
        raw_event['received'] = int(
            calendar.timegm((timestamp - timedelta(seconds=1)).timetuple()))
        self.event = self.wrap_raw_event(raw_event)

        self.database = 'default'
        self.table = settings.CLICKHOUSE_TABLE

        self.clickhouse = ClickhousePool()

        self.clickhouse.execute("DROP TABLE IF EXISTS %s" % self.table)
        self.clickhouse.execute(
            get_table_definition(
                name=self.table,
                engine=get_test_engine(),
            ))

        redis_client.flushdb()

    def teardown_method(self, test_method):
        self.clickhouse.execute("DROP TABLE IF EXISTS %s" % self.table)

        redis_client.flushdb()

    def create_event_for_date(self,
                              dt,
                              retention_days=settings.DEFAULT_RETENTION_DAYS):
        event = {
            'event_id': uuid.uuid4().hex,
            'project_id': 1,
            'group_id': 1,
            'deleted': 0,
        }
        event['timestamp'] = dt
        event['retention_days'] = retention_days
        return event

    def wrap_raw_event(self, event):
        "Wrap a raw event like the Sentry codebase does before sending to Kafka."

        unique = "%s:%s" % (str(event['project']), event['id'])
        primary_hash = md5(unique.encode('utf-8')).hexdigest()

        return {
            'event_id': event['id'],
            'group_id': int(primary_hash[:16], 16),
            'primary_hash': primary_hash,
            'project_id': event['project'],
            'message': event['message'],
            'platform': event['platform'],
            'datetime': event['datetime'],
            'data': event
        }

    def write_raw_events(self, events):
        if not isinstance(events, (list, tuple)):
            events = [events]

        out = []
        for event in events:
            if 'primary_hash' not in event:
                event = self.wrap_raw_event(event)
            _, processed = process_message(event)
            out.append(processed)

        return self.write_processed_events(out)

    def write_processed_events(self, events):
        if not isinstance(events, (list, tuple)):
            events = [events]

        rows = []
        for event in events:
            rows.append(row_from_processed_event(event))

        return self.write_rows(rows)

    def write_rows(self, rows):
        if not isinstance(rows, (list, tuple)):
            rows = [rows]

        write_rows(self.clickhouse,
                   table=self.table,
                   rows=rows,
                   types_check=True)
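A brief sketch of how a test might build on these helpers. The subclass below is hypothetical, and the assertion assumes ClickhousePool.execute returns rows as tuples, as the native driver does:

class TestWrites(BaseTest):
    def test_insert_raw_event(self):
        # self.event is the wrapped fixture prepared in setup_method.
        self.write_raw_events(self.event)
        result = self.clickhouse.execute("SELECT count() FROM %s" % self.table)
        assert result[0][0] == 1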