Example #1
def generate_transactions() -> None:
    from datetime import datetime

    table_writer = get_writable_storage(
        StorageKey.TRANSACTIONS).get_table_writer()

    rows = []

    for i in range(5):
        raw_transaction = get_raw_transaction()
        # Older versions of this table did not have measurements
        del raw_transaction["data"]["measurements"]

        processed = (
            table_writer.get_stream_loader().get_processor().process_message(
                (2, "insert", raw_transaction),
                KafkaMessageMetadata(0, 0, datetime.utcnow()),
            ))
        rows.extend(processed.rows)

    BatchWriterEncoderWrapper(
        table_writer.get_batch_writer(metrics=DummyMetricsBackend(
            strict=True)),
        JSONRowEncoder(),
    ).write(rows)
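Every example on this page ends with the same write step: rows produced by a storage's message processor are handed to a BatchWriterEncoderWrapper, which serializes each row with JSONRowEncoder before delegating to the underlying batch writer. A minimal sketch of that step follows; the import paths are assumptions based on the snuba codebase and may differ between versions.

# Minimal sketch of the shared write step. Import paths are assumed from the
# snuba codebase and may vary between versions.
from snuba.clickhouse.http import JSONRowEncoder
from snuba.datasets.storages import StorageKey
from snuba.datasets.storages.factory import get_writable_storage
from snuba.utils.metrics.backends.dummy import DummyMetricsBackend
from snuba.writer import BatchWriterEncoderWrapper

table_writer = get_writable_storage(StorageKey.TRANSACTIONS).get_table_writer()

rows = []  # WriterTableRow dicts, e.g. collected from processed.rows as above

BatchWriterEncoderWrapper(
    # DummyMetricsBackend(strict=True) is the metrics backend used by the
    # test helpers in these examples.
    table_writer.get_batch_writer(metrics=DummyMetricsBackend(strict=True)),
    JSONRowEncoder(),
).write(rows)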
Example #2
    def write(*, dataset: Dataset) -> RespTuple:
        from snuba.processor import InsertBatch

        rows: MutableSequence[WriterTableRow] = []
        offset_base = int(round(time.time() * 1000))
        for index, message in enumerate(json.loads(http_request.data)):
            offset = offset_base + index
            processed_message = (
                enforce_table_writer(dataset)
                .get_stream_loader()
                .get_processor()
                .process_message(
                    message,
                    KafkaMessageMetadata(
                        offset=offset, partition=0, timestamp=datetime.utcnow()
                    ),
                )
            )
            if processed_message:
                assert isinstance(processed_message, InsertBatch)
                rows.extend(processed_message.rows)

        BatchWriterEncoderWrapper(
            enforce_table_writer(dataset).get_batch_writer(metrics), JSONRowEncoder(),
        ).write(rows)

        return ("ok", 200, {"Content-Type": "text/plain"})
Example #3
File: helpers.py  Project: getsentry/snuba
def write_processed_messages(storage: WritableStorage,
                             messages: Sequence[ProcessedMessage]) -> None:
    rows: MutableSequence[WriterTableRow] = []
    for message in messages:
        assert isinstance(message, InsertBatch)
        rows.extend(message.rows)

    BatchWriterEncoderWrapper(
        storage.get_table_writer().get_batch_writer(
            metrics=DummyMetricsBackend(strict=True)),
        JSONRowEncoder(),
    ).write(rows)
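Example #3 is the test-suite form of the pattern: callers pass messages that have already been processed, the helper asserts they are InsertBatch instances, and all of their rows are written in one batch. A hypothetical call is sketched below, reusing the imports from the sketch under Example #1 plus InsertBatch from snuba.processor (as imported in Example #2); the row dict and the InsertBatch constructor arguments are illustrative placeholders, since both depend on the dataset schema and the snuba version.

# Hypothetical usage of write_processed_messages in a test. The row contents
# and the InsertBatch(...) arguments are placeholders, not a real schema.
storage = get_writable_storage(StorageKey.TRANSACTIONS)
write_processed_messages(storage, [InsertBatch([{"project_id": 1}])])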
Example #4
class ConsumerWorker(AbstractBatchWorker[KafkaPayload, ProcessedMessage]):
    def __init__(
        self,
        storage: WritableTableStorage,
        metrics: MetricsBackend,
        producer: Optional[ConfluentKafkaProducer] = None,
        replacements_topic: Optional[Topic] = None,
    ) -> None:
        self.__storage = storage
        self.producer = producer
        self.replacements_topic = replacements_topic
        self.metrics = metrics
        table_writer = storage.get_table_writer()
        self.__writer = BatchWriterEncoderWrapper(
            table_writer.get_batch_writer(metrics, {
                "load_balancing": "in_order",
                "insert_distributed_sync": 1
            }),
            JSONRowEncoder(),
        )

        self.__processor: MessageProcessor
        self.__pre_filter = table_writer.get_stream_loader().get_pre_filter()

    def _get_processor(self) -> MessageProcessor:
        try:
            return self.__processor
        except AttributeError:
            self.__processor = (
                self.__storage.get_table_writer()
                .get_stream_loader()
                .get_processor()
            )
            return self.__processor

    def process_message(
            self,
            message: Message[KafkaPayload]) -> Optional[ProcessedMessage]:

        if self.__pre_filter and self.__pre_filter.should_drop(message):
            return None

        return self._get_processor().process_message(
            rapidjson.loads(message.payload.value),
            KafkaMessageMetadata(
                offset=message.offset,
                partition=message.partition.index,
                timestamp=message.timestamp,
            ),
        )

    def delivery_callback(self, error, message):
        if error is not None:
            # errors are KafkaError objects and inherit from BaseException
            raise error

    def flush_batch(self, batch: Sequence[ProcessedMessage]):
        """First write out all new INSERTs as a single batch, then reproduce any
        event replacements such as deletions, merges and unmerges."""
        inserts: MutableSequence[WriterTableRow] = []
        replacements: MutableSequence[ReplacementBatch] = []

        for item in batch:
            if isinstance(item, InsertBatch):
                inserts.extend(item.rows)
            elif isinstance(item, ReplacementBatch):
                replacements.append(item)
            else:
                raise TypeError(f"unexpected type: {type(item)!r}")

        if inserts:
            self.__writer.write(inserts)

            self.metrics.timing("inserts", len(inserts))

        if replacements:
            for replacement in replacements:
                key = replacement.key.encode("utf-8")
                for value in replacement.values:
                    self.producer.produce(
                        self.replacements_topic.name,
                        key=key,
                        value=rapidjson.dumps(value).encode("utf-8"),
                        on_delivery=self.delivery_callback,
                    )

            self.producer.flush()
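Example #4's ConsumerWorker configures its batch writer with the ClickHouse settings load_balancing=in_order and insert_distributed_sync=1, so replicas are picked in configuration order and inserts through a distributed table are acknowledged only after the distributed write completes. flush_batch then splits a consumed batch in two: InsertBatch rows are written to ClickHouse in a single call, while ReplacementBatch items (deletions, merges and unmerges) are produced to the replacements Kafka topic, with delivery_callback re-raising any delivery error.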
Example #5
def generate_transactions(count: int) -> None:
    """
    Generate a deterministic set of events across a time range.
    """
    import calendar
    import pytz
    import uuid
    from datetime import datetime, timedelta

    table_writer = get_writable_storage(
        StorageKey.TRANSACTIONS).get_table_writer()

    rows = []

    base_time = datetime.utcnow().replace(
        minute=0, second=0, microsecond=0,
        tzinfo=pytz.utc) - timedelta(minutes=count)

    for tick in range(count):

        trace_id = "7400045b25c443b885914600aa83ad04"
        span_id = "8841662216cc598b"
        processed = (
            table_writer.get_stream_loader().get_processor().process_message(
                (
                    2,
                    "insert",
                    {
                        "project_id": 1,
                        "event_id": uuid.uuid4().hex,
                        "deleted": 0,
                        "datetime": (base_time + timedelta(minutes=tick)).isoformat(),
                        "platform": "javascript",
                        "data": {
                            # Project N sends every Nth (mod len(hashes)) hash (and platform)
                            "received": calendar.timegm(
                                (base_time + timedelta(minutes=tick)).timetuple()
                            ),
                            "type": "transaction",
                            "transaction": f"/api/do_things/{count}",
                            # XXX(dcramer): would be nice to document why these have to be naive
                            "start_timestamp": datetime.timestamp(
                                (base_time + timedelta(minutes=tick)).replace(tzinfo=None)
                            ),
                            "timestamp": datetime.timestamp(
                                (base_time + timedelta(minutes=tick, seconds=1)).replace(
                                    tzinfo=None
                                )
                            ),
                            "contexts": {
                                "trace": {
                                    "trace_id": trace_id,
                                    "span_id": span_id,
                                    "op": "http",
                                    "status": "0",
                                },
                            },
                            "request": {
                                "url": "http://127.0.0.1:/query",
                                "headers": [
                                    ["Accept-Encoding", "identity"],
                                    ["Content-Length", "398"],
                                    ["Host", "127.0.0.1:"],
                                    ["Referer", "tagstore.something"],
                                    ["Trace", "8fa73032d-1"],
                                ],
                                "data": "",
                                "method": "POST",
                                "env": {"SERVER_PORT": "1010", "SERVER_NAME": "snuba"},
                            },
                            "spans": [
                                {
                                    "op": "db",
                                    "trace_id": trace_id,
                                    "span_id": span_id + "1",
                                    "parent_span_id": None,
                                    "same_process_as_parent": True,
                                    "description": "SELECT * FROM users",
                                    "data": {},
                                    "timestamp": calendar.timegm(
                                        (base_time + timedelta(minutes=tick)).timetuple()
                                    ),
                                }
                            ],
                        },
                    },
                ),
                KafkaMessageMetadata(0, 0, base_time),
            )
        )
        rows.extend(processed.rows)

    BatchWriterEncoderWrapper(
        table_writer.get_batch_writer(metrics=DummyMetricsBackend(
            strict=True)),
        JSONRowEncoder(),
    ).write(rows)
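Example #5 builds deterministic fixtures: one synthetic transaction per minute over the count minutes leading up to the current hour, all sharing a fixed trace_id and span_id, with timestamps derived from an hour-aligned, timezone-aware base_time (the start_timestamp and timestamp fields are taken from naive copies of that time, per the XXX comment in the payload).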
Example #6
File: base.py  Project: cafebazaar/snuba
    def write_rows(self, rows: Sequence[WriterTableRow]) -> None:
        BatchWriterEncoderWrapper(
            enforce_table_writer(self.dataset).get_batch_writer(
                metrics=DummyMetricsBackend(strict=True)
            ),
            JSONRowEncoder(),
        ).write(rows)
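Example #6 is the same helper pattern as Example #3, but it resolves the batch writer through enforce_table_writer on a Dataset instead of taking a WritableStorage directly, presumably failing fast when the dataset has no writable storage.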