def __get_event(self) -> InsertEvent:
    timestamp = datetime.utcnow()
    data = {
        "datetime": (timestamp - timedelta(seconds=2)).strftime(
            settings.PAYLOAD_DATETIME_FORMAT,
        ),
        "received": int(
            calendar.timegm((timestamp - timedelta(seconds=1)).timetuple())
        ),
        **deepcopy(raw_event),
    }
    unique = "%s:%s" % (str(data["project"]), data["id"])
    primary_hash = md5(unique.encode("utf-8")).hexdigest()
    return InsertEvent({
        "event_id": data["id"],
        "group_id": int(primary_hash[:16], 16),
        "primary_hash": primary_hash,
        "project_id": data["project"],
        "message": data["message"],
        "platform": data["platform"],
        "datetime": data["datetime"],
        "data": data,
        "organization_id": data["organization_id"],
        "retention_days": settings.DEFAULT_RETENTION_DAYS,
    })

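# For reference, the group_id derivation above can be sketched in isolation:
# the project and event ids are joined, hashed with MD5, and the first 16 hex
# digits (64 bits) of the digest are reinterpreted as an integer group id.
# This is a minimal standalone sketch, not part of the test suite; the helper
# name and the example values are hypothetical.
def sketch_group_id(project_id: int, event_id: str) -> int:
    from hashlib import md5

    unique = "%s:%s" % (project_id, event_id)
    primary_hash = md5(unique.encode("utf-8")).hexdigest()
    # 16 hex digits = 64 bits, so the result is a deterministic integer id.
    return int(primary_hash[:16], 16)
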
def setup_method(self) -> None:
    self.project_id = 1
    self.platforms = ["a", "b"]
    self.minutes = 20
    self.dataset = get_dataset("events")
    self.entity_key = ENTITY_NAME_LOOKUP[self.dataset.get_default_entity()]
    self.base_time = datetime.utcnow().replace(
        minute=0, second=0, microsecond=0
    ) - timedelta(minutes=self.minutes)
    events_storage = get_writable_storage(StorageKey.ERRORS)
    write_unprocessed_events(
        events_storage,
        [
            InsertEvent({
                "event_id": uuid.uuid4().hex,
                "group_id": tick,
                "primary_hash": uuid.uuid4().hex,
                "project_id": self.project_id,
                "message": "a message",
                "platform": self.platforms[tick % len(self.platforms)],
                "datetime": (self.base_time + timedelta(minutes=tick)).strftime(
                    settings.PAYLOAD_DATETIME_FORMAT
                ),
                "data": {
                    "received": calendar.timegm(
                        (self.base_time + timedelta(minutes=tick)).timetuple()
                    ),
                },
                "organization_id": 1,
                "retention_days": settings.DEFAULT_RETENTION_DAYS,
            })
            for tick in range(self.minutes)
        ],
    )

def setup_method(self, test_method, dataset_name="events"):
    super().setup_method(test_method, dataset_name)
    self.project_id = 1
    self.platforms = ["a", "b"]
    self.minutes = 20
    self.base_time = datetime.utcnow().replace(
        minute=0, second=0, microsecond=0
    ) - timedelta(minutes=self.minutes)
    self.write_unprocessed_events([
        InsertEvent({
            "event_id": uuid.uuid4().hex,
            "group_id": tick,
            "primary_hash": uuid.uuid4().hex,
            "project_id": self.project_id,
            "message": "a message",
            "platform": self.platforms[tick % len(self.platforms)],
            "datetime": (self.base_time + timedelta(minutes=tick)).strftime(
                settings.PAYLOAD_DATETIME_FORMAT
            ),
            "data": {
                "received": calendar.timegm(
                    (self.base_time + timedelta(minutes=tick)).timetuple()
                ),
            },
            "organization_id": 1,
            "retention_days": settings.DEFAULT_RETENTION_DAYS,
        })
        for tick in range(self.minutes)
    ])

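# Both setup helpers above encode each synthetic event's time twice:
# "datetime" as a formatted UTC string and data["received"] as integer Unix
# epoch seconds. A minimal sketch of that pairing follows; the format string
# is an assumption standing in for settings.PAYLOAD_DATETIME_FORMAT, and the
# helper name is hypothetical.
def sketch_event_timestamps():
    import calendar
    from datetime import datetime

    now = datetime.utcnow()
    formatted = now.strftime("%Y-%m-%dT%H:%M:%S.%fZ")  # assumed payload format
    received = calendar.timegm(now.timetuple())  # epoch seconds, read as UTC
    return formatted, received
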
def test_error_processor() -> None:
    received_timestamp = datetime.now() - timedelta(minutes=1)
    error_timestamp = received_timestamp - timedelta(minutes=1)
    trace_id = str(uuid.uuid4())
    span_id = "deadbeef"
    error = (
        2,
        "insert",
        InsertEvent({
            "organization_id": 1,
            "retention_days": 58,
            "event_id": "dcb9d002cac548c795d1c9adbfc68040",
            "group_id": 100,
            "project_id": 300688,
            "platform": "python",
            "message": "",
            "datetime": error_timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
            "primary_hash": "04233d08ac90cf6fc015b1be5932e7e2",
            "data": {
                "event_id": "dcb9d002cac548c795d1c9adbfc68040",
                "project_id": 300688,
                "release": None,
                "dist": None,
                "platform": "python",
                "message": "",
                "datetime": error_timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                "tags": [
                    ["handled", "no"],
                    ["level", "error"],
                    ["mechanism", "excepthook"],
                    ["runtime", "CPython 3.7.6"],
                    ["runtime.name", "CPython"],
                    ["server_name", "snuba"],
                    ["environment", "dev"],
                    ["sentry:user", "this_is_me"],
                    ["sentry:release", "4d23338017cdee67daf25f2c"],
                ],
                "user": {
                    "username": "******",
                    "ip_address": "127.0.0.1",
                    "id": "still_me",
                    "email": "*****@*****.**",
                    "geo": {
                        "country_code": "XY",
                        "region": "fake_region",
                        "city": "fake_city",
                    },
                },
                "request": {
                    "url": "http://127.0.0.1:/query",
                    "headers": [
                        ["Accept-Encoding", "identity"],
                        ["Content-Length", "398"],
                        ["Host", "127.0.0.1:"],
                        ["Referer", "tagstore.something"],
                        ["Trace", "8fa73032d-1"],
                    ],
                    "data": "",
                    "method": "POST",
                    "env": {"SERVER_PORT": "1010", "SERVER_NAME": "snuba"},
                },
                "_relay_processed": True,
                "breadcrumbs": {
                    "values": [
                        {
                            "category": "snuba.utils.streams.batching",
                            "level": "info",
                            "timestamp": error_timestamp.timestamp(),
                            "data": {
                                "asctime": error_timestamp.strftime(
                                    PAYLOAD_DATETIME_FORMAT
                                )
                            },
                            "message": "New partitions assigned: {}",
                            "type": "default",
                        },
                        {
                            "category": "snuba.utils.streams.batching",
                            "level": "info",
                            "timestamp": error_timestamp.timestamp(),
                            "data": {
                                "asctime": error_timestamp.strftime(
                                    PAYLOAD_DATETIME_FORMAT
                                )
                            },
                            "message": "Flushing ",
                            "type": "default",
                        },
                        {
                            "category": "httplib",
                            "timestamp": error_timestamp.timestamp(),
                            "type": "http",
                            "data": {
                                "url": "http://127.0.0.1:8123/",
                                "status_code": 500,
                                "reason": "Internal Server Error",
                                "method": "POST",
                            },
                            "level": "info",
                        },
                    ]
                },
                "contexts": {
                    "runtime": {
                        "version": "3.7.6",
                        "type": "runtime",
                        "name": "CPython",
                        "build": "3.7.6",
                    },
                    "trace": {"trace_id": trace_id, "span_id": span_id},
                },
                "culprit": "snuba.clickhouse.http in write",
                "exception": {
                    "values": [
                        {
                            "stacktrace": {
                                "frames": [
                                    {
                                        "function": "<module>",
                                        "abs_path": "/usr/local/bin/snuba",
                                        "pre_context": [
                                            "from pkg_resources import load_entry_point",
                                            "",
                                            "if __name__ == '__main__':",
                                            "    sys.argv[0] = re.sub(r'(-script\\.pyw?|\\.exe)?$', '', sys.argv[0])",
                                            "    sys.exit(",
                                        ],
                                        "post_context": ["    )"],
                                        "vars": {
                                            "__spec__": "None",
                                            "__builtins__": "<module 'builtins' (built-in)>",
                                            "__annotations__": {},
                                            "__file__": "'/usr/local/bin/snuba'",
                                            "__loader__": "<_frozen_importlib_external.SourceFileLoader object at 0x7fbbc3a36ed0>",
                                            "__requires__": "'snuba'",
                                            "__cached__": "None",
                                            "__name__": "'__main__'",
                                            "__package__": "None",
                                            "__doc__": "None",
                                        },
                                        "module": "__main__",
                                        "filename": "snuba",
                                        "lineno": 11,
                                        "in_app": False,
                                        "data": {"orig_in_app": 1},
                                        "context_line": "        load_entry_point('snuba', 'console_scripts', 'snuba')()",
                                    },
                                ]
                            },
                            "type": "ClickHouseError",
                            "module": "snuba.clickhouse.http",
                            "value": "[171] DB::Exception: Block structure mismatch",
                            "mechanism": {"type": "excepthook", "handled": False},
                        }
                    ]
                },
                "extra": {
                    "sys.argv": [
                        "/usr/local/bin/snuba",
                        "consumer",
                        "--dataset",
                        "transactions",
                    ]
                },
                "fingerprint": ["{{ default }}"],
                "hashes": ["c8b21c571231e989060b9110a2ade7d3"],
                "hierarchical_hashes": [
                    "04233d08ac90cf6fc015b1be5932e7e3",
                    "04233d08ac90cf6fc015b1be5932e7e4",
                ],
                "key_id": "537125",
                "level": "error",
                "location": "snuba/clickhouse/http.py",
                "logger": "",
                "metadata": {
                    "function": "write",
                    "type": "ClickHouseError",
                    "value": "[171] DB::Exception: Block structure mismatch",
                    "filename": "snuba/something.py",
                },
                "modules": {
                    "cffi": "1.13.2",
                    "ipython-genutils": "0.2.0",
                    "isodate": "0.6.0",
                },
                "received": received_timestamp.timestamp(),
                "sdk": {
                    "version": "0.0.0.0.1",
                    "name": "sentry.python",
                    "packages": [{"version": "0.0.0.0.1", "name": "pypi:sentry-sdk"}],
                    "integrations": [
                        "argv",
                        "atexit",
                        "dedupe",
                        "excepthook",
                        "logging",
                        "modules",
                        "stdlib",
                        "threading",
                    ],
                },
                "timestamp": error_timestamp.timestamp(),
                "title": "ClickHouseError: [171] DB::Exception: Block structure mismatch",
                "type": "error",
                "version": "7",
            },
        }),
        None,
    )

    expected_result = {
        "project_id": 300688,
        "timestamp": error_timestamp,
        "event_id": str(UUID("dcb9d002cac548c795d1c9adbfc68040")),
        "platform": "python",
        "dist": None,
        "environment": "dev",
        "release": "4d23338017cdee67daf25f2c",
        "ip_address_v4": "127.0.0.1",
        "user": "******",
        "user_name": "me",
        "user_id": "still_me",
        "user_email": "*****@*****.**",
        "sdk_name": "sentry.python",
        "sdk_version": "0.0.0.0.1",
        "http_method": "POST",
        "http_referer": "tagstore.something",
        "trace_id": trace_id,
        "span_id": int(span_id, 16),
        "tags.key": [
            "environment",
            "handled",
            "level",
            "mechanism",
            "runtime",
            "runtime.name",
            "sentry:release",
            "sentry:user",
            "server_name",
        ],
        "tags.value": [
            "dev",
            "no",
            "error",
            "excepthook",
            "CPython 3.7.6",
            "CPython",
            "4d23338017cdee67daf25f2c",
            "this_is_me",
            "snuba",
        ],
        "contexts.key": [
            "runtime.version",
            "runtime.name",
            "runtime.build",
            "trace.trace_id",
            "trace.span_id",
            "geo.country_code",
            "geo.region",
            "geo.city",
        ],
        "contexts.value": [
            "3.7.6",
            "CPython",
            "3.7.6",
            trace_id,
            span_id,
            "XY",
            "fake_region",
            "fake_city",
        ],
        "partition": 1,
        "offset": 2,
        "message_timestamp": datetime(1970, 1, 1),
        "retention_days": 90,
        "deleted": 0,
        "group_id": 100,
        "primary_hash": str(UUID("04233d08ac90cf6fc015b1be5932e7e2")),
        "hierarchical_hashes": [
            str(UUID("04233d08ac90cf6fc015b1be5932e7e3")),
            str(UUID("04233d08ac90cf6fc015b1be5932e7e4")),
        ],
        "received": received_timestamp.astimezone(pytz.utc).replace(
            tzinfo=None, microsecond=0
        ),
        "message": "",
        "title": "ClickHouseError: [171] DB::Exception: Block structure mismatch",
        "culprit": "snuba.clickhouse.http in write",
        "level": "error",
        "location": "snuba/clickhouse/http.py",
        "version": "7",
        "type": "error",
        "exception_stacks.type": ["ClickHouseError"],
        "exception_stacks.value": ["[171] DB::Exception: Block structure mismatch"],
        "exception_stacks.mechanism_type": ["excepthook"],
        "exception_stacks.mechanism_handled": [False],
        "exception_frames.abs_path": ["/usr/local/bin/snuba"],
        "exception_frames.colno": [None],
        "exception_frames.filename": ["snuba"],
        "exception_frames.lineno": [11],
        "exception_frames.in_app": [False],
        "exception_frames.package": [None],
        "exception_frames.module": ["__main__"],
        "exception_frames.function": ["<module>"],
        "exception_frames.stack_level": [0],
        "sdk_integrations": [
            "argv",
            "atexit",
            "dedupe",
            "excepthook",
            "logging",
            "modules",
            "stdlib",
            "threading",
        ],
        "modules.name": ["cffi", "ipython-genutils", "isodate"],
        "modules.version": ["1.13.2", "0.2.0", "0.6.0"],
        "transaction_name": "",
    }

    meta = KafkaMessageMetadata(
        offset=2, partition=1, timestamp=datetime(1970, 1, 1)
    )
    processor = ErrorsProcessor({
        "environment": "environment",
        "sentry:release": "release",
        "sentry:dist": "dist",
        "sentry:user": "******",
        "transaction": "transaction_name",
        "level": "level",
    })

    processed_message = processor.process_message(error, meta)
    expected_message = InsertBatch([expected_result], None)
    # Assert on the rows first so we get a nice diff from pytest.
    assert processed_message.rows[0] == expected_message.rows[0]
    assert processed_message == expected_message

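# Two normalizations in the expected row above are easy to miss: the hex
# span_id string is stored as an integer column, and 32-character hex hashes
# are canonicalized into dashed UUID strings. A quick illustration, using
# values taken from the test above (the helper name is hypothetical):
def sketch_expected_normalizations() -> None:
    from uuid import UUID

    assert int("deadbeef", 16) == 3735928559
    assert (
        str(UUID("04233d08ac90cf6fc015b1be5932e7e2"))
        == "04233d08-ac90-cf6f-c015-b1be5932e7e2"
    )
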
def test_transform_column_names() -> None:
    """
    Runs a simple query whose selected expression names do not match the
    aliases of the expressions themselves. It verifies that the names of the
    columns in the result correspond to the SelectedExpression names and not
    to the expression aliases (which are supposed to be internal).
    """
    events_storage = get_entity(EntityKey.EVENTS).get_writable_storage()

    event_id = uuid.uuid4().hex
    event_date = datetime.utcnow()
    write_unprocessed_events(
        events_storage,
        [
            InsertEvent(
                {
                    "event_id": event_id,
                    "group_id": 10,
                    "primary_hash": uuid.uuid4().hex,
                    "project_id": 1,
                    "message": "a message",
                    "platform": "python",
                    "datetime": event_date.strftime(settings.PAYLOAD_DATETIME_FORMAT),
                    "data": {"received": time.time()},
                    "organization_id": 1,
                    "retention_days": settings.DEFAULT_RETENTION_DAYS,
                }
            )
        ],
    )

    query = Query(
        Entity(EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model()),
        selected_columns=[
            # The selected expression names are those provided by the
            # user in the query and those the user expects in the response.
            # Aliases will be internal names to prevent shadowing.
            SelectedExpression("event_id", Column("_snuba_event_id", None, "event_id")),
            SelectedExpression(
                "message",
                FunctionCall(
                    "_snuba_message",
                    "ifNull",
                    (Column(None, None, "message"), Literal(None, "")),
                ),
            ),
        ],
    )

    query_settings = HTTPRequestSettings()
    apply_query_extensions(
        query,
        {
            "timeseries": {
                "from_date": (event_date - timedelta(minutes=5)).strftime(
                    settings.PAYLOAD_DATETIME_FORMAT
                ),
                "to_date": (event_date + timedelta(minutes=1)).strftime(
                    settings.PAYLOAD_DATETIME_FORMAT
                ),
                "granularity": 3600,
            },
            "project": {"project": [1]},
        },
        query_settings,
    )

    dataset = get_dataset("events")
    timer = Timer("test")
    result = parse_and_run_query(
        dataset,
        Request(
            id="asd",
            body={},
            query=query,
            settings=query_settings,
            referrer="asd",
        ),
        timer,
    )

    data = result.result["data"]
    assert data == [{"event_id": event_id, "message": "a message"}]

    meta = result.result["meta"]
    assert meta == [
        MetaColumn(name="event_id", type="String"),
        MetaColumn(name="message", type="String"),
    ]

def setup_method(self, test_method, dataset_name="events"):
    super(BaseEventsTest, self).setup_method(test_method, dataset_name)
    self.table = enforce_table_writer(self.dataset).get_schema().get_table_name()
    self.event = InsertEvent(get_raw_event())