def setUp(self):
    """Create the SnubaTSDB under test and the reference timestamps tests use."""
    super(SnubaTSDBTest, self).setUp()
    self.db = SnubaTSDB()
    # Reference times: a seven-day window ending now, plus one sentinel a day
    # inside the window and one a day before it starts.
    utc_now = datetime.now(pytz.utc)
    self.now = utc_now
    self.start_time = utc_now - timedelta(days=7)
    self.one_day_later = self.start_time + timedelta(days=1)
    self.day_before_start_time = self.start_time - timedelta(days=1)
def __init__(self, **options):
    """Build the dummy/redis/snuba backend map, consuming per-backend options."""
    # Pull out the per-backend option dicts first; whatever remains is
    # forwarded to the base class.
    redis_options = options.pop('redis', {})
    snuba_options = options.pop('snuba', {})
    self.backends = {
        'dummy': DummyTSDB(),
        'redis': RedisTSDB(**redis_options),
        'snuba': SnubaTSDB(**snuba_options),
    }
    super(RedisSnubaTSDB, self).__init__(**options)
def __init__(self, **options):
    """Build the dummy/redis/snuba backend map, consuming per-backend options."""
    # Each backend gets its own option dict; leftovers go to the base class.
    redis_options = options.pop("redis", {})
    snuba_options = options.pop("snuba", {})
    self.backends = {
        "dummy": DummyTSDB(),
        "redis": RedisTSDB(**redis_options),
        "snuba": SnubaTSDB(**snuba_options),
    }
    super(RedisSnubaTSDB, self).__init__(**options)
def __init__(self, switchover_timestamp=None, **options):
    """
    A TSDB backend that prefers the Snuba outcomes and events datasets
    over redis wherever possible.

    Reads trigger a Snuba query; writes are no-ops since Snuba consumes
    the outcomes stream directly.  Requires the Snuba outcomes consumers
    to be running (not to be confused with the outcomes consumers in
    Sentry itself).

    :param switchover_timestamp: when set, keep serving *all* keys from
        Redis until ``time.time()`` passes this value, and only then start
        reading from Snuba.  The default ``None`` reads from Snuba
        immediately, i.e. is equivalent to a timestamp in the past.
    """
    self.switchover_timestamp = switchover_timestamp
    # Consume per-backend options before delegating the remainder upward.
    redis_options = options.pop("redis", {})
    snuba_options = options.pop("snuba", {})
    self.backends = {
        "dummy": DummyTSDB(),
        "redis": RedisTSDB(**redis_options),
        "snuba": SnubaTSDB(**snuba_options),
    }
    super().__init__(**options)
def setUp(self):
    # Fresh SnubaTSDB backend for every test; no fixture data is inserted here.
    self.db = SnubaTSDB()
def setUp(self):
    """
    Insert four hours of synthetic events into the test Snuba cluster.

    Creates one project with four environments, two groups (one hash each),
    two releases (linked to the groups via GroupRelease), then posts one
    event every 10 minutes for 4 hours to the Snuba test insert endpoint.
    Event attributes rotate on fixed periods so per-bucket aggregates are
    predictable: hash/group every 10 min, environment every 2 h, user every
    55 min, release every hour.
    """
    super(SnubaTSDBTest, self).setUp()
    self.db = SnubaTSDB()
    # Midnight UTC today: keeps events aligned to rollup bucket boundaries.
    self.now = datetime.utcnow().replace(
        hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
    )
    self.proj1 = self.create_project()
    self.proj1env1 = self.create_environment(project=self.proj1, name='test')
    self.proj1env2 = self.create_environment(project=self.proj1, name='dev')
    self.proj1env3 = self.create_environment(project=self.proj1, name='staging')
    self.proj1defaultenv = self.create_environment(project=self.proj1, name='')
    self.proj1group1 = self.create_group(self.proj1)
    self.proj1group2 = self.create_group(self.proj1)
    hash1 = '1' * 32
    hash2 = '2' * 32
    GroupHash.objects.create(project=self.proj1, group=self.proj1group1, hash=hash1)
    GroupHash.objects.create(project=self.proj1, group=self.proj1group2, hash=hash2)
    self.release1 = Release.objects.create(
        organization_id=self.organization.id,
        version='1' * 10,
        date_added=self.now,
    )
    self.release1.add_project(self.proj1)
    self.release2 = Release.objects.create(
        organization_id=self.organization.id,
        version='2' * 10,
        date_added=self.now,
    )
    self.release2.add_project(self.proj1)
    self.group1release1 = GroupRelease.objects.create(
        project_id=self.proj1.id,
        group_id=self.proj1group1.id,
        release_id=self.release1.id
    )
    self.group1release2 = GroupRelease.objects.create(
        project_id=self.proj1.id,
        group_id=self.proj1group1.id,
        release_id=self.release2.id
    )
    self.group2release1 = GroupRelease.objects.create(
        project_id=self.proj1.id,
        group_id=self.proj1group2.id,
        release_id=self.release1.id
    )
    data = json.dumps([{
        'event_id': (six.text_type(r) * 32)[:32],
        'primary_hash': [hash1, hash2][(r // 600) % 2],  # Switch every 10 mins
        'group_id': [self.proj1group1.id, self.proj1group2.id][(r // 600) % 2],
        'project_id': self.proj1.id,
        'message': 'message 1',
        'platform': 'python',
        'datetime': (self.now + timedelta(seconds=r)).strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
        'data': {
            'received': calendar.timegm(self.now.timetuple()) + r,
            'tags': {
                'foo': 'bar',
                'baz': 'quux',
                # Switch every 2 hours.  Fixed: was `% 3`, which indexes a
                # 2-element list and raises IndexError once r reaches 14400;
                # sibling fixtures use `% 2` and the pattern is identical for
                # the current range.
                'environment': [self.proj1env1.name, None][(r // 7200) % 2],
                'sentry:user': u'id:user{}'.format(r // 3300),
                'sentry:release': six.text_type(r // 3600) * 10,  # 1 per hour
            },
            'user': {
                # change every 55 min so some hours have 1 user, some have 2
                'id': u"user{}".format(r // 3300),
                'email': u"user{}@sentry.io".format(r)
            }
        },
    } for r in range(0, 14400, 600)])  # Every 10 min for 4 hours
    assert requests.post(settings.SENTRY_SNUBA + '/tests/insert', data=data).status_code == 200
    # snuba trims query windows based on first_seen/last_seen, so these need to be correct-ish
    self.proj1group1.first_seen = self.now
    self.proj1group1.last_seen = self.now + timedelta(seconds=14400)
    self.proj1group1.save()
    self.proj1group2.first_seen = self.now
    self.proj1group2.last_seen = self.now + timedelta(seconds=14400)
    self.proj1group2.save()
def setUp(self):
    """
    Drop and repopulate the test Snuba dataset with four hours of events.

    One project, two environments plus a default environment, two groups
    (one hash each), two releases.  One event is posted every 10 minutes
    for 4 hours; attributes rotate on fixed periods so rollup buckets have
    predictable counts.
    """
    # Clear any data left over from previous tests.
    assert requests.post(settings.SENTRY_SNUBA + '/tests/drop').status_code == 200
    self.db = SnubaTSDB()
    # Midnight UTC today: aligns events with rollup bucket boundaries.
    self.now = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC)
    self.proj1 = self.create_project()
    self.proj1env1 = self.create_environment(project=self.proj1, name='test')
    self.proj1env2 = self.create_environment(project=self.proj1, name='dev')
    self.proj1defaultenv = self.create_environment(project=self.proj1, name='')
    self.proj1group1 = self.create_group(self.proj1)
    self.proj1group2 = self.create_group(self.proj1)
    hash1 = '1' * 32
    hash2 = '2' * 32
    GroupHash.objects.create(project=self.proj1, group=self.proj1group1, hash=hash1)
    GroupHash.objects.create(project=self.proj1, group=self.proj1group2, hash=hash2)
    self.release1 = Release.objects.create(
        organization_id=self.organization.id,
        version='1' * 10,
        date_added=self.now,
    )
    self.release1.add_project(self.proj1)
    self.release2 = Release.objects.create(
        organization_id=self.organization.id,
        version='2' * 10,
        date_added=self.now,
    )
    self.release2.add_project(self.proj1)
    data = json.dumps([
        {
            'event_id': (six.text_type(r) * 32)[:32],
            'primary_hash': [hash1, hash2][(r // 600) % 2],  # Switch every 10 mins
            'project_id': self.proj1.id,
            'message': 'message 1',
            'platform': 'python',
            'datetime': (self.now + timedelta(seconds=r)).strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
            'data': {
                'received': calendar.timegm(self.now.timetuple()) + r,
                'tags': {
                    'foo': 'bar',
                    'baz': 'quux',
                    # Switch every 2 hours
                    'environment': [self.proj1env1.name, None][(r // 7200) % 2],
                    # NOTE(review): '******' contains no replacement fields, so
                    # .format() is a no-op and every event gets the same literal
                    # tag value — presumably redacted from 'id:user{}'; confirm.
                    'sentry:user': '******'.format(r // 3300),
                    'sentry:release': six.text_type(r // 3600) * 10,  # 1 per hour
                },
                'sentry.interfaces.User': {
                    # change every 55 min so some hours have 1 user, some have 2
                    'id': "user{}".format(r // 3300),
                    'email': "user{}@sentry.io".format(r)
                }
            },
        } for r in range(0, 14400, 600)
    ])  # Every 10 min for 4 hours
    assert requests.post(settings.SENTRY_SNUBA + '/tests/insert', data=data).status_code == 200
class SnubaTSDBTest(TestCase):
    """Unit tests for SnubaTSDB that mock the Snuba HTTP API with `responses`."""

    def setUp(self):
        # Backend under test; no real Snuba cluster is contacted in this class.
        self.db = SnubaTSDB()

    @responses.activate
    def test_result_shape(self):
        """
        Tests that the results from the different TSDB methods have the
        expected format.
        """
        now = parse_datetime('2018-03-09T01:00:00Z')
        project_id = 194503
        dts = [now + timedelta(hours=i) for i in range(4)]
        with responses.RequestsMock() as rsps:
            def snuba_response(request):
                # Generic mock: echo one row with value 1 for every groupby
                # column and aggregate the query asked for, so each TSDB
                # method gets a structurally valid (if meaningless) payload.
                body = json.loads(request.body)
                aggs = body.get('aggregations', [])
                meta = [{
                    'name': col
                } for col in body['groupby'] + [a[2] for a in aggs]]
                datum = {col['name']: 1 for col in meta}
                if 'time' in datum:
                    # The time column must be a parseable timestamp string.
                    datum['time'] = '2018-03-09T01:00:00Z'
                for agg in aggs:
                    if agg[0].startswith('topK'):
                        # topK aggregates return lists, not scalars.
                        datum[agg[2]] = [1]
                return (200, {}, json.dumps({'data': [datum], 'meta': meta}))

            rsps.add_callback(responses.POST, snuba.SNUBA + '/query', callback=snuba_response)

            results = self.db.get_most_frequent(
                TSDBModel.frequent_issues_by_project, [project_id], dts[0], dts[-1])
            assert has_shape(results, {1: [(1, 1.0)]})

            results = self.db.get_most_frequent_series(
                TSDBModel.frequent_issues_by_project, [project_id], dts[0], dts[-1])
            assert has_shape(results, {1: [(1, {1: 1.0})]})

            items = {
                project_id: (0, 1, 2)  # {project_id: (issue_id, issue_id, ...)}
            }
            results = self.db.get_frequency_series(
                TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1])
            assert has_shape(results, {1: [(1, {1: 1})]})

            results = self.db.get_frequency_totals(
                TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1])
            assert has_shape(results, {1: {1: 1}})

            results = self.db.get_range(TSDBModel.project, [project_id], dts[0], dts[-1])
            assert has_shape(results, {1: [(1, 1)]})

            results = self.db.get_distinct_counts_series(
                TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1])
            assert has_shape(results, {1: [(1, 1)]})

            results = self.db.get_distinct_counts_totals(
                TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1])
            assert has_shape(results, {1: 1})

            results = self.db.get_distinct_counts_union(
                TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1])
            assert has_shape(results, 1)

    @responses.activate
    def test_groups(self):
        """Group queries send an issue->hashes map restricted to requested groups."""
        now = parse_datetime('2018-03-09T01:00:00Z')
        dts = [now + timedelta(hours=i) for i in range(4)]
        project = self.create_project()
        group = self.create_group(project=project)
        GroupHash.objects.create(project=project, group=group, hash='0' * 32)
        # A second group that is NOT queried; its hash must not be sent.
        group2 = self.create_group(project=project)
        GroupHash.objects.create(project=project, group=group2, hash='1' * 32)
        with responses.RequestsMock() as rsps:
            def snuba_response(request):
                body = json.loads(request.body)
                assert body['aggregations'] == [['count()', None, 'aggregate']]
                assert body['project'] == [project.id]
                assert body['groupby'] == ['issue', 'time']
                # Assert issue->hash map is generated, but only for referenced issues
                assert [group.id, ['0' * 32]] in body['issues']
                assert [group2.id, ['1' * 32]] not in body['issues']
                return (200, {}, json.dumps({
                    'data': [{
                        'time': '2018-03-09T01:00:00Z',
                        'issue': 1,
                        'aggregate': 100
                    }],
                    'meta': [{
                        'name': 'time'
                    }, {
                        'name': 'issue'
                    }, {
                        'name': 'aggregate'
                    }]
                }))

            rsps.add_callback(responses.POST, snuba.SNUBA + '/query', callback=snuba_response)
            results = self.db.get_range(TSDBModel.group, [group.id], dts[0], dts[-1])
            assert results is not None

    @responses.activate
    def test_releases(self):
        """Release queries group by release and filter on the release versions."""
        now = parse_datetime('2018-03-09T01:00:00Z')
        project = self.create_project()
        release = Release.objects.create(
            organization_id=self.organization.id,
            version='version X',
            date_added=now,
        )
        release.add_project(project)
        dts = [now + timedelta(hours=i) for i in range(4)]
        with responses.RequestsMock() as rsps:
            def snuba_response(request):
                body = json.loads(request.body)
                assert body['aggregations'] == [['count()', None, 'aggregate']]
                assert body['project'] == [project.id]
                assert body['groupby'] == ['release', 'time']
                assert ['release', 'IN', ['version X']] in body['conditions']
                return (200, {}, json.dumps({
                    'data': [{
                        'release': 'version X',
                        'time': '2018-03-09T01:00:00Z',
                        'aggregate': 100
                    }],
                    'meta': [{
                        'name': 'release'
                    }, {
                        'name': 'time'
                    }, {
                        'name': 'aggregate'
                    }]
                }))

            rsps.add_callback(responses.POST, snuba.SNUBA + '/query', callback=snuba_response)
            results = self.db.get_range(TSDBModel.release, [release.id], dts[0], dts[-1])
            # Release ids are mapped back from the version strings returned by Snuba.
            assert results == {release.id: [(to_timestamp(now), 100)]}

    @responses.activate
    def test_environment(self):
        """Passing environment_id adds an environment-name filter condition."""
        now = parse_datetime('2018-03-09T01:00:00Z')
        project = self.create_project()
        env = self.create_environment(project=project, name="prod")
        dts = [now + timedelta(hours=i) for i in range(4)]
        with responses.RequestsMock() as rsps:
            def snuba_response(request):
                body = json.loads(request.body)
                assert body['aggregations'] == [['count()', None, 'aggregate']]
                assert body['project'] == [project.id]
                assert body['groupby'] == ['project_id', 'time']
                assert ['environment', 'IN', ['prod']] in body['conditions']
                return (200, {}, json.dumps({
                    'data': [{
                        'project_id': project.id,
                        'time': '2018-03-09T01:00:00Z',
                        'aggregate': 100
                    }],
                    'meta': [{
                        'name': 'project_id'
                    }, {
                        'name': 'time'
                    }, {
                        'name': 'aggregate'
                    }]
                }))

            rsps.add_callback(responses.POST, snuba.SNUBA + '/query', callback=snuba_response)
            results = self.db.get_range(TSDBModel.project, [project.id],
                                        dts[0], dts[-1],
                                        environment_id=env.id)
            assert results == {project.id: [(to_timestamp(now), 100)]}

    def test_invalid_model(self):
        """Models without a Snuba mapping raise rather than querying."""
        with pytest.raises(Exception) as ex:
            self.db.get_range(TSDBModel.project_total_received_discarded, [], None, None)
        # NOTE(review): `.message` is a Python 2 exception idiom; under
        # Python 3 this would need `str(ex.value)`.
        assert "Unsupported TSDBModel" in ex.value.message
def setUp(self):
    """
    Drop and repopulate the test Snuba dataset with four hours of events.

    Single project, two environments (all events tagged with the first one),
    two groups, two releases; one event every 10 minutes for 4 hours with
    attributes rotating on fixed periods so aggregates are predictable.
    """
    # Clear any data left over from previous tests.
    assert requests.post(settings.SENTRY_SNUBA + '/tests/drop').status_code == 200
    self.db = SnubaTSDB()
    # Midnight UTC today: aligns events with rollup bucket boundaries.
    self.now = datetime.utcnow().replace(
        hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
    )
    self.proj1 = self.create_project()
    self.proj1env1 = self.create_environment(project=self.proj1, name='test')
    self.proj1env2 = self.create_environment(project=self.proj1, name='dev')
    self.proj1group1 = self.create_group(self.proj1)
    self.proj1group2 = self.create_group(self.proj1)
    hash1 = '1' * 32
    hash2 = '2' * 32
    GroupHash.objects.create(project=self.proj1, group=self.proj1group1, hash=hash1)
    GroupHash.objects.create(project=self.proj1, group=self.proj1group2, hash=hash2)
    self.release1 = Release.objects.create(
        organization_id=self.organization.id,
        version='1' * 10,
        date_added=self.now,
    )
    self.release1.add_project(self.proj1)
    self.release2 = Release.objects.create(
        organization_id=self.organization.id,
        version='2' * 10,
        date_added=self.now,
    )
    self.release2.add_project(self.proj1)
    data = json.dumps([{
        'event_id': (six.text_type(r) * 32)[:32],
        # Alternate hashes (and hence groups) every 10 minutes.
        'primary_hash': [hash1, hash2][(r // 600) % 2],
        'project_id': self.proj1.id,
        'message': 'message 1',
        'platform': 'python',
        'datetime': (self.now + timedelta(seconds=r)).strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
        'data': {
            'received': calendar.timegm(self.now.timetuple()) + r,
            'tags': {
                'foo': 'bar',
                'baz': 'quux',
                # All events carry the first environment in this variant.
                'environment': self.proj1env1.name,
                'sentry:release': six.text_type(r // 3600) * 10,  # 1 per hour
            },
            'sentry.interfaces.User': {
                # change every 55 min so some hours have 1 user, some have 2
                'id': "user{}".format(r // 3300),
                'email': "user{}@sentry.io".format(r)
            }
        },
    } for r in range(0, 14400, 600)])  # Every 10 min for 4 hours
    assert requests.post(settings.SENTRY_SNUBA + '/tests/insert', data=data).status_code == 200
class SnubaTSDBTest(TestCase):
    """
    Integration tests for SnubaTSDB against a live test Snuba cluster.

    setUp() inserts one event every 10 minutes for 4 hours with attributes
    rotating on fixed periods (hash/group every 10 min, environment every
    2 h, user every 55 min, release every hour), so every per-bucket count
    asserted below is derivable from those periods.
    """

    def setUp(self):
        # Clear any data left over from previous tests.
        assert requests.post(settings.SENTRY_SNUBA + '/tests/drop').status_code == 200
        self.db = SnubaTSDB()
        # Midnight UTC today: aligns events with rollup bucket boundaries.
        self.now = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC)
        self.proj1 = self.create_project()
        self.proj1env1 = self.create_environment(project=self.proj1, name='test')
        self.proj1env2 = self.create_environment(project=self.proj1, name='dev')
        self.proj1defaultenv = self.create_environment(project=self.proj1, name='')
        self.proj1group1 = self.create_group(self.proj1)
        self.proj1group2 = self.create_group(self.proj1)
        hash1 = '1' * 32
        hash2 = '2' * 32
        GroupHash.objects.create(project=self.proj1, group=self.proj1group1, hash=hash1)
        GroupHash.objects.create(project=self.proj1, group=self.proj1group2, hash=hash2)
        self.release1 = Release.objects.create(
            organization_id=self.organization.id,
            version='1' * 10,
            date_added=self.now,
        )
        self.release1.add_project(self.proj1)
        self.release2 = Release.objects.create(
            organization_id=self.organization.id,
            version='2' * 10,
            date_added=self.now,
        )
        self.release2.add_project(self.proj1)
        data = json.dumps([
            {
                'event_id': (six.text_type(r) * 32)[:32],
                'primary_hash': [hash1, hash2][(r // 600) % 2],  # Switch every 10 mins
                'project_id': self.proj1.id,
                'message': 'message 1',
                'platform': 'python',
                'datetime': (self.now + timedelta(seconds=r)).strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
                'data': {
                    'received': calendar.timegm(self.now.timetuple()) + r,
                    'tags': {
                        'foo': 'bar',
                        'baz': 'quux',
                        # Switch every 2 hours
                        'environment': [self.proj1env1.name, None][(r // 7200) % 2],
                        # NOTE(review): '******' has no replacement fields, so
                        # .format() is a no-op — presumably redacted from
                        # 'id:user{}'; confirm against the original fixture.
                        'sentry:user': '******'.format(r // 3300),
                        'sentry:release': six.text_type(r // 3600) * 10,  # 1 per hour
                    },
                    'sentry.interfaces.User': {
                        # change every 55 min so some hours have 1 user, some have 2
                        'id': "user{}".format(r // 3300),
                        'email': "user{}@sentry.io".format(r)
                    }
                },
            } for r in range(0, 14400, 600)
        ])  # Every 10 min for 4 hours
        assert requests.post(settings.SENTRY_SNUBA + '/tests/insert', data=data).status_code == 200

    def test_range_groups(self):
        """Event counts per group: 3 per hour (one event per alternate 10-min slot)."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(TSDBModel.group, [self.proj1group1.id],
                                 dts[0], dts[-1], rollup=3600) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }

        # Multiple groups
        assert self.db.get_range(TSDBModel.group,
                                 [self.proj1group1.id, self.proj1group2.id],
                                 dts[0], dts[-1], rollup=3600) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }

    def test_range_releases(self):
        """Release '1111111111' is tagged only during the second hour (r//3600 == 1)."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(TSDBModel.release, [self.release1.id],
                                 dts[0], dts[-1], rollup=3600) == {
            self.release1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_project(self):
        """All 6 events per hour land on the single project."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(TSDBModel.project, [self.proj1.id],
                                 dts[0], dts[-1], rollup=3600) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }

    def test_range_environment_filter(self):
        """Environment tag alternates every 2 h: env1 for hours 0-1, untagged after."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(TSDBModel.project, [self.proj1.id],
                                 dts[0], dts[-1], rollup=3600,
                                 environment_id=self.proj1env1.id) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

        # No events submitted for env2
        assert self.db.get_range(TSDBModel.project, [self.proj1.id],
                                 dts[0], dts[-1], rollup=3600,
                                 environment_id=self.proj1env2.id) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

        # Events submitted with no environment should match default environment
        assert self.db.get_range(TSDBModel.project, [self.proj1.id],
                                 dts[0], dts[-1], rollup=3600,
                                 environment_id=self.proj1defaultenv.id) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }

    def test_range_rollups(self):
        """Daily and minutely rollups bucket the same 24 events correctly."""
        # Daily
        daystart = self.now.replace(hour=0)  # day buckets start on day boundaries
        dts = [daystart + timedelta(days=i) for i in range(2)]
        assert self.db.get_range(TSDBModel.project, [self.proj1.id],
                                 dts[0], dts[-1], rollup=86400) == {
            self.proj1.id: [(timestamp(dts[0]), 24), (timestamp(dts[1]), 0)]
        }

        # Minutely
        dts = [self.now + timedelta(minutes=i) for i in range(120)]
        # Expect every 10th minute to have a 1, else 0
        expected = [(to_timestamp(d), int(i % 10 == 0)) for i, d in enumerate(dts)]
        assert self.db.get_range(TSDBModel.project, [self.proj1.id],
                                 dts[0], dts[-1], rollup=60) == {
            self.proj1.id: expected
        }

    def test_distinct_counts_series_users(self):
        """Unique users per hour: the user id changes every 55 min (r // 3300)."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group, [self.proj1group1.id],
            dts[0], dts[-1], rollup=3600) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 1),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 2),
            ],
        }

        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project, [self.proj1.id],
            dts[0], dts[-1], rollup=3600) == {
            self.proj1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 2),
                (timestamp(dts[2]), 2),
                (timestamp(dts[3]), 2),
            ],
        }

    def test_get_distinct_counts_totals_users(self):
        """Unique-user totals over whole windows (not per-bucket series).

        Renamed from `get_distinct_counts_totals_users`: without the `test_`
        prefix pytest never collected or ran these assertions.
        """
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group, [self.proj1group1.id],
            self.now, self.now + timedelta(hours=4), rollup=3600) == {
            self.proj1group1.id: 2,  # 2 unique users overall
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group, [self.proj1group1.id],
            self.now, self.now, rollup=3600) == {
            self.proj1group1.id: 1,  # Only 1 unique user in the first hour
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project, [self.proj1.id],
            self.now, self.now + timedelta(hours=4), rollup=3600) == {
            self.proj1.id: 2,
        }

    def test_most_frequent(self):
        """Groups ranked by event frequency within the project."""
        assert self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {
            self.proj1.id: [
                (self.proj1group1.id, 2.0),
                (self.proj1group2.id, 1.0),
            ],
        }

    def test_frequency_series(self):
        """Per-bucket release frequencies for each requested (group, releases) pair."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group,
            {
                self.proj1group1.id: (
                    self.release1.id,
                    self.release2.id,
                ),
                self.proj1group2.id: (self.release1.id, )
            },
            dts[0], dts[-1],
            rollup=3600,
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), {
                    self.release1.id: 0,
                    self.release2.id: 0,
                }),
                (timestamp(dts[1]), {
                    self.release1.id: 3,
                    self.release2.id: 0,
                }),
                (timestamp(dts[2]), {
                    self.release1.id: 0,
                    self.release2.id: 3,
                }),
                (timestamp(dts[3]), {
                    self.release1.id: 0,
                    self.release2.id: 0,
                }),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), {
                    self.release1.id: 0,
                }),
                (timestamp(dts[1]), {
                    self.release1.id: 3,
                }),
                (timestamp(dts[2]), {
                    self.release1.id: 0,
                }),
                (timestamp(dts[3]), {
                    self.release1.id: 0,
                }),
            ],
        }

    def test_result_shape(self):
        """
        Tests that the results from the different TSDB methods have the
        expected format.
        """
        project_id = self.proj1.id
        dts = [self.now + timedelta(hours=i) for i in range(4)]

        results = self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project, [project_id], dts[0], dts[0])
        assert has_shape(results, {1: [(1, 1.0)]})

        results = self.db.get_most_frequent_series(
            TSDBModel.frequent_issues_by_project, [project_id], dts[0], dts[0])
        assert has_shape(results, {1: [(1, {1: 1.0})]})

        items = {
            # {project_id: (issue_id, issue_id, ...)}
            project_id: (self.proj1group1.id, self.proj1group2.id)
        }
        results = self.db.get_frequency_series(
            TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1])
        assert has_shape(results, {1: [(1, {1: 1})]})

        results = self.db.get_frequency_totals(
            TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1])
        assert has_shape(results, {1: {1: 1}})

        results = self.db.get_range(TSDBModel.project, [project_id], dts[0], dts[-1])
        assert has_shape(results, {1: [(1, 1)]})

        results = self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1])
        assert has_shape(results, {1: [(1, 1)]})

        results = self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1])
        assert has_shape(results, {1: 1})

        results = self.db.get_distinct_counts_union(
            TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1])
        assert has_shape(results, 1)
def __init__(self, **options):
    """Hold the dummy, redis and snuba backends as separate attributes."""
    # Per-backend option dicts are consumed here; the remainder is handed
    # to the base class untouched.
    redis_options = options.pop('redis', {})
    snuba_options = options.pop('snuba', {})
    self.dummy = DummyTSDB()
    self.redis = RedisTSDB(**redis_options)
    self.snuba = SnubaTSDB(**snuba_options)
    super(RedisSnubaTSDB, self).__init__(**options)
class SnubaTSDBTest(OutcomesSnubaTest):
    """Tests for SnubaTSDB ranges backed by the Snuba outcomes dataset."""

    def setUp(self):
        super().setUp()
        self.db = SnubaTSDB()

        # Set up the times
        self.now = datetime.now(pytz.utc)
        self.start_time = self.now - timedelta(days=7)
        self.one_day_later = self.start_time + timedelta(days=1)
        self.day_before_start_time = self.start_time - timedelta(days=1)

    def test_organization_outcomes(self):
        """Org-scoped ranges count only this org's outcomes within the window."""
        other_organization = self.create_organization()

        for outcome in [Outcome.ACCEPTED, Outcome.RATE_LIMITED, Outcome.FILTERED]:
            # Quantity 3 at window start, 4 one day later.
            self.store_outcomes(
                self.organization.id, self.project.id, outcome.value, self.start_time, 1, 3
            )
            self.store_outcomes(
                self.organization.id, self.project.id, outcome.value, self.one_day_later, 1, 4
            )

            # Also create some outcomes we shouldn't be querying
            self.store_outcomes(
                other_organization.id, self.project.id, outcome.value, self.one_day_later, 1, 5
            )
            self.store_outcomes(
                self.organization.id,
                self.project.id,
                outcome.value,
                self.day_before_start_time,
                1,
                6,
            )

        # total_received sums all three outcome categories (3 * 3 / 4 * 3);
        # rejected and blacklisted each map to a single category.
        for tsdb_model, granularity, floor_func, start_time_count, day_later_count in [
            (TSDBModel.organization_total_received, 3600, floor_to_hour_epoch, 3 * 3, 4 * 3),
            (TSDBModel.organization_total_rejected, 3600, floor_to_hour_epoch, 3, 4),
            (TSDBModel.organization_total_blacklisted, 3600, floor_to_hour_epoch, 3, 4),
            (TSDBModel.organization_total_received, 10, floor_to_10s_epoch, 3 * 3, 4 * 3),
            (TSDBModel.organization_total_rejected, 10, floor_to_10s_epoch, 3, 4),
            (TSDBModel.organization_total_blacklisted, 10, floor_to_10s_epoch, 3, 4),
        ]:
            # Query SnubaTSDB
            response = self.db.get_range(
                tsdb_model, [self.organization.id], self.start_time, self.now, granularity, None
            )

            # Assert that the response has values set for the times we expect, and nothing more
            assert self.organization.id in response
            response_dict = {k: v for (k, v) in response[self.organization.id]}

            assert response_dict[floor_func(self.start_time)] == start_time_count
            assert response_dict[floor_func(self.one_day_later)] == day_later_count

            for time, count in response[self.organization.id]:
                if time not in [floor_func(self.start_time), floor_func(self.one_day_later)]:
                    assert count == 0

    def test_project_outcomes(self):
        """Project-scoped ranges exclude sibling projects and out-of-window data."""
        other_project = self.create_project(organization=self.organization)

        for outcome in [Outcome.ACCEPTED, Outcome.RATE_LIMITED, Outcome.FILTERED]:
            self.store_outcomes(
                self.organization.id, self.project.id, outcome.value, self.start_time, 1, 3
            )
            self.store_outcomes(
                self.organization.id, self.project.id, outcome.value, self.one_day_later, 1, 4
            )

            # Also create some outcomes we shouldn't be querying
            self.store_outcomes(
                self.organization.id, other_project.id, outcome.value, self.one_day_later, 1, 5
            )
            self.store_outcomes(
                self.organization.id,
                self.project.id,
                outcome.value,
                self.day_before_start_time,
                1,
                6,
            )

        for tsdb_model, granularity, floor_func, start_time_count, day_later_count in [
            (TSDBModel.project_total_received, 3600, floor_to_hour_epoch, 3 * 3, 4 * 3),
            (TSDBModel.project_total_rejected, 3600, floor_to_hour_epoch, 3, 4),
            (TSDBModel.project_total_blacklisted, 3600, floor_to_hour_epoch, 3, 4),
            (TSDBModel.project_total_received, 10, floor_to_10s_epoch, 3 * 3, 4 * 3),
            (TSDBModel.project_total_rejected, 10, floor_to_10s_epoch, 3, 4),
            (TSDBModel.project_total_blacklisted, 10, floor_to_10s_epoch, 3, 4),
        ]:
            response = self.db.get_range(
                tsdb_model, [self.project.id], self.start_time, self.now, granularity, None
            )

            # Assert that the response has values set for the times we expect, and nothing more
            assert self.project.id in response
            response_dict = {k: v for (k, v) in response[self.project.id]}

            assert response_dict[floor_func(self.start_time)] == start_time_count
            assert response_dict[floor_func(self.one_day_later)] == day_later_count

            for time, count in response[self.project.id]:
                if time not in [floor_func(self.start_time), floor_func(self.one_day_later)]:
                    assert count == 0

    def test_key_outcomes(self):
        """Key-scoped ranges exclude outcomes attributed to other project keys."""
        project_key = self.create_project_key(project=self.project)
        other_project = self.create_project(organization=self.organization)
        other_project_key = self.create_project_key(project=other_project)

        for outcome in [Outcome.ACCEPTED, Outcome.RATE_LIMITED, Outcome.FILTERED]:
            self.store_outcomes(
                self.organization.id,
                self.project.id,
                outcome.value,
                self.start_time,
                project_key.id,
                3,
            )
            self.store_outcomes(
                self.organization.id,
                self.project.id,
                outcome.value,
                self.one_day_later,
                project_key.id,
                4,
            )

            # Also create some outcomes we shouldn't be querying
            self.store_outcomes(
                self.organization.id,
                self.project.id,
                outcome.value,
                self.one_day_later,
                other_project_key.id,
                5,
            )
            self.store_outcomes(
                self.organization.id,
                self.project.id,
                outcome.value,
                self.day_before_start_time,
                project_key.id,
                6,
            )

        for tsdb_model, granularity, floor_func, start_time_count, day_later_count in [
            (TSDBModel.key_total_received, 3600, floor_to_hour_epoch, 3 * 3, 4 * 3),
            (TSDBModel.key_total_rejected, 3600, floor_to_hour_epoch, 3, 4),
            (TSDBModel.key_total_blacklisted, 3600, floor_to_hour_epoch, 3, 4),
            (TSDBModel.key_total_received, 10, floor_to_10s_epoch, 3 * 3, 4 * 3),
            (TSDBModel.key_total_rejected, 10, floor_to_10s_epoch, 3, 4),
            (TSDBModel.key_total_blacklisted, 10, floor_to_10s_epoch, 3, 4),
        ]:
            response = self.db.get_range(
                # with [project_key.id, str(project_key.id), we are imitating the hack in
                # project_key_stats.py cause that is what `get_range` will be called with.
                tsdb_model,
                [project_key.id, str(project_key.id)],
                self.start_time,
                self.now,
                granularity,
                None,
            )

            # Assert that the response has values set for the times we expect, and nothing more
            assert project_key.id in response
            response_dict = {k: v for (k, v) in response[project_key.id]}

            assert response_dict[floor_func(self.start_time)] == start_time_count
            assert response_dict[floor_func(self.one_day_later)] == day_later_count

            for time, count in response[project_key.id]:
                if time not in [floor_func(self.start_time), floor_func(self.one_day_later)]:
                    assert count == 0

    def test_all_tsdb_models_have_an_entry_in_model_query_settings(self):
        # Ensure that the models we expect to be using Snuba are using Snuba
        exceptions = [
            TSDBModel.project_total_forwarded  # this is not outcomes and will be moved separately
        ]

        # does not include the internal TSDB model
        models = [
            model for model in list(TSDBModel) if 0 < model.value < 700 and model not in exceptions
        ]
        for model in models:
            assert model in SnubaTSDB.model_query_settings

    def test_outcomes_have_a_10s_setting(self):
        """Every outcome-backed model must support the 10-second rollup."""
        exceptions = [
            TSDBModel.project_total_forwarded  # this is not outcomes and will be moved separately
        ]

        def is_an_outcome(model):
            if model in exceptions:
                return False

            # 100 - 200: project outcomes
            # 200 - 300: organization outcomes
            # 500 - 600: key outcomes
            # 600 - 700: filtered project based outcomes
            return (
                (100 <= model.value < 200)
                or (200 <= model.value < 300)
                or (500 <= model.value < 600)
                or (600 <= model.value < 700)
            )

        models = [x for x in list(TSDBModel) if is_an_outcome(x)]
        for model in models:
            assert model in SnubaTSDB.lower_rollup_query_settings
class SnubaTSDBTest(TestCase, SnubaTestCase):
    """
    Integration tests for SnubaTSDB against the Snuba events dataset.

    The fixture inserts one event every 10 minutes for 4 hours, alternating
    group/hash every 10 minutes, switching environment every 2 hours,
    rotating the release tag every hour and the user roughly every 55
    minutes.  All expected counts in the assertions below derive from that
    layout.
    """

    def setUp(self):
        super(SnubaTSDBTest, self).setUp()
        self.db = SnubaTSDB()

        # Midnight UTC today, so hour/day rollup buckets align exactly.
        self.now = datetime.utcnow().replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )

        self.proj1 = self.create_project()
        self.proj1env1 = self.create_environment(project=self.proj1, name='test')
        self.proj1env2 = self.create_environment(project=self.proj1, name='dev')
        self.proj1env3 = self.create_environment(project=self.proj1, name='staging')
        self.proj1defaultenv = self.create_environment(project=self.proj1, name='')

        self.proj1group1 = self.create_group(self.proj1)
        self.proj1group2 = self.create_group(self.proj1)

        hash1 = '1' * 32
        hash2 = '2' * 32

        GroupHash.objects.create(project=self.proj1, group=self.proj1group1, hash=hash1)
        GroupHash.objects.create(project=self.proj1, group=self.proj1group2, hash=hash2)

        self.release1 = Release.objects.create(
            organization_id=self.organization.id,
            version='1' * 10,
            date_added=self.now,
        )
        self.release1.add_project(self.proj1)
        self.release2 = Release.objects.create(
            organization_id=self.organization.id,
            version='2' * 10,
            date_added=self.now,
        )
        self.release2.add_project(self.proj1)

        self.group1release1 = GroupRelease.objects.create(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release1.id
        )
        self.group1release2 = GroupRelease.objects.create(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release2.id
        )
        self.group2release1 = GroupRelease.objects.create(
            project_id=self.proj1.id,
            group_id=self.proj1group2.id,
            release_id=self.release1.id
        )

        data = json.dumps([{
            'event_id': (six.text_type(r) * 32)[:32],
            'primary_hash': [hash1, hash2][(r // 600) % 2],  # Switch every 10 mins
            'group_id': [self.proj1group1.id, self.proj1group2.id][(r // 600) % 2],
            'project_id': self.proj1.id,
            'message': 'message 1',
            'platform': 'python',
            'datetime': (self.now + timedelta(seconds=r)).strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
            'data': {
                'received': calendar.timegm(self.now.timetuple()) + r,
                'tags': {
                    'foo': 'bar',
                    'baz': 'quux',
                    # Switch every 2 hours
                    'environment': [self.proj1env1.name, None][(r // 7200) % 3],
                    'sentry:user': u'id:user{}'.format(r // 3300),
                    'sentry:release': six.text_type(r // 3600) * 10,  # 1 per hour
                },
                'user': {
                    # change every 55 min so some hours have 1 user, some have 2
                    'id': u"user{}".format(r // 3300),
                    'email': u"user{}@sentry.io".format(r)
                }
            },
        } for r in range(0, 14400, 600)])  # Every 10 min for 4 hours

        assert requests.post(settings.SENTRY_SNUBA + '/tests/insert', data=data).status_code == 200

        # snuba trims query windows based on first_seen/last_seen, so these need to be correct-ish
        self.proj1group1.first_seen = self.now
        self.proj1group1.last_seen = self.now + timedelta(seconds=14400)
        self.proj1group1.save()
        self.proj1group2.first_seen = self.now
        self.proj1group2.last_seen = self.now + timedelta(seconds=14400)
        self.proj1group2.save()

    def test_range_groups(self):
        """Per-group event counts: groups alternate every 10 min -> 3 events/hour each."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]

        assert self.db.get_range(
            TSDBModel.group,
            [self.proj1group1.id],
            dts[0], dts[-1],
            rollup=3600
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }

        # Multiple groups
        assert self.db.get_range(
            TSDBModel.group,
            [self.proj1group1.id, self.proj1group2.id],
            dts[0], dts[-1],
            rollup=3600
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }

        # Empty key list short-circuits to an empty result.
        assert self.db.get_range(
            TSDBModel.group,
            [],
            dts[0], dts[-1],
            rollup=3600
        ) == {}

    def test_range_releases(self):
        """release1's tag ('1' * 10) only matches events in the second hour."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.release,
            [self.release1.id],
            dts[0], dts[-1],
            rollup=3600
        ) == {
            self.release1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_project(self):
        """Project-wide counts: 6 events per hour (one every 10 minutes)."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0], dts[-1],
            rollup=3600
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }

    def test_range_environment_filter(self):
        """The environment tag alternates every 2h between env1 and unset (default env)."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0], dts[-1],
            rollup=3600,
            environment_ids=[self.proj1env1.id]
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

        # No events submitted for env2
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0], dts[-1],
            rollup=3600,
            environment_ids=[self.proj1env2.id],
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

        # Events submitted with no environment should match default environment
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0], dts[-1],
            rollup=3600,
            environment_ids=[self.proj1defaultenv.id],
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }

    def test_range_rollups(self):
        # Daily
        daystart = self.now.replace(hour=0)  # day buckets start on day boundaries
        dts = [daystart + timedelta(days=i) for i in range(2)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0], dts[-1],
            rollup=86400
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 24),
                (timestamp(dts[1]), 0)
            ]
        }

        # Minutely
        dts = [self.now + timedelta(minutes=i) for i in range(120)]
        # Expect every 10th minute to have a 1, else 0
        expected = [(to_timestamp(d), int(i % 10 == 0)) for i, d in enumerate(dts)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0], dts[-1],
            rollup=60
        ) == {
            self.proj1.id: expected
        }

    def test_distinct_counts_series_users(self):
        """Unique-user series; the user id rotates every 3300s (~55 min)."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            dts[0], dts[-1],
            rollup=3600
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 1),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 2),
            ],
        }

        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project,
            [self.proj1.id],
            dts[0], dts[-1],
            rollup=3600
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 2),
                (timestamp(dts[2]), 2),
                (timestamp(dts[3]), 2),
            ],
        }

        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group,
            [],
            dts[0], dts[-1],
            rollup=3600,
        ) == {}

    def test_get_distinct_counts_totals_users(self):
        # BUGFIX: renamed from `get_distinct_counts_totals_users` — without the
        # `test_` prefix the test runner never collected this method, so its
        # assertions silently never ran.
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600
        ) == {
            self.proj1group1.id: 2,  # 2 unique users overall
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now,
            rollup=3600
        ) == {
            self.proj1group1.id: 1,  # Only 1 unique user in the first hour
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600
        ) == {
            self.proj1.id: 2,
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600
        ) == {}

    def test_most_frequent(self):
        """Top groups by frequency within the project window."""
        assert self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {
            self.proj1.id: [
                (self.proj1group1.id, 2.0),
                (self.proj1group2.id, 1.0),
            ],
        }

        assert self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {}

    def test_frequency_series(self):
        """Per-(group, release) frequency buckets across the 4-hour window."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group,
            {
                self.proj1group1.id: (self.group1release1.id, self.group1release2.id, ),
                self.proj1group2.id: (self.group2release1.id, )
            },
            dts[0], dts[-1],
            rollup=3600,
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), {
                    self.group1release1.id: 0,
                    self.group1release2.id: 0,
                }),
                (timestamp(dts[1]), {
                    self.group1release1.id: 3,
                    self.group1release2.id: 0,
                }),
                (timestamp(dts[2]), {
                    self.group1release1.id: 0,
                    self.group1release2.id: 3,
                }),
                (timestamp(dts[3]), {
                    self.group1release1.id: 0,
                    self.group1release2.id: 0,
                }),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), {
                    self.group2release1.id: 0,
                }),
                (timestamp(dts[1]), {
                    self.group2release1.id: 3,
                }),
                (timestamp(dts[2]), {
                    self.group2release1.id: 0,
                }),
                (timestamp(dts[3]), {
                    self.group2release1.id: 0,
                }),
            ],
        }

        assert self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group,
            {},
            dts[0], dts[-1],
            rollup=3600,
        ) == {}

    def test_result_shape(self):
        """
        Tests that the results from the different TSDB methods have the
        expected format.
        """
        project_id = self.proj1.id
        dts = [self.now + timedelta(hours=i) for i in range(4)]

        results = self.db.get_most_frequent(TSDBModel.frequent_issues_by_project,
                                            [project_id], dts[0], dts[0])
        assert has_shape(results, {1: [(1, 1.0)]})

        results = self.db.get_most_frequent_series(TSDBModel.frequent_issues_by_project,
                                                   [project_id], dts[0], dts[0])
        assert has_shape(results, {1: [(1, {1: 1.0})]})

        items = {
            # {project_id: (issue_id, issue_id, ...)}
            project_id: (self.proj1group1.id, self.proj1group2.id)
        }
        results = self.db.get_frequency_series(TSDBModel.frequent_issues_by_project,
                                               items, dts[0], dts[-1])
        assert has_shape(results, {1: [(1, {1: 1})]})

        results = self.db.get_frequency_totals(TSDBModel.frequent_issues_by_project,
                                               items, dts[0], dts[-1])
        assert has_shape(results, {1: {1: 1}})

        results = self.db.get_range(TSDBModel.project, [project_id], dts[0], dts[-1])
        assert has_shape(results, {1: [(1, 1)]})

        results = self.db.get_distinct_counts_series(TSDBModel.users_affected_by_project,
                                                     [project_id], dts[0], dts[-1])
        assert has_shape(results, {1: [(1, 1)]})

        results = self.db.get_distinct_counts_totals(TSDBModel.users_affected_by_project,
                                                     [project_id], dts[0], dts[-1])
        assert has_shape(results, {1: 1})

        results = self.db.get_distinct_counts_union(TSDBModel.users_affected_by_project,
                                                    [project_id], dts[0], dts[-1])
        assert has_shape(results, 1)
def setUp(self):
    """
    Fixture for the events-dataset SnubaTSDB tests: inserts one event every
    10 minutes for 4 hours directly into Snuba via its test insert endpoint.
    Events alternate group/hash every 10 min, switch environment every 2h,
    rotate the release tag hourly and the user id every 3300s; every other
    event (r % 1200 == 0) is typed "transaction" rather than "error".
    """
    super(SnubaTSDBTest, self).setUp()
    self.db = SnubaTSDB()
    # Midnight UTC today so hour/day rollup buckets align exactly.
    self.now = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0,
                                         tzinfo=pytz.UTC)
    self.proj1 = self.create_project()
    self.proj1env1 = self.create_environment(project=self.proj1, name="test")
    self.proj1env2 = self.create_environment(project=self.proj1, name="dev")
    self.proj1env3 = self.create_environment(project=self.proj1, name="staging")
    self.proj1defaultenv = self.create_environment(project=self.proj1, name="")

    self.proj1group1 = self.create_group(self.proj1)
    self.proj1group2 = self.create_group(self.proj1)

    hash1 = "1" * 32
    hash2 = "2" * 32

    GroupHash.objects.create(project=self.proj1, group=self.proj1group1, hash=hash1)
    GroupHash.objects.create(project=self.proj1, group=self.proj1group2, hash=hash2)

    self.release1 = Release.objects.create(
        organization_id=self.organization.id, version="1" * 10, date_added=self.now)
    self.release1.add_project(self.proj1)
    self.release2 = Release.objects.create(
        organization_id=self.organization.id, version="2" * 10, date_added=self.now)
    self.release2.add_project(self.proj1)

    self.group1release1 = GroupRelease.objects.create(
        project_id=self.proj1.id, group_id=self.proj1group1.id, release_id=self.release1.id)
    self.group1release2 = GroupRelease.objects.create(
        project_id=self.proj1.id, group_id=self.proj1group1.id, release_id=self.release2.id)
    self.group2release1 = GroupRelease.objects.create(
        project_id=self.proj1.id, group_id=self.proj1group2.id, release_id=self.release1.id)

    # Payload entries are (2, "insert", event) tuples — presumably the
    # versioned Snuba test-insert message format; confirm against the Snuba
    # test endpoint contract.
    data = json.dumps([
        (
            2,
            "insert",
            {
                "event_id": (six.text_type(r) * 32)[:32],
                "primary_hash": [hash1, hash2][(r // 600) % 2],  # Switch every 10 mins
                "group_id": [self.proj1group1.id, self.proj1group2.id][(r // 600) % 2],
                "project_id": self.proj1.id,
                "message": "message 1",
                "platform": "python",
                "datetime": (self.now + timedelta(seconds=r)).strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
                "data": {
                    # Every other event (r divisible by 1200s) is a transaction.
                    "type": "transaction" if r % 1200 == 0 else "error",
                    "received": calendar.timegm(self.now.timetuple()) + r,
                    "tags": {
                        "foo": "bar",
                        "baz": "quux",
                        # Switch every 2 hours
                        "environment": [self.proj1env1.name, None][(r // 7200) % 3],
                        "sentry:user": u"id:user{}".format(r // 3300),
                        "sentry:release": six.text_type(r // 3600) * 10,  # 1 per hour
                    },
                    "user": {
                        # change every 55 min so some hours have 1 user, some have 2
                        "id": u"user{}".format(r // 3300),
                        "email": u"user{}@sentry.io".format(r),
                    },
                },
            },
        )
        for r in range(0, 14400, 600)
    ])  # Every 10 min for 4 hours
    assert (requests.post(settings.SENTRY_SNUBA + "/tests/events/insert",
                          data=data).status_code == 200)

    # snuba trims query windows based on first_seen/last_seen, so these need to be correct-ish
    self.proj1group1.first_seen = self.now
    self.proj1group1.last_seen = self.now + timedelta(seconds=14400)
    self.proj1group1.save()
    self.proj1group2.first_seen = self.now
    self.proj1group2.last_seen = self.now + timedelta(seconds=14400)
    self.proj1group2.save()
class SnubaTSDBTest(TestCase, SnubaTestCase):
    """
    Integration tests for SnubaTSDB against the Snuba events dataset.

    The fixture inserts one event every 10 minutes for 4 hours, alternating
    group/hash every 10 minutes; every other event is a "transaction" and is
    therefore excluded from error-based counts (hence the expected 3/hour
    instead of 6/hour in most assertions below).
    """

    def setUp(self):
        super(SnubaTSDBTest, self).setUp()
        self.db = SnubaTSDB()
        # Midnight UTC today, so hour/day rollup buckets align exactly.
        self.now = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0,
                                             tzinfo=pytz.UTC)
        self.proj1 = self.create_project()
        self.proj1env1 = self.create_environment(project=self.proj1, name="test")
        self.proj1env2 = self.create_environment(project=self.proj1, name="dev")
        self.proj1env3 = self.create_environment(project=self.proj1, name="staging")
        self.proj1defaultenv = self.create_environment(project=self.proj1, name="")

        self.proj1group1 = self.create_group(self.proj1)
        self.proj1group2 = self.create_group(self.proj1)

        hash1 = "1" * 32
        hash2 = "2" * 32

        GroupHash.objects.create(project=self.proj1, group=self.proj1group1, hash=hash1)
        GroupHash.objects.create(project=self.proj1, group=self.proj1group2, hash=hash2)

        self.release1 = Release.objects.create(
            organization_id=self.organization.id, version="1" * 10, date_added=self.now)
        self.release1.add_project(self.proj1)
        self.release2 = Release.objects.create(
            organization_id=self.organization.id, version="2" * 10, date_added=self.now)
        self.release2.add_project(self.proj1)

        self.group1release1 = GroupRelease.objects.create(
            project_id=self.proj1.id, group_id=self.proj1group1.id, release_id=self.release1.id)
        self.group1release2 = GroupRelease.objects.create(
            project_id=self.proj1.id, group_id=self.proj1group1.id, release_id=self.release2.id)
        self.group2release1 = GroupRelease.objects.create(
            project_id=self.proj1.id, group_id=self.proj1group2.id, release_id=self.release1.id)

        data = json.dumps([
            (
                2,
                "insert",
                {
                    "event_id": (six.text_type(r) * 32)[:32],
                    "primary_hash": [hash1, hash2][(r // 600) % 2],  # Switch every 10 mins
                    "group_id": [self.proj1group1.id, self.proj1group2.id][(r // 600) % 2],
                    "project_id": self.proj1.id,
                    "message": "message 1",
                    "platform": "python",
                    "datetime": (self.now + timedelta(seconds=r)).strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
                    "data": {
                        # Every other event (r divisible by 1200s) is a transaction.
                        "type": "transaction" if r % 1200 == 0 else "error",
                        "received": calendar.timegm(self.now.timetuple()) + r,
                        "tags": {
                            "foo": "bar",
                            "baz": "quux",
                            # Switch every 2 hours
                            "environment": [self.proj1env1.name, None][(r // 7200) % 3],
                            "sentry:user": u"id:user{}".format(r // 3300),
                            "sentry:release": six.text_type(r // 3600) * 10,  # 1 per hour
                        },
                        "user": {
                            # change every 55 min so some hours have 1 user, some have 2
                            "id": u"user{}".format(r // 3300),
                            "email": u"user{}@sentry.io".format(r),
                        },
                    },
                },
            )
            for r in range(0, 14400, 600)
        ])  # Every 10 min for 4 hours
        assert (requests.post(settings.SENTRY_SNUBA + "/tests/events/insert",
                              data=data).status_code == 200)

        # snuba trims query windows based on first_seen/last_seen, so these need to be correct-ish
        self.proj1group1.first_seen = self.now
        self.proj1group1.last_seen = self.now + timedelta(seconds=14400)
        self.proj1group1.save()
        self.proj1group2.first_seen = self.now
        self.proj1group2.last_seen = self.now + timedelta(seconds=14400)
        self.proj1group2.save()

    def test_range_groups(self):
        """Per-group error counts (transactions excluded): 3 per hour per group."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(TSDBModel.group, [self.proj1group1.id],
                                 dts[0], dts[-1], rollup=3600) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ]
        }

        # Multiple groups
        assert self.db.get_range(
            TSDBModel.group,
            [self.proj1group1.id, self.proj1group2.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }

        # Empty key list short-circuits to an empty result.
        assert self.db.get_range(TSDBModel.group, [], dts[0], dts[-1], rollup=3600) == {}

    def test_range_releases(self):
        """release1's tag ('1' * 10) only matches events in the second hour."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(TSDBModel.release, [self.release1.id],
                                 dts[0], dts[-1], rollup=3600) == {
            self.release1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_project(self):
        """Project-wide error counts: 3 per hour (transactions excluded)."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(TSDBModel.project, [self.proj1.id],
                                 dts[0], dts[-1], rollup=3600) == {
            self.proj1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ]
        }

    def test_range_environment_filter(self):
        """The environment tag alternates every 2h between env1 and unset (default env)."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.proj1env1.id],
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

        # No events submitted for env2
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.proj1env2.id],
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

        # Events submitted with no environment should match default environment
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.proj1defaultenv.id],
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ]
        }

    def test_range_rollups(self):
        # Daily
        daystart = self.now.replace(hour=0)  # day buckets start on day boundaries
        dts = [daystart + timedelta(days=i) for i in range(2)]
        assert self.db.get_range(TSDBModel.project, [self.proj1.id],
                                 dts[0], dts[-1], rollup=86400) == {
            self.proj1.id: [(timestamp(dts[0]), 12), (timestamp(dts[1]), 0)]
        }

        # Minutely
        dts = [self.now + timedelta(minutes=i) for i in range(120)]
        # Errors land every 10th minute *except* every 20th, where the event
        # is a transaction instead.
        expected = [(to_timestamp(d), 1 if i % 10 == 0 and i % 20 != 0 else 0)
                    for i, d in enumerate(dts)]
        # i == 0 is already 0 (0 % 20 == 0); kept for parity with the original.
        expected[0] = (expected[0][0], 0)
        assert self.db.get_range(TSDBModel.project, [self.proj1.id],
                                 dts[0], dts[-1], rollup=60) == {
            self.proj1.id: expected
        }

    def test_distinct_counts_series_users(self):
        """Unique-user series; the user id rotates every 3300s (~55 min)."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group, [self.proj1group1.id],
            dts[0], dts[-1], rollup=3600) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 1),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 2),
            ]
        }

        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project, [self.proj1.id],
            dts[0], dts[-1], rollup=3600) == {
            self.proj1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 2),
                (timestamp(dts[2]), 2),
                (timestamp(dts[3]), 2),
            ]
        }

        assert (self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group, [], dts[0], dts[-1], rollup=3600) == {})

    def test_get_distinct_counts_totals_users(self):
        # BUGFIX: renamed from `get_distinct_counts_totals_users` — without the
        # `test_` prefix the test runner never collected this method, so its
        # assertions silently never ran.
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {
            self.proj1group1.id: 2  # 2 unique users overall
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now,
            rollup=3600,
        ) == {
            self.proj1group1.id: 1  # Only 1 unique user in the first hour
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {
            self.proj1.id: 2
        }

        assert (self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {})

    def test_most_frequent(self):
        """Top groups by frequency within the project window."""
        assert self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {
            self.proj1.id: [(self.proj1group1.id, 2.0), (self.proj1group2.id, 1.0)]
        }

        assert (self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {})

    def test_frequency_series(self):
        """Per-(group, release) frequency buckets across the 4-hour window."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group,
            {
                self.proj1group1.id: (self.group1release1.id, self.group1release2.id),
                self.proj1group2.id: (self.group2release1.id, ),
            },
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), {
                    self.group1release1.id: 0,
                    self.group1release2.id: 0
                }),
                (timestamp(dts[1]), {
                    self.group1release1.id: 3,
                    self.group1release2.id: 0
                }),
                (timestamp(dts[2]), {
                    self.group1release1.id: 0,
                    self.group1release2.id: 3
                }),
                (timestamp(dts[3]), {
                    self.group1release1.id: 0,
                    self.group1release2.id: 0
                }),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), {
                    self.group2release1.id: 0
                }),
                (timestamp(dts[1]), {
                    self.group2release1.id: 3
                }),
                (timestamp(dts[2]), {
                    self.group2release1.id: 0
                }),
                (timestamp(dts[3]), {
                    self.group2release1.id: 0
                }),
            ],
        }

        assert (self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group, {}, dts[0], dts[-1], rollup=3600) == {})

    def test_result_shape(self):
        """
        Tests that the results from the different TSDB methods have the
        expected format.
        """
        project_id = self.proj1.id
        dts = [self.now + timedelta(hours=i) for i in range(4)]

        results = self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project, [project_id], dts[0], dts[0])
        assert has_shape(results, {1: [(1, 1.0)]})

        results = self.db.get_most_frequent_series(
            TSDBModel.frequent_issues_by_project, [project_id], dts[0], dts[0])
        assert has_shape(results, {1: [(1, {1: 1.0})]})

        items = {
            # {project_id: (issue_id, issue_id, ...)}
            project_id: (self.proj1group1.id, self.proj1group2.id)
        }
        results = self.db.get_frequency_series(
            TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1])
        assert has_shape(results, {1: [(1, {1: 1})]})

        results = self.db.get_frequency_totals(
            TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1])
        assert has_shape(results, {1: {1: 1}})

        results = self.db.get_range(TSDBModel.project, [project_id], dts[0], dts[-1])
        assert has_shape(results, {1: [(1, 1)]})

        results = self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1])
        assert has_shape(results, {1: [(1, 1)]})

        results = self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1])
        assert has_shape(results, {1: 1})

        results = self.db.get_distinct_counts_union(
            TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1])
        assert has_shape(results, 1)

    def test_calculated_limit(self):
        """get_data must size the Snuba `limit` as (#keys * #rollup buckets)."""
        with patch("sentry.tsdb.snuba.snuba") as snuba:
            # 24h test: 5 keys * 24 hourly buckets = 120
            rollup = 3600
            end = self.now
            start = end + timedelta(days=-1, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.query.call_args[1]["limit"] == 120

            # 14 day test: 5 keys * 14 daily buckets = 70
            rollup = 86400
            start = end + timedelta(days=-14, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.query.call_args[1]["limit"] == 70

            # 1h test: 5 keys * 1 hourly bucket = 5
            rollup = 3600
            end = self.now
            start = end + timedelta(hours=-1, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.query.call_args[1]["limit"] == 5
class SnubaTSDBTest(OutcomesSnubaTest):
    """
    Exercises SnubaTSDB outcome aggregates at organization, project and
    project-key scope, at both hourly (3600s) and 10-second granularity.
    """

    def setUp(self):
        super(SnubaTSDBTest, self).setUp()
        self.db = SnubaTSDB()

        # Reference times: a seven-day query window, one populated bucket at
        # its start, one a day later, and one bucket just before the window.
        self.now = datetime.now(pytz.utc)
        self.start_time = self.now - timedelta(days=7)
        self.one_day_later = self.start_time + timedelta(days=1)
        self.day_before_start_time = self.start_time - timedelta(days=1)

    def _assert_bucketed_counts(self, series, floor_func, start_count, later_count):
        """The two populated buckets carry the expected counts; all others are zero."""
        populated = [floor_func(self.start_time), floor_func(self.one_day_later)]
        by_bucket = {bucket: value for (bucket, value) in series}
        assert by_bucket[populated[0]] == start_count
        assert by_bucket[populated[1]] == later_count
        for bucket, value in series:
            if bucket not in populated:
                assert value == 0

    def test_organization_outcomes(self):
        other_organization = self.create_organization()

        for outcome in (Outcome.ACCEPTED, Outcome.RATE_LIMITED, Outcome.FILTERED):
            # Quantity 3 at the window start, 4 one day later.
            self.store_outcomes(self.organization.id, self.project.id,
                                outcome.value, self.start_time, 1, 3)
            self.store_outcomes(self.organization.id, self.project.id,
                                outcome.value, self.one_day_later, 1, 4)

            # Also create some outcomes we shouldn't be querying
            self.store_outcomes(other_organization.id, self.project.id,
                                outcome.value, self.one_day_later, 1, 5)
            self.store_outcomes(self.organization.id, self.project.id,
                                outcome.value, self.day_before_start_time, 1, 6)

        # "received" sums all three outcomes, hence the * 3 expectations.
        cases = (
            (TSDBModel.organization_total_received, 3600, floor_to_hour_epoch, 3 * 3, 4 * 3),
            (TSDBModel.organization_total_rejected, 3600, floor_to_hour_epoch, 3, 4),
            (TSDBModel.organization_total_blacklisted, 3600, floor_to_hour_epoch, 3, 4),
            (TSDBModel.organization_total_received, 10, floor_to_10s_epoch, 3 * 3, 4 * 3),
            (TSDBModel.organization_total_rejected, 10, floor_to_10s_epoch, 3, 4),
            (TSDBModel.organization_total_blacklisted, 10, floor_to_10s_epoch, 3, 4),
        )
        for model, rollup, floor_func, start_count, later_count in cases:
            # Query SnubaTSDB
            response = self.db.get_range(model, [self.organization.id],
                                         self.start_time, self.now, rollup, None)

            # Assert that the response has values set for the times we expect, and nothing more
            assert self.organization.id in response.keys()
            self._assert_bucketed_counts(response[self.organization.id],
                                         floor_func, start_count, later_count)

    def test_project_outcomes(self):
        other_project = self.create_project(organization=self.organization)

        for outcome in (Outcome.ACCEPTED, Outcome.RATE_LIMITED, Outcome.FILTERED):
            self.store_outcomes(self.organization.id, self.project.id,
                                outcome.value, self.start_time, 1, 3)
            self.store_outcomes(self.organization.id, self.project.id,
                                outcome.value, self.one_day_later, 1, 4)

            # Also create some outcomes we shouldn't be querying
            self.store_outcomes(self.organization.id, other_project.id,
                                outcome.value, self.one_day_later, 1, 5)
            self.store_outcomes(self.organization.id, self.project.id,
                                outcome.value, self.day_before_start_time, 1, 6)

        cases = (
            (TSDBModel.project_total_received, 3600, floor_to_hour_epoch, 3 * 3, 4 * 3),
            (TSDBModel.project_total_rejected, 3600, floor_to_hour_epoch, 3, 4),
            (TSDBModel.project_total_blacklisted, 3600, floor_to_hour_epoch, 3, 4),
            (TSDBModel.project_total_received, 10, floor_to_10s_epoch, 3 * 3, 4 * 3),
            (TSDBModel.project_total_rejected, 10, floor_to_10s_epoch, 3, 4),
            (TSDBModel.project_total_blacklisted, 10, floor_to_10s_epoch, 3, 4),
        )
        for model, rollup, floor_func, start_count, later_count in cases:
            response = self.db.get_range(model, [self.project.id],
                                         self.start_time, self.now, rollup, None)

            # Assert that the response has values set for the times we expect, and nothing more
            assert self.project.id in response.keys()
            self._assert_bucketed_counts(response[self.project.id],
                                         floor_func, start_count, later_count)

    def test_key_outcomes(self):
        project_key = self.create_project_key(project=self.project)
        other_project = self.create_project(organization=self.organization)
        other_project_key = self.create_project_key(project=other_project)

        for outcome in (Outcome.ACCEPTED, Outcome.RATE_LIMITED, Outcome.FILTERED):
            self.store_outcomes(
                self.organization.id,
                self.project.id,
                outcome.value,
                self.start_time,
                project_key.id,
                3,
            )
            self.store_outcomes(
                self.organization.id,
                self.project.id,
                outcome.value,
                self.one_day_later,
                project_key.id,
                4,
            )

            # Also create some outcomes we shouldn't be querying
            self.store_outcomes(
                self.organization.id,
                self.project.id,
                outcome.value,
                self.one_day_later,
                other_project_key.id,
                5,
            )
            self.store_outcomes(
                self.organization.id,
                self.project.id,
                outcome.value,
                self.day_before_start_time,
                project_key.id,
                6,
            )

        cases = (
            (TSDBModel.key_total_received, 3600, floor_to_hour_epoch, 3 * 3, 4 * 3),
            (TSDBModel.key_total_rejected, 3600, floor_to_hour_epoch, 3, 4),
            (TSDBModel.key_total_blacklisted, 3600, floor_to_hour_epoch, 3, 4),
            (TSDBModel.key_total_received, 10, floor_to_10s_epoch, 3 * 3, 4 * 3),
            (TSDBModel.key_total_rejected, 10, floor_to_10s_epoch, 3, 4),
            (TSDBModel.key_total_blacklisted, 10, floor_to_10s_epoch, 3, 4),
        )
        for model, rollup, floor_func, start_count, later_count in cases:
            response = self.db.get_range(
                # with [project_key.id, six.text_type(project_key.id)], we are imitating the hack in
                # project_key_stats.py cause that is what `get_range` will be called with.
                model,
                [project_key.id, six.text_type(project_key.id)],
                self.start_time,
                self.now,
                rollup,
                None,
            )

            # Assert that the response has values set for the times we expect, and nothing more
            assert project_key.id in response.keys()
            self._assert_bucketed_counts(response[project_key.id],
                                         floor_func, start_count, later_count)
class SnubaTSDBTest(OutcomesSnubaTest):
    """Exercises SnubaTSDB outcome aggregates at organization and project scope."""

    def setUp(self):
        super(SnubaTSDBTest, self).setUp()
        self.db = SnubaTSDB()

        # Set up the times: a 7-day window ending now, plus marker times just
        # inside and just outside that window.
        self.now = datetime.now(pytz.utc)
        self.start_time = self.now - timedelta(days=7)
        self.one_day_later = self.start_time + timedelta(days=1)
        self.day_before_start_time = self.start_time - timedelta(days=1)

    def _assert_outcome_series(self, response, key):
        """Assert `response[key]` counts 3 at start_time, 4 one day later, 0 elsewhere.

        Shared by the organization- and project-scoped tests below, which
        previously duplicated this assertion tail verbatim.
        """
        # `key in response` is equivalent to, and more idiomatic than,
        # `key in response.keys()`.
        assert key in response
        # The series is a list of (time, count) pairs; dict() replaces the
        # hand-rolled {k: v for (k, v) in ...} comprehension.
        response_dict = dict(response[key])
        assert response_dict[floor_to_hour_epoch(self.start_time)] == 3
        assert response_dict[floor_to_hour_epoch(self.one_day_later)] == 4
        for time, count in response[key]:
            if time not in (
                floor_to_hour_epoch(self.start_time),
                floor_to_hour_epoch(self.one_day_later),
            ):
                assert count == 0

    def test_organization_outcomes(self):
        other_organization = self.create_organization()

        for tsdb_model, outcome in [
            (TSDBModel.organization_total_received, Outcome.ACCEPTED),
            (TSDBModel.organization_total_rejected, Outcome.RATE_LIMITED),
            (TSDBModel.organization_total_blacklisted, Outcome.FILTERED),
        ]:
            # Create all the outcomes we will be querying
            self.store_outcomes(
                self.organization.id, self.project.id, outcome.value, self.start_time, 3
            )
            self.store_outcomes(
                self.organization.id, self.project.id, outcome.value, self.one_day_later, 4
            )

            # Also create some outcomes we shouldn't be querying
            self.store_outcomes(
                other_organization.id, self.project.id, outcome.value, self.one_day_later, 5
            )
            self.store_outcomes(
                self.organization.id, self.project.id, outcome.value, self.day_before_start_time, 6
            )

            # Query SnubaTSDB
            response = self.db.get_range(
                tsdb_model, [self.organization.id], self.start_time, self.now, 3600, None
            )

            # The response must have values set for the expected times, and nothing more.
            self._assert_outcome_series(response, self.organization.id)

    def test_project_outcomes(self):
        other_project = self.create_project(organization=self.organization)

        for tsdb_model, outcome in [
            (TSDBModel.project_total_received, Outcome.ACCEPTED),
            (TSDBModel.project_total_rejected, Outcome.RATE_LIMITED),
            (TSDBModel.project_total_blacklisted, Outcome.FILTERED),
        ]:
            # Create all the outcomes we will be querying
            self.store_outcomes(
                self.organization.id, self.project.id, outcome.value, self.start_time, 3
            )
            self.store_outcomes(
                self.organization.id, self.project.id, outcome.value, self.one_day_later, 4
            )

            # Also create some outcomes we shouldn't be querying
            self.store_outcomes(
                self.organization.id, other_project.id, outcome.value, self.one_day_later, 5
            )
            self.store_outcomes(
                self.organization.id, self.project.id, outcome.value, self.day_before_start_time, 6
            )

            # Query SnubaTSDB
            response = self.db.get_range(
                tsdb_model, [self.project.id], self.start_time, self.now, 3600, None
            )

            # The response must have values set for the expected times, and nothing more.
            self._assert_outcome_series(response, self.project.id)
class SnubaTSDBTest(TestCase, SnubaTestCase):
    # End-to-end tests of SnubaTSDB against real stored events: setUp writes a
    # fixed 4-hour window of fixture events, each test then asserts exact
    # aggregate series returned by the backend.

    def setUp(self):
        super().setUp()
        self.db = SnubaTSDB()
        # Midnight UTC at least 4 hours in the past, so the 4-hour event
        # window written below lies entirely before "now".
        self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC)
        self.proj1 = self.create_project()
        env1 = "test"
        env2 = "dev"
        defaultenv = ""
        release1 = "1" * 10
        release2 = "2" * 10

        self.release1 = Release.objects.create(
            organization_id=self.organization.id, version=release1, date_added=self.now)
        self.release1.add_project(self.proj1)
        self.release2 = Release.objects.create(
            organization_id=self.organization.id, version=release2, date_added=self.now)
        self.release2.add_project(self.proj1)

        for r in range(0, 14400, 600):  # Every 10 min for 4 hours
            self.store_event(
                data={
                    "event_id": (str(r) * 32)[:32],
                    "message": "message 1",
                    "platform": "python",
                    "fingerprint": [["group-1"], ["group-2"]][(r // 600) % 2],  # Switch every 10 mins
                    "timestamp": iso_format(self.now + timedelta(seconds=r)),
                    "tags": {
                        "foo": "bar",
                        "baz": "quux",
                        # Switch every 2 hours
                        # NOTE(review): `% 3` over a 2-element list avoids an
                        # IndexError only because r < 14400 keeps r // 7200 in {0, 1}.
                        "environment": [env1, None][(r // 7200) % 3],
                        # NOTE(review): the format() call below has no placeholder,
                        # so the value is always the literal string — looks like a
                        # redacted/garbled literal; confirm intended value.
                        "sentry:user": "******".format(r // 3300),
                    },
                    "user": {
                        # change every 55 min so some hours have 1 user, some have 2
                        "id": "user{}".format(r // 3300),
                        "email": f"user{r}@sentry.io",
                    },
                    "release": str(r // 3600) * 10,  # 1 per hour,
                },
                project_id=self.proj1.id,
            )

        groups = Group.objects.filter(project=self.proj1).order_by("id")
        self.proj1group1 = groups[0]
        self.proj1group2 = groups[1]

        self.env1 = Environment.objects.get(name=env1)
        self.env2 = self.create_environment(name=env2)  # No events
        self.defaultenv = Environment.objects.get(name=defaultenv)

        # group1 was seen under release1/env1 by the events above; the
        # release2 association is created explicitly.
        self.group1release1env1 = GroupRelease.objects.get(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release1.id,
            environment=env1,
        )
        self.group1release2env1 = GroupRelease.objects.create(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release2.id,
            environment=env1,
        )
        self.group2release1env1 = GroupRelease.objects.get(
            project_id=self.proj1.id,
            group_id=self.proj1group2.id,
            release_id=self.release1.id,
            environment=env1,
        )

    def test_range_groups(self):
        # Fingerprints alternate every 10 minutes, so each group gets 3 events/hour.
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(TSDBModel.group, [self.proj1group1.id], dts[0], dts[-1], rollup=3600) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ]
        }
        # Multiple groups
        assert self.db.get_range(
            TSDBModel.group,
            [self.proj1group1.id, self.proj1group2.id],
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }
        # Empty key list yields an empty result, not zero-filled series.
        assert self.db.get_range(TSDBModel.group, [], dts[0], dts[-1], rollup=3600) == {}

    def test_range_releases(self):
        # Events carry release "0"*10 in hour 0, "1"*10 (== release1) in hour 1, etc.
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(TSDBModel.release, [self.release1.id], dts[0], dts[-1], rollup=3600) == {
            self.release1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_project(self):
        # 6 events per hour for the whole 4-hour window.
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(TSDBModel.project, [self.proj1.id], dts[0], dts[-1], rollup=3600) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }

    def test_range_environment_filter(self):
        # env1 is only set on events in the first 2 hours (see setUp).
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.env1.id],
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }
        # No events submitted for env2
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.env2.id],
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }
        # Events submitted with no environment should match default environment
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_ids=[self.defaultenv.id],
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }

    def test_range_rollups(self):
        # Daily
        daystart = self.now.replace(
            hour=0)  # day buckets start on day boundaries
        dts = [daystart + timedelta(days=i) for i in range(2)]
        assert self.db.get_range(TSDBModel.project, [self.proj1.id], dts[0], dts[-1], rollup=86400) == {
            self.proj1.id: [(timestamp(dts[0]), 24), (timestamp(dts[1]), 0)]
        }

        # Minutely
        dts = [self.now + timedelta(minutes=i) for i in range(120)]
        # Expect every 10th minute to have a 1, else 0
        expected = [(to_timestamp(d), 1 if i % 10 == 0 else 0) for i, d in enumerate(dts)]
        assert self.db.get_range(TSDBModel.project, [self.proj1.id], dts[0], dts[-1], rollup=60) == {
            self.proj1.id: expected
        }

    def test_distinct_counts_series_users(self):
        # User id rotates every 3300s, so later hours straddle two distinct users.
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group, [self.proj1group1.id], dts[0], dts[-1], rollup=3600) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 1),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 2),
            ]
        }

        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project, [self.proj1.id], dts[0], dts[-1], rollup=3600) == {
            self.proj1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 2),
                (timestamp(dts[2]), 2),
                (timestamp(dts[3]), 2),
            ]
        }

        assert (self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group, [], dts[0], dts[-1], rollup=3600) == {})

    # NOTE(review): missing the `test_` prefix, so the test runner never
    # collects this method — possibly a deliberate disable; confirm before renaming.
    def get_distinct_counts_totals_users(self):
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {
            self.proj1group1.id: 2  # 2 unique users overall
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now,
            rollup=3600,
        ) == {
            self.proj1group1.id: 1  # Only 1 unique user in the first hour
        }

        assert (self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {
            self.proj1.id: 2
        })

        assert (self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {})

    def test_most_frequent(self):
        # Both issues are equally frequent, so either ordering is acceptable.
        assert self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) in [
            {
                self.proj1.id: [(self.proj1group1.id, 2.0), (self.proj1group2.id, 1.0)]
            },
            {
                self.proj1.id: [(self.proj1group2.id, 2.0), (self.proj1group1.id, 1.0)]
            },
        ]  # Both issues equally frequent

        assert (self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {})

    def test_frequency_series(self):
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group,
            {
                self.proj1group1.id: (self.group1release1env1.id, self.group1release2env1.id),
                self.proj1group2.id: (self.group2release1env1.id, ),
            },
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), {
                    self.group1release1env1.id: 0,
                    self.group1release2env1.id: 0
                }),
                (timestamp(dts[1]), {
                    self.group1release1env1.id: 3,
                    self.group1release2env1.id: 0
                }),
                (timestamp(dts[2]), {
                    self.group1release1env1.id: 0,
                    self.group1release2env1.id: 3
                }),
                (timestamp(dts[3]), {
                    self.group1release1env1.id: 0,
                    self.group1release2env1.id: 0
                }),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), {
                    self.group2release1env1.id: 0
                }),
                (timestamp(dts[1]), {
                    self.group2release1env1.id: 3
                }),
                (timestamp(dts[2]), {
                    self.group2release1env1.id: 0
                }),
                (timestamp(dts[3]), {
                    self.group2release1env1.id: 0
                }),
            ],
        }

        assert (self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group, {}, dts[0], dts[-1], rollup=3600) == {})

    def test_result_shape(self):
        """
        Tests that the results from the different TSDB methods have the
        expected format.
        """
        project_id = self.proj1.id
        dts = [self.now + timedelta(hours=i) for i in range(4)]

        results = self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project, [project_id], dts[0], dts[0])
        assert has_shape(results, {1: [(1, 1.0)]})

        results = self.db.get_most_frequent_series(
            TSDBModel.frequent_issues_by_project, [project_id], dts[0], dts[0])
        assert has_shape(results, {1: [(1, {1: 1.0})]})

        items = {
            # {project_id: (issue_id, issue_id, ...)}
            project_id: (self.proj1group1.id, self.proj1group2.id)
        }
        results = self.db.get_frequency_series(
            TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1])
        assert has_shape(results, {1: [(1, {1: 1})]})

        results = self.db.get_frequency_totals(
            TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1])
        assert has_shape(results, {1: {1: 1}})

        results = self.db.get_range(TSDBModel.project, [project_id], dts[0], dts[-1])
        assert has_shape(results, {1: [(1, 1)]})

        results = self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1])
        assert has_shape(results, {1: [(1, 1)]})

        results = self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1])
        assert has_shape(results, {1: 1})

        results = self.db.get_distinct_counts_union(
            TSDBModel.users_affected_by_project, [project_id], dts[0], dts[-1])
        assert has_shape(results, 1)

    def test_calculated_limit(self):
        # Limit is derived from bucket-count * key-count; checked for three windows.
        with patch("sentry.tsdb.snuba.snuba") as snuba:
            # 24h test
            rollup = 3600
            end = self.now
            start = end + timedelta(days=-1, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end,
                             rollup=rollup)
            assert snuba.query.call_args[1]["limit"] == 120

            # 14 day test
            rollup = 86400
            start = end + timedelta(days=-14, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.query.call_args[1]["limit"] == 70

            # 1h test
            rollup = 3600
            end = self.now
            start = end + timedelta(hours=-1, seconds=rollup)
            self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
            assert snuba.query.call_args[1]["limit"] == 5
class SnubaTSDBRequestsTest(TestCase):
    """
    Tests that the Snuba TSDB backend makes correctly formatted requests to
    the Snuba service, and formats the results correctly.

    Mocks the Snuba service request/response.
    """

    def setUp(self):
        self.db = SnubaTSDB()

    @responses.activate
    def test_result_shape(self):
        """
        Tests that the results from the different TSDB methods have the
        expected format.
        """
        now = parse_datetime('2018-03-09T01:00:00Z')
        project_id = 194503
        dts = [now + timedelta(hours=i) for i in range(4)]

        with responses.RequestsMock() as rsps:
            # Generic canned response: echoes back the requested groupby and
            # aggregate columns with value 1 (or [99] for topK aggregates).
            def snuba_response(request):
                body = json.loads(request.body)
                aggs = body.get('aggregations', [])
                meta = [{'name': col} for col in body['groupby'] + [a[2] for a in aggs]]
                datum = {col['name']: 1 for col in meta}
                datum['project_id'] = project_id
                if 'time' in datum:
                    datum['time'] = '2018-03-09T01:00:00Z'
                for agg in aggs:
                    if agg[0].startswith('topK'):
                        datum[agg[2]] = [99]
                return (200, {}, json.dumps({'data': [datum], 'meta': meta}))

            rsps.add_callback(
                responses.POST,
                settings.SENTRY_SNUBA + '/query',
                callback=snuba_response)

            results = self.db.get_most_frequent(TSDBModel.frequent_issues_by_project,
                                                [project_id], dts[0], dts[0])
            assert has_shape(results, {1: [(1, 1.0)]})

            results = self.db.get_most_frequent_series(TSDBModel.frequent_issues_by_project,
                                                       [project_id], dts[0], dts[0])
            assert has_shape(results, {1: [(1, {1: 1.0})]})

            items = {
                project_id: (0, 1, 2)  # {project_id: (issue_id, issue_id, ...)}
            }
            results = self.db.get_frequency_series(TSDBModel.frequent_issues_by_project,
                                                   items, dts[0], dts[-1])
            assert has_shape(results, {1: [(1, {1: 1})]})

            results = self.db.get_frequency_totals(TSDBModel.frequent_issues_by_project,
                                                   items, dts[0], dts[-1])
            assert has_shape(results, {1: {1: 1}})

            results = self.db.get_range(TSDBModel.project, [project_id], dts[0], dts[-1])
            assert has_shape(results, {1: [(1, 1)]})

            results = self.db.get_distinct_counts_series(TSDBModel.users_affected_by_project,
                                                         [project_id], dts[0], dts[-1])
            assert has_shape(results, {1: [(1, 1)]})

            results = self.db.get_distinct_counts_totals(TSDBModel.users_affected_by_project,
                                                         [project_id], dts[0], dts[-1])
            assert has_shape(results, {1: 1})

            results = self.db.get_distinct_counts_union(TSDBModel.users_affected_by_project,
                                                        [project_id], dts[0], dts[-1])
            assert has_shape(results, 1)

    @responses.activate
    def test_groups_request(self):
        now = parse_datetime('2018-03-09T01:00:00Z')
        dts = [now + timedelta(hours=i) for i in range(4)]
        project = self.create_project()
        group = self.create_group(project=project)
        GroupHash.objects.create(project=project, group=group, hash='0' * 32)
        group2 = self.create_group(project=project)
        GroupHash.objects.create(project=project, group=group2, hash='1' * 32)

        with responses.RequestsMock() as rsps:
            # The assertions on the request body ARE the test: they run inside
            # the mocked Snuba endpoint when get_range() issues its query.
            def snuba_response(request):
                body = json.loads(request.body)
                assert body['aggregations'] == [['count()', None, 'aggregate']]
                assert body['project'] == [project.id]
                assert body['groupby'] == ['issue', 'time']
                # Assert issue->hash map is generated, but only for referenced issues
                assert [group.id, ['0' * 32]] in body['issues']
                assert [group2.id, ['1' * 32]] not in body['issues']
                return (200, {}, json.dumps({
                    'data': [{'time': '2018-03-09T01:00:00Z', 'issue': 1, 'aggregate': 100}],
                    'meta': [{'name': 'time'}, {'name': 'issue'}, {'name': 'aggregate'}]
                }))

            rsps.add_callback(
                responses.POST,
                settings.SENTRY_SNUBA + '/query',
                callback=snuba_response)

            results = self.db.get_range(TSDBModel.group, [group.id], dts[0], dts[-1])
            assert results is not None

    @responses.activate
    def test_releases_request(self):
        now = parse_datetime('2018-03-09T01:00:00Z')
        project = self.create_project()
        release = Release.objects.create(
            organization_id=self.organization.id,
            version='version X',
            date_added=now,
        )
        release.add_project(project)
        dts = [now + timedelta(hours=i) for i in range(4)]

        with responses.RequestsMock() as rsps:
            def snuba_response(request):
                body = json.loads(request.body)
                assert body['aggregations'] == [['count()', None, 'aggregate']]
                assert body['project'] == [project.id]
                assert body['groupby'] == ['release', 'time']
                # Release ids are translated to version strings for the query.
                assert ['release', 'IN', ['version X']] in body['conditions']
                return (200, {}, json.dumps({
                    'data': [{'release': 'version X', 'time': '2018-03-09T01:00:00Z', 'aggregate': 100}],
                    'meta': [{'name': 'release'}, {'name': 'time'}, {'name': 'aggregate'}]
                }))

            rsps.add_callback(
                responses.POST,
                settings.SENTRY_SNUBA + '/query',
                callback=snuba_response)

            results = self.db.get_range(
                TSDBModel.release, [release.id], dts[0], dts[-1], rollup=3600)
            # The single mocked row maps back to the release id; other buckets zero-fill.
            assert results == {
                release.id: [
                    (int(to_timestamp(d)), 100 if d == now else 0)
                    for d in dts]
            }

    @responses.activate
    def test_environment_request(self):
        now = parse_datetime('2018-03-09T01:00:00Z')
        project = self.create_project()
        env = self.create_environment(project=project, name="prod")
        dts = [now + timedelta(hours=i) for i in range(4)]

        with responses.RequestsMock() as rsps:
            def snuba_response(request):
                body = json.loads(request.body)
                assert body['aggregations'] == [['count()', None, 'aggregate']]
                assert body['project'] == [project.id]
                assert body['groupby'] == ['project_id', 'time']
                # Environment id is translated to its name for the query.
                assert ['environment', 'IN', ['prod']] in body['conditions']
                return (200, {}, json.dumps({
                    'data': [{'project_id': project.id,
                              'time': '2018-03-09T01:00:00Z', 'aggregate': 100}],
                    'meta': [{'name': 'project_id'}, {'name': 'time'}, {'name': 'aggregate'}]
                }))

            rsps.add_callback(
                responses.POST,
                settings.SENTRY_SNUBA + '/query',
                callback=snuba_response)

            results = self.db.get_range(TSDBModel.project, [project.id],
                                        dts[0], dts[-1],
                                        environment_id=env.id, rollup=3600)
            assert results == {
                project.id: [
                    (int(to_timestamp(d)), 100 if d == now else 0)
                    for d in dts]
            }

    def test_invalid_model(self):
        with pytest.raises(Exception) as ex:
            self.db.get_range(TSDBModel.project_total_received_discarded, [], None, None)
        # NOTE(review): `.message` on exceptions is Python-2-only; under
        # Python 3 this would need `str(ex.value)` — confirm target runtime.
        assert "Unsupported TSDBModel" in ex.value.message
def setUp(self):
    # Fixture: a 4-hour window of events written via store_event(), plus the
    # release/environment/group-release rows the tests below query against.
    super().setUp()
    self.db = SnubaTSDB()
    # Midnight UTC at least 4 hours in the past, so the 4-hour event window
    # written below lies entirely before "now".
    self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
        hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC)
    self.proj1 = self.create_project()
    env1 = "test"
    env2 = "dev"
    defaultenv = ""
    release1 = "1" * 10
    release2 = "2" * 10

    self.release1 = Release.objects.create(
        organization_id=self.organization.id, version=release1, date_added=self.now)
    self.release1.add_project(self.proj1)
    self.release2 = Release.objects.create(
        organization_id=self.organization.id, version=release2, date_added=self.now)
    self.release2.add_project(self.proj1)

    for r in range(0, 14400, 600):  # Every 10 min for 4 hours
        self.store_event(
            data={
                "event_id": (str(r) * 32)[:32],
                "message": "message 1",
                "platform": "python",
                "fingerprint": [["group-1"], ["group-2"]][(r // 600) % 2],  # Switch every 10 mins
                "timestamp": iso_format(self.now + timedelta(seconds=r)),
                "tags": {
                    "foo": "bar",
                    "baz": "quux",
                    # Switch every 2 hours
                    # NOTE(review): `% 3` over a 2-element list avoids an
                    # IndexError only because r < 14400 keeps r // 7200 in {0, 1}.
                    "environment": [env1, None][(r // 7200) % 3],
                    # NOTE(review): format() call has no placeholder — the value is
                    # always the literal; looks redacted/garbled, confirm intent.
                    "sentry:user": "******".format(r // 3300),
                },
                "user": {
                    # change every 55 min so some hours have 1 user, some have 2
                    "id": "user{}".format(r // 3300),
                    "email": f"user{r}@sentry.io",
                },
                "release": str(r // 3600) * 10,  # 1 per hour,
            },
            project_id=self.proj1.id,
        )

    groups = Group.objects.filter(project=self.proj1).order_by("id")
    self.proj1group1 = groups[0]
    self.proj1group2 = groups[1]

    self.env1 = Environment.objects.get(name=env1)
    self.env2 = self.create_environment(name=env2)  # No events
    self.defaultenv = Environment.objects.get(name=defaultenv)

    # group1/release1 and group2/release1 associations already exist from the
    # stored events; the release2 association is created explicitly.
    self.group1release1env1 = GroupRelease.objects.get(
        project_id=self.proj1.id,
        group_id=self.proj1group1.id,
        release_id=self.release1.id,
        environment=env1,
    )
    self.group1release2env1 = GroupRelease.objects.create(
        project_id=self.proj1.id,
        group_id=self.proj1group1.id,
        release_id=self.release2.id,
        environment=env1,
    )
    self.group2release1env1 = GroupRelease.objects.get(
        project_id=self.proj1.id,
        group_id=self.proj1group2.id,
        release_id=self.release1.id,
        environment=env1,
    )
from sentry.reprocessing2 import get_progress SUBSCRIPTION_REASON_MAP = { GroupSubscriptionReason.comment: "commented", GroupSubscriptionReason.assigned: "assigned", GroupSubscriptionReason.bookmark: "bookmarked", GroupSubscriptionReason.status_change: "changed_status", GroupSubscriptionReason.mentioned: "mentioned", } disabled = object() # TODO(jess): remove when snuba is primary backend snuba_tsdb = SnubaTSDB(**settings.SENTRY_TSDB_OPTIONS) logger = logging.getLogger(__name__) def merge_list_dictionaries(dict1, dict2): for key, val in six.iteritems(dict2): dict1.setdefault(key, []).extend(val) class GroupSerializerBase(Serializer): def __init__( self, collapse=None, expand=None,
class SnubaTSDBTest(TestCase):
    # Older (Python 2 / six era) SnubaTSDB integration tests: fixtures are
    # inserted directly via the Snuba test HTTP endpoints rather than
    # store_event(), then each test asserts exact aggregate series.

    def setUp(self):
        # Wipe the Snuba test dataset so each run starts clean.
        assert requests.post(settings.SENTRY_SNUBA + '/tests/drop').status_code == 200

        self.db = SnubaTSDB()
        self.now = datetime.utcnow().replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC
        )

        self.proj1 = self.create_project()
        self.proj1env1 = self.create_environment(project=self.proj1, name='test')
        self.proj1env2 = self.create_environment(project=self.proj1, name='dev')

        self.proj1group1 = self.create_group(self.proj1)
        self.proj1group2 = self.create_group(self.proj1)

        hash1 = '1' * 32
        hash2 = '2' * 32

        GroupHash.objects.create(project=self.proj1, group=self.proj1group1, hash=hash1)
        GroupHash.objects.create(project=self.proj1, group=self.proj1group2, hash=hash2)

        self.release1 = Release.objects.create(
            organization_id=self.organization.id,
            version='1' * 10,
            date_added=self.now,
        )
        self.release1.add_project(self.proj1)
        self.release2 = Release.objects.create(
            organization_id=self.organization.id,
            version='2' * 10,
            date_added=self.now,
        )
        self.release2.add_project(self.proj1)

        # Raw event payloads: hash (and therefore group) alternates every 10
        # minutes; release changes hourly; user id changes every 55 minutes.
        data = json.dumps([{
            'event_id': (six.text_type(r) * 32)[:32],
            'primary_hash': [hash1, hash2][(r // 600) % 2],
            'project_id': self.proj1.id,
            'message': 'message 1',
            'platform': 'python',
            'datetime': (self.now + timedelta(seconds=r)).strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
            'data': {
                'received': calendar.timegm(self.now.timetuple()) + r,
                'tags': {
                    'foo': 'bar',
                    'baz': 'quux',
                    'environment': self.proj1env1.name,
                    'sentry:release': six.text_type(r // 3600) * 10,  # 1 per hour
                },
                'sentry.interfaces.User': {
                    # change every 55 min so some hours have 1 user, some have 2
                    'id': "user{}".format(r // 3300),
                    'email': "user{}@sentry.io".format(r)
                }
            },
        } for r in range(0, 14400, 600)])  # Every 10 min for 4 hours

        assert requests.post(settings.SENTRY_SNUBA + '/tests/insert',
                             data=data).status_code == 200

    def test_range_groups(self):
        # Hashes alternate every 10 minutes, so each group gets 3 events/hour.
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.group,
            [self.proj1group1.id],
            dts[0],
            dts[-1],
            rollup=3600
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }

        # Multiple groups
        assert self.db.get_range(
            TSDBModel.group,
            [self.proj1group1.id, self.proj1group2.id],
            dts[0],
            dts[-1],
            rollup=3600
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }

    def test_range_releases(self):
        # Events carry release "1"*10 (== release1) only during hour 1.
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.release,
            [self.release1.id],
            dts[0],
            dts[-1],
            rollup=3600
        ) == {
            self.release1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_project(self):
        # 6 events/hour project-wide; all events carry env1's name as tag.
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }

        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_id=self.proj1env1.id
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }

        # No events submitted for env2
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600,
            environment_id=self.proj1env2.id
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_rollups(self):
        # Daily
        daystart = self.now.replace(hour=0)  # day buckets start on day boundaries
        dts = [daystart + timedelta(days=i) for i in range(2)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=86400
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 24),
                (timestamp(dts[1]), 0)
            ]
        }

        # Minutely
        dts = [self.now + timedelta(minutes=i) for i in range(120)]
        # Expect every 10th minute to have a 1, else 0
        expected = [(to_timestamp(d), int(i % 10 == 0)) for i, d in enumerate(dts)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=60
        ) == {
            self.proj1.id: expected
        }

    def test_distinct_counts_series_users(self):
        # User id rotates every 3300s, so later hours straddle two distinct users.
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            dts[0],
            dts[-1],
            rollup=3600
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 1),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 2),
            ],
        }

        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project,
            [self.proj1.id],
            dts[0],
            dts[-1],
            rollup=3600
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 2),
                (timestamp(dts[2]), 2),
                (timestamp(dts[3]), 2),
            ],
        }

    # NOTE(review): missing the `test_` prefix, so the test runner never
    # collects this method — possibly a deliberate disable; confirm before renaming.
    def get_distinct_counts_totals_users(self):
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600
        ) == {
            self.proj1group1.id: 2,  # 2 unique users overall
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now,
            rollup=3600
        ) == {
            self.proj1group1.id: 1,  # Only 1 unique user in the first hour
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600
        ) == {
            self.proj1.id: 2,
        }

    def test_most_frequent(self):
        assert self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {
            self.proj1.id: [
                (self.proj1group1.id, 2.0),
                (self.proj1group2.id, 1.0),
            ],
        }

    def test_frequency_series(self):
        # Technically while we request both releases for group1
        # and only release 1 on group2, that distinction is lost
        # in the snuba query, and we return a frequency series for
        # both releases * both groups
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group,
            {
                self.proj1group1.id: (self.release1.id, self.release2.id, ),
                self.proj1group2.id: (self.release1.id, )
            },
            dts[0],
            dts[-1],
            rollup=3600,
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), {
                    self.release1.id: 0,
                    self.release2.id: 0,
                }),
                (timestamp(dts[1]), {
                    self.release1.id: 3,
                    self.release2.id: 0,
                }),
                (timestamp(dts[2]), {
                    self.release1.id: 0,
                    self.release2.id: 3,
                }),
                (timestamp(dts[3]), {
                    self.release1.id: 0,
                    self.release2.id: 0,
                }),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), {
                    self.release1.id: 0,
                    self.release2.id: 0,
                }),
                (timestamp(dts[1]), {
                    self.release1.id: 3,
                    self.release2.id: 0,
                }),
                (timestamp(dts[2]), {
                    self.release1.id: 0,
                    self.release2.id: 3,
                }),
                (timestamp(dts[3]), {
                    self.release1.id: 0,
                    self.release2.id: 0,
                }),
            ],
        }