コード例 #1
0
ファイル: test_snuba.py プロジェクト: zhangdinet/sentry
    def setUp(self):
        """Create the SnubaTSDB under test and the reference times the tests use."""
        super(SnubaTSDBTest, self).setUp()
        self.db = SnubaTSDB()

        # All reference times derive from a timezone-aware (UTC) "now".
        one_day = timedelta(days=1)
        self.now = datetime.now(pytz.utc)
        self.start_time = self.now - timedelta(days=7)
        self.one_day_later = self.start_time + one_day
        self.day_before_start_time = self.start_time - one_day
コード例 #2
0
 def __init__(self, **options):
     """
     Instantiate the dummy, redis and snuba backends, then hand whatever
     options remain to the parent initializer.

     The ``redis`` and ``snuba`` entries of ``options`` (if present) are
     removed and used as keyword arguments for the matching backend.
     """
     dummy_backend = DummyTSDB()
     redis_backend = RedisTSDB(**options.pop('redis', {}))
     snuba_backend = SnubaTSDB(**options.pop('snuba', {}))
     self.backends = {
         'dummy': dummy_backend,
         'redis': redis_backend,
         'snuba': snuba_backend,
     }
     super(RedisSnubaTSDB, self).__init__(**options)
コード例 #3
0
ファイル: redissnuba.py プロジェクト: zhangdinet/sentry
 def __init__(self, **options):
     """
     Create one backend per name and forward the leftover options upward.

     ``options["redis"]`` and ``options["snuba"]`` are popped (defaulting to
     an empty dict) and expanded into the respective backend constructors.
     """
     backends = {}
     backends["dummy"] = DummyTSDB()
     backends["redis"] = RedisTSDB(**options.pop("redis", {}))
     backends["snuba"] = SnubaTSDB(**options.pop("snuba", {}))
     self.backends = backends
     super(RedisSnubaTSDB, self).__init__(**options)
コード例 #4
0
    def __init__(self, switchover_timestamp=None, **options):
        """
        Set up a TSDB backend that prefers the Snuba outcomes and events
        datasets over redis wherever it can. Reads result in a Snuba query;
        writes are a noop because Snuba reads from outcomes.

        Note: Snuba outcomes consumers must be running to use this backend
        (these are distinct from the outcomes consumers in Sentry itself).

        :param switchover_timestamp: Optional timestamp, in the form returned
            by `time.time()`, after which reads start going to Snuba. Until
            that moment is reached, this backend degrades to Redis for *all*
            keys.

            Leaving it as `None` (the default) reads from Snuba immediately,
            which is equivalent to passing a timestamp in the past.
        """
        self.switchover_timestamp = switchover_timestamp

        # The "redis" and "snuba" option dicts are consumed here so only the
        # remaining options reach the parent initializer.
        backends = {}
        backends["dummy"] = DummyTSDB()
        backends["redis"] = RedisTSDB(**options.pop("redis", {}))
        backends["snuba"] = SnubaTSDB(**options.pop("snuba", {}))
        self.backends = backends

        super().__init__(**options)
コード例 #5
0
 def setUp(self):
     """Create the SnubaTSDB instance under test and store it as ``self.db``."""
     backend = SnubaTSDB()
     self.db = backend
コード例 #6
0
ファイル: test_tsdb_backend.py プロジェクト: yaoqi/sentry
    def setUp(self):
        """
        Build the fixture shared by the tests.

        Creates a project with four environments, two groups with known
        hashes, two releases and three group/release links, then posts 24
        events (one every 10 minutes over 4 hours, starting at ``self.now``)
        to the Snuba test insert endpoint and asserts the request succeeded.
        """
        super(SnubaTSDBTest, self).setUp()

        self.db = SnubaTSDB()
        # Midnight (UTC) of the current day; every event time is an offset from it.
        self.now = datetime.utcnow().replace(
            hour=0,
            minute=0,
            second=0,
            microsecond=0,
            tzinfo=pytz.UTC
        )

        self.proj1 = self.create_project()
        self.proj1env1 = self.create_environment(project=self.proj1, name='test')
        self.proj1env2 = self.create_environment(project=self.proj1, name='dev')
        self.proj1env3 = self.create_environment(project=self.proj1, name='staging')
        self.proj1defaultenv = self.create_environment(project=self.proj1, name='')

        self.proj1group1 = self.create_group(self.proj1)
        self.proj1group2 = self.create_group(self.proj1)

        hash1 = '1' * 32
        hash2 = '2' * 32
        GroupHash.objects.create(project=self.proj1, group=self.proj1group1, hash=hash1)
        GroupHash.objects.create(project=self.proj1, group=self.proj1group2, hash=hash2)

        self.release1 = Release.objects.create(
            organization_id=self.organization.id,
            version='1' * 10,
            date_added=self.now,
        )
        self.release1.add_project(self.proj1)
        self.release2 = Release.objects.create(
            organization_id=self.organization.id,
            version='2' * 10,
            date_added=self.now,
        )
        self.release2.add_project(self.proj1)

        self.group1release1 = GroupRelease.objects.create(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release1.id
        )
        self.group1release2 = GroupRelease.objects.create(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release2.id
        )
        self.group2release1 = GroupRelease.objects.create(
            project_id=self.proj1.id,
            group_id=self.proj1group2.id,
            release_id=self.release1.id
        )

        # ``r`` is the event's offset from ``self.now`` in seconds.
        data = json.dumps([{
            'event_id': (six.text_type(r) * 32)[:32],
            'primary_hash': [hash1, hash2][(r // 600) % 2],  # Switch every 10 mins
            'group_id': [self.proj1group1.id, self.proj1group2.id][(r // 600) % 2],
            'project_id': self.proj1.id,
            'message': 'message 1',
            'platform': 'python',
            'datetime': (self.now + timedelta(seconds=r)).strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
            'data': {
                'received': calendar.timegm(self.now.timetuple()) + r,
                'tags': {
                    'foo': 'bar',
                    'baz': 'quux',
                    # Switch every 2 hours. The index is taken modulo the
                    # length of the two-entry list so it can never run past
                    # the end if the event range is ever extended.
                    'environment': [self.proj1env1.name, None][(r // 7200) % 2],
                    'sentry:user': u'id:user{}'.format(r // 3300),
                    'sentry:release': six.text_type(r // 3600) * 10,  # 1 per hour
                },
                'user': {
                    # change every 55 min so some hours have 1 user, some have 2
                    'id': u"user{}".format(r // 3300),
                    'email': u"user{}@sentry.io".format(r)
                }
            },
        } for r in range(0, 14400, 600)])  # Every 10 min for 4 hours

        assert requests.post(settings.SENTRY_SNUBA + '/tests/insert', data=data).status_code == 200

        # snuba trims query windows based on first_seen/last_seen, so these need to be correct-ish
        self.proj1group1.first_seen = self.now
        self.proj1group1.last_seen = self.now + timedelta(seconds=14400)
        self.proj1group1.save()
        self.proj1group2.first_seen = self.now
        self.proj1group2.last_seen = self.now + timedelta(seconds=14400)
        self.proj1group2.save()
コード例 #7
0
ファイル: test_tsdb_backend.py プロジェクト: yoyopie/sentry
    def setUp(self):
        """
        Reset Snuba and build the fixture shared by the tests.

        Posts to the Snuba test drop endpoint, creates a project with three
        environments ('test', 'dev' and the empty-named one), two groups with
        known hashes and two releases, then posts 24 events (one every 10
        minutes over 4 hours, starting at ``self.now``) to the Snuba test
        insert endpoint. Both requests are asserted to return HTTP 200.
        """
        assert requests.post(settings.SENTRY_SNUBA +
                             '/tests/drop').status_code == 200

        self.db = SnubaTSDB()
        # Midnight (UTC) of the current day; every event time is an offset from it.
        self.now = datetime.utcnow().replace(hour=0,
                                             minute=0,
                                             second=0,
                                             microsecond=0,
                                             tzinfo=pytz.UTC)

        self.proj1 = self.create_project()
        self.proj1env1 = self.create_environment(project=self.proj1,
                                                 name='test')
        self.proj1env2 = self.create_environment(project=self.proj1,
                                                 name='dev')
        self.proj1defaultenv = self.create_environment(project=self.proj1,
                                                       name='')

        self.proj1group1 = self.create_group(self.proj1)
        self.proj1group2 = self.create_group(self.proj1)

        hash1 = '1' * 32
        hash2 = '2' * 32
        GroupHash.objects.create(project=self.proj1,
                                 group=self.proj1group1,
                                 hash=hash1)
        GroupHash.objects.create(project=self.proj1,
                                 group=self.proj1group2,
                                 hash=hash2)

        self.release1 = Release.objects.create(
            organization_id=self.organization.id,
            version='1' * 10,
            date_added=self.now,
        )
        self.release1.add_project(self.proj1)
        self.release2 = Release.objects.create(
            organization_id=self.organization.id,
            version='2' * 10,
            date_added=self.now,
        )
        self.release2.add_project(self.proj1)

        # ``r`` is the event's offset from ``self.now`` in seconds.
        data = json.dumps([
            {
                'event_id': (six.text_type(r) * 32)[:32],
                'primary_hash':
                [hash1, hash2][(r // 600) % 2],  # Switch every 10 mins
                'project_id':
                self.proj1.id,
                'message':
                'message 1',
                'platform':
                'python',
                'datetime':
                (self.now +
                 timedelta(seconds=r)).strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
                'data': {
                    'received': calendar.timegm(self.now.timetuple()) + r,
                    'tags': {
                        'foo': 'bar',
                        'baz': 'quux',
                        # Switch every 2 hours
                        'environment': [self.proj1env1.name,
                                        None][(r // 7200) % 2],
                        # The template needs a replacement field; without
                        # one ``format`` ignores its argument and every event
                        # would carry the same user tag. Changes every 55 min,
                        # in step with the user id below.
                        'sentry:user': 'id:user{}'.format(r // 3300),
                        'sentry:release':
                        six.text_type(r // 3600) * 10,  # 1 per hour
                    },
                    'sentry.interfaces.User': {
                        # change every 55 min so some hours have 1 user, some have 2
                        'id': "user{}".format(r // 3300),
                        'email': "user{}@sentry.io".format(r)
                    }
                },
            } for r in range(0, 14400, 600)
        ])  # Every 10 min for 4 hours

        assert requests.post(settings.SENTRY_SNUBA + '/tests/insert',
                             data=data).status_code == 200
コード例 #8
0
class SnubaTSDBTest(TestCase):
    """
    Tests for SnubaTSDB that run against a mocked Snuba ``/query`` endpoint.

    Each test registers a ``responses`` callback for POSTs to
    ``snuba.SNUBA + '/query'`` which checks the request body and/or returns
    a canned result.
    """

    def setUp(self):
        """Create the SnubaTSDB instance under test."""
        self.db = SnubaTSDB()

    @responses.activate
    def test_result_shape(self):
        """
        Tests that the results from the different TSDB methods have the
        expected format.
        """
        now = parse_datetime('2018-03-09T01:00:00Z')
        project_id = 194503
        dts = [now + timedelta(hours=i) for i in range(4)]

        with responses.RequestsMock() as rsps:

            def snuba_response(request):
                # Echo back one row containing a 1 for every requested
                # groupby column and aggregation alias.
                body = json.loads(request.body)
                aggs = body.get('aggregations', [])
                meta = [{
                    'name': col
                } for col in body['groupby'] + [a[2] for a in aggs]]
                datum = {col['name']: 1 for col in meta}
                if 'time' in datum:
                    datum['time'] = '2018-03-09T01:00:00Z'
                for agg in aggs:
                    # topK aggregations get a list value instead of a scalar.
                    if agg[0].startswith('topK'):
                        datum[agg[2]] = [1]
                return (200, {}, json.dumps({'data': [datum], 'meta': meta}))

            rsps.add_callback(responses.POST,
                              snuba.SNUBA + '/query',
                              callback=snuba_response)

            results = self.db.get_most_frequent(
                TSDBModel.frequent_issues_by_project, [project_id], dts[0],
                dts[-1])
            assert has_shape(results, {1: [(1, 1.0)]})

            results = self.db.get_most_frequent_series(
                TSDBModel.frequent_issues_by_project, [project_id], dts[0],
                dts[-1])
            assert has_shape(results, {1: [(1, {1: 1.0})]})

            items = {
                project_id:
                (0, 1, 2)  # {project_id: (issue_id, issue_id, ...)}
            }
            results = self.db.get_frequency_series(
                TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1])
            assert has_shape(results, {1: [(1, {1: 1})]})

            results = self.db.get_frequency_totals(
                TSDBModel.frequent_issues_by_project, items, dts[0], dts[-1])
            assert has_shape(results, {1: {1: 1}})

            results = self.db.get_range(TSDBModel.project, [project_id],
                                        dts[0], dts[-1])
            assert has_shape(results, {1: [(1, 1)]})

            results = self.db.get_distinct_counts_series(
                TSDBModel.users_affected_by_project, [project_id], dts[0],
                dts[-1])
            assert has_shape(results, {1: [(1, 1)]})

            results = self.db.get_distinct_counts_totals(
                TSDBModel.users_affected_by_project, [project_id], dts[0],
                dts[-1])
            assert has_shape(results, {1: 1})

            results = self.db.get_distinct_counts_union(
                TSDBModel.users_affected_by_project, [project_id], dts[0],
                dts[-1])
            assert has_shape(results, 1)

    @responses.activate
    def test_groups(self):
        """
        Checks the query sent for ``get_range`` on ``TSDBModel.group``: a
        count() aggregation grouped by issue and time, carrying an
        issue->hash map for the requested group only.
        """
        now = parse_datetime('2018-03-09T01:00:00Z')
        dts = [now + timedelta(hours=i) for i in range(4)]
        project = self.create_project()
        group = self.create_group(project=project)
        GroupHash.objects.create(project=project, group=group, hash='0' * 32)
        group2 = self.create_group(project=project)
        GroupHash.objects.create(project=project, group=group2, hash='1' * 32)

        with responses.RequestsMock() as rsps:

            def snuba_response(request):
                body = json.loads(request.body)
                assert body['aggregations'] == [['count()', None, 'aggregate']]
                assert body['project'] == [project.id]
                assert body['groupby'] == ['issue', 'time']

                # Assert issue->hash map is generated, but only for referenced issues
                assert [group.id, ['0' * 32]] in body['issues']
                assert [group2.id, ['1' * 32]] not in body['issues']

                return (200, {},
                        json.dumps({
                            'data': [{
                                'time': '2018-03-09T01:00:00Z',
                                'issue': 1,
                                'aggregate': 100
                            }],
                            'meta': [{
                                'name': 'time'
                            }, {
                                'name': 'issue'
                            }, {
                                'name': 'aggregate'
                            }]
                        }))

            rsps.add_callback(responses.POST,
                              snuba.SNUBA + '/query',
                              callback=snuba_response)
            results = self.db.get_range(TSDBModel.group, [group.id], dts[0],
                                        dts[-1])
            assert results is not None

    @responses.activate
    def test_releases(self):
        """
        Checks the query sent for ``get_range`` on ``TSDBModel.release`` (the
        release id is sent as a condition on its version string) and that the
        returned row is keyed by the release id in the result.
        """
        now = parse_datetime('2018-03-09T01:00:00Z')
        project = self.create_project()
        release = Release.objects.create(
            organization_id=self.organization.id,
            version='version X',
            date_added=now,
        )
        release.add_project(project)
        dts = [now + timedelta(hours=i) for i in range(4)]

        with responses.RequestsMock() as rsps:

            def snuba_response(request):
                body = json.loads(request.body)
                assert body['aggregations'] == [['count()', None, 'aggregate']]
                assert body['project'] == [project.id]
                assert body['groupby'] == ['release', 'time']
                assert ['release', 'IN', ['version X']] in body['conditions']
                return (200, {},
                        json.dumps({
                            'data': [{
                                'release': 'version X',
                                'time': '2018-03-09T01:00:00Z',
                                'aggregate': 100
                            }],
                            'meta': [{
                                'name': 'release'
                            }, {
                                'name': 'time'
                            }, {
                                'name': 'aggregate'
                            }]
                        }))

            rsps.add_callback(responses.POST,
                              snuba.SNUBA + '/query',
                              callback=snuba_response)
            results = self.db.get_range(TSDBModel.release, [release.id],
                                        dts[0], dts[-1])
            assert results == {release.id: [(to_timestamp(now), 100)]}

    @responses.activate
    def test_environment(self):
        """
        Checks that passing ``environment_id`` to ``get_range`` adds a
        condition on the environment's name to the Snuba query.
        """
        now = parse_datetime('2018-03-09T01:00:00Z')
        project = self.create_project()
        env = self.create_environment(project=project, name="prod")
        dts = [now + timedelta(hours=i) for i in range(4)]

        with responses.RequestsMock() as rsps:

            def snuba_response(request):
                body = json.loads(request.body)
                assert body['aggregations'] == [['count()', None, 'aggregate']]
                assert body['project'] == [project.id]
                assert body['groupby'] == ['project_id', 'time']
                assert ['environment', 'IN', ['prod']] in body['conditions']
                return (200, {},
                        json.dumps({
                            'data': [{
                                'project_id': project.id,
                                'time': '2018-03-09T01:00:00Z',
                                'aggregate': 100
                            }],
                            'meta': [{
                                'name': 'project_id'
                            }, {
                                'name': 'time'
                            }, {
                                'name': 'aggregate'
                            }]
                        }))

            rsps.add_callback(responses.POST,
                              snuba.SNUBA + '/query',
                              callback=snuba_response)
            results = self.db.get_range(TSDBModel.project, [project.id],
                                        dts[0],
                                        dts[-1],
                                        environment_id=env.id)
            assert results == {project.id: [(to_timestamp(now), 100)]}

    def test_invalid_model(self):
        """Checks that ``get_range`` raises for a model this test expects to be unsupported."""
        with pytest.raises(Exception) as ex:
            self.db.get_range(TSDBModel.project_total_received_discarded, [],
                              None, None)
        # Exceptions have no ``.message`` attribute on Python 3; ``str()`` of
        # the exception gives the message text on both Python 2 and 3.
        assert "Unsupported TSDBModel" in str(ex.value)
コード例 #9
0
ファイル: test_tsdb_backend.py プロジェクト: hosmelq/sentry
    def setUp(self):
        """
        Reset Snuba and build the fixture shared by the tests.

        Posts to the Snuba test drop endpoint, creates a project with two
        environments, two groups with known hashes and two releases, then
        posts 24 events (one every 10 minutes over 4 hours, starting at
        ``self.now``) to the Snuba test insert endpoint. Both requests are
        asserted to return HTTP 200.
        """
        drop_response = requests.post(settings.SENTRY_SNUBA + '/tests/drop')
        assert drop_response.status_code == 200

        self.db = SnubaTSDB()
        # Midnight (UTC) of the current day; every event time is an offset from it.
        self.now = datetime.utcnow().replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC)

        self.proj1 = self.create_project()
        self.proj1env1 = self.create_environment(project=self.proj1, name='test')
        self.proj1env2 = self.create_environment(project=self.proj1, name='dev')

        self.proj1group1 = self.create_group(self.proj1)
        self.proj1group2 = self.create_group(self.proj1)

        hash1 = '1' * 32
        hash2 = '2' * 32
        for group, group_hash in ((self.proj1group1, hash1), (self.proj1group2, hash2)):
            GroupHash.objects.create(project=self.proj1, group=group, hash=group_hash)

        organization_id = self.organization.id
        self.release1 = Release.objects.create(
            organization_id=organization_id, version='1' * 10, date_added=self.now)
        self.release1.add_project(self.proj1)
        self.release2 = Release.objects.create(
            organization_id=self.organization.id, version='2' * 10, date_added=self.now)
        self.release2.add_project(self.proj1)

        events = []
        for offset in range(0, 14400, 600):  # Every 10 min for 4 hours
            event_time = self.now + timedelta(seconds=offset)
            events.append({
                'event_id': (six.text_type(offset) * 32)[:32],
                'primary_hash': [hash1, hash2][(offset // 600) % 2],
                'project_id': self.proj1.id,
                'message': 'message 1',
                'platform': 'python',
                'datetime': event_time.strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
                'data': {
                    'received': calendar.timegm(self.now.timetuple()) + offset,
                    'tags': {
                        'foo': 'bar',
                        'baz': 'quux',
                        'environment': self.proj1env1.name,
                        'sentry:release': six.text_type(offset // 3600) * 10,  # 1 per hour
                    },
                    'sentry.interfaces.User': {
                        # change every 55 min so some hours have 1 user, some have 2
                        'id': "user{}".format(offset // 3300),
                        'email': "user{}@sentry.io".format(offset)
                    }
                },
            })
        data = json.dumps(events)

        insert_response = requests.post(settings.SENTRY_SNUBA + '/tests/insert', data=data)
        assert insert_response.status_code == 200
コード例 #10
0
ファイル: test_tsdb_backend.py プロジェクト: yoyopie/sentry
class SnubaTSDBTest(TestCase):
    def setUp(self):
        """
        Reset Snuba and build the fixture shared by the tests.

        Posts to the Snuba test drop endpoint, creates a project with three
        environments ('test', 'dev' and the empty-named one), two groups with
        known hashes and two releases, then posts 24 events (one every 10
        minutes over 4 hours, starting at ``self.now``) to the Snuba test
        insert endpoint. Both requests are asserted to return HTTP 200.
        """
        assert requests.post(settings.SENTRY_SNUBA +
                             '/tests/drop').status_code == 200

        self.db = SnubaTSDB()
        # Midnight (UTC) of the current day; every event time is an offset from it.
        self.now = datetime.utcnow().replace(hour=0,
                                             minute=0,
                                             second=0,
                                             microsecond=0,
                                             tzinfo=pytz.UTC)

        self.proj1 = self.create_project()
        self.proj1env1 = self.create_environment(project=self.proj1,
                                                 name='test')
        self.proj1env2 = self.create_environment(project=self.proj1,
                                                 name='dev')
        self.proj1defaultenv = self.create_environment(project=self.proj1,
                                                       name='')

        self.proj1group1 = self.create_group(self.proj1)
        self.proj1group2 = self.create_group(self.proj1)

        hash1 = '1' * 32
        hash2 = '2' * 32
        GroupHash.objects.create(project=self.proj1,
                                 group=self.proj1group1,
                                 hash=hash1)
        GroupHash.objects.create(project=self.proj1,
                                 group=self.proj1group2,
                                 hash=hash2)

        self.release1 = Release.objects.create(
            organization_id=self.organization.id,
            version='1' * 10,
            date_added=self.now,
        )
        self.release1.add_project(self.proj1)
        self.release2 = Release.objects.create(
            organization_id=self.organization.id,
            version='2' * 10,
            date_added=self.now,
        )
        self.release2.add_project(self.proj1)

        # ``r`` is the event's offset from ``self.now`` in seconds.
        data = json.dumps([
            {
                'event_id': (six.text_type(r) * 32)[:32],
                'primary_hash':
                [hash1, hash2][(r // 600) % 2],  # Switch every 10 mins
                'project_id':
                self.proj1.id,
                'message':
                'message 1',
                'platform':
                'python',
                'datetime':
                (self.now +
                 timedelta(seconds=r)).strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
                'data': {
                    'received': calendar.timegm(self.now.timetuple()) + r,
                    'tags': {
                        'foo': 'bar',
                        'baz': 'quux',
                        # Switch every 2 hours
                        'environment': [self.proj1env1.name,
                                        None][(r // 7200) % 2],
                        # The template needs a replacement field; without
                        # one ``format`` ignores its argument and every event
                        # would carry the same user tag. Changes every 55 min,
                        # in step with the user id below.
                        'sentry:user': 'id:user{}'.format(r // 3300),
                        'sentry:release':
                        six.text_type(r // 3600) * 10,  # 1 per hour
                    },
                    'sentry.interfaces.User': {
                        # change every 55 min so some hours have 1 user, some have 2
                        'id': "user{}".format(r // 3300),
                        'email': "user{}@sentry.io".format(r)
                    }
                },
            } for r in range(0, 14400, 600)
        ])  # Every 10 min for 4 hours

        assert requests.post(settings.SENTRY_SNUBA + '/tests/insert',
                             data=data).status_code == 200

    def test_range_groups(self):
        """Hourly ``get_range`` counts per group: first one group, then both."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        start, end = dts[0], dts[-1]

        # Single group: the test expects 3 events in each hourly bucket.
        single = self.db.get_range(TSDBModel.group, [self.proj1group1.id],
                                   start, end, rollup=3600)
        assert single == {
            self.proj1group1.id: [(timestamp(dt), 3) for dt in dts],
        }

        # Multiple groups
        group_ids = [self.proj1group1.id, self.proj1group2.id]
        both = self.db.get_range(TSDBModel.group, group_ids,
                                 start, end, rollup=3600)
        assert both == {
            group_id: [(timestamp(dt), 3) for dt in dts]
            for group_id in group_ids
        }

    def test_range_releases(self):
        """Hourly ``get_range`` counts for release1: only the second hour is non-zero."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        counts = self.db.get_range(TSDBModel.release, [self.release1.id],
                                   dts[0], dts[-1], rollup=3600)

        # Expected count for each of the four hourly buckets, in order.
        hourly = (0, 6, 0, 0)
        assert counts == {
            self.release1.id: [(timestamp(dt), n) for dt, n in zip(dts, hourly)],
        }

    def test_range_project(self):
        """Hourly ``get_range`` counts for the project: 6 events in every hour."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        counts = self.db.get_range(TSDBModel.project, [self.proj1.id],
                                   dts[0], dts[-1], rollup=3600)
        assert counts == {
            self.proj1.id: [(timestamp(dt), 6) for dt in dts],
        }

    def test_range_environment_filter(self):
        """Hourly project counts when ``get_range`` is filtered by environment."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]

        def counts_for(environment):
            # Hourly project counts restricted to the given environment.
            return self.db.get_range(TSDBModel.project, [self.proj1.id],
                                     dts[0], dts[-1],
                                     rollup=3600,
                                     environment_id=environment.id)

        def expected(*hourly):
            # Pair each hourly bucket timestamp with its expected count.
            return {
                self.proj1.id: [(timestamp(dt), n) for dt, n in zip(dts, hourly)],
            }

        assert counts_for(self.proj1env1) == expected(6, 6, 0, 0)

        # No events submitted for env2
        assert counts_for(self.proj1env2) == expected(0, 0, 0, 0)

        # Events submitted with no environment should match default environment
        assert counts_for(self.proj1defaultenv) == expected(0, 0, 6, 6)

    def test_range_rollups(self):
        """``get_range`` project counts with a daily and then a per-minute rollup."""
        # Daily
        # day buckets start on day boundaries
        daystart = self.now.replace(hour=0)
        dts = [daystart + timedelta(days=i) for i in range(2)]
        daily = self.db.get_range(TSDBModel.project, [self.proj1.id],
                                  dts[0], dts[-1], rollup=86400)
        assert daily == {
            self.proj1.id: [
                (timestamp(dts[0]), 24),
                (timestamp(dts[1]), 0),
            ],
        }

        # Minutely
        dts = [self.now + timedelta(minutes=i) for i in range(120)]
        # Expect every 10th minute to have a 1, else 0
        expected = []
        for i, d in enumerate(dts):
            expected.append((to_timestamp(d), 1 if i % 10 == 0 else 0))
        minutely = self.db.get_range(TSDBModel.project, [self.proj1.id],
                                     dts[0], dts[-1], rollup=60)
        assert minutely == {self.proj1.id: expected}

    def test_distinct_counts_series_users(self):
        """Hourly distinct-user counts, first for one group and then for the project."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        by_group = self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group, [self.proj1group1.id],
            dts[0], dts[-1], rollup=3600)
        group_hourly = (1, 1, 1, 2)
        assert by_group == {
            self.proj1group1.id: [
                (timestamp(dt), n) for dt, n in zip(dts, group_hourly)
            ],
        }

        dts = [self.now + timedelta(hours=i) for i in range(4)]
        by_project = self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project, [self.proj1.id],
            dts[0], dts[-1], rollup=3600)
        project_hourly = (1, 2, 2, 2)
        assert by_project == {
            self.proj1.id: [
                (timestamp(dt), n) for dt, n in zip(dts, project_hourly)
            ],
        }

    def get_distinct_counts_totals_users(self):
        """
        Total distinct-user counts for a group and for the project.

        NOTE(review): unlike its siblings this method's name does not start
        with ``test_``, so name-based test discovery presumably skips it --
        confirm whether that is intentional before renaming.
        """
        four_hours_on = self.now + timedelta(hours=4)

        group_total = self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group, [self.proj1group1.id],
            self.now, four_hours_on, rollup=3600)
        # 2 unique users overall
        assert group_total == {self.proj1group1.id: 2}

        first_hour = self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group, [self.proj1group1.id],
            self.now, self.now, rollup=3600)
        # Only 1 unique user in the first hour
        assert first_hour == {self.proj1group1.id: 1}

        project_total = self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project, [self.proj1.id],
            self.now, self.now + timedelta(hours=4), rollup=3600)
        assert project_total == {self.proj1.id: 2}

    def test_most_frequent(self):
        """Check the issues returned as most frequent for the project."""
        project_id = self.proj1.id
        window_start = self.now

        ranking = self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [project_id],
            window_start,
            window_start + timedelta(hours=4),
            rollup=3600,
        )

        # (issue id, score) pairs in the order the backend is expected to
        # return them.
        expected_ranking = [
            (self.proj1group1.id, 2.0),
            (self.proj1group2.id, 1.0),
        ]
        assert ranking == {project_id: expected_ranking}

    def test_frequency_series(self):
        """Check the hourly per-release frequency series for both groups."""
        hours = [self.now + timedelta(hours=offset) for offset in range(4)]
        first_group = self.proj1group1.id
        second_group = self.proj1group2.id
        first_release = self.release1.id
        second_release = self.release2.id

        series = self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group,
            {
                first_group: (first_release, second_release),
                second_group: (first_release, ),
            },
            hours[0],
            hours[-1],
            rollup=3600,
        )

        # Expected counts per hour, as (release1, release2) for the first
        # group and as release1 alone for the second group.
        first_group_counts = ((0, 0), (3, 0), (0, 3), (0, 0))
        second_group_counts = (0, 3, 0, 0)
        expected = {
            first_group: [
                (timestamp(hour), {
                    first_release: in_first,
                    second_release: in_second,
                })
                for hour, (in_first, in_second)
                in zip(hours, first_group_counts)
            ],
            second_group: [
                (timestamp(hour), {first_release: in_first})
                for hour, in_first in zip(hours, second_group_counts)
            ],
        }
        assert series == expected

    def test_result_shape(self):
        """
        Verify that each TSDB read method returns a structure matching the
        template passed to ``has_shape``.
        """
        project_id = self.proj1.id
        hours = [self.now + timedelta(hours=offset) for offset in range(4)]
        first_hour, last_hour = hours[0], hours[-1]
        project_keys = [project_id]
        issues_model = TSDBModel.frequent_issues_by_project
        users_model = TSDBModel.users_affected_by_project

        # Most-frequent queries use a window whose start and end coincide.
        most_frequent = self.db.get_most_frequent(
            issues_model, project_keys, first_hour, first_hour)
        assert has_shape(most_frequent, {1: [(1, 1.0)]})

        most_frequent_series = self.db.get_most_frequent_series(
            issues_model, project_keys, first_hour, first_hour)
        assert has_shape(most_frequent_series, {1: [(1, {1: 1.0})]})

        # {project_id: (issue_id, issue_id, ...)}
        items = {project_id: (self.proj1group1.id, self.proj1group2.id)}

        frequency_series = self.db.get_frequency_series(
            issues_model, items, first_hour, last_hour)
        assert has_shape(frequency_series, {1: [(1, {1: 1})]})

        frequency_totals = self.db.get_frequency_totals(
            issues_model, items, first_hour, last_hour)
        assert has_shape(frequency_totals, {1: {1: 1}})

        counts = self.db.get_range(
            TSDBModel.project, project_keys, first_hour, last_hour)
        assert has_shape(counts, {1: [(1, 1)]})

        distinct_series = self.db.get_distinct_counts_series(
            users_model, project_keys, first_hour, last_hour)
        assert has_shape(distinct_series, {1: [(1, 1)]})

        distinct_totals = self.db.get_distinct_counts_totals(
            users_model, project_keys, first_hour, last_hour)
        assert has_shape(distinct_totals, {1: 1})

        distinct_union = self.db.get_distinct_counts_union(
            users_model, project_keys, first_hour, last_hour)
        assert has_shape(distinct_union, 1)
コード例 #11
0
 def __init__(self, **options):
     """
     Build the dummy, redis and snuba backends.

     The ``redis`` and ``snuba`` entries are removed from ``options`` and
     passed to their own backend; whatever is left goes to the parent
     initializer.
     """
     self.dummy = DummyTSDB()

     redis_options = options.pop('redis', {})
     self.redis = RedisTSDB(**redis_options)

     snuba_options = options.pop('snuba', {})
     self.snuba = SnubaTSDB(**snuba_options)

     super(RedisSnubaTSDB, self).__init__(**options)
コード例 #12
0
ファイル: test_snuba.py プロジェクト: hosmelq/sentry
 def setUp(self):
     """Create the ``SnubaTSDB`` instance that the tests read through."""
     # NOTE(review): the parent ``setUp`` is not called here; confirm the
     # base class does not need it.
     backend = SnubaTSDB()
     self.db = backend
コード例 #13
0
ファイル: test_snuba.py プロジェクト: veekram/sentry
class SnubaTSDBTest(OutcomesSnubaTest):
    """
    Tests for ``SnubaTSDB.get_range`` over the organization, project and key
    outcome models, plus checks on the backend's query-settings tables.
    """

    def setUp(self):
        super().setUp()
        self.db = SnubaTSDB()

        # Reference instants shared by every test: a query window that starts
        # a week ago, a point one day into it and a point one day before it.
        one_day = timedelta(days=1)
        self.now = datetime.now(pytz.utc)
        self.start_time = self.now - timedelta(days=7)
        self.one_day_later = self.start_time + one_day
        self.day_before_start_time = self.start_time - one_day

    def _outcome_cases(self, received, rejected, blacklisted):
        """
        Yield ``(model, granularity, floor_func, start_count, later_count)``
        for the three given models, first at hourly and then at 10 second
        granularity.
        """
        for granularity, floor_func in (
            (3600, floor_to_hour_epoch),
            (10, floor_to_10s_epoch),
        ):
            yield received, granularity, floor_func, 3 * 3, 4 * 3
            yield rejected, granularity, floor_func, 3, 4
            yield blacklisted, granularity, floor_func, 3, 4

    def _assert_outcome_series(self, series, floor_func, start_time_count, day_later_count):
        """
        Assert that ``series`` holds the expected counts in the buckets of
        ``self.start_time`` and ``self.one_day_later`` and zero elsewhere.
        """
        start_bucket = floor_func(self.start_time)
        later_bucket = floor_func(self.one_day_later)
        counts_by_bucket = dict(series)

        assert counts_by_bucket[start_bucket] == start_time_count
        assert counts_by_bucket[later_bucket] == day_later_count

        for bucket, count in series:
            if bucket not in (start_bucket, later_bucket):
                assert count == 0

    def test_organization_outcomes(self):
        other_organization = self.create_organization()
        organization_id = self.organization.id
        project_id = self.project.id

        for outcome in (Outcome.ACCEPTED, Outcome.RATE_LIMITED, Outcome.FILTERED):
            value = outcome.value
            self.store_outcomes(organization_id, project_id, value, self.start_time, 1, 3)
            self.store_outcomes(organization_id, project_id, value, self.one_day_later, 1, 4)

            # Also create some outcomes we shouldn't be querying: one batch
            # for another organization, one before the query window.
            self.store_outcomes(
                other_organization.id, project_id, value, self.one_day_later, 1, 5
            )
            self.store_outcomes(
                organization_id, project_id, value, self.day_before_start_time, 1, 6
            )

        cases = self._outcome_cases(
            TSDBModel.organization_total_received,
            TSDBModel.organization_total_rejected,
            TSDBModel.organization_total_blacklisted,
        )
        for tsdb_model, granularity, floor_func, start_time_count, day_later_count in cases:
            # Query SnubaTSDB
            response = self.db.get_range(
                tsdb_model, [organization_id], self.start_time, self.now, granularity, None
            )

            # The response must have values set for the times we expect, and nothing more
            assert organization_id in response
            self._assert_outcome_series(
                response[organization_id], floor_func, start_time_count, day_later_count
            )

    def test_project_outcomes(self):
        other_project = self.create_project(organization=self.organization)
        organization_id = self.organization.id
        project_id = self.project.id

        for outcome in (Outcome.ACCEPTED, Outcome.RATE_LIMITED, Outcome.FILTERED):
            value = outcome.value
            self.store_outcomes(organization_id, project_id, value, self.start_time, 1, 3)
            self.store_outcomes(organization_id, project_id, value, self.one_day_later, 1, 4)

            # Also create some outcomes we shouldn't be querying: one batch
            # for another project, one before the query window.
            self.store_outcomes(
                organization_id, other_project.id, value, self.one_day_later, 1, 5
            )
            self.store_outcomes(
                organization_id, project_id, value, self.day_before_start_time, 1, 6
            )

        cases = self._outcome_cases(
            TSDBModel.project_total_received,
            TSDBModel.project_total_rejected,
            TSDBModel.project_total_blacklisted,
        )
        for tsdb_model, granularity, floor_func, start_time_count, day_later_count in cases:
            response = self.db.get_range(
                tsdb_model, [project_id], self.start_time, self.now, granularity, None
            )

            # The response must have values set for the times we expect, and nothing more
            assert project_id in response
            self._assert_outcome_series(
                response[project_id], floor_func, start_time_count, day_later_count
            )

    def test_key_outcomes(self):
        project_key = self.create_project_key(project=self.project)
        other_project = self.create_project(organization=self.organization)
        other_project_key = self.create_project_key(project=other_project)

        organization_id = self.organization.id
        project_id = self.project.id
        key_id = project_key.id

        for outcome in (Outcome.ACCEPTED, Outcome.RATE_LIMITED, Outcome.FILTERED):
            value = outcome.value
            self.store_outcomes(organization_id, project_id, value, self.start_time, key_id, 3)
            self.store_outcomes(
                organization_id, project_id, value, self.one_day_later, key_id, 4
            )

            # Also create some outcomes we shouldn't be querying: one batch
            # for another key, one before the query window.
            self.store_outcomes(
                organization_id, project_id, value, self.one_day_later, other_project_key.id, 5
            )
            self.store_outcomes(
                organization_id, project_id, value, self.day_before_start_time, key_id, 6
            )

        cases = self._outcome_cases(
            TSDBModel.key_total_received,
            TSDBModel.key_total_rejected,
            TSDBModel.key_total_blacklisted,
        )
        for tsdb_model, granularity, floor_func, start_time_count, day_later_count in cases:
            # with [project_key.id, str(project_key.id)], we are imitating the hack in
            # project_key_stats.py cause that is what `get_range` will be called with.
            queried_keys = [key_id, str(key_id)]
            response = self.db.get_range(
                tsdb_model, queried_keys, self.start_time, self.now, granularity, None
            )

            # The response must have values set for the times we expect, and nothing more
            assert key_id in response
            self._assert_outcome_series(
                response[key_id], floor_func, start_time_count, day_later_count
            )

    def test_all_tsdb_models_have_an_entry_in_model_query_settings(self):
        # Ensure that the models we expect to be using Snuba are using Snuba
        # (project_total_forwarded is not outcomes and will be moved separately)
        skipped = [TSDBModel.project_total_forwarded]

        # does not include the internal TSDB model
        checked_models = [
            candidate
            for candidate in TSDBModel
            if 0 < candidate.value < 700 and candidate not in skipped
        ]
        for candidate in checked_models:
            assert candidate in SnubaTSDB.model_query_settings

    def test_outcomes_have_a_10s_setting(self):
        # project_total_forwarded is not outcomes and will be moved separately
        skipped = [TSDBModel.project_total_forwarded]

        # Half-open [low, high) ranges of model values that count as outcomes.
        outcome_value_ranges = (
            (100, 200),  # project outcomes
            (200, 300),  # organization outcomes
            (500, 600),  # key outcomes
            (600, 700),  # filtered project based outcomes
        )

        def is_an_outcome(model):
            if model in skipped:
                return False
            return any(low <= model.value < high for low, high in outcome_value_ranges)

        outcome_models = [candidate for candidate in TSDBModel if is_an_outcome(candidate)]
        for candidate in outcome_models:
            assert candidate in SnubaTSDB.lower_rollup_query_settings
コード例 #14
0
ファイル: test_tsdb_backend.py プロジェクト: yaoqi/sentry
class SnubaTSDBTest(TestCase, SnubaTestCase):
    """
    Tests for the read methods of ``SnubaTSDB`` against a fixed set of events
    that ``setUp`` inserts through Snuba's test insert endpoint.
    """

    def setUp(self):
        """
        Create one project with four environments, two groups, two releases
        and three group/release links, then insert 24 events (one every ten
        minutes for four hours, starting at ``self.now``).
        """
        super(SnubaTSDBTest, self).setUp()

        self.db = SnubaTSDB()
        # Midnight UTC of the current day; every event time is an offset
        # from this instant.
        self.now = datetime.utcnow().replace(
            hour=0,
            minute=0,
            second=0,
            microsecond=0,
            tzinfo=pytz.UTC
        )

        self.proj1 = self.create_project()
        self.proj1env1 = self.create_environment(project=self.proj1, name='test')
        self.proj1env2 = self.create_environment(project=self.proj1, name='dev')
        self.proj1env3 = self.create_environment(project=self.proj1, name='staging')
        self.proj1defaultenv = self.create_environment(project=self.proj1, name='')

        self.proj1group1 = self.create_group(self.proj1)
        self.proj1group2 = self.create_group(self.proj1)

        hash1 = '1' * 32
        hash2 = '2' * 32
        GroupHash.objects.create(project=self.proj1, group=self.proj1group1, hash=hash1)
        GroupHash.objects.create(project=self.proj1, group=self.proj1group2, hash=hash2)

        # The release versions ('1' * 10 and '2' * 10) equal the
        # 'sentry:release' tag that the events below carry during their
        # second and third hour respectively.
        self.release1 = Release.objects.create(
            organization_id=self.organization.id,
            version='1' * 10,
            date_added=self.now,
        )
        self.release1.add_project(self.proj1)
        self.release2 = Release.objects.create(
            organization_id=self.organization.id,
            version='2' * 10,
            date_added=self.now,
        )
        self.release2.add_project(self.proj1)

        self.group1release1 = GroupRelease.objects.create(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release1.id
        )
        self.group1release2 = GroupRelease.objects.create(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release2.id
        )
        self.group2release1 = GroupRelease.objects.create(
            project_id=self.proj1.id,
            group_id=self.proj1group2.id,
            release_id=self.release1.id
        )

        # ``r`` is the event's offset from ``self.now`` in seconds.
        data = json.dumps([{
            'event_id': (six.text_type(r) * 32)[:32],
            'primary_hash': [hash1, hash2][(r // 600) % 2],  # Switch every 10 mins
            'group_id': [self.proj1group1.id, self.proj1group2.id][(r // 600) % 2],
            'project_id': self.proj1.id,
            'message': 'message 1',
            'platform': 'python',
            'datetime': (self.now + timedelta(seconds=r)).strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
            'data': {
                'received': calendar.timegm(self.now.timetuple()) + r,
                'tags': {
                    'foo': 'bar',
                    'baz': 'quux',
                    # Switch every 2 hours
                    # (r stays below 14400, so the index is only ever 0 or 1)
                    'environment': [self.proj1env1.name, None][(r // 7200) % 3],
                    'sentry:user': u'id:user{}'.format(r // 3300),
                    'sentry:release': six.text_type(r // 3600) * 10,  # 1 per hour
                },
                'user': {
                    # change every 55 min so some hours have 1 user, some have 2
                    'id': u"user{}".format(r // 3300),
                    'email': u"user{}@sentry.io".format(r)
                }
            },
        } for r in range(0, 14400, 600)])  # Every 10 min for 4 hours

        # Fail the fixture immediately if Snuba does not accept the events.
        assert requests.post(settings.SENTRY_SNUBA + '/tests/insert', data=data).status_code == 200

        # snuba trims query windows based on first_seen/last_seen, so these need to be correct-ish
        self.proj1group1.first_seen = self.now
        self.proj1group1.last_seen = self.now + timedelta(seconds=14400)
        self.proj1group1.save()
        self.proj1group2.first_seen = self.now
        self.proj1group2.last_seen = self.now + timedelta(seconds=14400)
        self.proj1group2.save()

    def test_range_groups(self):
        """Hourly event counts per group: one group, two groups, no keys."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        # Events alternate between the two groups every ten minutes, so each
        # group receives three of the six events in every hour.
        assert self.db.get_range(
            TSDBModel.group,
            [self.proj1group1.id],
            dts[0], dts[-1],
            rollup=3600
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }

        # Multiple groups
        assert self.db.get_range(
            TSDBModel.group,
            [self.proj1group1.id, self.proj1group2.id],
            dts[0], dts[-1],
            rollup=3600
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), 3),
                (timestamp(dts[1]), 3),
                (timestamp(dts[2]), 3),
                (timestamp(dts[3]), 3),
            ],
        }

        # An empty key list is expected to give an empty result.
        assert self.db.get_range(
            TSDBModel.group,
            [],
            dts[0], dts[-1],
            rollup=3600
        ) == {}

    def test_range_releases(self):
        """Hourly event counts for a single release."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        # Only the events of the second hour carry release1's version as
        # their 'sentry:release' tag (see setUp).
        assert self.db.get_range(
            TSDBModel.release,
            [self.release1.id],
            dts[0], dts[-1],
            rollup=3600
        ) == {
            self.release1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

    def test_range_project(self):
        """Hourly event counts for the project: six events in each hour."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0], dts[-1],
            rollup=3600
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }

    def test_range_environment_filter(self):
        """Hourly project counts restricted by ``environment_ids``."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        # setUp tags the first two hours of events with proj1env1's name and
        # the last two hours with no environment.
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0], dts[-1],
            rollup=3600,
            environment_ids=[self.proj1env1.id]
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 6),
                (timestamp(dts[1]), 6),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

        # No events submitted for env2
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0], dts[-1],
            rollup=3600,
            environment_ids=[self.proj1env2.id],
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 0),
                (timestamp(dts[3]), 0),
            ]
        }

        # Events submitted with no environment should match default environment
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0], dts[-1],
            rollup=3600,
            environment_ids=[self.proj1defaultenv.id],
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 0),
                (timestamp(dts[1]), 0),
                (timestamp(dts[2]), 6),
                (timestamp(dts[3]), 6),
            ]
        }

    def test_range_rollups(self):
        """Project counts at daily (86400 s) and minutely (60 s) rollups."""
        # Daily
        # NOTE(review): setUp already sets self.now to hour=0, so this
        # replace() leaves the value unchanged.
        daystart = self.now.replace(hour=0)  # day buckets start on day boundaries
        dts = [daystart + timedelta(days=i) for i in range(2)]
        # All 24 events fall into the first day.
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0], dts[-1],
            rollup=86400
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 24),
                (timestamp(dts[1]), 0)
            ]
        }

        # Minutely
        dts = [self.now + timedelta(minutes=i) for i in range(120)]
        # Expect every 10th minute to have a 1, else 0
        expected = [(to_timestamp(d), int(i % 10 == 0)) for i, d in enumerate(dts)]
        assert self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            dts[0], dts[-1],
            rollup=60
        ) == {
            self.proj1.id: expected
        }

    def test_distinct_counts_series_users(self):
        """Hourly distinct-user counts per group, per project and for no keys."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        # The user id in setUp changes every 3300 seconds, so an hour can
        # contain events from one or from two users.
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            dts[0], dts[-1],
            rollup=3600
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 1),
                (timestamp(dts[2]), 1),
                (timestamp(dts[3]), 2),
            ],
        }

        dts = [self.now + timedelta(hours=i) for i in range(4)]
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project,
            [self.proj1.id],
            dts[0], dts[-1],
            rollup=3600
        ) == {
            self.proj1.id: [
                (timestamp(dts[0]), 1),
                (timestamp(dts[1]), 2),
                (timestamp(dts[2]), 2),
                (timestamp(dts[3]), 2),
            ],
        }

        # An empty key list is expected to give an empty result.
        assert self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group,
            [],
            dts[0], dts[-1],
            rollup=3600,
        ) == {}

    # NOTE(review): this name has no ``test`` prefix, so default
    # unittest/pytest discovery presumably never runs it. setUp derives user
    # ids from ``r // 3300`` with r up to 13800 (user0 .. user4), so the
    # four-hour totals of 2 asserted below look suspect; verify them before
    # renaming this into a collected test.
    def get_distinct_counts_totals_users(self):
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600
        ) == {
            self.proj1group1.id: 2,  # 2 unique users overall
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now,
            rollup=3600
        ) == {
            self.proj1group1.id: 1,  # Only 1 unique user in the first hour
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600
        ) == {
            self.proj1.id: 2,
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600
        ) == {}

    def test_most_frequent(self):
        """Most frequent issues for the project, and the empty-keys case."""
        assert self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [self.proj1.id],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {
            self.proj1.id: [
                (self.proj1group1.id, 2.0),
                (self.proj1group2.id, 1.0),
            ],
        }

        # An empty key list is expected to give an empty result.
        assert self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [],
            self.now,
            self.now + timedelta(hours=4),
            rollup=3600,
        ) == {}

    def test_frequency_series(self):
        """Hourly per-release frequencies for each group."""
        dts = [self.now + timedelta(hours=i) for i in range(4)]
        # The items passed here are GroupRelease ids, not Release ids.
        assert self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group,
            {
                self.proj1group1.id: (self.group1release1.id, self.group1release2.id, ),
                self.proj1group2.id: (self.group2release1.id, )
            },
            dts[0], dts[-1],
            rollup=3600,
        ) == {
            self.proj1group1.id: [
                (timestamp(dts[0]), {
                    self.group1release1.id: 0,
                    self.group1release2.id: 0,
                }),
                (timestamp(dts[1]), {
                    self.group1release1.id: 3,
                    self.group1release2.id: 0,
                }),
                (timestamp(dts[2]), {
                    self.group1release1.id: 0,
                    self.group1release2.id: 3,
                }),
                (timestamp(dts[3]), {
                    self.group1release1.id: 0,
                    self.group1release2.id: 0,
                }),
            ],
            self.proj1group2.id: [
                (timestamp(dts[0]), {
                    self.group2release1.id: 0,
                }),
                (timestamp(dts[1]), {
                    self.group2release1.id: 3,
                }),
                (timestamp(dts[2]), {
                    self.group2release1.id: 0,
                }),
                (timestamp(dts[3]), {
                    self.group2release1.id: 0,
                }),
            ],
        }

        # An empty items mapping is expected to give an empty result.
        assert self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group,
            {},
            dts[0], dts[-1],
            rollup=3600,
        ) == {}

    def test_result_shape(self):
        """
        Tests that the results from the different TSDB methods have the
        expected format.
        """
        project_id = self.proj1.id
        dts = [self.now + timedelta(hours=i) for i in range(4)]

        # Each result is compared with has_shape() against a small template
        # structure rather than against concrete values.
        results = self.db.get_most_frequent(TSDBModel.frequent_issues_by_project,
                                            [project_id], dts[0], dts[0])
        assert has_shape(results, {1: [(1, 1.0)]})

        results = self.db.get_most_frequent_series(TSDBModel.frequent_issues_by_project,
                                                   [project_id], dts[0], dts[0])
        assert has_shape(results, {1: [(1, {1: 1.0})]})

        items = {
            # {project_id: (issue_id, issue_id, ...)}
            project_id: (self.proj1group1.id, self.proj1group2.id)
        }
        results = self.db.get_frequency_series(TSDBModel.frequent_issues_by_project,
                                               items, dts[0], dts[-1])
        assert has_shape(results, {1: [(1, {1: 1})]})

        results = self.db.get_frequency_totals(TSDBModel.frequent_issues_by_project,
                                               items, dts[0], dts[-1])
        assert has_shape(results, {1: {1: 1}})

        results = self.db.get_range(TSDBModel.project, [project_id], dts[0], dts[-1])
        assert has_shape(results, {1: [(1, 1)]})

        results = self.db.get_distinct_counts_series(TSDBModel.users_affected_by_project,
                                                     [project_id], dts[0], dts[-1])
        assert has_shape(results, {1: [(1, 1)]})

        results = self.db.get_distinct_counts_totals(TSDBModel.users_affected_by_project,
                                                     [project_id], dts[0], dts[-1])
        assert has_shape(results, {1: 1})

        results = self.db.get_distinct_counts_union(TSDBModel.users_affected_by_project,
                                                    [project_id], dts[0], dts[-1])
        assert has_shape(results, 1)
コード例 #15
0
    def setUp(self):
        """
        Create one project with four environments, two groups, two releases
        and three group/release links, then insert 24 events (one every ten
        minutes for four hours) through Snuba's test insert endpoint.
        """
        super(SnubaTSDBTest, self).setUp()

        self.db = SnubaTSDB()

        # Midnight UTC of the current day; every event time is an offset
        # from this instant.
        midnight = datetime.utcnow().replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC)
        self.now = midnight

        project = self.create_project()
        self.proj1 = project
        self.proj1env1 = self.create_environment(project=project, name="test")
        self.proj1env2 = self.create_environment(project=project, name="dev")
        self.proj1env3 = self.create_environment(project=project,
                                                 name="staging")
        self.proj1defaultenv = self.create_environment(project=project,
                                                       name="")

        self.proj1group1 = self.create_group(project)
        self.proj1group2 = self.create_group(project)
        groups = (self.proj1group1, self.proj1group2)

        hashes = ["1" * 32, "2" * 32]
        for group, group_hash in zip(groups, hashes):
            GroupHash.objects.create(project=project,
                                     group=group,
                                     hash=group_hash)

        self.release1 = Release.objects.create(
            organization_id=self.organization.id,
            version="1" * 10,
            date_added=midnight,
        )
        self.release1.add_project(project)
        self.release2 = Release.objects.create(
            organization_id=self.organization.id,
            version="2" * 10,
            date_added=midnight,
        )
        self.release2.add_project(project)

        self.group1release1 = GroupRelease.objects.create(
            project_id=project.id,
            group_id=self.proj1group1.id,
            release_id=self.release1.id,
        )
        self.group1release2 = GroupRelease.objects.create(
            project_id=project.id,
            group_id=self.proj1group1.id,
            release_id=self.release2.id,
        )
        self.group2release1 = GroupRelease.objects.create(
            project_id=project.id,
            group_id=self.proj1group2.id,
            release_id=self.release1.id,
        )

        received_base = calendar.timegm(midnight.timetuple())
        group_ids = [self.proj1group1.id, self.proj1group2.id]
        environment_names = [self.proj1env1.name, None]

        def insert_message(offset):
            """Build the insert message for the event ``offset`` seconds after midnight."""
            ten_minute_slot = (offset // 600) % 2  # Switch every 10 mins
            # change every 55 min so some hours have 1 user, some have 2
            user_slot = offset // 3300
            event_time = midnight + timedelta(seconds=offset)
            event = {
                "event_id": (six.text_type(offset) * 32)[:32],
                "primary_hash": hashes[ten_minute_slot],
                "group_id": group_ids[ten_minute_slot],
                "project_id": project.id,
                "message": "message 1",
                "platform": "python",
                "datetime": event_time.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
                "data": {
                    "type": "transaction" if offset % 1200 == 0 else "error",
                    "received": received_base + offset,
                    "tags": {
                        "foo": "bar",
                        "baz": "quux",
                        # Switch every 2 hours
                        "environment":
                        environment_names[(offset // 7200) % 3],
                        "sentry:user": u"id:user{}".format(user_slot),
                        # 1 per hour
                        "sentry:release":
                        six.text_type(offset // 3600) * 10,
                    },
                    "user": {
                        "id": u"user{}".format(user_slot),
                        "email": u"user{}@sentry.io".format(offset),
                    },
                },
            }
            return (2, "insert", event)

        # Every 10 min for 4 hours
        data = json.dumps(
            [insert_message(offset) for offset in range(0, 14400, 600)])

        insert_response = requests.post(
            settings.SENTRY_SNUBA + "/tests/events/insert", data=data)
        assert insert_response.status_code == 200

        # snuba trims query windows based on first_seen/last_seen, so these need to be correct-ish
        for group in groups:
            group.first_seen = self.now
            group.last_seen = self.now + timedelta(seconds=14400)
            group.save()
コード例 #16
0
class SnubaTSDBTest(TestCase, SnubaTestCase):
    def setUp(self):
        """
        Build the fixture shared by every test in this class.

        Creates one project with four environments, two groups (each with a
        group hash), two releases and three group releases, then posts 24
        events -- one every 10 minutes for 4 hours, starting at ``self.now``
        -- to the Snuba test insert endpoint.
        """
        super(SnubaTSDBTest, self).setUp()

        self.db = SnubaTSDB()
        # Anchor the fixture at a UTC midnight that is at least four hours
        # old. Anchoring at *today's* midnight gives the later events
        # timestamps in the future whenever the suite runs between 00:00 and
        # 04:00 UTC.
        self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC)

        self.proj1 = self.create_project()
        self.proj1env1 = self.create_environment(project=self.proj1,
                                                 name="test")
        self.proj1env2 = self.create_environment(project=self.proj1,
                                                 name="dev")
        self.proj1env3 = self.create_environment(project=self.proj1,
                                                 name="staging")
        self.proj1defaultenv = self.create_environment(project=self.proj1,
                                                       name="")

        self.proj1group1 = self.create_group(self.proj1)
        self.proj1group2 = self.create_group(self.proj1)

        hash1 = "1" * 32
        hash2 = "2" * 32
        GroupHash.objects.create(project=self.proj1,
                                 group=self.proj1group1,
                                 hash=hash1)
        GroupHash.objects.create(project=self.proj1,
                                 group=self.proj1group2,
                                 hash=hash2)

        self.release1 = Release.objects.create(
            organization_id=self.organization.id,
            version="1" * 10,
            date_added=self.now)
        self.release1.add_project(self.proj1)
        self.release2 = Release.objects.create(
            organization_id=self.organization.id,
            version="2" * 10,
            date_added=self.now)
        self.release2.add_project(self.proj1)

        self.group1release1 = GroupRelease.objects.create(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release1.id)
        self.group1release2 = GroupRelease.objects.create(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release2.id)
        self.group2release1 = GroupRelease.objects.create(
            project_id=self.proj1.id,
            group_id=self.proj1group2.id,
            release_id=self.release1.id)

        hashes = [hash1, hash2]
        group_ids = [self.proj1group1.id, self.proj1group2.id]
        environments = [self.proj1env1.name, None]
        received_base = calendar.timegm(self.now.timetuple())

        def build_event(r):
            # ``r`` is the event's offset from ``self.now`` in seconds.
            slot = (r // 600) % 2  # Switch every 10 mins
            return (
                2,
                "insert",
                {
                    "event_id": (six.text_type(r) * 32)[:32],
                    "primary_hash": hashes[slot],
                    "group_id": group_ids[slot],
                    "project_id": self.proj1.id,
                    "message": "message 1",
                    "platform": "python",
                    "datetime": (self.now + timedelta(seconds=r)).strftime(
                        "%Y-%m-%dT%H:%M:%S.%fZ"),
                    "data": {
                        "type": "transaction" if r % 1200 == 0 else "error",
                        "received": received_base + r,
                        "tags": {
                            "foo": "bar",
                            "baz": "quux",
                            # Switch every 2 hours. The modulus follows the
                            # list length so that a longer event range keeps
                            # cycling instead of raising IndexError.
                            "environment": environments[
                                (r // 7200) % len(environments)],
                            "sentry:user": u"id:user{}".format(r // 3300),
                            # 1 per hour
                            "sentry:release": six.text_type(r // 3600) * 10,
                        },
                        "user": {
                            # change every 55 min so some hours have 1 user,
                            # some have 2
                            "id": u"user{}".format(r // 3300),
                            "email": u"user{}@sentry.io".format(r),
                        },
                    },
                },
            )

        # Every 10 min for 4 hours
        data = json.dumps([build_event(r) for r in range(0, 14400, 600)])

        response = requests.post(settings.SENTRY_SNUBA + "/tests/events/insert",
                                 data=data)
        assert response.status_code == 200

        # snuba trims query windows based on first_seen/last_seen, so these
        # need to be correct-ish
        last_seen = self.now + timedelta(seconds=14400)
        for group in (self.proj1group1, self.proj1group2):
            group.first_seen = self.now
            group.last_seen = last_seen
            group.save()

    def test_range_groups(self):
        """Hourly ``get_range`` on groups: one group, two groups, no groups."""
        hours = [self.now + timedelta(hours=offset) for offset in range(4)]
        window_start, window_end = hours[0], hours[-1]
        group1_id = self.proj1group1.id
        group2_id = self.proj1group2.id
        # Each hourly bucket is expected to hold 3 events for either group.
        three_per_hour = [(timestamp(hour), 3) for hour in hours]

        one_group = self.db.get_range(TSDBModel.group, [group1_id],
                                      window_start,
                                      window_end,
                                      rollup=3600)
        assert one_group == {group1_id: three_per_hour}

        # Multiple groups
        both_groups = self.db.get_range(
            TSDBModel.group,
            [group1_id, group2_id],
            window_start,
            window_end,
            rollup=3600,
        )
        assert both_groups == {
            group1_id: three_per_hour,
            group2_id: three_per_hour,
        }

        # No keys requested -> empty result
        no_groups = self.db.get_range(TSDBModel.group, [],
                                      window_start,
                                      window_end,
                                      rollup=3600)
        assert no_groups == {}

    def test_range_releases(self):
        """Hourly ``get_range`` on a release: 6 events in the second hour only."""
        hours = [self.now + timedelta(hours=offset) for offset in range(4)]
        release_id = self.release1.id
        hourly_counts = (0, 6, 0, 0)

        result = self.db.get_range(TSDBModel.release, [release_id],
                                   hours[0],
                                   hours[-1],
                                   rollup=3600)

        assert result == {
            release_id: [(timestamp(hour), count)
                         for hour, count in zip(hours, hourly_counts)]
        }

    def test_range_project(self):
        """Hourly ``get_range`` on the project: 3 events in each of 4 hours."""
        hours = [self.now + timedelta(hours=offset) for offset in range(4)]
        project_id = self.proj1.id

        result = self.db.get_range(TSDBModel.project, [project_id],
                                   hours[0],
                                   hours[-1],
                                   rollup=3600)

        assert result == {
            project_id: [(timestamp(hour), 3) for hour in hours]
        }

    def test_range_environment_filter(self):
        """Hourly project counts when ``get_range`` is restricted to one environment."""
        hours = [self.now + timedelta(hours=offset) for offset in range(4)]
        project_id = self.proj1.id

        # (environment to filter on, expected count per hourly bucket)
        cases = [
            (self.proj1env1, (3, 3, 0, 0)),
            # No events submitted for env2
            (self.proj1env2, (0, 0, 0, 0)),
            # Events submitted with no environment should match default environment
            (self.proj1defaultenv, (0, 0, 3, 3)),
        ]

        for environment, hourly_counts in cases:
            result = self.db.get_range(
                TSDBModel.project,
                [project_id],
                hours[0],
                hours[-1],
                rollup=3600,
                environment_ids=[environment.id],
            )
            assert result == {
                project_id: [(timestamp(hour), count)
                             for hour, count in zip(hours, hourly_counts)]
            }

    def test_range_rollups(self):
        """``get_range`` on the project with a daily and a per-minute rollup."""
        project_id = self.proj1.id

        # Daily -- day buckets start on day boundaries
        day_start = self.now.replace(hour=0)
        days = [day_start + timedelta(days=offset) for offset in range(2)]
        daily = self.db.get_range(TSDBModel.project, [project_id],
                                  days[0],
                                  days[-1],
                                  rollup=86400)
        assert daily == {
            project_id: [(timestamp(days[0]), 12), (timestamp(days[1]), 0)]
        }

        # Minutely
        minutes = [
            self.now + timedelta(minutes=offset) for offset in range(120)
        ]
        expected = []
        for offset, minute in enumerate(minutes):
            # A 1 is expected at minutes 10, 30, 50, ...; every other minute
            # is expected to be empty.
            is_hit = offset % 10 == 0 and offset % 20 != 0
            expected.append((to_timestamp(minute), 1 if is_hit else 0))
        # The very first bucket is pinned to 0 explicitly.
        expected[0] = (expected[0][0], 0)

        minutely = self.db.get_range(TSDBModel.project, [project_id],
                                     minutes[0],
                                     minutes[-1],
                                     rollup=60)
        assert minutely == {project_id: expected}

    def test_distinct_counts_series_users(self):
        """Hourly distinct-user series per group, per project, and for no keys."""
        hours = [self.now + timedelta(hours=offset) for offset in range(4)]
        window_start, window_end = hours[0], hours[-1]
        stamps = [timestamp(hour) for hour in hours]

        group_id = self.proj1group1.id
        by_group = self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group, [group_id],
            window_start,
            window_end,
            rollup=3600)
        assert by_group == {group_id: list(zip(stamps, (1, 1, 1, 2)))}

        project_id = self.proj1.id
        by_project = self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project, [project_id],
            window_start,
            window_end,
            rollup=3600)
        assert by_project == {project_id: list(zip(stamps, (1, 2, 2, 2)))}

        # No keys requested -> empty result
        empty = self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group, [],
            window_start,
            window_end,
            rollup=3600)
        assert empty == {}

    def get_distinct_counts_totals_users(self):
        """Check ``get_distinct_counts_totals`` for users by group and by project."""
        # NOTE(review): this method's name does not start with ``test``, so
        # name-based unittest/pytest collection will presumably never run it.
        # Confirm the expected totals below still hold before renaming it.
        window_start = self.now
        window_end = self.now + timedelta(hours=4)
        group_id = self.proj1group1.id
        project_id = self.proj1.id

        group_total = self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [group_id],
            window_start,
            window_end,
            rollup=3600,
        )
        # 2 unique users overall
        assert group_total == {group_id: 2}

        first_hour_total = self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [group_id],
            window_start,
            window_start,
            rollup=3600,
        )
        # Only 1 unique user in the first hour
        assert first_hour_total == {group_id: 1}

        project_total = self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project,
            [project_id],
            window_start,
            window_end,
            rollup=3600,
        )
        assert project_total == {project_id: 2}

        # No keys requested -> empty result
        empty = self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [],
            window_start,
            window_end,
            rollup=3600,
        )
        assert empty == {}

    def test_most_frequent(self):
        """``get_most_frequent`` ranks the project's two groups with scores 2.0 and 1.0."""
        window_start = self.now
        window_end = self.now + timedelta(hours=4)
        project_id = self.proj1.id

        ranked = self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [project_id],
            window_start,
            window_end,
            rollup=3600,
        )
        assert ranked == {
            project_id: [
                (self.proj1group1.id, 2.0),
                (self.proj1group2.id, 1.0),
            ]
        }

        # No keys requested -> empty result
        empty = self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [],
            window_start,
            window_end,
            rollup=3600,
        )
        assert empty == {}

    def test_frequency_series(self):
        """Hourly per-release frequencies for each group, plus the empty-items case."""
        hours = [self.now + timedelta(hours=offset) for offset in range(4)]
        window_start, window_end = hours[0], hours[-1]

        group1_id = self.proj1group1.id
        group2_id = self.proj1group2.id
        g1_rel1 = self.group1release1.id
        g1_rel2 = self.group1release2.id
        g2_rel1 = self.group2release1.id

        # {group_id: (group_release_id, ...)}
        requested = {
            group1_id: (g1_rel1, g1_rel2),
            group2_id: (g2_rel1, ),
        }
        series = self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group,
            requested,
            window_start,
            window_end,
            rollup=3600,
        )

        stamps = [timestamp(hour) for hour in hours]
        # Expected counts per hourly bucket.
        group1_counts = [(0, 0), (3, 0), (0, 3), (0, 0)]  # (release1, release2)
        group2_counts = [0, 3, 0, 0]  # release1 only

        assert series == {
            group1_id: [(stamp, {
                g1_rel1: first,
                g1_rel2: second
            }) for stamp, (first, second) in zip(stamps, group1_counts)],
            group2_id: [(stamp, {
                g2_rel1: count
            }) for stamp, count in zip(stamps, group2_counts)],
        }

        # No items requested -> empty result
        empty = self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group, {},
            window_start,
            window_end,
            rollup=3600)
        assert empty == {}

    def test_result_shape(self):
        """
        Checks, via ``has_shape``, that each TSDB read method returns a
        result laid out in the expected format.
        """
        project_id = self.proj1.id
        hours = [self.now + timedelta(hours=offset) for offset in range(4)]
        first, last = hours[0], hours[-1]

        project_keys = [project_id]
        items = {
            # {project_id: (issue_id, issue_id, ...)}
            project_id: (self.proj1group1.id, self.proj1group2.id)
        }
        frequent_issues = TSDBModel.frequent_issues_by_project
        affected_users = TSDBModel.users_affected_by_project

        # (method name, model, keys/items, start, end, expected shape)
        cases = [
            ("get_most_frequent", frequent_issues, project_keys, first, first,
             {1: [(1, 1.0)]}),
            ("get_most_frequent_series", frequent_issues, project_keys, first,
             first, {1: [(1, {1: 1.0})]}),
            ("get_frequency_series", frequent_issues, items, first, last,
             {1: [(1, {1: 1})]}),
            ("get_frequency_totals", frequent_issues, items, first, last,
             {1: {1: 1}}),
            ("get_range", TSDBModel.project, project_keys, first, last,
             {1: [(1, 1)]}),
            ("get_distinct_counts_series", affected_users, project_keys,
             first, last, {1: [(1, 1)]}),
            ("get_distinct_counts_totals", affected_users, project_keys,
             first, last, {1: 1}),
            ("get_distinct_counts_union", affected_users, project_keys, first,
             last, 1),
        ]

        for method_name, model, keys, start, end, shape in cases:
            results = getattr(self.db, method_name)(model, keys, start, end)
            assert has_shape(results, shape)

    def test_calculated_limit(self):
        """
        With the snuba module patched, checks the ``limit`` keyword that
        ``get_data`` hands to ``snuba.query`` for three window/rollup pairs.
        """
        end = self.now
        # (timedelta kwargs for the window length, rollup, expected limit)
        # NOTE(review): the expected limits look like
        # (buckets in window) * (5 keys) -- confirm against get_data.
        scenarios = [
            ({"days": -1}, 3600, 120),  # 24h test
            ({"days": -14}, 86400, 70),  # 14 day test
            ({"hours": -1}, 3600, 5),  # 1h test
        ]

        with patch("sentry.tsdb.snuba.snuba") as snuba:
            for window, rollup, expected_limit in scenarios:
                start = end + timedelta(seconds=rollup, **window)
                self.db.get_data(TSDBModel.group, [1, 2, 3, 4, 5],
                                 start,
                                 end,
                                 rollup=rollup)
                assert snuba.query.call_args[1]["limit"] == expected_limit
コード例 #17
0
ファイル: test_snuba.py プロジェクト: yangnaihua/sentry
class SnubaTSDBTest(OutcomesSnubaTest):
    def setUp(self):
        """Create the TSDB backend and the reference times used by the tests."""
        super(SnubaTSDBTest, self).setUp()
        self.db = SnubaTSDB()

        # Reference times: the query window opens a week before "now";
        # the other two sit one day after and one day before that opening.
        one_day = timedelta(days=1)
        self.now = datetime.now(pytz.utc)
        self.start_time = self.now - 7 * one_day
        self.one_day_later = self.start_time + one_day
        self.day_before_start_time = self.start_time - one_day

    def test_organization_outcomes(self):
        """
        Stores outcomes for this and another organization, then checks the
        organization-level totals at hourly and 10-second granularity.
        """
        other_organization = self.create_organization()
        org_id = self.organization.id
        project_id = self.project.id

        # (organization id, timestamp, number of outcomes to store)
        stored = [
            (org_id, self.start_time, 3),
            (org_id, self.one_day_later, 4),
            # Also create some outcomes we shouldn't be querying
            (other_organization.id, self.one_day_later, 5),
            (org_id, self.day_before_start_time, 6),
        ]
        for outcome in (Outcome.ACCEPTED, Outcome.RATE_LIMITED,
                        Outcome.FILTERED):
            for owner_id, when, times in stored:
                self.store_outcomes(owner_id, project_id, outcome.value, when,
                                    1, times)

        granularities = [
            (3600, floor_to_hour_epoch),
            (10, floor_to_10s_epoch),
        ]
        # (model, expected count at start_time, expected count a day later)
        expectations = [
            (TSDBModel.organization_total_received, 3 * 3, 4 * 3),
            (TSDBModel.organization_total_rejected, 3, 4),
            (TSDBModel.organization_total_blacklisted, 3, 4),
        ]

        for granularity, floor_func in granularities:
            for tsdb_model, start_time_count, day_later_count in expectations:
                # Query SnubaTSDB
                response = self.db.get_range(tsdb_model, [org_id],
                                             self.start_time, self.now,
                                             granularity, None)

                # Assert that the response has values set for the times we
                # expect, and nothing more
                assert org_id in response
                series = response[org_id]
                counts_by_bucket = dict(series)

                start_bucket = floor_func(self.start_time)
                later_bucket = floor_func(self.one_day_later)
                assert counts_by_bucket[start_bucket] == start_time_count
                assert counts_by_bucket[later_bucket] == day_later_count

                for bucket, count in series:
                    if bucket in (start_bucket, later_bucket):
                        continue
                    assert count == 0

    def test_project_outcomes(self):
        """
        Stores outcomes for this and another project, then checks the
        project-level totals at hourly and 10-second granularity.
        """
        other_project = self.create_project(organization=self.organization)
        org_id = self.organization.id
        project_id = self.project.id

        # (project id, timestamp, number of outcomes to store)
        stored = [
            (project_id, self.start_time, 3),
            (project_id, self.one_day_later, 4),
            # Also create some outcomes we shouldn't be querying
            (other_project.id, self.one_day_later, 5),
            (project_id, self.day_before_start_time, 6),
        ]
        for outcome in (Outcome.ACCEPTED, Outcome.RATE_LIMITED,
                        Outcome.FILTERED):
            for target_project_id, when, times in stored:
                self.store_outcomes(org_id, target_project_id, outcome.value,
                                    when, 1, times)

        granularities = [
            (3600, floor_to_hour_epoch),
            (10, floor_to_10s_epoch),
        ]
        # (model, expected count at start_time, expected count a day later)
        expectations = [
            (TSDBModel.project_total_received, 3 * 3, 4 * 3),
            (TSDBModel.project_total_rejected, 3, 4),
            (TSDBModel.project_total_blacklisted, 3, 4),
        ]

        for granularity, floor_func in granularities:
            for tsdb_model, start_time_count, day_later_count in expectations:
                response = self.db.get_range(tsdb_model, [project_id],
                                             self.start_time, self.now,
                                             granularity, None)

                # Assert that the response has values set for the times we
                # expect, and nothing more
                assert project_id in response
                series = response[project_id]
                counts_by_bucket = dict(series)

                start_bucket = floor_func(self.start_time)
                later_bucket = floor_func(self.one_day_later)
                assert counts_by_bucket[start_bucket] == start_time_count
                assert counts_by_bucket[later_bucket] == day_later_count

                for bucket, count in series:
                    if bucket in (start_bucket, later_bucket):
                        continue
                    assert count == 0

    def test_key_outcomes(self):
        """
        Stores outcomes for this project's key and another key, then checks
        the key-level totals at hourly and 10-second granularity.
        """
        project_key = self.create_project_key(project=self.project)
        other_project = self.create_project(organization=self.organization)
        other_project_key = self.create_project_key(project=other_project)

        org_id = self.organization.id
        project_id = self.project.id
        key_id = project_key.id

        # (timestamp, key id, number of outcomes to store)
        stored = [
            (self.start_time, key_id, 3),
            (self.one_day_later, key_id, 4),
            # Also create some outcomes we shouldn't be querying
            (self.one_day_later, other_project_key.id, 5),
            (self.day_before_start_time, key_id, 6),
        ]
        for outcome in (Outcome.ACCEPTED, Outcome.RATE_LIMITED,
                        Outcome.FILTERED):
            for when, stored_key_id, times in stored:
                self.store_outcomes(
                    org_id,
                    project_id,
                    outcome.value,
                    when,
                    stored_key_id,
                    times,
                )

        granularities = [
            (3600, floor_to_hour_epoch),
            (10, floor_to_10s_epoch),
        ]
        # (model, expected count at start_time, expected count a day later)
        expectations = [
            (TSDBModel.key_total_received, 3 * 3, 4 * 3),
            (TSDBModel.key_total_rejected, 3, 4),
            (TSDBModel.key_total_blacklisted, 3, 4),
        ]

        for granularity, floor_func in granularities:
            for tsdb_model, start_time_count, day_later_count in expectations:
                # with [key_id, six.text_type(key_id)], we are imitating the
                # hack in project_key_stats.py cause that is what `get_range`
                # will be called with.
                requested_keys = [key_id, six.text_type(key_id)]
                response = self.db.get_range(
                    tsdb_model,
                    requested_keys,
                    self.start_time,
                    self.now,
                    granularity,
                    None,
                )

                # Assert that the response has values set for the times we
                # expect, and nothing more
                assert key_id in response
                series = response[key_id]
                counts_by_bucket = dict(series)

                start_bucket = floor_func(self.start_time)
                later_bucket = floor_func(self.one_day_later)
                assert counts_by_bucket[start_bucket] == start_time_count
                assert counts_by_bucket[later_bucket] == day_later_count

                for bucket, count in series:
                    if bucket in (start_bucket, later_bucket):
                        continue
                    assert count == 0
コード例 #18
0
ファイル: test_snuba.py プロジェクト: zhangdinet/sentry
class SnubaTSDBTest(OutcomesSnubaTest):
    def setUp(self):
        """Create the TSDB backend and the reference times used by the tests."""
        super(SnubaTSDBTest, self).setUp()
        self.db = SnubaTSDB()

        # Reference times: the query window opens a week before "now";
        # the other two sit one day after and one day before that opening.
        one_day = timedelta(days=1)
        self.now = datetime.now(pytz.utc)
        self.start_time = self.now - 7 * one_day
        self.one_day_later = self.start_time + one_day
        self.day_before_start_time = self.start_time - one_day

    def test_organization_outcomes(self):
        """
        For each organization-level model, stores outcomes of the matching
        kind and checks the hourly totals returned by ``get_range``.
        """
        other_organization = self.create_organization()
        org_id = self.organization.id
        project_id = self.project.id

        # (organization id, timestamp, number of outcomes to store)
        stored = [
            # Create all the outcomes we will be querying
            (org_id, self.start_time, 3),
            (org_id, self.one_day_later, 4),
            # Also create some outcomes we shouldn't be querying
            (other_organization.id, self.one_day_later, 5),
            (org_id, self.day_before_start_time, 6),
        ]
        cases = [
            (TSDBModel.organization_total_received, Outcome.ACCEPTED),
            (TSDBModel.organization_total_rejected, Outcome.RATE_LIMITED),
            (TSDBModel.organization_total_blacklisted, Outcome.FILTERED),
        ]

        for tsdb_model, outcome in cases:
            for owner_id, when, times in stored:
                self.store_outcomes(owner_id, project_id, outcome.value, when, times)

            # Query SnubaTSDB
            response = self.db.get_range(
                tsdb_model, [org_id], self.start_time, self.now, 3600, None
            )

            # Assert that the response has values set for the times we expect, and nothing more
            assert org_id in response
            series = response[org_id]
            counts_by_bucket = dict(series)

            start_bucket = floor_to_hour_epoch(self.start_time)
            later_bucket = floor_to_hour_epoch(self.one_day_later)
            assert counts_by_bucket[start_bucket] == 3
            assert counts_by_bucket[later_bucket] == 4

            for bucket, count in series:
                if bucket in (start_bucket, later_bucket):
                    continue
                assert count == 0

    def test_project_outcomes(self):
        """
        For each project-level model, stores outcomes of the matching kind
        and checks the hourly totals returned by ``get_range``.
        """
        other_project = self.create_project(organization=self.organization)
        org_id = self.organization.id
        project_id = self.project.id

        # (project id, timestamp, number of outcomes to store)
        stored = [
            # Create all the outcomes we will be querying
            (project_id, self.start_time, 3),
            (project_id, self.one_day_later, 4),
            # Also create some outcomes we shouldn't be querying
            (other_project.id, self.one_day_later, 5),
            (project_id, self.day_before_start_time, 6),
        ]
        cases = [
            (TSDBModel.project_total_received, Outcome.ACCEPTED),
            (TSDBModel.project_total_rejected, Outcome.RATE_LIMITED),
            (TSDBModel.project_total_blacklisted, Outcome.FILTERED),
        ]

        for tsdb_model, outcome in cases:
            for target_project_id, when, times in stored:
                self.store_outcomes(org_id, target_project_id, outcome.value, when, times)

            # Query SnubaTSDB
            response = self.db.get_range(
                tsdb_model, [project_id], self.start_time, self.now, 3600, None
            )

            # Assert that the response has values set for the times we expect, and nothing more
            assert project_id in response
            series = response[project_id]
            counts_by_bucket = dict(series)

            start_bucket = floor_to_hour_epoch(self.start_time)
            later_bucket = floor_to_hour_epoch(self.one_day_later)
            assert counts_by_bucket[start_bucket] == 3
            assert counts_by_bucket[later_bucket] == 4

            for bucket, count in series:
                if bucket in (start_bucket, later_bucket):
                    continue
                assert count == 0
コード例 #19
0
class SnubaTSDBTest(TestCase, SnubaTestCase):
    def setUp(self):
        """
        Build the shared fixture: one project, two releases, and 24 events
        spaced 10 minutes apart over the four hours starting at ``self.now``.
        """
        super().setUp()

        self.db = SnubaTSDB()
        # Midnight (UTC) of the day it was four hours ago, so the four-hour
        # event window that starts at ``self.now`` lies entirely in the past.
        self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC)
        self.proj1 = self.create_project()
        env1 = "test"
        env2 = "dev"
        defaultenv = ""

        # Release versions "1111111111" and "2222222222"; they match the
        # "release" value given to the events of fixture hours 1 and 2 below.
        release1 = "1" * 10
        release2 = "2" * 10

        self.release1 = Release.objects.create(
            organization_id=self.organization.id,
            version=release1,
            date_added=self.now)
        self.release1.add_project(self.proj1)
        self.release2 = Release.objects.create(
            organization_id=self.organization.id,
            version=release2,
            date_added=self.now)
        self.release2.add_project(self.proj1)

        # ``r`` is the event's offset from ``self.now`` in seconds.
        for r in range(0, 14400, 600):  # Every 10 min for 4 hours
            self.store_event(
                data={
                    # 32-character id built by repeating the offset's digits
                    "event_id": (str(r) * 32)[:32],
                    "message": "message 1",
                    "platform": "python",
                    "fingerprint":
                    [["group-1"],
                     ["group-2"]][(r // 600) % 2],  # Switch every 10 mins
                    "timestamp": iso_format(self.now + timedelta(seconds=r)),
                    "tags": {
                        "foo": "bar",
                        "baz": "quux",
                        # Switch every 2 hours. ``r // 7200`` is only ever 0
                        # or 1 for these offsets, so the ``% 3`` never
                        # indexes past the two-element list.
                        "environment": [env1, None][(r // 7200) % 3],
                        # NOTE(review): this literal has no replacement
                        # field, so ``.format()`` returns it unchanged; it
                        # looks like a redacted value -- confirm the
                        # intended tag.
                        "sentry:user": "******".format(r // 3300),
                    },
                    "user": {
                        # change every 55 min so some hours have 1 user, some have 2
                        "id": "user{}".format(r // 3300),
                        "email": f"user{r}@sentry.io",
                    },
                    # One release string per hour: "0000000000" for hour 0
                    # up to "3333333333" for hour 3.
                    "release": str(r // 3600) * 10,  # 1 per hour,
                },
                project_id=self.proj1.id,
            )

        # The two groups are looked up after the events are stored;
        # presumably they are created by the two fingerprints above -- verify.
        groups = Group.objects.filter(project=self.proj1).order_by("id")
        self.proj1group1 = groups[0]
        self.proj1group2 = groups[1]

        self.env1 = Environment.objects.get(name=env1)
        self.env2 = self.create_environment(name=env2)  # No events
        self.defaultenv = Environment.objects.get(name=defaultenv)

        # Fetched, not created: presumably this row exists as a side effect
        # of storing the events above -- verify.
        self.group1release1env1 = GroupRelease.objects.get(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release1.id,
            environment=env1,
        )

        # Created explicitly rather than fetched.
        self.group1release2env1 = GroupRelease.objects.create(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release2.id,
            environment=env1,
        )

        self.group2release1env1 = GroupRelease.objects.get(
            project_id=self.proj1.id,
            group_id=self.proj1group2.id,
            release_id=self.release1.id,
            environment=env1,
        )

    def test_range_groups(self):
        """Hourly event counts per group, for one group, two groups and none."""
        hours = [self.now + timedelta(hours=offset) for offset in range(4)]
        first_hour, last_hour = hours[0], hours[-1]
        group1_id = self.proj1group1.id
        group2_id = self.proj1group2.id

        # Three events are expected in each group for every hour
        three_per_hour = [(timestamp(hour), 3) for hour in hours]

        single_group = self.db.get_range(
            TSDBModel.group, [group1_id], first_hour, last_hour, rollup=3600)
        assert single_group == {group1_id: three_per_hour}

        # Multiple groups
        both_groups = self.db.get_range(
            TSDBModel.group,
            [group1_id, group2_id],
            first_hour,
            last_hour,
            rollup=3600,
        )
        assert both_groups == {
            group1_id: three_per_hour,
            group2_id: three_per_hour,
        }

        # No keys requested
        no_groups = self.db.get_range(
            TSDBModel.group, [], first_hour, last_hour, rollup=3600)
        assert no_groups == {}

    def test_range_releases(self):
        """Hourly event counts for release1: six in hour 1, none elsewhere."""
        hours = [self.now + timedelta(hours=offset) for offset in range(4)]
        release_id = self.release1.id

        expected_counts = (0, 6, 0, 0)
        expected_series = [
            (timestamp(hour), count) for hour, count in zip(hours, expected_counts)
        ]

        result = self.db.get_range(
            TSDBModel.release, [release_id], hours[0], hours[-1], rollup=3600)

        assert result == {release_id: expected_series}

    def test_range_project(self):
        """Hourly event counts for the project: six in each of the four hours."""
        hours = [self.now + timedelta(hours=offset) for offset in range(4)]
        project_id = self.proj1.id

        result = self.db.get_range(
            TSDBModel.project, [project_id], hours[0], hours[-1], rollup=3600)

        assert result == {project_id: [(timestamp(hour), 6) for hour in hours]}

    def test_range_environment_filter(self):
        """Hourly project counts restricted to one environment at a time."""
        hours = [self.now + timedelta(hours=offset) for offset in range(4)]
        project_id = self.proj1.id

        # (environment to filter on, expected count for each of the 4 hours)
        scenarios = (
            (self.env1, (6, 6, 0, 0)),
            # No events submitted for env2
            (self.env2, (0, 0, 0, 0)),
            # Events submitted with no environment should match default environment
            (self.defaultenv, (0, 0, 6, 6)),
        )

        for environment, counts in scenarios:
            result = self.db.get_range(
                TSDBModel.project,
                [project_id],
                hours[0],
                hours[-1],
                rollup=3600,
                environment_ids=[environment.id],
            )
            expected_series = [
                (timestamp(hour), count) for hour, count in zip(hours, counts)
            ]
            assert result == {project_id: expected_series}

    def test_range_rollups(self):
        """Project counts with a daily rollup and with a one-minute rollup."""
        project_id = self.proj1.id

        # Daily: day buckets start on day boundaries
        first_day = self.now.replace(hour=0)
        second_day = first_day + timedelta(days=1)
        daily = self.db.get_range(
            TSDBModel.project, [project_id], first_day, second_day, rollup=86400)
        assert daily == {
            project_id: [(timestamp(first_day), 24), (timestamp(second_day), 0)]
        }

        # Minutely: expect every 10th minute to have a 1, else 0
        minutes = [self.now + timedelta(minutes=offset) for offset in range(120)]
        expected = []
        for index, minute in enumerate(minutes):
            count = 0 if index % 10 else 1
            expected.append((to_timestamp(minute), count))

        minutely = self.db.get_range(
            TSDBModel.project, [project_id], minutes[0], minutes[-1], rollup=60)
        assert minutely == {project_id: expected}

    def test_distinct_counts_series_users(self):
        """Hourly distinct-user counts per group, per project and for no keys."""
        hours = [self.now + timedelta(hours=offset) for offset in range(4)]
        first_hour, last_hour = hours[0], hours[-1]

        def hourly(counts):
            # Pair each fixture hour's timestamp with its expected count
            return [(timestamp(hour), count) for hour, count in zip(hours, counts)]

        group_id = self.proj1group1.id
        by_group = self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group,
            [group_id],
            first_hour,
            last_hour,
            rollup=3600,
        )
        assert by_group == {group_id: hourly((1, 1, 1, 2))}

        project_id = self.proj1.id
        by_project = self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project,
            [project_id],
            first_hour,
            last_hour,
            rollup=3600,
        )
        assert by_project == {project_id: hourly((1, 2, 2, 2))}

        no_keys = self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group,
            [],
            first_hour,
            last_hour,
            rollup=3600,
        )
        assert no_keys == {}

    def test_distinct_counts_totals_users(self):
        """
        Check the total number of distinct users per group and per project.

        The method carries the ``test_`` prefix so that the test runner
        collects it; under its former name it was never executed. The
        expected totals follow the fixture in ``setUp``, which assigns user
        ids ``user{r // 3300}`` for offsets up to 13800 seconds, i.e. five
        distinct users, all of which also occur in the first group.
        """
        window_end = self.now + timedelta(hours=4)

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            window_end,
            rollup=3600,
        ) == {
            self.proj1group1.id: 5  # 5 unique users overall
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now,
            rollup=3600,
        ) == {
            self.proj1group1.id: 1  # Only 1 unique user in the first hour
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project,
            [self.proj1.id],
            self.now,
            window_end,
            rollup=3600,
        ) == {
            self.proj1.id: 5
        }

        # No keys requested
        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [],
            self.now,
            window_end,
            rollup=3600,
        ) == {}

    # Former (uncollected) name, kept so direct references keep working.
    get_distinct_counts_totals_users = test_distinct_counts_totals_users

    def test_most_frequent(self):
        """Most frequent issues of the project, and the empty-keys case."""
        project_id = self.proj1.id
        group1_id = self.proj1group1.id
        group2_id = self.proj1group2.id
        window_end = self.now + timedelta(hours=4)

        ranking = self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [project_id],
            self.now,
            window_end,
            rollup=3600,
        )

        # Both issues equally frequent, so either order is accepted
        acceptable = [
            {project_id: [(group1_id, 2.0), (group2_id, 1.0)]},
            {project_id: [(group2_id, 2.0), (group1_id, 1.0)]},
        ]
        assert ranking in acceptable

        no_keys = self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [],
            self.now,
            window_end,
            rollup=3600,
        )
        assert no_keys == {}

    def test_frequency_series(self):
        """Hourly frequency of each requested group release, per group."""
        hours = [self.now + timedelta(hours=offset) for offset in range(4)]
        first_hour, last_hour = hours[0], hours[-1]

        group1_id = self.proj1group1.id
        group2_id = self.proj1group2.id
        g1_rel1 = self.group1release1env1.id
        g1_rel2 = self.group1release2env1.id
        g2_rel1 = self.group2release1env1.id

        result = self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group,
            {
                group1_id: (g1_rel1, g1_rel2),
                group2_id: (g2_rel1, ),
            },
            first_hour,
            last_hour,
            rollup=3600,
        )

        # Expected counts for each of the four hours
        group1_counts = ((0, 0), (3, 0), (0, 3), (0, 0))  # (release1, release2)
        group2_counts = (0, 3, 0, 0)  # release1 only

        expected = {
            group1_id: [
                (timestamp(hour), {g1_rel1: rel1_count, g1_rel2: rel2_count})
                for hour, (rel1_count, rel2_count) in zip(hours, group1_counts)
            ],
            group2_id: [
                (timestamp(hour), {g2_rel1: rel1_count})
                for hour, rel1_count in zip(hours, group2_counts)
            ],
        }
        assert result == expected

        no_keys = self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group,
            {},
            first_hour,
            last_hour,
            rollup=3600,
        )
        assert no_keys == {}

    def test_result_shape(self):
        """
        Checks that each TSDB query method returns a result with the
        expected structure.
        """
        project_id = self.proj1.id
        hours = [self.now + timedelta(hours=offset) for offset in range(4)]
        first_hour, last_hour = hours[0], hours[-1]
        issues_model = TSDBModel.frequent_issues_by_project
        users_model = TSDBModel.users_affected_by_project

        most_frequent = self.db.get_most_frequent(
            issues_model, [project_id], first_hour, first_hour)
        assert has_shape(most_frequent, {1: [(1, 1.0)]})

        most_frequent_series = self.db.get_most_frequent_series(
            issues_model, [project_id], first_hour, first_hour)
        assert has_shape(most_frequent_series, {1: [(1, {1: 1.0})]})

        # {project_id: (issue_id, issue_id, ...)}
        items = {project_id: (self.proj1group1.id, self.proj1group2.id)}

        frequency_series = self.db.get_frequency_series(
            issues_model, items, first_hour, last_hour)
        assert has_shape(frequency_series, {1: [(1, {1: 1})]})

        frequency_totals = self.db.get_frequency_totals(
            issues_model, items, first_hour, last_hour)
        assert has_shape(frequency_totals, {1: {1: 1}})

        range_result = self.db.get_range(
            TSDBModel.project, [project_id], first_hour, last_hour)
        assert has_shape(range_result, {1: [(1, 1)]})

        distinct_series = self.db.get_distinct_counts_series(
            users_model, [project_id], first_hour, last_hour)
        assert has_shape(distinct_series, {1: [(1, 1)]})

        distinct_totals = self.db.get_distinct_counts_totals(
            users_model, [project_id], first_hour, last_hour)
        assert has_shape(distinct_totals, {1: 1})

        distinct_union = self.db.get_distinct_counts_union(
            users_model, [project_id], first_hour, last_hour)
        assert has_shape(distinct_union, 1)

    def test_calculated_limit(self):
        """
        Check the ``limit`` passed to the (patched) snuba query for three
        combinations of rollup and time window, each with five keys.
        """
        end = self.now

        # (rollup in seconds, timedelta arguments for the look-back, limit)
        scenarios = (
            (3600, {"days": -1}, 120),  # 24h test
            (86400, {"days": -14}, 70),  # 14 day test
            (3600, {"hours": -1}, 5),  # 1h test
        )

        with patch("sentry.tsdb.snuba.snuba") as snuba:
            for rollup, look_back, expected_limit in scenarios:
                start = end + timedelta(seconds=rollup, **look_back)
                self.db.get_data(
                    TSDBModel.group, [1, 2, 3, 4, 5], start, end, rollup=rollup)
                # The limit is read from the keyword arguments of the most
                # recent snuba.query call
                assert snuba.query.call_args[1]["limit"] == expected_limit
コード例 #20
0
ファイル: test_snuba.py プロジェクト: hosmelq/sentry
class SnubaTSDBRequestsTest(TestCase):
    """
    Tests that the Snuba TSDB backend makes correctly formatted requests
    to the Snuba service, and formats the results correctly.

    Mocks the Snuba service request/response.
    """

    def setUp(self):
        """Create the Snuba TSDB backend instance that every test queries."""
        # NOTE(review): super().setUp() is not called here -- confirm that
        # this is intentional.
        self.db = SnubaTSDB()

    @responses.activate
    def test_result_shape(self):
        """
        Checks that each TSDB query method returns a result with the
        expected structure when Snuba answers with a single generic row.
        """
        base_time = parse_datetime('2018-03-09T01:00:00Z')
        project_id = 194503
        hours = [base_time + timedelta(hours=offset) for offset in range(4)]
        first_hour, last_hour = hours[0], hours[-1]
        issues_model = TSDBModel.frequent_issues_by_project
        users_model = TSDBModel.users_affected_by_project

        def fake_snuba(request):
            # Answer with one row holding a value for every requested
            # groupby column and every aggregation alias.
            payload = json.loads(request.body)
            aggregations = payload.get('aggregations', [])
            columns = payload['groupby'] + [agg[2] for agg in aggregations]

            row = dict.fromkeys(columns, 1)
            row['project_id'] = project_id
            if 'time' in row:
                row['time'] = '2018-03-09T01:00:00Z'
            for agg in aggregations:
                # topK aggregations get a list value instead of a scalar
                if agg[0].startswith('topK'):
                    row[agg[2]] = [99]

            meta = [{'name': column} for column in columns]
            return (200, {}, json.dumps({'data': [row], 'meta': meta}))

        with responses.RequestsMock() as rsps:
            rsps.add_callback(
                responses.POST,
                settings.SENTRY_SNUBA + '/query',
                callback=fake_snuba)

            most_frequent = self.db.get_most_frequent(
                issues_model, [project_id], first_hour, first_hour)
            assert has_shape(most_frequent, {1: [(1, 1.0)]})

            most_frequent_series = self.db.get_most_frequent_series(
                issues_model, [project_id], first_hour, first_hour)
            assert has_shape(most_frequent_series, {1: [(1, {1: 1.0})]})

            # {project_id: (issue_id, issue_id, ...)}
            items = {project_id: (0, 1, 2)}

            frequency_series = self.db.get_frequency_series(
                issues_model, items, first_hour, last_hour)
            assert has_shape(frequency_series, {1: [(1, {1: 1})]})

            frequency_totals = self.db.get_frequency_totals(
                issues_model, items, first_hour, last_hour)
            assert has_shape(frequency_totals, {1: {1: 1}})

            range_result = self.db.get_range(
                TSDBModel.project, [project_id], first_hour, last_hour)
            assert has_shape(range_result, {1: [(1, 1)]})

            distinct_series = self.db.get_distinct_counts_series(
                users_model, [project_id], first_hour, last_hour)
            assert has_shape(distinct_series, {1: [(1, 1)]})

            distinct_totals = self.db.get_distinct_counts_totals(
                users_model, [project_id], first_hour, last_hour)
            assert has_shape(distinct_totals, {1: 1})

            distinct_union = self.db.get_distinct_counts_union(
                users_model, [project_id], first_hour, last_hour)
            assert has_shape(distinct_union, 1)

    @responses.activate
    def test_groups_request(self):
        """
        Check the request sent to Snuba for a group range query, including
        that only the queried group's hashes are sent along.
        """
        base_time = parse_datetime('2018-03-09T01:00:00Z')
        hours = [base_time + timedelta(hours=offset) for offset in range(4)]
        project = self.create_project()
        group = self.create_group(project=project)
        GroupHash.objects.create(project=project, group=group, hash='0' * 32)
        group2 = self.create_group(project=project)
        GroupHash.objects.create(project=project, group=group2, hash='1' * 32)

        def fake_snuba(request):
            payload = json.loads(request.body)
            assert payload['aggregations'] == [['count()', None, 'aggregate']]
            assert payload['project'] == [project.id]
            assert payload['groupby'] == ['issue', 'time']

            # Assert issue->hash map is generated, but only for referenced issues
            issue_map = payload['issues']
            assert [group.id, ['0' * 32]] in issue_map
            assert [group2.id, ['1' * 32]] not in issue_map

            reply = {
                'data': [{'time': '2018-03-09T01:00:00Z', 'issue': 1, 'aggregate': 100}],
                'meta': [{'name': 'time'}, {'name': 'issue'}, {'name': 'aggregate'}]
            }
            return (200, {}, json.dumps(reply))

        with responses.RequestsMock() as rsps:
            rsps.add_callback(
                responses.POST,
                settings.SENTRY_SNUBA + '/query',
                callback=fake_snuba)
            results = self.db.get_range(
                TSDBModel.group, [group.id], hours[0], hours[-1])
            assert results is not None

    @responses.activate
    def test_releases_request(self):
        """
        Check the request sent to Snuba for a release range query and that
        the single returned row lands in the matching hourly bucket.
        """
        base_time = parse_datetime('2018-03-09T01:00:00Z')
        project = self.create_project()
        release = Release.objects.create(
            organization_id=self.organization.id,
            version='version X',
            date_added=base_time,
        )
        release.add_project(project)
        hours = [base_time + timedelta(hours=offset) for offset in range(4)]

        def fake_snuba(request):
            payload = json.loads(request.body)
            assert payload['aggregations'] == [['count()', None, 'aggregate']]
            assert payload['project'] == [project.id]
            assert payload['groupby'] == ['release', 'time']
            assert ['release', 'IN', ['version X']] in payload['conditions']

            reply = {
                'data': [{'release': 'version X', 'time': '2018-03-09T01:00:00Z', 'aggregate': 100}],
                'meta': [{'name': 'release'}, {'name': 'time'}, {'name': 'aggregate'}]
            }
            return (200, {}, json.dumps(reply))

        with responses.RequestsMock() as rsps:
            rsps.add_callback(
                responses.POST,
                settings.SENTRY_SNUBA + '/query',
                callback=fake_snuba)
            results = self.db.get_range(
                TSDBModel.release, [release.id], hours[0], hours[-1], rollup=3600)

            # Only the bucket of the mocked row is non-zero
            expected_series = []
            for hour in hours:
                count = 100 if hour == base_time else 0
                expected_series.append((int(to_timestamp(hour)), count))
            assert results == {release.id: expected_series}

    @responses.activate
    def test_environment_request(self):
        """
        Check that an environment filter is sent to Snuba as a condition on
        the environment name, and that the returned row is bucketed by hour.
        """
        base_time = parse_datetime('2018-03-09T01:00:00Z')
        project = self.create_project()
        env = self.create_environment(project=project, name="prod")
        hours = [base_time + timedelta(hours=offset) for offset in range(4)]

        def fake_snuba(request):
            payload = json.loads(request.body)
            assert payload['aggregations'] == [['count()', None, 'aggregate']]
            assert payload['project'] == [project.id]
            assert payload['groupby'] == ['project_id', 'time']
            assert ['environment', 'IN', ['prod']] in payload['conditions']

            reply = {
                'data': [{'project_id': project.id, 'time': '2018-03-09T01:00:00Z', 'aggregate': 100}],
                'meta': [{'name': 'project_id'}, {'name': 'time'}, {'name': 'aggregate'}]
            }
            return (200, {}, json.dumps(reply))

        with responses.RequestsMock() as rsps:
            rsps.add_callback(
                responses.POST,
                settings.SENTRY_SNUBA + '/query',
                callback=fake_snuba)
            results = self.db.get_range(
                TSDBModel.project,
                [project.id],
                hours[0],
                hours[-1],
                environment_id=env.id,
                rollup=3600,
            )

            # Only the bucket of the mocked row is non-zero
            expected_series = []
            for hour in hours:
                count = 100 if hour == base_time else 0
                expected_series.append((int(to_timestamp(hour)), count))
            assert results == {project.id: expected_series}

    def test_invalid_model(self):
        """A range query for an unsupported model raises a descriptive error."""
        with pytest.raises(Exception) as ex:
            self.db.get_range(TSDBModel.project_total_received_discarded, [], None, None)
        # Exceptions have no ``.message`` attribute on Python 3; ``str()``
        # gives the message text on both Python 2 and Python 3.
        assert "Unsupported TSDBModel" in str(ex.value)
コード例 #21
0
    def setUp(self):
        """
        Build the shared fixture: one project, two releases, and 24 events
        spaced 10 minutes apart over the four hours starting at ``self.now``.
        """
        super().setUp()

        self.db = SnubaTSDB()
        # Midnight (UTC) of the day it was four hours ago, so the four-hour
        # event window that starts at ``self.now`` lies entirely in the past.
        self.now = (datetime.utcnow() - timedelta(hours=4)).replace(
            hour=0, minute=0, second=0, microsecond=0, tzinfo=pytz.UTC)
        self.proj1 = self.create_project()
        env1 = "test"
        env2 = "dev"
        defaultenv = ""

        # Release versions "1111111111" and "2222222222"; they match the
        # "release" value given to the events of fixture hours 1 and 2 below.
        release1 = "1" * 10
        release2 = "2" * 10

        self.release1 = Release.objects.create(
            organization_id=self.organization.id,
            version=release1,
            date_added=self.now)
        self.release1.add_project(self.proj1)
        self.release2 = Release.objects.create(
            organization_id=self.organization.id,
            version=release2,
            date_added=self.now)
        self.release2.add_project(self.proj1)

        # ``r`` is the event's offset from ``self.now`` in seconds.
        for r in range(0, 14400, 600):  # Every 10 min for 4 hours
            self.store_event(
                data={
                    # 32-character id built by repeating the offset's digits
                    "event_id": (str(r) * 32)[:32],
                    "message": "message 1",
                    "platform": "python",
                    "fingerprint":
                    [["group-1"],
                     ["group-2"]][(r // 600) % 2],  # Switch every 10 mins
                    "timestamp": iso_format(self.now + timedelta(seconds=r)),
                    "tags": {
                        "foo": "bar",
                        "baz": "quux",
                        # Switch every 2 hours. ``r // 7200`` is only ever 0
                        # or 1 for these offsets, so the ``% 3`` never
                        # indexes past the two-element list.
                        "environment": [env1, None][(r // 7200) % 3],
                        # NOTE(review): this literal has no replacement
                        # field, so ``.format()`` returns it unchanged; it
                        # looks like a redacted value -- confirm the
                        # intended tag.
                        "sentry:user": "******".format(r // 3300),
                    },
                    "user": {
                        # change every 55 min so some hours have 1 user, some have 2
                        "id": "user{}".format(r // 3300),
                        "email": f"user{r}@sentry.io",
                    },
                    # One release string per hour: "0000000000" for hour 0
                    # up to "3333333333" for hour 3.
                    "release": str(r // 3600) * 10,  # 1 per hour,
                },
                project_id=self.proj1.id,
            )

        # The two groups are looked up after the events are stored;
        # presumably they are created by the two fingerprints above -- verify.
        groups = Group.objects.filter(project=self.proj1).order_by("id")
        self.proj1group1 = groups[0]
        self.proj1group2 = groups[1]

        self.env1 = Environment.objects.get(name=env1)
        self.env2 = self.create_environment(name=env2)  # No events
        self.defaultenv = Environment.objects.get(name=defaultenv)

        # Fetched, not created: presumably this row exists as a side effect
        # of storing the events above -- verify.
        self.group1release1env1 = GroupRelease.objects.get(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release1.id,
            environment=env1,
        )

        # Created explicitly rather than fetched.
        self.group1release2env1 = GroupRelease.objects.create(
            project_id=self.proj1.id,
            group_id=self.proj1group1.id,
            release_id=self.release2.id,
            environment=env1,
        )

        self.group2release1env1 = GroupRelease.objects.get(
            project_id=self.proj1.id,
            group_id=self.proj1group2.id,
            release_id=self.release1.id,
            environment=env1,
        )
コード例 #22
0
ファイル: group.py プロジェクト: k2snowman69/sentry
from sentry.reprocessing2 import get_progress

# Short string label for each GroupSubscriptionReason value.
SUBSCRIPTION_REASON_MAP = {
    GroupSubscriptionReason.comment: "commented",
    GroupSubscriptionReason.assigned: "assigned",
    GroupSubscriptionReason.bookmark: "bookmarked",
    GroupSubscriptionReason.status_change: "changed_status",
    GroupSubscriptionReason.mentioned: "mentioned",
}


# Unique sentinel object, distinguishable from every other value by identity.
# NOTE(review): presumably marks a "disabled" state; its use is outside this
# excerpt -- confirm.
disabled = object()


# Module-level Snuba TSDB backend, instantiated at import time from the
# configured TSDB options.
# TODO(jess): remove when snuba is primary backend
snuba_tsdb = SnubaTSDB(**settings.SENTRY_TSDB_OPTIONS)


# Logger named after this module.
logger = logging.getLogger(__name__)


def merge_list_dictionaries(dict1, dict2):
    """
    Append every list in ``dict2`` to the list stored under the same key in
    ``dict1``, starting from an empty list for keys ``dict1`` does not have.

    ``dict1`` is modified in place; nothing is returned.
    """
    for name, extra_items in six.iteritems(dict2):
        if name not in dict1:
            dict1[name] = []
        dict1[name].extend(extra_items)


class GroupSerializerBase(Serializer):
    def __init__(
        self,
        collapse=None,
        expand=None,
コード例 #23
0
ファイル: test_tsdb_backend.py プロジェクト: hosmelq/sentry
class SnubaTSDBTest(TestCase):
    def setUp(self):
        """
        Reset Snuba and insert the events that the tests in this class query.

        Fixture layout:

        * ``self.now`` is today's midnight, UTC.
        * One project with two environments (``test`` and ``dev``), two
          groups (each linked to its own hash through ``GroupHash``) and two
          releases with versions ``'1' * 10`` and ``'2' * 10``.
        * 24 events, one every 10 minutes for 4 hours starting at
          ``self.now``, where:

          - ``primary_hash`` alternates between the two hashes per event,
          - every event carries the ``test`` environment tag,
          - the ``sentry:release`` tag is the hour index repeated 10 times,
          - the user id changes every 55 minutes and the email on every
            event.

        Both HTTP calls against the Snuba test endpoints must answer 200.
        """
        # Chain to the parent class so any base fixture setup is not skipped.
        super(SnubaTSDBTest, self).setUp()

        # Start from an empty Snuba dataset.
        assert requests.post(settings.SENTRY_SNUBA + '/tests/drop').status_code == 200

        self.db = SnubaTSDB()
        self.now = datetime.utcnow().replace(
            hour=0,
            minute=0,
            second=0,
            microsecond=0,
            tzinfo=pytz.UTC
        )

        self.proj1 = self.create_project()
        self.proj1env1 = self.create_environment(project=self.proj1, name='test')
        self.proj1env2 = self.create_environment(project=self.proj1, name='dev')

        self.proj1group1 = self.create_group(self.proj1)
        self.proj1group2 = self.create_group(self.proj1)

        hash1 = '1' * 32
        hash2 = '2' * 32
        GroupHash.objects.create(project=self.proj1, group=self.proj1group1, hash=hash1)
        GroupHash.objects.create(project=self.proj1, group=self.proj1group2, hash=hash2)

        self.release1 = Release.objects.create(
            organization_id=self.organization.id,
            version='1' * 10,
            date_added=self.now,
        )
        self.release1.add_project(self.proj1)
        self.release2 = Release.objects.create(
            organization_id=self.organization.id,
            version='2' * 10,
            date_added=self.now,
        )
        self.release2.add_project(self.proj1)

        # ``r`` is the event's offset from ``self.now`` in seconds.
        data = json.dumps([{
            'event_id': (six.text_type(r) * 32)[:32],
            'primary_hash': [hash1, hash2][(r // 600) % 2],
            'project_id': self.proj1.id,
            'message': 'message 1',
            'platform': 'python',
            'datetime': (self.now + timedelta(seconds=r)).strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
            'data': {
                'received': calendar.timegm(self.now.timetuple()) + r,
                'tags': {
                    'foo': 'bar',
                    'baz': 'quux',
                    'environment': self.proj1env1.name,
                    'sentry:release': six.text_type(r // 3600) * 10,  # 1 per hour
                },
                'sentry.interfaces.User': {
                    # change every 55 min so some hours have 1 user, some have 2
                    'id': "user{}".format(r // 3300),
                    'email': "user{}@sentry.io".format(r)
                }
            },
        } for r in range(0, 14400, 600)])  # Every 10 min for 4 hours

        assert requests.post(settings.SENTRY_SNUBA + '/tests/insert', data=data).status_code == 200

    def test_range_groups(self):
        """Hourly ``TSDBModel.group`` counts: 3 events per group in each of the 4 hours."""
        hours = [self.now + timedelta(hours=offset) for offset in range(4)]
        window_start, window_end = hours[0], hours[-1]

        # Single group
        single = self.db.get_range(
            TSDBModel.group,
            [self.proj1group1.id],
            window_start,
            window_end,
            rollup=3600,
        )
        assert single == {
            self.proj1group1.id: [(timestamp(hour), 3) for hour in hours],
        }

        # Multiple groups
        both = self.db.get_range(
            TSDBModel.group,
            [self.proj1group1.id, self.proj1group2.id],
            window_start,
            window_end,
            rollup=3600,
        )
        assert both == {
            group.id: [(timestamp(hour), 3) for hour in hours]
            for group in (self.proj1group1, self.proj1group2)
        }

    def test_range_releases(self):
        """Hourly ``TSDBModel.release`` counts for release1: 6 events in the second hour, none elsewhere."""
        hours = [self.now + timedelta(hours=offset) for offset in range(4)]
        counts_per_hour = (0, 6, 0, 0)

        series = self.db.get_range(
            TSDBModel.release,
            [self.release1.id],
            hours[0],
            hours[-1],
            rollup=3600,
        )

        assert series == {
            self.release1.id: [
                (timestamp(hour), count)
                for hour, count in zip(hours, counts_per_hour)
            ]
        }

    def test_range_project(self):
        """
        Hourly ``TSDBModel.project`` counts, with and without an environment filter.

        The unfiltered query and the query for ``proj1env1`` both expect 6
        events per hour; the query for ``proj1env2`` expects zeros.
        """
        hours = [self.now + timedelta(hours=offset) for offset in range(4)]

        # (extra keyword arguments for get_range, expected count in every hour)
        scenarios = (
            ({}, 6),
            ({'environment_id': self.proj1env1.id}, 6),
            # No events submitted for env2
            ({'environment_id': self.proj1env2.id}, 0),
        )

        for extra_kwargs, per_hour in scenarios:
            series = self.db.get_range(
                TSDBModel.project,
                [self.proj1.id],
                hours[0],
                hours[-1],
                rollup=3600,
                **extra_kwargs
            )
            assert series == {
                self.proj1.id: [(timestamp(hour), per_hour) for hour in hours]
            }

    def test_range_rollups(self):
        """``get_range`` for the project with a daily (86400s) and a minutely (60s) rollup."""
        # Daily: day buckets start on day boundaries
        midnight = self.now.replace(hour=0)
        days = [midnight + timedelta(days=offset) for offset in range(2)]
        daily = self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            days[0],
            days[-1],
            rollup=86400,
        )
        assert daily == {
            self.proj1.id: [
                (timestamp(days[0]), 24),
                (timestamp(days[1]), 0),
            ]
        }

        # Minutely: expect every 10th minute to have a 1, else 0
        minutes = [self.now + timedelta(minutes=offset) for offset in range(120)]
        expected = [
            (to_timestamp(minute), 1 if index % 10 == 0 else 0)
            for index, minute in enumerate(minutes)
        ]
        minutely = self.db.get_range(
            TSDBModel.project,
            [self.proj1.id],
            minutes[0],
            minutes[-1],
            rollup=60,
        )
        assert minutely == {self.proj1.id: expected}

    def test_distinct_counts_series_users(self):
        """Hourly distinct-user series, first for one group and then for the project."""
        hours = [self.now + timedelta(hours=offset) for offset in range(4)]

        def hourly(counts):
            # Pair every hour bucket with its expected distinct-user count.
            return [(timestamp(hour), count) for hour, count in zip(hours, counts)]

        by_group = self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            hours[0],
            hours[-1],
            rollup=3600,
        )
        assert by_group == {
            self.proj1group1.id: hourly((1, 1, 1, 2)),
        }

        by_project = self.db.get_distinct_counts_series(
            TSDBModel.users_affected_by_project,
            [self.proj1.id],
            hours[0],
            hours[-1],
            rollup=3600,
        )
        assert by_project == {
            self.proj1.id: hourly((1, 2, 2, 2)),
        }

    def test_distinct_counts_totals_users(self):
        """
        Total distinct-user counts over a window, per group and per project.

        This method used to be called ``get_distinct_counts_totals_users``;
        without the ``test`` prefix the test runner never collected it, so its
        assertions never ran.

        Expected totals follow from the events inserted in ``setUp``: the user
        id is ``"user{}".format(r // 3300)`` with ``r`` running from 0 to
        13800 seconds, i.e. users 0 through 4. Group 1 receives every other
        event (r = 0, 1200, ..., 13200), which still covers all five ids, so
        both the group and the project see 5 distinct users over the 4 hours.
        In the first hour group 1 only sees ``user0``.
        """
        window_end = self.now + timedelta(hours=4)

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            window_end,
            rollup=3600
        ) == {
            self.proj1group1.id: 5,  # 5 unique users overall
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_group,
            [self.proj1group1.id],
            self.now,
            self.now,
            rollup=3600
        ) == {
            self.proj1group1.id: 1,  # Only 1 unique user in the first hour
        }

        assert self.db.get_distinct_counts_totals(
            TSDBModel.users_affected_by_project,
            [self.proj1.id],
            self.now,
            window_end,
            rollup=3600
        ) == {
            self.proj1.id: 5,
        }

    # Backward-compatible alias for the previous name. It does not start with
    # ``test``, so the runner does not collect the method a second time.
    get_distinct_counts_totals_users = test_distinct_counts_totals_users

    def test_most_frequent(self):
        """``get_most_frequent`` for the project's issues over the 4 hour window, hourly rollup."""
        window_end = self.now + timedelta(hours=4)

        ranking = self.db.get_most_frequent(
            TSDBModel.frequent_issues_by_project,
            [self.proj1.id],
            self.now,
            window_end,
            rollup=3600,
        )

        # Group 1 is expected first with score 2.0, then group 2 with 1.0.
        expected_order = [
            (self.proj1group1.id, 2.0),
            (self.proj1group2.id, 1.0),
        ]
        assert ranking == {self.proj1.id: expected_order}

    def test_frequency_series(self):
        """
        Hourly per-release frequency series for both groups.

        Both releases are requested for group1 and only release 1 for group2,
        but that distinction is lost in the snuba query: the result carries a
        frequency series for both releases on both groups.
        """
        hours = [self.now + timedelta(hours=offset) for offset in range(4)]

        # (release1 count, release2 count) expected in each hour bucket.
        counts_per_hour = ((0, 0), (3, 0), (0, 3), (0, 0))

        def expected_series():
            # Build a fresh list per group; both groups expect the same numbers.
            return [
                (timestamp(hour), {
                    self.release1.id: first,
                    self.release2.id: second,
                })
                for hour, (first, second) in zip(hours, counts_per_hour)
            ]

        requested = {
            self.proj1group1.id: (self.release1.id, self.release2.id, ),
            self.proj1group2.id: (self.release1.id, ),
        }
        series = self.db.get_frequency_series(
            TSDBModel.frequent_releases_by_group,
            requested,
            hours[0],
            hours[-1],
            rollup=3600,
        )

        assert series == {
            self.proj1group1.id: expected_series(),
            self.proj1group2.id: expected_series(),
        }