def test_chunked():
    assert list(chunked(range(5), 5)) == [
        [0, 1, 2, 3, 4],
    ]

    assert list(chunked(range(10), 4)) == [
        [0, 1, 2, 3],
        [4, 5, 6, 7],
        [8, 9],
    ]
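
The two assertions above pin down the behaviour relied on throughout these examples: `chunked` walks an iterable and yields lists of at most `size` items, with a shorter final list when the input does not divide evenly. Below is a minimal sketch consistent with those tests; it is only an illustration, since the examples here most likely import the real helper from `sentry.utils.iterators` or `more_itertools`.

from itertools import islice

def chunked(iterable, size):
    # Yield successive lists of at most ``size`` items from ``iterable``.
    iterator = iter(iterable)
    while True:
        chunk = list(islice(iterator, size))
        if not chunk:
            return
        yield chunk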
Example #3
def _query_tsdb_groups_chunked(func, issue_ids, start, stop, rollup):
    combined = {}

    for chunk in chunked(issue_ids, BATCH_SIZE):
        combined.update(func(tsdb.models.group, chunk, start, stop, rollup=rollup))

    return combined
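
This helper simply batches the issue IDs so that whichever tsdb aggregate it wraps never receives more than `BATCH_SIZE` keys at once, merging the per-chunk dictionaries into one result. A hedged sketch of how it might be called, mirroring the `tsdb.get_sums` usage in Example #4 below (the call site itself is hypothetical):

# Hypothetical call site; tsdb.get_sums takes (model, keys, start, stop, rollup=...).
event_counts = _query_tsdb_groups_chunked(tsdb.get_sums, issue_ids, start, stop, rollup)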
Example #4
def get_event_counts(issue_ids, start, stop, rollup):
    combined = {}

    for chunk in chunked(issue_ids, BATCH_SIZE):
        combined.update(
            tsdb.get_sums(tsdb.models.group, chunk, start, stop,
                          rollup=rollup))

    return combined
Example #5
def organizations(metrics, since, until):
    """
    Fetch metrics for organizations.
    """
    from django.utils import timezone
    from sentry.app import tsdb
    from sentry.models import Organization

    stdout = click.get_text_stream('stdout')
    stderr = click.get_text_stream('stderr')

    def aggregate(series):
        return sum(value for timestamp, value in series)

    metrics = OrderedDict(
        (name, getattr(tsdb.models, name)) for name in metrics)
    if not metrics:
        return

    if until is None:
        until = timezone.now()

    if since is None:
        since = until - timedelta(minutes=60)

    if until < since:
        raise click.ClickException(
            u'invalid time range provided: {} to {}'.format(since, until))

    stderr.write(
        u'Dumping {} from {} to {}...\n'.format(
            ', '.join(metrics.keys()),
            since,
            until,
        ), )

    objects = Organization.objects.all()

    for chunk in chunked(objects, 100):
        instances = OrderedDict((instance.pk, instance) for instance in chunk)

        results = {}
        for metric in metrics.values():
            results[metric] = tsdb.get_range(metric, instances.keys(), since,
                                             until)

        for key, instance in six.iteritems(instances):
            values = []
            for metric in metrics.values():
                values.append(aggregate(results[metric][key]))

            stdout.write(
                u'{} {} {}\n'.format(
                    instance.id,
                    instance.slug,
                    ' '.join(map(six.binary_type, values)),
                ), )
Example #6
def organizations(metrics, since, until):
    """
    Fetch metrics for organizations.
    """
    from django.utils import timezone
    from sentry.app import tsdb
    from sentry.models import Organization

    stdout = click.get_text_stream('stdout')
    stderr = click.get_text_stream('stderr')

    def aggregate(series):
        return sum(value for timestamp, value in series)

    metrics = OrderedDict((name, getattr(tsdb.models, name)) for name in metrics)
    if not metrics:
        return

    if until is None:
        until = timezone.now()

    if since is None:
        since = until - timedelta(minutes=60)

    if until < since:
        raise click.ClickException('invalid time range provided: {} to {}'.format(since, until))

    stderr.write(
        'Dumping {} from {} to {}...\n'.format(
            ', '.join(metrics.keys()),
            since,
            until,
        ),
    )

    objects = Organization.objects.all()

    for chunk in chunked(objects, 100):
        instances = OrderedDict((instance.pk, instance) for instance in chunk)

        results = {}
        for metric in metrics.values():
            results[metric] = tsdb.get_range(metric, instances.keys(), since, until)

        for key, instance in six.iteritems(instances):
            values = []
            for metric in metrics.values():
                values.append(aggregate(results[metric][key]))

            stdout.write(
                '{} {} {}\n'.format(
                    instance.id,
                    instance.slug,
                    ' '.join(map(six.binary_type, values)),
                ),
            )
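
In both variants of this command, `tsdb.get_range` returns, per organization key, a series of `(timestamp, value)` pairs, which `aggregate` collapses to a single total before the row is printed as `id slug value...`. A small illustration of that shape, with made-up numbers:

# Hypothetical series for one key, as consumed by aggregate() above.
series = [(1500000000, 3), (1500000060, 0), (1500000120, 7)]
assert sum(value for timestamp, value in series) == 10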
Example #7
def update_user_reports(**kwargs: Any) -> None:
    now = timezone.now()
    user_reports = UserReport.objects.filter(
        group_id__isnull=True, environment_id__isnull=True, date_added__gte=now - timedelta(days=1)
    )

    # We do one query per project, just to avoid the small case that two projects have the same event ID
    project_map: Dict[int, Any] = {}
    for r in user_reports:
        project_map.setdefault(r.project_id, []).append(r)

    # Logging values
    total_reports = len(user_reports)
    reports_with_event = 0
    updated_reports = 0
    samples = None

    MAX_EVENTS = kwargs.get("max_events", 5000)
    for project_id, reports in project_map.items():
        event_ids = [r.event_id for r in reports]
        report_by_event = {r.event_id: r for r in reports}
        events = []
        for event_id_chunk in chunked(event_ids, MAX_EVENTS):
            snuba_filter = eventstore.Filter(
                project_ids=[project_id],
                event_ids=event_id_chunk,
                start=now - timedelta(days=2),
                end=now + timedelta(minutes=5),  # Just to catch clock skew
            )
            events_chunk = eventstore.get_events(filter=snuba_filter)
            events.extend(events_chunk)

        for event in events:
            report = report_by_event.get(event.event_id)
            if report:
                reports_with_event += 1
                report.update(group_id=event.group_id, environment_id=event.get_environment().id)
                updated_reports += 1

        if not samples and len(reports) <= 10:
            samples = {
                "project_id": project_id,
                "event_ids": event_ids,
                "reports_event_ids": {r.id: r.event_id for r in reports},
            }

    logger.info(
        "update_user_reports.records_updated",
        extra={
            "reports_to_update": total_reports,
            "reports_with_event": reports_with_event,
            "updated_reports": updated_reports,
            "samples": samples,
        },
    )
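
The task groups reports by project before touching the eventstore, so each Snuba filter is scoped to one project, and then chunks the event IDs so a single query never carries more than `MAX_EVENTS` of them. A self-contained sketch of the grouping step, using hypothetical report objects in place of `UserReport` rows:

from collections import namedtuple

Report = namedtuple("Report", ["project_id", "event_id"])
reports = [Report(1, "a"), Report(2, "b"), Report(1, "c")]

# Same setdefault pattern as above: one list of reports per project.
project_map = {}
for r in reports:
    project_map.setdefault(r.project_id, []).append(r)

assert project_map == {1: [Report(1, "a"), Report(1, "c")], 2: [Report(2, "b")]}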
Example #8
    def _query(self, project, retention_window_start, group_queryset, tags, environment,
               sort_by, limit, cursor, count_hits, paginator_options, **parameters):

        # TODO: Product decision: we currently search Group.message to handle
        # the `query` parameter, because that's what we've always done. We could
        # do that search against every event in Snuba instead, but results may
        # differ.

        now = timezone.now()
        end = parameters.get('date_to') or (now + ALLOWED_FUTURE_DELTA)
        # TODO: Presumably we want to search back to the project's full retention,
        #       which may be higher than 90 days in the future, but apparently
        #       `retention_window_start` can be None?
        start = max(
            filter(None, [
                retention_window_start,
                parameters.get('date_from'),
                now - timedelta(days=90)
            ])
        )
        assert start < end

        # TODO: It's possible `first_release` could be handled by Snuba.
        if environment is not None:
            group_queryset = ds.QuerySetBuilder({
                'first_release': ds.CallbackCondition(
                    lambda queryset, version: queryset.extra(
                        where=[
                            '{} = {}'.format(
                                ds.get_sql_column(GroupEnvironment, 'first_release_id'),
                                ds.get_sql_column(Release, 'id'),
                            ),
                            '{} = %s'.format(
                                ds.get_sql_column(Release, 'organization'),
                            ),
                            '{} = %s'.format(
                                ds.get_sql_column(Release, 'version'),
                            ),
                        ],
                        params=[project.organization_id, version],
                        tables=[Release._meta.db_table],
                    ),
                ),
            }).build(
                group_queryset.extra(
                    where=[
                        u'{} = {}'.format(
                            ds.get_sql_column(Group, 'id'),
                            ds.get_sql_column(GroupEnvironment, 'group_id'),
                        ),
                        u'{} = %s'.format(
                            ds.get_sql_column(GroupEnvironment, 'environment_id'),
                        ),
                    ],
                    params=[environment.id],
                    tables=[GroupEnvironment._meta.db_table],
                ),
                parameters,
            )
        else:
            group_queryset = ds.QuerySetBuilder({
                'first_release': ds.CallbackCondition(
                    lambda queryset, version: queryset.filter(
                        first_release__organization_id=project.organization_id,
                        first_release__version=version,
                    ),
                ),
            }).build(
                group_queryset,
                parameters,
            )

        # pre-filter query
        candidate_hashes = dict(
            GroupHash.objects.filter(
                group__in=group_queryset
            ).values_list(
                'hash', 'group_id'
            )[:MAX_PRE_SNUBA_CANDIDATES + 1]
        )
        metrics.timing('snuba.search.num_candidates', len(candidate_hashes))

        if not candidate_hashes:
            # no matches could possibly be found from this point on
            metrics.incr('snuba.search.no_candidates')
            return Paginator(Group.objects.none()).get_result()
        elif len(candidate_hashes) > MAX_PRE_SNUBA_CANDIDATES:
            # If the pre-filter query didn't include anything to significantly
            # filter down the number of results (from 'first_release', 'query',
            # 'status', 'bookmarked_by', 'assigned_to', 'unassigned',
            # 'subscribed_by', 'active_at_from', or 'active_at_to') then it
            # might have surpassed the MAX_PRE_SNUBA_CANDIDATES. In this case,
            # we *don't* want to pass candidates down to Snuba, and instead we
            # want Snuba to do all the filtering/sorting it can and *then* apply
            # this queryset to the results from Snuba, which we call
            # post-filtering.
            metrics.incr('snuba.search.too_many_candidates')
            candidate_hashes = None

        sort, extra_aggregations, score_fn = sort_strategies[sort_by]

        # {group_id: group_score, ...}
        snuba_groups = snuba_search(
            project_id=project.id,
            environment_id=environment and environment.id,
            tags=tags,
            start=start,
            end=end,
            sort=sort,
            extra_aggregations=extra_aggregations,
            score_fn=score_fn,
            candidate_hashes=candidate_hashes,
            **parameters
        )
        metrics.timing('snuba.search.num_snuba_results', len(snuba_groups))

        if candidate_hashes:
            # pre-filtered candidates were passed down to Snuba,
            # so we're finished with filtering
            result_groups = snuba_groups.items()
        else:
            # pre-filtered candidates were *not* passed down to Snuba,
            # so we need to do post-filtering to verify Sentry DB predicates
            result_groups = []
            i = 0
            for i, chunk in enumerate(chunked(snuba_groups.items(), MAX_POST_SNUBA_CHUNK), 1):
                filtered_group_ids = group_queryset.filter(
                    id__in=[gid for gid, _ in chunk]
                ).values_list('id', flat=True)

                result_groups.extend(
                    (group_id, snuba_groups[group_id])
                    for group_id in filtered_group_ids
                )

            metrics.timing('snuba.search.num_post_filters', i)

        paginator_results = SequencePaginator(
            [(score, id) for (id, score) in result_groups],
            reverse=True,
            **paginator_options
        ).get_result(limit, cursor, count_hits=count_hits)

        groups = Group.objects.in_bulk(paginator_results.results)
        paginator_results.results = [groups[k] for k in paginator_results.results if k in groups]

        return paginator_results
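
When the pre-filter produced too many candidates, the Snuba scores are verified against the Sentry database in chunks of `MAX_POST_SNUBA_CHUNK`: each chunk becomes one `id__in` query and only the surviving group IDs keep their score. A stripped-down sketch of that post-filtering loop, reusing the same `chunked` helper but with a plain set standing in for the Django queryset (hypothetical data):

# Hypothetical stand-ins: scores from Snuba and the ids the queryset would keep.
snuba_groups = {101: 0.9, 102: 0.5, 103: 0.7, 104: 0.1}
allowed_ids = {101, 103}
MAX_POST_SNUBA_CHUNK = 2

result_groups = []
for chunk in chunked(list(snuba_groups.items()), MAX_POST_SNUBA_CHUNK):
    filtered_group_ids = [gid for gid, _ in chunk if gid in allowed_ids]
    result_groups.extend((gid, snuba_groups[gid]) for gid in filtered_group_ids)

assert result_groups == [(101, 0.9), (103, 0.7)]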
Example #9
def band(n, value):
    # Split ``value`` into ``n`` equally sized chunks; the length must divide evenly.
    assert len(value) % n == 0
    return list(chunked(value, len(value) // n))
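
`band` divides a sequence into `n` equal slices, which is why it asserts that the length is divisible by `n`. A hypothetical usage example, assuming `chunked` yields lists as in the tests above:

assert band(2, [1, 2, 3, 4, 5, 6]) == [[1, 2, 3], [4, 5, 6]]
assert band(3, "abcdef") == [["a", "b"], ["c", "d"], ["e", "f"]]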
Example #10
    def _query(self, project, retention_window_start, group_queryset, tags, environment,
               sort_by, limit, cursor, count_hits, paginator_options, **parameters):

        # TODO: Product decision: we currently search Group.message to handle
        # the `query` parameter, because that's what we've always done. We could
        # do that search against every event in Snuba instead, but results may
        # differ.

        now = timezone.now()
        end = parameters.get('date_to') or (now + ALLOWED_FUTURE_DELTA)
        # TODO: Presumably we want to search back to the project's full retention,
        #       which may be higher than 90 days in the future, but apparently
        #       `retention_window_start` can be None?
        start = max(
            filter(None, [
                retention_window_start,
                parameters.get('date_from'),
                now - timedelta(days=90)
            ])
        )
        assert start < end

        # TODO: It's possible `first_release` could be handled by Snuba.
        if environment is not None:
            group_queryset = ds.QuerySetBuilder({
                'first_release': ds.CallbackCondition(
                    lambda queryset, version: queryset.extra(
                        where=[
                            '{} = {}'.format(
                                ds.get_sql_column(GroupEnvironment, 'first_release_id'),
                                ds.get_sql_column(Release, 'id'),
                            ),
                            '{} = %s'.format(
                                ds.get_sql_column(Release, 'organization'),
                            ),
                            '{} = %s'.format(
                                ds.get_sql_column(Release, 'version'),
                            ),
                        ],
                        params=[project.organization_id, version],
                        tables=[Release._meta.db_table],
                    ),
                ),
            }).build(
                group_queryset.extra(
                    where=[
                        '{} = {}'.format(
                            ds.get_sql_column(Group, 'id'),
                            ds.get_sql_column(GroupEnvironment, 'group_id'),
                        ),
                        '{} = %s'.format(
                            ds.get_sql_column(GroupEnvironment, 'environment_id'),
                        ),
                    ],
                    params=[environment.id],
                    tables=[GroupEnvironment._meta.db_table],
                ),
                parameters,
            )
        else:
            group_queryset = ds.QuerySetBuilder({
                'first_release': ds.CallbackCondition(
                    lambda queryset, version: queryset.filter(
                        first_release__organization_id=project.organization_id,
                        first_release__version=version,
                    ),
                ),
            }).build(
                group_queryset,
                parameters,
            )

        # pre-filter query
        candidate_hashes = dict(
            GroupHash.objects.filter(
                group__in=group_queryset
            ).values_list(
                'hash', 'group_id'
            )[:MAX_PRE_SNUBA_CANDIDATES + 1]
        )
        metrics.timing('snuba.search.num_candidates', len(candidate_hashes))

        if not candidate_hashes:
            # no matches could possibly be found from this point on
            metrics.incr('snuba.search.no_candidates')
            return Paginator(Group.objects.none()).get_result()
        elif len(candidate_hashes) > MAX_PRE_SNUBA_CANDIDATES:
            # If the pre-filter query didn't include anything to significantly
            # filter down the number of results (from 'first_release', 'query',
            # 'status', 'bookmarked_by', 'assigned_to', 'unassigned',
            # 'subscribed_by', 'active_at_from', or 'active_at_to') then it
            # might have surpassed the MAX_PRE_SNUBA_CANDIDATES. In this case,
            # we *don't* want to pass candidates down to Snuba, and instead we
            # want Snuba to do all the filtering/sorting it can and *then* apply
            # this queryset to the results from Snuba, which we call
            # post-filtering.
            metrics.incr('snuba.search.too_many_candidates')
            candidate_hashes = None

        sort, extra_aggregations, score_fn = sort_strategies[sort_by]

        # {group_id: group_score, ...}
        snuba_groups = snuba_search(
            project_id=project.id,
            environment_id=environment and environment.id,
            tags=tags,
            start=start,
            end=end,
            sort=sort,
            extra_aggregations=extra_aggregations,
            score_fn=score_fn,
            candidate_hashes=candidate_hashes,
            **parameters
        )
        metrics.timing('snuba.search.num_snuba_results', len(snuba_groups))

        if candidate_hashes:
            # pre-filtered candidates were passed down to Snuba,
            # so we're finished with filtering
            result_groups = snuba_groups.items()
        else:
            # pre-filtered candidates were *not* passed down to Snuba,
            # so we need to do post-filtering to verify Sentry DB predicates
            result_groups = []
            i = 0
            for i, chunk in enumerate(chunked(snuba_groups.items(), MAX_POST_SNUBA_CHUNK), 1):
                filtered_group_ids = group_queryset.filter(
                    id__in=[gid for gid, _ in chunk]
                ).values_list('id', flat=True)

                result_groups.extend(
                    (group_id, snuba_groups[group_id])
                    for group_id in filtered_group_ids
                )

            metrics.timing('snuba.search.num_post_filters', i)

        paginator_results = SequencePaginator(
            [(score, id) for (id, score) in result_groups],
            reverse=True,
            **paginator_options
        ).get_result(limit, cursor, count_hits=count_hits)

        groups = Group.objects.in_bulk(paginator_results.results)
        paginator_results.results = [groups[k] for k in paginator_results.results if k in groups]

        return paginator_results