def hash_user_identifier(identifier):
    identifier = force_text(identifier, errors="replace")
    return sum(map(ord, identifier))
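A self-contained sketch of the same helper, with a stand-in for force_text (assumed here to decode bytes with the given error handler; the real helper comes from the Django/Sentry compat layer):

def force_text(value, errors="strict"):
    # Stand-in for the real helper: decode bytes to text, pass other values through str().
    if isinstance(value, bytes):
        return value.decode("utf-8", errors=errors)
    return str(value)


def hash_user_identifier(identifier):
    identifier = force_text(identifier, errors="replace")
    return sum(map(ord, identifier))


assert hash_user_identifier(b"ab") == 97 + 98  # ord("a") + ord("b")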
Example #2
def build_project_breakdown_series(reports):
    Key = namedtuple("Key", "label url color data")

    def get_legend_data(report):
        filtered, rate_limited = report.usage_summary
        return {
            "events": sum(sum(value) for timestamp, value in report.series),
            "filtered": filtered,
            "rate_limited": rate_limited,
        }

    # Find the reports with the most total events. (The number of reports to
    # keep is the same as the number of colors available to use in the legend.)
    instances = map(
        operator.itemgetter(0),
        sorted(
            reports.items(),
            key=lambda instance__report: sum(
                sum(values) for timestamp, values in instance__report[1][0]
            ),
            reverse=True,
        ),
    )[: len(colors)]

    # Start building the list of items to include in the report chart. This
    # is a list of [Key, Report] pairs, in *ascending* order of the total sum
    # of values in the series. (This is so when we render the series, the
    # largest color blocks are at the bottom and it feels appropriately
    # weighted.)
    selections = map(
        lambda instance__color: (
            Key(
                instance__color[0].slug,
                instance__color[0].get_absolute_url(),
                instance__color[1],
                get_legend_data(reports[instance__color[0]]),
            ),
            reports[instance__color[0]],
        ),
        zip(instances, colors),
    )[::-1]

    # Collect any reports that weren't in the selection set, merge them
    # together and add it at the top (front) of the stack.
    overflow = set(reports) - set(instances)
    if overflow:
        overflow_report = reduce(merge_reports, [reports[instance] for instance in overflow])
        selections.insert(
            0, (Key("Other", None, "#f2f0fa", get_legend_data(overflow_report)), overflow_report)
        )

    def summarize(key, points):
        total = sum(points)
        return [(key, total)] if total else []

    # Collect all of the independent series into a single series to make it
    # easier to render, resulting in a series where each value is a sequence of
    # (key, count) pairs.
    series = reduce(
        merge_series,
        [series_map(functools.partial(summarize, key), report[0]) for key, report in selections],
    )

    legend = [key for key, value in reversed(selections)]
    return {
        "points": [(to_datetime(timestamp), value) for timestamp, value in series],
        "maximum": max(sum(count for key, count in value) for timestamp, value in series),
        "legend": {
            "rows": legend,
            "total": Key("Total", None, None, reduce(merge_mappings, [key.data for key in legend])),
        },
    }
Example #3
def bulk_raw_query(snuba_param_list, referrer=None):
    headers = {}
    if referrer:
        headers["referer"] = referrer

    query_param_list = map(_prepare_query_params, snuba_param_list)

    def snuba_query(params):
        query_params, forward, reverse, thread_hub = params
        try:
            with timer("snuba_query"):
                referrer = headers.get("referer", "<unknown>")
                if SNUBA_INFO:
                    logger.info("{}.body: {}".format(referrer, json.dumps(query_params)))
                    query_params["debug"] = True
                body = json.dumps(query_params)
                with thread_hub.start_span(
                    op="snuba", description=u"query {}".format(referrer)
                ) as span:
                    span.set_tag("referrer", referrer)
                    for param_key, param_data in six.iteritems(query_params):
                        span.set_data(param_key, param_data)
                    return (
                        _snuba_pool.urlopen("POST", "/query", body=body, headers=headers),
                        forward,
                        reverse,
                    )
        except urllib3.exceptions.HTTPError as err:
            raise SnubaError(err)

    with sentry_sdk.start_span(
        op="start_snuba_query",
        description=u"running {} snuba queries".format(len(snuba_param_list)),
    ) as span:
        span.set_tag("referrer", headers.get("referer", "<unknown>"))
        if len(snuba_param_list) > 1:
            query_results = list(
                _query_thread_pool.map(
                    snuba_query, [params + (Hub(Hub.current),) for params in query_param_list]
                )
            )
        else:
            # No need to submit to the thread pool if we're just performing a
            # single query
            query_results = [snuba_query(query_param_list[0] + (Hub(Hub.current),))]

    results = []
    for response, _, reverse in query_results:
        try:
            body = json.loads(response.data)
            if SNUBA_INFO:
                if "sql" in body:
                    logger.info(
                        "{}.sql: {}".format(headers.get("referer", "<unknown>"), body["sql"])
                    )
                if "error" in body:
                    logger.info(
                        "{}.err: {}".format(headers.get("referer", "<unknown>"), body["error"])
                    )
        except ValueError:
            if response.status != 200:
                logger.error("snuba.query.invalid-json")
                raise SnubaError("Failed to parse snuba error response")
            raise UnexpectedResponseError(
                u"Could not decode JSON response: {}".format(response.data)
            )

        if response.status != 200:
            if body.get("error"):
                error = body["error"]
                if response.status == 429:
                    raise RateLimitExceeded(error["message"])
                elif error["type"] == "schema":
                    raise SchemaValidationError(error["message"])
                elif error["type"] == "clickhouse":
                    raise clickhouse_error_codes_map.get(error["code"], QueryExecutionError)(
                        error["message"]
                    )
                else:
                    raise SnubaError(error["message"])
            else:
                raise SnubaError(u"HTTP {}".format(response.status))

        # Forward and reverse translation maps from model ids to snuba keys, per column
        body["data"] = [reverse(d) for d in body["data"]]
        results.append(body)

    return results
Example #4
 def make_frequency_table_keys(self, model, rollup, timestamp, key, environment_id):
     prefix = self.make_key(model, rollup, timestamp, key, environment_id)
     return map(operator.methodcaller("format", prefix), ("{}:i", "{}:e"))
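A standalone illustration of the operator.methodcaller("format", prefix) idiom used above, with a made-up prefix (the real one comes from self.make_key):

import operator

prefix = "ts:frequency:300:1467000000:example-key:1"  # illustrative only
keys = list(map(operator.methodcaller("format", prefix), ("{}:i", "{}:e")))
assert keys == [prefix + ":i", prefix + ":e"]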
Example #5
    def get(self, request, organization):
        """
        List an Organization's Issues
        `````````````````````````````

        Return a list of issues (groups) bound to an organization.  All parameters are
        supplied as query string parameters.

        A default query of ``is:unresolved`` is applied. To return results
        with other statuses, send a new query value (i.e. ``?query=`` for all
        results).

        The ``groupStatsPeriod`` parameter can be used to select the timeline
        stats which should be present. Possible values are: '' (disable),
        '24h', '14d'

        The ``statsPeriod`` parameter can be used to select a date window starting
        from now. Ex. ``14d``.

        The ``start`` and ``end`` parameters can be used to select an absolute
        date period to fetch issues from.

        :qparam string statsPeriod: an optional stat period (can be one of
                                    ``"24h"``, ``"14d"``, and ``""``).
        :qparam string groupStatsPeriod: an optional stat period (can be one of
                                    ``"24h"``, ``"14d"``, and ``""``).
        :qparam string start:       Beginning date. You must also provide ``end``.
        :qparam string end:         End date. You must also provide ``start``.
        :qparam bool shortIdLookup: if this is set to true then short IDs are
                                    looked up by this function as well.  This
                                    can cause the return value of the function
                                    to return an event issue of a different
                                    project which is why this is an opt-in.
                                    Set to `1` to enable.
        :qparam querystring query: an optional Sentry structured search
                                   query.  If not provided, an implied
                                   ``"is:unresolved"`` is assumed.
        :pparam string organization_slug: the slug of the organization the
                                          issues belong to.
        :auth: required
        """
        stats_period = request.GET.get("groupStatsPeriod")
        try:
            start, end = get_date_range_from_params(request.GET)
        except InvalidParams as e:
            raise ParseError(detail=six.text_type(e))

        has_dynamic_issue_counts = features.has(
            "organizations:dynamic-issue-counts",
            organization,
            actor=request.user)

        if stats_period not in (None, "", "24h", "14d", "auto"):
            return Response({"detail": ERR_INVALID_STATS_PERIOD}, status=400)
        elif stats_period is None:
            # default if no dynamic-issue-counts
            stats_period = "24h"
        elif stats_period == "":
            # disable stats
            stats_period = None

        if stats_period == "auto":
            stats_period_start = start
            stats_period_end = end
        else:
            stats_period_start = None
            stats_period_end = None

        environments = self.get_environments(request, organization)

        serializer = functools.partial(
            StreamGroupSerializerSnuba,
            environment_ids=[env.id for env in environments],
            stats_period=stats_period,
            stats_period_start=stats_period_start,
            stats_period_end=stats_period_end,
        )

        projects = self.get_projects(request, organization)
        project_ids = [p.id for p in projects]

        if not projects:
            return Response([])

        if len(projects) > 1 and not features.has("organizations:global-views",
                                                  organization,
                                                  actor=request.user):
            return Response(
                {
                    "detail":
                    "You do not have the multi project stream feature enabled"
                },
                status=400)

        # we ignore date range for both short id and event ids
        query = request.GET.get("query", "").strip()
        if query:
            # check to see if we've got an event ID
            event_id = normalize_event_id(query)
            if event_id:
                # For a direct hit lookup we want to use any passed project ids
                # (we've already checked permissions on these) plus any other
                # projects that the user is a member of. This gives us a better
                # chance of returning the correct result, even if the wrong
                # project is selected.
                direct_hit_projects = set(project_ids) | set(
                    [project.id for project in request.access.projects])
                groups = list(
                    Group.objects.filter_by_event_id(direct_hit_projects,
                                                     event_id))
                if len(groups) == 1:
                    response = Response(
                        serialize(groups, request.user,
                                  serializer(matching_event_id=event_id)))
                    response["X-Sentry-Direct-Hit"] = "1"
                    return response

                if groups:
                    return Response(
                        serialize(groups, request.user, serializer()))

            group = get_by_short_id(organization.id,
                                    request.GET.get("shortIdLookup"), query)
            if group is not None:
                # check all projects user has access to
                if request.access.has_project_access(group.project):
                    response = Response(
                        serialize([group], request.user, serializer()))
                    response["X-Sentry-Direct-Hit"] = "1"
                    return response

        # If group ids specified, just ignore any query components
        try:
            group_ids = set(map(int, request.GET.getlist("group")))
        except ValueError:
            return Response({"detail": "Group ids must be integers"},
                            status=400)

        if group_ids:
            groups = list(
                Group.objects.filter(id__in=group_ids,
                                     project_id__in=project_ids))
            if any(g for g in groups
                   if not request.access.has_project_access(g.project)):
                raise PermissionDenied
            return Response(serialize(groups, request.user, serializer()))

        try:
            cursor_result, query_kwargs = self._search(
                request,
                organization,
                projects,
                environments,
                {
                    "count_hits": True,
                    "date_to": end,
                    "date_from": start
                },
            )
        except (ValidationError, discover.InvalidSearchQuery) as exc:
            return Response({"detail": six.text_type(exc)}, status=400)

        results = list(cursor_result)

        if has_dynamic_issue_counts:
            context = serialize(
                results,
                request.user,
                serializer(
                    start=start,
                    end=end,
                    search_filters=query_kwargs["search_filters"]
                    if "search_filters" in query_kwargs else None,
                    has_dynamic_issue_counts=True,
                ),
            )
        else:
            context = serialize(results, request.user, serializer())

        # HACK: remove auto resolved entries
        # TODO: We should try to integrate this into the search backend, since
        # this can cause us to arbitrarily return fewer results than requested.
        status = [
            search_filter
            for search_filter in query_kwargs.get("search_filters", [])
            if search_filter.key.name == "status"
        ]
        if status and status[0].value.raw_value == GroupStatus.UNRESOLVED:
            context = [r for r in context if r["status"] == "unresolved"]

        response = Response(context)

        self.add_cursor_headers(request, response, cursor_result)

        # TODO(jess): add metrics that are similar to project endpoint here
        return response
Example #6
    def validate(self, data):
        organization = self.context["organization"]
        query_info = data["query_info"]

        # Validate the project field, if provided
        # A PermissionDenied error will be raised in `get_projects_by_id` if the request is invalid
        project_query = query_info.get("project")
        if project_query:
            get_projects_by_id = self.context["get_projects_by_id"]
            # Coerce the query into a set
            if isinstance(project_query, list):
                projects = get_projects_by_id(set(map(int, project_query)))
            else:
                projects = get_projects_by_id({int(project_query)})
            query_info["project"] = [project.id for project in projects]

        # Discover Pre-processing
        if data["query_type"] == ExportQueryType.DISCOVER_STR:
            # coerce the fields into a list as needed
            base_fields = query_info.get("field", [])
            if not isinstance(base_fields, list):
                base_fields = [base_fields]

            equations, fields = categorize_columns(base_fields)

            if len(base_fields) > MAX_FIELDS:
                detail = f"You can export up to {MAX_FIELDS} fields at a time. Please delete some and try again."
                raise serializers.ValidationError(detail)
            elif len(base_fields) == 0:
                raise serializers.ValidationError(
                    "at least one field is required to export")

            if "query" not in query_info:
                detail = "query is required to export; please pass an empty string if you don't want to set one"
                raise serializers.ValidationError(detail)

            query_info["field"] = fields
            query_info["equations"] = equations

            if not query_info.get("project"):
                projects = self.context["get_projects"]()
                query_info["project"] = [project.id for project in projects]

            # make sure to fix the export start/end times to ensure consistent results
            try:
                start, end = get_date_range_from_params(query_info)
            except InvalidParams as e:
                sentry_sdk.set_tag("query.error_reason", "Invalid date params")
                raise serializers.ValidationError(str(e))

            if "statsPeriod" in query_info:
                del query_info["statsPeriod"]
            if "statsPeriodStart" in query_info:
                del query_info["statsPeriodStart"]
            if "statsPeriodEnd" in query_info:
                del query_info["statsPeriodEnd"]
            query_info["start"] = start.isoformat()
            query_info["end"] = end.isoformat()
            query_info["use_snql"] = features.has(
                "organizations:discover-use-snql", organization)

            # validate the query string by trying to parse it
            processor = DiscoverProcessor(
                discover_query=query_info,
                organization_id=organization.id,
            )
            try:
                snuba_filter = get_filter(query_info["query"],
                                          processor.params)
                if len(equations) > 0:
                    resolved_equations, _, _ = resolve_equation_list(
                        equations, fields)
                else:
                    resolved_equations = []
                resolve_field_list(
                    fields.copy(),
                    snuba_filter,
                    auto_fields=True,
                    auto_aggregations=True,
                    resolved_equations=resolved_equations,
                )
            except InvalidSearchQuery as err:
                raise serializers.ValidationError(str(err))

        return data
Example #7
    def get(self, request, organization):
        """
        Get the stats on an Organization's Issues
        ``````````````````````````````````````````
        Return a list of issues (groups) with the requested stats.  All parameters are
        supplied as query string parameters.

        :qparam list groups: A list of group ids
        :qparam list expand: an optional list of strings to opt in to additional data. Supports `inbox`
        :qparam list collapse: an optional list of strings to opt out of certain pieces of data. Supports `stats`, `lifetime`, `filtered`, and `base`

        The ``groupStatsPeriod`` parameter can be used to select the timeline
        stats which should be present. Possible values are: '' (disable),
        '24h', '14d'

        The ``statsPeriod`` parameter can be used to select a date window starting
        from now. Ex. ``14d``.

        The ``start`` and ``end`` parameters can be used to select an absolute
        date period to fetch issues from.

        :qparam string statsPeriod: an optional stat period (can be one of
                                    ``"24h"``, ``"14d"``, and ``""``).
        :qparam string groupStatsPeriod: an optional stat period (can be one of
                                    ``"24h"``, ``"14d"``, and ``""``).
        :qparam string start:       Beginning date. You must also provide ``end``.
        :qparam string end:         End date. You must also provide ``start``.
        """

        stats_period = request.GET.get("groupStatsPeriod")
        try:
            start, end = get_date_range_from_params(request.GET)
        except InvalidParams as e:
            raise ParseError(detail=six.text_type(e))

        expand = request.GET.getlist("expand", [])
        collapse = request.GET.getlist("collapse", ["base"])
        has_inbox = features.has("organizations:inbox",
                                 organization,
                                 actor=request.user)
        projects = self.get_projects(request, organization)
        project_ids = [p.id for p in projects]

        try:
            group_ids = set(map(int, request.GET.getlist("groups")))
        except ValueError:
            raise ParseError(detail="Group ids must be integers")

        if not group_ids:
            raise ParseError(
                detail="You should include `groups` with your request. (i.e. groups=1,2,3)"
            )
        else:
            groups = list(
                Group.objects.filter(id__in=group_ids,
                                     project_id__in=project_ids))
            if not groups:
                raise ParseError(detail="No matching groups found")
            elif len(groups) > 25:
                raise ParseError(detail="Too many groups requested.")
            elif any(g for g in groups
                     if not request.access.has_project_access(g.project)):
                raise PermissionDenied

        if stats_period not in (None, "", "24h", "14d", "auto"):
            raise ParseError(detail=ERR_INVALID_STATS_PERIOD)
        stats_period, stats_period_start, stats_period_end = calculate_stats_period(
            stats_period, start, end)

        environments = self.get_environments(request, organization)
        query_kwargs = build_query_params_from_request(request, organization,
                                                       projects, environments)
        context = serialize(
            groups,
            request.user,
            StreamGroupSerializerSnuba(
                environment_ids=[env.id for env in environments],
                stats_period=stats_period,
                stats_period_start=stats_period_start,
                stats_period_end=stats_period_end,
                collapse=collapse,
                expand=expand,
                has_inbox=has_inbox,
                start=start,
                end=end,
                search_filters=query_kwargs["search_filters"]
                if "search_filters" in query_kwargs else None,
            ),
        )

        response = Response(context)
        return response
Example #8
 def __str__(self):
     return ".".join(map(force_text, self))
Example #9
    def post(self, request, organization):
        """
        Create a new asynchronous file export task, and
        email the user upon completion.
        """
        # Ensure new data-export features are enabled
        if not features.has("organizations:data-export", organization):
            return Response(status=404)

        # Get environment_id and limit if available
        try:
            environment_id = self._get_environment_id_from_request(
                request, organization.id)
        except Environment.DoesNotExist as error:
            return Response(error, status=400)
        limit = request.data.get("limit")

        # Validate the data export payload
        serializer = DataExportQuerySerializer(data=request.data)
        if not serializer.is_valid():
            return Response(serializer.errors, status=400)
        data = serializer.validated_data

        # Validate the project field, if provided
        # A PermissionDenied error will be raised in `_get_projects_by_id` if the request is invalid
        project_query = data["query_info"].get("project")
        if project_query:
            # Coerce the query into a set
            if isinstance(project_query, list):
                projects = self._get_projects_by_id(
                    set(map(int, project_query)), request, organization)
            else:
                projects = self._get_projects_by_id({int(project_query)},
                                                    request, organization)
            data["query_info"]["project"] = [
                project.id for project in projects
            ]

        # Discover Pre-processing
        if data["query_type"] == ExportQueryType.DISCOVER_STR:
            if not features.has("organizations:discover-basic",
                                organization,
                                actor=request.user):
                return Response(status=403)
            if "project" not in data["query_info"]:
                projects = self.get_projects(request, organization)
                data["query_info"]["project"] = [
                    project.id for project in projects
                ]

        try:
            # If this user has sent a request with the same payload and organization,
            # we return them the latest one that is NOT complete (i.e. don't start another)
            query_type = ExportQueryType.from_str(data["query_type"])
            data_export, created = ExportedData.objects.get_or_create(
                organization=organization,
                user=request.user,
                query_type=query_type,
                query_info=data["query_info"],
                date_finished=None,
            )
            status = 200
            if created:
                metrics.incr("dataexport.enqueue",
                             tags={"query_type": data["query_type"]},
                             sample_rate=1.0)
                assemble_download.delay(data_export_id=data_export.id,
                                        export_limit=limit,
                                        environment_id=environment_id)
                status = 201
        except ValidationError as e:
            # This will handle invalid JSON requests
            metrics.incr("dataexport.invalid",
                         tags={"query_type": data.get("query_type")},
                         sample_rate=1.0)
            return Response({"detail": six.text_type(e)}, status=400)
        return Response(serialize(data_export, request.user), status=status)
Example #10
def _start_service(client, name, containers, project, fast=False, always_start=False):
    from django.conf import settings
    import docker

    options = containers[name]

    # HACK(mattrobenolt): special handle snuba backend because it needs to
    # handle different values based on the eventstream backend
    # For snuba, we can't run the full suite of devserver, but can only
    # run the api.
    if name == "snuba" and "snuba" in settings.SENTRY_EVENTSTREAM:
        options["environment"].pop("DEFAULT_BROKERS", None)
        options["command"] = ["devserver", "--no-workers"]

    for key, value in options["environment"].items():
        options["environment"][key] = value.format(containers=containers)

    pull = options.pop("pull", False)
    if not fast:
        if pull:
            click.secho("> Pulling image '%s'" % options["image"], err=True, fg="green")
            client.images.pull(options["image"])
        else:
            # We want to make sure to pull everything the first time
            # (when the image doesn't exist), regardless of pull=True.
            try:
                client.images.get(options["image"])
            except docker.errors.NotFound:
                click.secho("> Pulling image '%s'" % options["image"], err=True, fg="green")
                client.images.pull(options["image"])

    for mount in options.get("volumes", {}).keys():
        if "/" not in mount:
            get_or_create(client, "volume", project + "_" + mount)
            options["volumes"][project + "_" + mount] = options["volumes"].pop(mount)

    listening = ""
    if options["ports"]:
        listening = "(listening: %s)" % ", ".join(map(text_type, options["ports"].values()))

    # If a service is associated with the devserver, then do not run the created container.
    # This was mainly added since it was not desirable for reverse_proxy to occupy port 8000 on the
    # first "devservices up".
    # See https://github.com/getsentry/sentry/pull/18362#issuecomment-616785458
    with_devserver = options.pop("with_devserver", False)

    # Two things call _start_service.
    # devservices up, and devservices attach.
    # Containers that should be started on-demand with devserver
    # should ONLY be started via the latter, which sets `always_start`.
    if with_devserver and not always_start:
        click.secho(
            "> Not starting container '%s' because it should be started on-demand with devserver."
            % options["name"],
            fg="yellow",
        )
        # XXX: if always_start=False, do not expect to have a container returned 100% of the time.
        return None

    container = None
    try:
        container = client.containers.get(options["name"])
    except docker.errors.NotFound:
        pass

    if container is not None:
        # devservices which are marked with pull True will need their containers
        # to be recreated with the freshly pulled image.
        should_reuse_container = not pull

        # Except if the container is started as part of devserver we should reuse it.
        # Or, if we're in fast mode (devservices up --fast)
        if with_devserver or fast:
            should_reuse_container = True

        if should_reuse_container:
            click.secho(
                "> Starting EXISTING container '%s' %s" % (container.name, listening),
                err=True,
                fg="yellow",
            )
            # Note that if the container is already running, this will noop.
            # This makes repeated `devservices up` quite fast.
            container.start()
            return container

        click.secho("> Stopping container '%s'" % container.name, err=True, fg="yellow")
        container.stop()
        click.secho("> Removing container '%s'" % container.name, err=True, fg="yellow")
        container.remove()

    click.secho("> Creating container '%s'" % options["name"], err=True, fg="yellow")
    container = client.containers.create(**options)
    click.secho("> Starting container '%s' %s" % (container.name, listening), err=True, fg="yellow")
    container.start()
    return container
Example #11
def get_function_layer_arns(function):
    layers = function.get("Layers", [])
    return map(_get_arn_from_layer, layers)
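A hedged sketch of the helper being mapped over, assuming each entry of a Lambda function's "Layers" list is a dict carrying an "Arn" key (the helper body and the sample ARN are illustrative):

def _get_arn_from_layer(layer):
    # Assumption: layer entries look like {"Arn": "...", "CodeSize": ...}.
    return layer["Arn"]


function = {"Layers": [{"Arn": "arn:aws:lambda:us-east-1:123456789012:layer:example:1"}]}
assert list(map(_get_arn_from_layer, function.get("Layers", []))) == [
    "arn:aws:lambda:us-east-1:123456789012:layer:example:1"
]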
Example #12
 def encode_row(row):
     return map(force_bytes, row)
Example #13
from __future__ import absolute_import

import six
from sentry.utils.compat import map

version = (0, 7, 28)

__version__ = ".".join(map(six.text_type, version))
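Every snippet on this page imports map from sentry.utils.compat rather than using the builtin, and several of them slice or .insert() into the result, which only works on a list. A minimal sketch of such a wrapper, assuming it simply materializes the builtin iterator (an illustration, not the library's actual source):

def compat_map(function, *iterables):
    # Hypothetical equivalent: builtin map, forced to a list so callers on
    # both Python 2 and Python 3 get an indexable, sliceable sequence.
    return list(map(function, *iterables))


version = (0, 7, 28)
assert ".".join(compat_map(str, version)) == "0.7.28"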
Example #14
def resolve_field_list(fields, snuba_args, params=None, auto_fields=True):
    """
    Expand a list of fields based on aliases and aggregate functions.

    Returns a dict of aggregations, selected_columns, and
    groupby that can be merged into the result of get_snuba_query_args()
    to build a more complete snuba query based on event search conventions.
    """
    # If project.name is requested, get the project.id from Snuba so we
    # can use this to look up the name in Sentry
    if "project.name" in fields:
        fields.remove("project.name")
        if "project.id" not in fields:
            fields.append("project.id")

    aggregations = []
    columns = []
    groupby = []
    for field in fields:
        column_additions, agg_additions = resolve_field(field, params)
        if column_additions:
            columns.extend(column_additions)

        if agg_additions:
            aggregations.extend(agg_additions)

    rollup = snuba_args.get("rollup")
    if not rollup and auto_fields:
        # Ensure fields we require to build a functioning interface
        # are present. We don't add fields when using a rollup as the additional fields
        # would be aggregated away. When there are aggregations
        # we use argMax to get the latest event/projectid so we can create links.
        # The `projectid` output name is not a typo; using `project_id`
        # generates invalid queries.
        if not aggregations and "id" not in columns:
            columns.append("id")
        if not aggregations and "project.id" not in columns:
            columns.append("project.id")
        if aggregations and "latest_event" not in map(lambda a: a[-1], aggregations):
            aggregations.extend(deepcopy(FIELD_ALIASES["latest_event"]["aggregations"]))
        if aggregations and "project.id" not in columns:
            aggregations.append(["argMax", ["project.id", "timestamp"], "projectid"])

    if rollup and columns and not aggregations:
        raise InvalidSearchQuery("You cannot use rollup without an aggregate field.")

    orderby = snuba_args.get("orderby")
    if orderby:
        orderby = resolve_orderby(orderby, columns, aggregations)

    # If aggregations are present all columns
    # need to be added to the group by so that the query is valid.
    if aggregations:
        groupby.extend(columns)

    return {
        "selected_columns": columns,
        "aggregations": aggregations,
        "groupby": groupby,
        "orderby": orderby,
    }
Example #15
 def __str__(self):
     return "".join(map(six.text_type, (self.key.name, self.operator, self.value.raw_value)))
Example #16
 def providers(self):
     # TODO: use feature flag in the future
     providers = filter(lambda x: x.has_stacktrace_linking, list(integrations.all()))
     return map(lambda x: x.key, providers)
Example #17
    def _get_subscriptions(self, item_list, user):
        """
        Returns a mapping of group IDs to a two-tuple of (subscribed: bool,
        subscription: GroupSubscription or None) for the provided user and
        groups.
        """
        if not item_list:
            return {}

        # Collect all of the projects to look up, and keep a set of groups that
        # are part of that project. (Note that the common -- but not only --
        # case here is that all groups are part of the same project.)
        projects = defaultdict(set)
        for group in item_list:
            projects[group.project].add(group)

        # Fetch the options for each project -- we'll need this to identify if
        # a user has totally disabled workflow notifications for a project.
        # NOTE: This doesn't use `values_list` because that bypasses field
        # value decoding, so the `value` field would not be unpickled.
        options = {
            option.project_id: option.value
            for option in UserOption.objects.filter(
                Q(project__in=projects.keys()) | Q(project__isnull=True),
                user=user,
                key="workflow:notifications",
            )
        }

        # If there is a subscription record associated with the group, we can
        # just use that to know if a user is subscribed or not, as long as
        # notifications aren't disabled for the project.
        subscriptions = {
            subscription.group_id: subscription
            for subscription in GroupSubscription.objects.filter(
                group__in=list(
                    itertools.chain.from_iterable(
                        map(
                            lambda project__groups: project__groups[1]
                            if not options.get(project__groups[0].id,
                                               options.get(None)) ==
                            UserOptionValue.no_conversations else [],
                            projects.items(),
                        ))),
                user=user,
            )
        }

        # This is the user's default value for any projects that don't have
        # the option value specifically recorded. (The default
        # "participating_only" value is convention.)
        global_default_workflow_option = options.get(
            None, UserOptionValue.participating_only)

        results = {}
        for project, groups in projects.items():
            project_default_workflow_option = options.get(
                project.id, global_default_workflow_option)
            for group in groups:
                subscription = subscriptions.get(group.id)
                if subscription is not None:
                    results[group.id] = (subscription.is_active, subscription)
                else:
                    results[group.id] = (
                        (project_default_workflow_option
                         == UserOptionValue.all_conversations,
                         None) if project_default_workflow_option !=
                        UserOptionValue.no_conversations else disabled)

        return results
Example #18
def text_shingle(n, value):
    return map(u"".join, shingle(n, value))
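shingle is defined elsewhere in the codebase; a sketch of the idea under the assumption that it yields every contiguous window of length n:

def shingle(n, sequence):
    # Assumed behavior: sliding windows of length n over the input sequence.
    for i in range(len(sequence) - n + 1):
        yield sequence[i : i + n]


def text_shingle(n, value):
    return list(map(u"".join, shingle(n, value)))


assert text_shingle(3, u"hello") == [u"hel", u"ell", u"llo"]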
Example #19
def psycopg2_version():
    import psycopg2

    version = psycopg2.__version__.split()[0].split(".")
    return tuple(map(int, version))
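The same parsing logic against a literal version string, so it runs without psycopg2 installed (the sample value is illustrative):

sample = "2.8.6 (dt dec pq3 ext lo64)"  # illustrative psycopg2.__version__ value
assert tuple(map(int, sample.split()[0].split("."))) == (2, 8, 6)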
Example #20
# TODO(mattrobenolt): Autodiscover commands?
list(
    map(
        lambda cmd: cli.add_command(import_string(cmd)),
        (
            "sentry.runner.commands.backup.export",
            "sentry.runner.commands.backup.import_",
            "sentry.runner.commands.cleanup.cleanup",
            "sentry.runner.commands.config.config",
            "sentry.runner.commands.createuser.createuser",
            "sentry.runner.commands.devserver.devserver",
            "sentry.runner.commands.django.django",
            "sentry.runner.commands.exec.exec_",
            "sentry.runner.commands.files.files",
            "sentry.runner.commands.help.help",
            "sentry.runner.commands.init.init",
            "sentry.runner.commands.plugins.plugins",
            "sentry.runner.commands.queues.queues",
            "sentry.runner.commands.repair.repair",
            "sentry.runner.commands.run.run",
            "sentry.runner.commands.start.start",
            "sentry.runner.commands.tsdb.tsdb",
            "sentry.runner.commands.upgrade.upgrade",
            "sentry.runner.commands.permissions.permissions",
            "sentry.runner.commands.devservices.devservices",
        ),
    ))


def make_django_command(name, django_command=None, help=None):
Example #21
    def get(self, request, organization):
        """
        Retrieve Event Counts for an Organization
        `````````````````````````````````````````

        .. caution::
           This endpoint may change in the future without notice.

        Return a set of points representing a normalized timestamp and the
        number of events seen in the period.

        :pparam string organization_slug: the slug of the organization for
                                          which the stats should be
                                          retrieved.
        :qparam string stat: the name of the stat to query (``"received"``,
                             ``"rejected"``, ``"blacklisted"``)
        :qparam timestamp since: a timestamp to set the start of the query
                                 in seconds since UNIX epoch.
        :qparam timestamp until: a timestamp to set the end of the query
                                 in seconds since UNIX epoch.
        :qparam string resolution: an explicit resolution to search
                                   for (one of ``10s``, ``1h``, and ``1d``)
        :auth: required
        """
        group = request.GET.get("group", "organization")
        if group == "organization":
            keys = [organization.id]
        elif group == "project":
            team_list = Team.objects.get_for_user(organization=organization,
                                                  user=request.user)

            project_ids = request.GET.getlist("projectID")
            if not project_ids:
                project_list = []
                for team in team_list:
                    project_list.extend(
                        Project.objects.get_for_user(team=team,
                                                     user=request.user))
            else:
                project_list = Project.objects.filter(teams__in=team_list,
                                                      id__in=project_ids)
            keys = list({p.id for p in project_list})
        else:
            raise ValueError("Invalid group: %s" % group)

        if "id" in request.GET:
            id_filter_set = frozenset(map(int, request.GET.getlist("id")))
            keys = [k for k in keys if k in id_filter_set]

        if not keys:
            return Response([])

        stat_model = None
        stat = request.GET.get("stat", "received")
        query_kwargs = {}
        if stat == "received":
            if group == "project":
                stat_model = tsdb.models.project_total_received
            else:
                stat_model = tsdb.models.organization_total_received
        elif stat == "rejected":
            if group == "project":
                stat_model = tsdb.models.project_total_rejected
            else:
                stat_model = tsdb.models.organization_total_rejected
        elif stat == "blacklisted":
            if group == "project":
                stat_model = tsdb.models.project_total_blacklisted
            else:
                stat_model = tsdb.models.organization_total_blacklisted
        elif stat == "generated":
            if group == "project":
                stat_model = tsdb.models.project
                try:
                    query_kwargs["environment_id"] = self._get_environment_id_from_request(
                        request, organization.id
                    )
                except Environment.DoesNotExist:
                    raise ResourceDoesNotExist

        if stat_model is None:
            raise ValueError(f"Invalid group: {group}, stat: {stat}")
        data = tsdb.get_range(model=stat_model,
                              keys=keys,
                              **self._parse_args(request, **query_kwargs))

        if group == "organization":
            data = data[organization.id]

        return Response(data)
Example #22
def _find_hashes(project, hash_list):
    return map(
        lambda hash: GroupHash.objects.get_or_create(project=project,
                                                     hash=hash)[0], hash_list)
Example #23
    def merge_frequencies(self, model, destination, sources, timestamp=None, environment_ids=None):
        environment_ids = list(
            (set(environment_ids) if environment_ids is not None else set()).union([None])
        )

        self.validate_arguments([model], environment_ids)

        if not self.enable_frequency_sketches:
            return

        rollups = []
        for rollup, samples in self.rollups.items():
            _, series = self.get_optimal_rollup_series(
                to_datetime(self.get_earliest_timestamp(rollup, timestamp=timestamp)),
                end=None,
                rollup=rollup,
            )
            rollups.append((rollup, map(to_datetime, series)))

        for (cluster, durable), environment_ids in self.get_cluster_groups(environment_ids):
            exports = defaultdict(list)

            for source in sources:
                for rollup, series in rollups:
                    for timestamp in series:
                        keys = []
                        for environment_id in environment_ids:
                            keys.extend(
                                self.make_frequency_table_keys(
                                    model, rollup, to_timestamp(timestamp), source, environment_id
                                )
                            )
                        arguments = ["EXPORT"] + list(self.DEFAULT_SKETCH_PARAMETERS)
                        exports[source].extend([(CountMinScript, keys, arguments), ["DEL"] + keys])

            try:
                responses = cluster.execute_commands(exports)
            except Exception:
                if durable:
                    raise
                else:
                    continue

            imports = []

            for source, results in responses.items():
                results = iter(results)
                for rollup, series in rollups:
                    for timestamp in series:
                        for environment_id, payload in zip(environment_ids, next(results).value):
                            imports.append(
                                (
                                    CountMinScript,
                                    self.make_frequency_table_keys(
                                        model,
                                        rollup,
                                        to_timestamp(timestamp),
                                        destination,
                                        environment_id,
                                    ),
                                    ["IMPORT"] + list(self.DEFAULT_SKETCH_PARAMETERS) + [payload],
                                )
                            )
                        next(results)  # pop off the result of DEL

            try:
                cluster.execute_commands({destination: imports})
            except Exception:
                if durable:
                    raise
Example #24
def browser(request, percy, live_server):
    window_size = request.config.getoption("window_size")
    window_width, window_height = map(int, window_size.split("x", 1))

    driver_type = request.config.getoption("selenium_driver")
    headless = not request.config.getoption("no_headless")
    if driver_type == "chrome":
        options = webdriver.ChromeOptions()
        options.add_argument("no-sandbox")
        options.add_argument("disable-gpu")
        options.add_argument(u"window-size={}".format(window_size))
        if headless:
            options.add_argument("headless")
        chrome_path = request.config.getoption("chrome_path")
        if chrome_path:
            options.binary_location = chrome_path
        chromedriver_path = request.config.getoption("chromedriver_path")
        chrome_args = {"options": options}
        if chromedriver_path:
            chrome_args["executable_path"] = chromedriver_path

        driver = start_chrome(**chrome_args)
    elif driver_type == "firefox":
        driver = webdriver.Firefox()
    elif driver_type == "phantomjs":
        phantomjs_path = request.config.getoption("phantomjs_path")
        if not phantomjs_path:
            phantomjs_path = os.path.join("node_modules", "phantomjs-prebuilt",
                                          "bin", "phantomjs")
        driver = webdriver.PhantomJS(executable_path=phantomjs_path)
    else:
        raise pytest.UsageError("--driver must be specified")

    driver.set_window_size(window_width, window_height)

    def fin():
        # dump the console log to stderr; it will be shown when a test fails
        for entry in driver.get_log("browser"):
            sys.stderr.write("[browser console] ")
            sys.stderr.write(repr(entry))
            sys.stderr.write("\n")
        # Teardown Selenium.
        try:
            driver.quit()
        except Exception:
            pass

    request.node._driver = driver
    request.addfinalizer(fin)

    browser = Browser(driver, live_server, percy)

    browser.set_emulated_media([{
        "name": "prefers-reduced-motion",
        "value": "reduce"
    }])

    if hasattr(request, "cls"):
        request.cls.browser = browser
    request.node.browser = browser

    # bind webdriver to percy for snapshots
    percy.loader.webdriver = driver

    return driver
Example #25
 def fetch(self, timestamp, duration, organization, projects):
     assert all(project.organization_id == organization.id for project in projects)
     return map(functools.partial(self.build, timestamp, duration), projects)
Example #26
    def get_data(
        self,
        model,
        keys,
        start,
        end,
        rollup=None,
        environment_ids=None,
        aggregation="count()",
        group_on_model=True,
        group_on_time=False,
        conditions=None,
    ):
        """
        Normalizes all the TSDB parameters and sends a query to snuba.

        `group_on_time`: whether to add a GROUP BY clause on the 'time' field.
        `group_on_model`: whether to add a GROUP BY clause on the primary model.
        """
        # XXX: to counteract the hack in project_key_stats.py
        if model in [
                TSDBModel.key_total_received,
                TSDBModel.key_total_blacklisted,
                TSDBModel.key_total_rejected,
        ]:
            keys = list(set(map(lambda x: int(x), keys)))

        # 10s is the only rollup under an hour that we support
        if rollup and rollup == 10 and model in self.lower_rollup_query_settings:
            model_query_settings = self.lower_rollup_query_settings.get(model)
        else:
            model_query_settings = self.model_query_settings.get(model)

        if model_query_settings is None:
            raise Exception(u"Unsupported TSDBModel: {}".format(model.name))

        model_group = model_query_settings.groupby
        model_aggregate = model_query_settings.aggregate

        groupby = []
        if group_on_model and model_group is not None:
            groupby.append(model_group)
        if group_on_time:
            groupby.append("time")
        if aggregation == "count()" and model_aggregate is not None:
            # Special case, because count has different semantics, we change:
            # `COUNT(model_aggregate)` to `COUNT() GROUP BY model_aggregate`
            groupby.append(model_aggregate)
            model_aggregate = None

        columns = (model_query_settings.groupby,
                   model_query_settings.aggregate)
        keys_map = dict(zip(columns, self.flatten_keys(keys)))
        keys_map = {
            k: v
            for k, v in six.iteritems(keys_map)
            if k is not None and v is not None
        }
        if environment_ids is not None:
            keys_map["environment"] = environment_ids

        aggregations = [[aggregation, model_aggregate, "aggregate"]]

        # For historical compatibility with bucket-counted TSDB implementations
        # we grab the original bucketed series and add the rollup time to the
        # timestamp of the last bucket to get the end time.
        rollup, series = self.get_optimal_rollup_series(start, end, rollup)
        start = to_datetime(series[0])
        end = to_datetime(series[-1] + rollup)
        limit = min(10000,
                    int(len(keys) * ((end - start).total_seconds() / rollup)))

        conditions = conditions if conditions is not None else []
        if model_query_settings.conditions is not None:
            conditions += deepcopy(model_query_settings.conditions)
            # copy because we modify the conditions in snuba.query

        if keys:
            result = snuba.query(
                dataset=model_query_settings.dataset,
                start=start,
                end=end,
                groupby=groupby,
                conditions=conditions,
                filter_keys=keys_map,
                aggregations=aggregations,
                rollup=rollup,
                limit=limit,
                referrer="tsdb-modelid:{}".format(model.value),
                is_grouprelease=(
                    model == TSDBModel.frequent_releases_by_group),
            )
        else:
            result = {}

        if group_on_time:
            keys_map["time"] = series

        self.zerofill(result, groupby, keys_map)
        self.trim(result, groupby, keys)

        return result
Example #27
 def to_python(self, value):
     if not value:
         value = []
     if isinstance(value, str):
         value = json.loads(value)
     return map(self.of.to_python, value)
Example #28
def resolve_field_list(fields, snuba_args, params=None, auto_fields=True):
    """
    Expand a list of fields based on aliases and aggregate functions.

    Returns a dict of aggregations, selected_columns, and
    groupby that can be merged into the result of get_snuba_query_args()
    to build a more complete snuba query based on event search conventions.
    """
    aggregations = []
    columns = []
    groupby = []
    project_key = ""
    # Which column to map to project names
    project_column = "project_id"

    # If project is requested, we need to map ids to their names since snuba only has ids
    if "project" in fields:
        fields.remove("project")
        project_key = "project"
    # since project.name is more specific, if both are included use project.name instead of project
    if PROJECT_NAME_ALIAS in fields:
        fields.remove(PROJECT_NAME_ALIAS)
        project_key = PROJECT_NAME_ALIAS
    if project_key:
        if "project.id" not in fields:
            fields.append("project.id")

    for field in fields:
        column_additions, agg_additions = resolve_field(field, params)
        if column_additions:
            columns.extend(column_additions)

        if agg_additions:
            aggregations.extend(agg_additions)

    rollup = snuba_args.get("rollup")
    if not rollup and auto_fields:
        # Ensure fields we require to build a functioning interface
        # are present. We don't add fields when using a rollup as the additional fields
        # would be aggregated away. When there are aggregations
        # we use argMax to get the latest event/projectid so we can create links.
        # The `projectid` output name is not a typo; using `project_id`
        # generates invalid queries.
        if not aggregations and "id" not in columns:
            columns.append("id")
        if not aggregations and "project.id" not in columns:
            columns.append("project.id")
            project_column = "project_id"
        if aggregations and "latest_event" not in map(lambda a: a[-1], aggregations):
            _, aggregates = resolve_function("latest_event()")
            aggregations.extend(aggregates)
        if aggregations and "project.id" not in columns:
            aggregations.append(["argMax", ["project.id", "timestamp"], "projectid"])
            project_column = "projectid"
        if project_key == "":
            project_key = PROJECT_NAME_ALIAS

    if project_key:
        project_ids = snuba_args.get("filter_keys", {}).get("project_id", [])
        projects = Project.objects.filter(id__in=project_ids).values("slug", "id")
        aggregations.append(
            [
                u"transform({}, array({}), array({}), '')".format(
                    project_column,
                    # Join manually so we don't get a list repr with long-integer `L` suffixes, which confuses ClickHouse
                    ",".join([six.text_type(project["id"]) for project in projects]),
                    # Can't just format a list since we'll get u"string" instead of a plain 'string'
                    ",".join([u"'{}'".format(project["slug"]) for project in projects]),
                ),
                None,
                project_key,
            ]
        )

    if rollup and columns and not aggregations:
        raise InvalidSearchQuery("You cannot use rollup without an aggregate field.")

    orderby = snuba_args.get("orderby")
    if orderby:
        orderby = resolve_orderby(orderby, columns, aggregations)

    # If aggregations are present all columns
    # need to be added to the group by so that the query is valid.
    if aggregations:
        groupby.extend(columns)

    return {
        "selected_columns": columns,
        "aggregations": aggregations,
        "groupby": groupby,
        "orderby": orderby,
    }
Example #29
def bulk_raw_query(snuba_param_list, referrer=None):
    headers = {}
    if referrer:
        headers["referer"] = referrer

    query_param_list = map(_prepare_query_params, snuba_param_list)

    def snuba_query(params):
        query_params, forward, reverse = params
        try:
            with timer("snuba_query"):
                body = json.dumps(query_params)
                with sentry_sdk.start_span(
                    op="snuba", description=u"query {}".format(body)
                ) as span:
                    span.set_tag("referrer", headers.get("referer", "<unknown>"))
                    return (
                        _snuba_pool.urlopen("POST", "/query", body=body, headers=headers),
                        forward,
                        reverse,
                    )
        except urllib3.exceptions.HTTPError as err:
            raise SnubaError(err)

    if len(snuba_param_list) > 1:
        query_results = _query_thread_pool.map(snuba_query, query_param_list)
    else:
        # No need to submit to the thread pool if we're just performing a
        # single query
        query_results = [snuba_query(query_param_list[0])]

    results = []
    for response, _, reverse in query_results:
        try:
            body = json.loads(response.data)
        except ValueError:
            raise UnexpectedResponseError(
                u"Could not decode JSON response: {}".format(response.data)
            )

        if response.status != 200:
            if body.get("error"):
                error = body["error"]
                if response.status == 429:
                    raise RateLimitExceeded(error["message"])
                elif error["type"] == "schema":
                    raise SchemaValidationError(error["message"])
                elif error["type"] == "clickhouse":
                    raise clickhouse_error_codes_map.get(error["code"], QueryExecutionError)(
                        error["message"]
                    )
                else:
                    raise SnubaError(error["message"])
            else:
                raise SnubaError(u"HTTP {}".format(response.status))

        # Forward and reverse translation maps from model ids to snuba keys, per column
        body["data"] = [reverse(d) for d in body["data"]]
        results.append(body)

    return results
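For reference, a minimal sketch of the dispatch pattern used above: run a single query inline and fan out to a thread pool only when there is more than one. This uses the standard-library ThreadPoolExecutor rather than Sentry's `_query_thread_pool`, and `run_query` is a stand-in for `snuba_query`.

from concurrent.futures import ThreadPoolExecutor

def dispatch_queries(run_query, param_list, max_workers=4):
    # Skip the pool (and its overhead) for zero or one query.
    if len(param_list) <= 1:
        return [run_query(params) for params in param_list]
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # map() yields results in the same order as param_list.
        return list(pool.map(run_query, param_list))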
Exemple #30
0
    def test_unmerge(self):
        now = before_now(minutes=5).replace(microsecond=0, tzinfo=pytz.utc)

        def time_from_now(offset=0):
            return now + timedelta(seconds=offset)

        project = self.create_project()

        sequence = itertools.count(0)
        tag_values = itertools.cycle(["red", "green", "blue"])
        user_values = itertools.cycle([{"id": 1}, {"id": 2}])

        def create_message_event(template,
                                 parameters,
                                 environment,
                                 release,
                                 fingerprint="group1"):
            i = next(sequence)
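            # The counter drives a unique, deterministic event_id below
            # (e.g. i=1 yields "00000001000010008080808080808080") as well
            # as the strictly increasing event timestamps.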

            event_id = uuid.UUID(fields=(i, 0x0, 0x1000, 0x80, 0x80,
                                         0x808080808080)).hex

            tags = [["color", next(tag_values)]]

            if release:
                tags.append(["sentry:release", release])

            event = self.store_event(
                data={
                    "event_id": event_id,
                    "message": template % parameters,
                    "type": "default",
                    "user": next(user_values),
                    "tags": tags,
                    "fingerprint": [fingerprint],
                    "timestamp": iso_format(now + timedelta(seconds=i)),
                    "environment": environment,
                    "release": release,
                },
                project_id=project.id,
            )

            UserReport.objects.create(
                project_id=project.id,
                group_id=event.group.id,
                event_id=event_id,
                name="Log Hat",
                email="*****@*****.**",
                comments="Quack",
            )

            features.record([event])

            return event

        events = OrderedDict()

        for event in (create_message_event("This is message #%s.",
                                           i,
                                           environment="production",
                                           release="version")
                      for i in xrange(10)):
            events.setdefault(get_fingerprint(event), []).append(event)

        for event in (create_message_event(
                "This is message #%s!",
                i,
                environment="production",
                release="version2",
                fingerprint="group2",
        ) for i in xrange(10, 16)):
            events.setdefault(get_fingerprint(event), []).append(event)

        event = create_message_event(
            "This is message #%s!",
            17,
            environment="staging",
            release="version3",
            fingerprint="group3",
        )

        events.setdefault(get_fingerprint(event), []).append(event)

        merge_source, source, destination = list(Group.objects.all())

        assert len(events) == 3
        assert sum(map(len, events.values())) == 17

        production_environment = Environment.objects.get(
            organization_id=project.organization_id, name="production")

        with self.tasks():
            eventstream_state = eventstream.start_merge(
                project.id, [merge_source.id], source.id)
            merge_groups.delay([merge_source.id], source.id)
            eventstream.end_merge(eventstream_state)

        assert set([
            (gtv.value, gtv.times_seen)
            for gtv in tagstore.get_group_tag_values(
                project.id, source.id, production_environment.id, "color")
        ]) == set([("red", 6), ("green", 5), ("blue", 5)])

        similar_items = features.compare(source)
        assert len(similar_items) == 2
        assert similar_items[0][0] == source.id
        assert similar_items[0][1]["message:message:character-shingles"] == 1.0
        assert similar_items[1][0] == destination.id
        assert similar_items[1][1]["message:message:character-shingles"] < 1.0

        with self.tasks():
            eventstream_state = eventstream.start_unmerge(
                project.id, [list(events.keys())[0]], source.id,
                destination.id)
            unmerge.delay(project.id,
                          source.id,
                          destination.id, [events.keys()[0]],
                          None,
                          batch_size=5)
            eventstream.end_unmerge(eventstream_state)

        assert (list(
            Group.objects.filter(id=merge_source.id).values_list(
                "times_seen", "first_seen", "last_seen")) == [])

        assert list(
            Group.objects.filter(id=source.id).values_list(
                "times_seen", "first_seen",
                "last_seen")) == [(6, time_from_now(10), time_from_now(15))]

        assert list(
            Group.objects.filter(id=destination.id).values_list(
                "times_seen", "first_seen",
                "last_seen")) == [(11, time_from_now(0), time_from_now(16))]

        assert source.id != destination.id
        assert source.project == destination.project

        destination_event_ids = map(lambda event: event.event_id,
                                    events.values()[1])

        assert set(
            UserReport.objects.filter(group_id=source.id).values_list(
                "event_id", flat=True)) == set(destination_event_ids)

        assert set(
            GroupHash.objects.filter(group_id=source.id).values_list(
                "hash", flat=True)) == set(itertools.islice(events.keys(), 2))

        assert set(
            GroupRelease.objects.filter(group_id=source.id).values_list(
                "environment", "first_seen",
                "last_seen")) == set([(u"production", time_from_now(10),
                                       time_from_now(15))])

        assert set([
            (gtv.value, gtv.times_seen)
            for gtv in tagstore.get_group_tag_values(
                project.id, destination.id, production_environment.id, "color")
        ]) == set([(u"red", 4), (u"green", 3), (u"blue", 3)])

        destination_event_ids = map(lambda event: event.event_id,
                                    events.values()[0] + events.values()[2])

        assert set(
            UserReport.objects.filter(group_id=destination.id).values_list(
                "event_id", flat=True)) == set(destination_event_ids)

        assert set(
            GroupHash.objects.filter(group_id=destination.id).values_list(
                "hash",
                flat=True)) == set(itertools.islice(events.keys(), 2, 3))

        assert set(
            GroupRelease.objects.filter(group_id=destination.id).values_list(
                "environment", "first_seen", "last_seen")) == set([
                    ("production", time_from_now(0), time_from_now(9)),
                    ("staging", time_from_now(16), time_from_now(16)),
                ])

        assert set([
            (gtk.value, gtk.times_seen)
            for gtk in tagstore.get_group_tag_values(
                project.id, destination.id, production_environment.id, "color")
        ]) == set([("red", 4), ("blue", 3), ("green", 3)])

        rollup_duration = 3600

        time_series = tsdb.get_range(
            tsdb.models.group,
            [source.id, destination.id],
            now - timedelta(seconds=rollup_duration),
            time_from_now(17),
            rollup_duration,
        )

        environment_time_series = tsdb.get_range(
            tsdb.models.group,
            [source.id, destination.id],
            now - timedelta(seconds=rollup_duration),
            time_from_now(17),
            rollup_duration,
            environment_ids=[production_environment.id],
        )

        def get_expected_series_values(rollup, events, function=None):
            if function is None:

                def function(aggregate, event):
                    return (aggregate if aggregate is not None else 0) + 1

            expected = {}
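            # Each event is bucketed to the start of its rollup window,
            # k = floor(timestamp / rollup) * rollup; e.g. with a 3600-second
            # rollup, timestamps 36300 and 38700 both land in bucket 36000.0
            # while 40000 lands in 39600.0 (illustrative epoch seconds).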
            for event in events:
                k = float((to_timestamp(event.datetime) // rollup) * rollup)
                expected[k] = function(expected.get(k), event)

            return expected

        def assert_series_contains(expected, actual, default=0):
            actual = dict(actual)

            for key, value in expected.items():
                assert actual.get(key, default) == value

            for key in set(actual.keys()) - set(expected.keys()):
                assert actual.get(key, default) == default

        assert_series_contains(
            get_expected_series_values(rollup_duration,
                                       events.values()[1]),
            time_series[source.id],
            0,
        )

        assert_series_contains(
            get_expected_series_values(rollup_duration,
                                       events.values()[0] +
                                       events.values()[2]),
            time_series[destination.id],
            0,
        )

        assert_series_contains(
            get_expected_series_values(rollup_duration,
                                       events.values()[1]),
            environment_time_series[source.id],
            0,
        )

        assert_series_contains(
            get_expected_series_values(
                rollup_duration,
                events.values()[0][:-1] + events.values()[2]),
            environment_time_series[destination.id],
            0,
        )

        time_series = tsdb.get_distinct_counts_series(
            tsdb.models.users_affected_by_group,
            [source.id, destination.id],
            now - timedelta(seconds=rollup_duration),
            time_from_now(17),
            rollup_duration,
        )

        environment_time_series = tsdb.get_distinct_counts_series(
            tsdb.models.users_affected_by_group,
            [source.id, destination.id],
            now - timedelta(seconds=rollup_duration),
            time_from_now(17),
            rollup_duration,
            environment_id=production_environment.id,
        )

        def collect_by_user_tag(aggregate, event):
            aggregate = aggregate if aggregate is not None else set()
            aggregate.add(
                get_event_user_from_interface(event.data["user"]).tag_value)
            return aggregate

        for series in [time_series, environment_time_series]:
            assert_series_contains(
                {
                    timestamp: len(values)
                    for timestamp, values in get_expected_series_values(
                        rollup_duration,
                        events.values()[1], collect_by_user_tag).items()
                },
                series[source.id],
            )

            assert_series_contains(
                {
                    timestamp: len(values)
                    for timestamp, values in get_expected_series_values(
                        rollup_duration,
                        events.values()[0] + events.values()[2],
                        collect_by_user_tag,
                    ).items()
                },
                time_series[destination.id],
            )

        def strip_zeroes(data):
            for group_id, series in data.items():
                for _, values in series:
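                    # Popping while iterating is safe here only because
                    # dict.items() returns a list on Python 2.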
                    for key, val in values.items():
                        if val == 0:
                            values.pop(key)

            return data

        def collect_by_release(group, aggregate, event):
            aggregate = aggregate if aggregate is not None else {}
            release = event.get_tag("sentry:release")
            if not release:
                return aggregate
            release = GroupRelease.objects.get(
                group_id=group.id,
                environment=event.data["environment"],
                release_id=Release.objects.get(
                    organization_id=project.organization_id,
                    version=release).id,
            ).id
            aggregate[release] = aggregate.get(release, 0) + 1
            return aggregate

        items = {}
        for i in [source.id, destination.id]:
            items[i] = list(
                GroupRelease.objects.filter(group_id=i).values_list("id",
                                                                    flat=True))

        time_series = strip_zeroes(
            tsdb.get_frequency_series(
                tsdb.models.frequent_releases_by_group,
                items,
                now - timedelta(seconds=rollup_duration),
                time_from_now(17),
                rollup_duration,
            ))

        assert_series_contains(
            get_expected_series_values(
                rollup_duration,
                events.values()[1],
                functools.partial(collect_by_release, source)),
            time_series[source.id],
            {},
        )

        assert_series_contains(
            get_expected_series_values(
                rollup_duration,
                events.values()[0] + events.values()[2],
                functools.partial(collect_by_release, destination),
            ),
            time_series[destination.id],
            {},
        )

        items = {}
        for i in [source.id, destination.id]:
            items[i] = list(Environment.objects.all().values_list("id",
                                                                  flat=True))

        time_series = strip_zeroes(
            tsdb.get_frequency_series(
                tsdb.models.frequent_environments_by_group,
                items,
                now - timedelta(seconds=rollup_duration),
                time_from_now(17),
                rollup_duration,
            ))

        def collect_by_environment(aggregate, event):
            aggregate = aggregate if aggregate is not None else {}
            environment = Environment.objects.get(
                organization_id=project.organization_id,
                name=event.data["environment"]).id
            aggregate[environment] = aggregate.get(environment, 0) + 1
            return aggregate

        assert_series_contains(
            get_expected_series_values(rollup_duration,
                                       events.values()[1],
                                       collect_by_environment),
            time_series[source.id],
            {},
        )

        assert_series_contains(
            get_expected_series_values(rollup_duration,
                                       events.values()[0] + events.values()[2],
                                       collect_by_environment),
            time_series[destination.id],
            {},
        )

        source_similar_items = features.compare(source)
        assert source_similar_items[0] == (
            source.id,
            {
                "exception:message:character-shingles": None,
                "exception:stacktrace:application-chunks": None,
                "exception:stacktrace:pairs": None,
                "message:message:character-shingles": 1.0,
            },
        )
        assert source_similar_items[1][0] == destination.id
        assert source_similar_items[1][1][
            "message:message:character-shingles"] < 1.0

        destination_similar_items = features.compare(destination)
        assert destination_similar_items[0] == (
            destination.id,
            {
                "exception:message:character-shingles": None,
                "exception:stacktrace:application-chunks": None,
                "exception:stacktrace:pairs": None,
                "message:message:character-shingles": 1.0,
            },
        )
        assert destination_similar_items[1][0] == source.id
        assert destination_similar_items[1][1][
            "message:message:character-shingles"] < 1.0