Example #1
    def test_logs_differences(self):
        logger = logging.getLogger("sentry.eventstore")

        with mock.patch.object(logger, "info") as mock_logger:
            # No differences to log
            filter = eventstore.Filter(project_ids=[self.project.id])
            eventstore.get_events(filter=filter)
            eventstore.get_event_by_id(self.project.id, "a" * 32)
            assert mock_logger.call_count == 0

            # Here we expect a difference since the original implementation handles type as a tag
            event = eventstore.get_event_by_id(self.project.id, "a" * 32)
            filter = eventstore.Filter(
                project_ids=[self.project.id],
                conditions=[["type", "=", "transaction"]])
            eventstore.get_next_event_id(event, filter)
            assert mock_logger.call_count == 1
            mock_logger.assert_called_with(
                "discover.result-mismatch",
                extra={
                    "snuba_result": None,
                    "snuba_discover_result": (six.text_type(self.project.id), "b" * 32),
                    "method": "get_next_event_id",
                    "event_id": event.event_id,
                    "filter_keys": filter.filter_keys,
                    "conditions": filter.conditions,
                },
            )
Example #2
def reprocess_group(
    project_id,
    group_id,
    new_group_id=None,
    query_state=None,
    start_time=None,
    max_events=None,
    acting_user_id=None,
):
    from sentry.reprocessing2 import start_group_reprocessing

    if start_time is None:
        assert new_group_id is None
        start_time = time.time()
        new_group_id = start_group_reprocessing(project_id,
                                                group_id,
                                                max_events=max_events,
                                                acting_user_id=acting_user_id)

    assert new_group_id is not None

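    # Page through the group's events in batches; `query_state` carries the
    # batch cursor between successive invocations of this task.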
    query_state, events = celery_run_batch_query(
        filter=eventstore.Filter(project_ids=[project_id],
                                 group_ids=[group_id]),
        batch_size=GROUP_REPROCESSING_CHUNK_SIZE,
        state=query_state,
        referrer="reprocessing2.reprocess_group",
    )

    if not events:
        return

    tombstoned_event_ids = []

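    # Reprocess up to the remaining `max_events` budget; events beyond the
    # budget in this batch are tombstoned instead.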
    for event in events:
        if max_events is None or max_events > 0:
            reprocess_event.delay(
                project_id=project_id,
                event_id=event.event_id,
                start_time=start_time,
            )
            if max_events is not None:
                max_events -= 1
        else:
            tombstoned_event_ids.append(event.event_id)

    # len(tombstoned_event_ids) is upper-bounded by GROUP_REPROCESSING_CHUNK_SIZE
    if tombstoned_event_ids:
        tombstone_events.delay(project_id=project_id,
                               group_id=group_id,
                               event_ids=tombstoned_event_ids)

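    # Re-enqueue with the updated cursor; the chain ends when the batch query
    # returns no events (early return above).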
    reprocess_group.delay(
        project_id=project_id,
        group_id=group_id,
        new_group_id=new_group_id,
        query_state=query_state,
        start_time=start_time,
        max_events=max_events,
    )
Example #3
def get_event_by_processing_counter(n):
    return list(
        eventstore.get_events(
            eventstore.Filter(
                project_ids=[default_project.id],
                conditions=[["tags[processing_counter]", "=", n]],
            )
        )
    )
Example #4
    def get(self, request, organization):
        """
        Generate a list of data scrubbing selectors from existing event data.

        This list is used to auto-complete settings in "Data Privacy" /
        "Security and Privacy" settings.
        """

        event_id = request.GET.get("eventId", None)

        # Filtering by the projects that self.get_projects returns deals with
        # permission concerns
        projects = self.get_projects(request, organization)
        project_ids = [project.id for project in projects]

        selectors = set()

        if event_id:
            for event in eventstore.get_events(
                filter=eventstore.Filter(
                    conditions=[["id", "=", event_id]], project_ids=project_ids
                )
            ):
                selectors.update(pii_selectors_from_event(dict(event.data)))

        suggestions = [
            {"type": "value", "value": selector} for selector in selectors
        ]
        return Response({"suggestions": suggestions})
Example #5
def reprocess_group(project_id, group_id, offset=0, start_time=None):
    if start_time is None:
        start_time = time.time()

    events = list(
        eventstore.get_unfetched_events(
            eventstore.Filter(
                project_ids=[project_id],
                group_ids=[group_id],
                # XXX: received?
                conditions=[["timestamp", "<", to_datetime(start_time)]],
            ),
            limit=GROUP_REPROCESSING_CHUNK_SIZE,
            offset=offset,
            referrer="reprocessing2.reprocess_group",
        )
    )

    if not events:
        return

    for event in events:
        reprocess_event.delay(
            project_id=project_id, event_id=event.event_id, start_time=start_time,
        )

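    # Re-enqueue with an advanced offset until a batch comes back empty.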
    reprocess_group.delay(
        project_id=project_id, group_id=group_id, offset=offset + len(events), start_time=start_time
    )
Example #6
    def post_and_retrieve_security_report(self, data):
        url = self.get_relay_security_url(self.project.id,
                                          self.projectkey.public_key)
        responses.add_passthru(url)

        event_ids = {
            event.event_id
            for event in eventstore.get_events(
                eventstore.Filter(project_ids=[self.project.id]))
        }

        def has_new_event():
            # Hack: security report endpoint does not return event ID
            for event in eventstore.get_events(
                    eventstore.Filter(project_ids=[self.project.id])):
                if event.event_id not in event_ids:
                    return event

        resp = requests.post(url, json=data)

        assert resp.ok

        event = self.wait_for_ingest_consumer(has_new_event)
        # check that we found it in Snuba
        assert event
        return event
Example #7
    def get(self, request, organization):
        """
        Generate a list of data scrubbing selectors from existing event data.

        This list is used to auto-complete settings in "Data Scrubbing" /
        "Security and Privacy" settings.
        """

        event_id = request.GET.get("eventId", None)

        # Filtering by the projects that self.get_projects returns deals with
        # permission concerns
        projects = self.get_projects(request, organization)
        project_ids = [project.id for project in projects]

        suggestions = {}

        if event_id:
            for event in eventstore.get_events(
                filter=eventstore.Filter(
                    conditions=[["id", "=", event_id]], project_ids=project_ids
                )
            ):
                for selector in pii_selector_suggestions_from_event(dict(event.data)):
                    examples_ = suggestions.setdefault(selector["path"], [])
                    if selector["value"]:
                        examples_.append(selector["value"])

        return Response({
            "suggestions": [
                {"type": "value", "value": value, "examples": examples}
                for value, examples in six.iteritems(suggestions)
            ]
        })
Example #8
    def get_field(self, request, snuba_args):
        y_axis = request.GET.get("yAxis", None)
        # These aliases are used by v1 of events.
        if not y_axis or y_axis == "event_count":
            y_axis = "count()"
        elif y_axis == "user_count":
            y_axis = "count_unique(user)"

        snuba_filter = eventstore.Filter({
            "start": snuba_args.get("start"),
            "end": snuba_args.get("end"),
            "rollup": snuba_args.get("rollup"),
        })
        try:
            resolved = resolve_field_list([y_axis], snuba_filter)
        except InvalidSearchQuery as err:
            raise ParseError(detail=str(err))
        try:
            aggregate = resolved["aggregations"][0]
        except IndexError:
            raise ParseError(detail="Invalid yAxis value requested.")
        aggregate[2] = "count"
        snuba_args["aggregations"] = [aggregate]

        return snuba_args
Example #9
def process_individual_attachment(message, projects):
    event_id = message["event_id"]
    project_id = int(message["project_id"])
    cache_key = cache_key_for_event({"event_id": event_id, "project": project_id})

    try:
        project = projects[project_id]
    except KeyError:
        logger.error("Project for ingested event does not exist: %s",
                     project_id)
        return

    if not features.has("organizations:event-attachments",
                        project.organization,
                        actor=None):
        logger.info("Organization has no event attachments: %s", project_id)
        return

    # Attachments may be uploaded for events that already exist. Fetch the
    # existing group_id, so that the attachment can be fetched by group-level
    # APIs. This is inherently racy.
    events = eventstore.get_unfetched_events(
        filter=eventstore.Filter(event_ids=[event_id], project_ids=[project.id]),
        limit=1,
    )

    group_id = None
    if events:
        group_id = events[0].group_id

    attachment = message["attachment"]
    attachment = attachment_cache.get_from_chunks(
        key=cache_key, type=attachment.pop("attachment_type"), **attachment)
    if attachment.type != "event.attachment":
        logger.exception("invalid individual attachment type: %s",
                         attachment.type)
        return

    file = File.objects.create(
        name=attachment.name,
        type=attachment.type,
        headers={"Content-Type": attachment.content_type},
    )

    try:
        data = attachment.data
    except MissingAttachmentChunks:
        logger.exception("Missing chunks for cache_key=%s", cache_key)
        return

    file.putfile(BytesIO(data))
    EventAttachment.objects.create(project_id=project.id,
                                   group_id=group_id,
                                   event_id=event_id,
                                   name=attachment.name,
                                   file=file)

    attachment.delete()
Example #10
    def get(self, request, project, event_id):
        """
        Retrieve an Event for a Project
        ```````````````````````````````

        Return details on an individual event.

        :pparam string organization_slug: the slug of the organization the
                                          event belongs to.
        :pparam string project_slug: the slug of the project the event
                                     belongs to.
        :pparam string event_id: the id of the event to retrieve (either the
                                 numeric primary-key or the hexadecimal id as
                                 reported by the raven client)
        :auth: required
        """

        event = eventstore.get_event_by_id(project.id, event_id)

        if event is None:
            return Response({"detail": "Event not found"}, status=404)

        data = serialize(event, request.user, DetailedEventSerializer())

        # Used for paginating through events of a single issue in group details
        # Skip next/prev for issueless events
        next_event_id = None
        prev_event_id = None

        if event.group_id:
            requested_environments = set(request.GET.getlist("environment"))
            conditions = [["event.type", "!=", "transaction"]]

            if requested_environments:
                conditions.append(
                    ["environment", "IN", requested_environments])

            _filter = eventstore.Filter(conditions=conditions,
                                        project_ids=[event.project_id],
                                        group_ids=[event.group_id])

            # Ignore any time params and search entire retention period
            next_event_filter = deepcopy(_filter)
            next_event_filter.end = datetime.utcnow()
            next_event = eventstore.get_next_event_id(event,
                                                      filter=next_event_filter)

            prev_event_filter = deepcopy(_filter)
            prev_event_filter.start = datetime.utcfromtimestamp(0)
            prev_event = eventstore.get_prev_event_id(event,
                                                      filter=prev_event_filter)

            next_event_id = next_event[1] if next_event else None
            prev_event_id = prev_event[1] if prev_event else None

        data["nextEventID"] = next_event_id
        data["previousEventID"] = prev_event_id

        return Response(data)
Example #11
def get_filter(query=None, params=None):
    """
    Returns an eventstore filter given the search text provided by the user and
    URL params
    """
    # NOTE: this function assumes project permissions check already happened
    parsed_terms = []
    if query is not None:
        try:
            parsed_terms = parse_search_query(query)
        except ParseError as e:
            raise InvalidSearchQuery(u"Parse error: %r (column %d)" %
                                     (e.expr.name, e.column()))

    # Keys included as url params take precedence if the same key is included in the search
    if params is not None:
        parsed_terms.extend(convert_endpoint_params(params))

    kwargs = {
        "start": None,
        "end": None,
        "conditions": [],
        "project_ids": [],
        "group_ids": []
    }

    projects = {}
    has_project_term = any(
        isinstance(term, SearchFilter) and term.key.name == PROJECT_KEY
        for term in parsed_terms)
    if has_project_term:
        projects = {
            p["slug"]: p["id"]
            for p in Project.objects.filter(
                id__in=params["project_id"]).values("id", "slug")
        }

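    # Fold each parsed term into the Filter kwargs: start/end set the time
    # range, project_id/issue.id terms extend the ID lists, and everything
    # else (including project slugs) becomes a Snuba condition.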
    for term in parsed_terms:
        if isinstance(term, SearchFilter):
            name = term.key.name
            if term.key.name == PROJECT_KEY:
                condition = ["project_id", "=", projects.get(term.value.value)]
                kwargs["conditions"].append(condition)
            elif name in ("start", "end"):
                kwargs[name] = term.value.value
            elif name in ("project_id", "issue.id"):
                if name == "issue.id":
                    name = "group_ids"
                if name == "project_id":
                    name = "project_ids"
                value = term.value.value
                if isinstance(value, int):
                    value = [value]
                kwargs[name].extend(value)
            else:
                converted_filter = convert_search_filter_to_snuba_query(term)
                kwargs["conditions"].append(converted_filter)

    return eventstore.Filter(**kwargs)
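
A minimal usage sketch (not from the Sentry source; the query string and the
param keys here are assumptions): the returned Filter feeds directly into
eventstore.

    snuba_filter = get_filter(
        query="browser.name:Chrome",
        params={"project_id": [project.id], "start": start, "end": end},
    )
    events = eventstore.get_events(filter=snuba_filter)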
Example #12
def test_concurrent_events_go_into_new_group(
    default_project,
    reset_snuba,
    register_event_preprocessor,
    process_and_save,
    burst_task_runner,
    default_user,
):
    """
    Assert that both unmodified and concurrently inserted events go into "the
    new group", i.e. the successor of the reprocessed (old) group that
    inherited the group hashes.
    """
    @register_event_preprocessor
    def event_preprocessor(data):
        extra = data.setdefault("extra", {})
        extra.setdefault("processing_counter", 0)
        extra["processing_counter"] += 1
        return data

    event_id = process_and_save({"message": "hello world"})

    event = eventstore.get_event_by_id(default_project.id, event_id)
    original_short_id = event.group.short_id
    assert original_short_id
    original_group_id = event.group.id

    original_assignee = GroupAssignee.objects.create(
        group_id=original_group_id, project=default_project, user=default_user)

    with burst_task_runner() as burst_reprocess:
        reprocess_group(default_project.id, event.group_id)

    assert not is_group_finished(event.group_id)

    event_id2 = process_and_save({"message": "hello world"})
    event2 = eventstore.get_event_by_id(default_project.id, event_id2)
    assert event2.event_id != event.event_id
    assert event2.group_id != event.group_id

    burst_reprocess()

    (event3, ) = eventstore.get_events(
        eventstore.Filter(
            project_ids=[default_project.id],
            conditions=[["tags[original_event_id]", "=", event_id]],
        ))

    assert is_group_finished(event.group_id)

    assert event2.group_id == event3.group_id
    assert event.get_hashes() == event2.get_hashes() == event3.get_hashes()

    group = event3.group

    assert group.short_id == original_short_id
    assert GroupAssignee.objects.get(group=group) == original_assignee
    activity = Activity.objects.get(group=group, type=Activity.REPROCESS)
    assert activity.ident == six.text_type(original_group_id)
Example #13
    def get(self, request, organization, event_id):
        """
        Resolve an Event ID
        ```````````````````

        This resolves an event ID to the project slug, internal issue ID, and internal event ID.

        :pparam string organization_slug: the slug of the organization the
                                          event ID should be looked up in.
        :param string event_id: the event ID to look up.
        :auth: required
        """
        # Largely copied from ProjectGroupIndexEndpoint
        if len(event_id) != 32:
            return Response({"detail": "Event ID must be 32 characters."},
                            status=400)

        # Limit to 100req/s
        if ratelimiter.is_limited(
            u"api:event-id-lookup:{}".format(
                md5_text(
                    request.user.id if request.user and request.user.is_authenticated() else ""
                ).hexdigest()
            ),
            limit=100,
            window=1,
        ):
            return Response(
                {
                    "detail": "You are attempting to use this endpoint too quickly. Limit is 100 requests/second."
                },
                status=429,
            )

        project_slugs_by_id = dict(
            Project.objects.filter(organization=organization).values_list(
                "id", "slug"))

        try:
            snuba_filter = eventstore.Filter(
                conditions=[["event.type", "!=", "transaction"]],
                project_ids=list(project_slugs_by_id.keys()),
                event_ids=[event_id],
            )
            event = eventstore.get_events(filter=snuba_filter, limit=1)[0]
        except IndexError:
            raise ResourceDoesNotExist()
        else:
            return Response({
                "organizationSlug": organization.slug,
                "projectSlug": project_slugs_by_id[event.project_id],
                "groupId": six.text_type(event.group_id),
                "eventId": six.text_type(event.event_id),
                "event": serialize(event, request.user),
            })
Example #14
    def _get_filter(self, snuba_args):
        return eventstore.Filter(
            conditions=snuba_args["conditions"],
            start=snuba_args.get("start", None),
            end=snuba_args.get("end", None),
            project_ids=snuba_args["filter_keys"].get("project_id", None),
            group_ids=snuba_args["filter_keys"].get("issue", None),
        )
Example #15
def reprocess_group(project_id,
                    group_id,
                    offset=0,
                    start_time=None,
                    max_events=None,
                    acting_user_id=None):
    from sentry.reprocessing2 import start_group_reprocessing

    if start_time is None:
        start_time = time.time()
        start_group_reprocessing(project_id,
                                 group_id,
                                 max_events=max_events,
                                 acting_user_id=acting_user_id)

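    # Cap this batch at the remaining event budget; a non-positive budget
    # means there is nothing left to reprocess.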
    if max_events is not None and max_events <= 0:
        events = []
    else:
        limit = GROUP_REPROCESSING_CHUNK_SIZE

        if max_events is not None:
            limit = min(limit, max_events)

        events = list(
            eventstore.get_unfetched_events(
                eventstore.Filter(
                    project_ids=[project_id],
                    group_ids=[group_id],
                ),
                limit=limit,
                orderby=["-timestamp"],
                offset=offset,
                referrer="reprocessing2.reprocess_group",
            ))

    if not events:
        wait_group_reprocessed.delay(project_id=project_id, group_id=group_id)
        return

    for event in events:
        reprocess_event.delay(
            project_id=project_id,
            event_id=event.event_id,
            start_time=start_time,
        )

    if max_events is not None:
        max_events -= len(events)

    reprocess_group.delay(
        project_id=project_id,
        group_id=group_id,
        offset=offset + len(events),
        start_time=start_time,
        max_events=max_events,
    )
Example #16
def update_user_reports(**kwargs: Any) -> None:
    now = timezone.now()
    user_reports = UserReport.objects.filter(
        group_id__isnull=True, environment_id__isnull=True, date_added__gte=now - timedelta(days=1)
    )

    # We do one query per project, to avoid the unlikely case where two projects share the same event ID
    project_map: Dict[int, Any] = {}
    for r in user_reports:
        project_map.setdefault(r.project_id, []).append(r)

    # Logging values
    total_reports = len(user_reports)
    reports_with_event = 0
    updated_reports = 0
    samples = None

    MAX_EVENTS = kwargs.get("max_events", 5000)
    for project_id, reports in project_map.items():
        event_ids = [r.event_id for r in reports]
        report_by_event = {r.event_id: r for r in reports}
        events = []
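        # Query Snuba in MAX_EVENTS-sized chunks to keep each request bounded.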
        for event_id_chunk in chunked(event_ids, MAX_EVENTS):
            snuba_filter = eventstore.Filter(
                project_ids=[project_id],
                event_ids=event_id_chunk,
                start=now - timedelta(days=2),
                end=now + timedelta(minutes=5),  # Just to catch clock skew
            )
            events_chunk = eventstore.get_events(filter=snuba_filter)
            events.extend(events_chunk)

        for event in events:
            report = report_by_event.get(event.event_id)
            if report:
                reports_with_event += 1
                report.update(group_id=event.group_id, environment_id=event.get_environment().id)
                updated_reports += 1

        if not samples and len(reports) <= 10:
            samples = {
                "project_id": project_id,
                "event_ids": event_ids,
                "reports_event_ids": {r.id: r.event_id for r in reports},
            }

    logger.info(
        "update_user_reports.records_updated",
        extra={
            "reports_to_update": total_reports,
            "reports_with_event": reports_with_event,
            "updated_reports": updated_reports,
            "samples": samples,
        },
    )
Example #17
    def filter_by_event_id(self, project_ids, event_id):
        events = eventstore.get_events(
            filter=eventstore.Filter(
                event_ids=[event_id],
                project_ids=project_ids,
                conditions=[["group_id", "IS NOT NULL", None]],
            ),
            limit=max(len(project_ids), 100),
            referrer="Group.filter_by_event_id",
        )
        return self.filter(id__in={event.group_id for event in events})
Example #18
def process_individual_attachment(message, projects):
    event_id = message["event_id"]
    project_id = int(message["project_id"])
    cache_key = cache_key_for_event({"event_id": event_id, "project": project_id})

    try:
        project = projects[project_id]
    except KeyError:
        logger.error("Project for ingested event does not exist: %s",
                     project_id)
        return

    if not features.has("organizations:event-attachments",
                        project.organization,
                        actor=None):
        logger.info("Organization has no event attachments: %s", project_id)
        return

    # Attachments may be uploaded for events that already exist. Fetch the
    # existing group_id, so that the attachment can be fetched by group-level
    # APIs. This is inherently racy.
    events = eventstore.get_unfetched_events(
        filter=eventstore.Filter(event_ids=[event_id], project_ids=[project.id]),
        limit=1,
    )

    group_id = None
    if events:
        group_id = events[0].group_id

    attachment = message["attachment"]
    attachment = attachment_cache.get_from_chunks(
        key=cache_key, type=attachment.pop("attachment_type"), **attachment)
    if attachment.type != "event.attachment":
        logger.exception("invalid individual attachment type: %s",
                         attachment.type)
        return

    save_attachment(
        cache_key,
        attachment,
        project,
        event_id,
        key_id=None,  # TODO: Inject this from Relay
        group_id=group_id,
        start_time=None,  # TODO: Inject this from Relay
    )

    attachment.delete()
Example #19
    def get(self, request, organization):
        # Check for a direct hit on event ID
        query = request.GET.get("query", "").strip()

        try:
            direct_hit_resp = get_direct_hit_response(
                request,
                query,
                self.get_filter_params(request, organization),
                "api.organization-events-direct-hit",
            )
        except (OrganizationEventsError, NoProjects):
            pass
        else:
            if direct_hit_resp:
                return direct_hit_resp

        full = request.GET.get("full", False)
        try:
            snuba_args = self.get_snuba_query_args_legacy(
                request, organization)
        except OrganizationEventsError as exc:
            return Response({"detail": exc.message}, status=400)
        except NoProjects:
            # return empty result if org doesn't have projects
            # or user doesn't have access to projects in org
            data_fn = lambda *args, **kwargs: []
        else:
            cols = None if full else eventstore.full_columns

            data_fn = partial(
                eventstore.get_events,
                additional_columns=cols,
                referrer="api.organization-events",
                filter=eventstore.Filter(
                    start=snuba_args["start"],
                    end=snuba_args["end"],
                    conditions=snuba_args["conditions"],
                    project_ids=snuba_args["filter_keys"].get(
                        "project_id", None),
                    group_ids=snuba_args["filter_keys"].get("group_id", None),
                ),
            )

        serializer = EventSerializer() if full else SimpleEventSerializer()
        return self.paginate(
            request=request,
            on_results=lambda results: serialize(results, request.user,
                                                 serializer),
            paginator=GenericOffsetPaginator(data_fn=data_fn),
        )
Example #20
    def filter_by_event_id(self, project_ids, event_id):
        event_ids = [event_id]
        conditions = [["group_id", "IS NOT NULL", None]]
        data = eventstore.get_events(
            filter=eventstore.Filter(event_ids=event_ids,
                                     project_ids=project_ids,
                                     conditions=conditions),
            limit=max(len(project_ids), 100),
            referrer="Group.filter_by_event_id",
        )

        group_ids = set([evt.group_id for evt in data])

        return Group.objects.filter(id__in=group_ids)
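
A hedged usage sketch (the caller names are assumptions, not from the source):
resolve an event ID pasted into search to the groups it belongs to.

    groups = Group.objects.filter_by_event_id(
        project_ids=[p.id for p in projects], event_id=event_id
    )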
Example #21
    def get(self, request: Request, project) -> Response:
        """
        List a Project's Events
        ```````````````````````

        Return a list of events bound to a project.

        Note: This endpoint is experimental and may be removed without notice.

        :qparam bool full: if this is set to true then the event payload will
                           include the full event body, including the stacktrace.
                           Set to 1 to enable.

        :pparam string organization_slug: the slug of the organization the
                                          groups belong to.
        :pparam string project_slug: the slug of the project the groups
                                     belong to.
        """
        from sentry.api.paginator import GenericOffsetPaginator

        query = request.GET.get("query")
        conditions = []
        if query:
            conditions.append(
                [["positionCaseInsensitive", ["message", f"'{query}'"]], "!=", 0]
            )

        event_filter = eventstore.Filter(conditions=conditions,
                                         project_ids=[project.id])
        if features.has("organizations:project-event-date-limit",
                        project.organization,
                        actor=request.user):
            event_filter.start = timezone.now() - timedelta(days=7)

        full = request.GET.get("full", False)

        data_fn = partial(
            eventstore.get_events,
            filter=event_filter,
            referrer="api.project-events",
        )

        serializer = EventSerializer() if full else SimpleEventSerializer()
        return self.paginate(
            request=request,
            on_results=lambda results: serialize(results, request.user,
                                                 serializer),
            paginator=GenericOffsetPaginator(data_fn=data_fn),
        )
Example #22
    def serialize(self, parent_map, root, warning_extra, params, snuba_event=None, event_id=None):
        """ For the full event trace, we return the results as a graph instead of a flattened list """
        parent_events = {}
        result = parent_events[root["id"]] = self.serialize_event(root, None, 0, True)
        with sentry_sdk.start_span(
            op="nodestore", description=f"retrieving {len(parent_map)} nodes"
        ) as span:
            span.set_data("total nodes", len(parent_map))
            node_data = {
                event.event_id: event
                for event in eventstore.get_events(
                    eventstore.Filter(
                        project_ids=params["project_id"],
                        event_ids=[event["id"] for event in parent_map.values()],
                    )
                )
            }

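        # Breadth-first walk from the root: pop each event, attach the
        # transactions parented to its spans, and queue those children.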
        with sentry_sdk.start_span(op="building.trace", description="full trace"):
            to_check = deque([root])
            iteration = 0
            while to_check:
                current_event = to_check.popleft()
                event = node_data.get(current_event["id"])
                previous_event = parent_events[current_event["id"]]
                for child in event.data.get("spans", []):
                    if child["span_id"] not in parent_map:
                        continue
                    # Avoid potential span loops by popping, so we don't traverse the same nodes twice
                    child_event = parent_map.pop(child["span_id"])

                    parent_events[child_event["id"]] = self.serialize_event(
                        child_event, current_event["id"], previous_event["generation"] + 1
                    )
                    # Add this event to its parent's children
                    previous_event["children"].append(parent_events[child_event["id"]])

                    to_check.append(child_event)
                # Limit iterations just to be safe
                iteration += 1
                if iteration > MAX_TRACE_SIZE:
                    logger.warning(
                        "discover.trace-view.surpassed-trace-limit",
                        extra=warning_extra,
                    )
                    break

        return result
Example #23
    def chunk(self):
        conditions = []
        if self.last_event is not None:
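            # Keyset pagination: resume strictly after the last-seen
            # (timestamp, event_id) pair. Snuba AND-s top-level entries and
            # OR-s the nested list.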
            conditions.extend(
                [
                    ["timestamp", "<=", self.last_event.timestamp],
                    [
                        ["timestamp", "<", self.last_event.timestamp],
                        ["event_id", "<", self.last_event.event_id],
                    ],
                ]
            )

        events = eventstore.get_unfetched_events(
            filter=eventstore.Filter(
                conditions=conditions, project_ids=[self.project_id], group_ids=[self.group_id]
            ),
            limit=self.DEFAULT_CHUNK_SIZE,
            referrer="deletions.group",
            orderby=["-timestamp", "-event_id"],
        )

        if not events:
            return False

        self.last_event = events[-1]

        # Remove from nodestore
        node_ids = [Event.generate_node_id(self.project_id, event.event_id) for event in events]
        nodestore.delete_multi(node_ids)

        from sentry.reprocessing2 import delete_unprocessed_events

        delete_unprocessed_events(events)

        # Remove EventAttachment and UserReport *again* as those may not have a
        # group ID, therefore there may be dangling ones after "regular" model
        # deletion.
        event_ids = [event.event_id for event in events]
        models.EventAttachment.objects.filter(
            event_id__in=event_ids, project_id=self.project_id
        ).delete()
        models.UserReport.objects.filter(
            event_id__in=event_ids, project_id=self.project_id
        ).delete()

        return True
Example #24
    def assertReportCreated(self, input, output):
        resp = self._postCspWithHeader(input)
        assert resp.status_code == 201, resp.content
        # XXX: there appears to be a race condition between the 201 return and
        # get_events, leading this test to sometimes fail. 0.5s seems to be
        # sufficient. Modifying the timestamp of store_event, as done in other
        # snuba tests, doesn't work here because the event isn't created
        # directly by this test.
        sleep(0.5)
        events = eventstore.get_events(
            filter=eventstore.Filter(
                project_ids=[self.project.id], conditions=[["type", "=", "csp"]]
            )
        )
        assert len(events) == 1
        e = events[0]
        assert output["message"] == e.data["logentry"]["formatted"]
        for key, value in six.iteritems(output["tags"]):
            assert e.get_tag(key) == value
        for key, value in six.iteritems(output["data"]):
            assert e.data[key] == value
Example #25
    def get(self, request, organization):
        logger.info("eventsv1.request", extra={"organization_id": organization.id})

        # Check for a direct hit on event ID
        query = request.GET.get("query", "").strip()

        try:
            direct_hit_resp = get_direct_hit_response(
                request,
                query,
                self.get_filter_params(request, organization),
                "api.organization-events-direct-hit",
            )
        except NoProjects:
            pass
        else:
            if direct_hit_resp:
                return direct_hit_resp

        full = request.GET.get("full", False)
        try:
            snuba_args = self.get_snuba_query_args_legacy(request, organization)
        except NoProjects:
            # return empty result if org doesn't have projects
            # or user doesn't have access to projects in org
            data_fn = lambda *args, **kwargs: []
        else:
            data_fn = partial(
                eventstore.get_events,
                referrer="api.organization-events",
                filter=eventstore.Filter(
                    start=snuba_args["start"],
                    end=snuba_args["end"],
                    conditions=snuba_args["conditions"],
                    project_ids=snuba_args["filter_keys"].get("project_id", None),
                    group_ids=snuba_args["filter_keys"].get("group_id", None),
                ),
            )

        serializer = EventSerializer() if full else SimpleEventSerializer()
        return self.paginate(
            request=request,
            on_results=lambda results: serialize(results, request.user, serializer),
            paginator=GenericOffsetPaginator(data_fn=data_fn),
        )
Example #26
    def get(self, request, project):
        """
        List a Project's Events
        ```````````````````````

        Return a list of events bound to a project.

        Note: This endpoint is experimental and may be removed without notice.

        :qparam bool full: if this is set to true then the event payload will
                           include the full event body, including the stacktrace.
                           Set to 1 to enable.

        :pparam string organization_slug: the slug of the organization the
                                          groups belong to.
        :pparam string project_slug: the slug of the project the groups
                                     belong to.
        """
        from sentry.api.paginator import GenericOffsetPaginator

        query = request.GET.get("query")
        conditions = []
        if query:
            conditions.append(
                [["positionCaseInsensitive", ["message", "'%s'" % (query,)]], "!=", 0]
            )

        full = request.GET.get("full", False)

        cols = None if full else eventstore.full_columns

        data_fn = partial(
            eventstore.get_events,
            additional_columns=cols,
            filter=eventstore.Filter(conditions=conditions, project_ids=[project.id]),
            referrer="api.project-events",
        )

        serializer = EventSerializer() if full else SimpleEventSerializer()
        return self.paginate(
            request=request,
            on_results=lambda results: serialize(results, request.user, serializer),
            paginator=GenericOffsetPaginator(data_fn=data_fn),
        )
Example #27
    def get(self, request, organization):
        """
        Generate a list of data scrubbing selectors from existing event data.

        This list is used to auto-complete settings in "Data Scrubbing" /
        "Security and Privacy" settings.
        """

        event_id = request.GET.get("eventId", None)

        # For organization settings we access all projects the user has access
        # to. For the project level, `get_projects` will give us back a single
        # project.
        #
        # Filtering by the projects that self.get_projects returns deals with
        # permission concerns.
        #
        # The org-wide search for the event ID is quite slow, but we cannot fix
        # that without product redesign.
        projects = self.get_projects(request, organization)
        project_ids = [project.id for project in projects]

        suggestions = {}

        if event_id:
            for event in eventstore.get_events(
                filter=eventstore.Filter(event_ids=[event_id], project_ids=project_ids),
                referrer="api.data_scrubbing_selector_suggestions",
            ):
                for selector in pii_selector_suggestions_from_event(dict(event.data)):
                    examples_ = suggestions.setdefault(selector["path"], [])
                    if selector["value"]:
                        examples_.append(selector["value"])

        return Response({
            "suggestions": [
                {"type": "value", "value": value, "examples": examples}
                for value, examples in suggestions.items()
            ]
        })
Example #28
    def get(self, request, organization, event_id):
        """
        Resolve an Event ID
        ```````````````````

        This resolves an event ID to the project slug, internal issue ID, and internal event ID.

        :pparam string organization_slug: the slug of the organization the
                                          event ID should be looked up in.
        :param string event_id: the event ID to look up.
        :auth: required
        """
        # Largely copied from ProjectGroupIndexEndpoint
        if len(event_id) != 32:
            return Response({"detail": "Event ID must be 32 characters."},
                            status=400)

        project_slugs_by_id = dict(
            Project.objects.filter(organization=organization).values_list(
                "id", "slug"))

        try:
            event = eventstore.get_events(
                filter=eventstore.Filter(
                    project_ids=list(project_slugs_by_id.keys()),
                    event_ids=[event_id],
                ),
                limit=1,
            )[0]
        except IndexError:
            raise ResourceDoesNotExist()
        else:
            return Response({
                "organizationSlug": organization.slug,
                "projectSlug": project_slugs_by_id[event.project_id],
                "groupId": six.text_type(event.group_id),
                "eventId": six.text_type(event.id),
                "event": serialize(event, request.user),
            })
Example #29
    def wait_for_event_count(self, project_id, total, attempts=2):
        """
        Wait until the event count reaches the provided value or the attempt limit is exhausted.

        Useful when you're storing several events and need to ensure that snuba/clickhouse
        state has settled.
        """
        # Verify that events have settled in snuba's storage.
        # While snuba is synchronous, clickhouse isn't entirely synchronous.
        attempt = 0
        snuba_filter = eventstore.Filter(project_ids=[project_id])
        while attempt < attempts:
            events = eventstore.get_events(snuba_filter)
            if len(events) >= total:
                break
            attempt += 1
            time.sleep(0.05)
        if attempt == attempts:
            assert False, f"Could not ensure event was persisted within {attempt} attempt(s)"
    def get(self, request: Request, organization, event_id) -> Response:
        """
        Resolve an Event ID
        ```````````````````

        This resolves an event ID to the project slug, internal issue ID, and internal event ID.

        :pparam string organization_slug: the slug of the organization the
                                          event ID should be looked up in.
        :param string event_id: the event ID to look up. validated by a
                                regex in the URL.
        :auth: required
        """
        if event_id and not is_event_id(event_id):
            return Response({"detail": INVALID_ID_DETAILS.format("Event ID")},
                            status=400)

        project_slugs_by_id = dict(
            Project.objects.filter(organization=organization).values_list(
                "id", "slug"))

        try:
            snuba_filter = eventstore.Filter(
                conditions=[["event.type", "!=", "transaction"]],
                project_ids=list(project_slugs_by_id.keys()),
                event_ids=[event_id],
            )
            event = eventstore.get_events(filter=snuba_filter, limit=1)[0]
        except IndexError:
            raise ResourceDoesNotExist()
        else:
            return Response({
                "organizationSlug":
                organization.slug,
                "projectSlug":
                project_slugs_by_id[event.project_id],
                "groupId":
                str(event.group_id),
                "eventId":
                str(event.event_id),
                "event":
                serialize(event, request.user),
            })