Example #1
import sentry_sdk
from flask import current_app  # assumption: current_app comes from Flask, given config["DEBUG"]


def logger(txt, err=False, module="", obj=None):
    if err:
        # Tag the event with the module, attach the object as extra context,
        # and report the exception to Sentry.
        sentry_sdk.set_tag('Module', module)
        sentry_sdk.set_extra(module, obj)
        sentry_sdk.capture_exception(txt)
    if current_app.config["DEBUG"]:
        print(txt)
Example #2
    def __init__(self, body):
        self.body = body
        self.event = (
            DataChangeEvent.parse_raw(self.body)
            if isinstance(self.body, (bytes, str))
            else DataChangeEvent.parse_obj(self.body)
        )

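        # If the event carries user metadata, populate the access context with a
        # synthetic internal token acting on behalf of that user.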
        if self.event.metadata.user and self.event.metadata.user.uid:
            access_ctx.set(Access(
                token=AccessToken(
                    iss='int',
                    iat=0,
                    nbf=0,
                    exp=0,
                    sub=self.event.metadata.user.uid,
                    ten=self.event.tenant_id,
                    aud=self.event.metadata.user.scopes,
                    rls=self.event.metadata.user.roles,
                    jti='int',
                    crt=False,
                ),
            ))

        self.is_new_orm_obj = False
        self.span = span_ctx.get()
        self.span.set_tag('exchange', self.exchange)
        self.span.set_tag('queue', self.queue)
        self.span.set_tag('orm_model', self.orm_model)
        self.span.set_tag('data_op', self.event.data_op)
        self.span.set_data('body', self.body)
        set_extra('body', self.body)
Example #3
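    # Lazily fetch and cache the ORM object referenced by the event body
    # (presumably exposed as a property).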
    def orm_obj(self) -> TDjangoModel:
        if not self.__orm_obj:
            data = json.loads(self.body) if isinstance(self.body, (bytes, str)) else self.body
            query = models.Q(id=data.get('id'))
            if self.is_tenant_bound:
                query &= models.Q(tenant_id=self.event.tenant_id)

            self.__orm_obj = self.orm_model.objects.get(query)
            set_extra('orm_obj', self.__orm_obj)

        return self.__orm_obj
Example #4
    async def __call__(self,
                       request: Request,
                       scopes: Optional[SecurityScopes] = None) -> Optional[Access]:
        try:
            token = await super().__call__(request)

        except HTTPException as error:
            if error.status_code == 403:
                raise AuthError from error

            raise

        if not token:
            return

        current_access = Access(
            token=AccessToken(**self.decode_token(token)),
        )
        set_extra('access.token.aud', current_access.token.aud)

        set_user({
            'id': current_access.user_id,
            'tenant_id': current_access.tenant_id,
        })

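        # Verify that the token carries at least one of the required scopes
        # (audiences); reject the request with 403 otherwise.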
        if scopes:
            audiences = current_access.token.has_audiences(scopes.scopes)
            if not audiences:
                raise HTTPException(
                    status_code=403,
                    detail=Error(
                        type='JWTClaimsError',
                        code='required_audience_missing',
                        message='The required scope is not included in the given token.',
                        detail=scopes.scopes,
                    ))

            aud_scopes = [
                AccessScope.from_str(audience) for audience in audiences
            ]
            current_access.scopes = aud_scopes
            current_access.scope = aud_scopes[0]
            set_extra('access.scopes', aud_scopes)

        access.set(current_access)

        return current_access
Example #5
def run_comparison(
    fn_name: str,
    metrics_fn: Callable[..., Any],
    should_compare: bool,
    rollup: Optional[int],
    organization: Optional[Organization],
    schema: Optional[Schema],
    function_args: Tuple[Any, ...],
    sessions_result: Any,
    sessions_time: datetime,
    **kwargs,
) -> None:
    if rollup is None:
        rollup = 0  # force exact date comparison if not specified

    tags = {"method": fn_name, "rollup": str(rollup)}

    set_tag("releasehealth.duplex.rollup", str(rollup))
    set_tag("releasehealth.duplex.method", fn_name)
    set_tag("releasehealth.duplex.org_id", str(getattr(organization, "id")))

    set_context(
        "release-health-duplex-sessions",
        {
            "sessions": sessions_result,
        },
    )

    try:
        delay = (datetime.now(pytz.utc) - sessions_time).total_seconds()
        set_extra("delay", delay)
        timing("releasehealth.metrics.delay", delay)

        # We read from the metrics source even if there is no need to compare.
        with timer("releasehealth.metrics.duration", tags=tags, sample_rate=1.0):
            metrics_val = metrics_fn(*function_args)

        incr(
            "releasehealth.metrics.check_should_compare",
            tags={"should_compare": str(should_compare), **tags},
            sample_rate=1.0,
        )

        if not should_compare:
            return

        copy = deepcopy(sessions_result)

        set_context("release-health-duplex-metrics", {"metrics": metrics_val})

        with timer("releasehealth.results-diff.duration", tags=tags, sample_rate=1.0):
            errors = compare_results(copy, metrics_val, rollup, None, schema)
        set_context("release-health-duplex-errors", {"errors": [str(error) for error in errors]})

        should_report = features.has(
            "organizations:release-health-check-metrics-report", organization
        )

        incr(
            "releasehealth.metrics.compare",
            tags={"has_errors": str(bool(errors)), "reported": str(should_report), **tags},
            sample_rate=1.0,
        )

        if errors and should_report:
            tag_delta(errors, tags)
            # We heavily rely on Sentry's message sanitization to properly deduplicate this
            capture_message(f"{fn_name} - Release health metrics mismatch: {errors[0]}")
    except Exception:
        capture_exception()
        should_compare = False
        incr(
            "releasehealth.metrics.crashed",
            tags=tags,
            sample_rate=1.0,
        )
Example #6
def _load_event(
    message: Message, projects: Mapping[int, Project]
) -> Optional[Tuple[Any, Callable[[str], None]]]:
    """
    Perform some initial filtering and deserialize the message payload. If the
    event should be stored, the deserialized payload is returned along with a
    function that can be called with the event's storage key to resume
    processing after the event has been persisted and is available to be read by
    other processing components.
    """
    payload = message["payload"]
    start_time = float(message["start_time"])
    event_id = message["event_id"]
    project_id = int(message["project_id"])
    remote_addr = message.get("remote_addr")
    attachments = message.get("attachments") or ()

    sentry_sdk.set_extra("event_id", event_id)
    sentry_sdk.set_extra("len_attachments", len(attachments))

    if project_id == settings.SENTRY_PROJECT:
        metrics.incr("internal.captured.ingest_consumer.unparsed")

    # check that we haven't already processed this event (a previous instance of the forwarder
    # died before it could commit the event queue offset)
    #
    # XXX(markus): I believe this code is extremely broken:
    #
    # * it practically uses memcached in prod which has no consistency
    #   guarantees (no idea how we don't run into issues there)
    #
    # * a TTL of 1h basically doesn't guarantee any deduplication at all. It
    #   just guarantees a good error message... for one hour.
    #
    # This code has been ripped from the old python store endpoint. We're
    # keeping it around because it does provide some protection against
    # reprocessing good events if a single consumer is in a restart loop.
    deduplication_key = f"ev:{project_id}:{event_id}"
    if cache.get(deduplication_key) is not None:
        logger.warning(
            "pre-process-forwarder detected a duplicated event" " with id:%s for project:%s.",
            event_id,
            project_id,
        )
        return  # message already processed; do not reprocess

    if killswitch_matches_context(
        "store.load-shed-pipeline-projects",
        {
            "project_id": project_id,
            "event_id": event_id,
            "has_attachments": bool(attachments),
        },
    ):
        # This killswitch is for the worst of scenarios and should probably not
        # cause additional load on our logging infrastructure
        return

    try:
        project = projects[project_id]
    except KeyError:
        logger.error("Project for ingested event does not exist: %s", project_id)
        return

    # Parse the JSON payload. This is required to compute the cache key and
    # call process_event. The payload will be put into Kafka raw, to avoid
    # serializing it again.
    # XXX: Do not use CanonicalKeyDict here. This may break preprocess_event
    # which assumes that data passed in is a raw dictionary.
    data = json.loads(payload)

    if project_id == settings.SENTRY_PROJECT:
        metrics.incr(
            "internal.captured.ingest_consumer.parsed",
            tags={"event_type": data.get("type") or "null"},
        )

    if killswitch_matches_context(
        "store.load-shed-parsed-pipeline-projects",
        {
            "organization_id": project.organization_id,
            "project_id": project.id,
            "event_type": data.get("type") or "null",
            "has_attachments": bool(attachments),
            "event_id": event_id,
        },
    ):
        return

    def dispatch_task(cache_key: str) -> None:
        if attachments:
            with sentry_sdk.start_span(op="ingest_consumer.set_attachment_cache"):
                attachment_objects = [
                    CachedAttachment(type=attachment.pop("attachment_type"), **attachment)
                    for attachment in attachments
                ]

                attachment_cache.set(
                    cache_key, attachments=attachment_objects, timeout=CACHE_TIMEOUT
                )

        # Preprocess this event, which spawns either process_event or
        # save_event. Pass data explicitly to avoid fetching it again from the
        # cache.
        with sentry_sdk.start_span(op="ingest_consumer.process_event.preprocess_event"):
            preprocess_event(
                cache_key=cache_key,
                data=data,
                start_time=start_time,
                event_id=event_id,
                project=project,
            )

        # remember for 1 hour that we saved this event (deduplication protection)
        cache.set(deduplication_key, "", CACHE_TIMEOUT)

        # emit event_accepted once everything is done
        event_accepted.send_robust(ip=remote_addr, data=data, project=project, sender=process_event)

    return data, dispatch_task
Example #7
def _set_request_info():
    sentry_sdk.set_extra(
        'Endpoint',
        str(request.url_rule.endpoint) if request.url_rule else None)
    sentry_sdk.set_extra('Request ID', request.id)
    sentry_sdk.set_tag('locale', set_best_lang())
Example #8
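    # Setter counterpart of the orm_obj getter in Example #3 (presumably
    # decorated with @orm_obj.setter); mirrors the assigned object into Sentry's extras.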
    def orm_obj(self, value):
        self.__orm_obj = value
        set_extra('orm_obj', self.__orm_obj)
Example #9
File: duplex.py  Project: billyvg/sentry
    def _dispatch_call_inner(
        self,
        fn_name: str,
        should_compare: Union[bool, Callable[[Any], bool]],
        rollup: Optional[int],
        organization: Optional[Organization],
        schema: Optional[Schema],
        *args: Any,
    ) -> ReleaseHealthResult:
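        # Always serve the result from the sessions backend; when the
        # release-health-check-metrics feature is enabled for the organization,
        # also query the metrics backend and compare the two results.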
        if rollup is None:
            rollup = 0  # force exact date comparison if not specified
        sessions_fn = getattr(self.sessions, fn_name)
        set_tag("releasehealth.duplex.rollup", str(rollup))
        set_tag("releasehealth.duplex.method", fn_name)
        set_tag("releasehealth.duplex.org_id", str(getattr(organization,
                                                           "id")))

        set_extra("function_args",
                  args)  # Make sure we always know all function args

        tags = {"method": fn_name, "rollup": str(rollup)}
        with timer("releasehealth.sessions.duration",
                   tags=tags,
                   sample_rate=1.0):
            ret_val = sessions_fn(*args)

        if organization is None or not features.has(
                "organizations:release-health-check-metrics", organization):
            return ret_val  # cannot check feature without organization

        set_context(
            "release-health-duplex-sessions",
            {
                "sessions": ret_val,
            },
        )

        try:
            # We read from the metrics source even if there is no need to compare.
            metrics_fn = getattr(self.metrics, fn_name)
            with timer("releasehealth.metrics.duration",
                       tags=tags,
                       sample_rate=1.0):
                metrics_val = metrics_fn(*args)

            if not isinstance(should_compare, bool):
                # should compare depends on the session result
                # evaluate it now
                should_compare = should_compare(ret_val)

            incr(
                "releasehealth.metrics.check_should_compare",
                tags={
                    "should_compare": str(should_compare),
                    **tags
                },
                sample_rate=1.0,
            )

            if not should_compare:
                return ret_val

            copy = deepcopy(ret_val)

            set_context("release-health-duplex-metrics",
                        {"metrics": metrics_val})

            with timer("releasehealth.results-diff.duration",
                       tags=tags,
                       sample_rate=1.0):
                errors = compare_results(copy, metrics_val, rollup, None,
                                         schema)
            set_context("release-health-duplex-errors",
                        {"errors": [str(error) for error in errors]})

            should_report = features.has(
                "organizations:release-health-check-metrics-report",
                organization)

            incr(
                "releasehealth.metrics.compare",
                tags={
                    "has_errors": str(bool(errors)),
                    "reported": str(should_report),
                    **tags
                },
                sample_rate=1.0,
            )

            if errors and should_report:
                tag_delta(errors, tags)
                # We heavily rely on Sentry's message sanitization to properly deduplicate this
                capture_message(
                    f"{fn_name} - Release health metrics mismatch: {errors[0]}"
                )
        except Exception:
            capture_exception()
            should_compare = False
            incr(
                "releasehealth.metrics.crashed",
                tags=tags,
                sample_rate=1.0,
            )

        return ret_val
Example #10
def start_group_reprocessing(project_id,
                             group_id,
                             remaining_events,
                             max_events=None,
                             acting_user_id=None):
    from django.db import transaction

    with transaction.atomic():
        group = models.Group.objects.get(id=group_id)
        original_status = group.status
        if original_status == models.GroupStatus.REPROCESSING:
            # This is supposed to be a rather unlikely UI race when two people
            # click reprocessing in the UI at the same time.
            #
            # During reprocessing the button is greyed out.
            raise RuntimeError(
                "Cannot reprocess group that is currently being reprocessed")

        original_short_id = group.short_id
        group.status = models.GroupStatus.REPROCESSING
        # satisfy unique constraint of (project_id, short_id)
        # we manually tested that multiple groups with (project_id=1,
        # short_id=null) can exist in postgres
        group.short_id = None
        group.save()

        # Create a duplicate row that has the same attributes by nulling out
        # the primary key and saving
        group.pk = group.id = None
        new_group = group  # rename variable just to avoid confusion
        del group
        new_group.status = original_status
        new_group.short_id = original_short_id

        # this will be incremented by either the events that are
        # reprocessed, or handle_remaining_events
        #
        # XXX(markus): times_seen etc are unlikely to be correct ootb,
        # especially if handle_remaining_events is used a lot.
        new_group.times_seen = 0

        new_group.save()

        # This migrates all models that are associated with a group but not
        # directly with an event, i.e. everything but event attachments and user
        # reports. Those other updates are run per-event (in
        # post-process-forwarder) to not cause too much load on pg.
        for model in GROUP_MODELS_TO_MIGRATE:
            model.objects.filter(group_id=group_id).update(
                group_id=new_group.id)

    # Get event counts of issue (for all environments etc). This was copypasted
    # and simplified from groupserializer.
    event_count = sync_count = snuba.aliased_query(
        aggregations=[["count()", "", "times_seen"]],  # select
        dataset=snuba.Dataset.Events,  # from
        conditions=[["group_id", "=", group_id],
                    ["project_id", "=", project_id]],  # where
        referrer="reprocessing2.start_group_reprocessing",
    )["data"][0]["times_seen"]

    sentry_sdk.set_extra("event_count", event_count)

    if max_events is not None:
        event_count = min(max_events, event_count)

    # Create activity on *old* group as that will serve the landing page for our
    # reprocessing status
    #
    # Later the activity is migrated to the new group where it is used to serve
    # the success message.
    new_activity = models.Activity.objects.create(
        type=models.Activity.REPROCESS,
        project=new_group.project,
        ident=str(group_id),
        group_id=group_id,
        user_id=acting_user_id,
        data={
            "eventCount": event_count,
            "oldGroupId": group_id,
            "newGroupId": new_group.id
        },
    )

    # New Activity Timestamp
    date_created = new_activity.datetime

    client = _get_sync_redis_client()
    client.setex(_get_sync_counter_key(group_id),
                 settings.SENTRY_REPROCESSING_SYNC_TTL, sync_count)
    client.setex(
        _get_info_reprocessed_key(group_id),
        settings.SENTRY_REPROCESSING_SYNC_TTL,
        json.dumps({
            "dateCreated": date_created,
            "syncCount": sync_count,
            "totalEvents": event_count
        }),
    )

    return new_group.id