def logger(txt, err=False, module="", obj=None):
    """Report ``txt`` to Sentry when ``err`` is set and echo it in debug mode.

    Args:
        txt: The message (or exception object) to report.
        err: When truthy, ``txt`` is sent to Sentry together with the
            ``Module`` tag and the ``obj`` extra.
        module: Value of the ``Module`` tag; also used as the key under
            which ``obj`` is attached as extra data.
        obj: Extra payload stored under the ``module`` key.
    """
    if err:
        sentry_sdk.set_tag('Module', module)
        sentry_sdk.set_extra(module, obj)
        if isinstance(txt, str):
            # capture_exception() only understands exception objects and
            # exc_info tuples; plain text must be sent as a message,
            # otherwise the SDK raises instead of reporting.
            sentry_sdk.capture_message(txt, level='error')
        else:
            sentry_sdk.capture_exception(txt)
    if current_app.config["DEBUG"]:
        print(txt)
def __init__(self, body):
    """Parse the incoming event body and prime access/tracing context.

    ``body`` is parsed into a ``DataChangeEvent`` (raw parsing for
    ``bytes``/``str``, object parsing otherwise). When the event metadata
    carries a user with a uid, an internal ``Access`` is stored in
    ``access_ctx``. The current span is then tagged with the consumer's
    exchange, queue, ORM model and the event's data operation.
    """
    self.body = body
    if isinstance(self.body, (bytes, str)):
        self.event = DataChangeEvent.parse_raw(self.body)
    else:
        self.event = DataChangeEvent.parse_obj(self.body)

    event_user = self.event.metadata.user
    if event_user and event_user.uid:
        internal_token = AccessToken(
            iss='int',
            iat=0,
            nbf=0,
            exp=0,
            sub=event_user.uid,
            ten=self.event.tenant_id,
            aud=event_user.scopes,
            rls=event_user.roles,
            jti='int',
            crt=False,
        )
        access_ctx.set(Access(token=internal_token))

    self.is_new_orm_obj = False

    self.span = span_ctx.get()
    current_span = self.span
    current_span.set_tag('exchange', self.exchange)
    current_span.set_tag('queue', self.queue)
    current_span.set_tag('orm_model', self.orm_model)
    current_span.set_tag('data_op', self.event.data_op)
    current_span.set_data('body', self.body)

    set_extra('body', self.body)
def orm_obj(self) -> TDjangoModel:
    """Return the ORM object for this event, loading it on first access.

    The object is looked up by the ``id`` found in the body and, when
    ``is_tenant_bound`` is set, additionally by the event's tenant id.
    A freshly loaded object is cached and attached to Sentry as the
    ``orm_obj`` extra.

    NOTE(review): presumably exposed as a property; the decorator is not
    visible in this part of the file.
    """
    if self.__orm_obj:
        return self.__orm_obj

    if isinstance(self.body, (bytes, str)):
        payload = json.loads(self.body)
    else:
        payload = self.body

    lookup = models.Q(id=payload.get('id'))
    if self.is_tenant_bound:
        lookup &= models.Q(tenant_id=self.event.tenant_id)

    self.__orm_obj = self.orm_model.objects.get(lookup)
    set_extra('orm_obj', self.__orm_obj)
    return self.__orm_obj
async def __call__(self, request: Request, scopes: SecurityScopes = None) -> Optional[Access]:
    """Resolve the request's token into an ``Access`` and publish it.

    Returns ``None`` when the parent scheme yields no token. A 403 raised
    by the parent scheme is re-raised as ``AuthError``; any other
    ``HTTPException`` propagates unchanged. When ``scopes`` is given and
    none of the required scopes is among the token's audiences, a 403
    ``HTTPException`` is raised.
    """
    try:
        raw_token = await super().__call__(request)
    except HTTPException as exc:
        if exc.status_code != 403:
            raise
        raise AuthError from exc

    if not raw_token:
        return None

    claims = self.decode_token(raw_token)
    current_access = Access(token=AccessToken(**claims))

    set_extra('access.token.aud', current_access.token.aud)
    user_info = {
        'id': current_access.user_id,
        'tenant_id': current_access.tenant_id,
    }
    set_user(user_info)

    if scopes:
        matched = current_access.token.has_audiences(scopes.scopes)
        if not matched:
            missing_scope = Error(
                type='JWTClaimsError',
                code='required_audience_missing',
                message='The required scope is not included in the given token.',
                detail=scopes.scopes,
            )
            raise HTTPException(status_code=403, detail=missing_scope)

        granted = [AccessScope.from_str(item) for item in matched]
        current_access.scopes = granted
        # The first matching scope is used as the primary one.
        current_access.scope = granted[0]
        set_extra('access.scopes', granted)

    access.set(current_access)
    return current_access
def run_comparison(
    fn_name: str,
    metrics_fn: Callable[..., Any],
    should_compare: bool,
    rollup: Optional[int],
    organization: Optional[Organization],
    schema: Optional[Schema],
    function_args: Tuple[Any],
    sessions_result: Any,
    sessions_time: datetime,
    **kwargs,
) -> None:
    """Run the metrics implementation and compare it to the sessions result.

    Args:
        fn_name: Name of the release-health method being checked; used in
            tags and in the mismatch message.
        metrics_fn: Callable invoked with ``*function_args`` to produce the
            metrics-based result.
        should_compare: When false, the metrics call is still made (and
            timed) but no comparison takes place.
        rollup: Passed to ``compare_results``; ``None`` is treated as ``0``.
        organization: Organization used for the reporting feature check and
            the ``org_id`` tag. May be ``None``.
        schema: Passed through to ``compare_results``.
        function_args: Positional arguments for ``metrics_fn``.
        sessions_result: Result of the sessions implementation to compare
            against. It is deep-copied before comparison.
        sessions_time: Time the sessions result was produced; the elapsed
            time since then is recorded as the ``delay``.
        **kwargs: Accepted and ignored.

    Any exception raised while reading metrics or comparing is captured and
    counted as ``releasehealth.metrics.crashed``; it never propagates.
    """
    if rollup is None:
        rollup = 0  # force exact date comparison if not specified

    tags = {"method": fn_name, "rollup": str(rollup)}

    set_tag("releasehealth.duplex.rollup", str(rollup))
    set_tag("releasehealth.duplex.method", fn_name)
    # organization is Optional: fall back to None instead of raising
    # AttributeError before the guarded section below is even reached.
    set_tag("releasehealth.duplex.org_id", str(getattr(organization, "id", None)))

    set_context(
        "release-health-duplex-sessions",
        {
            "sessions": sessions_result,
        },
    )

    try:
        delay = (datetime.now(pytz.utc) - sessions_time).total_seconds()
        set_extra("delay", delay)
        timing("releasehealth.metrics.delay", delay)

        # We read from the metrics source even if there is no need to compare.
        with timer("releasehealth.metrics.duration", tags=tags, sample_rate=1.0):
            metrics_val = metrics_fn(*function_args)

        incr(
            "releasehealth.metrics.check_should_compare",
            tags={"should_compare": str(should_compare), **tags},
            sample_rate=1.0,
        )

        if not should_compare:
            return

        # compare_results gets its own copy so the caller's result object
        # is not handed to the comparison directly.
        sessions_copy = deepcopy(sessions_result)

        set_context("release-health-duplex-metrics", {"metrics": metrics_val})

        with timer("releasehealth.results-diff.duration", tags=tags, sample_rate=1.0):
            errors = compare_results(sessions_copy, metrics_val, rollup, None, schema)
        set_context("release-health-duplex-errors", {"errors": [str(error) for error in errors]})

        should_report = features.has(
            "organizations:release-health-check-metrics-report", organization
        )

        incr(
            "releasehealth.metrics.compare",
            tags={"has_errors": str(bool(errors)), "reported": str(should_report), **tags},
            sample_rate=1.0,
        )

        if errors and should_report:
            tag_delta(errors, tags)
            # We heavily rely on Sentry's message sanitization to properly deduplicate this
            capture_message(f"{fn_name} - Release health metrics mismatch: {errors[0]}")
    except Exception:
        # Best effort: the comparison must never break the caller.
        capture_exception()
        incr(
            "releasehealth.metrics.crashed",
            tags=tags,
            sample_rate=1.0,
        )
def _load_event(
    message: Message, projects: Mapping[int, Project]
) -> Optional[Tuple[Any, Callable[[str], None]]]:
    """
    Filter an ingest message and decode its payload.

    Returns ``None`` when the message is dropped: it was already seen
    according to the deduplication cache, a load-shedding killswitch
    matched, or its project is not in ``projects``. Otherwise returns the
    decoded payload together with a callback. The callback takes the
    event's storage key and continues processing: it caches attachments,
    runs ``preprocess_event``, records the deduplication marker and emits
    ``event_accepted``.
    """
    raw_payload = message["payload"]
    start_time = float(message["start_time"])
    event_id = message["event_id"]
    project_id = int(message["project_id"])
    remote_addr = message.get("remote_addr")
    attachments = message.get("attachments") or ()

    sentry_sdk.set_extra("event_id", event_id)
    sentry_sdk.set_extra("len_attachments", len(attachments))

    is_internal_project = project_id == settings.SENTRY_PROJECT
    if is_internal_project:
        metrics.incr("internal.captured.ingest_consumer.unparsed")

    # Skip events that were already handled, e.g. because a previous
    # forwarder instance died before committing its queue offset.
    #
    # XXX(markus): this protection is weak. The cache is effectively
    # memcached in prod (no consistency guarantees), and a one hour TTL
    # gives a good error message for an hour rather than real
    # deduplication. It is inherited from the old python store endpoint
    # and kept because it still offers some protection against
    # reprocessing good events when a single consumer is stuck in a
    # restart loop.
    deduplication_key = f"ev:{project_id}:{event_id}"
    already_seen = cache.get(deduplication_key) is not None
    if already_seen:
        logger.warning(
            "pre-process-forwarder detected a duplicated event" " with id:%s for project:%s.",
            event_id,
            project_id,
        )
        return  # message already processed do not reprocess

    has_attachments = bool(attachments)

    unparsed_context = {
        "project_id": project_id,
        "event_id": event_id,
        "has_attachments": has_attachments,
    }
    if killswitch_matches_context("store.load-shed-pipeline-projects", unparsed_context):
        # Worst-case killswitch: deliberately silent so it adds no load to
        # the logging infrastructure.
        return

    try:
        project = projects[project_id]
    except KeyError:
        logger.error("Project for ingested event does not exist: %s", project_id)
        return

    # The payload has to be decoded to compute the cache key and to call
    # process_event. It is forwarded to Kafka raw to avoid serializing it
    # again.
    # XXX: Do not use CanonicalKeyDict here. preprocess_event may assume it
    # is handed a raw dictionary.
    data = json.loads(raw_payload)
    event_type = data.get("type") or "null"

    if is_internal_project:
        metrics.incr(
            "internal.captured.ingest_consumer.parsed",
            tags={"event_type": event_type},
        )

    parsed_context = {
        "organization_id": project.organization_id,
        "project_id": project.id,
        "event_type": event_type,
        "has_attachments": has_attachments,
        "event_id": event_id,
    }
    if killswitch_matches_context("store.load-shed-parsed-pipeline-projects", parsed_context):
        return

    def dispatch_task(cache_key: str) -> None:
        if attachments:
            with sentry_sdk.start_span(op="ingest_consumer.set_attachment_cache"):
                attachment_objects = []
                for raw_attachment in attachments:
                    attachment_type = raw_attachment.pop("attachment_type")
                    attachment_objects.append(
                        CachedAttachment(type=attachment_type, **raw_attachment)
                    )

                attachment_cache.set(
                    cache_key, attachments=attachment_objects, timeout=CACHE_TIMEOUT
                )

        # preprocess_event spawns either process_event or save_event; the
        # decoded data is passed along so it is not fetched from the cache
        # again.
        with sentry_sdk.start_span(op="ingest_consumer.process_event.preprocess_event"):
            preprocess_event(
                cache_key=cache_key,
                data=data,
                start_time=start_time,
                event_id=event_id,
                project=project,
            )

        # Deduplication marker: remember this event for CACHE_TIMEOUT.
        cache.set(deduplication_key, "", CACHE_TIMEOUT)

        # Signal acceptance only after everything above has run.
        event_accepted.send_robust(
            ip=remote_addr, data=data, project=project, sender=process_event
        )

    return data, dispatch_task
def _set_request_info():
    """Attach endpoint, request id and locale of the current request to Sentry.

    The endpoint extra is ``None`` when the request has no URL rule.
    """
    if request.url_rule:
        endpoint_name = str(request.url_rule.endpoint)
    else:
        endpoint_name = None

    sentry_sdk.set_extra('Endpoint', endpoint_name)
    sentry_sdk.set_extra('Request ID', request.id)

    locale = set_best_lang()
    sentry_sdk.set_tag('locale', locale)
def orm_obj(self, value):
    """Store ``value`` as the cached ORM object and expose it to Sentry.

    NOTE(review): presumably the setter half of a property; the decorator
    is not visible in this part of the file.
    """
    self.__orm_obj = value
    # Mirror the assigned object into the Sentry scope as an extra.
    set_extra('orm_obj', value)
def _dispatch_call_inner(
    self,
    fn_name: str,
    should_compare: Union[bool, Callable[[Any], bool]],
    rollup: Optional[int],
    organization: Optional[Organization],
    schema: Optional[Schema],
    *args: Any,
) -> ReleaseHealthResult:
    """Call ``fn_name`` on the sessions backend and shadow-check the metrics backend.

    The sessions result is always what gets returned. When the
    ``organizations:release-health-check-metrics`` feature is enabled for
    ``organization``, the same method is also called on the metrics backend
    and, if requested, the two results are compared and mismatches reported.

    Args:
        fn_name: Name of the method to call on both backends.
        should_compare: Either a bool, or a callable that receives the
            sessions result and returns whether to compare.
        rollup: Passed to ``compare_results``; ``None`` is treated as ``0``.
        organization: Organization used for feature checks and the
            ``org_id`` tag. With ``None`` the metrics check is skipped.
        schema: Passed through to ``compare_results``.
        *args: Positional arguments forwarded to both backend methods.

    Returns:
        The result of the sessions backend call.

    Exceptions from the sessions call propagate. Exceptions from the
    metrics call or the comparison are captured and counted as
    ``releasehealth.metrics.crashed``.
    """
    if rollup is None:
        rollup = 0  # force exact date comparison if not specified

    sessions_fn = getattr(self.sessions, fn_name)
    set_tag("releasehealth.duplex.rollup", str(rollup))
    set_tag("releasehealth.duplex.method", fn_name)
    # organization may legitimately be None (handled just below); a missing
    # default here would raise AttributeError before that check runs.
    set_tag("releasehealth.duplex.org_id", str(getattr(organization, "id", None)))
    set_extra("function_args", args)  # Make sure we always know all function args

    tags = {"method": fn_name, "rollup": str(rollup)}
    with timer("releasehealth.sessions.duration", tags=tags, sample_rate=1.0):
        ret_val = sessions_fn(*args)

    if organization is None or not features.has(
        "organizations:release-health-check-metrics", organization
    ):
        return ret_val  # cannot check feature without organization

    set_context(
        "release-health-duplex-sessions",
        {
            "sessions": ret_val,
        },
    )

    try:
        # We read from the metrics source even if there is no need to compare.
        metrics_fn = getattr(self.metrics, fn_name)
        with timer("releasehealth.metrics.duration", tags=tags, sample_rate=1.0):
            metrics_val = metrics_fn(*args)

        if not isinstance(should_compare, bool):
            # should compare depends on the session result
            # evaluate it now
            should_compare = should_compare(ret_val)

        incr(
            "releasehealth.metrics.check_should_compare",
            tags={"should_compare": str(should_compare), **tags},
            sample_rate=1.0,
        )

        if not should_compare:
            return ret_val

        # compare_results gets its own copy so the returned result object
        # is not handed to the comparison directly.
        sessions_copy = deepcopy(ret_val)

        set_context("release-health-duplex-metrics", {"metrics": metrics_val})

        with timer("releasehealth.results-diff.duration", tags=tags, sample_rate=1.0):
            errors = compare_results(sessions_copy, metrics_val, rollup, None, schema)
        set_context("release-health-duplex-errors", {"errors": [str(error) for error in errors]})

        should_report = features.has(
            "organizations:release-health-check-metrics-report", organization
        )

        incr(
            "releasehealth.metrics.compare",
            tags={"has_errors": str(bool(errors)), "reported": str(should_report), **tags},
            sample_rate=1.0,
        )

        if errors and should_report:
            tag_delta(errors, tags)
            # We heavily rely on Sentry's message sanitization to properly deduplicate this
            capture_message(f"{fn_name} - Release health metrics mismatch: {errors[0]}")
    except Exception:
        # Best effort: the shadow check must never affect the real result.
        capture_exception()
        incr(
            "releasehealth.metrics.crashed",
            tags=tags,
            sample_rate=1.0,
        )

    return ret_val
def start_group_reprocessing(project_id,
                             group_id,
                             remaining_events,
                             max_events=None,
                             acting_user_id=None):
    """Put a group into reprocessing and create the group that replaces it.

    Steps performed, in order:

    1. Inside a database transaction: set the existing group's status to
       ``REPROCESSING`` and clear its ``short_id``; save a copy of it as a
       new row carrying the original status and ``short_id`` with
       ``times_seen`` reset to 0; re-point every model in
       ``GROUP_MODELS_TO_MIGRATE`` from the old group id to the new one.
    2. Query snuba for the number of events of the old group.
    3. Create a ``REPROCESS`` activity on the old group id.
    4. Store the sync counter and reprocessing info in redis, both with a
       TTL of ``settings.SENTRY_REPROCESSING_SYNC_TTL``.

    Args:
        project_id: Project id used in the snuba event-count query.
        group_id: Id of the group to reprocess.
        remaining_events: Not used in the body of this function.
        max_events: Optional cap applied to the event count stored in the
            activity and as ``totalEvents``; the sync counter is not capped.
        acting_user_id: Stored as ``user_id`` on the created activity.

    Returns:
        The id of the newly created group.

    Raises:
        RuntimeError: If the group is already in ``REPROCESSING`` status.
    """
    from django.db import transaction

    with transaction.atomic():
        # NOTE(review): presumably raises the model's DoesNotExist for an
        # unknown group_id; that is not handled here.
        group = models.Group.objects.get(id=group_id)
        original_status = group.status
        if original_status == models.GroupStatus.REPROCESSING:
            # This is supposed to be a rather unlikely UI race when two people
            # click reprocessing in the UI at the same time.
            #
            # During reprocessing the button is greyed out.
            raise RuntimeError(
                "Cannot reprocess group that is currently being reprocessed")

        original_short_id = group.short_id
        group.status = models.GroupStatus.REPROCESSING
        # satisfy unique constraint of (project_id, short_id)
        # we manually tested that multiple groups with (project_id=1,
        # short_id=null) can exist in postgres
        group.short_id = None
        group.save()

        # Create a duplicate row that has the same attributes by nulling out
        # the primary key and saving
        group.pk = group.id = None
        new_group = group  # rename variable just to avoid confusion
        del group
        new_group.status = original_status
        new_group.short_id = original_short_id

        # this will be incremented by either the events that are
        # reprocessed, or handle_remaining_events
        #
        # XXX(markus): times_seen etc are unlikely to be correct ootb,
        # especially if handle_remaining_events is used a lot.
        new_group.times_seen = 0

        new_group.save()

        # This migrates all models that are associated with a group but not
        # directly with an event, i.e. everything but event attachments and user
        # reports. Those other updates are run per-event (in
        # post-process-forwarder) to not cause too much load on pg.
        for model in GROUP_MODELS_TO_MIGRATE:
            model.objects.filter(group_id=group_id).update(
                group_id=new_group.id)

    # Get event counts of issue (for all environments etc). This was copypasted
    # and simplified from groupserializer.
    #
    # event_count may be capped by max_events below; sync_count keeps the
    # uncapped value from the query.
    event_count = sync_count = snuba.aliased_query(
        aggregations=[["count()", "", "times_seen"]],  # select
        dataset=snuba.Dataset.Events,  # from
        conditions=[["group_id", "=", group_id],
                    ["project_id", "=", project_id]],  # where
        referrer="reprocessing2.start_group_reprocessing",
    )["data"][0]["times_seen"]

    sentry_sdk.set_extra("event_count", event_count)

    if max_events is not None:
        event_count = min(max_events, event_count)

    # Create activity on *old* group as that will serve the landing page for our
    # reprocessing status
    #
    # Later the activity is migrated to the new group where it is used to serve
    # the success message.
    new_activity = models.Activity.objects.create(
        type=models.Activity.REPROCESS,
        project=new_group.project,
        ident=str(group_id),
        group_id=group_id,
        user_id=acting_user_id,
        data={
            "eventCount": event_count,
            "oldGroupId": group_id,
            "newGroupId": new_group.id
        },
    )

    # New Activity Timestamp
    date_created = new_activity.datetime

    # Both redis entries are keyed by the *old* group id.
    client = _get_sync_redis_client()
    client.setex(_get_sync_counter_key(group_id),
                 settings.SENTRY_REPROCESSING_SYNC_TTL, sync_count)
    # NOTE(review): date_created comes from the activity's datetime field;
    # this assumes the json module in scope can serialize it — confirm.
    client.setex(
        _get_info_reprocessed_key(group_id),
        settings.SENTRY_REPROCESSING_SYNC_TTL,
        json.dumps({
            "dateCreated": date_created,
            "syncCount": sync_count,
            "totalEvents": event_count
        }),
    )

    return new_group.id