def hash_user_identifier(identifier):
    identifier = force_text(identifier, errors="replace")
    return sum(map(ord, identifier))
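# A minimal sketch of the behavior above (force_text assumed to coerce to text
# as in Django): the hash is just the sum of the identifier's code points, so
# it is stable across runs but order-insensitive -- "ab" and "ba" collide.
assert sum(map(ord, u"ab")) == sum(map(ord, u"ba")) == 195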
def build_project_breakdown_series(reports):
    Key = namedtuple("Key", "label url color data")

    def get_legend_data(report):
        filtered, rate_limited = report.usage_summary
        return {
            "events": sum(sum(value) for timestamp, value in report.series),
            "filtered": filtered,
            "rate_limited": rate_limited,
        }

    # Find the reports with the most total events. (The number of reports to
    # keep is the same as the number of colors available to use in the legend.)
    instances = map(
        operator.itemgetter(0),
        sorted(
            reports.items(),
            key=lambda instance__report: sum(
                sum(values) for timestamp, values in instance__report[1][0]
            ),
            reverse=True,
        ),
    )[: len(colors)]

    # Start building the list of items to include in the report chart. This
    # is a list of [Key, Report] pairs, in *ascending* order of the total sum
    # of values in the series. (This is so when we render the series, the
    # largest color blocks are at the bottom and it feels appropriately
    # weighted.)
    selections = map(
        lambda instance__color: (
            Key(
                instance__color[0].slug,
                instance__color[0].get_absolute_url(),
                instance__color[1],
                get_legend_data(reports[instance__color[0]]),
            ),
            reports[instance__color[0]],
        ),
        zip(instances, colors),
    )[::-1]

    # Collect any reports that weren't in the selection set, merge them
    # together and add the result at the top (front) of the stack.
    overflow = set(reports) - set(instances)
    if overflow:
        overflow_report = reduce(merge_reports, [reports[instance] for instance in overflow])
        selections.insert(
            0, (Key("Other", None, "#f2f0fa", get_legend_data(overflow_report)), overflow_report)
        )

    def summarize(key, points):
        total = sum(points)
        return [(key, total)] if total else []

    # Collect all of the independent series into a single series to make it
    # easier to render, resulting in a series where each value is a sequence of
    # (key, count) pairs.
    series = reduce(
        merge_series,
        [series_map(functools.partial(summarize, key), report[0]) for key, report in selections],
    )

    legend = [key for key, value in reversed(selections)]

    return {
        "points": [(to_datetime(timestamp), value) for timestamp, value in series],
        "maximum": max(sum(count for key, count in value) for timestamp, value in series),
        "legend": {
            "rows": legend,
            "total": Key(
                "Total", None, None, reduce(merge_mappings, [key.data for key in legend])
            ),
        },
    }
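# A standalone sketch of the `summarize` helper defined above (the Key
# namedtuple is re-declared here only to keep the example self-contained): a
# point's raw values collapse to [(key, total)], or to [] when the total is
# zero so that empty points drop out of the merged series.
from collections import namedtuple

ExampleKey = namedtuple("Key", "label url color data")

def _summarize(key, points):
    total = sum(points)
    return [(key, total)] if total else []

_key = ExampleKey("backend", None, "#444674", {})
assert _summarize(_key, [0, 0]) == []
assert _summarize(_key, [2, 3]) == [(_key, 5)]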
def bulk_raw_query(snuba_param_list, referrer=None):
    headers = {}
    if referrer:
        headers["referer"] = referrer

    query_param_list = map(_prepare_query_params, snuba_param_list)

    def snuba_query(params):
        query_params, forward, reverse, thread_hub = params
        try:
            with timer("snuba_query"):
                referrer = headers.get("referer", "<unknown>")
                if SNUBA_INFO:
                    logger.info("{}.body: {}".format(referrer, json.dumps(query_params)))
                    query_params["debug"] = True
                body = json.dumps(query_params)
                with thread_hub.start_span(
                    op="snuba", description=u"query {}".format(referrer)
                ) as span:
                    span.set_tag("referrer", referrer)
                    for param_key, param_data in six.iteritems(query_params):
                        span.set_data(param_key, param_data)
                    return (
                        _snuba_pool.urlopen("POST", "/query", body=body, headers=headers),
                        forward,
                        reverse,
                    )
        except urllib3.exceptions.HTTPError as err:
            raise SnubaError(err)

    with sentry_sdk.start_span(
        op="start_snuba_query",
        description=u"running {} snuba queries".format(len(snuba_param_list)),
    ) as span:
        span.set_tag("referrer", headers.get("referer", "<unknown>"))
        if len(snuba_param_list) > 1:
            query_results = list(
                _query_thread_pool.map(
                    snuba_query, [params + (Hub(Hub.current),) for params in query_param_list]
                )
            )
        else:
            # No need to submit to the thread pool if we're just performing a
            # single query
            query_results = [snuba_query(query_param_list[0] + (Hub(Hub.current),))]

    results = []
    for response, _, reverse in query_results:
        try:
            body = json.loads(response.data)
            if SNUBA_INFO:
                if "sql" in body:
                    logger.info(
                        "{}.sql: {}".format(headers.get("referer", "<unknown>"), body["sql"])
                    )
                if "error" in body:
                    logger.info(
                        "{}.err: {}".format(headers.get("referer", "<unknown>"), body["error"])
                    )
        except ValueError:
            if response.status != 200:
                logger.error("snuba.query.invalid-json")
                raise SnubaError("Failed to parse snuba error response")
            raise UnexpectedResponseError(
                u"Could not decode JSON response: {}".format(response.data)
            )

        if response.status != 200:
            if body.get("error"):
                error = body["error"]
                if response.status == 429:
                    raise RateLimitExceeded(error["message"])
                elif error["type"] == "schema":
                    raise SchemaValidationError(error["message"])
                elif error["type"] == "clickhouse":
                    raise clickhouse_error_codes_map.get(error["code"], QueryExecutionError)(
                        error["message"]
                    )
                else:
                    raise SnubaError(error["message"])
            else:
                raise SnubaError(u"HTTP {}".format(response.status))

        # Forward and reverse translation maps from model ids to snuba keys, per column
        body["data"] = [reverse(d) for d in body["data"]]
        results.append(body)

    return results
def make_frequency_table_keys(self, model, rollup, timestamp, key, environment_id):
    prefix = self.make_key(model, rollup, timestamp, key, environment_id)
    return map(operator.methodcaller("format", prefix), ("{}:i", "{}:e"))
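# Illustrative sketch of the key shapes produced above, using a made-up prefix
# rather than a real make_key() result: the same base key gets two suffixed
# variants (":i" and ":e").
import operator

_prefix = "ts:4:1577836800:1:10"  # hypothetical prefix, not a real make_key() value
assert list(map(operator.methodcaller("format", _prefix), ("{}:i", "{}:e"))) == [
    "ts:4:1577836800:1:10:i",
    "ts:4:1577836800:1:10:e",
]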
def get(self, request, organization): """ List an Organization's Issues ````````````````````````````` Return a list of issues (groups) bound to an organization. All parameters are supplied as query string parameters. A default query of ``is:unresolved`` is applied. To return results with other statuses send an new query value (i.e. ``?query=`` for all results). The ``groupStatsPeriod`` parameter can be used to select the timeline stats which should be present. Possible values are: '' (disable), '24h', '14d' The ``statsPeriod`` parameter can be used to select a date window starting from now. Ex. ``14d``. The ``start`` and ``end`` parameters can be used to select an absolute date period to fetch issues from. :qparam string statsPeriod: an optional stat period (can be one of ``"24h"``, ``"14d"``, and ``""``). :qparam string groupStatsPeriod: an optional stat period (can be one of ``"24h"``, ``"14d"``, and ``""``). :qparam string start: Beginning date. You must also provide ``end``. :qparam string end: End date. You must also provide ``start``. :qparam bool shortIdLookup: if this is set to true then short IDs are looked up by this function as well. This can cause the return value of the function to return an event issue of a different project which is why this is an opt-in. Set to `1` to enable. :qparam querystring query: an optional Sentry structured search query. If not provided an implied ``"is:unresolved"`` is assumed.) :pparam string organization_slug: the slug of the organization the issues belong to. :auth: required """ stats_period = request.GET.get("groupStatsPeriod") try: start, end = get_date_range_from_params(request.GET) except InvalidParams as e: raise ParseError(detail=six.text_type(e)) has_dynamic_issue_counts = features.has( "organizations:dynamic-issue-counts", organization, actor=request.user) if stats_period not in (None, "", "24h", "14d", "auto"): return Response({"detail": ERR_INVALID_STATS_PERIOD}, status=400) elif stats_period is None: # default if no dynamic-issue-counts stats_period = "24h" elif stats_period == "": # disable stats stats_period = None if stats_period == "auto": stats_period_start = start stats_period_end = end else: stats_period_start = None stats_period_end = None environments = self.get_environments(request, organization) serializer = functools.partial( StreamGroupSerializerSnuba, environment_ids=[env.id for env in environments], stats_period=stats_period, stats_period_start=stats_period_start, stats_period_end=stats_period_end, ) projects = self.get_projects(request, organization) project_ids = [p.id for p in projects] if not projects: return Response([]) if len(projects) > 1 and not features.has("organizations:global-views", organization, actor=request.user): return Response( { "detail": "You do not have the multi project stream feature enabled" }, status=400) # we ignore date range for both short id and event ids query = request.GET.get("query", "").strip() if query: # check to see if we've got an event ID event_id = normalize_event_id(query) if event_id: # For a direct hit lookup we want to use any passed project ids # (we've already checked permissions on these) plus any other # projects that the user is a member of. This gives us a better # chance of returning the correct result, even if the wrong # project is selected. 
direct_hit_projects = set(project_ids) | set( [project.id for project in request.access.projects]) groups = list( Group.objects.filter_by_event_id(direct_hit_projects, event_id)) if len(groups) == 1: response = Response( serialize(groups, request.user, serializer(matching_event_id=event_id))) response["X-Sentry-Direct-Hit"] = "1" return response if groups: return Response( serialize(groups, request.user, serializer())) group = get_by_short_id(organization.id, request.GET.get("shortIdLookup"), query) if group is not None: # check all projects user has access to if request.access.has_project_access(group.project): response = Response( serialize([group], request.user, serializer())) response["X-Sentry-Direct-Hit"] = "1" return response # If group ids specified, just ignore any query components try: group_ids = set(map(int, request.GET.getlist("group"))) except ValueError: return Response({"detail": "Group ids must be integers"}, status=400) if group_ids: groups = list( Group.objects.filter(id__in=group_ids, project_id__in=project_ids)) if any(g for g in groups if not request.access.has_project_access(g.project)): raise PermissionDenied return Response(serialize(groups, request.user, serializer())) try: cursor_result, query_kwargs = self._search( request, organization, projects, environments, { "count_hits": True, "date_to": end, "date_from": start }, ) except (ValidationError, discover.InvalidSearchQuery) as exc: return Response({"detail": six.text_type(exc)}, status=400) results = list(cursor_result) if has_dynamic_issue_counts: context = serialize( results, request.user, serializer( start=start, end=end, search_filters=query_kwargs["search_filters"] if "search_filters" in query_kwargs else None, has_dynamic_issue_counts=True, ), ) else: context = serialize(results, request.user, serializer()) # HACK: remove auto resolved entries # TODO: We should try to integrate this into the search backend, since # this can cause us to arbitrarily return fewer results than requested. status = [ search_filter for search_filter in query_kwargs.get("search_filters", []) if search_filter.key.name == "status" ] if status and status[0].value.raw_value == GroupStatus.UNRESOLVED: context = [r for r in context if r["status"] == "unresolved"] response = Response(context) self.add_cursor_headers(request, response, cursor_result) # TODO(jess): add metrics that are similar to project endpoint here return response
def validate(self, data): organization = self.context["organization"] query_info = data["query_info"] # Validate the project field, if provided # A PermissionDenied error will be raised in `get_projects_by_id` if the request is invalid project_query = query_info.get("project") if project_query: get_projects_by_id = self.context["get_projects_by_id"] # Coerce the query into a set if isinstance(project_query, list): projects = get_projects_by_id(set(map(int, project_query))) else: projects = get_projects_by_id({int(project_query)}) query_info["project"] = [project.id for project in projects] # Discover Pre-processing if data["query_type"] == ExportQueryType.DISCOVER_STR: # coerce the fields into a list as needed base_fields = query_info.get("field", []) if not isinstance(base_fields, list): base_fields = [base_fields] equations, fields = categorize_columns(base_fields) if len(base_fields) > MAX_FIELDS: detail = f"You can export up to {MAX_FIELDS} fields at a time. Please delete some and try again." raise serializers.ValidationError(detail) elif len(base_fields) == 0: raise serializers.ValidationError( "at least one field is required to export") if "query" not in query_info: detail = "query is a required to export, please pass an empty string if you don't want to set one" raise serializers.ValidationError(detail) query_info["field"] = fields query_info["equations"] = equations if not query_info.get("project"): projects = self.context["get_projects"]() query_info["project"] = [project.id for project in projects] # make sure to fix the export start/end times to ensure consistent results try: start, end = get_date_range_from_params(query_info) except InvalidParams as e: sentry_sdk.set_tag("query.error_reason", "Invalid date params") raise serializers.ValidationError(str(e)) if "statsPeriod" in query_info: del query_info["statsPeriod"] if "statsPeriodStart" in query_info: del query_info["statsPeriodStart"] if "statsPeriodEnd" in query_info: del query_info["statsPeriodEnd"] query_info["start"] = start.isoformat() query_info["end"] = end.isoformat() query_info["use_snql"] = features.has( "organizations:discover-use-snql", organization) # validate the query string by trying to parse it processor = DiscoverProcessor( discover_query=query_info, organization_id=organization.id, ) try: snuba_filter = get_filter(query_info["query"], processor.params) if len(equations) > 0: resolved_equations, _, _ = resolve_equation_list( equations, fields) else: resolved_equations = [] resolve_field_list( fields.copy(), snuba_filter, auto_fields=True, auto_aggregations=True, resolved_equations=resolved_equations, ) except InvalidSearchQuery as err: raise serializers.ValidationError(str(err)) return data
def get(self, request, organization): """ Get the stats on an Organization's Issues ````````````````````````````` Return a list of issues (groups) with the requested stats. All parameters are supplied as query string parameters. :qparam list groups: A list of group ids :qparam list expand: an optional list of strings to opt in to additional data. Supports `inbox` :qparam list collapse: an optional list of strings to opt out of certain pieces of data. Supports `stats`, `lifetime`, `filtered`, and `base` The ``groupStatsPeriod`` parameter can be used to select the timeline stats which should be present. Possible values are: '' (disable), '24h', '14d' The ``statsPeriod`` parameter can be used to select a date window starting from now. Ex. ``14d``. The ``start`` and ``end`` parameters can be used to select an absolute date period to fetch issues from. :qparam string statsPeriod: an optional stat period (can be one of ``"24h"``, ``"14d"``, and ``""``). :qparam string groupStatsPeriod: an optional stat period (can be one of ``"24h"``, ``"14d"``, and ``""``). :qparam string start: Beginning date. You must also provide ``end``. :qparam string end: End date. You must also provide ``start``. """ stats_period = request.GET.get("groupStatsPeriod") try: start, end = get_date_range_from_params(request.GET) except InvalidParams as e: raise ParseError(detail=six.text_type(e)) expand = request.GET.getlist("expand", []) collapse = request.GET.getlist("collapse", ["base"]) has_inbox = features.has("organizations:inbox", organization, actor=request.user) projects = self.get_projects(request, organization) project_ids = [p.id for p in projects] try: group_ids = set(map(int, request.GET.getlist("groups"))) except ValueError: raise ParseError(detail="Group ids must be integers") if not group_ids: raise ParseError( detail= "You should include `groups` with your request. (i.e. groups=1,2,3)" ) else: groups = list( Group.objects.filter(id__in=group_ids, project_id__in=project_ids)) if not groups: raise ParseError(detail="No matching groups found") elif len(groups) > 25: raise ParseError(detail="Too many groups requested.") elif any(g for g in groups if not request.access.has_project_access(g.project)): raise PermissionDenied if stats_period not in (None, "", "24h", "14d", "auto"): raise ParseError(detail=ERR_INVALID_STATS_PERIOD) stats_period, stats_period_start, stats_period_end = calculate_stats_period( stats_period, start, end) environments = self.get_environments(request, organization) query_kwargs = build_query_params_from_request(request, organization, projects, environments) context = serialize( groups, request.user, StreamGroupSerializerSnuba( environment_ids=[env.id for env in environments], stats_period=stats_period, stats_period_start=stats_period_start, stats_period_end=stats_period_end, collapse=collapse, expand=expand, has_inbox=has_inbox, start=start, end=end, search_filters=query_kwargs["search_filters"] if "search_filters" in query_kwargs else None, ), ) response = Response(context) return response
def __str__(self): return ".".join(map(force_text, self))
def post(self, request, organization): """ Create a new asynchronous file export task, and email user upon completion, """ # Ensure new data-export features are enabled if not features.has("organizations:data-export", organization): return Response(status=404) # Get environment_id and limit if available try: environment_id = self._get_environment_id_from_request( request, organization.id) except Environment.DoesNotExist as error: return Response(error, status=400) limit = request.data.get("limit") # Validate the data export payload serializer = DataExportQuerySerializer(data=request.data) if not serializer.is_valid(): return Response(serializer.errors, status=400) data = serializer.validated_data # Validate the project field, if provided # A PermissionDenied error will be raised in `_get_projects_by_id` if the request is invalid project_query = data["query_info"].get("project") if project_query: # Coerce the query into a set if isinstance(project_query, list): projects = self._get_projects_by_id( set(map(int, project_query)), request, organization) else: projects = self._get_projects_by_id({int(project_query)}, request, organization) data["query_info"]["project"] = [ project.id for project in projects ] # Discover Pre-processing if data["query_type"] == ExportQueryType.DISCOVER_STR: if not features.has("organizations:discover-basic", organization, actor=request.user): return Response(status=403) if "project" not in data["query_info"]: projects = self.get_projects(request, organization) data["query_info"]["project"] = [ project.id for project in projects ] try: # If this user has sent a sent a request with the same payload and organization, # we return them the latest one that is NOT complete (i.e. don't start another) query_type = ExportQueryType.from_str(data["query_type"]) data_export, created = ExportedData.objects.get_or_create( organization=organization, user=request.user, query_type=query_type, query_info=data["query_info"], date_finished=None, ) status = 200 if created: metrics.incr("dataexport.enqueue", tags={"query_type": data["query_type"]}, sample_rate=1.0) assemble_download.delay(data_export_id=data_export.id, export_limit=limit, environment_id=environment_id) status = 201 except ValidationError as e: # This will handle invalid JSON requests metrics.incr("dataexport.invalid", tags={"query_type": data.get("query_type")}, sample_rate=1.0) return Response({"detail": six.text_type(e)}, status=400) return Response(serialize(data_export, request.user), status=status)
def _start_service(client, name, containers, project, fast=False, always_start=False): from django.conf import settings import docker options = containers[name] # HACK(mattrobenolt): special handle snuba backend because it needs to # handle different values based on the eventstream backend # For snuba, we can't run the full suite of devserver, but can only # run the api. if name == "snuba" and "snuba" in settings.SENTRY_EVENTSTREAM: options["environment"].pop("DEFAULT_BROKERS", None) options["command"] = ["devserver", "--no-workers"] for key, value in options["environment"].items(): options["environment"][key] = value.format(containers=containers) pull = options.pop("pull", False) if not fast: if pull: click.secho("> Pulling image '%s'" % options["image"], err=True, fg="green") client.images.pull(options["image"]) else: # We want make sure to pull everything on the first time, # (the image doesn't exist), regardless of pull=True. try: client.images.get(options["image"]) except docker.errors.NotFound: click.secho("> Pulling image '%s'" % options["image"], err=True, fg="green") client.images.pull(options["image"]) for mount in options.get("volumes", {}).keys(): if "/" not in mount: get_or_create(client, "volume", project + "_" + mount) options["volumes"][project + "_" + mount] = options["volumes"].pop(mount) listening = "" if options["ports"]: listening = "(listening: %s)" % ", ".join(map(text_type, options["ports"].values())) # If a service is associated with the devserver, then do not run the created container. # This was mainly added since it was not desirable for reverse_proxy to occupy port 8000 on the # first "devservices up". # See https://github.com/getsentry/sentry/pull/18362#issuecomment-616785458 with_devserver = options.pop("with_devserver", False) # Two things call _start_service. # devservices up, and devservices attach. # Containers that should be started on-demand with devserver # should ONLY be started via the latter, which sets `always_start`. if with_devserver and not always_start: click.secho( "> Not starting container '%s' because it should be started on-demand with devserver." % options["name"], fg="yellow", ) # XXX: if always_start=False, do not expect to have a container returned 100% of the time. return None container = None try: container = client.containers.get(options["name"]) except docker.errors.NotFound: pass if container is not None: # devservices which are marked with pull True will need their containers # to be recreated with the freshly pulled image. should_reuse_container = not pull # Except if the container is started as part of devserver we should reuse it. # Or, if we're in fast mode (devservices up --fast) if with_devserver or fast: should_reuse_container = True if should_reuse_container: click.secho( "> Starting EXISTING container '%s' %s" % (container.name, listening), err=True, fg="yellow", ) # Note that if the container is already running, this will noop. # This makes repeated `devservices up` quite fast. container.start() return container click.secho("> Stopping container '%s'" % container.name, err=True, fg="yellow") container.stop() click.secho("> Removing container '%s'" % container.name, err=True, fg="yellow") container.remove() click.secho("> Creating container '%s'" % options["name"], err=True, fg="yellow") container = client.containers.create(**options) click.secho("> Starting container '%s' %s" % (container.name, listening), err=True, fg="yellow") container.start() return container
def get_function_layer_arns(function):
    layers = function.get("Layers", [])
    return map(_get_arn_from_layer, layers)
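# A hedged sketch of the payload shape this expects: an AWS Lambda
# get_function()-style dict where each layer entry carries an "Arn" key.
# _get_arn_from_layer is stood in for by a plain itemgetter here; the real
# helper may differ.
from operator import itemgetter

_function = {
    "FunctionName": "process-events",
    "Layers": [{"Arn": "arn:aws:lambda:us-east-1:123456789012:layer:sentry-sdk:3"}],
}
assert list(map(itemgetter("Arn"), _function.get("Layers", []))) == [
    "arn:aws:lambda:us-east-1:123456789012:layer:sentry-sdk:3"
]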
def encode_row(row):
    return map(force_bytes, row)
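# Minimal sketch of the row encoding (force_bytes assumed to UTF-8 encode text,
# as Django's force_bytes does): every cell comes back as bytes.
_row = [u"caf\xe9", u"42"]
assert [cell.encode("utf-8") for cell in _row] == [b"caf\xc3\xa9", b"42"]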
from __future__ import absolute_import

import six

from sentry.utils.compat import map

version = (0, 7, 28)

__version__ = ".".join(map(six.text_type, version))
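# The version tuple renders as a dotted string; a quick sketch of the same
# pattern with the stdlib (six.text_type is str on Python 3):
assert ".".join(map(str, (0, 7, 28))) == "0.7.28"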
def resolve_field_list(fields, snuba_args, params=None, auto_fields=True):
    """
    Expand a list of fields based on aliases and aggregate functions.

    Returns a dict of aggregations, selected_columns, and groupby that can be merged
    into the result of get_snuba_query_args() to build a more complete snuba query
    based on event search conventions.
    """
    # If project.name is requested, get the project.id from Snuba so we
    # can use this to look up the name in Sentry
    if "project.name" in fields:
        fields.remove("project.name")
        if "project.id" not in fields:
            fields.append("project.id")

    aggregations = []
    columns = []
    groupby = []
    for field in fields:
        column_additions, agg_additions = resolve_field(field, params)
        if column_additions:
            columns.extend(column_additions)

        if agg_additions:
            aggregations.extend(agg_additions)

    rollup = snuba_args.get("rollup")
    if not rollup and auto_fields:
        # Ensure fields we require to build a functioning interface
        # are present. We don't add fields when using a rollup as the additional fields
        # would be aggregated away. When there are aggregations
        # we use argMax to get the latest event/projectid so we can create links.
        # The `projectid` output name is not a typo: using `project_id`
        # generates invalid queries.
        if not aggregations and "id" not in columns:
            columns.append("id")
        if not aggregations and "project.id" not in columns:
            columns.append("project.id")
        if aggregations and "latest_event" not in map(lambda a: a[-1], aggregations):
            aggregations.extend(deepcopy(FIELD_ALIASES["latest_event"]["aggregations"]))
        if aggregations and "project.id" not in columns:
            aggregations.append(["argMax", ["project.id", "timestamp"], "projectid"])

    if rollup and columns and not aggregations:
        raise InvalidSearchQuery("You cannot use rollup without an aggregate field.")

    orderby = snuba_args.get("orderby")
    if orderby:
        orderby = resolve_orderby(orderby, columns, aggregations)

    # If aggregations are present all columns
    # need to be added to the group by so that the query is valid.
    if aggregations:
        groupby.extend(columns)

    return {
        "selected_columns": columns,
        "aggregations": aggregations,
        "groupby": groupby,
        "orderby": orderby,
    }
def __str__(self): return "".join(map(six.text_type, (self.key.name, self.operator, self.value.raw_value)))
def providers(self):
    # TODO: use feature flag in the future
    providers = filter(lambda x: x.has_stacktrace_linking, list(integrations.all()))
    return map(lambda x: x.key, providers)
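# Self-contained sketch of the filter/map pipeline above, with stand-in
# integration objects instead of the real integrations.all() registry:
from collections import namedtuple

_Integration = namedtuple("Integration", "key has_stacktrace_linking")
_all = [_Integration("github", True), _Integration("jira", False), _Integration("gitlab", True)]
assert [i.key for i in _all if i.has_stacktrace_linking] == ["github", "gitlab"]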
def _get_subscriptions(self, item_list, user): """ Returns a mapping of group IDs to a two-tuple of (subscribed: bool, subscription: GroupSubscription or None) for the provided user and groups. """ if not item_list: return {} # Collect all of the projects to look up, and keep a set of groups that # are part of that project. (Note that the common -- but not only -- # case here is that all groups are part of the same project.) projects = defaultdict(set) for group in item_list: projects[group.project].add(group) # Fetch the options for each project -- we'll need this to identify if # a user has totally disabled workflow notifications for a project. # NOTE: This doesn't use `values_list` because that bypasses field # value decoding, so the `value` field would not be unpickled. options = { option.project_id: option.value for option in UserOption.objects.filter( Q(project__in=projects.keys()) | Q(project__isnull=True), user=user, key="workflow:notifications", ) } # If there is a subscription record associated with the group, we can # just use that to know if a user is subscribed or not, as long as # notifications aren't disabled for the project. subscriptions = { subscription.group_id: subscription for subscription in GroupSubscription.objects.filter( group__in=list( itertools.chain.from_iterable( map( lambda project__groups: project__groups[1] if not options.get(project__groups[0].id, options.get(None)) == UserOptionValue.no_conversations else [], projects.items(), ))), user=user, ) } # This is the user's default value for any projects that don't have # the option value specifically recorded. (The default # "participating_only" value is convention.) global_default_workflow_option = options.get( None, UserOptionValue.participating_only) results = {} for project, groups in projects.items(): project_default_workflow_option = options.get( project.id, global_default_workflow_option) for group in groups: subscription = subscriptions.get(group.id) if subscription is not None: results[group.id] = (subscription.is_active, subscription) else: results[group.id] = ( (project_default_workflow_option == UserOptionValue.all_conversations, None) if project_default_workflow_option != UserOptionValue.no_conversations else disabled) return results
def text_shingle(n, value):
    return map(u"".join, shingle(n, value))
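# A hedged, self-contained sketch of character shingling: `shingle` is assumed
# to yield every window of n consecutive items, and text_shingle joins each
# window back into a string.
def _shingle(n, value):
    for i in range(len(value) - n + 1):
        yield value[i:i + n]

assert list(map(u"".join, _shingle(3, u"sentry"))) == [u"sen", u"ent", u"ntr", u"try"]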
def psycopg2_version():
    import psycopg2

    version = psycopg2.__version__.split()[0].split(".")
    return tuple(map(int, version))
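# psycopg2.__version__ typically looks like "2.8.6 (dt dec pq3 ext lo64)"; the
# parsing above keeps only the leading dotted version. A quick sketch:
_raw = "2.8.6 (dt dec pq3 ext lo64)"
assert tuple(map(int, _raw.split()[0].split("."))) == (2, 8, 6)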
# TODO(mattrobenolt): Autodiscover commands?
list(
    map(
        lambda cmd: cli.add_command(import_string(cmd)),
        (
            "sentry.runner.commands.backup.export",
            "sentry.runner.commands.backup.import_",
            "sentry.runner.commands.cleanup.cleanup",
            "sentry.runner.commands.config.config",
            "sentry.runner.commands.createuser.createuser",
            "sentry.runner.commands.devserver.devserver",
            "sentry.runner.commands.django.django",
            "sentry.runner.commands.exec.exec_",
            "sentry.runner.commands.files.files",
            "sentry.runner.commands.help.help",
            "sentry.runner.commands.init.init",
            "sentry.runner.commands.plugins.plugins",
            "sentry.runner.commands.queues.queues",
            "sentry.runner.commands.repair.repair",
            "sentry.runner.commands.run.run",
            "sentry.runner.commands.start.start",
            "sentry.runner.commands.tsdb.tsdb",
            "sentry.runner.commands.upgrade.upgrade",
            "sentry.runner.commands.permissions.permissions",
            "sentry.runner.commands.devservices.devservices",
        ),
    )
)


def make_django_command(name, django_command=None, help=None):
def get(self, request, organization): """ Retrieve Event Counts for an Organization ````````````````````````````````````````` .. caution:: This endpoint may change in the future without notice. Return a set of points representing a normalized timestamp and the number of events seen in the period. :pparam string organization_slug: the slug of the organization for which the stats should be retrieved. :qparam string stat: the name of the stat to query (``"received"``, ``"rejected"``, ``"blacklisted"``) :qparam timestamp since: a timestamp to set the start of the query in seconds since UNIX epoch. :qparam timestamp until: a timestamp to set the end of the query in seconds since UNIX epoch. :qparam string resolution: an explicit resolution to search for (one of ``10s``, ``1h``, and ``1d``) :auth: required """ group = request.GET.get("group", "organization") if group == "organization": keys = [organization.id] elif group == "project": team_list = Team.objects.get_for_user(organization=organization, user=request.user) project_ids = request.GET.getlist("projectID") if not project_ids: project_list = [] for team in team_list: project_list.extend( Project.objects.get_for_user(team=team, user=request.user)) else: project_list = Project.objects.filter(teams__in=team_list, id__in=project_ids) keys = list({p.id for p in project_list}) else: raise ValueError("Invalid group: %s" % group) if "id" in request.GET: id_filter_set = frozenset(map(int, request.GET.getlist("id"))) keys = [k for k in keys if k in id_filter_set] if not keys: return Response([]) stat_model = None stat = request.GET.get("stat", "received") query_kwargs = {} if stat == "received": if group == "project": stat_model = tsdb.models.project_total_received else: stat_model = tsdb.models.organization_total_received elif stat == "rejected": if group == "project": stat_model = tsdb.models.project_total_rejected else: stat_model = tsdb.models.organization_total_rejected elif stat == "blacklisted": if group == "project": stat_model = tsdb.models.project_total_blacklisted else: stat_model = tsdb.models.organization_total_blacklisted elif stat == "generated": if group == "project": stat_model = tsdb.models.project try: query_kwargs[ "environment_id"] = self._get_environment_id_from_request( request, organization.id) except Environment.DoesNotExist: raise ResourceDoesNotExist if stat_model is None: raise ValueError(f"Invalid group: {group}, stat: {stat}") data = tsdb.get_range(model=stat_model, keys=keys, **self._parse_args(request, **query_kwargs)) if group == "organization": data = data[organization.id] return Response(data)
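# A hedged client-side sketch of calling the stats endpoint above. The URL path
# is assumed from the organization_slug path parameter in the docstring, and
# the host and token are placeholders; the query parameters mirror the
# documented qparams.
import requests

_resp = requests.get(
    "https://sentry.example.com/api/0/organizations/my-org/stats/",
    headers={"Authorization": "Bearer <api-token>"},
    params={"stat": "received", "resolution": "1h"},
)
# The response is a series of (timestamp, count) points.
for _timestamp, _count in _resp.json():
    print(_timestamp, _count)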
def _find_hashes(project, hash_list):
    return map(
        lambda hash: GroupHash.objects.get_or_create(project=project, hash=hash)[0], hash_list
    )
def merge_frequencies(self, model, destination, sources, timestamp=None, environment_ids=None): environment_ids = list( (set(environment_ids) if environment_ids is not None else set()).union([None]) ) self.validate_arguments([model], environment_ids) if not self.enable_frequency_sketches: return rollups = [] for rollup, samples in self.rollups.items(): _, series = self.get_optimal_rollup_series( to_datetime(self.get_earliest_timestamp(rollup, timestamp=timestamp)), end=None, rollup=rollup, ) rollups.append((rollup, map(to_datetime, series))) for (cluster, durable), environment_ids in self.get_cluster_groups(environment_ids): exports = defaultdict(list) for source in sources: for rollup, series in rollups: for timestamp in series: keys = [] for environment_id in environment_ids: keys.extend( self.make_frequency_table_keys( model, rollup, to_timestamp(timestamp), source, environment_id ) ) arguments = ["EXPORT"] + list(self.DEFAULT_SKETCH_PARAMETERS) exports[source].extend([(CountMinScript, keys, arguments), ["DEL"] + keys]) try: responses = cluster.execute_commands(exports) except Exception: if durable: raise else: continue imports = [] for source, results in responses.items(): results = iter(results) for rollup, series in rollups: for timestamp in series: for environment_id, payload in zip(environment_ids, next(results).value): imports.append( ( CountMinScript, self.make_frequency_table_keys( model, rollup, to_timestamp(timestamp), destination, environment_id, ), ["IMPORT"] + list(self.DEFAULT_SKETCH_PARAMETERS) + [payload], ) ) next(results) # pop off the result of DEL try: cluster.execute_commands({destination: imports}) except Exception: if durable: raise
def browser(request, percy, live_server): window_size = request.config.getoption("window_size") window_width, window_height = map(int, window_size.split("x", 1)) driver_type = request.config.getoption("selenium_driver") headless = not request.config.getoption("no_headless") if driver_type == "chrome": options = webdriver.ChromeOptions() options.add_argument("no-sandbox") options.add_argument("disable-gpu") options.add_argument(u"window-size={}".format(window_size)) if headless: options.add_argument("headless") chrome_path = request.config.getoption("chrome_path") if chrome_path: options.binary_location = chrome_path chromedriver_path = request.config.getoption("chromedriver_path") chrome_args = {"options": options} if chromedriver_path: chrome_args["executable_path"] = chromedriver_path driver = start_chrome(**chrome_args) elif driver_type == "firefox": driver = webdriver.Firefox() elif driver_type == "phantomjs": phantomjs_path = request.config.getoption("phantomjs_path") if not phantomjs_path: phantomjs_path = os.path.join("node_modules", "phantomjs-prebuilt", "bin", "phantomjs") driver = webdriver.PhantomJS(executable_path=phantomjs_path) else: raise pytest.UsageError("--driver must be specified") driver.set_window_size(window_width, window_height) def fin(): # dump console log to stdout, will be shown when test fails for entry in driver.get_log("browser"): sys.stderr.write("[browser console] ") sys.stderr.write(repr(entry)) sys.stderr.write("\n") # Teardown Selenium. try: driver.quit() except Exception: pass request.node._driver = driver request.addfinalizer(fin) browser = Browser(driver, live_server, percy) browser.set_emulated_media([{ "name": "prefers-reduced-motion", "value": "reduce" }]) if hasattr(request, "cls"): request.cls.browser = browser request.node.browser = browser # bind webdriver to percy for snapshots percy.loader.webdriver = driver return driver
def fetch(self, timestamp, duration, organization, projects):
    assert all(project.organization_id == organization.id for project in projects)
    return map(functools.partial(self.build, timestamp, duration), projects)
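# Sketch of the functools.partial pattern used above, with plain stand-ins for
# self.build and the project list: the first two arguments are pinned once and
# map feeds in each project.
import functools

def _build(timestamp, duration, project):
    return (timestamp, duration, project)

_fetch_one = functools.partial(_build, 1577836800, 3600)
assert list(map(_fetch_one, ["backend", "frontend"])) == [
    (1577836800, 3600, "backend"),
    (1577836800, 3600, "frontend"),
]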
def get_data( self, model, keys, start, end, rollup=None, environment_ids=None, aggregation="count()", group_on_model=True, group_on_time=False, conditions=None, ): """ Normalizes all the TSDB parameters and sends a query to snuba. `group_on_time`: whether to add a GROUP BY clause on the 'time' field. `group_on_model`: whether to add a GROUP BY clause on the primary model. """ # XXX: to counteract the hack in project_key_stats.py if model in [ TSDBModel.key_total_received, TSDBModel.key_total_blacklisted, TSDBModel.key_total_rejected, ]: keys = list(set(map(lambda x: int(x), keys))) # 10s is the only rollup under an hour that we support if rollup and rollup == 10 and model in self.lower_rollup_query_settings: model_query_settings = self.lower_rollup_query_settings.get(model) else: model_query_settings = self.model_query_settings.get(model) if model_query_settings is None: raise Exception(u"Unsupported TSDBModel: {}".format(model.name)) model_group = model_query_settings.groupby model_aggregate = model_query_settings.aggregate groupby = [] if group_on_model and model_group is not None: groupby.append(model_group) if group_on_time: groupby.append("time") if aggregation == "count()" and model_aggregate is not None: # Special case, because count has different semantics, we change: # `COUNT(model_aggregate)` to `COUNT() GROUP BY model_aggregate` groupby.append(model_aggregate) model_aggregate = None columns = (model_query_settings.groupby, model_query_settings.aggregate) keys_map = dict(zip(columns, self.flatten_keys(keys))) keys_map = { k: v for k, v in six.iteritems(keys_map) if k is not None and v is not None } if environment_ids is not None: keys_map["environment"] = environment_ids aggregations = [[aggregation, model_aggregate, "aggregate"]] # For historical compatibility with bucket-counted TSDB implementations # we grab the original bucketed series and add the rollup time to the # timestamp of the last bucket to get the end time. rollup, series = self.get_optimal_rollup_series(start, end, rollup) start = to_datetime(series[0]) end = to_datetime(series[-1] + rollup) limit = min(10000, int(len(keys) * ((end - start).total_seconds() / rollup))) conditions = conditions if conditions is not None else [] if model_query_settings.conditions is not None: conditions += deepcopy(model_query_settings.conditions) # copy because we modify the conditions in snuba.query if keys: result = snuba.query( dataset=model_query_settings.dataset, start=start, end=end, groupby=groupby, conditions=conditions, filter_keys=keys_map, aggregations=aggregations, rollup=rollup, limit=limit, referrer="tsdb-modelid:{}".format(model.value), is_grouprelease=( model == TSDBModel.frequent_releases_by_group), ) else: result = {} if group_on_time: keys_map["time"] = series self.zerofill(result, groupby, keys_map) self.trim(result, groupby, keys) return result
def to_python(self, value):
    if not value:
        value = []
    if isinstance(value, str):
        value = json.loads(value)
    return map(self.of.to_python, value)
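# A hedged sketch of the coercion above: a JSON-encoded list is decoded first,
# then each element is converted by the inner field (stood in here by int).
import json

_raw = "[1, 2, 3]"
_value = json.loads(_raw) if isinstance(_raw, str) else (_raw or [])
assert list(map(int, _value)) == [1, 2, 3]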
def resolve_field_list(fields, snuba_args, params=None, auto_fields=True): """ Expand a list of fields based on aliases and aggregate functions. Returns a dist of aggregations, selected_columns, and groupby that can be merged into the result of get_snuba_query_args() to build a more complete snuba query based on event search conventions. """ aggregations = [] columns = [] groupby = [] project_key = "" # Which column to map to project names project_column = "project_id" # If project is requested, we need to map ids to their names since snuba only has ids if "project" in fields: fields.remove("project") project_key = "project" # since project.name is more specific, if both are included use project.name instead of project if PROJECT_NAME_ALIAS in fields: fields.remove(PROJECT_NAME_ALIAS) project_key = PROJECT_NAME_ALIAS if project_key: if "project.id" not in fields: fields.append("project.id") for field in fields: column_additions, agg_additions = resolve_field(field, params) if column_additions: columns.extend(column_additions) if agg_additions: aggregations.extend(agg_additions) rollup = snuba_args.get("rollup") if not rollup and auto_fields: # Ensure fields we require to build a functioning interface # are present. We don't add fields when using a rollup as the additional fields # would be aggregated away. When there are aggregations # we use argMax to get the latest event/projectid so we can create links. # The `projectid` output name is not a typo, using `project_id` triggers # generates invalid queries. if not aggregations and "id" not in columns: columns.append("id") if not aggregations and "project.id" not in columns: columns.append("project.id") project_column = "project_id" if aggregations and "latest_event" not in map(lambda a: a[-1], aggregations): _, aggregates = resolve_function("latest_event()") aggregations.extend(aggregates) if aggregations and "project.id" not in columns: aggregations.append(["argMax", ["project.id", "timestamp"], "projectid"]) project_column = "projectid" if project_key == "": project_key = PROJECT_NAME_ALIAS if project_key: project_ids = snuba_args.get("filter_keys", {}).get("project_id", []) projects = Project.objects.filter(id__in=project_ids).values("slug", "id") aggregations.append( [ u"transform({}, array({}), array({}), '')".format( project_column, # Need to use join like this so we don't get a list including Ls which confuses clickhouse ",".join([six.text_type(project["id"]) for project in projects]), # Can't just format a list since we'll get u"string" instead of a plain 'string' ",".join([u"'{}'".format(project["slug"]) for project in projects]), ), None, project_key, ] ) if rollup and columns and not aggregations: raise InvalidSearchQuery("You cannot use rollup without an aggregate field.") orderby = snuba_args.get("orderby") if orderby: orderby = resolve_orderby(orderby, columns, aggregations) # If aggregations are present all columns # need to be added to the group by so that the query is valid. if aggregations: groupby.extend(columns) return { "selected_columns": columns, "aggregations": aggregations, "groupby": groupby, "orderby": orderby, }
def bulk_raw_query(snuba_param_list, referrer=None):
    headers = {}
    if referrer:
        headers["referer"] = referrer

    query_param_list = map(_prepare_query_params, snuba_param_list)

    def snuba_query(params):
        query_params, forward, reverse = params
        try:
            with timer("snuba_query"):
                body = json.dumps(query_params)
                with sentry_sdk.start_span(
                    op="snuba", description=u"query {}".format(body)
                ) as span:
                    span.set_tag("referrer", headers.get("referer", "<unknown>"))
                    return (
                        _snuba_pool.urlopen("POST", "/query", body=body, headers=headers),
                        forward,
                        reverse,
                    )
        except urllib3.exceptions.HTTPError as err:
            raise SnubaError(err)

    if len(snuba_param_list) > 1:
        query_results = _query_thread_pool.map(snuba_query, query_param_list)
    else:
        # No need to submit to the thread pool if we're just performing a
        # single query
        query_results = [snuba_query(query_param_list[0])]

    results = []
    for response, _, reverse in query_results:
        try:
            body = json.loads(response.data)
        except ValueError:
            raise UnexpectedResponseError(
                u"Could not decode JSON response: {}".format(response.data)
            )

        if response.status != 200:
            if body.get("error"):
                error = body["error"]
                if response.status == 429:
                    raise RateLimitExceeded(error["message"])
                elif error["type"] == "schema":
                    raise SchemaValidationError(error["message"])
                elif error["type"] == "clickhouse":
                    raise clickhouse_error_codes_map.get(error["code"], QueryExecutionError)(
                        error["message"]
                    )
                else:
                    raise SnubaError(error["message"])
            else:
                raise SnubaError(u"HTTP {}".format(response.status))

        # Forward and reverse translation maps from model ids to snuba keys, per column
        body["data"] = [reverse(d) for d in body["data"]]
        results.append(body)

    return results
def test_unmerge(self): now = before_now(minutes=5).replace(microsecond=0, tzinfo=pytz.utc) def time_from_now(offset=0): return now + timedelta(seconds=offset) project = self.create_project() sequence = itertools.count(0) tag_values = itertools.cycle(["red", "green", "blue"]) user_values = itertools.cycle([{"id": 1}, {"id": 2}]) def create_message_event(template, parameters, environment, release, fingerprint="group1"): i = next(sequence) event_id = uuid.UUID(fields=(i, 0x0, 0x1000, 0x80, 0x80, 0x808080808080)).hex tags = [["color", next(tag_values)]] if release: tags.append(["sentry:release", release]) event = self.store_event( data={ "event_id": event_id, "message": template % parameters, "type": "default", "user": next(user_values), "tags": tags, "fingerprint": [fingerprint], "timestamp": iso_format(now + timedelta(seconds=i)), "environment": environment, "release": release, }, project_id=project.id, ) UserReport.objects.create( project_id=project.id, group_id=event.group.id, event_id=event_id, name="Log Hat", email="*****@*****.**", comments="Quack", ) features.record([event]) return event events = OrderedDict() for event in (create_message_event("This is message #%s.", i, environment="production", release="version") for i in xrange(10)): events.setdefault(get_fingerprint(event), []).append(event) for event in (create_message_event( "This is message #%s!", i, environment="production", release="version2", fingerprint="group2", ) for i in xrange(10, 16)): events.setdefault(get_fingerprint(event), []).append(event) event = create_message_event( "This is message #%s!", 17, environment="staging", release="version3", fingerprint="group3", ) events.setdefault(get_fingerprint(event), []).append(event) merge_source, source, destination = list(Group.objects.all()) assert len(events) == 3 assert sum(map(len, events.values())) == 17 production_environment = Environment.objects.get( organization_id=project.organization_id, name="production") with self.tasks(): eventstream_state = eventstream.start_merge( project.id, [merge_source.id], source.id) merge_groups.delay([merge_source.id], source.id) eventstream.end_merge(eventstream_state) assert set([ (gtv.value, gtv.times_seen) for gtv in tagstore.get_group_tag_values( project.id, source.id, production_environment.id, "color") ]) == set([("red", 6), ("green", 5), ("blue", 5)]) similar_items = features.compare(source) assert len(similar_items) == 2 assert similar_items[0][0] == source.id assert similar_items[0][1]["message:message:character-shingles"] == 1.0 assert similar_items[1][0] == destination.id assert similar_items[1][1]["message:message:character-shingles"] < 1.0 with self.tasks(): eventstream_state = eventstream.start_unmerge( project.id, [list(events.keys())[0]], source.id, destination.id) unmerge.delay(project.id, source.id, destination.id, [events.keys()[0]], None, batch_size=5) eventstream.end_unmerge(eventstream_state) assert (list( Group.objects.filter(id=merge_source.id).values_list( "times_seen", "first_seen", "last_seen")) == []) assert list( Group.objects.filter(id=source.id).values_list( "times_seen", "first_seen", "last_seen")) == [(6, time_from_now(10), time_from_now(15))] assert list( Group.objects.filter(id=destination.id).values_list( "times_seen", "first_seen", "last_seen")) == [(11, time_from_now(0), time_from_now(16))] assert source.id != destination.id assert source.project == destination.project destination_event_ids = map(lambda event: event.event_id, events.values()[1]) assert set( 
UserReport.objects.filter(group_id=source.id).values_list( "event_id", flat=True)) == set(destination_event_ids) assert set( GroupHash.objects.filter(group_id=source.id).values_list( "hash", flat=True)) == set(itertools.islice(events.keys(), 2)) assert set( GroupRelease.objects.filter(group_id=source.id).values_list( "environment", "first_seen", "last_seen")) == set([(u"production", time_from_now(10), time_from_now(15))]) assert set([ (gtv.value, gtv.times_seen) for gtv in tagstore.get_group_tag_values( project.id, destination.id, production_environment.id, "color") ]) == set([(u"red", 4), (u"green", 3), (u"blue", 3)]) destination_event_ids = map(lambda event: event.event_id, events.values()[0] + events.values()[2]) assert set( UserReport.objects.filter(group_id=destination.id).values_list( "event_id", flat=True)) == set(destination_event_ids) assert set( GroupHash.objects.filter(group_id=destination.id).values_list( "hash", flat=True)) == set(itertools.islice(events.keys(), 2, 3)) assert set( GroupRelease.objects.filter(group_id=destination.id).values_list( "environment", "first_seen", "last_seen")) == set([ ("production", time_from_now(0), time_from_now(9)), ("staging", time_from_now(16), time_from_now(16)), ]) assert set([ (gtk.value, gtk.times_seen) for gtk in tagstore.get_group_tag_values( project.id, destination.id, production_environment.id, "color") ]) == set([("red", 4), ("blue", 3), ("green", 3)]) rollup_duration = 3600 time_series = tsdb.get_range( tsdb.models.group, [source.id, destination.id], now - timedelta(seconds=rollup_duration), time_from_now(17), rollup_duration, ) environment_time_series = tsdb.get_range( tsdb.models.group, [source.id, destination.id], now - timedelta(seconds=rollup_duration), time_from_now(17), rollup_duration, environment_ids=[production_environment.id], ) def get_expected_series_values(rollup, events, function=None): if function is None: def function(aggregate, event): return (aggregate if aggregate is not None else 0) + 1 expected = {} for event in events: k = float((to_timestamp(event.datetime) // rollup_duration) * rollup_duration) expected[k] = function(expected.get(k), event) return expected def assert_series_contains(expected, actual, default=0): actual = dict(actual) for key, value in expected.items(): assert actual.get(key, 0) == value for key in set(actual.keys()) - set(expected.keys()): assert actual.get(key, 0) == default assert_series_contains( get_expected_series_values(rollup_duration, events.values()[1]), time_series[source.id], 0, ) assert_series_contains( get_expected_series_values(rollup_duration, events.values()[0] + events.values()[2]), time_series[destination.id], 0, ) assert_series_contains( get_expected_series_values(rollup_duration, events.values()[1]), environment_time_series[source.id], 0, ) assert_series_contains( get_expected_series_values( rollup_duration, events.values()[0][:-1] + events.values()[2]), environment_time_series[destination.id], 0, ) time_series = tsdb.get_distinct_counts_series( tsdb.models.users_affected_by_group, [source.id, destination.id], now - timedelta(seconds=rollup_duration), time_from_now(17), rollup_duration, ) environment_time_series = tsdb.get_distinct_counts_series( tsdb.models.users_affected_by_group, [source.id, destination.id], now - timedelta(seconds=rollup_duration), time_from_now(17), rollup_duration, environment_id=production_environment.id, ) def collect_by_user_tag(aggregate, event): aggregate = aggregate if aggregate is not None else set() aggregate.add( 
get_event_user_from_interface(event.data["user"]).tag_value) return aggregate for series in [time_series, environment_time_series]: assert_series_contains( { timestamp: len(values) for timestamp, values in get_expected_series_values( rollup_duration, events.values()[1], collect_by_user_tag).items() }, series[source.id], ) assert_series_contains( { timestamp: len(values) for timestamp, values in get_expected_series_values( rollup_duration, events.values()[0] + events.values()[2], collect_by_user_tag, ).items() }, time_series[destination.id], ) def strip_zeroes(data): for group_id, series in data.items(): for _, values in series: for key, val in values.items(): if val == 0: values.pop(key) return data def collect_by_release(group, aggregate, event): aggregate = aggregate if aggregate is not None else {} release = event.get_tag("sentry:release") if not release: return aggregate release = GroupRelease.objects.get( group_id=group.id, environment=event.data["environment"], release_id=Release.objects.get( organization_id=project.organization_id, version=release).id, ).id aggregate[release] = aggregate.get(release, 0) + 1 return aggregate items = {} for i in [source.id, destination.id]: items[i] = list( GroupRelease.objects.filter(group_id=i).values_list("id", flat=True)) time_series = strip_zeroes( tsdb.get_frequency_series( tsdb.models.frequent_releases_by_group, items, now - timedelta(seconds=rollup_duration), time_from_now(17), rollup_duration, )) assert_series_contains( get_expected_series_values( rollup_duration, events.values()[1], functools.partial(collect_by_release, source)), time_series[source.id], {}, ) assert_series_contains( get_expected_series_values( rollup_duration, events.values()[0] + events.values()[2], functools.partial(collect_by_release, destination), ), time_series[destination.id], {}, ) items = {} for i in [source.id, destination.id]: items[i] = list(Environment.objects.all().values_list("id", flat=True)) time_series = strip_zeroes( tsdb.get_frequency_series( tsdb.models.frequent_environments_by_group, items, now - timedelta(seconds=rollup_duration), time_from_now(17), rollup_duration, )) def collect_by_environment(aggregate, event): aggregate = aggregate if aggregate is not None else {} environment = Environment.objects.get( organization_id=project.organization_id, name=event.data["environment"]).id aggregate[environment] = aggregate.get(environment, 0) + 1 return aggregate assert_series_contains( get_expected_series_values(rollup_duration, events.values()[1], collect_by_environment), time_series[source.id], {}, ) assert_series_contains( get_expected_series_values(rollup_duration, events.values()[0] + events.values()[2], collect_by_environment), time_series[destination.id], {}, ) source_similar_items = features.compare(source) assert source_similar_items[0] == ( source.id, { "exception:message:character-shingles": None, "exception:stacktrace:application-chunks": None, "exception:stacktrace:pairs": None, "message:message:character-shingles": 1.0, }, ) assert source_similar_items[1][0] == destination.id assert source_similar_items[1][1][ "message:message:character-shingles"] < 1.0 destination_similar_items = features.compare(destination) assert destination_similar_items[0] == ( destination.id, { "exception:message:character-shingles": None, "exception:stacktrace:application-chunks": None, "exception:stacktrace:pairs": None, "message:message:character-shingles": 1.0, }, ) assert destination_similar_items[1][0] == source.id assert destination_similar_items[1][1][ 
"message:message:character-shingles"] < 1.0