Example #1
    def _parse_args(self, request, environment_id=None):
        resolution = request.GET.get('resolution')
        if resolution:
            resolution = self._parse_resolution(resolution)
            assert resolution in tsdb.get_rollups()

        end = request.GET.get('until')
        if end:
            end = to_datetime(float(end))
        else:
            end = datetime.utcnow().replace(tzinfo=utc)

        start = request.GET.get('since')
        if start:
            start = to_datetime(float(start))
            assert start <= end, 'start must be before or equal to end'
        else:
            start = end - timedelta(days=1, seconds=-1)

        return {
            'start': start,
            'end': end,
            'rollup': resolution,
            'environment_ids': environment_id and [environment_id],
        }
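A minimal sketch of the to_datetime/to_timestamp helpers these examples exercise, inferred from how they are used here (epoch seconds to timezone-aware UTC datetimes and back); this is an assumption for illustration, not Sentry's exact source:

from datetime import datetime, timedelta, timezone

EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)

def to_timestamp(value):
    # tz-aware datetime -> POSIX seconds as a float
    return (value - EPOCH).total_seconds()

def to_datetime(value):
    # POSIX seconds (int or float) -> tz-aware UTC datetime
    return EPOCH + timedelta(seconds=value)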
Example #2
def make_group_generator(random, project):
    epoch = to_timestamp(datetime(2016, 6, 1, 0, 0, 0, tzinfo=timezone.utc))
    for id in itertools.count(1):
        first_seen = epoch + random.randint(0, 60 * 60 * 24 * 30)
        last_seen = random.randint(first_seen, first_seen + (60 * 60 * 24 * 30))

        culprit = make_culprit(random)
        level = random.choice(LOG_LEVELS.keys())
        message = make_message(random)

        group = Group(
            id=id,
            project=project,
            culprit=culprit,
            level=level,
            message=message,
            first_seen=to_datetime(first_seen),
            last_seen=to_datetime(last_seen),
            status=random.choice((GroupStatus.UNRESOLVED, GroupStatus.RESOLVED, )),
            data={
                'type': 'default',
                'metadata': {
                    'title': message,
                }
            }
        )

        if random.random() < 0.8:
            group.data = make_group_metadata(random, group)

        yield group
Example #3
def test_clean_series_trims_extra():
    rollup = 60
    n = 5
    start = to_datetime(rollup * 0)
    stop = to_datetime(rollup * n)
    series = [(rollup * i, i) for i in xrange(0, n + 1)]
    assert clean_series(start, stop, rollup, series) == series[:n]
Example #4
    def _parse_args(self, request):
        resolution = request.GET.get('resolution')
        if resolution:
            resolution = self._parse_resolution(resolution)

            assert any(r for r in tsdb.rollups if r[0] == resolution)

        end = request.GET.get('until')
        if end:
            end = to_datetime(float(end))
        else:
            end = datetime.utcnow().replace(tzinfo=utc)

        start = request.GET.get('since')
        if start:
            start = to_datetime(float(start))
            assert start <= end, 'start must be before or equal to end'
        else:
            start = end - timedelta(days=1, seconds=-1)

        return {
            'start': start,
            'end': end,
            'rollup': resolution,
        }
Example #5
def test_clean_series_rejects_offset_timestamp():
    rollup = 60
    n = 5
    start = to_datetime(rollup * 0)
    stop = to_datetime(rollup * n)
    series = [(rollup * (i * 1.1), i) for i in xrange(0, n)]
    with pytest.raises(AssertionError):
        clean_series(start, stop, rollup, series)
Example #6
    def get_data(self, model, keys, start, end, rollup=None, environment_ids=None,
                 aggregation='count()', group_on_model=True, group_on_time=False):
        """
        Normalizes all the TSDB parameters and sends a query to snuba.

        `group_on_time`: whether to add a GROUP BY clause on the 'time' field.
        `group_on_model`: whether to add a GROUP BY clause on the primary model.
        """
        model_columns = self.model_columns.get(model)

        if model_columns is None:
            raise Exception(u"Unsupported TSDBModel: {}".format(model.name))

        model_group, model_aggregate = model_columns

        groupby = []
        if group_on_model and model_group is not None:
            groupby.append(model_group)
        if group_on_time:
            groupby.append('time')
        if aggregation == 'count()' and model_aggregate is not None:
            # Special case, because count has different semantics, we change:
            # `COUNT(model_aggregate)` to `COUNT() GROUP BY model_aggregate`
            groupby.append(model_aggregate)
            model_aggregate = None

        keys_map = dict(zip(model_columns, self.flatten_keys(keys)))
        keys_map = {k: v for k, v in six.iteritems(keys_map) if k is not None and v is not None}
        if environment_ids is not None:
            keys_map['environment'] = environment_ids

        aggregations = [[aggregation, model_aggregate, 'aggregate']]

        # For historical compatibility with bucket-counted TSDB implementations
        # we grab the original bucketed series and add the rollup time to the
        # timestamp of the last bucket to get the end time.
        rollup, series = self.get_optimal_rollup_series(start, end, rollup)
        start = to_datetime(series[0])
        end = to_datetime(series[-1] + rollup)

        if keys:
            result = snuba.query(start, end, groupby, None, keys_map,
                                 aggregations, rollup, referrer='tsdb',
                                 is_grouprelease=(model == TSDBModel.frequent_releases_by_group))
        else:
            result = {}

        if group_on_time:
            keys_map['time'] = series

        self.zerofill(result, groupby, keys_map)
        self.trim(result, groupby, keys)

        return result
Example #7
def test_clean_series():
    rollup = 60
    n = 5
    start = to_datetime(rollup * 0)
    stop = to_datetime(rollup * n)
    series = [(rollup * i, i) for i in xrange(0, n)]
    assert clean_series(
        start,
        stop,
        rollup,
        series,
    ) == series
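A sketch of clean_series consistent with the three tests above (Examples #3, #5 and #7); this is assumed behavior, not necessarily Sentry's implementation: bucket timestamps must sit on the rollup grid starting at start, and anything at or past stop is trimmed.

def clean_series(start, stop, rollup, series):
    # Expected bucket timestamps: every `rollup` seconds in [start, stop).
    expected = range(int(to_timestamp(start)), int(to_timestamp(stop)), rollup)
    result = []
    for expected_timestamp, (timestamp, value) in zip(expected, series):
        assert timestamp == expected_timestamp, 'unexpected timestamp: %r' % timestamp
        result.append((timestamp, value))
    return result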
Example #8
    def test_make_counter_key(self):
        result = self.db.make_counter_key(TSDBModel.project, 1, to_datetime(1368889980), 1, None)
        assert result == ('ts:1:1368889980:1', 1)

        result = self.db.make_counter_key(
            TSDBModel.project, 1, to_datetime(1368889980), 'foo', None)
        assert result == ('ts:1:1368889980:46', self.db.get_model_key('foo'))

        result = self.db.make_counter_key(TSDBModel.project, 1, to_datetime(1368889980), 1, 1)
        assert result == ('ts:1:1368889980:1', '1?e=1')

        result = self.db.make_counter_key(TSDBModel.project, 1, to_datetime(1368889980), 'foo', 1)
        assert result == ('ts:1:1368889980:46', self.db.get_model_key('foo') + '?e=1')
Example #9
 def remove_invalid_values(item):
     timestamp, value = item
     if timestamp < earliest:
         value = None
     elif to_datetime(timestamp) < project.date_added:
         value = None
     return (timestamp, value)
Example #10
    def test_hash_discarded_raised(self, mock_refund, mock_incr):
        project = self.create_project()

        data = {
            'project': project.id,
            'platform': 'NOTMATTLANG',
            'logentry': {
                'formatted': 'test',
            },
            'event_id': uuid.uuid4().hex,
            'extra': {
                'foo': 'bar'
            },
        }

        now = time()
        mock_save = mock.Mock()
        mock_save.side_effect = HashDiscarded
        with mock.patch.object(EventManager, 'save', mock_save):
            save_event(data=data, start_time=now)
            mock_incr.assert_called_with([
                (tsdb.models.project_total_received_discarded, project.id),
                (tsdb.models.project_total_blacklisted, project.id),
                (tsdb.models.organization_total_blacklisted, project.organization_id),
            ],
                timestamp=to_datetime(now),
            )
Example #11
def get_recent_mentions(tenant):
    client = cluster.get_routing_client()
    key = get_key(tenant)
    ids = [x for x in client.zrangebyscore(
        key, time.time() - (RECENT_HOURS * 60), '+inf')][-MAX_RECENT:]

    with cluster.map() as map_client:
        items = [map_client.get('%s:%s' % (key, id)) for id in ids]
    items = [json.loads(x.value) for x in items if x.value is not None]

    projects = items and dict((x.id, x) for x in Project.objects.filter(
        pk__in=[x['project'] for x in items],
    )) or {}
    groups = items and dict((x.id, x) for x in Group.objects.filter(
        pk__in=[x['group'] for x in items],
    )) or {}
    events = items and dict((x.id, x) for x in Event.objects.filter(
        pk__in=[x['event'] for x in items if x['event'] is not None],
    )) or {}

    for item in items:
        item['project'] = projects.get(item['project'])
        item['group'] = groups.get(item['group'])
        item['event'] = events.get(item['event'])
        if item['event'] is None and item['group'] is not None:
            item['event'] = item['group'].get_latest_event()
        item['last_mentioned'] = to_datetime(item['last_mentioned'])

    return items
Example #12
def create_failed_event(cache_key, project_id, issues, event_id, start_time=None):
    """If processing failed we put the original data from the cache into a
    raw event.  Returns `True` if a failed event was inserted
    """
    reprocessing_active = ProjectOption.objects.get_value(
        project_id, 'sentry:reprocessing_active', REPROCESSING_DEFAULT
    )

    # The first time we encounter a failed event and the hint was cleared
    # we send a notification.
    sent_notification = ProjectOption.objects.get_value(
        project_id, 'sentry:sent_failed_event_hint', False
    )
    if not sent_notification:
        project = Project.objects.get_from_cache(id=project_id)
        Activity.objects.create(
            type=Activity.NEW_PROCESSING_ISSUES,
            project=project,
            datetime=to_datetime(start_time),
            data={'reprocessing_active': reprocessing_active,
                  'issues': issues},
        ).send_notification()
        ProjectOption.objects.set_value(project, 'sentry:sent_failed_event_hint', True)

    # If reprocessing is not active we bail now without creating the
    # processing issues
    if not reprocessing_active:
        return False

    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project_id, event_id)
    data = default_cache.get(cache_key)
    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'raw'})
        error_logger.error('process.failed_raw.empty', extra={'cache_key': cache_key})
        return True

    from sentry.models import RawEvent, ProcessingIssue
    raw_event = RawEvent.objects.create(
        project_id=project_id,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(data['timestamp']).replace(tzinfo=timezone.utc),
        data=data
    )

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue['scope'],
            object=issue['object'],
            type=issue['type'],
            data=issue['data'],
        )

    default_cache.delete(cache_key)

    return True
Example #13
    def get(self, request, group, environment):
        try:
            environment = Environment.objects.get(
                project_id=group.project_id,
                # XXX(dcramer): we have no great way to pass the empty env
                name='' if environment == 'none' else environment,
            )
        except Environment.DoesNotExist:
            raise ResourceDoesNotExist

        first_release = GroupRelease.objects.filter(
            group_id=group.id,
            environment=environment.name,
        ).order_by('first_seen').first()

        last_release = GroupRelease.objects.filter(
            group_id=group.id,
            environment=environment.name,
        ).order_by('-first_seen').first()

        # the current release is the 'latest seen' release within the
        # environment even if it hasn't affected this issue
        current_release = GroupRelease.objects.filter(
            group_id=group.id,
            environment=environment.name,
            release_id=ReleaseEnvironment.objects.filter(
                project_id=group.project_id,
                environment_id=environment.id,
            ).order_by('-first_seen').values_list('release_id', flat=True).first(),
        ).first()

        last_seen = GroupRelease.objects.filter(
            group_id=group.id,
            environment=environment.name,
        ).order_by('-last_seen').values_list('last_seen', flat=True).first()

        until = request.GET.get('until')
        if until:
            until = to_datetime(float(until))

        context = {
            'environment': serialize(
                environment, request.user, GroupEnvironmentWithStatsSerializer(
                    group=group,
                    until=until,
                )
            ),
            'firstRelease': serialize(first_release, request.user),
            'lastRelease': serialize(last_release, request.user),
            'currentRelease': serialize(
                current_release, request.user, GroupReleaseWithStatsSerializer(
                    until=until,
                )
            ),
            'lastSeen': last_seen,
            'firstSeen': first_release.first_seen if first_release else None,
        }
        return Response(context)
Example #14
 def _convert(x):
     return {
         'type': x['type'],
         'timestamp': to_datetime(x['timestamp']),
         'level': x.get('level', 'info'),
         'message': x.get('message'),
         'category': x.get('category'),
         'data': x.get('data') or None,
         'event_id': x.get('event_id'),
     }
Example #15
def make_group_generator(random, project):
    epoch = to_timestamp(datetime(2016, 6, 1, 0, 0, 0, tzinfo=timezone.utc))
    for id in itertools.count(1):
        first_seen = epoch + random.randint(0, 60 * 60 * 24 * 30)
        last_seen = random.randint(first_seen, first_seen + (60 * 60 * 24 * 30))

        group = Group(
            id=id,
            project=project,
            culprit=make_culprit(random),
            level=random.choice(LOG_LEVELS.keys()),
            message=make_message(random),
            first_seen=to_datetime(first_seen),
            last_seen=to_datetime(last_seen),
        )

        if random.random() < 0.8:
            group.data = make_group_metadata(random, group)

        yield group
Example #16
    def merge_frequencies(self, model, destination, sources, timestamp=None):
        if not self.enable_frequency_sketches:
            return

        rollups = []
        for rollup, samples in self.rollups.items():
            _, series = self.get_optimal_rollup_series(
                to_datetime(self.get_earliest_timestamp(rollup, timestamp=timestamp)),
                end=None,
                rollup=rollup,
            )
            rollups.append((
                rollup,
                map(to_datetime, series),
            ))

        exports = defaultdict(list)

        for source in sources:
            for rollup, series in rollups:
                for timestamp in series:
                    keys = self.make_frequency_table_keys(
                        model,
                        rollup,
                        to_timestamp(timestamp),
                        source,
                    )
                    arguments = ['EXPORT'] + list(self.DEFAULT_SKETCH_PARAMETERS)
                    exports[source].extend([
                        (CountMinScript, keys, arguments),
                        ('DEL',) + tuple(keys),
                    ])

        imports = []

        for source, results in self.cluster.execute_commands(exports).items():
            results = iter(results)
            for rollup, series in rollups:
                for timestamp in series:
                    imports.append((
                        CountMinScript,
                        self.make_frequency_table_keys(
                            model,
                            rollup,
                            to_timestamp(timestamp),
                            destination,
                        ),
                        ['IMPORT'] + list(self.DEFAULT_SKETCH_PARAMETERS) + next(results).value,
                    ))
                    next(results)  # pop off the result of DEL

        self.cluster.execute_commands({
            destination: imports,
        })
Example #17
 def get_registered_devices(self):
     rv = []
     for device in self.config.get('devices') or ():
         rv.append({
             'timestamp': to_datetime(device['ts']),
             'name': device['name'],
             'key_handle': device['binding']['keyHandle'],
             'app_id': device['binding']['appId'],
         })
     rv.sort(key=lambda x: x['name'])
     return rv
Example #18
    def _parse_args(self, request):
        resolution = request.GET.get("resolution")
        if resolution:
            resolution = self._parse_resolution(resolution)
            assert resolution in tsdb.rollups

        end = request.GET.get("until")
        if end:
            end = to_datetime(float(end))
        else:
            end = datetime.utcnow().replace(tzinfo=utc)

        start = request.GET.get("since")
        if start:
            start = to_datetime(float(start))
            assert start <= end, "start must be before or equal to end"
        else:
            start = end - timedelta(days=1, seconds=-1)

        return {"start": start, "end": end, "rollup": resolution}
Example #19
 def get_registered_devices(self):
     rv = []
     for device in self.config.get("devices") or ():
         rv.append(
             {
                 "timestamp": to_datetime(device["ts"]),
                 "name": device["name"],
                 "key_handle": device["binding"]["keyHandle"],
                 "app_id": device["binding"]["appId"],
             }
         )
     rv.sort(key=lambda x: x["name"])
     return rv
Example #20
    def build_calendar_data(project):
        start, stop = reports.get_calendar_query_range(interval, 3)
        rollup = 60 * 60 * 24
        series = []

        weekend = frozenset((5, 6))
        value = int(random.weibullvariate(5000, 3))
        for timestamp in tsdb.get_optimal_rollup_series(start, stop, rollup)[1]:
            damping = random.uniform(0.2, 0.6) if to_datetime(timestamp).weekday() in weekend else 1
            jitter = random.paretovariate(1.2)
            series.append((timestamp, int(value * damping * jitter)))
            value = value * random.uniform(0.25, 2)

        return reports.clean_calendar_data(project, series, start, stop, rollup, stop)
Example #21
 def get_active_series(self, start=None, end=None, timestamp=None):
     rollups = {}
     for rollup, samples in self.rollups.items():
         _, series = self.get_optimal_rollup_series(
             start if start is not None else to_datetime(
                 self.get_earliest_timestamp(
                     rollup,
                     timestamp=timestamp,
                 ),
             ),
             end,
             rollup=rollup,
         )
         rollups[rollup] = map(to_datetime, series)
     return rollups
Example #22
 def make_release_generator():
     id_sequence = itertools.count(1)
     while True:
         dt = to_datetime(
             random.randint(
                 timestamp - (30 * 24 * 60 * 60),
                 timestamp,
             ),
         )
         p = random.choice(projects)
         yield Release(
             id=next(id_sequence),
             project=p,
             organization_id=p.organization_id,
             version=''.join([random.choice('0123456789abcdef') for _ in range(40)]),
             date_added=dt,
         )
Example #23
def to_context(organization, interval, reports):
    report = reduce(merge_reports, reports.values())
    series = [(to_datetime(timestamp), Point(*values)) for timestamp, values in report.series]
    return {
        'series': {
            'points': series,
            'maximum': max(sum(point) for timestamp, point in series),
            'all': sum([sum(point) for timestamp, point in series]),
            'resolved': sum([point.resolved for timestamp, point in series]),
        },
        'distribution': {
            'types':
            list(
                zip(
                    (
                        DistributionType(
                            'New', '#8477e0'), DistributionType(
                            'Reopened', '#6C5FC7'),
                        DistributionType('Existing', '#534a92'),
                    ),
                    report.issue_summaries,
                ),
            ),
            'total':
            sum(report.issue_summaries),
        },
        'comparisons': [
            ('last week', change(report.aggregates[-1], report.aggregates[-2])),
            (
                'four week average', change(
                    report.aggregates[-1],
                    mean(report.aggregates) if all(v is not None
                                                   for v in report.aggregates) else None,
                )
            ),
        ],
        'projects': {
            'series': build_project_breakdown_series(reports),
        },
        'calendar':
        to_calendar(
            interval,
            report.calendar_series,
        ),
    }
Example #24
 def _convert(x):
     return {
         'type': x['type'],
         'timestamp': to_datetime(x['timestamp']),
         'data': x['data'],
     }
Example #25
def _do_save_event(cache_key=None,
                   data=None,
                   start_time=None,
                   event_id=None,
                   project_id=None,
                   **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas
    from sentry.models import ProjectKey
    from sentry.utils.outcomes import Outcome, track_outcome
    from sentry.ingest.outcomes_consumer import mark_signal_sent

    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data["event_id"]

    # Only when we come from reprocessing do we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop("project")

    key_id = None if data is None else data.get("key_id")
    if key_id is not None:
        key_id = int(key_id)
    timestamp = to_datetime(start_time) if start_time is not None else None

    # We only need to delete raw events for events that support
    # reprocessing.  If the data cannot be found we want to assume
    # that we need to delete the raw event.
    if not data or reprocessing.event_supports_reprocessing(data):
        delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache.  The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it.  This causes the node store to delete the data and we end up
    # fetching an empty dict.  We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI.  So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr("events.failed",
                     tags={
                         "reason": "cache",
                         "stage": "post"
                     },
                     skip_internal=False)
        return

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    event = None
    try:
        manager = EventManager(data)
        # event.project.organization is populated after this statement.
        event = manager.save(project_id, assume_normalized=True)

        # This is where we can finally say that we have accepted the event.
        track_outcome(
            event.project.organization_id,
            event.project.id,
            key_id,
            Outcome.ACCEPTED,
            None,
            timestamp,
            event_id,
        )

    except HashDiscarded:
        project = Project.objects.get_from_cache(id=project_id)
        reason = FilterStatKeys.DISCARDED_HASH
        project_key = None
        try:
            if key_id is not None:
                project_key = ProjectKey.objects.get_from_cache(id=key_id)
        except ProjectKey.DoesNotExist:
            pass

        quotas.refund(project, key=project_key, timestamp=start_time)
        # There is no signal supposed to be sent for this particular
        # outcome-reason combination. Prevent the outcome consumer from
        # emitting it for now.
        #
        # XXX(markus): Revisit decision about signals once outcomes consumer is stable.
        mark_signal_sent(project_id, event_id)
        track_outcome(
            project.organization_id,
            project_id,
            key_id,
            Outcome.FILTERED,
            reason,
            timestamp,
            event_id,
        )

    else:
        if cache_key:
            # Note that event is now a model, and no longer the data
            save_attachments(cache_key, event)

    finally:
        if cache_key:
            default_cache.delete(cache_key)

            # For the unlikely case that we did not manage to persist the
            # event we also delete the key always.
            if event is None or features.has("organizations:event-attachments",
                                             event.project.organization,
                                             actor=None):
                attachment_cache.delete(cache_key)

        if start_time:
            metrics.timing("events.time-to-process",
                           time() - start_time,
                           instance=data["platform"])
Example #26
def _do_save_event(cache_key=None, data=None, start_time=None, event_id=None,
                   project_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas
    from sentry.models import ProjectKey
    from sentry.utils.outcomes import Outcome, track_outcome

    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data['event_id']

    # Only when we come from reprocessing do we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop('project')

    key_id = None if data is None else data.get('key_id')
    if key_id is not None:
        key_id = int(key_id)
    timestamp = to_datetime(start_time) if start_time is not None else None

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache.  The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it.  This causes the node store to delete the data and we end up
    # fetching an empty dict.  We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI.  So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr(
            'events.failed',
            tags={
                'reason': 'cache',
                'stage': 'post'},
            skip_internal=False)
        return

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    event = None
    try:
        manager = EventManager(data)
        event = manager.save(project_id, assume_normalized=True)

        # Always load attachments from the cache so we can later prune them.
        # Only save them if the event-attachments feature is active, though.
        if features.has('organizations:event-attachments', event.project.organization, actor=None):
            attachments = attachment_cache.get(cache_key) or []
            for attachment in attachments:
                save_attachment(event, attachment)

        # This is where we can finally say that we have accepted the event.
        track_outcome(
            event.project.organization_id,
            event.project.id,
            key_id,
            Outcome.ACCEPTED,
            None,
            timestamp,
            event_id
        )

    except HashDiscarded:
        project = Project.objects.get_from_cache(id=project_id)
        reason = FilterStatKeys.DISCARDED_HASH
        project_key = None
        try:
            if key_id is not None:
                project_key = ProjectKey.objects.get_from_cache(id=key_id)
        except ProjectKey.DoesNotExist:
            pass

        quotas.refund(project, key=project_key, timestamp=start_time)
        track_outcome(
            project.organization_id,
            project_id,
            key_id,
            Outcome.FILTERED,
            reason,
            timestamp,
            event_id
        )

    finally:
        if cache_key:
            default_cache.delete(cache_key)

            # For the unlikely case that we did not manage to persist the
            # event we also delete the key always.
            if event is None or \
               features.has('organizations:event-attachments', event.project.organization, actor=None):
                attachment_cache.delete(cache_key)

        if start_time:
            metrics.timing(
                'events.time-to-process',
                time() - start_time,
                instance=data['platform'])
Example #27
def alert(request):
    platform = request.GET.get('platform', 'python')
    org = Organization(
        id=1,
        slug='example',
        name='Example',
    )
    project = Project(
        id=1,
        slug='example',
        name='Example',
        organization=org,
    )

    random = get_random(request)
    group = next(
        make_group_generator(random, project),
    )

    event = Event(
        id=1,
        event_id='44f1419e73884cd2b45c79918f4b6dc4',
        project=project,
        group=group,
        message=group.message,
        data=load_data(platform),
        datetime=to_datetime(
            random.randint(
                to_timestamp(group.first_seen),
                to_timestamp(group.last_seen),
            ),
        ),
    )

    rule = Rule(label="An example rule")

    interface_list = []
    for interface in six.itervalues(event.interfaces):
        body = interface.to_email_html(event)
        if not body:
            continue
        interface_list.append((interface.get_title(), mark_safe(body)))

    return MailPreview(
        html_template='sentry/emails/error.html',
        text_template='sentry/emails/error.txt',
        context={
            'rule': rule,
            'group': group,
            'event': event,
            'link': 'http://example.com/link',
            'interfaces': interface_list,
            'project_label': project.slug,
            'tags': [
                ('logger', 'javascript'), ('environment', 'prod'), ('level', 'error'),
                ('device', 'Other')
            ],
            'commits': [{
                # TODO(dcramer): change to use serializer
                "repository": {"status": "active", "name": "Example Repo", "url": "https://github.com/example/example", "dateCreated": "2018-02-28T23:39:22.402Z", "provider": {"id": "github", "name": "GitHub"}, "id": "1"},
                "score": 2,
                "subject": "feat: Do something to raven/base.py",
                "message": "feat: Do something to raven/base.py\naptent vivamus vehicula tempus volutpat hac tortor",
                "id": "1b17483ffc4a10609e7921ee21a8567bfe0ed006",
                "shortId": "1b17483",
                "author": {"username": "******", "isManaged": False, "lastActive": "2018-03-01T18:25:28.149Z", "id": "1", "isActive": True, "has2fa": False, "name": "*****@*****.**", "avatarUrl": "https://secure.gravatar.com/avatar/51567a4f786cd8a2c41c513b592de9f9?s=32&d=mm", "dateJoined": "2018-02-27T22:04:32.847Z", "emails": [{"is_verified": False, "id": "1", "email": "*****@*****.**"}], "avatar": {"avatarUuid": None, "avatarType": "letter_avatar"}, "lastLogin": "******", "email": "*****@*****.**"}
            }],
        },
    ).render(request)
Example #28
    def get_data(
        self,
        model,
        keys,
        start,
        end,
        rollup=None,
        environment_ids=None,
        aggregation="count()",
        group_on_model=True,
        group_on_time=False,
    ):
        """
        Normalizes all the TSDB parameters and sends a query to snuba.

        `group_on_time`: whether to add a GROUP BY clause on the 'time' field.
        `group_on_model`: whether to add a GROUP BY clause on the primary model.
        """
        # XXX: to counteract the hack in project_key_stats.py
        if model in [
                TSDBModel.key_total_received,
                TSDBModel.key_total_blacklisted,
                TSDBModel.key_total_rejected,
        ]:
            keys = list(set(map(lambda x: int(x), keys)))

        # 10s is the only rollup under an hour that we support
        if rollup and rollup == 10 and model in self.lower_rollup_query_settings.keys(
        ):
            model_query_settings = self.lower_rollup_query_settings.get(model)
        else:
            model_query_settings = self.model_query_settings.get(model)

        if model_query_settings is None:
            raise Exception(u"Unsupported TSDBModel: {}".format(model.name))

        model_group = model_query_settings.groupby
        model_aggregate = model_query_settings.aggregate

        groupby = []
        if group_on_model and model_group is not None:
            groupby.append(model_group)
        if group_on_time:
            groupby.append("time")
        if aggregation == "count()" and model_aggregate is not None:
            # Special case, because count has different semantics, we change:
            # `COUNT(model_aggregate)` to `COUNT() GROUP BY model_aggregate`
            groupby.append(model_aggregate)
            model_aggregate = None

        columns = (model_query_settings.groupby,
                   model_query_settings.aggregate)
        keys_map = dict(zip(columns, self.flatten_keys(keys)))
        keys_map = {
            k: v
            for k, v in six.iteritems(keys_map)
            if k is not None and v is not None
        }
        if environment_ids is not None:
            keys_map["environment"] = environment_ids

        aggregations = [[aggregation, model_aggregate, "aggregate"]]

        # For historical compatibility with bucket-counted TSDB implementations
        # we grab the original bucketed series and add the rollup time to the
        # timestamp of the last bucket to get the end time.
        rollup, series = self.get_optimal_rollup_series(start, end, rollup)
        start = to_datetime(series[0])
        end = to_datetime(series[-1] + rollup)
        limit = min(10000,
                    int(len(keys) * ((end - start).total_seconds() / rollup)))

        if keys:
            result = snuba.query(
                dataset=model_query_settings.dataset,
                start=start,
                end=end,
                groupby=groupby,
                conditions=deepcopy(
                    model_query_settings.conditions
                ),  # copy because we modify the conditions in snuba.query
                filter_keys=keys_map,
                aggregations=aggregations,
                rollup=rollup,
                limit=limit,
                referrer="tsdb",
                is_grouprelease=(
                    model == TSDBModel.frequent_releases_by_group),
            )
        else:
            result = {}

        if group_on_time:
            keys_map["time"] = series

        self.zerofill(result, groupby, keys_map)
        self.trim(result, groupby, keys)

        return result
Example #29
def build_project_breakdown_series(reports):
    Key = namedtuple('Key', 'label url color data')

    def get_legend_data(report):
        filtered, rate_limited = report.usage_summary
        return {
            'events': sum(sum(value) for timestamp, value in report.series),
            'filtered': filtered,
            'rate_limited': rate_limited,
        }

    # Find the reports with the most total events. (The number of reports to
    # keep is the same as the number of colors available to use in the legend.)
    instances = map(
        operator.itemgetter(0),
        sorted(
            reports.items(),
            key=lambda (instance, report): sum(sum(values) for timestamp, values in report[0]),
            reverse=True,
        ),
    )[:len(colors)]

    # Starting building the list of items to include in the report chart. This
    # is a list of [Key, Report] pairs, in *ascending* order of the total sum
    # of values in the series. (This is so when we render the series, the
    # largest color blocks are at the bottom and it feels appropriately
    # weighted.)
    selections = map(
        lambda (instance, color): (
            Key(
                instance.slug,
                instance.get_absolute_url(),
                color,
                get_legend_data(reports[instance]),
            ),
            reports[instance],
        ),
        zip(
            instances,
            colors,
        ),
    )[::-1]

    # Collect any reports that weren't in the selection set, merge them
    # together and add it at the top (front) of the stack.
    overflow = set(reports) - set(instances)
    if overflow:
        overflow_report = reduce(
            merge_reports,
            [reports[instance] for instance in overflow],
        )
        selections.insert(
            0, (Key('Other', None, '#f2f0fa', get_legend_data(overflow_report)), overflow_report, )
        )

    def summarize(key, points):
        total = sum(points)
        return [(key, total)] if total else []

    # Collect all of the independent series into a single series to make it
    # easier to render, resulting in a series where each value is a sequence of
    # (key, count) pairs.
    series = reduce(
        merge_series,
        [series_map(
            functools.partial(summarize, key),
            report[0],
        ) for key, report in selections],
    )

    legend = [key for key, value in reversed(selections)]
    return {
        'points': [(to_datetime(timestamp), value) for timestamp, value in series],
        'maximum': max(sum(count for key, count in value) for timestamp, value in series),
        'legend': {
            'rows': legend,
            'total': Key(
                'Total',
                None,
                None,
                reduce(merge_mappings, [key.data for key in legend]),
            ),
        },
    }
Example #30
 def datetime(self):
     return to_datetime(self.timestamp)
Example #31
def save_event(cache_key=None, data=None, start_time=None, event_id=None,
               project_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas, tsdb
    from sentry.models import ProjectKey

    if cache_key:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data['event_id']

    # Only when we come from reprocessing do we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop('project')

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache.  The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it.  This causes the node store to delete the data and we end up
    # fetching an empty dict.  We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI.  So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'post'})
        return

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    try:
        manager = EventManager(data)
        event = manager.save(project_id)

        # Always load attachments from the cache so we can later prune them.
        # Only save them if the event-attachments feature is active, though.
        if features.has('organizations:event-attachments', event.project.organization, actor=None):
            attachments = attachment_cache.get(cache_key) or []
            for attachment in attachments:
                save_attachment(event, attachment)

    except HashDiscarded:
        increment_list = [
            (tsdb.models.project_total_received_discarded, project_id),
        ]

        try:
            project = Project.objects.get_from_cache(id=project_id)
        except Project.DoesNotExist:
            pass
        else:
            increment_list.extend([
                (tsdb.models.project_total_blacklisted, project.id),
                (tsdb.models.organization_total_blacklisted, project.organization_id),
            ])

            project_key = None
            if data.get('key_id') is not None:
                try:
                    project_key = ProjectKey.objects.get_from_cache(id=data['key_id'])
                except ProjectKey.DoesNotExist:
                    pass
                else:
                    increment_list.append((tsdb.models.key_total_blacklisted, project_key.id))

            quotas.refund(
                project,
                key=project_key,
                timestamp=start_time,
            )

        tsdb.incr_multi(
            increment_list,
            timestamp=to_datetime(start_time) if start_time is not None else None,
        )

    finally:
        if cache_key:
            default_cache.delete(cache_key)
            attachment_cache.delete(cache_key)

        if start_time:
            metrics.timing(
                'events.time-to-process',
                time() - start_time,
                instance=data['platform'])
Example #32
def _do_save_event(cache_key=None,
                   data=None,
                   start_time=None,
                   event_id=None,
                   project_id=None,
                   **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas
    from sentry.models import ProjectKey
    from sentry.utils.outcomes import Outcome, track_outcome

    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data['event_id']

    # Only when we come from reprocessing do we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop('project')

    key_id = None if data is None else data.get('key_id')
    if key_id is not None:
        key_id = int(key_id)
    timestamp = to_datetime(start_time) if start_time is not None else None

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache.  The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it.  This causes the node store to delete the data and we end up
    # fetching an empty dict.  We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI.  So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr('events.failed',
                     tags={
                         'reason': 'cache',
                         'stage': 'post'
                     },
                     skip_internal=False)
        return

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    event = None
    try:
        manager = EventManager(data)
        event = manager.save(project_id, assume_normalized=True)

        # Always load attachments from the cache so we can later prune them.
        # Only save them if the event-attachments feature is active, though.
        if features.has('organizations:event-attachments',
                        event.project.organization,
                        actor=None):
            attachments = attachment_cache.get(cache_key) or []
            for attachment in attachments:
                save_attachment(event, attachment)

        # This is where we can finally say that we have accepted the event.
        track_outcome(event.project.organization_id, event.project.id, key_id,
                      Outcome.ACCEPTED, None, timestamp, event_id)

    except HashDiscarded:
        project = Project.objects.get_from_cache(id=project_id)
        reason = FilterStatKeys.DISCARDED_HASH
        project_key = None
        try:
            if key_id is not None:
                project_key = ProjectKey.objects.get_from_cache(id=key_id)
        except ProjectKey.DoesNotExist:
            pass

        quotas.refund(project, key=project_key, timestamp=start_time)
        track_outcome(project.organization_id, project_id, key_id,
                      Outcome.FILTERED, reason, timestamp, event_id)

    finally:
        if cache_key:
            default_cache.delete(cache_key)

            # For the unlikely case that we did not manage to persist the
            # event we also delete the key always.
            if event is None or \
               features.has('organizations:event-attachments', event.project.organization, actor=None):
                attachment_cache.delete(cache_key)

        if start_time:
            metrics.timing('events.time-to-process',
                           time() - start_time,
                           instance=data['platform'])
Example #33
    def get_data(
        self,
        model,
        keys,
        start,
        end,
        rollup=None,
        environment_ids=None,
        aggregation="count()",
        group_on_model=True,
        group_on_time=False,
    ):
        """
        Normalizes all the TSDB parameters and sends a query to snuba.

        `group_on_time`: whether to add a GROUP BY clause on the 'time' field.
        `group_on_model`: whether to add a GROUP BY clause on the primary model.
        """
        model_columns = self.model_columns.get(model)

        if model_columns is None:
            raise Exception(u"Unsupported TSDBModel: {}".format(model.name))

        model_group, model_aggregate = model_columns

        groupby = []
        if group_on_model and model_group is not None:
            groupby.append(model_group)
        if group_on_time:
            groupby.append("time")
        if aggregation == "count()" and model_aggregate is not None:
            # Special case, because count has different semantics, we change:
            # `COUNT(model_aggregate)` to `COUNT() GROUP BY model_aggregate`
            groupby.append(model_aggregate)
            model_aggregate = None

        keys_map = dict(zip(model_columns, self.flatten_keys(keys)))
        keys_map = {k: v for k, v in six.iteritems(keys_map) if k is not None and v is not None}
        if environment_ids is not None:
            keys_map["environment"] = environment_ids

        aggregations = [[aggregation, model_aggregate, "aggregate"]]

        # For historical compatibility with bucket-counted TSDB implementations
        # we grab the original bucketed series and add the rollup time to the
        # timestamp of the last bucket to get the end time.
        rollup, series = self.get_optimal_rollup_series(start, end, rollup)
        start = to_datetime(series[0])
        end = to_datetime(series[-1] + rollup)
        limit = min(10000, int(len(keys) * ((end - start).total_seconds() / rollup)))

        if keys:
            result = snuba.query(
                start=start,
                end=end,
                groupby=groupby,
                conditions=None,
                filter_keys=keys_map,
                aggregations=aggregations,
                rollup=rollup,
                limit=limit,
                referrer="tsdb",
                is_grouprelease=(model == TSDBModel.frequent_releases_by_group),
            )
        else:
            result = {}

        if group_on_time:
            keys_map["time"] = series

        self.zerofill(result, groupby, keys_map)
        self.trim(result, groupby, keys)

        return result
Example #34
    def get(self, request, group, environment):
        project = group.project
        try:
            environment = Environment.objects.get(
                projects=project,
                organization_id=project.organization_id,
                # XXX(dcramer): we have no great way to pass the empty env
                name='' if environment == 'none' else environment,
            )
        except Environment.DoesNotExist:
            raise ResourceDoesNotExist

        first_release = GroupRelease.objects.filter(
            group_id=group.id,
            environment=environment.name,
        ).order_by('first_seen').first()

        last_release = GroupRelease.objects.filter(
            group_id=group.id,
            environment=environment.name,
        ).order_by('-first_seen').first()

        # the current release is the 'latest seen' release within the
        # environment even if it hasn't affected this issue

        current_release = GroupRelease.objects.filter(
            group_id=group.id,
            environment=environment.name,
            release_id=ReleaseEnvironment.objects.filter(
                release_id__in=ReleaseProject.objects.filter(
                    project_id=group.project_id).values_list('release_id',
                                                             flat=True),
                organization_id=group.project.organization_id,
                environment_id=environment.id,
            ).order_by('-first_seen').values_list('release_id',
                                                  flat=True).first(),
        ).first()

        last_seen = GroupRelease.objects.filter(
            group_id=group.id,
            environment=environment.name,
        ).order_by('-last_seen').values_list('last_seen', flat=True).first()

        until = request.GET.get('until')
        if until:
            until = to_datetime(float(until))

        context = {
            'environment':
            serialize(
                environment, request.user,
                GroupEnvironmentWithStatsSerializer(
                    group=group,
                    until=until,
                )),
            'firstRelease':
            serialize(first_release, request.user),
            'lastRelease':
            serialize(last_release, request.user),
            'currentRelease':
            serialize(current_release, request.user,
                      GroupReleaseWithStatsSerializer(until=until, )),
            'lastSeen':
            last_seen,
            'firstSeen':
            first_release.first_seen if first_release else None,
        }
        return Response(context)
Example #35
def digest(request):
    random = get_random(request)

    # TODO: Refactor all of these into something more manageable.
    org = Organization(
        id=1,
        slug='example',
        name='Example Organization',
    )

    team = Team(
        id=1,
        slug='example',
        name='Example Team',
        organization=org,
    )

    project = Project(
        id=1,
        slug='example',
        name='Example Project',
        team=team,
        organization=org,
    )

    rules = {
        i: Rule(
            id=i,
            project=project,
            label="Rule #%s" % (i, ),
        )
        for i in range(1, random.randint(2, 4))
    }

    state = {
        'project': project,
        'groups': {},
        'rules': rules,
        'event_counts': {},
        'user_counts': {},
    }

    records = []

    event_sequence = itertools.count(1)
    group_generator = make_group_generator(random, project)

    for i in range(random.randint(1, 30)):
        group = next(group_generator)
        state['groups'][group.id] = group

        offset = timedelta(seconds=0)
        for i in range(random.randint(1, 10)):
            offset += timedelta(seconds=random.random() * 120)
            event = Event(id=next(event_sequence),
                          event_id=uuid.uuid4().hex,
                          project=project,
                          group=group,
                          message=group.message,
                          data=load_data('python'),
                          datetime=to_datetime(
                              random.randint(
                                  to_timestamp(group.first_seen),
                                  to_timestamp(group.last_seen),
                              ), ))

            records.append(
                Record(
                    event.event_id,
                    Notification(
                        event,
                        random.sample(state['rules'],
                                      random.randint(1, len(state['rules']))),
                    ),
                    to_timestamp(event.datetime),
                ))

            state['event_counts'][group.id] = random.randint(10, 1e4)
            state['user_counts'][group.id] = random.randint(10, 1e4)

    digest = build_digest(project, records, state)
    start, end, counts = get_digest_metadata(digest)

    context = {
        'project': project,
        'counts': counts,
        'digest': digest,
        'start': start,
        'end': end,
    }
    add_unsubscribe_link(context)

    return MailPreview(
        html_template='sentry/emails/digests/body.html',
        text_template='sentry/emails/digests/body.txt',
        context=context,
    ).render(request)
Example #36
def report(request):
    from sentry.tasks import reports

    random = get_random(request)

    duration = 60 * 60 * 24 * 7
    timestamp = to_timestamp(
        reports.floor_to_utc_day(
            to_datetime(
                random.randint(
                    to_timestamp(
                        datetime(2015, 6, 1, 0, 0, 0, tzinfo=timezone.utc)),
                    to_timestamp(
                        datetime(2016, 7, 1, 0, 0, 0, tzinfo=timezone.utc)),
                ))))

    start, stop = interval = reports._to_interval(timestamp, duration)

    organization = Organization(
        id=1,
        slug='example',
        name='Example',
    )

    team = Team(
        id=1,
        slug='example',
        name='Example',
        organization=organization,
    )

    projects = []
    for i in xrange(0, random.randint(1, 8)):
        name = ' '.join(random.sample(loremipsum.words, random.randint(1, 4)))
        projects.append(
            Project(
                id=i,
                organization=organization,
                team=team,
                slug=slugify(name),
                name=name,
                date_added=start - timedelta(days=random.randint(0, 120)),
            ))

    def make_release_generator():
        id_sequence = itertools.count(1)
        while True:
            dt = to_datetime(
                random.randint(
                    timestamp - (30 * 24 * 60 * 60),
                    timestamp,
                ), )
            p = random.choice(projects)
            yield Release(
                id=next(id_sequence),
                project=p,
                organization_id=p.organization_id,
                version=''.join(
                    [random.choice('0123456789abcdef') for _ in range(40)]),
                date_added=dt,
            )

    def build_issue_summaries():
        summaries = []
        for i in range(3):
            summaries.append(
                int(random.weibullvariate(10, 1) * random.paretovariate(0.5)))
        return summaries

    def build_usage_summary():
        return (
            int(random.weibullvariate(3, 1) * random.paretovariate(0.2)),
            int(random.weibullvariate(5, 1) * random.paretovariate(0.2)),
        )

    def build_calendar_data(project):
        start, stop = reports.get_calendar_query_range(interval, 3)
        rollup = 60 * 60 * 24
        series = []

        weekend = frozenset((5, 6))
        value = int(random.weibullvariate(5000, 3))
        for timestamp in tsdb.get_optimal_rollup_series(start, stop,
                                                        rollup)[1]:
            damping = random.uniform(
                0.2, 0.6) if to_datetime(timestamp).weekday() in weekend else 1
            jitter = random.paretovariate(1.2)
            series.append((timestamp, int(value * damping * jitter)))
            value = value * random.uniform(0.25, 2)

        return reports.clean_calendar_data(project, series, start, stop,
                                           rollup, stop)

    def build_report(project):
        daily_maximum = random.randint(1000, 10000)

        rollup = 60 * 60 * 24
        series = [(timestamp + (i * rollup), (random.randint(0, daily_maximum),
                                              random.randint(0,
                                                             daily_maximum)))
                  for i in xrange(0, 7)]

        aggregates = [
            random.randint(0, daily_maximum *
                           7) if random.random() < 0.9 else None
            for _ in xrange(0, 4)
        ]

        return reports.Report(
            series,
            aggregates,
            build_issue_summaries(),
            build_usage_summary(),
            build_calendar_data(project),
        )

    if random.random() < 0.85:
        personal = {
            'resolved': random.randint(0, 100),
            'users': int(random.paretovariate(0.2)),
        }
    else:
        personal = {
            'resolved': 0,
            'users': 0,
        }

    return MailPreview(
        html_template='sentry/emails/reports/body.html',
        text_template='sentry/emails/reports/body.txt',
        context={
            'duration':
            reports.durations[duration],
            'interval': {
                'start': reports.date_format(start),
                'stop': reports.date_format(stop),
            },
            'report':
            reports.to_context(
                organization, interval,
                {project: build_report(project)
                 for project in projects}),
            'organization':
            organization,
            'personal':
            personal,
            'user':
            request.user,
        },
    ).render(request)
Example #37
0
def save_event(cache_key=None,
               data=None,
               start_time=None,
               event_id=None,
               **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas, tsdb
    from sentry.models import ProjectKey

    if cache_key:
        data = default_cache.get(cache_key)

    if event_id is None and data is not None:
        event_id = data['event_id']

    if data is None:
        metrics.incr('events.failed',
                     tags={
                         'reason': 'cache',
                         'stage': 'post'
                     })
        return

    project_id = data.pop('project')

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    Raven.tags_context({
        'project': project_id,
    })

    try:
        manager = EventManager(data)
        manager.save(project_id)
    except HashDiscarded:
        tsdb.incr(
            tsdb.models.project_total_received_discarded,
            project_id,
            timestamp=to_datetime(start_time)
            if start_time is not None else None,
        )

        try:
            project = Project.objects.get_from_cache(id=project_id)
        except Project.DoesNotExist:
            pass
        else:
            project_key = None
            if data.get('key_id') is not None:
                try:
                    project_key = ProjectKey.objects.get_from_cache(
                        id=data['key_id'])
                except ProjectKey.DoesNotExist:
                    pass

            quotas.refund(
                project,
                key=project_key,
                timestamp=start_time,
            )

    finally:
        if cache_key:
            default_cache.delete(cache_key)
        if start_time:
            metrics.timing('events.time-to-process',
                           time() - start_time,
                           instance=data['platform'])
Example #38
0
def _to_interval(timestamp, duration):
    return (to_datetime(timestamp - duration), to_datetime(timestamp), )
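# --- Illustrative sketch (not part of the original source) ---
# _to_interval turns a UNIX timestamp plus a duration in seconds into a
# (start, end) pair of datetimes.  A minimal standalone version, assuming
# to_datetime simply converts a UNIX timestamp into a UTC datetime (the real
# helper is provided elsewhere in the codebase):
from datetime import datetime, timedelta

def _demo_to_datetime(ts):
    return datetime.utcfromtimestamp(ts)

def _demo_to_interval(timestamp, duration):
    return (_demo_to_datetime(timestamp - duration), _demo_to_datetime(timestamp))

one_week = 60 * 60 * 24 * 7
start, end = _demo_to_interval(1467331200, one_week)  # interval ending 2016-07-01 00:00 UTC
assert end - start == timedelta(days=7)
assert end == datetime(2016, 7, 1)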
Example #39
0
def track_outcome(org_id,
                  project_id,
                  key_id,
                  outcome,
                  reason=None,
                  timestamp=None):
    """
    This is a central point to track org/project counters per incoming event.
    NB: This should only ever be called once per incoming event, which means
    it should only be called at the point we know the final outcome for the
    event (invalid, rate_limited, accepted, discarded, etc.)

    This increments all the relevant legacy RedisTSDB counters, as well as
    sending a single metric event to Kafka which can be used to reconstruct the
    counters with SnubaTSDB.
    """
    global outcomes_publisher
    if outcomes_publisher is None:
        outcomes_publisher = QueuedPublisherService(
            KafkaPublisher(settings.KAFKA_CLUSTERS[outcomes['cluster']]))

    timestamp = timestamp or to_datetime(time.time())
    increment_list = []
    if outcome != 'invalid':
        # This simply preserves old behavior. We never counted invalid events
        # (too large, duplicate, CORS) toward regular `received` counts.
        increment_list.extend([
            (tsdb.models.project_total_received, project_id),
            (tsdb.models.organization_total_received, org_id),
            (tsdb.models.key_total_received, key_id),
        ])

    if outcome == 'filtered':
        increment_list.extend([
            (tsdb.models.project_total_blacklisted, project_id),
            (tsdb.models.organization_total_blacklisted, org_id),
            (tsdb.models.key_total_blacklisted, key_id),
        ])
    elif outcome == 'rate_limited':
        increment_list.extend([
            (tsdb.models.project_total_rejected, project_id),
            (tsdb.models.organization_total_rejected, org_id),
            (tsdb.models.key_total_rejected, key_id),
        ])

    if reason in FILTER_STAT_KEYS_TO_VALUES:
        increment_list.append((FILTER_STAT_KEYS_TO_VALUES[reason], project_id))

    increment_list = [(model, key) for model, key in increment_list
                      if key is not None]
    if increment_list:
        tsdb.incr_multi(increment_list, timestamp=timestamp)

    # Send a snuba metrics payload.
    if random.random() <= options.get('snuba.track-outcomes-sample-rate'):
        outcomes_publisher.publish(
            outcomes['topic'],
            json.dumps({
                'timestamp': timestamp,
                'org_id': org_id,
                'project_id': project_id,
                'key_id': key_id,
                'outcome': outcome,
                'reason': reason,
            }))
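# --- Illustrative sketch (not part of the original source) ---
# The counter bookkeeping above collects (model, key) pairs and drops any pair
# whose key is None (key_id, for example, may be absent) before handing the
# whole batch to tsdb.incr_multi.  A standalone version of that filtering
# step, with plain strings standing in for the tsdb model enums:
def _demo_build_increments(project_id, org_id, key_id):
    increments = [
        ('project_total_received', project_id),
        ('organization_total_received', org_id),
        ('key_total_received', key_id),
    ]
    return [(model, key) for model, key in increments if key is not None]

assert _demo_build_increments(1, 42, None) == [
    ('project_total_received', 1),
    ('organization_total_received', 42),
]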
Example #40
0
def track_outcome(
    org_id,
    project_id,
    key_id,
    outcome,
    reason=None,
    timestamp=None,
    event_id=None,
    category=None,
    quantity=None,
):
    """
    This is a central point to track org/project counters per incoming event.
    NB: This should only ever be called once per incoming event, which means
    it should only be called at the point we know the final outcome for the
    event (invalid, rate_limited, accepted, discarded, etc.)

    This increments all the relevant legacy RedisTSDB counters, as well as
    sending a single metric event to Kafka which can be used to reconstruct the
    counters with SnubaTSDB.
    """
    global outcomes_publisher
    if outcomes_publisher is None:
        outcomes_publisher = KafkaPublisher(settings.KAFKA_CLUSTERS[outcomes["cluster"]])

    if quantity is None:
        quantity = 1

    assert isinstance(org_id, six.integer_types)
    assert isinstance(project_id, six.integer_types)
    assert isinstance(key_id, (type(None), six.integer_types))
    assert isinstance(outcome, Outcome)
    assert isinstance(timestamp, (type(None), datetime))
    assert isinstance(category, (type(None), DataCategory))
    assert isinstance(quantity, int)

    timestamp = timestamp or to_datetime(time.time())

    tsdb_in_consumer = decide_tsdb_in_consumer()

    if not tsdb_in_consumer:
        increment_list = list(
            tsdb_increments_from_outcome(
                org_id=org_id, project_id=project_id, key_id=key_id, outcome=outcome, reason=reason
            )
        )

        if increment_list:
            tsdb.incr_multi(increment_list, timestamp=timestamp)

        if project_id and event_id:
            mark_tsdb_incremented(project_id, event_id)

    # Send a snuba metrics payload.
    outcomes_publisher.publish(
        outcomes["topic"],
        json.dumps(
            {
                "timestamp": timestamp,
                "org_id": org_id,
                "project_id": project_id,
                "key_id": key_id,
                "outcome": outcome.value,
                "reason": reason,
                "event_id": event_id,
                "category": category,
                "quantity": quantity,
            }
        ),
    )

    metrics.incr(
        "events.outcomes",
        skip_internal=True,
        tags={"outcome": outcome.name.lower(), "reason": reason},
    )
Example #41
0
def create_failed_event(cache_key,
                        project_id,
                        issues,
                        event_id,
                        start_time=None,
                        reprocessing_rev=None):
    """If processing failed we put the original data from the cache into a
    raw event.  Returns `True` if a failed event was inserted
    """
    reprocessing_active = ProjectOption.objects.get_value(
        project_id, 'sentry:reprocessing_active', REPROCESSING_DEFAULT)

    # In case there is reprocessing active but the current reprocessing
    # revision is already different than when we started, we want to
    # immediately retry the event.  This resolves the problem when
    # otherwise a concurrent change of debug symbols might leave a
    # reprocessing issue stuck in the project forever.
    if reprocessing_active and \
       reprocessing.get_reprocessing_revision(project_id, cached=False) != \
       reprocessing_rev:
        raise RetryProcessing()

    # The first time we encounter a failed event and the hint was cleared
    # we send a notification.
    sent_notification = ProjectOption.objects.get_value(
        project_id, 'sentry:sent_failed_event_hint', False)
    if not sent_notification:
        project = Project.objects.get_from_cache(id=project_id)
        Activity.objects.create(
            type=Activity.NEW_PROCESSING_ISSUES,
            project=project,
            datetime=to_datetime(start_time),
            data={
                'reprocessing_active': reprocessing_active,
                'issues': issues
            },
        ).send_notification()
        ProjectOption.objects.set_value(project,
                                        'sentry:sent_failed_event_hint', True)

    # If reprocessing is not active we bail now without creating the
    # processing issues
    if not reprocessing_active:
        return False

    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project_id, event_id)
    data = default_cache.get(cache_key)
    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'raw'})
        error_logger.error('process.failed_raw.empty',
                           extra={'cache_key': cache_key})
        return True

    from sentry.models import RawEvent, ProcessingIssue
    raw_event = RawEvent.objects.create(
        project_id=project_id,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(
            data['timestamp']).replace(tzinfo=timezone.utc),
        data=data)

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue['scope'],
            object=issue['object'],
            type=issue['type'],
            data=issue['data'],
        )

    default_cache.delete(cache_key)

    return True
Example #42
0
def track_outcome(org_id, project_id, key_id, outcome, reason=None, timestamp=None, event_id=None):
    """
    This is a central point to track org/project counters per incoming event.
    NB: This should only ever be called once per incoming event, which means
    it should only be called at the point we know the final outcome for the
    event (invalid, rate_limited, accepted, discarded, etc.)

    This increments all the relevant legacy RedisTSDB counters, as well as
    sending a single metric event to Kafka which can be used to reconstruct the
    counters with SnubaTSDB.
    """
    global outcomes_publisher
    if outcomes_publisher is None:
        outcomes_publisher = QueuedPublisherService(
            KafkaPublisher(settings.KAFKA_CLUSTERS[outcomes["cluster"]])
        )

    assert isinstance(org_id, six.integer_types)
    assert isinstance(project_id, six.integer_types)
    assert isinstance(key_id, (type(None), six.integer_types))
    assert isinstance(outcome, Outcome)
    assert isinstance(timestamp, (type(None), datetime))

    timestamp = timestamp or to_datetime(time.time())
    increment_list = []
    if outcome != Outcome.INVALID:
        # This simply preserves old behavior. We never counted invalid events
        # (too large, duplicate, CORS) toward regular `received` counts.
        increment_list.extend(
            [
                (tsdb.models.project_total_received, project_id),
                (tsdb.models.organization_total_received, org_id),
                (tsdb.models.key_total_received, key_id),
            ]
        )

    if outcome == Outcome.FILTERED:
        increment_list.extend(
            [
                (tsdb.models.project_total_blacklisted, project_id),
                (tsdb.models.organization_total_blacklisted, org_id),
                (tsdb.models.key_total_blacklisted, key_id),
            ]
        )
    elif outcome == Outcome.RATE_LIMITED:
        increment_list.extend(
            [
                (tsdb.models.project_total_rejected, project_id),
                (tsdb.models.organization_total_rejected, org_id),
                (tsdb.models.key_total_rejected, key_id),
            ]
        )

    if reason in FILTER_STAT_KEYS_TO_VALUES:
        increment_list.append((FILTER_STAT_KEYS_TO_VALUES[reason], project_id))

    increment_list = [(model, key) for model, key in increment_list if key is not None]
    if increment_list:
        tsdb.incr_multi(increment_list, timestamp=timestamp)

    # Send a snuba metrics payload.
    outcomes_publisher.publish(
        outcomes["topic"],
        json.dumps(
            {
                "timestamp": timestamp,
                "org_id": org_id,
                "project_id": project_id,
                "key_id": key_id,
                "outcome": outcome.value,
                "reason": reason,
                "event_id": event_id,
            }
        ),
    )

    metrics.incr(
        "events.outcomes",
        skip_internal=True,
        tags={"outcome": outcome.name.lower(), "reason": reason},
    )
Example #43
0
def digest(request):
    random = get_random(request)

    # TODO: Refactor all of these into something more manageable.
    org = Organization(id=1, slug="example", name="Example Organization")

    project = Project(id=1, slug="example", name="Example Project", organization=org)

    rules = {
        i: Rule(id=i, project=project, label="Rule #%s" % (i,))
        for i in range(1, random.randint(2, 4))
    }

    state = {
        "project": project,
        "groups": {},
        "rules": rules,
        "event_counts": {},
        "user_counts": {},
    }

    records = []

    event_sequence = itertools.count(1)
    group_generator = make_group_generator(random, project)

    for i in range(random.randint(1, 30)):
        group = next(group_generator)
        state["groups"][group.id] = group

        offset = timedelta(seconds=0)
        for i in range(random.randint(1, 10)):
            offset += timedelta(seconds=random.random() * 120)
            event = Event(
                id=next(event_sequence),
                event_id=uuid.uuid4().hex,
                project=project,
                group=group,
                message=group.message,
                data=load_data("python"),
                datetime=to_datetime(
                    random.randint(to_timestamp(group.first_seen), to_timestamp(group.last_seen))
                ),
            )

            records.append(
                Record(
                    event.event_id,
                    Notification(
                        event, random.sample(state["rules"], random.randint(1, len(state["rules"])))
                    ),
                    to_timestamp(event.datetime),
                )
            )

            state["event_counts"][group.id] = random.randint(10, 1e4)
            state["user_counts"][group.id] = random.randint(10, 1e4)

    digest = build_digest(project, records, state)
    start, end, counts = get_digest_metadata(digest)

    context = {
        "project": project,
        "counts": counts,
        "digest": digest,
        "start": start,
        "end": end,
        "referrer": "digest_email",
    }
    add_unsubscribe_link(context)

    return MailPreview(
        html_template="sentry/emails/digests/body.html",
        text_template="sentry/emails/digests/body.txt",
        context=context,
    ).render(request)
Example #44
0
 def datetime(self) -> Optional[datetime]:
     return to_datetime(self.timestamp)
Example #45
0
 def datetime(self):
     return to_datetime(self.timestamp)
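# --- Illustrative sketch (not part of the original source) ---
# The snippet above (like Example #44 before it) is a property body extracted
# from a class that stores a UNIX `timestamp` and exposes it as a datetime.
# A minimal standalone shape it could live in (class and helper names here
# are assumptions, not the real ones):
from datetime import datetime

class _TimestampedSketch(object):
    def __init__(self, timestamp):
        self.timestamp = timestamp

    @property
    def datetime(self):
        # stand-in for to_datetime(self.timestamp)
        return datetime.utcfromtimestamp(self.timestamp)

assert _TimestampedSketch(0).datetime == datetime(1970, 1, 1)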
Example #46
0
def process_event(event_manager, project, key, remote_addr, helper,
                  attachments):
    event_received.send_robust(ip=remote_addr,
                               project=project,
                               sender=process_event)

    start_time = time()
    tsdb_start_time = to_datetime(start_time)
    should_filter, filter_reason = event_manager.should_filter()
    if should_filter:
        increment_list = [
            (tsdb.models.project_total_received, project.id),
            (tsdb.models.project_total_blacklisted, project.id),
            (tsdb.models.organization_total_received, project.organization_id),
            (tsdb.models.organization_total_blacklisted,
             project.organization_id),
            (tsdb.models.key_total_received, key.id),
            (tsdb.models.key_total_blacklisted, key.id),
        ]
        try:
            increment_list.append(
                (FILTER_STAT_KEYS_TO_VALUES[filter_reason], project.id))
        # a KeyError is raised when filter_reason does not match a key in FILTER_STAT_KEYS_TO_VALUES
        except KeyError:
            pass

        tsdb.incr_multi(
            increment_list,
            timestamp=tsdb_start_time,
        )

        metrics.incr('events.blacklisted',
                     tags={'reason': filter_reason},
                     skip_internal=False)
        event_filtered.send_robust(
            ip=remote_addr,
            project=project,
            sender=process_event,
        )
        raise APIForbidden('Event dropped due to filter: %s' %
                           (filter_reason, ))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    rate_limit = safe_execute(quotas.is_rate_limited,
                              project=project,
                              key=key,
                              _with_transaction=False)
    if isinstance(rate_limit, bool):
        rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    if rate_limit is None or rate_limit.is_limited:
        if rate_limit is None:
            api_logger.debug('Dropped event due to error with rate limiter')
        tsdb.incr_multi(
            [
                (tsdb.models.project_total_received, project.id),
                (tsdb.models.project_total_rejected, project.id),
                (tsdb.models.organization_total_received,
                 project.organization_id),
                (tsdb.models.organization_total_rejected,
                 project.organization_id),
                (tsdb.models.key_total_received, key.id),
                (tsdb.models.key_total_rejected, key.id),
            ],
            timestamp=tsdb_start_time,
        )
        metrics.incr(
            'events.dropped',
            tags={
                'reason': rate_limit.reason_code if rate_limit else 'unknown',
            },
            skip_internal=False,
        )
        event_dropped.send_robust(
            ip=remote_addr,
            project=project,
            reason_code=rate_limit.reason_code if rate_limit else None,
            sender=process_event,
        )
        if rate_limit is not None:
            raise APIRateLimited(rate_limit.retry_after)
    else:
        tsdb.incr_multi(
            [
                (tsdb.models.project_total_received, project.id),
                (tsdb.models.organization_total_received,
                 project.organization_id),
                (tsdb.models.key_total_received, key.id),
            ],
            timestamp=tsdb_start_time,
        )

    org_options = OrganizationOption.objects.get_all_values(
        project.organization_id)

    data = event_manager.get_data()
    del event_manager

    event_id = data['event_id']

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    cache_key = 'ev:%s:%s' % (
        project.id,
        event_id,
    )

    if cache.get(cache_key) is not None:
        raise APIForbidden('An event with the same ID already exists (%s)' %
                           (event_id, ))

    scrub_ip_address = (
        org_options.get('sentry:require_scrub_ip_address', False)
        or project.get_option('sentry:scrub_ip_address', False))
    scrub_data = (org_options.get('sentry:require_scrub_data', False)
                  or project.get_option('sentry:scrub_data', True))

    if scrub_data:
        # We filter data immediately before it ever gets into the queue
        sensitive_fields_key = 'sentry:sensitive_fields'
        sensitive_fields = (org_options.get(sensitive_fields_key, []) +
                            project.get_option(sensitive_fields_key, []))

        exclude_fields_key = 'sentry:safe_fields'
        exclude_fields = (org_options.get(exclude_fields_key, []) +
                          project.get_option(exclude_fields_key, []))

        scrub_defaults = (org_options.get('sentry:require_scrub_defaults',
                                          False)
                          or project.get_option('sentry:scrub_defaults', True))

        SensitiveDataFilter(
            fields=sensitive_fields,
            include_defaults=scrub_defaults,
            exclude_fields=exclude_fields,
        ).apply(data)

    if scrub_ip_address:
        # We filter data immediately before it ever gets into the queue
        helper.ensure_does_not_have_ip(data)

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(data,
                                   start_time=start_time,
                                   attachments=attachments)

    cache.set(cache_key, '', 60 * 5)

    api_logger.debug('New event received (%s)', event_id)

    event_accepted.send_robust(
        ip=remote_addr,
        data=data,
        project=project,
        sender=process_event,
    )

    return event_id
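# --- Illustrative sketch (not part of the original source) ---
# The duplicate-event guard above keys a short-lived cache entry on
# (project id, event id) and rejects a second submission that reuses the same
# event id.  A minimal standalone version, using a plain dict in place of the
# real cache backend (the real code also sets a 5 minute TTL):
_demo_seen = {}

def _demo_check_duplicate(project_id, event_id):
    cache_key = 'ev:%s:%s' % (project_id, event_id)
    if cache_key in _demo_seen:
        raise ValueError('An event with the same ID already exists (%s)' % (event_id, ))
    _demo_seen[cache_key] = ''

_demo_check_duplicate(1, '44f1419e73884cd2b45c79918f4b6dc4')
try:
    _demo_check_duplicate(1, '44f1419e73884cd2b45c79918f4b6dc4')
except ValueError:
    pass  # the second submission with the same id is rejected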
Example #47
0
    def merge_frequencies(self,
                          model,
                          destination,
                          sources,
                          timestamp=None,
                          environment_ids=None):
        environment_ids = list((set(environment_ids) if environment_ids
                                is not None else set()).union([None]))

        self.validate_arguments([model], environment_ids)

        if not self.enable_frequency_sketches:
            return

        rollups = []
        for rollup, samples in self.rollups.items():
            _, series = self.get_optimal_rollup_series(
                to_datetime(
                    self.get_earliest_timestamp(rollup, timestamp=timestamp)),
                end=None,
                rollup=rollup,
            )
            rollups.append((rollup, map(to_datetime, series)))

        for (cluster, durable
             ), environment_ids in self.get_cluster_groups(environment_ids):
            exports = defaultdict(list)

            for source in sources:
                for rollup, series in rollups:
                    for timestamp in series:
                        keys = []
                        for environment_id in environment_ids:
                            keys.extend(
                                self.make_frequency_table_keys(
                                    model, rollup, to_timestamp(timestamp),
                                    source, environment_id))
                        arguments = ["EXPORT"] + list(
                            self.DEFAULT_SKETCH_PARAMETERS)
                        exports[source].extend([(CountMinScript, keys,
                                                 arguments), ["DEL"] + keys])

            try:
                responses = cluster.execute_commands(exports)
            except Exception:
                if durable:
                    raise
                else:
                    continue

            imports = []

            for source, results in responses.items():
                results = iter(results)
                for rollup, series in rollups:
                    for timestamp in series:
                        for environment_id, payload in zip(
                                environment_ids,
                                next(results).value):
                            imports.append((
                                CountMinScript,
                                self.make_frequency_table_keys(
                                    model,
                                    rollup,
                                    to_timestamp(timestamp),
                                    destination,
                                    environment_id,
                                ),
                                ["IMPORT"] +
                                list(self.DEFAULT_SKETCH_PARAMETERS) +
                                [payload],
                            ))
                        next(results)  # pop off the result of DEL

            try:
                cluster.execute_commands({destination: imports})
            except Exception:
                if durable:
                    raise
Example #48
0
def get_constrained_date_range(
    params,
    allowed_resolution: AllowedResolution = AllowedResolution.one_hour,
    max_points=MAX_POINTS,
) -> Tuple[datetime, datetime, int]:
    interval = parse_stats_period(params.get("interval", "1h"))
    interval = int(3600 if interval is None else interval.total_seconds())

    smallest_interval, interval_str = allowed_resolution.value
    if interval % smallest_interval != 0 or interval < smallest_interval:
        raise InvalidParams(
            f"The interval has to be a multiple of the minimum interval of {interval_str}."
        )

    if interval > ONE_DAY:
        raise InvalidParams("The interval has to be less than one day.")

    if ONE_DAY % interval != 0:
        raise InvalidParams(
            "The interval should divide one day without a remainder.")

    using_minute_resolution = interval % ONE_HOUR != 0

    start, end = get_date_range_from_params(params)
    now = get_now()

    # if `end` is explicitly given, we add a second to it, so it is treated as
    # inclusive. the rounding logic down below will take care of the rest.
    if params.get("end"):
        end += timedelta(seconds=1)

    date_range = end - start
    # round the range up to a multiple of the interval.
    # the minimum is 1h so the "totals" will not go out of sync, as they will
    # use the materialized storage due to no grouping on the `started` column.
    # NOTE: we can remove the difference between `interval` / `rounding_interval`
    # as soon as snuba can provide us with grouped totals in the same query
    # as the timeseries (using `WITH ROLLUP` in clickhouse)

    rounding_interval = int(math.ceil(interval / ONE_HOUR) * ONE_HOUR)

    # Hack to disable interval rounding for metrics-based queries:
    if interval < ONE_MINUTE:
        rounding_interval = interval

    date_range = timedelta(
        seconds=int(rounding_interval *
                    math.ceil(date_range.total_seconds() / rounding_interval)))

    if using_minute_resolution:
        if date_range.total_seconds() > 6 * ONE_HOUR:
            raise InvalidParams(
                "The time-range when using one-minute resolution intervals is restricted to 6 hours."
            )
        if (now - start).total_seconds() > 30 * ONE_DAY:
            raise InvalidParams(
                "The time-range when using one-minute resolution intervals is restricted to the last 30 days."
            )

    if date_range.total_seconds() / interval > max_points:
        raise InvalidParams(
            "Your interval and date range would create too many results. "
            "Use a larger interval, or a smaller date range.")

    end_ts = int(rounding_interval *
                 math.ceil(to_timestamp(end) / rounding_interval))
    end = to_datetime(end_ts)
    # when expanding the rounding interval, we would adjust the end time too far
    # to the future, in which case the start time would not actually contain our
    # desired date range. adjust for this by extending the range by another interval.
    # for example, when "45m" means the range from 08:49:00-09:34:00, our rounding
    # has to go from 08:00:00 to 10:00:00.
    if rounding_interval > interval and (end - date_range) > start:
        date_range += timedelta(seconds=rounding_interval)
    start = end - date_range

    # snuba <-> sentry has a 5 minute cache for *exact* queries, which these
    # are because of the way we do our rounding. For that reason we round the end
    # of "realtime" queries to one minute into the future to get a one-minute cache instead.
    if end > now:
        end = to_datetime(ONE_MINUTE *
                          (math.floor(to_timestamp(now) / ONE_MINUTE) + 1))

    return start, end, interval
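# --- Illustrative sketch (not part of the original source) ---
# The core of the rounding above: the requested range is padded up to a whole
# number of rounding intervals and the end is snapped forward to the next
# interval boundary; if that snap pushes the window past the requested start,
# the window is widened by one more interval.  A simplified standalone version
# of just that arithmetic (the full function above also gates the widening on
# rounding_interval > interval):
import math
from datetime import datetime, timedelta

def _demo_round_range(start, end, rounding_interval):
    epoch = datetime(1970, 1, 1)
    span = (end - start).total_seconds()
    span = rounding_interval * math.ceil(span / float(rounding_interval))
    end_ts = rounding_interval * math.ceil(
        (end - epoch).total_seconds() / float(rounding_interval))
    end = epoch + timedelta(seconds=end_ts)
    if end - timedelta(seconds=span) > start:
        span += rounding_interval
    return end - timedelta(seconds=span), end

# a 45 minute request at 08:49-09:34 rounded to one-hour boundaries covers 08:00-10:00
s, e = _demo_round_range(datetime(2021, 1, 1, 8, 49), datetime(2021, 1, 1, 9, 34), 3600)
assert (s, e) == (datetime(2021, 1, 1, 8, 0), datetime(2021, 1, 1, 10, 0))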
Example #49
0
    def process(self, request, project, key, auth, helper, data, attachments=None, **kwargs):
        metrics.incr('events.total')

        if not data:
            raise APIError('No JSON data was found')

        remote_addr = request.META['REMOTE_ADDR']

        data = LazyData(
            data=data,
            content_encoding=request.META.get('HTTP_CONTENT_ENCODING', ''),
            helper=helper,
            project=project,
            key=key,
            auth=auth,
            client_ip=remote_addr,
        )

        event_received.send_robust(
            ip=remote_addr,
            project=project,
            sender=type(self),
        )
        start_time = time()
        tsdb_start_time = to_datetime(start_time)
        should_filter, filter_reason = helper.should_filter(
            project, data, ip_address=remote_addr)
        if should_filter:
            increment_list = [
                (tsdb.models.project_total_received, project.id),
                (tsdb.models.project_total_blacklisted, project.id),
                (tsdb.models.organization_total_received,
                 project.organization_id),
                (tsdb.models.organization_total_blacklisted,
                 project.organization_id),
                (tsdb.models.key_total_received, key.id),
                (tsdb.models.key_total_blacklisted, key.id),
            ]
            try:
                increment_list.append(
                    (FILTER_STAT_KEYS_TO_VALUES[filter_reason], project.id))
            # a KeyError is raised when filter_reason does not match a key in FILTER_STAT_KEYS_TO_VALUES
            except KeyError:
                pass

            tsdb.incr_multi(
                increment_list,
                timestamp=tsdb_start_time,
            )

            metrics.incr('events.blacklisted', tags={
                         'reason': filter_reason})
            event_filtered.send_robust(
                ip=remote_addr,
                project=project,
                sender=type(self),
            )
            raise APIForbidden('Event dropped due to filter: %s' % (filter_reason,))

        # TODO: improve this API (e.g. make RateLimit act on __ne__)
        rate_limit = safe_execute(
            quotas.is_rate_limited, project=project, key=key, _with_transaction=False
        )
        if isinstance(rate_limit, bool):
            rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

        # XXX(dcramer): when the rate limiter fails we drop events to ensure
        # it cannot cascade
        if rate_limit is None or rate_limit.is_limited:
            if rate_limit is None:
                helper.log.debug(
                    'Dropped event due to error with rate limiter')
            tsdb.incr_multi(
                [
                    (tsdb.models.project_total_received, project.id),
                    (tsdb.models.project_total_rejected, project.id),
                    (tsdb.models.organization_total_received,
                     project.organization_id),
                    (tsdb.models.organization_total_rejected,
                     project.organization_id),
                    (tsdb.models.key_total_received, key.id),
                    (tsdb.models.key_total_rejected, key.id),
                ],
                timestamp=tsdb_start_time,
            )
            metrics.incr(
                'events.dropped',
                tags={
                    'reason': rate_limit.reason_code if rate_limit else 'unknown',
                }
            )
            event_dropped.send_robust(
                ip=remote_addr,
                project=project,
                sender=type(self),
                reason_code=rate_limit.reason_code if rate_limit else None,
            )
            if rate_limit is not None:
                raise APIRateLimited(rate_limit.retry_after)
        else:
            tsdb.incr_multi(
                [
                    (tsdb.models.project_total_received, project.id),
                    (tsdb.models.organization_total_received,
                     project.organization_id),
                    (tsdb.models.key_total_received, key.id),
                ],
                timestamp=tsdb_start_time,
            )

        org_options = OrganizationOption.objects.get_all_values(
            project.organization_id)

        event_id = data['event_id']

        # TODO(dcramer): ideally we'd only validate this if the event_id was
        # supplied by the user
        cache_key = 'ev:%s:%s' % (project.id, event_id, )

        if cache.get(cache_key) is not None:
            raise APIForbidden(
                'An event with the same ID already exists (%s)' % (event_id, ))

        scrub_ip_address = (org_options.get('sentry:require_scrub_ip_address', False) or
                            project.get_option('sentry:scrub_ip_address', False))
        scrub_data = (org_options.get('sentry:require_scrub_data', False) or
                      project.get_option('sentry:scrub_data', True))

        if scrub_data:
            # We filter data immediately before it ever gets into the queue
            sensitive_fields_key = 'sentry:sensitive_fields'
            sensitive_fields = (
                org_options.get(sensitive_fields_key, []) +
                project.get_option(sensitive_fields_key, [])
            )

            exclude_fields_key = 'sentry:safe_fields'
            exclude_fields = (
                org_options.get(exclude_fields_key, []) +
                project.get_option(exclude_fields_key, [])
            )

            scrub_defaults = (org_options.get('sentry:require_scrub_defaults', False) or
                              project.get_option('sentry:scrub_defaults', True))

            SensitiveDataFilter(
                fields=sensitive_fields,
                include_defaults=scrub_defaults,
                exclude_fields=exclude_fields,
            ).apply(data)

        if scrub_ip_address:
            # We filter data immediately before it ever gets into the queue
            helper.ensure_does_not_have_ip(data)

        # mutates data (strips a lot of context if not queued)
        helper.insert_data_to_database(data, start_time=start_time, attachments=attachments)

        cache.set(cache_key, '', 60 * 5)

        helper.log.debug('New event received (%s)', event_id)

        event_accepted.send_robust(
            ip=remote_addr,
            data=data,
            project=project,
            sender=type(self),
        )

        return event_id
Example #50
0
def create_failed_event(cache_key, project_id, issues, event_id, start_time=None,
                        reprocessing_rev=None):
    """If processing failed we put the original data from the cache into a
    raw event.  Returns `True` if a failed event was inserted
    """
    reprocessing_active = ProjectOption.objects.get_value(
        project_id, 'sentry:reprocessing_active', REPROCESSING_DEFAULT
    )

    # In case there is reprocessing active but the current reprocessing
    # revision is already different than when we started, we want to
    # immediately retry the event.  This resolves the problem when
    # otherwise a concurrent change of debug symbols might leave a
    # reprocessing issue stuck in the project forever.
    if reprocessing_active and \
       reprocessing.get_reprocessing_revision(project_id, cached=False) != \
       reprocessing_rev:
        raise RetryProcessing()

    # The first time we encounter a failed event and the hint was cleared
    # we send a notification.
    sent_notification = ProjectOption.objects.get_value(
        project_id, 'sentry:sent_failed_event_hint', False
    )
    if not sent_notification:
        project = Project.objects.get_from_cache(id=project_id)
        Activity.objects.create(
            type=Activity.NEW_PROCESSING_ISSUES,
            project=project,
            datetime=to_datetime(start_time),
            data={'reprocessing_active': reprocessing_active,
                  'issues': issues},
        ).send_notification()
        ProjectOption.objects.set_value(project, 'sentry:sent_failed_event_hint', True)

    # If reprocessing is not active we bail now without creating the
    # processing issues
    if not reprocessing_active:
        return False

    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project_id, event_id)
    data = default_cache.get(cache_key)
    if data is None:
        metrics.incr('events.failed', tags={'reason': 'cache', 'stage': 'raw'}, skip_internal=False)
        error_logger.error('process.failed_raw.empty', extra={'cache_key': cache_key})
        return True

    data = CanonicalKeyDict(data)
    from sentry.models import RawEvent, ProcessingIssue
    raw_event = RawEvent.objects.create(
        project_id=project_id,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(data['timestamp']).replace(tzinfo=timezone.utc),
        data=data
    )

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue['scope'],
            object=issue['object'],
            type=issue['type'],
            data=issue['data'],
        )

    default_cache.delete(cache_key)

    return True
Example #51
0
    def merge_frequencies(self, model, destination, sources, timestamp=None, environment_ids=None):
        environment_ids = list(
            (set(environment_ids) if environment_ids is not None else set()).union(
                [None]))

        self.validate_arguments([model], environment_ids)

        if not self.enable_frequency_sketches:
            return

        rollups = []
        for rollup, samples in self.rollups.items():
            _, series = self.get_optimal_rollup_series(
                to_datetime(self.get_earliest_timestamp(rollup, timestamp=timestamp)),
                end=None,
                rollup=rollup,
            )
            rollups.append((rollup, map(to_datetime, series), ))

        for (cluster, durable), environment_ids in self.get_cluster_groups(environment_ids):
            exports = defaultdict(list)

            for source in sources:
                for rollup, series in rollups:
                    for timestamp in series:
                        keys = []
                        for environment_id in environment_ids:
                            keys.extend(
                                self.make_frequency_table_keys(
                                    model,
                                    rollup,
                                    to_timestamp(timestamp),
                                    source,
                                    environment_id,
                                )
                            )
                        arguments = ['EXPORT'] + list(self.DEFAULT_SKETCH_PARAMETERS)
                        exports[source].extend(
                            [
                                (CountMinScript, keys, arguments),
                                ['DEL'] + keys,
                            ]
                        )

            try:
                responses = cluster.execute_commands(exports)
            except Exception:
                if durable:
                    raise
                else:
                    continue

            imports = []

            for source, results in responses.items():
                results = iter(results)
                for rollup, series in rollups:
                    for timestamp in series:
                        for environment_id, payload in zip(environment_ids, next(results).value):
                            imports.append(
                                (
                                    CountMinScript,
                                    self.make_frequency_table_keys(
                                        model,
                                        rollup,
                                        to_timestamp(timestamp),
                                        destination,
                                        environment_id,
                                    ),
                                    ['IMPORT'] + list(self.DEFAULT_SKETCH_PARAMETERS) + [payload],
                                ),
                            )
                        next(results)  # pop off the result of DEL

            try:
                cluster.execute_commands({
                    destination: imports,
                })
            except Exception:
                if durable:
                    raise
Example #52
0
def get_project_release_stats(project_id, release, stat, rollup, start, end, environments=None):
    assert stat in ("users", "sessions")

    # snuba treats the end of the query range as exclusive and we bucket to a
    # full hour, so round the end up to the next bucket boundary.
    end = to_datetime((to_timestamp(end) // DATASET_BUCKET + 1) * DATASET_BUCKET)

    filter_keys = {"project_id": [project_id]}
    conditions = [["release", "=", release]]
    if environments is not None:
        conditions.append(["environment", "IN", environments])

    buckets = int((end - start).total_seconds() / rollup)
    stats = _make_stats(start, rollup, buckets, default=None)

    # Due to the nature of the probabilistic data structures some
    # subtractions can become negative.  As such we're making sure a number
    # never goes below zero to avoid confusion.

    totals = {
        stat: 0,
        stat + "_healthy": 0,
        stat + "_crashed": 0,
        stat + "_abnormal": 0,
        stat + "_errored": 0,
    }

    for rv in raw_query(
        dataset=Dataset.Sessions,
        selected_columns=[
            "bucketed_started",
            stat,
            stat + "_crashed",
            stat + "_abnormal",
            stat + "_errored",
            "duration_quantiles",
        ],
        groupby=["bucketed_started"],
        start=start,
        end=end,
        rollup=rollup,
        conditions=conditions,
        filter_keys=filter_keys,
        referrer="sessions.release-stats-details",
    )["data"]:
        ts = parse_snuba_datetime(rv["bucketed_started"])
        bucket = int((ts - start).total_seconds() / rollup)
        stats[bucket][1] = {
            stat: rv[stat],
            stat + "_healthy": max(0, rv[stat] - rv[stat + "_errored"]),
            stat + "_crashed": rv[stat + "_crashed"],
            stat + "_abnormal": rv[stat + "_abnormal"],
            stat
            + "_errored": max(
                0, rv[stat + "_errored"] - rv[stat + "_crashed"] - rv[stat + "_abnormal"]
            ),
        }
        stats[bucket][1].update(extract_duration_quantiles(rv))

        # Session stats we can sum up directly without another query
        # as the data becomes available.
        if stat == "sessions":
            for k in totals:
                totals[k] += stats[bucket][1][k]

    for idx, bucket in enumerate(stats):
        if bucket[1] is None:
            stats[idx][1] = {
                stat: 0,
                stat + "_healthy": 0,
                stat + "_crashed": 0,
                stat + "_abnormal": 0,
                stat + "_errored": 0,
                "duration_p50": None,
                "duration_p90": None,
            }

    # For users we need a secondary query over the entire time range
    if stat == "users":
        rows = raw_query(
            dataset=Dataset.Sessions,
            selected_columns=["users", "users_crashed", "users_abnormal", "users_errored"],
            start=start,
            end=end,
            conditions=conditions,
            filter_keys=filter_keys,
            referrer="sessions.crash-free-breakdown-users",
        )["data"]
        if rows:
            rv = rows[0]
            totals = {
                "users": rv["users"],
                "users_healthy": max(0, rv["users"] - rv["users_errored"]),
                "users_crashed": rv["users_crashed"],
                "users_abnormal": rv["users_abnormal"],
                "users_errored": max(
                    0, rv["users_errored"] - rv["users_crashed"] - rv["users_abnormal"]
                ),
            }

    return stats, totals
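# --- Illustrative sketch (not part of the original source) ---
# The first line of the function snaps `end` forward to the start of the next
# dataset bucket, because snuba treats the end of the range as exclusive.
# A standalone version of that arithmetic, using a one-hour bucket purely for
# illustration (DATASET_BUCKET itself is defined elsewhere):
def _demo_round_end_up(end_ts, bucket=3600):
    return (end_ts // bucket + 1) * bucket

assert _demo_round_end_up(10 * 3600 + 1) == 11 * 3600  # 10:00:01 -> 11:00:00
assert _demo_round_end_up(10 * 3600) == 11 * 3600      # an exact boundary still moves forward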
Example #53
0
    def merge_distinct_counts(self,
                              model,
                              destination,
                              sources,
                              timestamp=None):
        rollups = {}
        for rollup, samples in self.rollups.items():
            _, series = self.get_optimal_rollup_series(
                to_datetime(
                    self.get_earliest_timestamp(rollup, timestamp=timestamp)),
                end=None,
                rollup=rollup,
            )
            rollups[rollup] = map(to_datetime, series)

        temporary_id = uuid.uuid1().hex

        def make_temporary_key(key):
            return '{}{}:{}'.format(self.prefix, temporary_id, key)

        data = {}
        for rollup, series in rollups.items():
            data[rollup] = {timestamp: [] for timestamp in series}

        with self.cluster.fanout() as client:
            for source in sources:
                c = client.target_key(source)
                for rollup, series in data.items():
                    for timestamp, results in series.items():
                        key = self.make_key(
                            model,
                            rollup,
                            to_timestamp(timestamp),
                            source,
                        )
                        results.append(c.get(key))
                        c.delete(key)

        with self.cluster.fanout() as client:
            c = client.target_key(destination)

            temporary_key_sequence = itertools.count()

            for rollup, series in data.items():
                for timestamp, results in series.items():
                    values = {}
                    for result in results:
                        if result.value is None:
                            continue
                        k = make_temporary_key(next(temporary_key_sequence))
                        values[k] = result.value

                    if values:
                        key = self.make_key(
                            model,
                            rollup,
                            to_timestamp(timestamp),
                            destination,
                        )
                        c.mset(values)
                        c.pfmerge(key, key, *values.keys())
                        c.delete(*values.keys())
                        c.expireat(
                            key,
                            self.calculate_expiry(
                                rollup,
                                self.rollups[rollup],
                                timestamp,
                            ),
                        )
Example #54
0
def create_failed_event(
    cache_key, data, project_id, issues, event_id, start_time=None, reprocessing_rev=None
):
    """If processing failed we put the original data from the cache into a
    raw event.  Returns `True` if a failed event was inserted
    """
    # We can only create failed events for events that can potentially
    # create failed events.
    if not reprocessing.event_supports_reprocessing(data):
        return False

    reprocessing_active = ProjectOption.objects.get_value(
        project_id, "sentry:reprocessing_active", REPROCESSING_DEFAULT
    )

    # In case there is reprocessing active but the current reprocessing
    # revision is already different than when we started, we want to
    # immediately retry the event.  This resolves the problem when
    # otherwise a concurrent change of debug symbols might leave a
    # reprocessing issue stuck in the project forever.
    if (
        reprocessing_active
        and reprocessing.get_reprocessing_revision(project_id, cached=False) != reprocessing_rev
    ):
        raise RetryProcessing()

    # The first time we encounter a failed event and the hint was cleared
    # we send a notification.
    sent_notification = ProjectOption.objects.get_value(
        project_id, "sentry:sent_failed_event_hint", False
    )
    if not sent_notification:
        project = Project.objects.get_from_cache(id=project_id)
        Activity.objects.create(
            type=Activity.NEW_PROCESSING_ISSUES,
            project=project,
            datetime=to_datetime(start_time),
            data={"reprocessing_active": reprocessing_active, "issues": issues},
        ).send_notification()
        ProjectOption.objects.set_value(project, "sentry:sent_failed_event_hint", True)

    # If reprocessing is not active we bail now without creating the
    # processing issues
    if not reprocessing_active:
        return False

    # We need to get the original data here instead of passing the data in
    # from the last processing step because we do not want any
    # modifications to take place.
    delete_raw_event(project_id, event_id)
    data = default_cache.get(cache_key)
    if data is None:
        metrics.incr("events.failed", tags={"reason": "cache", "stage": "raw"}, skip_internal=False)
        error_logger.error("process.failed_raw.empty", extra={"cache_key": cache_key})
        return True

    data = CanonicalKeyDict(data)
    from sentry.models import RawEvent, ProcessingIssue

    raw_event = RawEvent.objects.create(
        project_id=project_id,
        event_id=event_id,
        datetime=datetime.utcfromtimestamp(data["timestamp"]).replace(tzinfo=timezone.utc),
        data=data,
    )

    for issue in issues:
        ProcessingIssue.objects.record_processing_issue(
            raw_event=raw_event,
            scope=issue["scope"],
            object=issue["object"],
            type=issue["type"],
            data=issue["data"],
        )

    default_cache.delete(cache_key)

    return True
Example #55
0
def alert(request):
    platform = request.GET.get('platform', 'python')
    org = Organization(
        id=1,
        slug='example',
        name='Example',
    )
    team = Team(
        id=1,
        slug='example',
        name='Example',
        organization=org,
    )
    project = Project(
        id=1,
        slug='example',
        name='Example',
        team=team,
        organization=org,
    )

    random = get_random(request)
    group = next(make_group_generator(random, project), )

    event = Event(
        id=1,
        event_id='44f1419e73884cd2b45c79918f4b6dc4',
        project=project,
        group=group,
        message=group.message,
        data=load_data(platform),
        datetime=to_datetime(
            random.randint(
                to_timestamp(group.first_seen),
                to_timestamp(group.last_seen),
            ), ),
    )

    rule = Rule(label="An example rule")

    interface_list = []
    for interface in six.itervalues(event.interfaces):
        body = interface.to_email_html(event)
        if not body:
            continue
        interface_list.append((interface.get_title(), mark_safe(body)))

    return MailPreview(
        html_template='sentry/emails/error.html',
        text_template='sentry/emails/error.txt',
        context={
            'rule':
            rule,
            'group':
            group,
            'event':
            event,
            'link':
            'http://example.com/link',
            'interfaces':
            interface_list,
            'project_label':
            project.name,
            'tags': [('logger', 'javascript'), ('environment', 'prod'),
                     ('level', 'error'), ('device', 'Other')]
        },
    ).render(request)
Example #56
0
def build_project_breakdown_series(reports):
    def get_legend_data(report):
        accepted_errors, accepted_transactions, filtered, rate_limited = report.series_outcomes
        return {
            "accepted_errors": accepted_errors,
            "accepted_transactions": accepted_transactions,
            "filtered": filtered,
            "rate_limited": rate_limited,
        }

    # Find the reports with the most total events. (The number of reports to
    # keep is the same as the number of colors available to use in the legend.)
    instances = map(
        operator.itemgetter(0),
        sorted(
            reports.items(),
            key=lambda instance__report: sum(
                sum(values) for timestamp, values in instance__report[1][0]),
            reverse=True,
        ),
    )[:len(project_breakdown_colors)]

    # Start building the list of items to include in the report chart. This
    # is a list of [Key, Report] pairs, in *ascending* order of the total sum
    # of values in the series. (This is so when we render the series, the
    # largest color blocks are at the bottom and it feels appropriately
    # weighted.)
    selections = map(
        lambda instance__color: (
            Key(
                instance__color[0].slug,
                instance__color[0].get_absolute_url(),
                instance__color[1],
                get_legend_data(reports[instance__color[0]]),
            ),
            reports[instance__color[0]],
        ),
        zip(instances, project_breakdown_colors),
    )[::-1]

    # Collect any reports that weren't in the selection set, merge them
    # together, and add the merged result at the top (front) of the stack.
    overflow = set(reports) - set(instances)
    if overflow:
        overflow_report = reduce(merge_reports,
                                 [reports[instance] for instance in overflow])
        selections.insert(
            0, (Key("Other", None, "#f2f0fa",
                    get_legend_data(overflow_report)), overflow_report))

    def summarize(key, points):
        total = sum(points)
        return [(key, total)] if total else []

    # Collect all of the independent series into a single series to make it
    # easier to render, resulting in a series where each value is a sequence of
    # (key, count) pairs.
    series = reduce(
        merge_series,
        [
            series_map(partial(summarize, key), report[0])
            for key, report in selections
        ],
    )

    legend = [key for key, value in reversed(selections)]
    return {
        "points": [(to_datetime(timestamp), value) for timestamp, value in series],
        "maximum": max(sum(count for key, count in value)
                       for timestamp, value in series),
        "legend": {
            "rows": legend,
            "total": Key("Total", None, total_color,
                         reduce(merge_mappings, [key.data for key in legend])),
        },
    }
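The real series_map, merge_series, and summarize helpers are not part of this snippet, so the following is only a minimal, self-contained sketch of the behavior they are assumed to have: each per-project series is reduced to (key, count) pairs and the per-project series are then folded together bucket by bucket.

from functools import partial, reduce


def series_map(function, series):
    # Assumed behavior: apply `function` to the value of every bucket.
    return [(timestamp, function(value)) for timestamp, value in series]


def merge_series(left, right, function=lambda a, b: a + b):
    # Assumed behavior: combine two bucket-aligned series pairwise.
    return [(ts, function(lv, rv)) for (ts, lv), (_, rv) in zip(left, right)]


def summarize(key, points):
    total = sum(points)
    return [(key, total)] if total else []


# Two made-up per-project series with aligned 60-second buckets.
series_a = [(0, [1, 2]), (60, [0, 0]), (120, [3, 1])]
series_b = [(0, [4, 0]), (60, [2, 2]), (120, [0, 0])]

combined = reduce(
    merge_series,
    [
        series_map(partial(summarize, key), series)
        for key, series in [('project-a', series_a), ('project-b', series_b)]
    ],
)
# combined == [(0, [('project-a', 3), ('project-b', 4)]),
#              (60, [('project-b', 4)]),
#              (120, [('project-a', 4)])]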
Example #57
0
    def get_data(self,
                 model,
                 keys,
                 start,
                 end,
                 rollup=None,
                 environment_id=None,
                 aggregation='count()',
                 group_on_model=True,
                 group_on_time=False):
        """
        Normalizes all the TSDB parameters and sends a query to snuba.

        `group_on_time`: whether to add a GROUP BY clause on the 'time' field.
        `group_on_model`: whether to add a GROUP BY clause on the primary model.
        """
        model_columns = self.model_columns.get(model)

        if model_columns is None:
            raise Exception("Unsupported TSDBModel: {}".format(model.name))

        model_group, model_aggregate = model_columns

        groupby = []
        if group_on_model and model_group is not None:
            groupby.append(model_group)
        if group_on_time:
            groupby.append('time')
        if aggregation == 'count()' and model_aggregate is not None:
            # Special case, because count has different semantics, we change:
            # `COUNT(model_aggregate)` to `COUNT() GROUP BY model_aggregate`
            groupby.append(model_aggregate)
            model_aggregate = None

        keys_map = dict(zip(model_columns, self.flatten_keys(keys)))
        keys_map = {
            k: v
            for k, v in six.iteritems(keys_map)
            if k is not None and v is not None
        }
        if environment_id is not None:
            keys_map['environment'] = [environment_id]

        aggregations = [[aggregation, model_aggregate, 'aggregate']]

        # For historical compatibility with bucket-counted TSDB implementations
        # we grab the original bucketed series and add the rollup time to the
        # timestamp of the last bucket to get the end time.
        rollup, series = self.get_optimal_rollup_series(start, end, rollup)
        start = to_datetime(series[0])
        end = to_datetime(series[-1] + rollup)

        result = snuba.query(start,
                             end,
                             groupby,
                             None,
                             keys_map,
                             aggregations,
                             rollup,
                             referrer='tsdb')

        if group_on_time:
            keys_map['time'] = series

        self.zerofill(result, groupby, keys_map)
        self.trim(result, groupby, keys)

        return result
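get_optimal_rollup_series is not included in this snippet, so the sketch below is only an assumption about its shape: it is taken to floor the query window to rollup boundaries and return one unix timestamp per bucket, which is why the snippet then widens the end of the window by one rollup so the last bucket is fully covered.

def bucketed_series(start_ts, end_ts, rollup):
    # Assumed behavior: floor both ends of the window to rollup boundaries
    # and return one unix timestamp per bucket (the real helper works with
    # datetimes and chooses the rollup itself when none is given).
    first = int(start_ts) // rollup * rollup
    last = int(end_ts) // rollup * rollup
    return list(range(first, last + rollup, rollup))


series = bucketed_series(90, 350, 60)  # [60, 120, 180, 240, 300]
# Mirroring the snippet above:
#   start = to_datetime(series[0])        # bucket at 60s
#   end = to_datetime(series[-1] + 60)    # 300s + one rollup = 360s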
Example #58
0
def _to_interval(timestamp, duration):
    return (
        to_datetime(timestamp - duration),
        to_datetime(timestamp),
    )
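An illustrative use of the helper above (the timestamp values are made up for the example): it returns the (start, end) pair of datetimes for an interval of the given duration ending at the given unix timestamp.

# One-hour interval ending at 2016-06-01 00:00:00 UTC.
start, end = _to_interval(1464739200, 3600)
# start == to_datetime(1464735600)  # 2016-05-31 23:00:00 UTC
# end == to_datetime(1464739200)    # 2016-06-01 00:00:00 UTC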
Example #59
0
def digest(request):
    random = get_random(request)

    # TODO: Refactor all of these into something more manageable.
    org = Organization(id=1, slug="example", name="Example Organization")

    project = Project(id=1,
                      slug="example",
                      name="Example Project",
                      organization=org)

    rules = {
        i: Rule(id=i, project=project, label="Rule #%s" % (i, ))
        for i in range(1, random.randint(2, 4))
    }

    state = {
        "project": project,
        "groups": {},
        "rules": rules,
        "event_counts": {},
        "user_counts": {},
    }

    records = []

    group_generator = make_group_generator(random, project)

    for i in range(random.randint(1, 30)):
        group = next(group_generator)
        state["groups"][group.id] = group

        offset = timedelta(seconds=0)
        for _ in range(random.randint(1, 10)):
            offset += timedelta(seconds=random.random() * 120)

            data = dict(load_data("python"))
            data["message"] = group.message
            data.pop("logentry", None)

            event_manager = EventManager(data)
            event_manager.normalize()
            data = event_manager.get_data()

            timestamp = to_datetime(
                random.randint(to_timestamp(group.first_seen),
                               to_timestamp(group.last_seen)))

            event = SnubaEvent({
                "event_id": uuid.uuid4().hex,
                "project_id": project.id,
                "group_id": group.id,
                "message": group.message,
                "data": data.data,
                "timestamp": timestamp.strftime("%Y-%m-%dT%H:%M:%S"),
            })
            event.group = group

            records.append(
                Record(
                    event.event_id,
                    Notification(
                        event,
                        random.sample(state["rules"],
                                      random.randint(1, len(state["rules"])))),
                    to_timestamp(event.datetime),
                ))

            state["event_counts"][group.id] = random.randint(10, 1e4)
            state["user_counts"][group.id] = random.randint(10, 1e4)

    digest = build_digest(project, records, state)
    start, end, counts = get_digest_metadata(digest)

    context = {
        "project": project,
        "counts": counts,
        "digest": digest,
        "start": start,
        "end": end,
        "referrer": "digest_email",
    }
    add_unsubscribe_link(context)

    return MailPreview(
        html_template="sentry/emails/digests/body.html",
        text_template="sentry/emails/digests/body.txt",
        context=context,
    ).render(request)