Example #1
    def test_quantize_time_matches_duration(self):
        """ The number of seconds between keys changing should match duration """
        previous_key = quantize_time(self.now, 0, duration=10)
        changes = []
        for i in range(21):
            current_time = self.now + timedelta(seconds=i)
            current_key = quantize_time(current_time, 0, duration=10)
            if current_key != previous_key:
                changes.append(current_time)
                previous_key = current_key

        assert len(changes) == 2
        assert (changes[1] - changes[0]).total_seconds() == 10
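quantize_time itself does not appear in these examples. A minimal sketch that would satisfy the tests shown here — bucket the timestamp into duration-sized windows and shift the window boundaries by a per-key jitter — could look like the following; the return type (a datetime), the default duration of 300 seconds, and the implementation details are assumptions, and the real snuba.quantize_time may differ.

from datetime import datetime


def quantize_time(time, key_hash, duration=300):
    # Sketch only, not the real snuba.quantize_time: shift the bucket
    # boundaries by a per-key jitter so that not every key rolls over to a
    # new bucket at the same instant.
    jitter = key_hash % duration
    seconds = int(time.timestamp())
    # Start of the duration-sized window containing `time`, offset by the jitter.
    bucket_start = ((seconds - jitter) // duration) * duration + jitter
    return datetime.fromtimestamp(bucket_start)

With duration=10, a key with jitter 0 changes exactly on multiples of 10 seconds while a key with jitter 5 changes 5 seconds later, which is the staggering the jitter tests below rely on.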
Example #2
    def test_quantize_day_edges(self):
        """a suffix should still behave correctly around the end of a day

        This test is nearly identical to test_quantize_hour_edges, but confirms that a change of date doesn't
        cause different behaviour.
        """
        before = datetime(2019, 9, 5, 23, 59, 59)
        next_day = datetime(2019, 9, 6, 0, 0, 0)
        changed_on_hour = 0
        for key_hash in range(10):
            before_key = quantize_time(before, key_hash, duration=10)
            next_key = quantize_time(next_day, key_hash, duration=10)
            if before_key != next_key:
                changed_on_hour += 1

        assert changed_on_hour == 1
Example #3
    def test_quantize_hour_edges(self):
        """a suffix should still behave correctly around the end of the hour

        With a duration of 10, only one of the key hashes 0-9 should flip on the hour; the other nine
        should flip at different times.
        """
        before = datetime(2019, 9, 5, 17, 59, 59)
        on_hour = datetime(2019, 9, 5, 18, 0, 0)
        changed_on_hour = 0
        # Check multiple key hashes so that this test doesn't depend on the implementation
        for key_hash in range(10):
            before_key = quantize_time(before, key_hash, duration=10)
            on_key = quantize_time(on_hour, key_hash, duration=10)
            if before_key != on_key:
                changed_on_hour += 1

        assert changed_on_hour == 1
Example #4
    def test_quantize_time_jitter(self):
        """ Different key hashes should change keys at different times

            While starting_key and other_key might begin as the same value, they should change at different times.
        """
        starting_key = quantize_time(self.now, 0, duration=10)
        for i in range(11):
            current_key = quantize_time(self.now + timedelta(seconds=i), 0, duration=10)
            if current_key != starting_key:
                break

        other_key = quantize_time(self.now, 5, duration=10)
        for j in range(11):
            current_key = quantize_time(self.now + timedelta(seconds=j), 5, duration=10)
            if current_key != other_key:
                break

        assert i != j
Example #5
    def quantize_date_params(self, request: Request, params: Dict[str, Any]) -> Dict[str, Any]:
        # We only need to perform this rounding on relative date periods
        if "statsPeriod" not in request.GET:
            return params
        results = params.copy()
        duration = (params["end"] - params["start"]).total_seconds()
        # Only perform rounding on durations longer than an hour
        if duration > 3600:
            # Round to 15 minutes for periods of 30 days or longer, otherwise round to the minute
            round_to = 15 * 60 if duration >= 30 * 24 * 3600 else 60
            for key in ["start", "end"]:
                results[key] = snuba.quantize_time(
                    params[key], params.get("organization_id", 0), duration=round_to
                )
        return results
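As a quick illustration of the thresholds in quantize_date_params, the helper below (round_to_for is a hypothetical name, not part of the codebase) simply mirrors the round_to selection above:

def round_to_for(duration_seconds):
    # Mirrors the logic above: no rounding for periods of an hour or less,
    # 15-minute buckets for periods of 30 days or longer, otherwise 1 minute.
    if duration_seconds <= 3600:
        return None
    return 15 * 60 if duration_seconds >= 30 * 24 * 3600 else 60


# 90-day period -> 15-minute rounding; 7-day period -> 1-minute; 30 minutes -> none
assert round_to_for(90 * 24 * 3600) == 15 * 60
assert round_to_for(7 * 24 * 3600) == 60
assert round_to_for(30 * 60) is None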
Example #6
    def test_cache_suffix_time(self):
        starting_key = quantize_time(self.now, 0)
        finishing_key = quantize_time(self.now + timedelta(seconds=300), 0)

        assert starting_key != finishing_key
Example #7
    def __get_tag_keys_for_projects(
        self,
        projects,
        group_id,
        environments,
        start,
        end,
        limit=1000,
        keys=None,
        include_values_seen=True,
        use_cache=False,
        **kwargs
    ):
        """ Query snuba for tag keys based on projects

            When use_cache is passed, we'll attempt to use the cache. There's an exception if group_id was
            passed, since that refines the query enough that caching isn't required.
            The cache key is based on the filters being passed, so that different queries don't hit the same
            cache entry, with an exception for the start and end dates: since even a microsecond passing would
            produce a different cache key, we would otherwise always miss the cache.
            Instead, to keep the cache key stable for a short period, we append the duration and the end time,
            rounded with a certain jitter, to the cache key.
            The jitter is based on the hash of the key before the duration/end time is added, so that it is
            consistent per query. Its intent is to avoid a dogpile effect of many queries being invalidated at
            the same time, which is done by shifting the rounding of the end time by a per-key offset. See
            snuba.quantize_time for further explanation of how that is done.
        """
        default_start, default_end = default_start_end_dates()
        if start is None:
            start = default_start
        if end is None:
            end = default_end

        filters = {"project_id": sorted(projects)}
        if environments:
            filters["environment"] = sorted(environments)
        if group_id is not None:
            filters["group_id"] = [group_id]
        if keys is not None:
            filters["tags_key"] = sorted(keys)
        aggregations = [["count()", "", "count"]]

        if include_values_seen:
            aggregations.append(["uniq", "tags_value", "values_seen"])
        conditions = []

        should_cache = use_cache and group_id is None
        result = None

        if should_cache:
            filtering_strings = [
                u"{}={}".format(key, value) for key, value in six.iteritems(filters)
            ]
            cache_key = u"tagstore.__get_tag_keys:{}".format(
                md5_text(*filtering_strings).hexdigest()
            )
            key_hash = hash(cache_key)
            should_cache = (key_hash % 1000) / 1000.0 <= options.get(
                "snuba.tagstore.cache-tagkeys-rate"
            )

        # Only continue attempting to cache if we passed the cache rate check above
        if should_cache:
            # This needs to happen before creating the cache suffix, otherwise rounding would produce different durations
            duration = (end - start).total_seconds()
            # Because the cache suffix is created by rounding, update the query end so the results match
            end = snuba.quantize_time(end, key_hash)
            cache_key += u":{}@{}".format(duration, end.isoformat())
            result = cache.get(cache_key, None)
            if result is not None:
                metrics.incr("testing.tagstore.cache_tag_key.hit")
            else:
                metrics.incr("testing.tagstore.cache_tag_key.miss")

        if result is None:
            result = snuba.query(
                start=start,
                end=end,
                groupby=["tags_key"],
                conditions=conditions,
                filter_keys=filters,
                aggregations=aggregations,
                limit=limit,
                orderby="-count",
                referrer="tagstore.__get_tag_keys",
                **kwargs
            )
            if should_cache:
                cache.set(cache_key, result, 300)
                metrics.incr("testing.tagstore.cache_tag_key.len", amount=len(result))

        if group_id is None:
            ctor = TagKey
        else:
            ctor = functools.partial(GroupTagKey, group_id=group_id)

        results = set()
        for key, data in six.iteritems(result):
            params = {"key": key}
            if include_values_seen:
                params["values_seen"] = data["values_seen"]
                params["count"] = data["count"]
            else:
                # If only one aggregate is requested then data is just that raw
                # aggregate value, rather than a dictionary of
                # key:aggregate_value pairs
                params["count"] = data
            results.add(ctor(**params))
        return results
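The caching decision described in the docstring of __get_tag_keys_for_projects can be condensed into a standalone sketch. build_cache_key and cache_rate are illustrative names only; the real code reads the rate from options, hashes with md5_text, and calls snuba.quantize_time directly (here a quantize_time-style callable is injected to keep the sketch self-contained).

import hashlib


def build_cache_key(filters, start, end, cache_rate, quantize_time):
    # Hash the filters so that different queries get different cache keys.
    filtering_strings = ["{}={}".format(key, value) for key, value in sorted(filters.items())]
    base_key = "tagstore.__get_tag_keys:{}".format(
        hashlib.md5("".join(filtering_strings).encode("utf-8")).hexdigest()
    )
    key_hash = hash(base_key)
    # Sample: only a configurable fraction of key hashes are cached at all.
    if (key_hash % 1000) / 1000.0 > cache_rate:
        return False, base_key
    # Append the duration and a quantized end time; the per-key jitter in
    # quantize_time staggers when different queries roll over to a new key,
    # which avoids the dogpile effect mentioned in the docstring.
    duration = (end - start).total_seconds()
    quantized_end = quantize_time(end, key_hash)
    return True, "{}:{}@{}".format(base_key, duration, quantized_end.isoformat())

If the second element of the returned tuple is used as the cache key, its "@end" suffix stays stable for a short window, and different queries expire at different moments rather than all at once.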