def test_quantize_time_matches_duration(self):
    """The number of seconds between keys changing should match duration"""
    previous_key = quantize_time(self.now, 0, duration=10)
    changes = []
    for i in range(21):
        current_time = self.now + timedelta(seconds=i)
        current_key = quantize_time(current_time, 0, duration=10)
        if current_key != previous_key:
            changes.append(current_time)
            previous_key = current_key

    assert len(changes) == 2
    assert (changes[1] - changes[0]).total_seconds() == 10
def test_quantize_day_edges(self):
    """A suffix should still behave correctly around the end of a day.

    This test is nearly identical to test_quantize_hour_edges, but it
    confirms that date changes don't cause different behaviour.
    """
    before = datetime(2019, 9, 5, 23, 59, 59)
    next_day = datetime(2019, 9, 6, 0, 0, 0)
    changed_on_hour = 0
    for key_hash in range(10):
        before_key = quantize_time(before, key_hash, duration=10)
        next_key = quantize_time(next_day, key_hash, duration=10)
        if before_key != next_key:
            changed_on_hour += 1

    assert changed_on_hour == 1
def test_quantize_hour_edges(self):
    """A suffix should still behave correctly around the end of the hour.

    At a duration of 10, only one of the 10 key hashes should flip exactly
    on the hour; the other 9 should flip at different times.
    """
    before = datetime(2019, 9, 5, 17, 59, 59)
    on_hour = datetime(2019, 9, 5, 18, 0, 0)
    changed_on_hour = 0
    # Check multiple key hashes so that this test doesn't depend on the implementation
    for key_hash in range(10):
        before_key = quantize_time(before, key_hash, duration=10)
        on_key = quantize_time(on_hour, key_hash, duration=10)
        if before_key != on_key:
            changed_on_hour += 1

    assert changed_on_hour == 1
def test_quantize_time_jitter(self):
    """Different key hashes should change keys at different times.

    While starting_key and other_key might begin with the same value,
    they should change at different times.
    """
    starting_key = quantize_time(self.now, 0, duration=10)
    for i in range(11):
        current_key = quantize_time(self.now + timedelta(seconds=i), 0, duration=10)
        if current_key != starting_key:
            break

    other_key = quantize_time(self.now, 5, duration=10)
    for j in range(11):
        current_key = quantize_time(self.now + timedelta(seconds=j), 5, duration=10)
        if current_key != other_key:
            break

    assert i != j
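# The tests above pin down quantize_time's contract: keys change every
# `duration` seconds, and different key hashes roll over at different seconds.
# The sketch below is a minimal, hypothetical illustration of that contract,
# not the actual sentry.utils.snuba implementation; the default duration of
# 300 is assumed from test_cache_suffix_time, and it reuses the datetime and
# timedelta imports the tests above already rely on.
def _example_quantize_time(time, key_hash, duration=300):
    # Derive a per-key offset so each key_hash rolls over on its own second
    # within the duration-sized bucket.
    offset = key_hash % duration
    # Floor the offset-shifted timestamp to the bucket, then shift back.
    seconds = int((time - datetime(1970, 1, 1)).total_seconds()) - offset
    bucket = seconds - (seconds % duration)
    return datetime(1970, 1, 1) + timedelta(seconds=bucket + offset)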
def quantize_date_params(self, request: Request, params: Dict[str, Any]) -> Dict[str, Any]:
    # We only need to perform this rounding on relative date periods
    if "statsPeriod" not in request.GET:
        return params
    results = params.copy()
    duration = (params["end"] - params["start"]).total_seconds()
    # Only perform rounding on durations longer than an hour
    if duration > 3600:
        # Round to 15 minutes if over 30 days, otherwise round to the minute
        round_to = 15 * 60 if duration >= 30 * 24 * 3600 else 60
        for key in ["start", "end"]:
            results[key] = snuba.quantize_time(
                params[key], params.get("organization_id", 0), duration=round_to
            )
    return results
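# Illustrative note on the helper above, under assumptions drawn only from this
# section: for a relative period such as statsPeriod=90d the duration exceeds
# 30 days, so both bounds are quantized into 15-minute buckets (offset by the
# organization id, per quantize_time's jitter), and repeated requests within
# the same bucket produce identical start/end values. A hypothetical call,
# with illustrative values, could look like:
#
#   params = {
#       "start": datetime(2019, 6, 7, 12, 3, 27),
#       "end": datetime(2019, 9, 5, 12, 3, 27),
#       "organization_id": 42,
#   }
#   rounded = self.quantize_date_params(request, params)
#   # rounded["start"] and rounded["end"] now sit on the organization's
#   # 15-minute bucket boundaries instead of the raw request times.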
def test_cache_suffix_time(self):
    starting_key = quantize_time(self.now, 0)
    finishing_key = quantize_time(self.now + timedelta(seconds=300), 0)

    assert starting_key != finishing_key
def __get_tag_keys_for_projects(
    self,
    projects,
    group_id,
    environments,
    start,
    end,
    limit=1000,
    keys=None,
    include_values_seen=True,
    use_cache=False,
    **kwargs
):
    """Query snuba for tag keys based on projects.

    When use_cache is passed we attempt to use the cache, unless group_id
    is also passed, since that refines the query enough that caching isn't
    required.

    The cache key is based on the filters being passed, so that different
    queries don't hit the same cache, with an exception for the start and
    end dates: since even a microsecond of difference would produce a new
    cache key, we would otherwise always miss the cache. Instead, to keep
    the cache key stable for a short period, we append the duration and the
    end time rounded with a certain jitter to the cache key. The jitter is
    based on the hash of the key before the duration/end time is added, so
    it is consistent per query. The jitter's intent is to avoid a dogpile
    effect of many queries being invalidated at the same time; this is done
    by shifting the rounding of the end key by a per-key offset. See
    snuba.quantize_time for further explanation of how that is done.
    """
    default_start, default_end = default_start_end_dates()
    if start is None:
        start = default_start
    if end is None:
        end = default_end

    filters = {"project_id": sorted(projects)}
    if environments:
        filters["environment"] = sorted(environments)
    if group_id is not None:
        filters["group_id"] = [group_id]
    if keys is not None:
        filters["tags_key"] = sorted(keys)

    aggregations = [["count()", "", "count"]]
    if include_values_seen:
        aggregations.append(["uniq", "tags_value", "values_seen"])
    conditions = []

    should_cache = use_cache and group_id is None
    result = None

    if should_cache:
        filtering_strings = [
            u"{}={}".format(key, value) for key, value in six.iteritems(filters)
        ]
        cache_key = u"tagstore.__get_tag_keys:{}".format(
            md5_text(*filtering_strings).hexdigest()
        )
        key_hash = hash(cache_key)
        # Sample which keys get cached based on the configured rate
        should_cache = (key_hash % 1000) / 1000.0 <= options.get(
            "snuba.tagstore.cache-tagkeys-rate"
        )

    # If we still want to cache after checking against the cache rate
    if should_cache:
        # Needs to happen before creating the cache suffix, otherwise rounding will cause different durations
        duration = (end - start).total_seconds()
        # Because rounding is used to create this cache suffix, update the query end so results match
        end = snuba.quantize_time(end, key_hash)
        cache_key += u":{}@{}".format(duration, end.isoformat())

        result = cache.get(cache_key, None)
        if result is not None:
            metrics.incr("testing.tagstore.cache_tag_key.hit")
        else:
            metrics.incr("testing.tagstore.cache_tag_key.miss")

    if result is None:
        result = snuba.query(
            start=start,
            end=end,
            groupby=["tags_key"],
            conditions=conditions,
            filter_keys=filters,
            aggregations=aggregations,
            limit=limit,
            orderby="-count",
            referrer="tagstore.__get_tag_keys",
            **kwargs
        )
        if should_cache:
            cache.set(cache_key, result, 300)
            metrics.incr("testing.tagstore.cache_tag_key.len", amount=len(result))

    if group_id is None:
        ctor = TagKey
    else:
        ctor = functools.partial(GroupTagKey, group_id=group_id)

    results = set()
    for key, data in six.iteritems(result):
        params = {"key": key}
        if include_values_seen:
            params["values_seen"] = data["values_seen"]
            params["count"] = data["count"]
        else:
            # If only one aggregate is requested then data is just that raw
            # aggregate value, rather than a dictionary of
            # key:aggregate_value pairs
            params["count"] = data
        results.add(ctor(**params))

    return results
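# Hedged illustration of the dogpile-avoidance property described in the
# docstring above (the filter strings and end value here are hypothetical, and
# the behaviour is assumed from the quantize_time tests earlier in this
# section): because the quantize offset is derived from each cache key's hash,
# entries built from different filter sets roll into a new time bucket, and
# therefore miss the cache, at different seconds rather than all expiring on a
# shared boundary.
#
#   key_a = hash(u"tagstore.__get_tag_keys:" + md5_text(u"project_id=[1]").hexdigest())
#   key_b = hash(u"tagstore.__get_tag_keys:" + md5_text(u"project_id=[2]").hexdigest())
#   # With high probability these two suffixes stop matching their previous
#   # values at different seconds:
#   snuba.quantize_time(end, key_a)
#   snuba.quantize_time(end, key_b)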