def test_epoch_tz_aware(self):
    dt = pytz.utc.localize(datetime(1970, 1, 1, 0, 10, 0, 0))
    self.assertEqual(util.epoch(dt), 600)
    dt = pytz.timezone('Europe/Berlin').localize(
        datetime(1970, 1, 1, 1, 10, 0, 0))
    self.assertEqual(util.epoch(dt), 600)
def fetchData(requestContext, pathExpr):
    seriesList = []
    startTime = int(epoch(requestContext['startTime']))
    endTime = int(epoch(requestContext['endTime']))

    def _fetchData(pathExpr, startTime, endTime, requestContext, seriesList):
        # Materialize the nodes so the debug logging below cannot exhaust a
        # generator before we fetch from it.
        matching_nodes = list(STORE.find(pathExpr, startTime, endTime, local=requestContext['localOnly']))
        log.info("matching_nodes: " + str(matching_nodes))
        log.info("is_leaf flags: " + str([node.is_leaf for node in matching_nodes]))
        fetches = [(node, node.fetch(startTime, endTime)) for node in matching_nodes if node.is_leaf]

        for node, results in fetches:
            if isinstance(results, FetchInProgress):
                results = results.waitForResults()

            if not results:
                log.info("render.datalib.fetchData :: no results for %s.fetch(%s, %s)" % (node, startTime, endTime))
                continue

            try:
                (timeInfo, values) = results
            except ValueError as e:
                raise Exception("could not parse timeInfo/values from metric '%s': %s" % (node.path, e))
            (start, end, step) = timeInfo

            series = TimeSeries(node.path, start, end, step, values)
            series.pathExpression = pathExpr  # hack to pass expressions through to render functions
            seriesList.append(series)

        # Prune empty series with duplicate metric paths to avoid showing
        # empty graph elements for old whisper data.
        names = set([s.name for s in seriesList])
        for name in names:
            series_with_duplicate_names = [s for s in seriesList if s.name == name]
            empty_duplicates = [s for s in series_with_duplicate_names if not nonempty(s)]

            if series_with_duplicate_names == empty_duplicates and len(empty_duplicates) > 0:  # if they're all empty
                empty_duplicates.pop()  # make sure we leave one in seriesList

            for series in empty_duplicates:
                seriesList.remove(series)

        return seriesList

    retries = 1  # start counting at one to make log output and settings more readable
    while True:
        try:
            seriesList = _fetchData(pathExpr, startTime, endTime, requestContext, seriesList)
            return seriesList
        except Exception as e:
            if retries >= settings.MAX_FETCH_RETRIES:
                log.exception("Failed after %i retries! See: %s" % (settings.MAX_FETCH_RETRIES, e))
                raise Exception("Failed after %i retries! See: %s" % (settings.MAX_FETCH_RETRIES, e))
            else:
                log.exception("Got an exception when fetching data! See: %s Will retry. Attempt %i of %i" % (e, retries, settings.MAX_FETCH_RETRIES))
                retries += 1
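# The pruning step above relies on a nonempty() helper that is not shown in
# this excerpt. A minimal sketch of what it plausibly does (hedged; graphite-web
# defines it alongside fetchData): a series counts as non-empty if it holds at
# least one datapoint that is not None.
def nonempty(series):
    for value in series:
        if value is not None:
            return True
    return False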
def test_epoch_naive(self, mock_log):
    with self.settings(TIME_ZONE='UTC'):
        dt = datetime(1970, 1, 1, 0, 10, 0, 0)
        self.assertEqual(util.epoch(dt), 600)
        self.assertEqual(mock_log.call_count, 1)
        self.assertEqual(len(mock_log.call_args[0]), 1)
        self.assertRegexpMatches(
            mock_log.call_args[0][0],
            r'epoch\(\) called with non-timezone-aware datetime in test_epoch_naive at .+/webapp/tests/test_util\.py:[0-9]+')

    with self.settings(TIME_ZONE='Europe/Berlin'):
        dt = datetime(1970, 1, 1, 1, 10, 0, 0)
        self.assertEqual(util.epoch(dt), 600)
        self.assertEqual(mock_log.call_count, 2)
        self.assertEqual(len(mock_log.call_args[0]), 1)
        self.assertRegexpMatches(
            mock_log.call_args[0][0],
            r'epoch\(\) called with non-timezone-aware datetime in test_epoch_naive at .+/webapp/tests/test_util\.py:[0-9]+')
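# A minimal sketch of what util.epoch could look like, inferred from the two
# tests above. The exact implementation, the logger, and the use of traceback
# to report the caller's location are assumptions, not graphite-web's actual code.
import calendar
import logging
import traceback

import pytz
from django.conf import settings

log = logging.getLogger(__name__)


def epoch(dt):
    """Convert a datetime to integer Unix epoch seconds."""
    if dt.tzinfo is None:
        # Naive datetimes are interpreted in the configured TIME_ZONE, and the
        # caller is warned with its location, matching the regex the test expects.
        caller = traceback.extract_stack()[-2]  # (filename, lineno, name, text)
        log.warning('epoch() called with non-timezone-aware datetime in %s at %s:%s'
                    % (caller[2], caller[0], caller[1]))
        dt = pytz.timezone(settings.TIME_ZONE).localize(dt)
    return calendar.timegm(dt.astimezone(pytz.utc).timetuple())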
def default(self, obj):
    if isinstance(obj, datetime.datetime):
        return epoch(obj)
    return json.JSONEncoder.default(self, obj)
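# A hedged usage sketch for the default() hook above: attached to a
# json.JSONEncoder subclass (DatetimeEncoder is a hypothetical name), it makes
# json.dumps render datetime values as epoch seconds.
import datetime
import json

import pytz


class DatetimeEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, datetime.datetime):
            return epoch(obj)  # util.epoch, as exercised by the tests above
        return json.JSONEncoder.default(self, obj)


# prints: {"until": 600}
print(json.dumps({'until': pytz.utc.localize(datetime.datetime(1970, 1, 1, 0, 10))},
                 cls=DatetimeEncoder))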
def find_view(request):
    "View for finding metrics matching a given pattern"
    queryParams = request.GET.copy()
    queryParams.update(request.POST)

    format = queryParams.get('format', 'treejson')
    leaves_only = queryParamAsInt(queryParams, 'leavesOnly', 0)
    local_only = queryParamAsInt(queryParams, 'local', 0)
    wildcards = queryParamAsInt(queryParams, 'wildcards', 0)

    tzinfo = pytz.timezone(settings.TIME_ZONE)
    if 'tz' in queryParams:
        try:
            value = queryParams['tz']
            tzinfo = pytz.timezone(value)
        except pytz.UnknownTimeZoneError:
            pass
        except Exception as e:
            raise InputParameterError(
                'Invalid value {value} for param tz: {err}'.format(
                    value=repr(value), err=str(e)))

    if 'now' in queryParams:
        try:
            value = queryParams['now']
            now = parseATTime(value, tzinfo)
        except Exception as e:
            raise InputParameterError(
                'Invalid value {value} for param now: {err}'.format(
                    value=repr(value), err=str(e)))
    else:
        now = datetime.now(tzinfo)

    if 'from' in queryParams and str(queryParams['from']) != '-1':
        try:
            value = queryParams['from']
            fromTime = int(epoch(parseATTime(value, tzinfo, now)))
        except Exception as e:
            raise InputParameterError(
                'Invalid value {value} for param from: {err}'.format(
                    value=repr(value), err=str(e)))
    else:
        fromTime = -1

    if 'until' in queryParams and str(queryParams['until']) != '-1':
        try:
            value = queryParams['until']
            untilTime = int(epoch(parseATTime(value, tzinfo, now)))
        except Exception as e:
            raise InputParameterError(
                'Invalid value {value} for param until: {err}'.format(
                    value=repr(value), err=str(e)))
    else:
        untilTime = -1

    nodePosition = queryParamAsInt(queryParams, 'position', -1)
    jsonp = queryParams.get('jsonp', False)
    forward_headers = extractForwardHeaders(request)

    if fromTime == -1:
        fromTime = None
    if untilTime == -1:
        untilTime = None

    automatic_variants = queryParamAsInt(queryParams, 'automatic_variants', 0)

    try:
        query = str(queryParams['query'])
    except KeyError:
        raise InputParameterError("Missing required parameter 'query'")

    if query == '':
        raise InputParameterError("Required parameter 'query' is empty")

    if '.' in query:
        base_path = query.rsplit('.', 1)[0] + '.'
    else:
        base_path = ''

    if format == 'completer':
        query = query.replace('..', '*.')
        if not query.endswith('*'):
            query += '*'

        if automatic_variants:
            query_parts = query.split('.')
            for i, part in enumerate(query_parts):
                if ',' in part and '{' not in part:
                    query_parts[i] = '{%s}' % part
            query = '.'.join(query_parts)

    try:
        matches = list(STORE.find(
            query, fromTime, untilTime,
            local=local_only,
            headers=forward_headers,
            leaves_only=leaves_only,
        ))
    except Exception:
        log.exception()
        raise

    log.info('find_view query=%s local_only=%s matches=%d' % (query, local_only, len(matches)))
    matches.sort(key=lambda node: node.name)
    log.info(
        "received remote find request: pattern=%s from=%s until=%s local_only=%s format=%s matches=%d"
        % (query, fromTime, untilTime, local_only, format, len(matches)))

    if format == 'treejson':
        profile = getProfile(request)
        content = tree_json(matches, base_path, wildcards=profile.advancedUI or wildcards)
        response = json_response_for(request, content, jsonp=jsonp)

    elif format == 'nodelist':
        content = nodes_by_position(matches, nodePosition)
        response = json_response_for(request, content, jsonp=jsonp)

    elif format == 'pickle':
        content = pickle_nodes(matches)
        response = HttpResponse(content, content_type='application/pickle')

    elif format == 'msgpack':
        content = msgpack_nodes(matches)
        response = HttpResponse(content, content_type='application/x-msgpack')

    elif format == 'json':
        content = json_nodes(matches)
        response = json_response_for(request, content, jsonp=jsonp)

    elif format == 'completer':
        results = []
        for node in matches:
            node_info = dict(path=node.path, name=node.name, is_leaf=str(int(node.is_leaf)))
            if not node.is_leaf:
                node_info['path'] += '.'
            results.append(node_info)

        if len(results) > 1 and wildcards:
            wildcardNode = {'name': '*'}
            results.append(wildcardNode)

        response = json_response_for(request, {'metrics': results}, jsonp=jsonp)

    else:
        return HttpResponseBadRequest(
            content="Invalid value for 'format' parameter",
            content_type='text/plain')

    response['Pragma'] = 'no-cache'
    response['Cache-Control'] = 'no-cache'
    return response
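# find_view above uses a queryParamAsInt helper that is not part of this
# excerpt. A plausible sketch (hedged; the exact error handling is an
# assumption): fall back to a default when the parameter is absent and raise
# InputParameterError on non-integer input.
def queryParamAsInt(queryParams, name, default):
    if name not in queryParams:
        return default
    try:
        return int(queryParams[name])
    except Exception as e:
        raise InputParameterError('Invalid int value {value} for param {name}: {err}'.format(
            value=repr(queryParams[name]), name=name, err=str(e)))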
def fetchData(requestContext, pathExpr):
    seriesList = {}
    startTime = int(epoch(requestContext['startTime']))
    endTime = int(epoch(requestContext['endTime']))

    def _fetchData(pathExpr, startTime, endTime, requestContext, seriesList):
        matching_nodes = STORE.find(pathExpr, startTime, endTime, local=requestContext['localOnly'])
        fetches = [(node, node.fetch(startTime, endTime)) for node in matching_nodes if node.is_leaf]

        # Used as a cache to avoid recounting series None values below; kept
        # outside the loop so the counts persist across all fetched series.
        series_best_nones = {}

        for node, results in fetches:
            if isinstance(results, FetchInProgress):
                results = results.waitForResults()

            if not results:
                log.info("render.datalib.fetchData :: no results for %s.fetch(%s, %s)" % (node, startTime, endTime))
                continue

            try:
                (timeInfo, values) = results
            except ValueError as e:
                raise Exception("could not parse timeInfo/values from metric '%s': %s" % (node.path, e))
            (start, end, step) = timeInfo

            series = TimeSeries(node.path, start, end, step, values)
            series.pathExpression = pathExpr  # hack to pass expressions through to render functions

            if series.name in seriesList:
                # This counts the Nones in each series, and is unfortunately O(n) for each
                # series, which may be worth further optimization. The value of doing this
                # at all is to avoid the "flipping" effect of loading a graph multiple times
                # and having inconsistent data returned if one of the backing stores has
                # inconsistent data. This is imperfect as a validity test, but in practice
                # nicely keeps us using the "most complete" dataset available. Think of it
                # as a very weak CRDT resolver.
                candidate_nones = 0
                if not settings.REMOTE_STORE_MERGE_RESULTS:
                    candidate_nones = len([val for val in series if val is None])

                known = seriesList[series.name]
                # To avoid repeatedly recounting the 'Nones' in series we've already seen,
                # cache the best known count so far in a dict.
                if known.name in series_best_nones:
                    known_nones = series_best_nones[known.name]
                else:
                    known_nones = len([val for val in known if val is None])

                if known_nones > candidate_nones:
                    if settings.REMOTE_STORE_MERGE_RESULTS:
                        # This series has potential data that might be missing from
                        # earlier series. Attempt to merge in useful data and update
                        # the cached count.
                        log.info("Merging multiple TimeSeries for %s" % known.name)
                        for i, j in enumerate(known):
                            if j is None and series[i] is not None:
                                known[i] = series[i]
                                known_nones -= 1
                        # Store known_nones in our cache
                        series_best_nones[known.name] = known_nones
                    else:
                        # Not merging data -
                        # we've found a series better than what we've already seen. Update
                        # the count cache and replace the given series in the dict.
                        series_best_nones[known.name] = candidate_nones
                        seriesList[known.name] = series
                else:
                    # If we are merging data, the existing series has no gaps and there
                    # is nothing to merge in. Save ourselves some work here.
                    #
                    # OR, if we are picking the best series:
                    #
                    # We already have this series in the seriesList, and the
                    # candidate is 'worse' than what we already have, so we don't need
                    # to compare anything else. Save ourselves some work here.
                    continue

            # If we looked at this series above and it matched a 'known' series,
            # it was merged or ignored already. If not, add it here.
            else:
                seriesList[series.name] = series

        # Stabilize the order of the results by ordering the resulting series by name.
        # This returns the result ordering to the behavior observed pre PR#1010.
        return [seriesList[k] for k in sorted(seriesList)]

    retries = 1  # start counting at one to make log output and settings more readable
    while True:
        try:
            seriesList = _fetchData(pathExpr, startTime, endTime, requestContext, seriesList)
            return seriesList
        except Exception as e:
            if retries >= settings.MAX_FETCH_RETRIES:
                log.exception("Failed after %s retries! Root cause:\n%s" % (settings.MAX_FETCH_RETRIES, format_exc()))
                raise e
            else:
                log.exception("Got an exception when fetching data! Attempt %i of %i. Root cause:\n%s" % (retries, settings.MAX_FETCH_RETRIES, format_exc()))
                retries += 1
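# A standalone illustration of the gap-filling merge performed above when
# REMOTE_STORE_MERGE_RESULTS is enabled: None datapoints in the known series
# are filled from a candidate series covering the same metric path.
known = [1, None, 3, None]
candidate = [None, 2, 3, 4]
for i, val in enumerate(known):
    if val is None and candidate[i] is not None:
        known[i] = candidate[i]
assert known == [1, 2, 3, 4]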
def find_view(request):
    "View for finding metrics matching a given pattern"
    queryParams = request.GET.copy()
    queryParams.update(request.POST)

    format = queryParams.get('format', 'treejson')
    leaves_only = int(queryParams.get('leavesOnly', 0))
    local_only = int(queryParams.get('local', 0))
    wildcards = int(queryParams.get('wildcards', 0))

    tzinfo = pytz.timezone(settings.TIME_ZONE)
    if 'tz' in queryParams:
        try:
            tzinfo = pytz.timezone(queryParams['tz'])
        except pytz.UnknownTimeZoneError:
            pass

    if 'now' in queryParams:
        now = parseATTime(queryParams['now'], tzinfo)
    else:
        now = datetime.now(tzinfo)

    if 'from' in queryParams and str(queryParams['from']) != '-1':
        fromTime = int(epoch(parseATTime(queryParams['from'], tzinfo, now)))
    else:
        fromTime = -1

    if 'until' in queryParams and str(queryParams['until']) != '-1':
        untilTime = int(epoch(parseATTime(queryParams['until'], tzinfo, now)))
    else:
        untilTime = -1

    nodePosition = int(queryParams.get('position', -1))
    jsonp = queryParams.get('jsonp', False)
    forward_headers = extractForwardHeaders(request)

    if fromTime == -1:
        fromTime = None
    if untilTime == -1:
        untilTime = None

    automatic_variants = int(queryParams.get('automatic_variants', 0))

    try:
        query = str(queryParams['query'])
    except KeyError:
        return HttpResponseBadRequest(content="Missing required parameter 'query'",
                                      content_type='text/plain')

    if query == '':
        return HttpResponseBadRequest(content="Required parameter 'query' is empty",
                                      content_type='text/plain')

    if '.' in query:
        base_path = query.rsplit('.', 1)[0] + '.'
    else:
        base_path = ''

    if format == 'completer':
        query = query.replace('..', '*.')
        if not query.endswith('*'):
            query += '*'

        if automatic_variants:
            query_parts = query.split('.')
            for i, part in enumerate(query_parts):
                if ',' in part and '{' not in part:
                    query_parts[i] = '{%s}' % part
            query = '.'.join(query_parts)

    try:
        matches = list(STORE.find(
            query, fromTime, untilTime,
            local=local_only,
            headers=forward_headers,
            leaves_only=leaves_only,
        ))
    except Exception:
        log.exception()
        raise

    log.info('find_view query=%s local_only=%s matches=%d' % (query, local_only, len(matches)))
    matches.sort(key=lambda node: node.name)
    log.info("received remote find request: pattern=%s from=%s until=%s local_only=%s format=%s matches=%d"
             % (query, fromTime, untilTime, local_only, format, len(matches)))

    if format == 'treejson':
        profile = getProfile(request)
        content = tree_json(matches, base_path, wildcards=profile.advancedUI or wildcards)
        response = json_response_for(request, content, jsonp=jsonp)

    elif format == 'nodelist':
        content = nodes_by_position(matches, nodePosition)
        response = json_response_for(request, content, jsonp=jsonp)

    elif format == 'pickle':
        content = pickle_nodes(matches)
        response = HttpResponse(content, content_type='application/pickle')

    elif format == 'msgpack':
        content = msgpack_nodes(matches)
        response = HttpResponse(content, content_type='application/x-msgpack')

    elif format == 'json':
        content = json_nodes(matches)
        response = json_response_for(request, content, jsonp=jsonp)

    elif format == 'completer':
        results = []
        for node in matches:
            node_info = dict(path=node.path, name=node.name, is_leaf=str(int(node.is_leaf)))
            if not node.is_leaf:
                node_info['path'] += '.'
            results.append(node_info)

        if len(results) > 1 and wildcards:
            wildcardNode = {'name': '*'}
            results.append(wildcardNode)

        response = json_response_for(request, {'metrics': results}, jsonp=jsonp)

    else:
        return HttpResponseBadRequest(
            content="Invalid value for 'format' parameter",
            content_type='text/plain')

    response['Pragma'] = 'no-cache'
    response['Cache-Control'] = 'no-cache'
    return response
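# How the completer branch rewrites queries, traced standalone: '..' expands
# to '*.', a trailing '*' is ensured, and with automatic_variants enabled any
# comma-separated segment is wrapped in braces.
query = 'servers..cpu,mem'
query = query.replace('..', '*.')
if not query.endswith('*'):
    query += '*'
query_parts = query.split('.')
for i, part in enumerate(query_parts):
    if ',' in part and '{' not in part:
        query_parts[i] = '{%s}' % part
print('.'.join(query_parts))  # servers.*.{cpu,mem*}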
def fetchData(requestContext, pathExpr):
    seriesList = []
    startTime = int(epoch(requestContext['startTime']))
    endTime = int(epoch(requestContext['endTime']))

    def _fetchData(pathExpr, startTime, endTime, requestContext, seriesList):
        matching_nodes = STORE.find(pathExpr, startTime, endTime, local=requestContext['localOnly'])
        fetches = [(node, node.fetch(startTime, endTime)) for node in matching_nodes if node.is_leaf]

        for node, results in fetches:
            if isinstance(results, FetchInProgress):
                results = results.waitForResults()

            if not results:
                log.info("render.datalib.fetchData :: no results for %s.fetch(%s, %s)" % (node, startTime, endTime))
                continue

            try:
                (timeInfo, values) = results
            except ValueError as e:
                raise Exception("could not parse timeInfo/values from metric '%s': %s" % (node.path, e))
            (start, end, step) = timeInfo

            series = TimeSeries(node.path, start, end, step, values)
            series.pathExpression = pathExpr  # hack to pass expressions through to render functions
            seriesList.append(series)

        # Prune empty series with duplicate metric paths to avoid showing
        # empty graph elements for old whisper data.
        names = set([s.name for s in seriesList])
        for name in names:
            series_with_duplicate_names = [s for s in seriesList if s.name == name]
            empty_duplicates = [s for s in series_with_duplicate_names if not nonempty(s)]

            if series_with_duplicate_names == empty_duplicates and len(empty_duplicates) > 0:  # if they're all empty
                empty_duplicates.pop()  # make sure we leave one in seriesList

            for series in empty_duplicates:
                seriesList.remove(series)

        return seriesList

    retries = 1  # start counting at one to make log output and settings more readable
    while True:
        try:
            seriesList = _fetchData(pathExpr, startTime, endTime, requestContext, seriesList)
            return seriesList
        except Exception as e:
            if retries >= settings.MAX_FETCH_RETRIES:
                log.exception("Failed after %i retries! See: %s" % (settings.MAX_FETCH_RETRIES, e))
                raise Exception("Failed after %i retries! See: %s" % (settings.MAX_FETCH_RETRIES, e))
            else:
                log.exception("Got an exception when fetching data! See: %s Will retry. Attempt %i of %i" % (e, retries, settings.MAX_FETCH_RETRIES))
                retries += 1
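# The retry loop shared by the fetchData variants above, distilled into a
# reusable sketch. MAX_FETCH_RETRIES and the logger stand in for the Django
# setting and graphite's logger, and with_retries is a hypothetical name.
import logging

log = logging.getLogger(__name__)
MAX_FETCH_RETRIES = 3


def with_retries(func, *args):
    retries = 1  # counting from one keeps logs aligned with the setting
    while True:
        try:
            return func(*args)
        except Exception as e:
            if retries >= MAX_FETCH_RETRIES:
                log.exception("Failed after %i retries! See: %s" % (MAX_FETCH_RETRIES, e))
                raise
            log.exception("Fetch attempt %i of %i failed: %s; retrying" % (retries, MAX_FETCH_RETRIES, e))
            retries += 1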
def _timebounds(requestContext):
    startTime = int(epoch(requestContext['startTime']))
    endTime = int(epoch(requestContext['endTime']))
    now = int(epoch(requestContext['now']))

    return (startTime, endTime, now)
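# Usage sketch for _timebounds: requestContext carries timezone-aware
# datetimes (as elsewhere in this module), which collapse to integer Unix
# timestamps.
import pytz
from datetime import datetime

requestContext = {
    'startTime': pytz.utc.localize(datetime(1970, 1, 1, 0, 0)),
    'endTime': pytz.utc.localize(datetime(1970, 1, 1, 1, 0)),
    'now': pytz.utc.localize(datetime(1970, 1, 1, 1, 0)),
}
print(_timebounds(requestContext))  # (0, 3600, 3600)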