def prefetchData(requestContext, pathExpressions): """Prefetch a bunch of path expressions and stores them in the context. The idea is that this will allow more batching than doing a query each time evaluateTarget() needs to fetch a path. All the prefetched data is stored in the requestContext, to be accessed later by fetchData. """ if not pathExpressions: return start = time.time() log.debug("Fetching data for [%s]" % (', '.join(pathExpressions))) (startTime, endTime, now) = timebounds(requestContext) prefetched = collections.defaultdict(list) for result in STORE.fetch(pathExpressions, startTime, endTime, now, requestContext): if result is None: continue prefetched[result['pathExpression']].append(( result['name'], ( result['time_info'], result['values'], ), )) if not requestContext.get('prefetched'): requestContext['prefetched'] = {} requestContext['prefetched'][(startTime, endTime, now)] = prefetched log.rendering("Fetched data for [%s] in %fs" % (', '.join(pathExpressions), time.time() - start))
def test_prefetchData(self):
    # STORE.finders has no non-local finders
    results = prefetchData({}, [])
    self.assertEqual(results, None)

    # STORE.fetch returns list with None value
    with patch('graphite.render.datalib.STORE.fetch', lambda *_: [None]):
        startTime = datetime(1970, 1, 1, 0, 10, 0, 0, pytz.timezone(settings.TIME_ZONE))
        endTime = datetime(1970, 1, 1, 0, 20, 0, 0, pytz.timezone(settings.TIME_ZONE))
        now = datetime(1970, 1, 1, 0, 20, 0, 0, pytz.timezone(settings.TIME_ZONE))

        requestContext = {
            'startTime': startTime,
            'endTime': endTime,
            'now': now,
            'prefetched': {
                'somekey': 'somedata',
            },
        }

        prefetchData(requestContext, ['test'])

        self.assertEqual(
            requestContext['prefetched'][timebounds(requestContext)],
            {}
        )
def fetchData(requestContext, pathExpr, timer=None):
    # Guard the optional timer so callers that don't pass one don't crash.
    if timer is not None:
        timer.set_msg("lookup and merge of \"%s\" took" % str(pathExpr))

    seriesList = {}
    (startTime, endTime, now) = timebounds(requestContext)

    prefetched = requestContext.get('prefetched', {}).get((startTime, endTime, now), {}).get(pathExpr)
    if not prefetched:
        return []

    return _merge_results(pathExpr, startTime, endTime, prefetched, seriesList, requestContext)
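# A minimal usage sketch (not part of the module), assuming the usual render flow:
# prefetchData() is called once per request with all extracted path expressions, and
# every subsequent fetchData() call for the same requestContext is then served from
# requestContext['prefetched'] instead of issuing a fresh STORE query. The function
# name render_sketch and the target string below are hypothetical examples.
def render_sketch(requestContext):
    pathExpressions = ['collectd.*.load.value']    # normally extractPathExpressions(targets)
    prefetchData(requestContext, pathExpressions)  # one batched STORE.fetch() for everything
    return fetchData(requestContext, 'collectd.*.load.value')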
def prefetchData(requestContext, pathExpressions): """Prefetch a bunch of path expressions and stores them in the context. The idea is that this will allow more batching than doing a query each time evaluateTarget() needs to fetch a path. All the prefetched data is stored in the requestContext, to be accessed later by fetchData. """ if not pathExpressions: return start = time.time() log.debug("Fetching data for [%s]" % (', '.join(pathExpressions))) (startTime, endTime, now) = timebounds(requestContext) prefetched = collections.defaultdict(list) for result in STORE.fetch(pathExpressions, startTime, endTime, now, requestContext): if result is None: continue prefetched[result['pathExpression']].append(( result['name'], ( result['time_info'], result['values'], ), )) # Several third-party readers including rrdtool and biggraphite return values in a # generator which can only be iterated on once. These must be converted to a list. for pathExpression, items in prefetched.items(): for i, (name, (time_info, values)) in enumerate(items): if isinstance(values, types.GeneratorType): prefetched[pathExpression][i] = (name, (time_info, list(values))) if not requestContext.get('prefetched'): requestContext['prefetched'] = {} if (startTime, endTime, now) in requestContext['prefetched']: requestContext['prefetched'][(startTime, endTime, now)].update(prefetched) else: requestContext['prefetched'][(startTime, endTime, now)] = prefetched log.rendering("Fetched data for [%s] in %fs" % (', '.join(pathExpressions), time.time() - start))
def prefetchRemoteData(requestContext, targets):
    """Prefetch a bunch of path expressions and store them in the context.

    The idea is that this will allow more batching than doing a query
    each time evaluateTarget() needs to fetch a path. All the prefetched
    data is stored in the requestContext, to be accessed later by datalib.
    """
    log.rendering("Prefetching remote data")
    pathExpressions = extractPathExpressions(targets)

    (startTime, endTime, now) = timebounds(requestContext)

    results = STORE.fetch_remote(pathExpressions, startTime, endTime, now, requestContext)

    requestContext['prefetched'][(startTime, endTime, now)] = PrefetchedData(results)
def test__fetchData(self):
    pathExpr = 'collectd.test-db.load.value'

    startDateTime = datetime(1970, 1, 1, 0, 10, 0, 0, pytz.timezone(settings.TIME_ZONE))
    endDateTime = datetime(1970, 1, 1, 0, 20, 0, 0, pytz.timezone(settings.TIME_ZONE))

    requestContext = self._build_requestContext(startDateTime, endDateTime)
    requestContext['now'] = endDateTime
    requestContext['forwardHeaders'] = None

    (startTime, endTime, now) = timebounds(requestContext)

    results = _fetchData(pathExpr, startTime, endTime, now, requestContext, [])
    expectedResults = []
    self.assertEqual(results, expectedResults)
def prefetchRemoteData(remote_stores, requestContext, pathExpressions):
    # Normalize the context before reading from it; the None check must come first,
    # otherwise the localOnly lookup below would fail on a missing context.
    if requestContext is None:
        requestContext = {}

    if requestContext.get('localOnly'):
        return

    (startTime, endTime, now) = timebounds(requestContext)
    log.info(
        'thread %s prefetchRemoteData:: Starting fetch_list on all backends'
        % current_thread().name)

    # Go through all of the remote nodes, and launch a fetch for each one.
    # Each fetch will take place in its own thread, since it's naturally parallel work.
    for store in remote_stores:
        reader = RemoteReader(store, {'intervals': []}, bulk_query=pathExpressions)
        reader.fetch_list(startTime, endTime, now, requestContext)
def prefetchRemoteData(requestContext, targets):
    """Prefetch a bunch of path expressions and store them in the context.

    The idea is that this will allow more batching than doing a query
    each time evaluateTarget() needs to fetch a path. All the prefetched
    data is stored in the requestContext, to be accessed later by datalib.
    """
    pathExpressions = extractPathExpressions(targets)
    log.rendering("Prefetching remote data for [%s]" % (', '.join(pathExpressions)))

    (startTime, endTime, now) = timebounds(requestContext)

    results = STORE.fetch_remote(pathExpressions, startTime, endTime, now, requestContext)

    requestContext['prefetched'][(startTime, endTime, now)] = PrefetchedData(results)
def prefetchData(requestContext, pathExpressions): """Prefetch a bunch of path expressions and stores them in the context. The idea is that this will allow more batching than doing a query each time evaluateTarget() needs to fetch a path. All the prefetched data is stored in the requestContext, to be accessed later by fetchData. """ if not pathExpressions: return start = time.time() log.debug("Fetching data for [%s]" % (', '.join(pathExpressions))) (startTime, endTime, now) = timebounds(requestContext) prefetched = collections.defaultdict(list) for result in STORE.fetch(pathExpressions, startTime, endTime, now, requestContext): if result is None: continue prefetched[result['pathExpression']].append(( result['name'], ( result['time_info'], result['values'], ), )) # Several third-party readers including rrdtool and biggraphite return values in a # generator which can only be iterated on once. These must be converted to a list. for pathExpression, items in prefetched.items(): for i, (name, (time_info, values)) in enumerate(items): if isinstance(values, types.GeneratorType): prefetched[pathExpression][i] = (name, (time_info, list(values))) if not requestContext.get('prefetched'): requestContext['prefetched'] = {} requestContext['prefetched'][(startTime, endTime, now)] = prefetched log.rendering("Fetched data for [%s] in %fs" % (', '.join(pathExpressions), time.time() - start))
def fetchData(requestContext, pathExpr):
    seriesList = {}
    (startTime, endTime, now) = timebounds(requestContext)

    retries = 1  # start counting at one to make log output and settings more readable
    while True:
        try:
            seriesList = _fetchData(pathExpr, startTime, endTime, now, requestContext, seriesList)
            break
        except Exception:
            if retries >= settings.MAX_FETCH_RETRIES:
                log.exception("Failed after %s retry! Root cause:\n%s" %
                              (settings.MAX_FETCH_RETRIES, format_exc()))
                raise
            else:
                log.exception("Got an exception when fetching data! Try: %i of %i. Root cause:\n%s" %
                              (retries, settings.MAX_FETCH_RETRIES, format_exc()))
                retries += 1

    return seriesList
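# Rough, self-contained sketch of the retry semantics in fetchData() above; flaky_fetch()
# and the literal 3 are hypothetical stand-ins for _fetchData() and
# settings.MAX_FETCH_RETRIES. Transient failures are retried, and only a failure on the
# final allowed attempt propagates to the caller.
attempts = {'count': 0}

def flaky_fetch():
    attempts['count'] += 1
    if attempts['count'] < 3:
        raise IOError("transient backend error")
    return ['some series']

retries = 1
while True:
    try:
        result = flaky_fetch()
        break
    except Exception:
        if retries >= 3:  # plays the role of settings.MAX_FETCH_RETRIES
            raise
        retries += 1

assert result == ['some series'] and attempts['count'] == 3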
def prefetchRemoteData(requestContext, targets):
    """Prefetch a bunch of path expressions and store them in the context.

    The idea is that this will allow more batching than doing a query
    each time evaluateTarget() needs to fetch a path. All the prefetched
    data is stored in the requestContext, to be accessed later by datalib.
    """
    # Only prefetch if there is at least one active remote finder;
    # this is to avoid the overhead of tagdb lookups in extractPathExpressions.
    if len([finder for finder in STORE.finders
            if not getattr(finder, 'local', True)
            and not getattr(finder, 'disabled', False)]) < 1:
        return

    pathExpressions = extractPathExpressions(targets)
    log.rendering("Prefetching remote data for [%s]" % (', '.join(pathExpressions)))

    (startTime, endTime, now) = timebounds(requestContext)

    results = STORE.fetch_remote(pathExpressions, startTime, endTime, now, requestContext)

    requestContext['prefetched'][(startTime, endTime, now)] = PrefetchedData(results)
def fetch_remote(self, patterns, requestContext):
    if requestContext['localOnly']:
        return

    if patterns is None:
        return

    (startTime, endTime, now) = timebounds(requestContext)
    log.debug('prefetchRemoteData:: Starting fetch_list on all backends')

    results = []
    for finder in self.finders:
        if not hasattr(finder, 'fetch') or finder.local:
            continue
        result = finder.fetch(patterns, startTime, endTime, now=now, requestContext=requestContext)
        results.append(result)

    return results
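# A minimal sketch of the finder shape fetch_remote() relies on: any finder exposing a
# fetch() method with local set to False takes part in the remote prefetch. The class
# below is a hypothetical illustration, not a real graphite finder implementation.
class SketchRemoteFinder(object):
    local = False
    disabled = False

    def fetch(self, patterns, startTime, endTime, now=None, requestContext=None):
        # A real finder would issue a bulk query against a remote backend here and
        # return its fetched results; an empty list means no matching series.
        return []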
def test__fetchData_remote_fetch_data_none(self):
    pathExpr = 'collectd.test-db.load.value'

    startDateTime = datetime(1970, 1, 1, 0, 10, 0, 0, pytz.timezone(settings.TIME_ZONE))
    endDateTime = datetime(1970, 1, 1, 0, 20, 0, 0, pytz.timezone(settings.TIME_ZONE))

    requestContext = self._build_requestContext(startDateTime, endDateTime)
    requestContext['now'] = endDateTime
    requestContext['forwardHeaders'] = None

    # Use this form of the start/end times
    (startTime, endTime, now) = timebounds(requestContext)

    # Get the remote data
    requestContext['prefetched'] = {}
    with self.settings(REMOTE_PREFETCH_DATA=True):
        results = _fetchData(pathExpr, startTime, endTime, now, requestContext, {})

    expectedResults = []
    self.assertEqual(results, expectedResults)
def test__fetchData_remote_fetch_data(self):
    pathExpr = 'collectd.test-db.load.value'

    startDateTime = datetime(1970, 1, 1, 0, 10, 0, 0, pytz.timezone(settings.TIME_ZONE))
    endDateTime = datetime(1970, 1, 1, 0, 20, 0, 0, pytz.timezone(settings.TIME_ZONE))

    requestContext = self._build_requestContext(startDateTime, endDateTime)
    requestContext['now'] = endDateTime
    requestContext['forwardHeaders'] = None

    # Use this form of the start/end times
    (startTime, endTime, now) = timebounds(requestContext)

    # First item in the list is a proper fetched response.
    # Second item is None, which is what happens if there is no data back from wait_for_results.
    prefetched_results = [
        [{
            'pathExpression': 'collectd.test-db.load.value',
            'name': 'collectd.test-db.load.value',
            'time_info': (startTime, endTime, now),
            'step': 60,
            'values': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        }],
        None,
    ]

    # Get the remote data
    requestContext['prefetched'] = {}
    requestContext['prefetched'][(startTime, endTime, now)] = PrefetchedData(prefetched_results)

    with self.settings(REMOTE_PREFETCH_DATA=True):
        results = _fetchData(pathExpr, startTime, endTime, now, requestContext, {})

    expectedResults = [
        TimeSeries("collectd.test-db.load.value", startTime, endTime, 1200,
                   [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
    ]

    self.assertEqual(results, expectedResults)