def prefetchData(requestContext, pathExpressions): """Prefetch a bunch of path expressions and stores them in the context. The idea is that this will allow more batching than doing a query each time evaluateTarget() needs to fetch a path. All the prefetched data is stored in the requestContext, to be accessed later by fetchData. """ if not pathExpressions: return start = time.time() log.debug("Fetching data for [%s]" % (', '.join(pathExpressions))) (startTime, endTime, now) = timebounds(requestContext) prefetched = collections.defaultdict(list) for result in STORE.fetch(pathExpressions, startTime, endTime, now, requestContext): if result is None: continue prefetched[result['pathExpression']].append(( result['name'], ( result['time_info'], result['values'], ), )) if not requestContext.get('prefetched'): requestContext['prefetched'] = {} requestContext['prefetched'][(startTime, endTime, now)] = prefetched log.rendering("Fetched data for [%s] in %fs" % (', '.join(pathExpressions), time.time() - start))
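# Illustrative sketch (not from the codebase): how a fetchData-style consumer
# might read entries back out of requestContext['prefetched'], which prefetchData
# above keys by the (startTime, endTime, now) time bounds. The helper name and
# the demo data are hypothetical.
def lookup_prefetched(requestContext, pathExpression, startTime, endTime, now):
    prefetched = requestContext.get('prefetched', {}).get((startTime, endTime, now), {})
    # each entry is (name, (time_info, values)) exactly as appended above
    return prefetched.get(pathExpression, [])

demo_context = {'prefetched': {(0, 60, 60): {'carbon.*': [('carbon.agents.a', ((0, 60, 10), [1, 2, 3, 4, 5, 6]))]}}}
print(lookup_prefetched(demo_context, 'carbon.*', 0, 60, 60))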
def _find(self, query): jobs = [ Job(finder.find_nodes, query) for finder in self.get_finders(query.local) ] # Group matching nodes by their path nodes_by_path = defaultdict(list) done = 0 errors = 0 # Start finds start = time.time() try: for job in self.pool_exec(jobs, settings.REMOTE_FIND_TIMEOUT): done += 1 if job.exception: errors += 1 log.info("Find for %s failed after %fs: %s" % (str(query), time.time() - start, str(job.exception))) continue log.debug("Got a find result for %s after %fs" % (str(query), time.time() - start)) for node in job.result or []: nodes_by_path[node.path].append(node) except PoolTimeoutError: log.info("Timed out in find after %fs" % (time.time() - start)) if errors == done: raise Exception('All finds failed for %s' % (str(query))) log.debug("Got all find results for %s in %fs" % (str(query), time.time() - start)) return self._list_nodes(query, nodes_by_path)
def wait_jobs(self, jobs, timeout, context): if not jobs: return [] start = time.time() results = [] failed = [] done = 0 try: for job in self.pool_exec(jobs, timeout): elapsed = time.time() - start done += 1 if job.exception: failed.append(job) log.info("Exception during %s after %fs: %s" % (job, elapsed, str(job.exception))) else: log.debug("Got a result for %s after %fs" % (job, elapsed)) results.append(job.result) except PoolTimeoutError: message = "Timed out after %fs for %s" % (time.time() - start, context) log.info(message) if done == 0: raise Exception(message) if len(failed) == done: message = "All requests failed for %s (%d)" % (context, len(failed)) for job in failed: message += "\n\n%s: %s: %s" % (job, job.exception, '\n'.join( traceback.format_exception(*job.exception_info))) raise Exception(message) return results
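# Minimal sketch, under assumptions, of the job contract wait_jobs relies on:
# each completed job exposes .result, .exception and .exception_info (a
# sys.exc_info() tuple, so traceback.format_exception(*job.exception_info)
# works). DemoJob is a hypothetical stand-in, not the real worker-pool Job.
import sys
import traceback

class DemoJob(object):
    def __init__(self, func, *args, **kwargs):
        self.func, self.args, self.kwargs = func, args, kwargs
        self.result = None
        self.exception = None
        self.exception_info = None

    def run(self):
        try:
            self.result = self.func(*self.args, **self.kwargs)
        except Exception as err:
            self.exception = err
            self.exception_info = sys.exc_info()

job = DemoJob(lambda: 1 / 0)
job.run()
print('\n'.join(traceback.format_exception(*job.exception_info)))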
def request(self, path, fields=None, headers=None, timeout=None): url = "%s%s" % (self.url, path) url_full = "%s?%s" % (url, urlencode(fields)) try: result = http.request( 'POST' if settings.REMOTE_STORE_USE_POST else 'GET', url, fields=fields, headers=headers, timeout=timeout, preload_content=False) except BaseException as err: self.fail() log.exception("RemoteFinder[%s] Error requesting %s: %s" % (self.host, url_full, err)) raise Exception("Error requesting %s: %s" % (url_full, err)) if result.status != 200: result.release_conn() self.fail() log.exception("RemoteFinder[%s] Error response %d from %s" % (self.host, result.status, url_full)) raise Exception("Error response %d from %s" % (result.status, url_full)) result.url_full = url_full # reset last failure time so that retried fetches can re-enable a remote self.last_failure = 0 log.debug("RemoteFinder[%s] Fetched %s" % (self.host, url_full)) return result
def fetch_remote(self, patterns, startTime, endTime, now, requestContext): patterns = set(patterns) # TODO: Change this to simply `fetch()` in order to support optimizations # for local finders too. This also require using the thread pool and # limiting the number of results using the warning and failure thresholds. # Also support the nice merging features of MultiReader. if requestContext['localOnly']: return [] if not patterns: return [] log.debug( 'prefetchRemoteData:: Starting fetch_list on all backends') results = [] for finder in self.finders: is_local = getattr(finder, 'local', True) if is_local: continue result = finder.fetch( patterns, startTime, endTime, now=now, requestContext=requestContext ) results.append(result) return results
def __init__(self, name, start, end, step, values, consolidate='average', tags=None): list.__init__(self, values) self.name = name self.start = start self.end = end self.step = step self.consolidationFunc = consolidate self.valuesPerPoint = 1 self.options = {} self.pathExpression = name if tags: self.tags = tags else: self.tags = {'name': name} # parse for tags if a tagdb is configured and name doesn't look like a function-wrapped name if STORE.tagdb and not re.match('^[a-z]+[(].+[)]$', name, re.IGNORECASE): try: self.tags = STORE.tagdb.parse(name).tags except Exception as err: # tags couldn't be parsed, just use "name" tag log.debug("Couldn't parse tags for %s: %s" % (name, err))
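# Hedged sketch of the tag fallback above: when no tagdb parse is available (or
# parsing fails) the series keeps a single "name" tag. demo_parse_tags only
# illustrates the common "metric;tag=value" form and is not the real
# STORE.tagdb.parse().
def demo_parse_tags(name):
    parts = name.split(';')
    tags = {'name': parts[0]}
    for part in parts[1:]:
        key, _, value = part.partition('=')
        if key and value:
            tags[key] = value
    return tags

print(demo_parse_tags('disk.used;datacenter=dc1;server=web01'))
# {'name': 'disk.used', 'datacenter': 'dc1', 'server': 'web01'}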
def fetch(self, patterns, startTime, endTime, now, requestContext): # deduplicate patterns patterns = sorted(set(patterns)) if not patterns: return [] log.debug( 'graphite.storage.Store.fetch :: Starting fetch on all backends') jobs = [] tag_patterns = None pattern_aliases = defaultdict(list) for finder in self.get_finders(requestContext.get('localOnly')): # if the finder supports tags, just pass the patterns through if getattr(finder, 'tags', False): job = Job(finder.fetch, 'fetch for %s' % patterns, patterns, startTime, endTime, now=now, requestContext=requestContext) jobs.append(job) continue # if we haven't resolved the seriesByTag calls, build resolved patterns and translation table if tag_patterns is None: tag_patterns, pattern_aliases = self._tag_patterns( patterns, requestContext) # dispatch resolved patterns to finder job = Job(finder.fetch, 'fetch for %s' % tag_patterns, tag_patterns, startTime, endTime, now=now, requestContext=requestContext) jobs.append(job) # Start fetches start = time.time() results = self.wait_jobs(jobs, settings.FETCH_TIMEOUT, 'fetch for %s' % str(patterns)) results = [i for l in results for i in l] # flatten # translate path expressions for responses from resolved seriesByTag patterns for result in results: if result['name'] == result['pathExpression'] and result[ 'pathExpression'] in pattern_aliases: for pathExpr in pattern_aliases[result['pathExpression']]: newresult = deepcopy(result) newresult['pathExpression'] = pathExpr results.append(newresult) log.debug("Got all fetch results for %s in %fs" % (str(patterns), time.time() - start)) return results
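# Self-contained illustration of the alias-translation step above: when a
# seriesByTag(...) pattern is resolved into concrete patterns before dispatch,
# results that come back under the resolved pattern are copied once per
# original alias so callers still see the expression they asked for. The
# sample pattern and values are made up.
from copy import deepcopy

pattern_aliases = {'disk.used;server=web01': ["seriesByTag('server=web01')"]}
results = [{'name': 'disk.used;server=web01', 'pathExpression': 'disk.used;server=web01', 'values': [1, 2, 3]}]

for result in list(results):
    if result['name'] == result['pathExpression'] and result['pathExpression'] in pattern_aliases:
        for pathExpr in pattern_aliases[result['pathExpression']]:
            newresult = deepcopy(result)
            newresult['pathExpression'] = pathExpr
            results.append(newresult)

print([r['pathExpression'] for r in results])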
def fetch(self, patterns, startTime, endTime, now, requestContext): # deduplicate patterns patterns = sorted(set(patterns)) if not patterns: return [] log.debug( 'graphite.storage.Store.fetch :: Starting fetch on all backends') jobs = [] tag_patterns = None pattern_aliases = defaultdict(list) for finder in self.get_finders(requestContext.get('localOnly')): # if the finder supports tags, just pass the patterns through if getattr(finder, 'tags', False): job = Job( self._observed_fetch, 'fetch for %s' % patterns, finder, patterns, startTime, endTime, now=now, requestContext=requestContext ) jobs.append(job) continue # if we haven't resolved the seriesByTag calls, build resolved patterns and translation table if tag_patterns is None: tag_patterns, pattern_aliases = self._tag_patterns(patterns, requestContext) # dispatch resolved patterns to finder job = Job( self._observed_fetch, 'fetch for %s' % tag_patterns, finder, tag_patterns, startTime, endTime, now=now, requestContext=requestContext ) jobs.append(job) done = 0 errors = 0 # Start fetches start = time.time() results = self.wait_jobs(jobs, settings.FETCH_TIMEOUT, 'fetch for %s' % str(patterns)) results = [i for l in results for i in l] # flatten # translate path expressions for responses from resolved seriesByTag patterns for result in results: if result['name'] == result['pathExpression'] and result['pathExpression'] in pattern_aliases: for pathExpr in pattern_aliases[result['pathExpression']]: newresult = deepcopy(result) newresult['pathExpression'] = pathExpr results.append(newresult) log.debug("Got all fetch results for %s in %fs" % (str(patterns), time.time() - start)) return results
def _fetch_list_locked(self, url, query_string, query_params, headers): url_full = "%s?%s" % (url, query_string) jobs = [(self._fetch, url, query_string, query_params, headers)] q = pool_apply(self.store.finder.worker_pool(), jobs) log.debug('RemoteReader:: Storing FetchInProgress for %s' % url_full) return FetchInProgress(_Results(q))
def prepare_slow_pool(self, req_key): self.pool_name = 'graphouse_slow_requests_pool' if settings.USE_WORKER_POOL: self.thread_count = min(parallel_jobs_for_slow_pool, settings.POOL_MAX_WORKERS) log.debug('DEBUG[{}]: Using slow pool with "{}" threads'.format( req_key, self.thread_count))
def tagdb_auto_complete_values(self, exprs, tag, valuePrefix=None, limit=None, requestContext=None): log.debug( 'graphite.storage.Store.auto_complete_values :: Starting lookup on all backends' ) if requestContext is None: requestContext = {} context = 'values for %s %s %s' % (str(exprs), tag, valuePrefix or '') jobs = [] use_tagdb = False for finder in self.get_finders(requestContext.get('localOnly')): if getattr(finder, 'tags', False): job = Job(finder.auto_complete_values, context, exprs, tag, valuePrefix=valuePrefix, limit=limit, requestContext=requestContext) jobs.append(job) else: use_tagdb = True # start finder jobs start = time.time() results = set() # if we're using the local tagdb then execute it (in the main thread # so that LocalDatabaseTagDB will work) if use_tagdb: results.update( self.tagdb.auto_complete_values(exprs, tag, valuePrefix=valuePrefix, limit=limit, requestContext=requestContext)) for result in self.wait_jobs(jobs, settings.FIND_TIMEOUT, context): results.update(result) # sort & limit results results = sorted(results) if limit: results = results[:int(limit)] log.debug("Got all autocomplete %s in %fs" % (context, time.time() - start)) return results
def fetch(self, patterns, startTime, endTime, now, requestContext): # deduplicate patterns patterns = list(set(patterns)) if not patterns: return [] log.debug( 'graphite.storage.Store.fetch :: Starting fetch on all backends') jobs = [ Job(finder.fetch, patterns, startTime, endTime, now=now, requestContext=requestContext) for finder in self.get_finders(requestContext.get('localOnly')) ] results = [] done = 0 errors = 0 # Start fetches start = time.time() try: for job in pool_exec(get_pool(), jobs, settings.REMOTE_FETCH_TIMEOUT): done += 1 if job.exception: errors += 1 log.debug("Fetch for %s failed after %fs: %s" % (str(patterns), time.time() - start, str(job.exception))) continue log.debug("Got a fetch result for %s after %fs" % (str(patterns), time.time() - start)) results.extend(job.result) except PoolTimeoutError: log.debug("Timed out in fetch after %fs" % (time.time() - start)) if errors == done: raise Exception('All fetches failed for %s' % (str(patterns))) log.debug("Got all fetch results for %s in %fs" % (str(patterns), time.time() - start)) return results
def wait_jobs(self, jobs, timeout, context): if not jobs: return [] start = time.time() results = [] failed = [] done = 0 try: for job in self.pool_exec(jobs, timeout): elapsed = time.time() - start done += 1 if job.exception: failed.append(job) log.info("Exception during %s after %fs: %s" % ( job, elapsed, str(job.exception)) ) else: log.debug("Got a result for %s after %fs" % (job, elapsed)) results.append(job.result) except PoolTimeoutError: message = "Timed out after %fs for %s" % ( time.time() - start, context ) log.info(message) if done == 0: raise Exception(message) if len(failed) == done: message = "All requests failed for %s (%d)" % ( context, len(failed) ) for job in failed: message += "\n\n%s: %s: %s" % ( job, job.exception, '\n'.join(traceback.format_exception(*job.exception_info)) ) raise Exception(message) if len(results) < len(jobs) and settings.STORE_FAIL_ON_ERROR: message = "%s request(s) failed for %s (%d)" % ( len(jobs) - len(results), context, len(jobs) ) for job in failed: message += "\n\n%s: %s: %s" % ( job, job.exception, '\n'.join(traceback.format_exception(*job.exception_info)) ) raise Exception(message) return results
def _request(self, url, query, flatbuffers=False): tag_headers = copy.deepcopy(self.headers) if flatbuffers: tag_headers['Accept'] = 'application/x-flatbuffer-metric-find-result-list' if not isinstance(query, dict): query = {'query': query} source = "" if settings.DEBUG: source = sys._getframe().f_back.f_code.co_name for i in range(0, self.max_retries): try: if self.zipkin_enabled == True: traceheader = binascii.hexlify(os.urandom(8)) tag_headers['X-B3-TraceId'] = traceheader tag_headers['X-B3-SpanId'] = traceheader if self.zipkin_event_trace_level == 1: tag_headers['X-Mtev-Trace-Event'] = '1' elif self.zipkin_event_trace_level == 2: tag_headers['X-Mtev-Trace-Event'] = '2' r = requests.get(url, params=query, headers=tag_headers, timeout=((self.connection_timeout / 1000.0), (self.timeout / 1000.0))) r.raise_for_status() if flatbuffers: r = irondb_flatbuf.metric_find_results(r.content) else: r = r.json() if settings.DEBUG: log.debug("IRONdbTagFetcher.%s, result: %s" % (source, json.dumps(r))) return r except (socket.gaierror, requests.exceptions.ConnectionError) as ex: # on down nodes, try again on another node until "tries" log.exception("IRONdbTagFetcher.%s ConnectionError %s" % (source, ex)) except requests.exceptions.ConnectTimeout as ex: # on down nodes, try again on another node until "tries" log.exception("IRONdbTagFetcher.%s ConnectTimeout %s" % (source, ex)) except irondb_flatbuf.FlatBufferError as ex: # flatbuffer error, try again log.exception("IRONdbTagFetcher.%s FlatBufferError %s" % (source, ex)) except JSONDecodeError as ex: # json error, try again log.exception("IRONdbTagFetcher.%s JSONDecodeError %s" % (source, ex)) except requests.exceptions.ReadTimeout as ex: # on down nodes, try again on another node until "tries" log.exception("IRONdbTagFetcher.%s ReadTimeout %s" % (source, ex)) except requests.exceptions.HTTPError as ex: # http status code errors are failures, stop immediately log.exception("IRONdbTagFetcher.%s HTTPError %s %s" % (source, ex, r.content)) break return ()
def prefetchData(requestContext, pathExpressions): """Prefetch a bunch of path expressions and stores them in the context. The idea is that this will allow more batching than doing a query each time evaluateTarget() needs to fetch a path. All the prefetched data is stored in the requestContext, to be accessed later by fetchData. """ if not pathExpressions: return start = time.time() log.debug("Fetching data for [%s]" % (', '.join(pathExpressions))) (startTime, endTime, now) = timebounds(requestContext) prefetched = collections.defaultdict(list) for result in STORE.fetch(pathExpressions, startTime, endTime, now, requestContext): if result is None: continue prefetched[result['pathExpression']].append(( result['name'], ( result['time_info'], result['values'], ), )) # Several third-party readers including rrdtool and biggraphite return values in a # generator which can only be iterated on once. These must be converted to a list. for pathExpression, items in prefetched.items(): for i, (name, (time_info, values)) in enumerate(items): if isinstance(values, types.GeneratorType): prefetched[pathExpression][i] = (name, (time_info, list(values))) if not requestContext.get('prefetched'): requestContext['prefetched'] = {} if (startTime, endTime, now) in requestContext['prefetched']: requestContext['prefetched'][(startTime, endTime, now)].update(prefetched) else: requestContext['prefetched'][(startTime, endTime, now)] = prefetched log.rendering("Fetched data for [%s] in %fs" % (', '.join(pathExpressions), time.time() - start))
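# Small self-contained example of why the generator check above matters: a
# generator of values can only be consumed once, so a later reader of the same
# prefetched entry would see an empty series unless it is converted to a list.
import types

values = (v for v in [1, 2, None, 4])
assert isinstance(values, types.GeneratorType)
print(list(values))  # [1, 2, None, 4]
print(list(values))  # [] -- already exhausted, hence the conversion above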
def tagdb_auto_complete_tags(self, exprs, tagPrefix=None, limit=None, requestContext=None): log.debug( 'graphite.storage.Store.auto_complete_tags :: Starting lookup on all backends') if requestContext is None: requestContext = {} context = 'tags for %s %s' % (str(exprs), tagPrefix or '') jobs = [] use_tagdb = False for finder in self.get_finders(requestContext.get('localOnly')): if getattr(finder, 'tags', False): job = Job( finder.auto_complete_tags, context, exprs, tagPrefix=tagPrefix, limit=limit, requestContext=requestContext ) jobs.append(job) else: use_tagdb = True results = set() # if we're using the local tagdb then execute it (in the main thread # so that LocalDatabaseTagDB will work) if use_tagdb: results.update(self.tagdb.auto_complete_tags( exprs, tagPrefix=tagPrefix, limit=limit, requestContext=requestContext )) # Start fetches start = time.time() for result in self.wait_jobs(jobs, settings.FIND_TIMEOUT, context): results.update(result) # sort & limit results results = sorted(results) if limit: results = results[:int(limit)] log.debug("Got all autocomplete %s in %fs" % ( context, time.time() - start) ) return results
def get_index(self, requestContext=None): log.debug('graphite.storage.Store.get_index :: Starting get_index on all backends') if not requestContext: requestContext = {} context = 'get_index' jobs = [ Job(finder.get_index, context, requestContext=requestContext) for finder in self.get_finders(local=requestContext.get('localOnly')) ] start = time.time() results = self.wait_jobs(jobs, settings.FETCH_TIMEOUT, context) results = [i for l in results if l is not None for i in l] # flatten log.debug("Got all index results in %fs" % (time.time() - start)) return sorted(list(set(results)))
def prefetchData(requestContext, pathExpressions): """Prefetch a bunch of path expressions and stores them in the context. The idea is that this will allow more batching than doing a query each time evaluateTarget() needs to fetch a path. All the prefetched data is stored in the requestContext, to be accessed later by fetchData. """ if not pathExpressions: return start = time.time() log.debug("Fetching data for [%s]" % (', '.join(pathExpressions))) (startTime, endTime, now) = timebounds(requestContext) prefetched = collections.defaultdict(list) for result in STORE.fetch(pathExpressions, startTime, endTime, now, requestContext): if result is None: continue prefetched[result['pathExpression']].append(( result['name'], ( result['time_info'], result['values'], ), )) # Several third-party readers including rrdtool and biggraphite return values in a # generator which can only be iterated on once. These must be converted to a list. for pathExpression, items in prefetched.items(): for i, (name, (time_info, values)) in enumerate(items): if isinstance(values, types.GeneratorType): prefetched[pathExpression][i] = (name, (time_info, list(values))) if not requestContext.get('prefetched'): requestContext['prefetched'] = {} requestContext['prefetched'][(startTime, endTime, now)] = prefetched log.rendering("Fetched data for [%s] in %fs" % (', '.join(pathExpressions), time.time() - start))
def _find(self, query): context = 'find %s' % query jobs = [ Job(finder.find_nodes, context, query) for finder in self.get_finders(query.local) ] # Group matching nodes by their path nodes_by_path = defaultdict(list) # Start finds start = time.time() results = self.wait_jobs(jobs, settings.FIND_TIMEOUT, context) for result in results: for node in result or []: nodes_by_path[node.path].append(node) log.debug("Got all find results for %s in %fs" % (str(query), time.time() - start)) return self._list_nodes(query, nodes_by_path)
def fetch_list(self, startTime, endTime, now=None, requestContext=None): t = time.time() in_flight = InFlight(self.store, requestContext) query_params = [ ('format', 'pickle'), ('local', '1'), ('noCache', '1'), ('from', str(int(startTime))), ('until', str(int(endTime))) ] if not self.bulk_query: return [] for target in self.bulk_query: query_params.append(('target', target)) if now is not None: query_params.append(('now', str(int(now)))) query_string = urlencode(query_params) urlpath = '/render/' url = "%s://%s%s" % ('https' if settings.INTRACLUSTER_HTTPS else 'http', self.store.host, urlpath) url_full = "%s?%s" % (url, query_string) headers = requestContext.get('forwardHeaders') if requestContext else None lock = in_flight.get_request_lock(url_full) with lock: request = in_flight.get_request(url_full) if request: log.debug("RemoteReader:: Returning cached FetchInProgress %s" % url_full) return request data = self._fetch_list_locked(url, query_string, query_params, headers) in_flight.start_request(url_full, data) log.debug( "RemoteReader:: Returning %s in %fs" % (url_full, time.time() - t)) return data
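# Hedged sketch (not the real InFlight class) of the per-URL de-duplication
# used above: one lock and one cache slot per full URL, so concurrent
# fetch_list calls for the same URL share a single outstanding request. Note
# that the same key must be used for get_request_lock, get_request and
# start_request for the cache to take effect.
import threading

class DemoInFlight(object):
    def __init__(self):
        self._guard = threading.Lock()
        self._locks = {}
        self._requests = {}

    def get_request_lock(self, key):
        with self._guard:
            return self._locks.setdefault(key, threading.Lock())

    def get_request(self, key):
        return self._requests.get(key)

    def start_request(self, key, request):
        self._requests[key] = request

in_flight = DemoInFlight()
url_full = 'http://host/render/?target=a'
with in_flight.get_request_lock(url_full):
    if in_flight.get_request(url_full) is None:
        in_flight.start_request(url_full, object())
print(in_flight.get_request(url_full) is not None)  # True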
def fetch_list(self, startTime, endTime, now=None, requestContext=None): t = time.time() in_flight = InFlight(self.store, requestContext) query_params = [ ('format', 'pickle'), ('local', '1'), ('noCache', '1'), ('from', str(int(startTime))), ('until', str(int(endTime))) ] if not self.bulk_query: return [] for target in self.bulk_query: query_params.append(('target', target)) if now is not None: query_params.append(('now', str(int(now)))) query_string = urlencode(query_params) urlpath = '/render/' url = "%s://%s%s" % ('https' if settings.INTRACLUSTER_HTTPS else 'http', self.store.host, urlpath) url_full = "%s?%s" % (url, query_string) headers = requestContext.get('forwardHeaders') if requestContext else None lock = in_flight.get_request_lock(url_full) with lock: request = in_flight.get_request(url_full) if request: log.debug("RemoteReader:: Returning cached FetchInProgress %s" % url_full) return request data = self._fetch_list_locked(url, query_string, query_params, headers) in_flight.start_request(url_full, data) log.debug( "RemoteReader:: Returning %s in %fs" % (url_full, time.time() - t)) return data
def fetch_remote(self, patterns, requestContext): if requestContext['localOnly']: return if patterns is None: return (startTime, endTime, now) = timebounds(requestContext) log.debug('prefetchRemoteData:: Starting fetch_list on all backends') results = [] for finder in self.finders: if not hasattr(finder, 'fetch') or finder.local: continue result = finder.fetch(patterns, startTime, endTime, now=now, requestContext=requestContext) results.append(result) return results
def _fetch(self, url, query_string, query_params, headers): url_full = "%s?%s" % (url, query_string) log.debug( "RemoteReader:: Starting to execute _fetch %s" % url_full) try: log.debug("ReadResult:: Requesting %s" % url_full) result = http.request( 'POST' if settings.REMOTE_STORE_USE_POST else 'GET', url, fields=query_params, headers=headers, timeout=settings.REMOTE_FETCH_TIMEOUT, ) if result.status != 200: self.store.fail() self.log_error("ReadResult:: Error response %d from %s" % (result.status, url_full)) data = [] else: data = unpickle.loads(result.data) except Exception as err: self.store.fail() self.log_error("ReadResult:: Error requesting %s: %s" % (url_full, err)) data = [] log.debug("RemoteReader:: Completed _fetch %s" % url_full) return data
def deserialize(self, result): """ Based on configuration, either stream-deserialize a response in settings.REMOTE_BUFFER_SIZE chunks, or read the entire payload and use inline deserialization. :param result: an http response object :return: deserialized response payload from cluster server """ start = time.time() try: should_buffer = settings.REMOTE_BUFFER_SIZE > 0 measured_reader = MeasuredReader( BufferedHTTPReader(result, settings.REMOTE_BUFFER_SIZE)) if should_buffer: log.debug("Using streaming deserializer.") reader = BufferedHTTPReader(measured_reader, settings.REMOTE_BUFFER_SIZE) return self._deserialize_stream( reader, result.getheader('content-type')) log.debug("Using inline deserializer for small payload") return self._deserialize_buffer(measured_reader.read(), result.getheader('content-type')) except Exception as err: self.fail() log.exception( "RemoteFinder[%s] Error decoding response from %s: %s" % (self.host, result.url_full, err)) raise Exception("Error decoding response from %s: %s" % (result.url_full, err)) finally: log.debug("Processed %d bytes in %f seconds." % (measured_reader.bytes_read, time.time() - start)) result.release_conn()
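# Sketch under assumptions: MeasuredReader is treated here as a thin wrapper
# that counts bytes as they are read, which is what the bytes_read logging in
# the finally block above relies on. DemoMeasuredReader is a stand-in, not the
# real class.
import io

class DemoMeasuredReader(object):
    def __init__(self, reader):
        self._reader = reader
        self.bytes_read = 0

    def read(self, size=-1):
        data = self._reader.read(size)
        self.bytes_read += len(data)
        return data

reader = DemoMeasuredReader(io.BytesIO(b'payload-bytes'))
reader.read()
print(reader.bytes_read)  # 13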
def find_all(self, query): start = time.time() jobs = [] # Start local searches for finder in self.finders: # Support legacy finders by defaulting to 'local = True' is_local = not hasattr(finder, 'local') or finder.local if query.local and not is_local: continue if getattr(finder, 'disabled', False): continue jobs.append((finder.find_nodes, query)) result_queue = pool_apply(get_pool(), jobs) # Group matching nodes by their path nodes_by_path = defaultdict(list) timeout = settings.REMOTE_FIND_TIMEOUT deadline = start + timeout done = 0 total = len(jobs) while done < total: wait_time = deadline - time.time() nodes = [] try: nodes = result_queue.get(True, wait_time) # ValueError could happen if due to really unlucky timing wait_time # is negative except (Queue.Empty, ValueError): if time.time() > deadline: log.debug("Timed out in find_nodes after %fs" % timeout) break else: continue log.debug("Got a find result after %fs" % (time.time() - start)) done += 1 for node in nodes or []: nodes_by_path[node.path].append(node) log.debug("Got all find results in %fs" % (time.time() - start)) return self._list_nodes(query, nodes_by_path)
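# Self-contained illustration of the deadline pattern above: each get() on the
# result queue only waits for the time remaining until the overall deadline,
# so one slow finder cannot push the total wait past the timeout. Uses the
# Python 3 queue module; the function name is hypothetical.
import queue
import time

def drain_with_deadline(result_queue, total, timeout):
    deadline = time.time() + timeout
    collected = []
    done = 0
    while done < total:
        wait_time = deadline - time.time()
        try:
            # a negative wait_time raises ValueError, handled like a timeout
            item = result_queue.get(True, wait_time)
        except (queue.Empty, ValueError):
            if time.time() > deadline:
                break
            continue
        done += 1
        collected.append(item)
    return collected

q = queue.Queue()
q.put(['node-a'])
print(drain_with_deadline(q, total=2, timeout=0.2))  # only the first result arrives in time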
def get_index(self, requestContext=None): log.debug( 'graphite.storage.Store.get_index :: Starting get_index on all backends' ) if not requestContext: requestContext = {} jobs = [ Job(finder.get_index, requestContext=requestContext) for finder in self.get_finders( local=requestContext.get('localOnly')) ] results = [] done = 0 errors = 0 # Start index lookups start = time.time() try: for job in self.pool_exec(jobs, settings.REMOTE_FETCH_TIMEOUT): done += 1 if job.exception: errors += 1 log.info("get_index failed after %fs: %s" % (time.time() - start, str(job.exception))) continue log.debug("Got an index result after %fs" % (time.time() - start)) results.extend(job.result) except PoolTimeoutError: log.info("Timed out in get_index after %fs" % (time.time() - start)) if errors == done: if errors == 1: raise Exception("get_index failed: %s" % (str(job.exception))) raise Exception('All index lookups failed') log.debug("Got all index results in %fs" % (time.time() - start)) return sorted(list(set(results)))
def find_all(self, query): start = time.time() jobs = [] # Start local searches for finder in self.finders: # Support legacy finders by defaulting to 'local = True' is_local = not hasattr(finder, 'local') or finder.local if query.local and not is_local: continue jobs.append((finder.find_nodes, query)) result_queue = pool_apply(get_pool(), jobs) # Group matching nodes by their path nodes_by_path = defaultdict(list) timeout = settings.REMOTE_FIND_TIMEOUT deadline = start + timeout done = 0 total = len(jobs) while done < total: wait_time = deadline - time.time() nodes = [] try: nodes = result_queue.get(True, wait_time) # ValueError could happen if due to really unlucky timing wait_time # is negative except (Queue.Empty, ValueError): if time.time() > deadline: log.debug("Timed out in find_nodes after %fs" % timeout) break else: continue log.debug("Got a find result after %fs" % (time.time() - start)) done += 1 for node in nodes or []: nodes_by_path[node.path].append(node) log.debug("Got all find results in %fs" % (time.time() - start)) return self._list_nodes(query, nodes_by_path)
def find_nodes(self, query): start = time.time() jobs = [] random.shuffle(self.remote_stores) for store in self.remote_stores: if store.available: jobs.append((store.find, query)) queue = pool_apply(self.worker_pool(), jobs) timeout = settings.REMOTE_FIND_TIMEOUT deadline = start + timeout done = 0 total = len(jobs) while done < total: wait_time = deadline - time.time() nodes = [] try: nodes = queue.get(True, wait_time) # ValueError could happen if due to really unlucky timing wait_time # is negative. except (Queue.Empty, ValueError): if time.time() > deadline: log.debug("Timed out in find_nodes after %fs" % timeout) break else: continue log.debug("Got a remote find result after %fs" % (time.time() - start)) done += 1 for node in nodes or []: yield node log.debug("Got all remote find results in %fs" % (time.time() - start))
def read_locked(self): if self.results is not None: log.debug( 'RemoteReader:: retrieve completed (cached) %s' % (', '.join([result['path'] for result in self.results])), ) return self.results # otherwise we get it from the queue and keep it for later results = self.queue.get(block=True) for i in range(len(results)): results[i]['path'] = results[i]['name'] if not results: log.debug('RemoteReader:: retrieve has received no results') self.results = results or [] log.debug( 'RemoteReader:: retrieve completed %s' % (', '.join([result['path'] for result in results])), ) return self.results
def fetch(self, patterns, start_time, end_time, now=None, requestContext=None): log.debug("IRONdbFinder.fetch called") all_names = {} for pattern in patterns: log.debug("IRONdbFinder.fetch pattern: %s" % pattern) names = {} name_headers = copy.deepcopy(self.headers) name_headers['Accept'] = 'application/x-flatbuffer-metric-find-result-list' for i in range(0, self.max_retries): try: node = urls.names query_start = time.gmtime() data_type = "json" if self.zipkin_enabled == True: traceheader = binascii.hexlify(os.urandom(8)) name_headers['X-B3-TraceId'] = traceheader name_headers['X-B3-SpanId'] = traceheader if self.zipkin_event_trace_level == 1: name_headers['X-Mtev-Trace-Event'] = '1' if self.zipkin_event_trace_level == 2: name_headers['X-Mtev-Trace-Event'] = '2' name_params = {'query': pattern} if self.activity_tracking: name_params['activity_start_secs'] = start_time name_params['activity_end_secs'] = end_time r = requests.get(node, params=name_params, headers=name_headers, timeout=((self.connection_timeout / 1000.0), (self.timeout / 1000.0))) r.raise_for_status() if r.headers['content-type'] == 'application/json': names = r.json() elif r.headers['content-type'] == 'application/x-flatbuffer-metric-find-result-list': names = irondb_flatbuf.metric_find_results(r.content) data_type = "flatbuffer" else: pass result_count = len(names) if names else -1 self.query_log(node, query_start, r.elapsed, result_count, pattern, "names", data_type, start_time, end_time) break except (socket.gaierror, requests.exceptions.ConnectionError) as ex: # on down nodes, try again on another node until "tries" log.exception("IRONdbFinder.fetch ConnectionError %s" % ex) except requests.exceptions.ConnectTimeout as ex: # on down nodes, try again on another node until "tries" log.exception("IRONdbFinder.fetch ConnectTimeout %s" % ex) except irondb_flatbuf.FlatBufferError as ex: # flatbuffer error, try again log.exception("IRONdbFinder.fetch FlatBufferError %s" % ex) except JSONDecodeError as ex: # json error, try again log.exception("IRONdbFinder.fetch JSONDecodeError %s" % ex) except requests.exceptions.ReadTimeout as ex: # on down nodes, try again on another node until "tries" log.exception("IRONdbFinder.fetch ReadTimeout %s" % ex) except requests.exceptions.HTTPError as ex: # http status code errors are failures, stop immediately log.exception("IRONdbFinder.fetch HTTPError %s %s" % (ex, r.content)) break all_names[pattern] = names measurement_headers = copy.deepcopy(self.headers) measurement_headers['Accept'] = 'application/x-flatbuffer-metric-get-result-list' in_this_batch = 0 fset = [] fetcher = self.newfetcher(fset, measurement_headers) for pattern, names in all_names.items(): for name in names: if 'leaf' in name and 'leaf_data' in name: if self.batch_size == 0 or in_this_batch >= self.batch_size: in_this_batch = 0 fetcher = self.newfetcher(fset, measurement_headers) fetcher.add_leaf(name['name'], name['leaf_data']) name['fetcher'] = fetcher in_this_batch += 1 self.dispatchfetches(fset, start_time, end_time) results = [] first_correction = False for pattern, names in all_names.items(): for name in names: fetcher = fset[0] if 'fetcher' in name: fetcher = name['fetcher'] res = fetcher.series(name['name']) if res is None: continue time_info, values = res # At least one series needs to have the right start time # And to not be complete jerks we cull leading nulls, so on # data fetches where everything has leading nulls, the start # time in the graph can slide forward. We need one anchor, # it will be whatever series we see first. if not first_correction: prepend = [] # time_info is immutable, recreate it so we can muck with it time_info = [ time_info[0], time_info[1], time_info[2] ] while time_info[0] > start_time: time_info[0] -= time_info[2] prepend.append(None) if len(prepend) > 0: values = prepend + values first_correction = True results.append({ 'pathExpression': pattern, 'path' : name['name'], 'name' : name['name'], 'time_info' : time_info, 'values': values }) return results
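# Small standalone example of the start-time correction above: if the first
# returned point is later than the requested start, leading None values are
# prepended so the anchor series begins at the requested start time. The
# helper name is illustrative only.
def align_start(time_info, values, start_time):
    start, end, step = time_info
    prepend = []
    while start > start_time:
        start -= step
        prepend.append(None)
    return (start, end, step), prepend + values

print(align_start((130, 160, 10), [1.0, 2.0, 3.0], 100))
# ((100, 160, 10), [None, None, None, 1.0, 2.0, 3.0])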
def find_nodes(self, query): log.debug("IRONdbFinder.find_nodes, query: %s, max_retries: %d" % (query.pattern, self.max_retries)) metrics_expand = False if query.pattern.endswith('.**'): query.pattern = query.pattern[:-1] metrics_expand = True names = {} name_headers = copy.deepcopy(self.headers) name_headers['Accept'] = 'application/x-flatbuffer-metric-find-result-list' for i in range(0, self.max_retries): try: if self.zipkin_enabled == True: traceheader = binascii.hexlify(os.urandom(8)) name_headers['X-B3-TraceId'] = traceheader name_headers['X-B3-SpanId'] = traceheader if self.zipkin_event_trace_level == 1: name_headers['X-Mtev-Trace-Event'] = '1' elif self.zipkin_event_trace_level == 2: name_headers['X-Mtev-Trace-Event'] = '2' r = requests.get(urls.names, params={'query': query.pattern}, headers=name_headers, timeout=((self.connection_timeout / 1000.0), (self.timeout / 1000.0))) r.raise_for_status() if r.headers['content-type'] == 'application/json': names = r.json() elif r.headers['content-type'] == 'application/x-flatbuffer-metric-find-result-list': names = irondb_flatbuf.metric_find_results(r.content) else: pass break except (socket.gaierror, requests.exceptions.ConnectionError) as ex: # on down nodes, try again on another node until "tries" log.exception("IRONdbFinder.find_nodes ConnectionError %s" % ex) except requests.exceptions.ConnectTimeout as ex: # on down nodes, try again on another node until "tries" log.exception("IRONdbFinder.find_nodes ConnectTimeout %s" % ex) except irondb_flatbuf.FlatBufferError as ex: # flatbuffer error, try again log.exception("IRONdbFinder.find_nodes FlatBufferError %s" % ex) except JSONDecodeError as ex: # json error, try again log.exception("IRONdbFinder.find_nodes JSONDecodeError %s" % ex) except requests.exceptions.ReadTimeout as ex: # on down nodes, try again on another node until "tries" log.exception("IRONdbFinder.find_nodes ReadTimeout %s" % ex) except requests.exceptions.HTTPError as ex: # http status code errors are failures, stop immediately log.exception("IRONdbFinder.find_nodes HTTPError %s %s" % (ex, r.content)) break if settings.DEBUG: log.debug("IRONdbFinder.find_nodes, result: %s" % json.dumps(names)) # for each set of self.batch_size leafnodes, execute an IRONdbMeasurementFetcher # so we can do these in batches. measurement_headers = copy.deepcopy(self.headers) measurement_headers['Accept'] = 'application/x-flatbuffer-metric-get-result-list' fetcher = IRONdbMeasurementFetcher(measurement_headers, self.timeout, self.connection_timeout, self.database_rollups, self.rollup_window, self.max_retries, self.zipkin_enabled, self.zipkin_event_trace_level) for name in names: if 'leaf' in name and 'leaf_data' in name: fetcher.add_leaf(name['name'], name['leaf_data']) reader = IRONdbReader(name['name'], fetcher) yield LeafNode(name['name'], reader) else: yield BranchNode(name['name']) if metrics_expand: query = FindQuery(name['name'] + '.**', None, None) for node in self.find_nodes(query): yield node
def _merge_results(pathExpr, startTime, endTime, result_queue, seriesList): log.debug("render.datalib.fetchData :: starting to merge") for path, results in result_queue: results = wait_for_result(results) if not results: log.debug("render.datalib.fetchData :: no results for %s.fetch(%s, %s)" % (path, startTime, endTime)) continue try: (timeInfo, values) = results except ValueError as e: raise Exception("could not parse timeInfo/values from metric '%s': %s" % (path, e)) (start, end, step) = timeInfo series = TimeSeries(path, start, end, step, values) # hack to pass expressions through to render functions series.pathExpression = pathExpr # Used as a cache to avoid recounting series None values below. series_best_nones = {} if series.name in seriesList: # This counts the Nones in each series, and is unfortunately O(n) for each # series, which may be worth further optimization. The value of doing this # at all is to avoid the "flipping" effect of loading a graph multiple times # and having inconsistent data returned if one of the backing stores has # inconsistent data. This is imperfect as a validity test, but in practice # nicely keeps us using the "most complete" dataset available. Think of it # as a very weak CRDT resolver. candidate_nones = 0 if not settings.REMOTE_STORE_MERGE_RESULTS: candidate_nones = len( [val for val in values if val is None]) known = seriesList[series.name] # To avoid repeatedly recounting the 'Nones' in series we've already seen, # cache the best known count so far in a dict. if known.name in series_best_nones: known_nones = series_best_nones[known.name] else: known_nones = len([val for val in known if val is None]) if known_nones > candidate_nones and len(series): if settings.REMOTE_STORE_MERGE_RESULTS: # This series has potential data that might be missing from # earlier series. Attempt to merge in useful data and update # the cache count. log.debug("Merging multiple TimeSeries for %s" % known.name) for i, j in enumerate(known): if j is None and series[i] is not None: known[i] = series[i] known_nones -= 1 # Store known_nones in our cache series_best_nones[known.name] = known_nones else: # Not merging data - # we've found a series better than what we've already seen. Update # the count cache and replace the given series in the array. series_best_nones[known.name] = candidate_nones seriesList[known.name] = series else: if settings.REMOTE_PREFETCH_DATA: # if we're using REMOTE_PREFETCH_DATA we can save some time by skipping # find, but that means we don't know how many nodes to expect so we # have to iterate over all returned results continue # In case if we are merging data - the existing series has no gaps and # there is nothing to merge together. Save ourselves some work here. # # OR - if we picking best serie: # # We already have this series in the seriesList, and the # candidate is 'worse' than what we already have, we don't need # to compare anything else. Save ourselves some work here. break else: # If we looked at this series above, and it matched a 'known' # series already, then it's already in the series list (or ignored). # If not, append it here. seriesList[series.name] = series # Stabilize the order of the results by ordering the resulting series by name. # This returns the result ordering to the behavior observed pre PR#1010. return [seriesList[k] for k in sorted(seriesList)]
def find_nodes(self, query, timer=None): timer.set_msg( 'host: {host}, query: {query}'.format( host=self.host, query=query)) log.debug("RemoteFinder.find_nodes(host=%s, query=%s) called" % (self.host, query)) # prevent divide by 0 cacheTTL = settings.FIND_CACHE_DURATION or 1 if query.startTime: start = query.startTime - (query.startTime % cacheTTL) else: start = "" if query.endTime: end = query.endTime - (query.endTime % cacheTTL) else: end = "" cacheKey = "find:%s:%s:%s:%s" % (self.host, compactHash(query.pattern), start, end) results = cache.get(cacheKey) if results is not None: log.debug( "RemoteFinder.find_nodes(host=%s, query=%s) using cached result" % (self.host, query)) else: url = '/metrics/find/' query_params = [ ('local', self.params.get('local', '1')), ('format', self.params.get('format', 'pickle')), ('query', query.pattern), ] if query.startTime: query_params.append(('from', int(query.startTime))) if query.endTime: query_params.append(('until', int(query.endTime))) result = self.request( url, fields=query_params, headers=query.headers, timeout=settings.FIND_TIMEOUT) try: if result.getheader('content-type') == 'application/x-msgpack': results = msgpack.load(BufferedHTTPReader( result, buffer_size=settings.REMOTE_BUFFER_SIZE), encoding='utf-8') else: results = unpickle.load(BufferedHTTPReader( result, buffer_size=settings.REMOTE_BUFFER_SIZE)) except Exception as err: self.fail() log.exception( "RemoteFinder[%s] Error decoding find response from %s: %s" % (self.host, result.url_full, err)) raise Exception("Error decoding find response from %s: %s" % (result.url_full, err)) finally: result.release_conn() cache.set(cacheKey, results, settings.FIND_CACHE_DURATION) for node_info in results: # handle both 1.x and 0.9.x output path = node_info.get('path') or node_info.get('metric_path') is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf') intervals = node_info.get('intervals') or [] if not isinstance(intervals, IntervalSet): intervals = IntervalSet( [Interval(interval[0], interval[1]) for interval in intervals]) node_info = { 'is_leaf': is_leaf, 'path': path, 'intervals': intervals, } if is_leaf: reader = RemoteReader(self, node_info) node = LeafNode(path, reader) else: node = BranchNode(path) node.local = False yield node
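# Illustration of the find-cache key bucketing above: start and end are
# rounded down to FIND_CACHE_DURATION boundaries, so requests issued close
# together map to the same cache entry instead of each producing a distinct
# key. The helper and sample values are hypothetical.
def bucketed_find_key(host, pattern_hash, start_time, end_time, cache_ttl=300):
    start = start_time - (start_time % cache_ttl) if start_time else ""
    end = end_time - (end_time % cache_ttl) if end_time else ""
    return "find:%s:%s:%s:%s" % (host, pattern_hash, start, end)

print(bucketed_find_key('graphite-1', 'abc123', 1700000005, 1700003605))
print(bucketed_find_key('graphite-1', 'abc123', 1700000090, 1700003690))  # same key as above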
def send(self, headers=None, msg_setter=None): log.debug("FindRequest.send(host=%s, query=%s) called" % (self.store.host, self.query)) if headers is None: headers = {} results = cache.get(self.cacheKey) if results is not None: log.debug( "FindRequest.send(host=%s, query=%s) using cached result" % (self.store.host, self.query)) else: url = "%s://%s/metrics/find/" % ( 'https' if settings.INTRACLUSTER_HTTPS else 'http', self.store.host) query_params = [ ('local', '1'), ('format', 'pickle'), ('query', self.query.pattern), ] if self.query.startTime: query_params.append(('from', self.query.startTime)) if self.query.endTime: query_params.append(('until', self.query.endTime)) try: result = http.request( 'POST' if settings.REMOTE_STORE_USE_POST else 'GET', url, fields=query_params, headers=headers, timeout=settings.REMOTE_FIND_TIMEOUT) except BaseException: log.exception( "FindRequest.send(host=%s, query=%s) exception during request" % (self.store.host, self.query)) self.store.fail() return if result.status != 200: log.exception( "FindRequest.send(host=%s, query=%s) error response %d from %s?%s" % (self.store.host, self.query, result.status, url, urlencode(query_params))) self.store.fail() return try: results = unpickle.loads(result.data) except BaseException: log.exception( "FindRequest.send(host=%s, query=%s) exception processing response" % (self.store.host, self.query)) self.store.fail() return cache.set(self.cacheKey, results, settings.FIND_CACHE_DURATION) msg_setter('host: {host}, query: {query}'.format(host=self.store.host, query=self.query)) for node_info in results: # handle both 1.x and 0.9.x output path = node_info.get('path') or node_info.get('metric_path') is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf') intervals = node_info.get('intervals') or [] if not isinstance(intervals, IntervalSet): intervals = IntervalSet([ Interval(interval[0], interval[1]) for interval in intervals ]) node_info = { 'is_leaf': is_leaf, 'path': path, 'intervals': intervals, } if is_leaf: reader = RemoteReader(self.store, node_info, bulk_query=[self.query.pattern]) node = LeafNode(path, reader) else: node = BranchNode(path) node.local = False yield node
def find_nodes(self, query, timer=None): timer.set_msg('host: {host}, query: {query}'.format(host=self.host, query=query)) log.debug("RemoteFinder.find_nodes(host=%s, query=%s) called" % (self.host, query)) # prevent divide by 0 cacheTTL = settings.FIND_CACHE_DURATION or 1 if query.startTime: start = query.startTime - (query.startTime % cacheTTL) else: start = "" if query.endTime: end = query.endTime - (query.endTime % cacheTTL) else: end = "" cacheKey = "find:%s:%s:%s:%s" % (self.host, compactHash( query.pattern), start, end) results = cache.get(cacheKey) if results is not None: log.debug( "RemoteFinder.find_nodes(host=%s, query=%s) using cached result" % (self.host, query)) else: url = '/metrics/find/' query_params = [ ('local', self.params.get('local', '1')), ('format', self.params.get('format', 'pickle')), ('query', query.pattern), ] if query.startTime: query_params.append(('from', int(query.startTime))) if query.endTime: query_params.append(('until', int(query.endTime))) result = self.request(url, fields=query_params, headers=query.headers, timeout=settings.REMOTE_FIND_TIMEOUT) try: if result.getheader('content-type') == 'application/x-msgpack': results = msgpack.load(BufferedHTTPReader( result, buffer_size=settings.REMOTE_BUFFER_SIZE), encoding='utf-8') else: results = unpickle.load( BufferedHTTPReader( result, buffer_size=settings.REMOTE_BUFFER_SIZE)) except Exception as err: self.fail() log.exception( "RemoteFinder[%s] Error decoding find response from %s: %s" % (self.host, result.url_full, err)) raise Exception("Error decoding find response from %s: %s" % (result.url_full, err)) finally: result.release_conn() cache.set(cacheKey, results, settings.FIND_CACHE_DURATION) for node_info in results: # handle both 1.x and 0.9.x output path = node_info.get('path') or node_info.get('metric_path') is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf') intervals = node_info.get('intervals') or [] if not isinstance(intervals, IntervalSet): intervals = IntervalSet([ Interval(interval[0], interval[1]) for interval in intervals ]) node_info = { 'is_leaf': is_leaf, 'path': path, 'intervals': intervals, } if is_leaf: reader = RemoteReader(self, node_info) node = LeafNode(path, reader) else: node = BranchNode(path) node.local = False yield node
def tagdb_auto_complete_values(self, exprs, tag, valuePrefix=None, limit=None, requestContext=None): log.debug( 'graphite.storage.Store.auto_complete_values :: Starting lookup on all backends' ) if requestContext is None: requestContext = {} jobs = [] use_tagdb = False for finder in self.get_finders(requestContext.get('localOnly')): if getattr(finder, 'tags', False): jobs.append( Job(finder.auto_complete_values, exprs, tag, valuePrefix=valuePrefix, limit=limit, requestContext=requestContext)) else: use_tagdb = True if not jobs: if not use_tagdb: return [] return self.tagdb.auto_complete_values( exprs, tag, valuePrefix=valuePrefix, limit=limit, requestContext=requestContext) # start finder jobs jobs = self.pool_exec(jobs, settings.REMOTE_FIND_TIMEOUT) results = set() # if we're using the local tagdb then execute it (in the main thread so that LocalDatabaseTagDB will work) if use_tagdb: results.update( self.tagdb.auto_complete_values(exprs, tag, valuePrefix=valuePrefix, limit=limit, requestContext=requestContext)) done = 0 errors = 0 # Start fetches start = time.time() try: for job in jobs: done += 1 if job.exception: errors += 1 log.info( "Autocomplete values for %s %s %s failed after %fs: %s" % (str(exprs), tag, valuePrefix or '', time.time() - start, str(job.exception))) continue log.debug( "Got an autocomplete result for %s %s %s after %fs" % (str(exprs), tag, valuePrefix or '', time.time() - start)) results.update(job.result) except PoolTimeoutError: raise Exception( "Timed out in autocomplete values for %s %s %s after %fs" % (str(exprs), tag, valuePrefix or '', time.time() - start)) if errors == done: if errors == 1: raise Exception( "Autocomplete values for %s %s %s failed: %s" % (str(exprs), tag, valuePrefix or '', str(job.exception))) raise Exception( 'All autocomplete value requests failed for %s %s %s' % (str(exprs), tag, valuePrefix or '')) # sort & limit results results = sorted(results) if limit: results = results[:int(limit)] log.debug("Got all autocomplete value results for %s %s %s in %fs" % (str(exprs), tag, valuePrefix or '', time.time() - start)) return results
def fetch(self, patterns, startTime, endTime, now, requestContext): # deduplicate patterns patterns = sorted(set(patterns)) if not patterns: return [] log.debug( 'graphite.storage.Store.fetch :: Starting fetch on all backends') jobs = [] tag_patterns = None pattern_aliases = defaultdict(list) for finder in self.get_finders(requestContext.get('localOnly')): # if the finder supports tags, just pass the patterns through if getattr(finder, 'tags', False): jobs.append( Job(finder.fetch, patterns, startTime, endTime, now=now, requestContext=requestContext)) continue # if we haven't resolved the seriesByTag calls, build resolved patterns and translation table if tag_patterns is None: tag_patterns, pattern_aliases = self._tag_patterns( patterns, requestContext) # dispatch resolved patterns to finder jobs.append( Job(finder.fetch, tag_patterns, startTime, endTime, now=now, requestContext=requestContext)) results = [] done = 0 errors = 0 # Start fetches start = time.time() try: for job in self.pool_exec(jobs, settings.REMOTE_FETCH_TIMEOUT): done += 1 if job.exception: errors += 1 log.info("Fetch for %s failed after %fs: %s" % (str(patterns), time.time() - start, str(job.exception))) continue log.debug("Got a fetch result for %s after %fs" % (str(patterns), time.time() - start)) results.extend(job.result) except PoolTimeoutError: log.info("Timed out in fetch after %fs" % (time.time() - start)) if errors == done: if errors == 1: raise Exception("Fetch for %s failed: %s" % (str(patterns), str(job.exception))) raise Exception('All fetches failed for %s' % (str(patterns))) # translate path expressions for responses from resolved seriesByTag patterns for result in results: if result['name'] == result['pathExpression'] and result[ 'pathExpression'] in pattern_aliases: for pathExpr in pattern_aliases[result['pathExpression']]: newresult = deepcopy(result) newresult['pathExpression'] = pathExpr results.append(newresult) log.debug("Got all fetch results for %s in %fs" % (str(patterns), time.time() - start)) return results
def _merge_results(pathExpr, startTime, endTime, prefetched, seriesList, requestContext): log.debug("render.datalib.fetchData :: starting to merge") # Used as a cache to avoid recounting series None values below. series_best_nones = {} for path, results in prefetched: if not results: log.debug("render.datalib.fetchData :: no results for %s.fetch(%s, %s)" % (path, startTime, endTime)) continue try: (timeInfo, values) = results except ValueError as e: raise Exception("could not parse timeInfo/values from metric '%s': %s" % (path, e)) (start, end, step) = timeInfo series = TimeSeries(path, start, end, step, values, xFilesFactor=requestContext.get('xFilesFactor')) # hack to pass expressions through to render functions series.pathExpression = pathExpr if series.name in seriesList: # This counts the Nones in each series, and is unfortunately O(n) for each # series, which may be worth further optimization. The value of doing this # at all is to avoid the "flipping" effect of loading a graph multiple times # and having inconsistent data returned if one of the backing stores has # inconsistent data. This is imperfect as a validity test, but in practice # nicely keeps us using the "most complete" dataset available. Think of it # as a very weak CRDT resolver. candidate_nones = 0 if not settings.REMOTE_STORE_MERGE_RESULTS: candidate_nones = len( [val for val in values if val is None]) known = seriesList[series.name] # To avoid repeatedly recounting the 'Nones' in series we've already seen, # cache the best known count so far in a dict. if known.name in series_best_nones: known_nones = series_best_nones[known.name] else: known_nones = len([val for val in known if val is None]) series_best_nones[known.name] = known_nones if known_nones > candidate_nones and len(series): if settings.REMOTE_STORE_MERGE_RESULTS and len(series) == len(known): # This series has potential data that might be missing from # earlier series. Attempt to merge in useful data and update # the cache count. log.debug("Merging multiple TimeSeries for %s" % known.name) for i, j in enumerate(known): if j is None and series[i] is not None: known[i] = series[i] known_nones -= 1 # Store known_nones in our cache series_best_nones[known.name] = known_nones else: # Not merging data - # we've found a series better than what we've already seen. Update # the count cache and replace the given series in the array. series_best_nones[known.name] = candidate_nones seriesList[known.name] = series else: # If we looked at this series above, and it matched a 'known' # series already, then it's already in the series list (or ignored). # If not, append it here. seriesList[series.name] = series # Stabilize the order of the results by ordering the resulting series by name. # This returns the result ordering to the behavior observed pre PR#1010. return [seriesList[k] for k in sorted(seriesList)]
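# Self-contained example of the merge policy above: when two backends return
# the same series, gaps (None values) in the known copy are filled from the
# candidate only where the candidate actually has data.
def merge_fill(known, candidate):
    merged = list(known)
    for i, value in enumerate(merged):
        if value is None and candidate[i] is not None:
            merged[i] = candidate[i]
    return merged

print(merge_fill([1.0, None, 3.0, None], [1.0, 2.0, None, 4.0]))  # [1.0, 2.0, 3.0, 4.0]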
def find_nodes(self, query, timer=None): timer.set_msg( 'host: {host}, query: {query}'.format( host=self.host, query=query)) log.debug("RemoteFinder.find_nodes(host=%s, query=%s) called" % (self.host, query)) # prevent divide by 0 cacheTTL = settings.FIND_CACHE_DURATION or 1 if query.startTime: start = query.startTime - (query.startTime % cacheTTL) else: start = "" if query.endTime: end = query.endTime - (query.endTime % cacheTTL) else: end = "" cacheKey = "find:%s:%s:%s:%s" % (self.host, compactHash(query.pattern), start, end) results = cache.get(cacheKey) if results is not None: log.debug( "RemoteFinder.find_nodes(host=%s, query=%s) using cached result" % (self.host, query)) else: url = '/metrics/find/' query_params = [ ('local', self.params.get('local', '1')), ('format', self.params.get('format', 'pickle')), ('query', query.pattern), ] if query.startTime: query_params.append(('from', int(query.startTime))) if query.endTime: query_params.append(('until', int(query.endTime))) result = self.request( url, fields=query_params, headers=query.headers, timeout=settings.FIND_TIMEOUT) results = self.deserialize(result) cache.set(cacheKey, results, settings.FIND_CACHE_DURATION) # We don't use generator here, this function may be run as a job in a thread pool, using a generator has the following risks: # 1. Generators are lazy, if we don't iterator the returned generator in the job, the real execution(network operations, # time-consuming) are very likely be triggered in the calling thread, losing the effect of thread pool; # 2. As function execution is delayed, the job manager can not catch job runtime exception as expected/designed; nodes = [] for node_info in results: # handle both 1.x and 0.9.x output path = node_info.get('path') or node_info.get('metric_path') is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf') intervals = node_info.get('intervals') or [] if not isinstance(intervals, IntervalSet): intervals = IntervalSet( [Interval(interval[0], interval[1]) for interval in intervals]) node_info = { 'is_leaf': is_leaf, 'path': path, 'intervals': intervals, } if is_leaf: reader = RemoteReader(self, node_info) node = LeafNode(path, reader) else: node = BranchNode(path) node.local = False nodes.append(node) return nodes
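# Small demonstration of the comment above about generators and thread pools:
# submitting a generator-producing function returns almost immediately, and
# both the real work and any exception only surface when the caller iterates
# the generator, outside the pool job. The finder functions are made up.
from concurrent.futures import ThreadPoolExecutor

def lazy_find():
    yield 'node-a'
    raise RuntimeError('backend failed')  # invisible until iteration

def eager_find():
    return list(lazy_find())  # work and errors happen inside the pool job

with ThreadPoolExecutor(max_workers=1) as pool:
    gen = pool.submit(lazy_find).result()  # "succeeds" -- nothing has run yet
    try:
        list(gen)
    except RuntimeError as err:
        print('raised during iteration, in the caller:', err)
    try:
        pool.submit(eager_find).result()
    except RuntimeError as err:
        print('raised by the pool job itself:', err)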