def fetch(self, patterns, startTime, endTime, now, requestContext):
    # deduplicate patterns
    patterns = sorted(set(patterns))

    if not patterns:
        return []

    log.debug('graphite.storage.Store.fetch :: Starting fetch on all backends')

    jobs = []
    tag_patterns = None
    pattern_aliases = defaultdict(list)
    for finder in self.get_finders(requestContext.get('localOnly')):
        # if the finder supports tags, just pass the patterns through
        if getattr(finder, 'tags', False):
            job = Job(finder.fetch, 'fetch for %s' % patterns, patterns,
                      startTime, endTime, now=now, requestContext=requestContext)
            jobs.append(job)
            continue

        # if we haven't resolved the seriesByTag calls, build resolved patterns and translation table
        if tag_patterns is None:
            tag_patterns, pattern_aliases = self._tag_patterns(patterns, requestContext)

        # dispatch resolved patterns to finder
        job = Job(finder.fetch, 'fetch for %s' % tag_patterns, tag_patterns,
                  startTime, endTime, now=now, requestContext=requestContext)
        jobs.append(job)

    # Start fetches
    start = time.time()
    results = self.wait_jobs(jobs, settings.FETCH_TIMEOUT,
                             'fetch for %s' % str(patterns))
    results = [i for l in results for i in l]  # flatten

    # translate path expressions for responses from resolved seriesByTag patterns
    for result in results:
        if result['name'] == result['pathExpression'] and result['pathExpression'] in pattern_aliases:
            for pathExpr in pattern_aliases[result['pathExpression']]:
                newresult = deepcopy(result)
                newresult['pathExpression'] = pathExpr
                results.append(newresult)

    log.debug("Got all fetch results for %s in %fs" %
              (str(patterns), time.time() - start))
    return results
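# NOTE: wait_jobs is used above but not shown in this section. A minimal
# sketch of what it might look like, built from the pool_exec/Job/
# PoolTimeoutError error-handling pattern that the pool_exec-based variants
# below repeat inline; the real helper may differ.
def wait_jobs(self, jobs, timeout, context):
    if not jobs:
        return []

    start = time.time()
    results = []
    done = 0
    errors = 0
    try:
        for job in self.pool_exec(jobs, timeout):
            done += 1
            if job.exception:
                errors += 1
                log.info("Exception during %s after %fs: %s" %
                         (context, time.time() - start, str(job.exception)))
                continue
            results.append(job.result)
    except PoolTimeoutError:
        log.info("Timed out after %fs for %s" % (time.time() - start, context))

    if errors and errors == done:
        raise Exception('All requests failed for %s' % context)

    return results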
def _find(self, query):
    jobs = [
        Job(finder.find_nodes, query)
        for finder in self.get_finders(query.local)
    ]

    # Group matching nodes by their path
    nodes_by_path = defaultdict(list)

    done = 0
    errors = 0

    # Start finds
    start = time.time()
    try:
        for job in self.pool_exec(jobs, settings.REMOTE_FIND_TIMEOUT):
            done += 1

            if job.exception:
                errors += 1
                log.info("Find for %s failed after %fs: %s" %
                         (str(query), time.time() - start, str(job.exception)))
                continue

            log.debug("Got a find result for %s after %fs" %
                      (str(query), time.time() - start))
            for node in job.result or []:
                nodes_by_path[node.path].append(node)
    except PoolTimeoutError:
        log.info("Timed out in find after %fs" % (time.time() - start))

    if errors == done:
        raise Exception('All finds failed for %s' % (str(query)))

    log.debug("Got all find results for %s in %fs" %
              (str(query), time.time() - start))
    return self._list_nodes(query, nodes_by_path)
def tagdb_auto_complete_values(self, exprs, tag, valuePrefix=None,
                               limit=None, requestContext=None):
    log.debug(
        'graphite.storage.Store.auto_complete_values :: Starting lookup on all backends')

    if requestContext is None:
        requestContext = {}

    context = 'values for %s %s %s' % (str(exprs), tag, valuePrefix or '')
    jobs = []
    use_tagdb = False
    for finder in self.get_finders(requestContext.get('localOnly')):
        if getattr(finder, 'tags', False):
            job = Job(finder.auto_complete_values, context, exprs, tag,
                      valuePrefix=valuePrefix, limit=limit,
                      requestContext=requestContext)
            jobs.append(job)
        else:
            use_tagdb = True

    # start finder jobs
    start = time.time()
    results = set()

    # if we're using the local tagdb then execute it (in the main thread
    # so that LocalDatabaseTagDB will work)
    if use_tagdb:
        results.update(self.tagdb.auto_complete_values(
            exprs, tag, valuePrefix=valuePrefix,
            limit=limit, requestContext=requestContext))

    for result in self.wait_jobs(jobs, settings.FIND_TIMEOUT, context):
        results.update(result)

    # sort & limit results
    results = sorted(results)
    if limit:
        results = results[:int(limit)]

    log.debug("Got all autocomplete %s in %fs" %
              (context, time.time() - start))
    return results
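# Hypothetical usage of tagdb_auto_complete_values; the store instance,
# tag expressions, and tag names below are illustrative only.
values = store.tagdb_auto_complete_values(
    [r'name=~cpu\..*', 'rack=a1'],  # tag expressions, as in seriesByTag()
    'datacenter',                   # the tag whose values to complete
    valuePrefix='us-',              # only values starting with 'us-'
    limit=50,
)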
def fetch(self, patterns, startTime, endTime, now, requestContext):
    # deduplicate patterns
    patterns = list(set(patterns))

    if not patterns:
        return []

    log.debug('graphite.storage.Store.fetch :: Starting fetch on all backends')

    jobs = [
        Job(finder.fetch, patterns, startTime, endTime,
            now=now, requestContext=requestContext)
        for finder in self.get_finders(requestContext.get('localOnly'))
    ]

    results = []

    done = 0
    errors = 0

    # Start fetches
    start = time.time()
    try:
        for job in pool_exec(get_pool(), jobs, settings.REMOTE_FETCH_TIMEOUT):
            done += 1

            if job.exception:
                errors += 1
                log.debug("Fetch for %s failed after %fs: %s" %
                          (str(patterns), time.time() - start, str(job.exception)))
                continue

            log.debug("Got a fetch result for %s after %fs" %
                      (str(patterns), time.time() - start))
            results.extend(job.result)
    except PoolTimeoutError:
        log.debug("Timed out in fetch after %fs" % (time.time() - start))

    if errors == done:
        raise Exception('All fetches failed for %s' % (str(patterns)))

    log.debug("Got all fetch results for %s in %fs" %
              (str(patterns), time.time() - start))
    return results
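# Hypothetical call to the fetch() variant above, requesting the last hour
# for two illustrative patterns; the result shape matches the multi-fetch
# docstring further below (dicts with 'pathExpression', 'values', ...).
import time

now = int(time.time())
series_list = store.fetch(
    ['collectd.host1.load.*', 'collectd.host2.load.*'],
    startTime=now - 3600,
    endTime=now,
    now=now,
    requestContext={'localOnly': False},
)
for series in series_list:
    print(series['pathExpression'], len(series['values']))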
def get_index(self, requestContext=None):
    log.debug('graphite.storage.Store.get_index :: Starting get_index on all backends')

    if not requestContext:
        requestContext = {}

    jobs = [
        Job(finder.get_index, requestContext=requestContext)
        for finder in self.get_finders(local=requestContext.get('localOnly'))
    ]

    results = []

    done = 0
    errors = 0

    # Start index lookups
    start = time.time()
    try:
        for job in self.pool_exec(jobs, settings.REMOTE_FETCH_TIMEOUT):
            done += 1

            if job.exception:
                errors += 1
                log.info("get_index failed after %fs: %s" %
                         (time.time() - start, str(job.exception)))
                continue

            log.debug("Got an index result after %fs" % (time.time() - start))
            results.extend(job.result)
    except PoolTimeoutError:
        log.info("Timed out in get_index after %fs" % (time.time() - start))

    if errors == done:
        if errors == 1:
            raise Exception("get_index failed: %s" % (str(job.exception)))
        raise Exception('All index lookups failed')

    log.debug("Got all index results in %fs" % (time.time() - start))
    return sorted(list(set(results)))
def _find(self, query):
    context = 'find %s' % query
    jobs = [
        Job(finder.find_nodes, context, query)
        for finder in self.get_finders(query.local)
    ]

    # Group matching nodes by their path
    nodes_by_path = defaultdict(list)

    # Start finds
    start = time.time()
    results = self.wait_jobs(jobs, settings.FIND_TIMEOUT, context)
    for result in results:
        for node in result or []:
            nodes_by_path[node.path].append(node)

    log.debug("Got all find results for %s in %fs" %
              (str(query), time.time() - start))
    return self._list_nodes(query, nodes_by_path)
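# NOTE: _list_nodes is referenced above but not shown. A speculative
# minimal sketch of the per-path deduplication it presumably performs,
# preferring leaf nodes over branches; the real implementation may merge
# intervals or apply other selection policies.
def _list_nodes(self, query, nodes_by_path):
    nodes = []
    for path, candidates in nodes_by_path.items():
        leaves = [node for node in candidates if node.is_leaf]
        nodes.append(leaves[0] if leaves else candidates[0])
    return nodes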
def get_index(self, requestContext=None):
    log.debug('graphite.storage.Store.get_index :: Starting get_index on all backends')

    if not requestContext:
        requestContext = {}

    context = 'get_index'
    jobs = [
        Job(finder.get_index, context, requestContext=requestContext)
        for finder in self.get_finders(local=requestContext.get('localOnly'))
    ]

    start = time.time()
    results = self.wait_jobs(jobs, settings.FETCH_TIMEOUT, context)
    results = [i for l in results if l is not None for i in l]  # flatten

    log.debug("Got all index results in %fs" % (time.time() - start))
    return sorted(list(set(results)))
def find_multi(self, patterns, reqkey=None):
    '''
    Called from self.fetch in graphite 1.1 and newer.

    Returns:
        Generator of (pattern, [nodes]) pairs.
    '''
    reqkey = reqkey or uuid.uuid4()
    jobs = [
        Job(self._search_request,
            'Query graphouse for {}'.format(pattern), pattern)
        for pattern in patterns
    ]
    results = self.wait_jobs(jobs, getattr(settings, 'FIND_TIMEOUT'),
                             'Find nodes for {} request'.format(reqkey))
    for pattern, metric_names in results:
        leafs = []
        for metric in metric_names:
            if metric and not metric.endswith('.'):
                leafs.append(LeafNode(metric, None))
        yield (pattern, leafs)
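# Hypothetical usage of find_multi: since it is a generator, callers drain
# it pattern by pattern. The finder instance and patterns are illustrative.
for pattern, leafs in finder.find_multi(['one_min.host.*.cpu',
                                         'one_min.host.*.mem']):
    print('{} matched {} leaf metrics'.format(pattern, len(leafs)))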
def fetch(self, patterns, startTime, endTime, now, requestContext):
    # deduplicate patterns
    patterns = sorted(set(patterns))

    if not patterns:
        return []

    log.debug('graphite.storage.Store.fetch :: Starting fetch on all backends')

    jobs = []
    tag_patterns = None
    pattern_aliases = defaultdict(list)
    for finder in self.get_finders(requestContext.get('localOnly')):
        # if the finder supports tags, just pass the patterns through
        if getattr(finder, 'tags', False):
            jobs.append(Job(finder.fetch, patterns, startTime, endTime,
                            now=now, requestContext=requestContext))
            continue

        # if we haven't resolved the seriesByTag calls, build resolved patterns and translation table
        if tag_patterns is None:
            tag_patterns, pattern_aliases = self._tag_patterns(patterns, requestContext)

        # dispatch resolved patterns to finder
        jobs.append(Job(finder.fetch, tag_patterns, startTime, endTime,
                        now=now, requestContext=requestContext))

    results = []

    done = 0
    errors = 0

    # Start fetches
    start = time.time()
    try:
        for job in self.pool_exec(jobs, settings.REMOTE_FETCH_TIMEOUT):
            done += 1

            if job.exception:
                errors += 1
                log.info("Fetch for %s failed after %fs: %s" %
                         (str(patterns), time.time() - start, str(job.exception)))
                continue

            log.debug("Got a fetch result for %s after %fs" %
                      (str(patterns), time.time() - start))
            results.extend(job.result)
    except PoolTimeoutError:
        log.info("Timed out in fetch after %fs" % (time.time() - start))

    if errors == done:
        if errors == 1:
            raise Exception("Fetch for %s failed: %s" %
                            (str(patterns), str(job.exception)))
        raise Exception('All fetches failed for %s' % (str(patterns)))

    # translate path expressions for responses from resolved seriesByTag patterns
    for result in results:
        if result['name'] == result['pathExpression'] and result['pathExpression'] in pattern_aliases:
            for pathExpr in pattern_aliases[result['pathExpression']]:
                newresult = deepcopy(result)
                newresult['pathExpression'] = pathExpr
                results.append(newresult)

    log.debug("Got all fetch results for %s in %fs" %
              (str(patterns), time.time() - start))
    return results
def tagdb_auto_complete_values(self, exprs, tag, valuePrefix=None,
                               limit=None, requestContext=None):
    log.debug(
        'graphite.storage.Store.auto_complete_values :: Starting lookup on all backends')

    if requestContext is None:
        requestContext = {}

    jobs = []
    use_tagdb = False
    for finder in self.get_finders(requestContext.get('localOnly')):
        if getattr(finder, 'tags', False):
            jobs.append(Job(finder.auto_complete_values, exprs, tag,
                            valuePrefix=valuePrefix, limit=limit,
                            requestContext=requestContext))
        else:
            use_tagdb = True

    if not jobs:
        if not use_tagdb:
            return []

        return self.tagdb.auto_complete_values(
            exprs, tag, valuePrefix=valuePrefix,
            limit=limit, requestContext=requestContext)

    # start finder jobs
    jobs = self.pool_exec(jobs, settings.REMOTE_FIND_TIMEOUT)

    results = set()

    # if we're using the local tagdb then execute it (in the main thread
    # so that LocalDatabaseTagDB will work)
    if use_tagdb:
        results.update(self.tagdb.auto_complete_values(
            exprs, tag, valuePrefix=valuePrefix,
            limit=limit, requestContext=requestContext))

    done = 0
    errors = 0

    # Drain autocomplete jobs
    start = time.time()
    try:
        for job in jobs:
            done += 1

            if job.exception:
                errors += 1
                log.info("Autocomplete values for %s %s %s failed after %fs: %s" %
                         (str(exprs), tag, valuePrefix or '',
                          time.time() - start, str(job.exception)))
                continue

            log.debug("Got an autocomplete result for %s %s %s after %fs" %
                      (str(exprs), tag, valuePrefix or '', time.time() - start))
            results.update(job.result)
    except PoolTimeoutError:
        raise Exception("Timed out in autocomplete values for %s %s %s after %fs" %
                        (str(exprs), tag, valuePrefix or '', time.time() - start))

    if errors == done:
        if errors == 1:
            raise Exception("Autocomplete values for %s %s %s failed: %s" %
                            (str(exprs), tag, valuePrefix or '', str(job.exception)))
        raise Exception('All autocomplete value requests failed for %s %s %s' %
                        (str(exprs), tag, valuePrefix or ''))

    # sort & limit results
    results = sorted(results)
    if limit:
        results = results[:int(limit)]

    log.debug("Got all autocomplete value results for %s %s %s in %fs" %
              (str(exprs), tag, valuePrefix or '', time.time() - start))
    return results
def fetch(self, patterns, start_time, end_time, now=None, requestContext=None):
    """Fetch multiple patterns at once.

    This method is used to fetch multiple patterns at once, which
    allows alternate finders to do batching on their side when they can.

    Returns:
      an iterable of
      {
        'pathExpression': pattern,
        'path': node.path,
        'name': node.path,
        'time_info': time_info,
        'values': values,
      }
    """
    log.debug('Multifetcher, patterns={}'.format(patterns))
    profilingTime = {'start': time.time()}
    requestContext = requestContext or {}
    reqkey = uuid.uuid4()
    results = []

    find_results = self.find_multi(patterns, reqkey)
    profilingTime['find'] = time.time()

    current_fetcher = GraphouseMultiFetcher()
    subreqs = [current_fetcher]

    for pair in find_results:
        pattern = pair[0]
        nodes = pair[1]
        log.debug('Results from find_multi: pattern={}, nodes={}'.format(pattern, nodes))
        for node in nodes:
            try:
                current_fetcher.append(node)
            except OverflowError:
                current_fetcher = GraphouseMultiFetcher()
                subreqs.append(current_fetcher)
                current_fetcher.append(node)
            results.append({
                'pathExpression': pattern,
                'name': node.path,
                'path': node.path,
                'fetcher': current_fetcher,
            })

    jobs = [
        Job(sub.fetch,
            'Fetch values for reqkey={} for {} metrics'.format(reqkey, len(sub.nodes)),
            sub.nodes[0].path, reqkey, start_time, end_time)
        for sub in subreqs if sub.nodes
    ]
    profilingTime['gen_fetch'] = time.time()

    # Fetch everything in parallel
    _ = self.wait_jobs(jobs, getattr(settings, 'FETCH_TIMEOUT'),
                       'Multifetch for request key {}'.format(reqkey))
    profilingTime['fetch'] = time.time()

    for result in results:
        result['time_info'], result['values'] = result['fetcher'].fetch(
            result['path'], reqkey, start_time, end_time)
    profilingTime['fill'] = time.time()

    log.debug('DEBUG:graphouse_multifetch[{}]: full={}, find={}, '
              'generation_fetch_jobs={}, fetch_graphouse_parallel={}, '
              'parse_values={}'.format(
                  reqkey,
                  profilingTime['fill'] - profilingTime['start'],
                  profilingTime['find'] - profilingTime['start'],
                  profilingTime['gen_fetch'] - profilingTime['find'],
                  profilingTime['fetch'] - profilingTime['gen_fetch'],
                  profilingTime['fill'] - profilingTime['fetch'],
              ))
    return results
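# The batching contract assumed by the two fetch() variants here: append()
# raises OverflowError once a batch is full, and fetch(path, reqkey, start,
# end) issues one Graphouse request per batch, then answers per-path lookups.
# A hypothetical stub of that interface; the batch limit and the request
# mechanics (_request_all) are assumptions, not the real implementation.
class GraphouseMultiFetcher(object):
    MAX_BATCH_SIZE = 10000  # assumed per-request metric limit

    def __init__(self):
        self.nodes = []
        self.result = None

    def append(self, node):
        if len(self.nodes) >= self.MAX_BATCH_SIZE:
            raise OverflowError('batch is full')
        self.nodes.append(node)

    def fetch(self, path, reqkey, start_time, end_time):
        if self.result is None:
            # one backend round-trip for the whole batch (details omitted)
            self.result = self._request_all(reqkey, start_time, end_time)
        return self.result[path]  # -> (time_info, values) for this path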
def fetch(self, patterns, start_time, end_time, now=None, requestContext=None):
    """Fetch multiple patterns at once.

    This method is used to fetch multiple patterns at once, which
    allows alternate finders to do batching on their side when they can.

    Returns:
      an iterable of
      {
        'pathExpression': pattern,
        'path': node.path,
        'name': node.path,
        'time_info': time_info,
        'values': values,
      }
    """
    req_key = uuid.uuid4()
    log.debug('reqKey:{}, Multifetcher, patternCount: {}, patterns={}'.format(
        req_key, len(patterns), patterns))
    profiling_time = {'start': time.time()}
    requestContext = requestContext or {}
    context_to_log = requestContext.copy()
    context_to_log['prefetched'] = {}
    log.debug('reqKey:{}, requestContext:{}'.format(req_key, context_to_log))
    results = []

    find_results = self.find_multi(patterns, req_key)
    profiling_time['find'] = time.time()

    current_fetcher = GraphouseMultiFetcher()
    sub_reqs = [current_fetcher]

    find_results_count = 0
    total_nodes_count = 0
    processed_nodes_count = 0

    for pair in find_results:
        find_results_count += 1
        pattern = pair[0]
        nodes = pair[1]
        total_nodes_count += len(nodes)
        log.debug('reqKey:{} Results from find_multi, pattern:{}, nodesCount:{}'.format(
            req_key, pattern, len(nodes)))
        for node in nodes:
            if processed_nodes_count >= max_allowed_nodes_count:
                if nodes_limit_exceeded_policy == 'EXCEPTION':
                    raise Exception(
                        'Max nodes (wildcards / substitutions) "{}" exceeded by patterns:{}'
                        .format(max_allowed_nodes_count, patterns))
                else:
                    # CUT_BY_LIMIT
                    break
            try:
                current_fetcher.append(node)
            except OverflowError:
                current_fetcher = GraphouseMultiFetcher()
                sub_reqs.append(current_fetcher)
                log.debug('reqKey:{}, subreq size:{}'.format(req_key, len(sub_reqs)))
                current_fetcher.append(node)
            results.append({
                'pathExpression': pattern,
                'name': node.path,
                'path': node.path,
                'fetcher': current_fetcher,
            })
            processed_nodes_count += 1

    msg = ""
    if total_nodes_count > max_allowed_nodes_count:
        msg = ", node count will be capped at the maximum allowed: {}".format(
            max_allowed_nodes_count)
    log.debug('reqKey:{} find_results count:{}, total nodes count: {}{}'.format(
        req_key, find_results_count, total_nodes_count, msg))

    jobs = [
        Job(sub.fetch,
            'Fetch values for reqKey={} for {} metrics'.format(req_key, len(sub.nodes)),
            sub.nodes[0].path, req_key, start_time, end_time)
        for sub in sub_reqs if sub.nodes
    ]
    profiling_time['gen_fetch'] = time.time()

    # Fetch everything in parallel, choosing the pool by request size
    if total_nodes_count >= nodes_count_to_use_slow_pool:
        self.prepare_slow_pool(req_key)
    else:
        self.prepare_fast_pool(req_key)

    _ = self.wait_jobs(jobs, fetch_timeout_seconds,
                       'Multifetch for request key {}'.format(req_key))
    profiling_time['fetch'] = time.time()

    for result in results:
        result['time_info'], result['values'] = result['fetcher'].fetch(
            result['path'], req_key, start_time, end_time)
    profiling_time['fill'] = time.time()

    log.debug('DEBUG:graphouse_multifetch[{}]: full(unprocessed "yield")={}, find={}, '
              'generation_fetch_jobs={}, fetch_graphouse_parallel={}, '
              'parse_values={}'.format(
                  req_key,
                  profiling_time['fill'] - profiling_time['start'],
                  profiling_time['find'] - profiling_time['start'],
                  profiling_time['gen_fetch'] - profiling_time['find'],
                  profiling_time['fetch'] - profiling_time['gen_fetch'],
                  profiling_time['fill'] - profiling_time['fetch'],
              ))
    return results
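# The limit and pool knobs referenced above (max_allowed_nodes_count,
# nodes_limit_exceeded_policy, nodes_count_to_use_slow_pool,
# fetch_timeout_seconds) are module-level settings. A plausible way they
# could be loaded, assuming Django-style settings; the names match the code
# above, but the defaults and the settings source are assumptions.
from django.conf import settings

max_allowed_nodes_count = getattr(settings, 'MAX_ALLOWED_NODES_COUNT', 100000)
nodes_limit_exceeded_policy = getattr(settings, 'NODES_LIMIT_EXCEEDED_POLICY',
                                      'CUT_BY_LIMIT')  # or 'EXCEPTION'
nodes_count_to_use_slow_pool = getattr(settings, 'NODES_COUNT_TO_USE_SLOW_POOL', 10000)
fetch_timeout_seconds = getattr(settings, 'FETCH_TIMEOUT_SECONDS', 60.0)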