def get_results(self):
    if self.failed:
        return

    if self.cachedResult is not None:
        results = self.cachedResult
    else:
        if self.connection is None:
            self.send()

        try:
            response = self.connection.getresponse()
            assert response.status == 200, "received error response %s - %s" % (response.status, response.reason)
            result_data = response.read()
            results = unpickle.loads(result_data)
        except:
            log.exception("FindRequest.get_results(host=%s, query=%s) exception processing response" % (self.store.host, self.query))
            self.store.fail()
            return

        cache.set(self.cacheKey, results, settings.FIND_CACHE_DURATION)

    for node_info in results:
        if node_info.get('is_leaf'):
            reader = RemoteReader(self.store, node_info, bulk_query=self.query.pattern)
            node = LeafNode(node_info['path'], reader)
        else:
            node = BranchNode(node_info['path'])

        node.local = False
        yield node

def find_nodes(self, query, reqkey):
    clean_pattern = query.pattern.replace('\\', '')
    pattern_parts = clean_pattern.split('.')

    for root_dir in self.directories:
        for absolute_path in self._find_paths(root_dir, pattern_parts):
            if basename(absolute_path).startswith('.'):
                continue

            if self.DATASOURCE_DELIMETER in basename(absolute_path):
                (absolute_path, datasource_pattern) = absolute_path.rsplit(self.DATASOURCE_DELIMETER, 1)
            else:
                datasource_pattern = None

            relative_path = absolute_path[len(root_dir):].lstrip('/')
            metric_path = fs_to_metric(relative_path)
            real_metric_path = get_real_metric_path(absolute_path, metric_path)

            metric_path_parts = metric_path.split('.')
            for field_index in find_escaped_pattern_fields(query.pattern):
                metric_path_parts[field_index] = pattern_parts[field_index].replace('\\', '')
            metric_path = '.'.join(metric_path_parts)

            # Now we construct and yield an appropriate Node object
            if isdir(absolute_path):
                yield BranchNode(metric_path)
            elif isfile(absolute_path):
                if absolute_path.endswith('.wsp') and WhisperReader.supported:
                    reader = WhisperReader(absolute_path, real_metric_path)
                    yield LeafNode(metric_path, reader)
                elif absolute_path.endswith('.wsp.gz') and GzippedWhisperReader.supported:
                    reader = GzippedWhisperReader(absolute_path, real_metric_path)
                    yield LeafNode(metric_path, reader)
                elif absolute_path.endswith('.rrd') and RRDReader.supported:
                    if datasource_pattern is None:
                        yield BranchNode(metric_path)
                    else:
                        for datasource_name in RRDReader.get_datasources(absolute_path):
                            if match_entries([datasource_name], datasource_pattern):
                                reader = RRDReader(absolute_path, datasource_name)
                                yield LeafNode(metric_path + "." + datasource_name, reader)

def get_results(self):
    if self.failed:
        return

    if self.cachedResult is not None:
        results = self.cachedResult
    else:
        if self.connection is None:
            self.send()

        try:
            try:  # Python 2.7+, use buffering of HTTP responses
                response = self.connection.getresponse(buffering=True)
            except TypeError:  # Python 2.6 and older
                response = self.connection.getresponse()
            assert response.status == 200, "received error response %s - %s" % (response.status, response.reason)
            result_data = response.read()
            results = unpickle.loads(result_data)
        except:
            log.exception("FindRequest.get_results(host=%s, query=%s) exception processing response" % (self.store.host, self.query))
            self.store.fail()
            return

        cache.set(self.cacheKey, results, settings.FIND_CACHE_DURATION)

    for node_info in results:
        # handle both 1.x and 0.9.x output
        path = node_info.get('path') or node_info.get('metric_path')
        is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
        intervals = node_info.get('intervals') or []
        if not isinstance(intervals, IntervalSet):
            intervals = IntervalSet(
                [Interval(interval[0], interval[1]) for interval in intervals])

        node_info = {
            'is_leaf': is_leaf,
            'path': path,
            'intervals': intervals,
        }

        if is_leaf:
            reader = RemoteReader(self.store, node_info, bulk_query=self.query.pattern)
            node = LeafNode(path, reader)
        else:
            node = BranchNode(path)

        node.local = False
        yield node

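# Hedged illustration (not part of the finder code above): the 1.x/0.9.x
# normalization collapses both remote response shapes into the same node_info
# dict. Interval and IntervalSet are the same classes used above, from
# graphite.intervals; the sample dicts are made up for this sketch.
from graphite.intervals import Interval, IntervalSet

for node_info in ({'path': 'a.b', 'is_leaf': True, 'intervals': [[0, 60]]},  # 1.x shape
                  {'metric_path': 'a.b', 'isLeaf': True}):                   # 0.9.x shape
    path = node_info.get('path') or node_info.get('metric_path')
    is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
    intervals = node_info.get('intervals') or []
    if not isinstance(intervals, IntervalSet):
        intervals = IntervalSet([Interval(i[0], i[1]) for i in intervals])
    assert path == 'a.b' and is_leaf
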
def test_MultiReader_init(self):
    self.create_whisper_hosts()
    self.addCleanup(self.wipe_whisper_hosts)

    wr1 = WhisperReader(self.worker1, 'hosts.worker1.cpu')
    node1 = LeafNode('hosts.worker1.cpu', wr1)

    wr2 = WhisperReader(self.worker2, 'hosts.worker2.cpu')
    node2 = LeafNode('hosts.worker2.cpu', wr2)

    reader = MultiReader([node1, node2])
    self.assertIsNotNone(reader)

def test_MultiReader_get_intervals(self):
    self.create_whisper_hosts()
    self.addCleanup(self.wipe_whisper_hosts)

    wr1 = WhisperReader(self.worker1, 'hosts.worker1.cpu')
    node1 = LeafNode('hosts.worker1.cpu', wr1)

    wr2 = WhisperReader(self.worker2, 'hosts.worker2.cpu')
    node2 = LeafNode('hosts.worker2.cpu', wr2)

    reader = MultiReader([node1, node2])
    intervals = reader.get_intervals()
    for interval in intervals:
        self.assertEqual(int(interval.start), self.start_ts - 60)
        self.assertEqual(int(interval.end), self.start_ts)

def find_nodes(self, query, cache_incomplete_nodes=None):
    clean_patterns = query.pattern.replace('\\', '')
    has_wildcard = (clean_patterns.find('{') > -1 or clean_patterns.find('[') > -1
                    or clean_patterns.find('*') > -1 or clean_patterns.find('?') > -1)

    if cache_incomplete_nodes is None:
        cache_incomplete_nodes = {}

    # CarbonLink has some hosts
    if CarbonLink.hosts:
        metric = clean_patterns

        # Let's combine these two cases:
        # 1) has_wildcard
        # 2) single metric query
        # Expand queries in CarbonLink.
        # We will get back a list of (metric_name, is_leaf) tuples here,
        # for example: [(metric1, False), (metric2, True)]
        metrics = CarbonLink.expand_query(metric)
        # dedup, because of BranchNodes
        metrics = list(set(metrics))

        # check all metrics in same valid query range
        prechecks = []
        for m, is_leaf in metrics:
            if is_leaf:
                prechecks.append(CarbonLink.precheck(m, query.startTime))
            else:
                # return True for BranchNode
                prechecks.append((True, True))
        exists = all((exist for exist, partial_exist in prechecks))
        partial_exists = all((partial_exist for exist, partial_exist in prechecks))

        if exists:
            for metric, is_leaf in metrics:
                if is_leaf:
                    reader = CarbonCacheReader(metric)
                    yield LeafNode(metric, reader)
                else:
                    yield BranchNode(metric)
        elif partial_exists:
            for metric, is_leaf in metrics:
                if is_leaf:
                    reader = CarbonCacheReader(metric)
                    cache_incomplete_nodes[metric] = LeafNode(metric, reader)
                else:
                    cache_incomplete_nodes[metric] = BranchNode(metric)

def find_nodes(self, query):
    names = {}
    at_least_tries = 3
    for i in range(0, max(urls.host_count, at_least_tries)):
        try:
            names = requests.get(urls.names,
                                 params={'query': query.pattern},
                                 headers=self.headers,
                                 timeout=((self.connection_timeout / 1000), (self.timeout / 1000))).json()
            break
        except requests.exceptions.RequestException:
            # on down nodes, try again on another node until we try them all
            pass

    # for each set of self.batch_size leafnodes, execute an IronDBMeasurementFetcher
    # so we can do these in batches.
    counter = 0
    fetcher = IronDBMeasurementFetcher(self.headers, self.timeout, self.connection_timeout, self.database_rollups)

    for name in names:
        if name['leaf']:
            fetcher.add_leaf(name['name'], name['leaf_data'])
            reader = IronDBReader(name['name'], fetcher)
            counter = counter + 1
            if counter % self.batch_size == 0:
                fetcher = IronDBMeasurementFetcher(self.headers, self.timeout, self.connection_timeout, self.database_rollups)
                counter = 0
            yield LeafNode(name['name'], reader)
        else:
            yield BranchNode(name['name'])

def _get_branch_nodes(self, kudu_table, input_branch, path):
    results = input_branch.getChildren()
    if results:
        if path:
            path += '.'

        branches = []
        leaves = []
        for item in results:
            if item.isLeaf():
                leaves.append(item)
            else:
                branches.append(item)

        for branch in branches:
            node_name = branch.getName()
            node_path = path + node_name
            yield BranchNode(node_path), branch, node_name, node_path

        for leaf in leaves:
            node_name = leaf.getName()
            node_path = path + node_name
            reader = KuduReader(self.kudu_table, node_path)
            yield LeafNode(node_path, reader), leaf, node_name, node_path

def find_nodes(self, query):
    # translate the query pattern if it is tagged
    tagged = not query.pattern.startswith('_tagged.') and ';' in query.pattern
    if tagged:
        # Tagged series are stored in ceres using encoded names, so to retrieve
        # them we need to encode the query pattern using the same scheme used in
        # carbon when they are written.
        variants = [TaggedSeries.encode(query.pattern)]
    else:
        variants = extract_variants(query.pattern)

    for variant in variants:
        for fs_path in glob(self.tree.getFilesystemPath(variant)):
            metric_path = self.tree.getNodePath(fs_path)

            if CeresNode.isNodeDir(fs_path):
                ceres_node = self.tree.getNode(metric_path)

                if ceres_node.hasDataForInterval(query.startTime, query.endTime):
                    real_metric_path = get_real_metric_path(fs_path, metric_path)
                    reader = CeresReader(ceres_node, real_metric_path)
                    # if we're finding by tag, return the proper metric path
                    if tagged:
                        metric_path = query.pattern
                    yield LeafNode(metric_path, reader)

            elif os.path.isdir(fs_path):
                yield BranchNode(metric_path)

def test_MultiReader_fetch(self):
    self.create_whisper_hosts()
    self.addCleanup(self.wipe_whisper_hosts)

    wr1 = WhisperReader(self.worker1, 'hosts.worker1.cpu')
    node1 = LeafNode('hosts.worker1.cpu', wr1)

    wr2 = WhisperReader(self.worker2, 'hosts.worker2.cpu')
    node2 = LeafNode('hosts.worker2.cpu', wr2)

    reader = MultiReader([node1, node2])
    results = reader.fetch(self.start_ts - 5, self.start_ts)

    (_, values) = results
    self.assertEqual(values, [None, None, None, None, 1.0])

def __init__(self, *args, **kwargs):
    # Strip our extra kwargs before delegating to LeafNode.__init__
    name = kwargs.pop('name', None)
    label = kwargs.pop('label', None)
    LeafNode.__init__(self, *args, **kwargs)
    if name:
        self.name = name
    if label:
        self.label = label
    self.metric_path = self.path

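# Hedged usage sketch for the constructor above. The enclosing class is not
# named in this snippet, so NamedLeafNode below is a hypothetical stand-in for
# whatever LeafNode subclass defines this __init__, and 'reader' is any reader
# object accepted by LeafNode:
#
#   node = NamedLeafNode('servers.web1.load', reader,
#                        name='load', label='Load average')
#   assert node.name == 'load'
#   assert node.metric_path == node.path == 'servers.web1.load'
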
def test_MultiReader_merge_normal(self):
    results1 = ((1496252939, 1496252944, 1), [None, None, None, None, 1.0])
    results2 = ((1496252939, 1496252944, 1), [1.0, 1.0, 1.0, 1.0, 1.0])

    wr1 = WhisperReader(self.worker1, 'hosts.worker1.cpu')
    node1 = LeafNode('hosts.worker1.cpu', wr1)

    reader = MultiReader([node1])
    (_, values) = reader.merge(results1, results2)
    self.assertEqual(values, [1.0, 1.0, 1.0, 1.0, 1.0])

def find_nodes(self, query):
    if query.pattern == 'foo':
        yield BranchNode('foo')
    elif query.pattern == 'bar.*':
        for i in xrange(10):
            path = 'bar.{0}'.format(i)
            yield LeafNode(path, DummyReader(path))

def find_nodes(self, query):
    # Find some paths matching the query, then yield them.
    # 'matches', 'is_branch', 'is_leaf', and 'Reader' are placeholders for
    # this finder's own path lookup and reader implementation.
    for path in matches:
        if is_branch(path):
            yield BranchNode(path)
        if is_leaf(path):
            yield LeafNode(path, Reader(path))

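# A minimal concrete sketch of the skeleton above, assuming only the
# BranchNode/LeafNode API used throughout this file. InMemoryFinder, its static
# path lists, and the reuse of DummyReader (from the dummy-finder snippets in
# this file) are illustrative assumptions; fnmatch stands in for graphite's
# richer pattern matching.
import fnmatch

class InMemoryFinder(object):
    branches = ['servers']
    leaves = ['servers.web1.load', 'servers.web2.load']

    def find_nodes(self, query):
        for path in self.branches:
            if fnmatch.fnmatch(path, query.pattern):
                yield BranchNode(path)
        for path in self.leaves:
            if fnmatch.fnmatch(path, query.pattern):
                yield LeafNode(path, DummyReader(path))
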
def find_nodes(self, query):
    '''
    This method is used by graphite 1.0 and older.
    '''
    result = self._search_request(query.pattern)

    fetcher = GraphouseMultiFetcher()

    for metric in result[1]:
        if not metric:
            continue

        if metric.endswith('.'):
            yield BranchNode(metric[:-1])
        else:
            try:
                yield LeafNode(metric, GraphouseReader(metric, fetcher=fetcher))
            except OverflowError:
                # start a new fetcher and retry
                fetcher = GraphouseMultiFetcher()
                yield LeafNode(metric, GraphouseReader(metric, fetcher=fetcher))

def find_nodes(self, query):
    childs = self.tree.selfAndChildPaths(query.pattern)
    childNodes = self.tree.getNode(
        [child for child, isMetric in childs if isMetric])

    # make sure we yield in the DB order
    for child, isMetric in childs:
        if isMetric:
            yield LeafNode(child, CassandraReader(childNodes[child], child))
        else:
            yield BranchNode(child)

def find_nodes(self, query):
    response = requests.post('%s/search' % graphouse_url,
                             data={'query': query.pattern})
    response.raise_for_status()
    result = response.text.split('\n')

    for metric in result:
        if not metric:
            continue

        if metric.endswith('.'):
            yield BranchNode(metric[:-1])
        else:
            yield LeafNode(metric, GraphouseReader(metric))

def _find_paths(self, currNodeRowKey, patterns):
    """Recursively generates absolute paths whose components
    underneath current_node match the corresponding pattern in patterns"""
    from graphite.node import BranchNode, LeafNode
    from graphite.intervals import Interval, IntervalSet

    pattern = patterns[0]
    patterns = patterns[1:]

    nodeRow = self.client.getRow(self.metaTable, currNodeRowKey, None)
    if len(nodeRow) == 0:
        return

    subnodes = {}
    for k, v in nodeRow[0].columns.items():
        if k.startswith("cf:c_"):  # branches start with c_
            key = k.split("_", 2)[1]  # pop off cf:c_ prefix
            subnodes[key] = v.value

    matching_subnodes = match_entries(subnodes.keys(), pattern)

    if patterns:  # we've still got more directories to traverse
        for subnode in matching_subnodes:
            rowKey = subnodes[subnode]
            subNodeContents = self.client.getRow(self.metaTable, rowKey, None)

            # Leaves have a cf:INFO column describing their data.
            # We can't possibly match on a leaf here because we have more
            # components in the pattern, so only recurse on branches.
            if "cf:INFO" not in subNodeContents[0].columns:
                for m in self._find_paths(rowKey, patterns):
                    yield m
    else:  # at the end of the pattern
        for subnode in matching_subnodes:
            rowKey = subnodes[subnode]
            nodeRow = self.client.getRow(self.metaTable, rowKey, None)
            if len(nodeRow) == 0:
                continue

            metric = rowKey.split("_", 2)[1]  # pop off "m_" in key
            if "cf:INFO" in nodeRow[0].columns:
                info = json.loads(nodeRow[0].columns["cf:INFO"].value)
                start = time.time() - info['maxRetention']
                end = time.time()
                intervals = IntervalSet([Interval(start, end)])
                reader = HbaseReader(metric, intervals, info, self)
                yield LeafNode(metric, reader)
            else:
                yield BranchNode(metric)

def find_nodes(self, query):
    for fs_path in glob(self.tree.getFilesystemPath(query.pattern)):
        metric_path = self.tree.getNodePath(fs_path)

        if CeresNode.isNodeDir(fs_path):
            ceres_node = self.tree.getNode(metric_path)

            if ceres_node.hasDataForInterval(query.startTime, query.endTime):
                real_metric_path = get_real_metric_path(fs_path, metric_path)
                reader = CeresReader(ceres_node, real_metric_path)
                yield LeafNode(metric_path, reader)

        elif isdir(fs_path):
            yield BranchNode(metric_path)

def find_nodes(self, query):
    log.info('find_nodes: %s' % (query.pattern))

    # Parse the query
    path_items = filter(None, query.pattern.split('.'))

    records = []
    # Take only requests addressed to this finder
    if path_items[0] == '*' or path_items[0] in self.tree:
        # Get the part of the tree described by the query
        records = self.get_records(path_items)

    # Build nodes
    for record in records:
        if record['leaf']:
            yield LeafNode(record['id'], RandomReader(record['id']))
        else:
            yield BranchNode(record['id'])

def find_nodes(self, query):
    variants = extract_variants(query.pattern)

    for variant in variants:
        for fs_path in glob(self.tree.getFilesystemPath(variant)):
            metric_path = self.tree.getNodePath(fs_path)

            if CeresNode.isNodeDir(fs_path):
                ceres_node = self.tree.getNode(metric_path)

                if ceres_node.hasDataForInterval(query.startTime, query.endTime):
                    relative_path = fs_path[len(self.directory):].lstrip('/')
                    real_metric_path = get_real_metric_path(fs_path, relative_path)
                    reader = CeresReader(ceres_node, real_metric_path)
                    yield LeafNode(metric_path, reader)

            elif os.path.isdir(fs_path):
                yield BranchNode(metric_path)

def find_multi(self, patterns, reqkey=None):
    '''
    This method is used by graphite 1.1 and newer, called from self.fetch.

    Returns:
        Generator of (pattern, [nodes])
    '''
    reqkey = reqkey or uuid.uuid4()
    jobs = [
        Job(self._search_request, 'Query graphouse for {}'.format(pattern), pattern)
        for pattern in patterns
    ]

    results = self.wait_jobs(jobs, getattr(settings, 'FIND_TIMEOUT'),
                             'Find nodes for {} request'.format(reqkey))

    for pattern, metric_names in results:
        leafs = []
        for metric in metric_names:
            if metric and not metric.endswith('.'):
                leafs.append(LeafNode(metric, None))
        yield (pattern, leafs)

def test_RemoteFinder_fetch(self, http_request):
    finder = RemoteFinder()
    store = finder.remote_stores[0]

    reader = RemoteReader(store, {'intervals': [], 'path': 'a.b.c.d'}, bulk_query='a.b.c.d')
    node = LeafNode('a.b.c.d', reader)
    startTime = 1496262000
    endTime = 1496262060

    data = [
        {
            'start': startTime,
            'step': 60,
            'end': endTime,
            'values': [1.0, 0.0, 1.0, 0.0, 1.0],
            'name': 'a.b.c.d',
        }
    ]
    responseObject = HTTPResponse(body=StringIO(pickle.dumps(data)), status=200)
    http_request.return_value = responseObject

    ret = finder.fetch(['a.b.c.d'], startTime, endTime)

    expected_response = [
        {
            'name': 'a.b.c.d',
            'values': [1.0, 0.0, 1.0, 0.0, 1.0],
            'pathExpression': 'a.b.c.d',
            'time_info': (1496262000, 1496262060, 60),
        },
        {
            'name': 'a.b.c.d',
            'values': [1.0, 0.0, 1.0, 0.0, 1.0],
            'pathExpression': 'a.b.c.d',
            'time_info': (1496262000, 1496262060, 60),
        },
    ]
    result = list(ret.waitForResults())
    self.assertEqual(result, expected_response)

def find_nodes(self, query):
    yield BranchNode('a.b.c')
    yield LeafNode('a.b.c.d', DummyReader('a.b.c.d'))
    yield LeafNode('a.b.c.e', DummyReader('a.b.c.e'))

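# Hedged usage sketch of the finder protocol the dummy finder above
# implements: graphite-web builds a query object carrying a 'pattern'
# attribute (FindQuery is used this way elsewhere in this file) and iterates
# whatever nodes find_nodes yields:
#
#   query = FindQuery('a.b.c.*', None, None)
#   for node in finder.find_nodes(query):
#       log.info("%s is_leaf=%s" % (node.path, node.is_leaf))
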
def send(self, headers=None, msg_setter=None):
    log.debug("FindRequest.send(host=%s, query=%s) called" % (self.store.host, self.query))

    if headers is None:
        headers = {}

    results = cache.get(self.cacheKey)
    if results is not None:
        log.debug("FindRequest.send(host=%s, query=%s) using cached result" % (self.store.host, self.query))
    else:
        url = "%s://%s/metrics/find/" % (
            'https' if settings.INTRACLUSTER_HTTPS else 'http', self.store.host)

        query_params = [
            ('local', '1'),
            ('format', 'pickle'),
            ('query', self.query.pattern),
        ]
        if self.query.startTime:
            query_params.append(('from', self.query.startTime))
        if self.query.endTime:
            query_params.append(('until', self.query.endTime))

        try:
            result = http.request(
                'POST' if settings.REMOTE_STORE_USE_POST else 'GET',
                url,
                fields=query_params,
                headers=headers,
                timeout=settings.REMOTE_FIND_TIMEOUT)
        except BaseException:
            log.exception("FindRequest.send(host=%s, query=%s) exception during request" % (self.store.host, self.query))
            self.store.fail()
            return

        if result.status != 200:
            log.exception("FindRequest.send(host=%s, query=%s) error response %d from %s?%s" % (self.store.host, self.query, result.status, url, urlencode(query_params)))
            self.store.fail()
            return

        try:
            results = unpickle.loads(result.data)
        except BaseException:
            log.exception("FindRequest.send(host=%s, query=%s) exception processing response" % (self.store.host, self.query))
            self.store.fail()
            return

        cache.set(self.cacheKey, results, settings.FIND_CACHE_DURATION)

    msg_setter('host: {host}, query: {query}'.format(host=self.store.host, query=self.query))

    for node_info in results:
        # handle both 1.x and 0.9.x output
        path = node_info.get('path') or node_info.get('metric_path')
        is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
        intervals = node_info.get('intervals') or []
        if not isinstance(intervals, IntervalSet):
            intervals = IntervalSet(
                [Interval(interval[0], interval[1]) for interval in intervals])

        node_info = {
            'is_leaf': is_leaf,
            'path': path,
            'intervals': intervals,
        }

        if is_leaf:
            reader = RemoteReader(self.store, node_info, bulk_query=[self.query.pattern])
            node = LeafNode(path, reader)
        else:
            node = BranchNode(path)

        node.local = False
        yield node

def find_nodes(self, query, timer=None):
    timer.set_msg('host: {host}, query: {query}'.format(host=self.host, query=query))

    log.debug("RemoteFinder.find_nodes(host=%s, query=%s) called" % (self.host, query))

    # prevent divide by 0
    cacheTTL = settings.FIND_CACHE_DURATION or 1
    if query.startTime:
        start = query.startTime - (query.startTime % cacheTTL)
    else:
        start = ""

    if query.endTime:
        end = query.endTime - (query.endTime % cacheTTL)
    else:
        end = ""

    cacheKey = "find:%s:%s:%s:%s" % (self.host, compactHash(query.pattern), start, end)

    results = cache.get(cacheKey)
    if results is not None:
        log.debug("RemoteFinder.find_nodes(host=%s, query=%s) using cached result" % (self.host, query))
    else:
        url = '/metrics/find/'

        query_params = [
            ('local', self.params.get('local', '1')),
            ('format', self.params.get('format', 'pickle')),
            ('query', query.pattern),
        ]
        if query.startTime:
            query_params.append(('from', int(query.startTime)))
        if query.endTime:
            query_params.append(('until', int(query.endTime)))

        result = self.request(
            url,
            fields=query_params,
            headers=query.headers,
            timeout=settings.REMOTE_FIND_TIMEOUT)

        try:
            if result.getheader('content-type') == 'application/x-msgpack':
                results = msgpack.load(
                    BufferedHTTPReader(result, buffer_size=settings.REMOTE_BUFFER_SIZE),
                    encoding='utf-8')
            else:
                results = unpickle.load(
                    BufferedHTTPReader(result, buffer_size=settings.REMOTE_BUFFER_SIZE))
        except Exception as err:
            self.fail()
            log.exception(
                "RemoteFinder[%s] Error decoding find response from %s: %s" %
                (self.host, result.url_full, err))
            raise Exception("Error decoding find response from %s: %s" % (result.url_full, err))
        finally:
            result.release_conn()

        cache.set(cacheKey, results, settings.FIND_CACHE_DURATION)

    for node_info in results:
        # handle both 1.x and 0.9.x output
        path = node_info.get('path') or node_info.get('metric_path')
        is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
        intervals = node_info.get('intervals') or []
        if not isinstance(intervals, IntervalSet):
            intervals = IntervalSet(
                [Interval(interval[0], interval[1]) for interval in intervals])

        node_info = {
            'is_leaf': is_leaf,
            'path': path,
            'intervals': intervals,
        }

        if is_leaf:
            reader = RemoteReader(self, node_info)
            node = LeafNode(path, reader)
        else:
            node = BranchNode(path)

        node.local = False
        yield node

def find_nodes(self, query, timer=None):
    timer.set_msg('host: {host}, query: {query}'.format(host=self.host, query=query))

    log.debug("RemoteFinder.find_nodes(host=%s, query=%s) called" % (self.host, query))

    # prevent divide by 0
    cacheTTL = settings.FIND_CACHE_DURATION or 1
    if query.startTime:
        start = query.startTime - (query.startTime % cacheTTL)
    else:
        start = ""

    if query.endTime:
        end = query.endTime - (query.endTime % cacheTTL)
    else:
        end = ""

    cacheKey = "find:%s:%s:%s:%s" % (self.host, compactHash(query.pattern), start, end)

    results = cache.get(cacheKey)
    if results is not None:
        log.debug("RemoteFinder.find_nodes(host=%s, query=%s) using cached result" % (self.host, query))
    else:
        url = '/metrics/find/'

        query_params = [
            ('local', self.params.get('local', '1')),
            ('format', self.params.get('format', 'pickle')),
            ('query', query.pattern),
        ]
        if query.startTime:
            query_params.append(('from', int(query.startTime)))
        if query.endTime:
            query_params.append(('until', int(query.endTime)))

        result = self.request(
            url,
            fields=query_params,
            headers=query.headers,
            timeout=settings.FIND_TIMEOUT)

        try:
            if result.getheader('content-type') == 'application/x-msgpack':
                results = msgpack.load(
                    BufferedHTTPReader(result, buffer_size=settings.REMOTE_BUFFER_SIZE),
                    encoding='utf-8')
            else:
                results = unpickle.load(
                    BufferedHTTPReader(result, buffer_size=settings.REMOTE_BUFFER_SIZE))
        except Exception as err:
            self.fail()
            log.exception(
                "RemoteFinder[%s] Error decoding find response from %s: %s" %
                (self.host, result.url_full, err))
            raise Exception("Error decoding find response from %s: %s" % (result.url_full, err))
        finally:
            result.release_conn()

        cache.set(cacheKey, results, settings.FIND_CACHE_DURATION)

    for node_info in results:
        # handle both 1.x and 0.9.x output
        path = node_info.get('path') or node_info.get('metric_path')
        is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
        intervals = node_info.get('intervals') or []
        if not isinstance(intervals, IntervalSet):
            intervals = IntervalSet(
                [Interval(interval[0], interval[1]) for interval in intervals])

        node_info = {
            'is_leaf': is_leaf,
            'path': path,
            'intervals': intervals,
        }

        if is_leaf:
            reader = RemoteReader(self, node_info)
            node = LeafNode(path, reader)
        else:
            node = BranchNode(path)

        node.local = False
        yield node

def find(self, pattern, startTime=None, endTime=None, local=False):
    query = FindQuery(pattern, startTime, endTime)

    # Start remote searches
    if not local:
        remote_requests = [r.find(query) for r in self.remote_stores if r.available]

    matching_nodes = set()

    # Search locally
    for finder in self.finders:
        for node in finder.find_nodes(query):
            #log.info("find() :: local :: %s" % node)
            matching_nodes.add(node)

    # Gather remote search results
    if not local:
        for request in remote_requests:
            for node in request.get_results():
                #log.info("find() :: remote :: %s from %s" % (node, request.store.host))
                matching_nodes.add(node)

    # Group matching nodes by their path
    nodes_by_path = {}
    for node in matching_nodes:
        if node.path not in nodes_by_path:
            nodes_by_path[node.path] = []
        nodes_by_path[node.path].append(node)

    # Reduce matching nodes for each path to a minimal set
    found_branch_nodes = set()

    for path, nodes in nodes_by_path.iteritems():
        leaf_nodes = []

        # First we dispense with the BranchNodes
        for node in nodes:
            if node.is_leaf:
                leaf_nodes.append(node)
            elif node.path not in found_branch_nodes:
                #TODO need to filter branch nodes based on requested interval... how?!?!?
                yield node
                found_branch_nodes.add(node.path)

        if not leaf_nodes:
            continue

        # Calculate best minimal node set
        minimal_node_set = set()
        covered_intervals = IntervalSet([])

        # If the query doesn't fall entirely within the FIND_TOLERANCE window
        # we disregard the window. This prevents unnecessary remote fetches
        # caused when carbon's cache skews node.intervals, giving the appearance
        # remote systems have data we don't have locally, which we probably do.
        now = int(time.time())
        tolerance_window = now - settings.FIND_TOLERANCE
        disregard_tolerance_window = query.interval.start < tolerance_window
        prior_to_window = Interval(float('-inf'), tolerance_window)

        def measure_of_added_coverage(node, drop_window=disregard_tolerance_window):
            relevant_intervals = node.intervals.intersect_interval(query.interval)
            if drop_window:
                relevant_intervals = relevant_intervals.intersect_interval(prior_to_window)
            return covered_intervals.union(relevant_intervals).size - covered_intervals.size

        nodes_remaining = list(leaf_nodes)

        # Prefer local nodes first (and do *not* drop the tolerance window)
        for node in leaf_nodes:
            if node.local and measure_of_added_coverage(node, False) > 0:
                nodes_remaining.remove(node)
                minimal_node_set.add(node)
                covered_intervals = covered_intervals.union(node.intervals)

        if settings.REMOTE_STORE_MERGE_RESULTS:
            remote_nodes = [n for n in nodes_remaining if not n.local]
            for node in remote_nodes:
                nodes_remaining.remove(node)
                minimal_node_set.add(node)
                covered_intervals = covered_intervals.union(node.intervals)
        else:
            while nodes_remaining:
                node_coverages = [(measure_of_added_coverage(n), n) for n in nodes_remaining]
                best_coverage, best_node = max(node_coverages)

                if best_coverage == 0:
                    break

                nodes_remaining.remove(best_node)
                minimal_node_set.add(best_node)
                covered_intervals = covered_intervals.union(best_node.intervals)

        # Sometimes the requested interval falls within the caching window.
        # We include the most likely node if the gap is within tolerance.
        if not minimal_node_set:
            def distance_to_requested_interval(node):
                if not node.intervals:
                    return float('inf')
                latest = sorted(node.intervals, key=lambda i: i.end)[-1]
                distance = query.interval.start - latest.end
                return distance if distance >= 0 else float('inf')

            best_candidate = min(leaf_nodes, key=distance_to_requested_interval)
            if distance_to_requested_interval(best_candidate) <= settings.FIND_TOLERANCE:
                minimal_node_set.add(best_candidate)

        if len(minimal_node_set) == 1:
            yield minimal_node_set.pop()
        elif len(minimal_node_set) > 1:
            reader = MultiReader(minimal_node_set)
            yield LeafNode(path, reader)

def find_nodes(self, query):
    log.debug("IRONdbFinder.find_nodes, query: %s, max_retries: %d" % (query.pattern, self.max_retries))

    metrics_expand = False
    if query.pattern.endswith('.**'):
        query.pattern = query.pattern[:-1]
        metrics_expand = True

    names = {}
    name_headers = copy.deepcopy(self.headers)
    name_headers['Accept'] = 'application/x-flatbuffer-metric-find-result-list'
    for i in range(0, self.max_retries):
        try:
            if self.zipkin_enabled:
                traceheader = binascii.hexlify(os.urandom(8))
                name_headers['X-B3-TraceId'] = traceheader
                name_headers['X-B3-SpanId'] = traceheader
                if self.zipkin_event_trace_level == 1:
                    name_headers['X-Mtev-Trace-Event'] = '1'
                elif self.zipkin_event_trace_level == 2:
                    name_headers['X-Mtev-Trace-Event'] = '2'
            r = requests.get(urls.names,
                             params={'query': query.pattern},
                             headers=name_headers,
                             timeout=((self.connection_timeout / 1000.0), (self.timeout / 1000.0)))
            r.raise_for_status()
            if r.headers['content-type'] == 'application/json':
                names = r.json()
            elif r.headers['content-type'] == 'application/x-flatbuffer-metric-find-result-list':
                names = irondb_flatbuf.metric_find_results(r.content)
            else:
                pass
            break
        except (socket.gaierror, requests.exceptions.ConnectionError) as ex:
            # on down nodes, try again on another node until "tries"
            log.exception("IRONdbFinder.find_nodes ConnectionError %s" % ex)
        except requests.exceptions.ConnectTimeout as ex:
            # on down nodes, try again on another node until "tries"
            log.exception("IRONdbFinder.find_nodes ConnectTimeout %s" % ex)
        except irondb_flatbuf.FlatBufferError as ex:
            # flatbuffer error, try again
            log.exception("IRONdbFinder.find_nodes FlatBufferError %s" % ex)
        except JSONDecodeError as ex:
            # json error, try again
            log.exception("IRONdbFinder.find_nodes JSONDecodeError %s" % ex)
        except requests.exceptions.ReadTimeout as ex:
            # on down nodes, try again on another node until "tries"
            log.exception("IRONdbFinder.find_nodes ReadTimeout %s" % ex)
        except requests.exceptions.HTTPError as ex:
            # http status code errors are failures, stop immediately
            log.exception("IRONdbFinder.find_nodes HTTPError %s %s" % (ex, r.content))
            break

    if settings.DEBUG:
        log.debug("IRONdbFinder.find_nodes, result: %s" % json.dumps(names))

    # for each set of self.batch_size leafnodes, execute an IRONdbMeasurementFetcher
    # so we can do these in batches.
    measurement_headers = copy.deepcopy(self.headers)
    measurement_headers['Accept'] = 'application/x-flatbuffer-metric-get-result-list'
    fetcher = IRONdbMeasurementFetcher(measurement_headers, self.timeout, self.connection_timeout,
                                       self.database_rollups, self.rollup_window, self.max_retries,
                                       self.zipkin_enabled, self.zipkin_event_trace_level)

    for name in names:
        if 'leaf' in name and 'leaf_data' in name:
            fetcher.add_leaf(name['name'], name['leaf_data'])
            reader = IRONdbReader(name['name'], fetcher)
            yield LeafNode(name['name'], reader)
        else:
            yield BranchNode(name['name'])
            if metrics_expand:
                query = FindQuery(name['name'] + '.**', None, None)
                for node in self.find_nodes(query):
                    yield node

def find_all(self, query, headers=None):
    start = time.time()
    result_queue = Queue.Queue()
    jobs = []

    # Start remote searches
    if not query.local:
        random.shuffle(self.remote_stores)
        jobs.extend([(store.find, query, headers)
                     for store in self.remote_stores if store.available])

    # Start local searches
    for finder in self.finders:
        jobs.append((finder.find_nodes, query))

    if settings.USE_WORKER_POOL:
        return_result = lambda x: result_queue.put(x)
        for job in jobs:
            get_pool().apply_async(func=job[0], args=job[1:], callback=return_result)
    else:
        for job in jobs:
            result_queue.put(job[0](*job[1:]))

    # Group matching nodes by their path
    nodes_by_path = defaultdict(list)

    deadline = start + settings.REMOTE_FIND_TIMEOUT
    result_cnt = 0

    while result_cnt < len(jobs):
        wait_time = deadline - time.time()

        try:
            nodes = result_queue.get(True, wait_time)
        # ValueError could happen if due to really unlucky timing wait_time is negative
        except (Queue.Empty, ValueError):
            if time.time() > deadline:
                log.info("Timed out in find_all after %fs" % (settings.REMOTE_FIND_TIMEOUT))
                break
            else:
                continue

        log.info("Got a find result after %fs" % (time.time() - start))
        result_cnt += 1
        if nodes:
            for node in nodes:
                nodes_by_path[node.path].append(node)

    log.info("Got all find results in %fs" % (time.time() - start))

    # Reduce matching nodes for each path to a minimal set
    found_branch_nodes = set()

    items = list(nodes_by_path.iteritems())
    random.shuffle(items)

    for path, nodes in items:
        leaf_nodes = []

        # First we dispense with the BranchNodes
        for node in nodes:
            if node.is_leaf:
                leaf_nodes.append(node)
            elif node.path not in found_branch_nodes:
                #TODO need to filter branch nodes based on requested interval... how?!?!?
                yield node
                found_branch_nodes.add(node.path)

        if not leaf_nodes:
            continue

        # Fast-path when there is a single node.
        if len(leaf_nodes) == 1:
            yield leaf_nodes[0]
            continue

        # Calculate best minimal node set
        minimal_node_set = set()
        covered_intervals = IntervalSet([])

        # If the query doesn't fall entirely within the FIND_TOLERANCE window
        # we disregard the window. This prevents unnecessary remote fetches
        # caused when carbon's cache skews node.intervals, giving the appearance
        # remote systems have data we don't have locally, which we probably do.
        now = int(time.time())
        tolerance_window = now - settings.FIND_TOLERANCE
        disregard_tolerance_window = query.interval.start < tolerance_window
        prior_to_window = Interval(float('-inf'), tolerance_window)

        def measure_of_added_coverage(node, drop_window=disregard_tolerance_window):
            relevant_intervals = node.intervals.intersect_interval(query.interval)
            if drop_window:
                relevant_intervals = relevant_intervals.intersect_interval(prior_to_window)
            return covered_intervals.union(relevant_intervals).size - covered_intervals.size

        nodes_remaining = list(leaf_nodes)

        # Prefer local nodes first (and do *not* drop the tolerance window)
        for node in leaf_nodes:
            if node.local and measure_of_added_coverage(node, False) > 0:
                nodes_remaining.remove(node)
                minimal_node_set.add(node)
                covered_intervals = covered_intervals.union(node.intervals)

        if settings.REMOTE_STORE_MERGE_RESULTS:
            remote_nodes = [n for n in nodes_remaining if not n.local]
            for node in remote_nodes:
                nodes_remaining.remove(node)
                minimal_node_set.add(node)
                covered_intervals = covered_intervals.union(node.intervals)
        else:
            while nodes_remaining:
                node_coverages = [(measure_of_added_coverage(n), n) for n in nodes_remaining]
                best_coverage, best_node = max(node_coverages)

                if best_coverage == 0:
                    break

                nodes_remaining.remove(best_node)
                minimal_node_set.add(best_node)
                covered_intervals = covered_intervals.union(best_node.intervals)

        # Sometimes the requested interval falls within the caching window.
        # We include the most likely node if the gap is within tolerance.
        if not minimal_node_set:
            def distance_to_requested_interval(node):
                if not node.intervals:
                    return float('inf')
                latest = sorted(node.intervals, key=lambda i: i.end)[-1]
                distance = query.interval.start - latest.end
                return distance if distance >= 0 else float('inf')

            best_candidate = min(leaf_nodes, key=distance_to_requested_interval)
            if distance_to_requested_interval(best_candidate) <= settings.FIND_TOLERANCE:
                minimal_node_set.add(best_candidate)

        if len(minimal_node_set) == 1:
            yield minimal_node_set.pop()
        elif len(minimal_node_set) > 1:
            reader = MultiReader(minimal_node_set)
            yield LeafNode(path, reader)

def send(self, headers=None, msg_setter=None):
    log.info("FindRequest.send(host=%s, query=%s) called" % (self.store.host, self.query))

    if headers is None:
        headers = {}

    results = cache.get(self.cacheKey)
    if results is not None:
        log.info("FindRequest.send(host=%s, query=%s) using cached result" % (self.store.host, self.query))
    else:
        url = "%s://%s/metrics/find/" % (
            'https' if settings.INTRACLUSTER_HTTPS else 'http', self.store.host)

        query_params = [
            ('local', '1'),
            ('format', 'pickle'),
            ('query', self.query.pattern),
        ]
        if self.query.startTime:
            query_params.append(('from', self.query.startTime))
        if self.query.endTime:
            query_params.append(('until', self.query.endTime))

        try:
            result = http.request(
                'POST' if settings.REMOTE_STORE_USE_POST else 'GET',
                url,
                fields=query_params,
                headers=headers,
                timeout=settings.REMOTE_FIND_TIMEOUT)
        except:
            log.exception("FindRequest.send(host=%s, query=%s) exception during request" % (self.store.host, self.query))
            self.store.fail()
            return

        if result.status != 200:
            log.exception("FindRequest.send(host=%s, query=%s) error response %d from %s?%s" % (self.store.host, self.query, result.status, url, urlencode(query_params)))
            self.store.fail()
            return

        try:
            results = unpickle.loads(result.data)
        except:
            log.exception("FindRequest.send(host=%s, query=%s) exception processing response" % (self.store.host, self.query))
            self.store.fail()
            return

        cache.set(self.cacheKey, results, settings.FIND_CACHE_DURATION)

    msg_setter('host: {host}, query: {query}'.format(host=self.store.host, query=self.query))

    for node_info in results:
        # handle both 1.x and 0.9.x output
        path = node_info.get('path') or node_info.get('metric_path')
        is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
        intervals = node_info.get('intervals') or []
        if not isinstance(intervals, IntervalSet):
            intervals = IntervalSet(
                [Interval(interval[0], interval[1]) for interval in intervals])

        node_info = {
            'is_leaf': is_leaf,
            'path': path,
            'intervals': intervals,
        }

        if is_leaf:
            reader = RemoteReader(self.store, node_info, bulk_query=[self.query.pattern])
            node = LeafNode(path, reader)
        else:
            node = BranchNode(path)

        node.local = False
        yield node

def _merge_leaf_nodes(self, query, path, leaf_nodes):
    """Get a single node from a list of leaf nodes."""
    if not leaf_nodes:
        return None

    # Fast-path when there is a single node.
    if len(leaf_nodes) == 1:
        return leaf_nodes[0]

    # Calculate best minimal node set
    minimal_node_set = set()
    covered_intervals = IntervalSet([])

    # If the query doesn't fall entirely within the FIND_TOLERANCE window
    # we disregard the window. This prevents unnecessary remote fetches
    # caused when carbon's cache skews node.intervals, giving the appearance
    # remote systems have data we don't have locally, which we probably do.
    now = int(time.time())
    tolerance_window = now - settings.FIND_TOLERANCE
    disregard_tolerance_window = query.interval.start < tolerance_window
    prior_to_window = Interval(float('-inf'), tolerance_window)

    def measure_of_added_coverage(node, drop_window=disregard_tolerance_window):
        relevant_intervals = node.intervals.intersect_interval(query.interval)
        if drop_window:
            relevant_intervals = relevant_intervals.intersect_interval(prior_to_window)
        return covered_intervals.union(relevant_intervals).size - covered_intervals.size

    nodes_remaining = list(leaf_nodes)

    # Prefer local nodes first (and do *not* drop the tolerance window)
    for node in leaf_nodes:
        if node.local and measure_of_added_coverage(node, False) > 0:
            nodes_remaining.remove(node)
            minimal_node_set.add(node)
            covered_intervals = covered_intervals.union(node.intervals)

    if settings.REMOTE_STORE_MERGE_RESULTS:
        remote_nodes = [n for n in nodes_remaining if not n.local]
        for node in remote_nodes:
            nodes_remaining.remove(node)
            minimal_node_set.add(node)
            covered_intervals = covered_intervals.union(node.intervals)
    else:
        while nodes_remaining:
            node_coverages = [(measure_of_added_coverage(n), n) for n in nodes_remaining]
            best_coverage, best_node = max(node_coverages)

            if best_coverage == 0:
                break

            nodes_remaining.remove(best_node)
            minimal_node_set.add(best_node)
            covered_intervals = covered_intervals.union(best_node.intervals)

    # Sometimes the requested interval falls within the caching window.
    # We include the most likely node if the gap is within tolerance.
    if not minimal_node_set:
        def distance_to_requested_interval(node):
            if not node.intervals:
                return float('inf')
            latest = sorted(node.intervals, key=lambda i: i.end)[-1]
            distance = query.interval.start - latest.end
            return distance if distance >= 0 else float('inf')

        best_candidate = min(leaf_nodes, key=distance_to_requested_interval)
        if distance_to_requested_interval(best_candidate) <= settings.FIND_TOLERANCE:
            minimal_node_set.add(best_candidate)

    if not minimal_node_set:
        return None
    elif len(minimal_node_set) == 1:
        return minimal_node_set.pop()
    else:
        reader = MultiReader(minimal_node_set)
        return LeafNode(path, reader)

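# Hedged toy run of the greedy coverage selection used above, exercising the
# same Interval/IntervalSet API the method relies on (intersect_interval,
# union, .size). The numbers are arbitrary: a query window of [100, 200) and
# three candidate interval sets, the last of which adds no new coverage.
from graphite.intervals import Interval, IntervalSet

query_interval = Interval(100, 200)
covered = IntervalSet([])
candidates = [IntervalSet([Interval(100, 150)]),
              IntervalSet([Interval(140, 200)]),
              IntervalSet([Interval(110, 130)])]  # fully covered already; adds 0
for candidate in candidates:
    relevant = candidate.intersect_interval(query_interval)
    added = covered.union(relevant).size - covered.size
    if added > 0:  # mirrors the "best_coverage == 0: break" stopping rule
        covered = covered.union(relevant)
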
def find_nodes(self, query, timer=None):
    timer.set_msg('host: {host}, query: {query}'.format(host=self.host, query=query))

    log.debug("RemoteFinder.find_nodes(host=%s, query=%s) called" % (self.host, query))

    # prevent divide by 0
    cacheTTL = settings.FIND_CACHE_DURATION or 1
    if query.startTime:
        start = query.startTime - (query.startTime % cacheTTL)
    else:
        start = ""

    if query.endTime:
        end = query.endTime - (query.endTime % cacheTTL)
    else:
        end = ""

    cacheKey = "find:%s:%s:%s:%s" % (self.host, compactHash(query.pattern), start, end)

    results = cache.get(cacheKey)
    if results is not None:
        log.debug("RemoteFinder.find_nodes(host=%s, query=%s) using cached result" % (self.host, query))
    else:
        url = '/metrics/find/'

        query_params = [
            ('local', self.params.get('local', '1')),
            ('format', self.params.get('format', 'pickle')),
            ('query', query.pattern),
        ]
        if query.startTime:
            query_params.append(('from', int(query.startTime)))
        if query.endTime:
            query_params.append(('until', int(query.endTime)))

        result = self.request(
            url,
            fields=query_params,
            headers=query.headers,
            timeout=settings.FIND_TIMEOUT)

        results = self.deserialize(result)

        cache.set(cacheKey, results, settings.FIND_CACHE_DURATION)

    # We don't use a generator here. This function may be run as a job in a
    # thread pool, and using a generator would carry the following risks:
    # 1. Generators are lazy: if we don't iterate the returned generator in the
    #    job, the real execution (time-consuming network operations) would very
    #    likely be triggered in the calling thread, losing the benefit of the
    #    thread pool;
    # 2. As function execution is delayed, the job manager could not catch job
    #    runtime exceptions as expected/designed.
    nodes = []
    for node_info in results:
        # handle both 1.x and 0.9.x output
        path = node_info.get('path') or node_info.get('metric_path')
        is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
        intervals = node_info.get('intervals') or []
        if not isinstance(intervals, IntervalSet):
            intervals = IntervalSet(
                [Interval(interval[0], interval[1]) for interval in intervals])

        node_info = {
            'is_leaf': is_leaf,
            'path': path,
            'intervals': intervals,
        }

        if is_leaf:
            reader = RemoteReader(self, node_info)
            node = LeafNode(path, reader)
        else:
            node = BranchNode(path)

        node.local = False
        nodes.append(node)

    return nodes