def __init__(self, endpoint, start_time, end_time, debug=False, timeseries=False):
    """Store query parameters and derive a sampling step from the window size.

    The step is coarser for wider windows (day / hour / 5 min); windows of
    one day or less get no explicit step. Without both endpoints, the
    advertised interval is effectively unbounded.
    """
    self.endpoint = endpoint
    self.start_time = start_time
    self.end_time = end_time
    self.timeseries = timeseries
    self.debug = debug
    if start_time and end_time:
        window = end_time - start_time
        if window > 6 * 30 * 24 * 3600:      # > ~6 months: daily points
            self.step = 86400
        elif window > 30 * 24 * 3600:        # > ~1 month: hourly points
            self.step = 3600
        elif window > 24 * 3600:             # > 1 day: 5-minute points
            self.step = 300
        else:
            self.step = None
        self.intervals = IntervalSet([Interval(start_time, end_time)])
    else:
        # No explicit window: advertise all representable time.
        self.intervals = IntervalSet([Interval(0, 2**32 - 1)])
def get_intervals(self):
    """Return an IntervalSet covering every slice of the underlying Ceres node.

    slice_info yields (start, end, step) tuples; only the endpoints matter
    for coverage.
    """
    return IntervalSet([Interval(slice_start, slice_end)
                        for (slice_start, slice_end, _step) in self.ceres_node.slice_info])
def get_results(self):
    """Yield Node objects parsed from a remote /metrics/find/ response.

    Uses the cached result when available; otherwise reads and unpickles
    the pending HTTP response from ``self.connection``. On any failure the
    store is marked failed and nothing is yielded.
    """
    if self.failed:
        return

    if self.cachedResult is not None:
        results = self.cachedResult
    else:
        if self.connection is None:
            self.send()

        try:
            try:  # Python 2.7+, use buffering of HTTP responses
                response = self.connection.getresponse(buffering=True)
            except TypeError:  # Python 2.6 and older
                response = self.connection.getresponse()
            assert response.status == 200, "received error response %s - %s" % (
                response.status, response.reason)
            result_data = response.read()
            results = unpickle.loads(result_data)
        # Was a bare ``except:``; ``except BaseException:`` is equivalent but
        # explicit, matching the style used by FindRequest.send().
        except BaseException:
            log.exception(
                "FindRequest.get_results(host=%s, query=%s) exception processing response" %
                (self.store.host, self.query))
            self.store.fail()
            return

        cache.set(self.cacheKey, results, settings.FIND_CACHE_DURATION)

    for node_info in results:
        # handle both 1.x and 0.9.x output
        path = node_info.get('path') or node_info.get('metric_path')
        is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
        intervals = node_info.get('intervals') or []
        if not isinstance(intervals, IntervalSet):
            intervals = IntervalSet([Interval(interval[0], interval[1])
                                     for interval in intervals])

        node_info = {
            'is_leaf': is_leaf,
            'path': path,
            'intervals': intervals,
        }
        if is_leaf:
            reader = RemoteReader(self.store, node_info, bulk_query=self.query.pattern)
            node = LeafNode(path, reader)
        else:
            node = BranchNode(path)

        node.local = False
        yield node
def get_intervals(self):
    """Coverage implied by the gzipped whisper header and the file's mtime.

    The window runs from (now - maxRetention) up to the file's modification
    time, clamped so it is never empty.
    """
    with gzip.GzipFile(self.fs_path, 'rb') as fh:
        info = whisper__readHeader(fh)  # evil, but necessary.
    earliest = time.time() - info['maxRetention']
    latest = max(stat(self.fs_path).st_mtime, earliest)
    return IntervalSet([Interval(earliest, latest)])
def get_intervals(self):
    """Advertise an effectively unbounded window: epoch through now + 1h."""
    log.info('===GET_INTERVALS===')
    # Claim data since (nearly) the beginning of the epoch...
    earliest = 1
    # ...and one hour into the future.
    latest = int(time() + 3600)
    log.info("get_interval: start=%s; end=%s" % (earliest, latest))
    return IntervalSet([Interval(earliest, latest)])
def _find_paths(self, currNodeRowKey, patterns):
    """Recursively generates absolute paths whose components underneath
    current_node match the corresponding pattern in patterns.

    Walks the HBase meta table one path component at a time: branch children
    are stored as ``cf:c_<name>`` columns whose value is the child's row key;
    leaves carry a ``cf:INFO`` column with JSON metadata (e.g. maxRetention).
    Yields BranchNode/LeafNode objects for rows matching the final pattern.
    """
    from graphite.node import BranchNode, LeafNode
    from graphite.intervals import Interval, IntervalSet

    # Consume one pattern component per recursion level.
    pattern = patterns[0]
    patterns = patterns[1:]

    nodeRow = self.client.getRow(self.metaTable, currNodeRowKey, None)
    if len(nodeRow) == 0:
        # Row vanished or never existed; nothing to yield.
        return

    subnodes = {}
    for k, v in nodeRow[0].columns.items():
        if k.startswith("cf:c_"):  # branches start with c_
            key = k.split("_", 2)[1]  # pop off cf:c_ prefix
            subnodes[key] = v.value

    matching_subnodes = match_entries(subnodes.keys(), pattern)

    if patterns:  # we've still got more directories to traverse
        for subnode in matching_subnodes:
            rowKey = subnodes[subnode]
            subNodeContents = self.client.getRow(self.metaTable, rowKey, None)

            # leafs have a cf:INFO column describing their data
            # we can't possibly match on a leaf here because we have more
            # components in the pattern, so only recurse on branches
            if "cf:INFO" not in subNodeContents[0].columns:
                for m in self._find_paths(rowKey, patterns):
                    yield m
    else:  # at the end of the pattern
        for subnode in matching_subnodes:
            rowKey = subnodes[subnode]
            nodeRow = self.client.getRow(self.metaTable, rowKey, None)
            if len(nodeRow) == 0:
                continue
            metric = rowKey.split("_", 2)[1]  # pop off "m_" in key
            if "cf:INFO" in nodeRow[0].columns:
                # Leaf: derive the advertised interval from retention.
                info = json.loads(nodeRow[0].columns["cf:INFO"].value)
                start = time.time() - info['maxRetention']
                end = time.time()
                intervals = IntervalSet([Interval(start, end)])
                reader = HbaseReader(metric, intervals, info, self)
                yield LeafNode(metric, reader)
            else:
                yield BranchNode(metric)
def __init__(self, uri, metric_path, start_time, end_time, username=None, password=None, debug=False):
    """Set up Esmond API clients and choose a step for the query window.

    A separate authenticated client is created only when both username and
    password are supplied; otherwise the anonymous client is reused. The
    step coarsens as the window widens (day / hour / 5 min), and windows of
    one day or less get no explicit step.
    """
    self.metric_path = metric_path
    self.start_time = start_time
    self.end_time = end_time
    self.username = username
    self.password = password
    self.debug = debug
    self.client = EsmondAPI(uri, debug=debug)
    if username and password:
        self.auth_client = EsmondAPI(uri, debug=debug, username=username, password=password)
    else:
        self.auth_client = self.client
    if start_time and end_time:
        window = end_time - start_time
        if window > 6 * 30 * 24 * 3600:      # > ~6 months: daily points
            self.step = 86400
        elif window > 30 * 24 * 3600:        # > ~1 month: hourly points
            self.step = 3600
        elif window > 24 * 3600:             # > 1 day: 5-minute points
            self.step = 300
        else:
            self.step = None
        self.intervals = IntervalSet([Interval(start_time, end_time)])
    else:
        # No explicit window: advertise all representable time.
        self.intervals = IntervalSet([Interval(0, 2**32 - 1)])
def get_intervals(self):
    """Readable window: [now - maxRetention, file mtime], never empty."""
    earliest = time.time() - whisper.info(self.fs_path)['maxRetention']
    latest = max(stat(self.fs_path).st_mtime, earliest)
    return IntervalSet([Interval(earliest, latest)])
def get_intervals(self):
    """Intervals are meaningless for this reader; report a zero-width 'now'."""
    now = time.time()
    return IntervalSet([Interval(now, now)])
def get_intervals(self):
    """Advertise exactly the trailing hour of data.

    ``time.time()`` is sampled once so both endpoints refer to the same
    instant; the previous version called it twice, making the interval
    slightly longer than 3600s and non-deterministic.
    """
    now = time.time()
    return IntervalSet([Interval(now - 3600, now)])
def send(self, headers=None, msg_setter=None):
    """Issue the remote /metrics/find/ request and yield resulting nodes.

    Results are cached under ``self.cacheKey``; on a cache hit no request
    is made. Any transport, HTTP-status or unpickling failure marks the
    store failed and ends the generator without yielding.
    """
    log.debug("FindRequest.send(host=%s, query=%s) called" % (self.store.host, self.query))

    if headers is None:
        headers = {}

    results = cache.get(self.cacheKey)
    if results is not None:
        log.debug(
            "FindRequest.send(host=%s, query=%s) using cached result" %
            (self.store.host, self.query))
    else:
        url = "%s://%s/metrics/find/" % (
            'https' if settings.INTRACLUSTER_HTTPS else 'http', self.store.host)

        query_params = [
            ('local', '1'),
            ('format', 'pickle'),
            ('query', self.query.pattern),
        ]
        if self.query.startTime:
            query_params.append(('from', self.query.startTime))
        if self.query.endTime:
            query_params.append(('until', self.query.endTime))

        try:
            result = http.request(
                'POST' if settings.REMOTE_STORE_USE_POST else 'GET',
                url,
                fields=query_params,
                headers=headers,
                timeout=settings.REMOTE_FIND_TIMEOUT)
        except BaseException:
            log.exception(
                "FindRequest.send(host=%s, query=%s) exception during request" %
                (self.store.host, self.query))
            self.store.fail()
            return

        if result.status != 200:
            log.exception(
                "FindRequest.send(host=%s, query=%s) error response %d from %s?%s" %
                (self.store.host, self.query, result.status, url, urlencode(query_params)))
            self.store.fail()
            return

        try:
            results = unpickle.loads(result.data)
        except BaseException:
            log.exception(
                "FindRequest.send(host=%s, query=%s) exception processing response" %
                (self.store.host, self.query))
            self.store.fail()
            return

        cache.set(self.cacheKey, results, settings.FIND_CACHE_DURATION)

    msg_setter('host: {host}, query: {query}'.format(host=self.store.host, query=self.query))

    for node_info in results:
        # handle both 1.x and 0.9.x output
        path = node_info.get('path') or node_info.get('metric_path')
        is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
        intervals = node_info.get('intervals') or []
        if not isinstance(intervals, IntervalSet):
            intervals = IntervalSet([Interval(interval[0], interval[1])
                                     for interval in intervals])

        node_info = {
            'is_leaf': is_leaf,
            'path': path,
            'intervals': intervals,
        }
        if is_leaf:
            reader = RemoteReader(self.store, node_info, bulk_query=[self.query.pattern])
            node = LeafNode(path, reader)
        else:
            node = BranchNode(path)

        # Mark as remote so finders can distinguish local coverage.
        node.local = False
        yield node
def get_intervals(self):
    """Claim coverage from the epoch up to the current moment."""
    log.info("------->> get_intervals()")
    return IntervalSet([Interval(0, time.time())])
def find_all(self, query, headers=None):
    """Find all nodes matching ``query``, trying carbon-cache before disk.

    Yields branch and leaf nodes; leaf nodes per path are reduced to a
    minimal set that covers the requested interval, falling back to the
    nearest node within the FIND_TOLERANCE window when none overlap.
    """
    start = time.time()
    result_queue = Queue.Queue()
    jobs = []

    # Start remote searches
    if not query.local:
        random.shuffle(self.remote_stores)
        jobs.extend([(store.find, query, headers)
                     for store in self.remote_stores if store.available])

    # single metric query, let's hit carbon-cache first,
    # if we can fetch all data from carbon-cache, then
    # DO NOT hit disk. It helps us reduce iowait.
    # Please use the right version of carbon-cache.
    found_in_cache = False

    # Let's cache nodes with incomplete results, in case we need it and
    # don't have to query carbon-cache again.
    nodes_with_incomplete_result = {}

    for leaf_node in self.carbon_cache_finder.find_nodes(
            query, nodes_with_incomplete_result):
        yield leaf_node
        found_in_cache = True

    if found_in_cache and query.startTime != 0:
        return

    # Start local searches
    for finder in self.finders:
        jobs.append((finder.find_nodes, query))

    # Group matching nodes by their path
    nodes_by_path = defaultdict(list)

    def _work(job):
        # Unpack (callable, *args) job tuples for the worker pool.
        return job[0](*job[1:])

    nodes_list = self.worker_pool.map(_work, jobs)

    for nodes in nodes_list:
        if nodes:
            for node in nodes:
                nodes_by_path[node.path].append(node)

    # That means we should search all matched nodes.
    # it would merge nodes with new metrics that only exists in carbon-cache
    # merge any new metric node that only exists in carbon-cache,
    # although they partial exist.
    for name, node in nodes_with_incomplete_result.iteritems():
        if name not in nodes_by_path:
            nodes_by_path[name].append(node)

    log.info("Got all find results in %fs" % (time.time() - start))

    # Search Carbon Cache if nodes_by_path is empty
    #
    # We have this block of code here, because i wanna cover
    # an edge case.
    # 1) metric: carbon.foo
    # 2) carbon-cache includes 2 hours data for carbon.foo
    # 3) query data starting from 3 hours ago.
    # in such case, previous carbon_cache_finder will not return any node
    # because carbon-cache doesn't have enough data. However, if we reach
    # this point, that means we should return all we have in carbon cache.
    if not nodes_by_path:
        for name, node in nodes_with_incomplete_result.iteritems():
            # it only exists one value
            yield node
        return

    # Reduce matching nodes for each path to a minimal set
    found_branch_nodes = set()

    items = list(nodes_by_path.iteritems())
    random.shuffle(items)

    for path, nodes in items:
        leaf_nodes = []

        # First we dispense with the BranchNodes
        for node in nodes:
            if node.is_leaf:
                leaf_nodes.append(node)
            elif node.path not in found_branch_nodes:
                #TODO need to filter branch nodes based on requested interval... how?!?!?
                yield node
                found_branch_nodes.add(node.path)

        if not leaf_nodes:
            continue

        # Fast-path when there is a single node.
        if len(leaf_nodes) == 1:
            yield leaf_nodes[0]
            continue

        # Calculate best minimal node set
        minimal_node_set = set()
        covered_intervals = IntervalSet([])

        # If the query doesn't fall entirely within the FIND_TOLERANCE window
        # we disregard the window. This prevents unnecessary remote fetches
        # caused when carbon's cache skews node.intervals, giving the appearance
        # remote systems have data we don't have locally, which we probably do.
        now = int(time.time())
        tolerance_window = now - settings.FIND_TOLERANCE
        disregard_tolerance_window = query.interval.start < tolerance_window
        prior_to_window = Interval(float('-inf'), tolerance_window)

        def measure_of_added_coverage(
                node, drop_window=disregard_tolerance_window):
            # Marginal coverage of the queried interval this node would add.
            relevant_intervals = node.intervals.intersect_interval(
                query.interval)
            if drop_window:
                relevant_intervals = relevant_intervals.intersect_interval(
                    prior_to_window)
            return covered_intervals.union(
                relevant_intervals).size - covered_intervals.size

        nodes_remaining = list(leaf_nodes)

        # Prefer local nodes first (and do *not* drop the tolerance window)
        for node in leaf_nodes:
            if node.local and measure_of_added_coverage(node, False) > 0:
                nodes_remaining.remove(node)
                minimal_node_set.add(node)
                covered_intervals = covered_intervals.union(node.intervals)

        if settings.REMOTE_STORE_MERGE_RESULTS:
            remote_nodes = [n for n in nodes_remaining if not n.local]
            for node in remote_nodes:
                nodes_remaining.remove(node)
                minimal_node_set.add(node)
                covered_intervals = covered_intervals.union(node.intervals)
        else:
            # Greedy set-cover: repeatedly take the node adding most coverage.
            while nodes_remaining:
                node_coverages = [(measure_of_added_coverage(n), n)
                                  for n in nodes_remaining]
                best_coverage, best_node = max(node_coverages)

                if best_coverage == 0:
                    break

                nodes_remaining.remove(best_node)
                minimal_node_set.add(best_node)
                covered_intervals = covered_intervals.union(
                    best_node.intervals)

        # Sometimes the requested interval falls within the caching window.
        # We include the most likely node if the gap is within tolerance.
        if not minimal_node_set:
            def distance_to_requested_interval(node):
                if not node.intervals:
                    return float('inf')
                latest = sorted(node.intervals, key=lambda i: i.end)[-1]
                distance = query.interval.start - latest.end
                return distance if distance >= 0 else float('inf')

            best_candidate = min(leaf_nodes, key=distance_to_requested_interval)
            if distance_to_requested_interval(
                    best_candidate) <= settings.FIND_TOLERANCE:
                minimal_node_set.add(best_candidate)

        if len(minimal_node_set) == 1:
            yield minimal_node_set.pop()
        elif len(minimal_node_set) > 1:
            reader = MultiReader(minimal_node_set)
            yield LeafNode(path, reader)
def get_intervals(self):
    """Window from (now - retention) to the file's mtime, never empty."""
    oldest = time.time() - self.get_retention(self.fs_path)
    newest = max(stat(self.fs_path).st_mtime, oldest)
    return IntervalSet([Interval(oldest, newest)])
def _merge_leaf_nodes(self, query, path, leaf_nodes):
    """Get a single node from a list of leaf nodes.

    Reduces ``leaf_nodes`` to a minimal set covering the queried interval
    (greedy set-cover, preferring local nodes), then returns either the
    single surviving node, a MultiReader-backed LeafNode wrapping several,
    or None when nothing qualifies.
    """
    if not leaf_nodes:
        return None

    # Fast-path when there is a single node.
    if len(leaf_nodes) == 1:
        return leaf_nodes[0]

    # Calculate best minimal node set
    minimal_node_set = set()
    covered_intervals = IntervalSet([])

    # If the query doesn't fall entirely within the FIND_TOLERANCE window
    # we disregard the window. This prevents unnecessary remote fetches
    # caused when carbon's cache skews node.intervals, giving the appearance
    # remote systems have data we don't have locally, which we probably
    # do.
    now = int(time.time())
    tolerance_window = now - settings.FIND_TOLERANCE
    disregard_tolerance_window = query.interval.start < tolerance_window
    prior_to_window = Interval(float('-inf'), tolerance_window)

    def measure_of_added_coverage(
            node, drop_window=disregard_tolerance_window):
        # Marginal coverage of the queried interval this node would add.
        relevant_intervals = node.intervals.intersect_interval(
            query.interval)
        if drop_window:
            relevant_intervals = relevant_intervals.intersect_interval(
                prior_to_window)
        return covered_intervals.union(
            relevant_intervals).size - covered_intervals.size

    nodes_remaining = list(leaf_nodes)

    # Prefer local nodes first (and do *not* drop the tolerance window)
    for node in leaf_nodes:
        if node.local and measure_of_added_coverage(node, False) > 0:
            nodes_remaining.remove(node)
            minimal_node_set.add(node)
            covered_intervals = covered_intervals.union(node.intervals)

    if settings.REMOTE_STORE_MERGE_RESULTS:
        remote_nodes = [n for n in nodes_remaining if not n.local]
        for node in remote_nodes:
            nodes_remaining.remove(node)
            minimal_node_set.add(node)
            covered_intervals = covered_intervals.union(node.intervals)
    else:
        # Greedy set-cover: repeatedly take the node adding most coverage.
        while nodes_remaining:
            node_coverages = [(measure_of_added_coverage(n), n)
                              for n in nodes_remaining]
            best_coverage, best_node = max(node_coverages)

            if best_coverage == 0:
                break

            nodes_remaining.remove(best_node)
            minimal_node_set.add(best_node)
            covered_intervals = covered_intervals.union(
                best_node.intervals)

    # Sometimes the requested interval falls within the caching window.
    # We include the most likely node if the gap is within
    # tolerance.
    if not minimal_node_set:
        def distance_to_requested_interval(node):
            if not node.intervals:
                return float('inf')
            latest = sorted(
                node.intervals, key=lambda i: i.end)[-1]
            distance = query.interval.start - latest.end
            return distance if distance >= 0 else float('inf')

        best_candidate = min(
            leaf_nodes, key=distance_to_requested_interval)
        if distance_to_requested_interval(
                best_candidate) <= settings.FIND_TOLERANCE:
            minimal_node_set.add(best_candidate)

    if not minimal_node_set:
        return None
    elif len(minimal_node_set) == 1:
        return minimal_node_set.pop()
    else:
        reader = MultiReader(minimal_node_set)
        return LeafNode(path, reader)
def find_nodes(self, query, timer=None):
    """Query the remote host's /metrics/find/ endpoint and yield nodes.

    Results are cached for FIND_CACHE_DURATION with the query window
    bucketed by the cache TTL so nearby queries share an entry. Decoding
    failures mark the finder failed and re-raise.
    """
    timer.set_msg('host: {host}, query: {query}'.format(host=self.host, query=query))

    log.debug("RemoteFinder.find_nodes(host=%s, query=%s) called" % (self.host, query))

    # prevent divide by 0
    cacheTTL = settings.FIND_CACHE_DURATION or 1
    if query.startTime:
        start = query.startTime - (query.startTime % cacheTTL)
    else:
        start = ""

    if query.endTime:
        end = query.endTime - (query.endTime % cacheTTL)
    else:
        end = ""

    cacheKey = "find:%s:%s:%s:%s" % (self.host, compactHash(query.pattern), start, end)

    results = cache.get(cacheKey)
    if results is not None:
        log.debug(
            "RemoteFinder.find_nodes(host=%s, query=%s) using cached result" %
            (self.host, query))
    else:
        url = '/metrics/find/'

        query_params = [
            ('local', self.params.get('local', '1')),
            ('format', self.params.get('format', 'pickle')),
            ('query', query.pattern),
        ]
        if query.startTime:
            query_params.append(('from', int(query.startTime)))

        if query.endTime:
            query_params.append(('until', int(query.endTime)))

        result = self.request(url,
                              fields=query_params,
                              headers=query.headers,
                              timeout=settings.REMOTE_FIND_TIMEOUT)

        try:
            # Content negotiation: msgpack when offered, pickle otherwise.
            if result.getheader('content-type') == 'application/x-msgpack':
                results = msgpack.load(BufferedHTTPReader(
                    result, buffer_size=settings.REMOTE_BUFFER_SIZE), encoding='utf-8')
            else:
                results = unpickle.load(
                    BufferedHTTPReader(
                        result, buffer_size=settings.REMOTE_BUFFER_SIZE))
        except Exception as err:
            self.fail()
            log.exception(
                "RemoteFinder[%s] Error decoding find response from %s: %s" %
                (self.host, result.url_full, err))
            raise Exception("Error decoding find response from %s: %s" %
                            (result.url_full, err))
        finally:
            result.release_conn()

        cache.set(cacheKey, results, settings.FIND_CACHE_DURATION)

    for node_info in results:
        # handle both 1.x and 0.9.x output
        path = node_info.get('path') or node_info.get('metric_path')
        is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
        intervals = node_info.get('intervals') or []
        if not isinstance(intervals, IntervalSet):
            intervals = IntervalSet([Interval(interval[0], interval[1])
                                     for interval in intervals])

        node_info = {
            'is_leaf': is_leaf,
            'path': path,
            'intervals': intervals,
        }
        if is_leaf:
            reader = RemoteReader(self, node_info)
            node = LeafNode(path, reader)
        else:
            node = BranchNode(path)

        # Mark as remote so finders can distinguish local coverage.
        node.local = False
        yield node
def find_nodes(self, query, timer=None):
    """Query the remote host's /metrics/find/ endpoint and return a list of nodes.

    Like the generator variant but materializes the node list so thread-pool
    jobs execute eagerly and raise inside the worker, not the caller.
    """
    timer.set_msg('host: {host}, query: {query}'.format(host=self.host, query=query))

    log.debug("RemoteFinder.find_nodes(host=%s, query=%s) called" % (self.host, query))

    # prevent divide by 0
    cacheTTL = settings.FIND_CACHE_DURATION or 1
    if query.startTime:
        start = query.startTime - (query.startTime % cacheTTL)
    else:
        start = ""

    if query.endTime:
        end = query.endTime - (query.endTime % cacheTTL)
    else:
        end = ""

    cacheKey = "find:%s:%s:%s:%s" % (self.host, compactHash(query.pattern), start, end)

    results = cache.get(cacheKey)
    if results is not None:
        log.debug(
            "RemoteFinder.find_nodes(host=%s, query=%s) using cached result" %
            (self.host, query))
    else:
        url = '/metrics/find/'

        query_params = [
            ('local', self.params.get('local', '1')),
            ('format', self.params.get('format', 'pickle')),
            ('query', query.pattern),
        ]
        if query.startTime:
            query_params.append(('from', int(query.startTime)))

        if query.endTime:
            query_params.append(('until', int(query.endTime)))

        result = self.request(url,
                              fields=query_params,
                              headers=query.headers,
                              timeout=settings.FIND_TIMEOUT)

        try:
            # Content negotiation: msgpack when offered, pickle otherwise.
            if result.getheader('content-type') == 'application/x-msgpack':
                results = msgpack.load(BufferedHTTPReader(
                    result, buffer_size=settings.REMOTE_BUFFER_SIZE), encoding='utf-8')
            else:
                results = unpickle.load(
                    BufferedHTTPReader(
                        result, buffer_size=settings.REMOTE_BUFFER_SIZE))
        except Exception as err:
            self.fail()
            log.exception(
                "RemoteFinder[%s] Error decoding find response from %s: %s" %
                (self.host, result.url_full, err))
            raise Exception("Error decoding find response from %s: %s" %
                            (result.url_full, err))
        finally:
            result.release_conn()

        cache.set(cacheKey, results, settings.FIND_CACHE_DURATION)

    # We don't use generator here, this function may be run as a job in a thread pool, using a generator has the following risks:
    # 1. Generators are lazy, if we don't iterator the returned generator in the job, the real execution(network operations,
    # time-consuming) are very likely be triggered in the calling thread, losing the effect of thread pool;
    # 2. As function execution is delayed, the job manager can not catch job runtime exception as expected/designed;
    nodes = []
    for node_info in results:
        # handle both 1.x and 0.9.x output
        path = node_info.get('path') or node_info.get('metric_path')
        is_leaf = node_info.get('is_leaf') or node_info.get('isLeaf')
        intervals = node_info.get('intervals') or []
        if not isinstance(intervals, IntervalSet):
            intervals = IntervalSet([Interval(interval[0], interval[1])
                                     for interval in intervals])

        node_info = {
            'is_leaf': is_leaf,
            'path': path,
            'intervals': intervals,
        }
        if is_leaf:
            reader = RemoteReader(self, node_info)
            node = LeafNode(path, reader)
        else:
            node = BranchNode(path)

        # Mark as remote so finders can distinguish local coverage.
        node.local = False
        nodes.append(node)

    return nodes
def get_intervals(self):
    """Union of every wrapped node's intervals, returned in sorted order."""
    merged = [iv for child in self.nodes for iv in child.intervals.intervals]
    return IntervalSet(sorted(merged))
def find(self, pattern, startTime=None, endTime=None, local=False):
    """Find nodes matching ``pattern`` across local finders and remote stores.

    Yields branch nodes once per path, then reduces each path's leaf nodes
    to a minimal covering set (greedy set-cover preferring local nodes),
    yielding either a single node or a MultiReader-backed LeafNode.

    Fix: ``distance_to_requested_interval`` now guards against nodes with
    no intervals, which previously raised IndexError on
    ``sorted(...)[-1]`` — consistent with the other copies of this helper.
    """
    query = FindQuery(pattern, startTime, endTime)

    # Start remote searches
    if not local:
        remote_requests = [r.find(query) for r in self.remote_stores if r.available]

    matching_nodes = set()

    # Search locally
    for finder in self.finders:
        for node in finder.find_nodes(query):
            #log.info("find() :: local :: %s" % node)
            matching_nodes.add(node)

    # Gather remote search results
    if not local:
        for request in remote_requests:
            for node in request.get_results():
                #log.info("find() :: remote :: %s from %s" % (node,request.store.host))
                matching_nodes.add(node)

    # Group matching nodes by their path
    nodes_by_path = {}
    for node in matching_nodes:
        if node.path not in nodes_by_path:
            nodes_by_path[node.path] = []
        nodes_by_path[node.path].append(node)

    # Reduce matching nodes for each path to a minimal set
    found_branch_nodes = set()

    for path, nodes in nodes_by_path.iteritems():
        leaf_nodes = []

        # First we dispense with the BranchNodes
        for node in nodes:
            if node.is_leaf:
                leaf_nodes.append(node)
            elif node.path not in found_branch_nodes:
                #TODO need to filter branch nodes based on requested interval... how?!?!?
                yield node
                found_branch_nodes.add(node.path)

        if not leaf_nodes:
            continue

        # Calculate best minimal node set
        minimal_node_set = set()
        covered_intervals = IntervalSet([])

        # If the query doesn't fall entirely within the FIND_TOLERANCE window
        # we disregard the window. This prevents unnecessary remote fetches
        # caused when carbon's cache skews node.intervals, giving the appearance
        # remote systems have data we don't have locally, which we probably do.
        now = int(time.time())
        tolerance_window = now - settings.FIND_TOLERANCE
        disregard_tolerance_window = query.interval.start < tolerance_window
        prior_to_window = Interval(float('-inf'), tolerance_window)

        def measure_of_added_coverage(node, drop_window=disregard_tolerance_window):
            # Marginal coverage of the queried interval this node would add.
            relevant_intervals = node.intervals.intersect_interval(query.interval)
            if drop_window:
                relevant_intervals = relevant_intervals.intersect_interval(prior_to_window)
            return covered_intervals.union(relevant_intervals).size - covered_intervals.size

        nodes_remaining = list(leaf_nodes)

        # Prefer local nodes first (and do *not* drop the tolerance window)
        for node in leaf_nodes:
            if node.local and measure_of_added_coverage(node, False) > 0:
                nodes_remaining.remove(node)
                minimal_node_set.add(node)
                covered_intervals = covered_intervals.union(node.intervals)

        if settings.REMOTE_STORE_MERGE_RESULTS:
            remote_nodes = [n for n in nodes_remaining if not n.local]
            for node in remote_nodes:
                nodes_remaining.remove(node)
                minimal_node_set.add(node)
                covered_intervals = covered_intervals.union(node.intervals)
        else:
            # Greedy set-cover: repeatedly take the node adding most coverage.
            while nodes_remaining:
                node_coverages = [(measure_of_added_coverage(n), n)
                                  for n in nodes_remaining]
                best_coverage, best_node = max(node_coverages)

                if best_coverage == 0:
                    break

                nodes_remaining.remove(best_node)
                minimal_node_set.add(best_node)
                covered_intervals = covered_intervals.union(best_node.intervals)

        # Sometimes the requested interval falls within the caching window.
        # We include the most likely node if the gap is within tolerance.
        if not minimal_node_set:
            def distance_to_requested_interval(node):
                # BUGFIX: a node with no intervals previously raised
                # IndexError below; treat it as infinitely far away.
                if not node.intervals:
                    return float('inf')
                latest = sorted(node.intervals, key=lambda i: i.end)[-1]
                distance = query.interval.start - latest.end
                return distance if distance >= 0 else float('inf')

            best_candidate = min(leaf_nodes, key=distance_to_requested_interval)
            if distance_to_requested_interval(best_candidate) <= settings.FIND_TOLERANCE:
                minimal_node_set.add(best_candidate)

        if len(minimal_node_set) == 1:
            yield minimal_node_set.pop()
        elif len(minimal_node_set) > 1:
            reader = MultiReader(minimal_node_set)
            yield LeafNode(path, reader)
def find_all(self, query, headers=None):
    """Fan out ``query`` to remote stores and local finders, yielding nodes.

    Jobs run via the worker pool (or inline) and results are collected from
    a queue with a shared deadline; late results are abandoned. Leaf nodes
    per path are reduced to a minimal covering set as in ``find``.
    """
    start = time.time()
    result_queue = Queue.Queue()
    jobs = []

    # Start remote searches
    if not query.local:
        random.shuffle(self.remote_stores)
        jobs.extend([(store.find, query, headers)
                     for store in self.remote_stores if store.available])

    # Start local searches
    for finder in self.finders:
        jobs.append((finder.find_nodes, query))

    if settings.USE_WORKER_POOL:
        return_result = lambda x: result_queue.put(x)
        for job in jobs:
            get_pool().apply_async(func=job[0], args=job[1:], callback=return_result)
    else:
        for job in jobs:
            result_queue.put(job[0](*job[1:]))

    # Group matching nodes by their path
    nodes_by_path = defaultdict(list)

    deadline = start + settings.REMOTE_FIND_TIMEOUT
    result_cnt = 0

    while result_cnt < len(jobs):
        wait_time = deadline - time.time()

        try:
            nodes = result_queue.get(True, wait_time)

        # ValueError could happen if due to really unlucky timing wait_time is negative
        except (Queue.Empty, ValueError):
            if time.time() > deadline:
                log.info("Timed out in find_all after %fs" % (settings.REMOTE_FIND_TIMEOUT))
                break
            else:
                continue

        log.info("Got a find result after %fs" % (time.time() - start))
        result_cnt += 1
        if nodes:
            for node in nodes:
                nodes_by_path[node.path].append(node)

    log.info("Got all find results in %fs" % (time.time() - start))

    # Reduce matching nodes for each path to a minimal set
    found_branch_nodes = set()

    items = list(nodes_by_path.iteritems())
    random.shuffle(items)

    for path, nodes in items:
        leaf_nodes = []

        # First we dispense with the BranchNodes
        for node in nodes:
            if node.is_leaf:
                leaf_nodes.append(node)
            elif node.path not in found_branch_nodes:
                #TODO need to filter branch nodes based on requested interval... how?!?!?
                yield node
                found_branch_nodes.add(node.path)

        if not leaf_nodes:
            continue

        # Fast-path when there is a single node.
        if len(leaf_nodes) == 1:
            yield leaf_nodes[0]
            continue

        # Calculate best minimal node set
        minimal_node_set = set()
        covered_intervals = IntervalSet([])

        # If the query doesn't fall entirely within the FIND_TOLERANCE window
        # we disregard the window. This prevents unnecessary remote fetches
        # caused when carbon's cache skews node.intervals, giving the appearance
        # remote systems have data we don't have locally, which we probably do.
        now = int(time.time())
        tolerance_window = now - settings.FIND_TOLERANCE
        disregard_tolerance_window = query.interval.start < tolerance_window
        prior_to_window = Interval(float('-inf'), tolerance_window)

        def measure_of_added_coverage(
                node, drop_window=disregard_tolerance_window):
            # Marginal coverage of the queried interval this node would add.
            relevant_intervals = node.intervals.intersect_interval(
                query.interval)
            if drop_window:
                relevant_intervals = relevant_intervals.intersect_interval(
                    prior_to_window)
            return covered_intervals.union(
                relevant_intervals).size - covered_intervals.size

        nodes_remaining = list(leaf_nodes)

        # Prefer local nodes first (and do *not* drop the tolerance window)
        for node in leaf_nodes:
            if node.local and measure_of_added_coverage(node, False) > 0:
                nodes_remaining.remove(node)
                minimal_node_set.add(node)
                covered_intervals = covered_intervals.union(node.intervals)

        if settings.REMOTE_STORE_MERGE_RESULTS:
            remote_nodes = [n for n in nodes_remaining if not n.local]
            for node in remote_nodes:
                nodes_remaining.remove(node)
                minimal_node_set.add(node)
                covered_intervals = covered_intervals.union(node.intervals)
        else:
            # Greedy set-cover: repeatedly take the node adding most coverage.
            while nodes_remaining:
                node_coverages = [(measure_of_added_coverage(n), n)
                                  for n in nodes_remaining]
                best_coverage, best_node = max(node_coverages)

                if best_coverage == 0:
                    break

                nodes_remaining.remove(best_node)
                minimal_node_set.add(best_node)
                covered_intervals = covered_intervals.union(
                    best_node.intervals)

        # Sometimes the requested interval falls within the caching window.
        # We include the most likely node if the gap is within tolerance.
        if not minimal_node_set:
            def distance_to_requested_interval(node):
                if not node.intervals:
                    return float('inf')
                latest = sorted(node.intervals, key=lambda i: i.end)[-1]
                distance = query.interval.start - latest.end
                return distance if distance >= 0 else float('inf')

            best_candidate = min(leaf_nodes, key=distance_to_requested_interval)
            if distance_to_requested_interval(
                    best_candidate) <= settings.FIND_TOLERANCE:
                minimal_node_set.add(best_candidate)

        if len(minimal_node_set) == 1:
            yield minimal_node_set.pop()
        elif len(minimal_node_set) > 1:
            reader = MultiReader(minimal_node_set)
            yield LeafNode(path, reader)
def get_intervals(self):
    """Report coverage over all time: the epoch through the current second."""
    now = int(time.time())
    return IntervalSet([Interval(0, now)])
def _merge_leaf_nodes(self, query, path, leaf_nodes):
    """Get a single node from a list of leaf nodes.

    Reduces ``leaf_nodes`` to a minimal set covering the queried interval
    (greedy set-cover, preferring local nodes), then returns either the
    single surviving node, a MultiReader-backed LeafNode wrapping several,
    or None when nothing qualifies.
    """
    if not leaf_nodes:
        return None

    # Fast-path when there is a single node.
    if len(leaf_nodes) == 1:
        return leaf_nodes[0]

    # Calculate best minimal node set
    minimal_node_set = set()
    covered_intervals = IntervalSet([])

    # If the query doesn't fall entirely within the FIND_TOLERANCE window
    # we disregard the window. This prevents unnecessary remote fetches
    # caused when carbon's cache skews node.intervals, giving the appearance
    # remote systems have data we don't have locally, which we probably
    # do.
    now = int(time.time())
    tolerance_window = now - settings.FIND_TOLERANCE
    disregard_tolerance_window = query.interval.start < tolerance_window
    prior_to_window = Interval(float('-inf'), tolerance_window)

    def measure_of_added_coverage(node,
                                  drop_window=disregard_tolerance_window):
        # Marginal coverage of the queried interval this node would add.
        relevant_intervals = node.intervals.intersect_interval(
            query.interval)
        if drop_window:
            relevant_intervals = relevant_intervals.intersect_interval(
                prior_to_window)
        return covered_intervals.union(
            relevant_intervals).size - covered_intervals.size

    nodes_remaining = list(leaf_nodes)

    # Prefer local nodes first (and do *not* drop the tolerance window)
    for node in leaf_nodes:
        if node.local and measure_of_added_coverage(node, False) > 0:
            nodes_remaining.remove(node)
            minimal_node_set.add(node)
            covered_intervals = covered_intervals.union(node.intervals)

    if settings.REMOTE_STORE_MERGE_RESULTS:
        remote_nodes = [n for n in nodes_remaining if not n.local]
        for node in remote_nodes:
            nodes_remaining.remove(node)
            minimal_node_set.add(node)
            covered_intervals = covered_intervals.union(node.intervals)
    else:
        # Greedy set-cover: repeatedly take the node adding most coverage.
        while nodes_remaining:
            node_coverages = [(measure_of_added_coverage(n), n)
                              for n in nodes_remaining]
            best_coverage, best_node = max(node_coverages)

            if best_coverage == 0:
                break

            nodes_remaining.remove(best_node)
            minimal_node_set.add(best_node)
            covered_intervals = covered_intervals.union(
                best_node.intervals)

    # Sometimes the requested interval falls within the caching window.
    # We include the most likely node if the gap is within
    # tolerance.
    if not minimal_node_set:
        def distance_to_requested_interval(node):
            if not node.intervals:
                return float('inf')
            latest = sorted(node.intervals, key=lambda i: i.end)[-1]
            distance = query.interval.start - latest.end
            return distance if distance >= 0 else float('inf')

        best_candidate = min(leaf_nodes, key=distance_to_requested_interval)
        if distance_to_requested_interval(
                best_candidate) <= settings.FIND_TOLERANCE:
            minimal_node_set.add(best_candidate)

    if not minimal_node_set:
        return None
    elif len(minimal_node_set) == 1:
        return minimal_node_set.pop()
    else:
        reader = MultiReader(minimal_node_set)
        return LeafNode(path, reader)
def get_intervals(self):
    """Report coverage over all time: epoch through now.

    TODO: use the borinud summary to compute the real window, e.g.
    ``IntervalSet([Interval(start, end)])``.
    """
    return IntervalSet([Interval(0, int(time.time()))])