def test_MultiReader_merge_normal(self):
  results1 = ((1496252939, 1496252944, 1),
              [None, None, None, None, 1.0])
  results2 = ((1496252939, 1496252944, 1),
              [1.0, 1.0, 1.0, 1.0, 1.0])

  wr1 = WhisperReader(self.worker1, 'hosts.worker1.cpu')
  node1 = LeafNode('hosts.worker1.cpu', wr1)

  reader = MultiReader([node1])
  (_, values) = reader.merge(results1, results2)
  self.assertEqual(values, [1.0, 1.0, 1.0, 1.0, 1.0])
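# Illustrative sketch (not part of the test suite) of the gap-filling merge
# the assertion above relies on: MultiReader.merge keeps each point from the
# first result and falls back to the second result's point when the first is
# None, assuming both results share the same (start, end, step) timeInfo.
# merge_values_sketch is a hypothetical helper, not Graphite API.
def merge_values_sketch(values1, values2):
  # Prefer the first series' point; fill gaps from the second.
  return [v1 if v1 is not None else v2 for v1, v2 in zip(values1, values2)]

# merge_values_sketch([None, None, None, None, 1.0], [1.0] * 5)
# => [1.0, 1.0, 1.0, 1.0, 1.0]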
def test_MultiReader_get_intervals(self):
  self.create_whisper_hosts()
  self.addCleanup(self.wipe_whisper_hosts)

  wr1 = WhisperReader(self.worker1, 'hosts.worker1.cpu')
  node1 = LeafNode('hosts.worker1.cpu', wr1)
  wr2 = WhisperReader(self.worker2, 'hosts.worker2.cpu')
  node2 = LeafNode('hosts.worker2.cpu', wr2)

  reader = MultiReader([node1, node2])
  intervals = reader.get_intervals()
  for interval in intervals:
    self.assertEqual(int(interval.start), self.start_ts - 60)
    self.assertEqual(int(interval.end), self.start_ts)
def test_MultiReader_fetch(self):
  self.create_whisper_hosts()
  self.addCleanup(self.wipe_whisper_hosts)

  wr1 = WhisperReader(self.worker1, 'hosts.worker1.cpu')
  node1 = LeafNode('hosts.worker1.cpu', wr1)
  wr2 = WhisperReader(self.worker2, 'hosts.worker2.cpu')
  node2 = LeafNode('hosts.worker2.cpu', wr2)

  reader = MultiReader([node1, node2])
  results = reader.fetch(self.start_ts - 5, self.start_ts)
  (_, values) = results
  self.assertEqual(values, [None, None, None, None, 1.0])
def _fetchData(pathExpr, startTime, endTime, requestContext, seriesList):
  matching_nodes = STORE.find(pathExpr, startTime, endTime,
                              local=requestContext['localOnly'],
                              reqkey=requestContext['request_key'])
  matching_nodes = list(matching_nodes)

  if len(matching_nodes) > 1:
    request_hash = md5("%s_%s_%s" % (pathExpr, startTime, endTime)).hexdigest()
    cached_result = cache.get(request_hash)
    if cached_result:
      log.info("DEBUG:fetchData: got result from cache for %s_%s_%s" %
               (pathExpr, startTime, endTime))
      fetches = cached_result
    else:
      log.info("DEBUG:fetchData: no cache for %s_%s_%s" %
               (pathExpr, startTime, endTime))
      fetches = MultiReader(
          matching_nodes,
          reqkey=requestContext['request_key']).fetch(startTime, endTime)
      try:
        cache.add(request_hash, fetches)
      except Exception as err:
        log.exception("Failed to save data in memcached: %s" % str(err))
  elif len(matching_nodes) == 1:
    fetches = [(matching_nodes[0],
                matching_nodes[0].fetch(startTime, endTime))]
  else:
    fetches = []

  for node, results in fetches:
    if isinstance(results, FetchInProgress):
      results = results.waitForResults()

    if not results:
      log.info("render.datalib.fetchData :: no results for %s.fetch(%s, %s)" %
               (node, startTime, endTime))
      continue

    try:
      (timeInfo, values) = results
    except ValueError:
      e = sys.exc_info()[1]
      raise Exception("could not parse timeInfo/values from metric '%s': %s" %
                      (node.path, e))
    (start, end, step) = timeInfo

    series = TimeSeries(node.path, start, end, step, values)
    # hack to pass expressions through to render functions
    series.pathExpression = pathExpr
    seriesList.append(series)
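# Worked example of the cache-key scheme above (metric name and timestamps
# hypothetical): identical (pathExpr, startTime, endTime) triples hash to the
# same key, so a repeated query within the memcached TTL is served from cache
# instead of re-fetching through MultiReader.
from hashlib import md5

request_hash = md5("%s_%s_%s" % ('hosts.*.cpu', 1496252939, 1496252944)).hexdigest()
# cache.get(request_hash) -> the previously stored fetches, or None on a miss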
def test_MultiReader_init(self):
  self.create_whisper_hosts()
  self.addCleanup(self.wipe_whisper_hosts)

  wr1 = WhisperReader(self.worker1, 'hosts.worker1.cpu')
  node1 = LeafNode('hosts.worker1.cpu', wr1)
  wr2 = WhisperReader(self.worker2, 'hosts.worker2.cpu')
  node2 = LeafNode('hosts.worker2.cpu', wr2)

  reader = MultiReader([node1, node2])
  self.assertIsNotNone(reader)
def _merge_leaf_nodes(self, query, path, leaf_nodes):
  """Get a single node from a list of leaf nodes."""
  if not leaf_nodes:
    return None

  # Fast-path when there is a single node.
  if len(leaf_nodes) == 1:
    return leaf_nodes[0]

  # Calculate the best minimal node set.
  minimal_node_set = set()
  covered_intervals = IntervalSet([])

  # If the query doesn't fall entirely within the FIND_TOLERANCE window,
  # disregard the window. This prevents unnecessary remote fetches caused
  # when carbon's cache skews node.intervals, giving the appearance that
  # remote systems have data we don't have locally, when we probably do.
  now = int(time.time())
  tolerance_window = now - settings.FIND_TOLERANCE
  disregard_tolerance_window = query.interval.start < tolerance_window
  prior_to_window = Interval(float('-inf'), tolerance_window)

  def measure_of_added_coverage(node, drop_window=disregard_tolerance_window):
    relevant_intervals = node.intervals.intersect_interval(query.interval)
    if drop_window:
      relevant_intervals = relevant_intervals.intersect_interval(prior_to_window)
    return covered_intervals.union(relevant_intervals).size - covered_intervals.size

  nodes_remaining = list(leaf_nodes)

  # Prefer local nodes first (and do *not* drop the tolerance window).
  for node in leaf_nodes:
    if node.local and measure_of_added_coverage(node, False) > 0:
      nodes_remaining.remove(node)
      minimal_node_set.add(node)
      covered_intervals = covered_intervals.union(node.intervals)

  if settings.REMOTE_STORE_MERGE_RESULTS:
    remote_nodes = [n for n in nodes_remaining if not n.local]
    for node in remote_nodes:
      nodes_remaining.remove(node)
      minimal_node_set.add(node)
      covered_intervals = covered_intervals.union(node.intervals)
  else:
    while nodes_remaining:
      node_coverages = [(measure_of_added_coverage(n), n)
                        for n in nodes_remaining]
      best_coverage, best_node = max(node_coverages)
      if best_coverage == 0:
        break
      nodes_remaining.remove(best_node)
      minimal_node_set.add(best_node)
      covered_intervals = covered_intervals.union(best_node.intervals)

  # Sometimes the requested interval falls within the caching window.
  # Include the most likely node if the gap is within tolerance.
  if not minimal_node_set:
    def distance_to_requested_interval(node):
      if not node.intervals:
        return float('inf')
      latest = sorted(node.intervals, key=lambda i: i.end)[-1]
      distance = query.interval.start - latest.end
      return distance if distance >= 0 else float('inf')

    best_candidate = min(leaf_nodes, key=distance_to_requested_interval)
    if distance_to_requested_interval(best_candidate) <= settings.FIND_TOLERANCE:
      minimal_node_set.add(best_candidate)

  if not minimal_node_set:
    return None
  elif len(minimal_node_set) == 1:
    return minimal_node_set.pop()
  else:
    reader = MultiReader(minimal_node_set)
    return LeafNode(path, reader)
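# Simplified sketch of the greedy coverage loop above, using sets of integer
# timestamps instead of IntervalSet (greedy_pick and its inputs are
# hypothetical, for illustration only): repeatedly pick the node that adds
# the most uncovered seconds, stopping once no candidate adds coverage.
def greedy_pick(node_intervals):
  covered = set()
  chosen = []
  remaining = dict(node_intervals)
  while remaining:
    # Pick the node contributing the most seconds not yet covered.
    name = max(remaining, key=lambda n: len(remaining[n] - covered))
    added = remaining.pop(name) - covered
    if not added:
      break
    chosen.append(name)
    covered |= added
  return chosen

# greedy_pick({'a': set(range(0, 70)), 'b': set(range(40, 90))})
# => ['a', 'b']  ('b' still adds seconds 70-89 after 'a' is chosen)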
def find_all(self, query, headers=None):
  start = time.time()
  result_queue = Queue.Queue()
  jobs = []

  # Start remote searches.
  if not query.local:
    random.shuffle(self.remote_stores)
    jobs.extend([(store.find, query, headers)
                 for store in self.remote_stores if store.available])

  # Start local searches.
  for finder in self.finders:
    jobs.append((finder.find_nodes, query))

  if settings.USE_WORKER_POOL:
    return_result = lambda x: result_queue.put(x)
    for job in jobs:
      get_pool().apply_async(func=job[0], args=job[1:], callback=return_result)
  else:
    for job in jobs:
      result_queue.put(job[0](*job[1:]))

  # Group matching nodes by their path.
  nodes_by_path = defaultdict(list)

  deadline = start + settings.REMOTE_FIND_TIMEOUT
  result_cnt = 0

  while result_cnt < len(jobs):
    wait_time = deadline - time.time()

    try:
      nodes = result_queue.get(True, wait_time)
    # ValueError can happen if, due to really unlucky timing, wait_time is
    # negative.
    except (Queue.Empty, ValueError):
      if time.time() > deadline:
        log.info("Timed out in find_all after %fs" % settings.REMOTE_FIND_TIMEOUT)
        break
      else:
        continue

    log.info("Got a find result after %fs" % (time.time() - start))
    result_cnt += 1
    if nodes:
      for node in nodes:
        nodes_by_path[node.path].append(node)

  log.info("Got all find results in %fs" % (time.time() - start))

  # Reduce matching nodes for each path to a minimal set.
  found_branch_nodes = set()

  items = list(nodes_by_path.iteritems())
  random.shuffle(items)

  for path, nodes in items:
    leaf_nodes = []

    # First we dispense with the BranchNodes.
    for node in nodes:
      if node.is_leaf:
        leaf_nodes.append(node)
      elif node.path not in found_branch_nodes:
        # TODO: need to filter branch nodes based on the requested
        # interval... how?
        yield node
        found_branch_nodes.add(node.path)

    if not leaf_nodes:
      continue

    # Fast-path when there is a single node.
    if len(leaf_nodes) == 1:
      yield leaf_nodes[0]
      continue

    # Calculate the best minimal node set.
    minimal_node_set = set()
    covered_intervals = IntervalSet([])

    # If the query doesn't fall entirely within the FIND_TOLERANCE window,
    # disregard the window. This prevents unnecessary remote fetches caused
    # when carbon's cache skews node.intervals, giving the appearance that
    # remote systems have data we don't have locally, when we probably do.
    now = int(time.time())
    tolerance_window = now - settings.FIND_TOLERANCE
    disregard_tolerance_window = query.interval.start < tolerance_window
    prior_to_window = Interval(float('-inf'), tolerance_window)

    def measure_of_added_coverage(node, drop_window=disregard_tolerance_window):
      relevant_intervals = node.intervals.intersect_interval(query.interval)
      if drop_window:
        relevant_intervals = relevant_intervals.intersect_interval(prior_to_window)
      return covered_intervals.union(relevant_intervals).size - covered_intervals.size

    nodes_remaining = list(leaf_nodes)

    # Prefer local nodes first (and do *not* drop the tolerance window).
    for node in leaf_nodes:
      if node.local and measure_of_added_coverage(node, False) > 0:
        nodes_remaining.remove(node)
        minimal_node_set.add(node)
        covered_intervals = covered_intervals.union(node.intervals)

    if settings.REMOTE_STORE_MERGE_RESULTS:
      remote_nodes = [n for n in nodes_remaining if not n.local]
      for node in remote_nodes:
        nodes_remaining.remove(node)
        minimal_node_set.add(node)
        covered_intervals = covered_intervals.union(node.intervals)
    else:
      while nodes_remaining:
        node_coverages = [(measure_of_added_coverage(n), n)
                          for n in nodes_remaining]
        best_coverage, best_node = max(node_coverages)
        if best_coverage == 0:
          break
        nodes_remaining.remove(best_node)
        minimal_node_set.add(best_node)
        covered_intervals = covered_intervals.union(best_node.intervals)

    # Sometimes the requested interval falls within the caching window.
    # Include the most likely node if the gap is within tolerance.
    if not minimal_node_set:
      def distance_to_requested_interval(node):
        if not node.intervals:
          return float('inf')
        latest = sorted(node.intervals, key=lambda i: i.end)[-1]
        distance = query.interval.start - latest.end
        return distance if distance >= 0 else float('inf')

      best_candidate = min(leaf_nodes, key=distance_to_requested_interval)
      if distance_to_requested_interval(best_candidate) <= settings.FIND_TOLERANCE:
        minimal_node_set.add(best_candidate)

    if len(minimal_node_set) == 1:
      yield minimal_node_set.pop()
    elif len(minimal_node_set) > 1:
      reader = MultiReader(minimal_node_set)
      yield LeafNode(path, reader)
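# Worked example (all numbers hypothetical) of the tolerance-window test
# above: the window covers the last FIND_TOLERANCE seconds, and only a query
# that reaches back beyond it drops recent intervals from the coverage
# measure.
now = 10000
FIND_TOLERANCE = 300
tolerance_window = now - FIND_TOLERANCE                       # 9700
query_start = 9000
disregard_tolerance_window = query_start < tolerance_window   # True
# Since the query starts before 9700, coverage is measured only against
# Interval(-inf, 9700): points that may still be sitting in carbon's cache
# (newer than the window) no longer make a remote node look better than a
# local one.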
def find(self, pattern, startTime=None, endTime=None, local=False):
  query = FindQuery(pattern, startTime, endTime)

  # Start remote searches.
  if not local:
    remote_requests = [r.find(query) for r in self.remote_stores if r.available]

  matching_nodes = set()

  # Search locally.
  for finder in self.finders:
    for node in finder.find_nodes(query):
      #log.info("find() :: local :: %s" % node)
      matching_nodes.add(node)

  # Gather remote search results.
  if not local:
    for request in remote_requests:
      for node in request.get_results():
        #log.info("find() :: remote :: %s from %s" % (node, request.store.host))
        matching_nodes.add(node)

  # Group matching nodes by their path.
  nodes_by_path = {}
  for node in matching_nodes:
    if node.path not in nodes_by_path:
      nodes_by_path[node.path] = []
    nodes_by_path[node.path].append(node)

  # Reduce matching nodes for each path to a minimal set.
  found_branch_nodes = set()

  for path, nodes in nodes_by_path.iteritems():
    leaf_nodes = []

    # First we dispense with the BranchNodes.
    for node in nodes:
      if node.is_leaf:
        leaf_nodes.append(node)
      elif node.path not in found_branch_nodes:
        # TODO: need to filter branch nodes based on the requested
        # interval... how?
        yield node
        found_branch_nodes.add(node.path)

    if not leaf_nodes:
      continue

    # Calculate the best minimal node set.
    minimal_node_set = set()
    covered_intervals = IntervalSet([])

    # If the query doesn't fall entirely within the FIND_TOLERANCE window,
    # disregard the window. This prevents unnecessary remote fetches caused
    # when carbon's cache skews node.intervals, giving the appearance that
    # remote systems have data we don't have locally, when we probably do.
    now = int(time.time())
    tolerance_window = now - settings.FIND_TOLERANCE
    disregard_tolerance_window = query.interval.start < tolerance_window
    prior_to_window = Interval(float('-inf'), tolerance_window)

    def measure_of_added_coverage(node, drop_window=disregard_tolerance_window):
      relevant_intervals = node.intervals.intersect_interval(query.interval)
      if drop_window:
        relevant_intervals = relevant_intervals.intersect_interval(prior_to_window)
      return covered_intervals.union(relevant_intervals).size - covered_intervals.size

    nodes_remaining = list(leaf_nodes)

    # Prefer local nodes first (and do *not* drop the tolerance window).
    for node in leaf_nodes:
      if node.local and measure_of_added_coverage(node, False) > 0:
        nodes_remaining.remove(node)
        minimal_node_set.add(node)
        covered_intervals = covered_intervals.union(node.intervals)

    if settings.REMOTE_STORE_MERGE_RESULTS:
      remote_nodes = [n for n in nodes_remaining if not n.local]
      for node in remote_nodes:
        nodes_remaining.remove(node)
        minimal_node_set.add(node)
        covered_intervals = covered_intervals.union(node.intervals)
    else:
      while nodes_remaining:
        node_coverages = [(measure_of_added_coverage(n), n)
                          for n in nodes_remaining]
        best_coverage, best_node = max(node_coverages)
        if best_coverage == 0:
          break
        nodes_remaining.remove(best_node)
        minimal_node_set.add(best_node)
        covered_intervals = covered_intervals.union(best_node.intervals)

    # Sometimes the requested interval falls within the caching window.
    # Include the most likely node if the gap is within tolerance.
    if not minimal_node_set:
      def distance_to_requested_interval(node):
        if not node.intervals:
          return float('inf')
        latest = sorted(node.intervals, key=lambda i: i.end)[-1]
        distance = query.interval.start - latest.end
        return distance if distance >= 0 else float('inf')

      best_candidate = min(leaf_nodes, key=distance_to_requested_interval)
      if distance_to_requested_interval(best_candidate) <= settings.FIND_TOLERANCE:
        minimal_node_set.add(best_candidate)

    if len(minimal_node_set) == 1:
      yield minimal_node_set.pop()
    elif len(minimal_node_set) > 1:
      reader = MultiReader(minimal_node_set)
      yield LeafNode(path, reader)
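# Worked example (numbers hypothetical) of the tolerance fallback above:
# no node covers the query, the best candidate's newest interval ends at
# t=9900, and the query starts at t=10000.
query_start, latest_end = 10000, 9900
distance = query_start - latest_end   # 100
accepted = distance <= 300            # True with FIND_TOLERANCE = 300
# A node whose newest interval ends *after* the query start gets distance
# float('inf') and is never selected by this fallback.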
def find_all(self, query, headers=None):
  start = time.time()
  result_queue = Queue.Queue()
  jobs = []

  # Start remote searches.
  if not query.local:
    random.shuffle(self.remote_stores)
    jobs.extend([(store.find, query, headers)
                 for store in self.remote_stores if store.available])

  # For a single-metric query, hit carbon-cache first: if all the data can
  # be fetched from carbon-cache, DO NOT hit disk. This helps reduce iowait.
  # Requires a version of carbon-cache that supports this.
  found_in_cache = False

  # Cache nodes with incomplete results, in case we need them later and
  # don't want to query carbon-cache again.
  nodes_with_incomplete_result = {}

  for leaf_node in self.carbon_cache_finder.find_nodes(
      query, nodes_with_incomplete_result):
    yield leaf_node
    found_in_cache = True

  if found_in_cache and query.startTime != 0:
    return

  # Start local searches.
  for finder in self.finders:
    jobs.append((finder.find_nodes, query))

  # Group matching nodes by their path.
  nodes_by_path = defaultdict(list)

  def _work(job):
    return job[0](*job[1:])

  nodes_list = self.worker_pool.map(_work, jobs)

  for nodes in nodes_list:
    if nodes:
      for node in nodes:
        nodes_by_path[node.path].append(node)

  # Merge in any new metric node that exists only in carbon-cache, even if
  # its results there are partial.
  for name, node in nodes_with_incomplete_result.iteritems():
    if name not in nodes_by_path:
      nodes_by_path[name].append(node)

  log.info("Got all find results in %fs" % (time.time() - start))

  # Search carbon-cache if nodes_by_path is empty.
  #
  # This covers an edge case:
  #   1) metric: carbon.foo
  #   2) carbon-cache holds 2 hours of data for carbon.foo
  #   3) the query asks for data starting 3 hours ago
  # In that case the carbon_cache_finder above returns no node because
  # carbon-cache doesn't have enough data. If we reach this point, we should
  # return everything carbon-cache does have.
  if not nodes_by_path:
    for name, node in nodes_with_incomplete_result.iteritems():
      # Each name maps to a single node.
      yield node
    return

  # Reduce matching nodes for each path to a minimal set.
  found_branch_nodes = set()

  items = list(nodes_by_path.iteritems())
  random.shuffle(items)

  for path, nodes in items:
    leaf_nodes = []

    # First we dispense with the BranchNodes.
    for node in nodes:
      if node.is_leaf:
        leaf_nodes.append(node)
      elif node.path not in found_branch_nodes:
        # TODO: need to filter branch nodes based on the requested
        # interval... how?
        yield node
        found_branch_nodes.add(node.path)

    if not leaf_nodes:
      continue

    # Fast-path when there is a single node.
    if len(leaf_nodes) == 1:
      yield leaf_nodes[0]
      continue

    # Calculate the best minimal node set.
    minimal_node_set = set()
    covered_intervals = IntervalSet([])

    # If the query doesn't fall entirely within the FIND_TOLERANCE window,
    # disregard the window. This prevents unnecessary remote fetches caused
    # when carbon's cache skews node.intervals, giving the appearance that
    # remote systems have data we don't have locally, when we probably do.
    now = int(time.time())
    tolerance_window = now - settings.FIND_TOLERANCE
    disregard_tolerance_window = query.interval.start < tolerance_window
    prior_to_window = Interval(float('-inf'), tolerance_window)

    def measure_of_added_coverage(node, drop_window=disregard_tolerance_window):
      relevant_intervals = node.intervals.intersect_interval(query.interval)
      if drop_window:
        relevant_intervals = relevant_intervals.intersect_interval(prior_to_window)
      return covered_intervals.union(relevant_intervals).size - covered_intervals.size

    nodes_remaining = list(leaf_nodes)

    # Prefer local nodes first (and do *not* drop the tolerance window).
    for node in leaf_nodes:
      if node.local and measure_of_added_coverage(node, False) > 0:
        nodes_remaining.remove(node)
        minimal_node_set.add(node)
        covered_intervals = covered_intervals.union(node.intervals)

    if settings.REMOTE_STORE_MERGE_RESULTS:
      remote_nodes = [n for n in nodes_remaining if not n.local]
      for node in remote_nodes:
        nodes_remaining.remove(node)
        minimal_node_set.add(node)
        covered_intervals = covered_intervals.union(node.intervals)
    else:
      while nodes_remaining:
        node_coverages = [(measure_of_added_coverage(n), n)
                          for n in nodes_remaining]
        best_coverage, best_node = max(node_coverages)
        if best_coverage == 0:
          break
        nodes_remaining.remove(best_node)
        minimal_node_set.add(best_node)
        covered_intervals = covered_intervals.union(best_node.intervals)

    # Sometimes the requested interval falls within the caching window.
    # Include the most likely node if the gap is within tolerance.
    if not minimal_node_set:
      def distance_to_requested_interval(node):
        if not node.intervals:
          return float('inf')
        latest = sorted(node.intervals, key=lambda i: i.end)[-1]
        distance = query.interval.start - latest.end
        return distance if distance >= 0 else float('inf')

      best_candidate = min(leaf_nodes, key=distance_to_requested_interval)
      if distance_to_requested_interval(best_candidate) <= settings.FIND_TOLERANCE:
        minimal_node_set.add(best_candidate)

    if len(minimal_node_set) == 1:
      yield minimal_node_set.pop()
    elif len(minimal_node_set) > 1:
      reader = MultiReader(minimal_node_set)
      yield LeafNode(path, reader)
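# Usage sketch (hypothetical; node1, node2 and start_ts as in the tests
# above): a LeafNode yielded with a MultiReader behaves like any
# single-reader leaf. fetch() delegates to MultiReader, which merges the
# replicas' results point-by-point (see test_MultiReader_merge_normal), so a
# replica with gaps still yields a complete series when another replica has
# the missing points.
merged_leaf = LeafNode('hosts.worker1.cpu', MultiReader([node1, node2]))
(time_info, values) = merged_leaf.fetch(start_ts - 5, start_ts)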