def test_named(self):
    """A named pool must be distinct from the default pool and still run jobs."""
    anonymous = pool.get_pool()
    named = pool.get_pool(name='test')
    # Requesting a pool by name should not hand back the default one.
    self.assertNotEqual(anonymous, named)
    # Identity job: the queued result must be exactly the argument passed in.
    result_queue = pool.pool_apply(named, [(lambda v: v, 'a')])
    self.assertEqual(result_queue.get(True, 1), 'a')
def test_named_no_worker_pool(self):
    """With zero workers no pool object exists, yet jobs still execute."""
    # thread_count=0 disables the worker pool entirely.
    unnamed = pool.get_pool(thread_count=0)
    named = pool.get_pool(name='test', thread_count=0)
    self.assertIsNone(named)
    self.assertIsNone(unnamed)
    # pool_exec must fall back to synchronous execution when given no pool.
    jobs = [pool.Job(lambda v: v, 'job', 'a')]
    completed = list(pool.pool_exec(named, jobs, 1))
    self.assertEqual(completed[0].result, 'a')
def test_named_no_worker_pool(self):
    """Without a worker pool, get_pool returns None and work runs inline."""
    unnamed = pool.get_pool()
    named = pool.get_pool(name='test')
    self.assertIsNone(named)
    self.assertIsNone(unnamed)
    # With no pool, pool_apply executes the job synchronously, so the
    # result is already waiting on the queue.
    out = pool.pool_apply(named, [(lambda v: v, 'a')])
    self.assertEqual(out.get_nowait(), 'a')
def test_named(self):
    """Named pools are registered, usable, and removed again by stop_pool."""
    base = pool.get_pool()
    named = pool.get_pool(name='test')
    # Creation registers the pool under its name.
    self.assertIn('test', pool._pools)
    self.assertNotEqual(base, named)
    # The named pool must actually execute work.
    job_list = [pool.Job(lambda v: v, 'job', 'a')]
    completed = list(pool.pool_exec(named, job_list, 1))
    self.assertEqual(completed[0].result, 'a')
    # Stopping unregisters it.
    pool.stop_pool('test')
    self.assertNotIn('test', pool._pools)
def test_named(self):
    """Exercise the full lifecycle of a named pool: create, run, stop."""
    default_pool = pool.get_pool()
    test_pool = pool.get_pool(name='test')
    self.assertIn('test', pool._pools)  # registered on creation
    self.assertNotEqual(default_pool, test_pool)
    finished = [job for job in
                pool.pool_exec(test_pool, [pool.Job(lambda v: v, 'job', 'a')], 1)]
    self.assertEqual(finished[0].result, 'a')
    pool.stop_pool('test')
    self.assertNotIn('test', pool._pools)  # stop_pool deregisters
def worker_pool(self):
    """Return the shared "remote_finder" pool, sized for the cluster.

    The worker count grows linearly with the number of backend servers,
    on top of a fixed baseline that covers local finds and other work
    that always happens.
    """
    per_backend = settings.POOL_WORKERS_PER_BACKEND * len(settings.CLUSTER_SERVERS)
    return get_pool(name="remote_finder",
                    thread_count=per_backend + settings.POOL_WORKERS)
def test_timeout(self):
    """pool_exec must raise PoolTimeoutError when jobs outlive the deadline."""
    slow_pool = pool.get_pool()
    # Each job sleeps well past the 1s deadline before returning.
    slow_jobs = [pool.Job(lambda v: time.sleep(2) and v, i)
                 for i in range(1, 5)]
    with self.assertRaises(pool.PoolTimeoutError):
        # The error surfaces while draining the lazy result iterator.
        list(pool.pool_exec(slow_pool, slow_jobs, 1))
def pool_exec(self, jobs, timeout):
    """Run *jobs* on this store's dedicated worker pool.

    Overrides Store.pool_exec so work is dispatched to a separate,
    named pool instead of the shared one.
    """
    if not jobs:
        return []
    dedicated = get_pool(self.pool_name, self.thread_count)
    return worker_pool_exec(dedicated, jobs, timeout)
def pool_exec(self, jobs, timeout):
    """Execute *jobs* on the 'finders' pool within *timeout* seconds.

    With the worker pool disabled the pool is sized to zero threads,
    which makes execution synchronous.
    """
    if not jobs:
        return []
    if settings.USE_WORKER_POOL:
        workers = min(len(self.finders), settings.POOL_MAX_WORKERS)
    else:
        workers = 0
    return pool_exec(get_pool('finders', workers), jobs, timeout)
def test_timeout_sync(self):
    """Even the synchronous (zero-thread) path must honour the timeout."""
    sync_pool = pool.get_pool(thread_count=0)
    sleepy = [pool.Job(lambda v: time.sleep(1) and v, 'job', i)
              for i in range(1, 5)]
    with self.assertRaises(pool.PoolTimeoutError):
        # Draining the iterator triggers execution and the timeout check.
        list(pool.pool_exec(sync_pool, sleepy, 1))
def pool_exec(self, jobs, timeout):
    """Dispatch *jobs* to the shared 'finders' pool with *timeout* seconds."""
    if not jobs:
        return []
    # One worker per finder, capped by POOL_MAX_WORKERS; zero threads
    # (synchronous execution) when the worker pool is disabled.
    workers = (min(len(self.finders), settings.POOL_MAX_WORKERS)
               if settings.USE_WORKER_POOL else 0)
    return pool_exec(get_pool('finders', workers), jobs, timeout)
def test_exception(self):
    """Exceptions raised by a job are captured on the job, not propagated."""
    worker = pool.get_pool()
    boom = Exception('this is a test')

    def explode():
        raise boom

    finished = list(pool.pool_exec(worker, [pool.Job(explode, 'job')], 1))
    # The original exception object must be preserved verbatim.
    self.assertEqual(finished[0].exception, boom)
def pool_exec(self, jobs, timeout):
    ''' Overwrite of pool_exec from Store to get another workers pool '''
    # Empty batches short-circuit without touching the pool.
    if not jobs:
        return []
    thread_count = 0
    if settings.USE_WORKER_POOL:
        # NOTE(review): `parallel_jobs` is neither a parameter nor defined in
        # this method -- presumably a module-level name; confirm it is in
        # scope (len(jobs) may have been intended here).
        thread_count = min(parallel_jobs, settings.POOL_MAX_WORKERS)
    # Work runs on a dedicated 'graphouse' pool, not the shared default.
    return worker_pool_exec(get_pool('graphouse', thread_count), jobs, timeout)
def fetch(self, patterns, startTime, endTime, now, requestContext):
    """Fetch *patterns* from every matching finder and merge the results.

    Fetch jobs are fanned out over the shared worker pool. Partial
    failures and a pool timeout are tolerated; only when every job that
    completed did so with an error is an Exception raised.
    """
    # deduplicate patterns
    patterns = list(set(patterns))

    if not patterns:
        return []

    log.debug(
        'graphite.storage.Store.fetch :: Starting fetch on all backends')

    # One fetch job per finder; localOnly from the request context limits
    # which finders get_finders returns.
    jobs = [
        Job(finder.fetch, patterns, startTime, endTime,
            now=now, requestContext=requestContext)
        for finder in self.get_finders(requestContext.get('localOnly'))
    ]

    results = []

    done = 0
    errors = 0

    # Start fetches
    start = time.time()
    try:
        for job in pool_exec(get_pool(), jobs,
                             settings.REMOTE_FETCH_TIMEOUT):
            done += 1

            if job.exception:
                errors += 1
                log.debug("Fetch for %s failed after %fs: %s" %
                          (str(patterns), time.time() - start,
                           str(job.exception)))
                continue

            log.debug("Got a fetch result for %s after %fs" %
                      (str(patterns), time.time() - start))
            results.extend(job.result)
    except PoolTimeoutError:
        # A timeout is not fatal by itself; we fall through and use
        # whatever results arrived before the deadline.
        log.debug("Timed out in fetch after %fs" %
                  (time.time() - start))

    # Every job that finished did so with an error. Note this also fires
    # when nothing finished at all (0 == 0).
    if errors == done:
        raise Exception('All fetches failed for %s' % (str(patterns)))

    log.debug("Got all fetch results for %s in %fs" %
              (str(patterns), time.time() - start))
    return results
def find_all(self, query):
    """Run *query* against all eligible finders and merge matching nodes.

    Finder jobs are applied to the worker pool; results are collected
    from the returned queue until every job has answered or
    REMOTE_FIND_TIMEOUT expires, whichever comes first.
    """
    start = time.time()
    jobs = []

    # Start local searches
    for finder in self.finders:
        # Support legacy finders by defaulting to 'local = True'
        is_local = not hasattr(finder, 'local') or finder.local
        if query.local and not is_local:
            continue
        if getattr(finder, 'disabled', False):
            continue
        jobs.append((finder.find_nodes, query))

    result_queue = pool_apply(get_pool(), jobs)

    # Group matching nodes by their path
    nodes_by_path = defaultdict(list)

    timeout = settings.REMOTE_FIND_TIMEOUT
    deadline = start + timeout
    done = 0
    total = len(jobs)

    # Drain one result per completed job, bounded by the deadline.
    while done < total:
        wait_time = deadline - time.time()
        nodes = []

        try:
            nodes = result_queue.get(True, wait_time)

        # ValueError could happen if due to really unlucky timing wait_time
        # is negative
        except (Queue.Empty, ValueError):
            if time.time() > deadline:
                log.debug("Timed out in find_nodes after %fs" % timeout)
                break
            else:
                continue

        log.debug("Got a find result after %fs" % (time.time() - start))
        done += 1
        for node in nodes or []:
            nodes_by_path[node.path].append(node)

    log.debug("Got all find results in %fs" % (time.time() - start))
    return self._list_nodes(query, nodes_by_path)
def find_all(self, query):
    """Run *query* on every finder and merge the matching nodes.

    Jobs are fanned out via pool_apply; results are drained from the
    returned queue until all jobs answered or REMOTE_FIND_TIMEOUT passed.
    """
    start = time.time()
    jobs = []

    # Start local searches
    for finder in self.finders:
        # Support legacy finders by defaulting to 'local = True'
        is_local = not hasattr(finder, 'local') or finder.local
        if query.local and not is_local:
            continue
        jobs.append((finder.find_nodes, query))

    result_queue = pool_apply(get_pool(), jobs)

    # Group matching nodes by their path
    nodes_by_path = defaultdict(list)

    timeout = settings.REMOTE_FIND_TIMEOUT
    deadline = start + timeout
    done = 0
    total = len(jobs)

    # Collect one queue entry per completed job, bounded by the deadline.
    while done < total:
        wait_time = deadline - time.time()
        nodes = []

        try:
            nodes = result_queue.get(True, wait_time)

        # ValueError could happen if due to really unlucky timing wait_time
        # is negative
        except (Queue.Empty, ValueError):
            if time.time() > deadline:
                log.debug("Timed out in find_nodes after %fs" % timeout)
                break
            else:
                continue

        log.debug("Got a find result after %fs" % (time.time() - start))
        done += 1
        for node in nodes or []:
            nodes_by_path[node.path].append(node)

    log.debug("Got all find results in %fs" % (time.time() - start))
    return self._list_nodes(query, nodes_by_path)
def _find(self, query): jobs = [ Job(finder.find_nodes, query) for finder in self.get_finders(query.local) ] # Group matching nodes by their path nodes_by_path = defaultdict(list) done = 0 errors = 0 # Start finds start = time.time() try: for job in pool_exec(get_pool(), jobs, settings.REMOTE_FIND_TIMEOUT): done += 1 if job.exception: errors += 1 log.debug( "Find for %s failed after %fs: %s" % (str(query), time.time() - start, str(job.exception))) continue log.debug("Got a find result for %s after %fs" % (str(query), time.time() - start)) for node in job.result or []: nodes_by_path[node.path].append(node) except PoolTimeoutError: log.debug("Timed out in find after %fs" % (time.time() - start)) if errors == done: raise Exception('All finds failed for %s' % (str(query))) log.debug("Got all find results for %s in %fs" % (str(query), time.time() - start)) return self._list_nodes(query, nodes_by_path)
def test_basic(self):
    """Applying an empty job list to the default pool must not raise."""
    pool.pool_apply(pool.get_pool(), [])
def find_all(self, query, headers=None):
    """Find nodes matching *query* across remote stores and local finders.

    Yields BranchNodes as they are discovered and, for each path, a
    minimal set of leaf nodes whose intervals best cover the queried
    interval (merged into a single LeafNode when several are needed).
    """
    start = time.time()
    result_queue = Queue.Queue()
    jobs = []

    # Start remote searches
    if not query.local:
        # Shuffle so load spreads across remote stores between calls.
        random.shuffle(self.remote_stores)
        jobs.extend([(store.find, query, headers)
                     for store in self.remote_stores if store.available])

    # Start local searches
    for finder in self.finders:
        jobs.append((finder.find_nodes, query))

    if settings.USE_WORKER_POOL:
        return_result = lambda x: result_queue.put(x)
        for job in jobs:
            get_pool().apply_async(func=job[0], args=job[1:],
                                   callback=return_result)
    else:
        # Synchronous fallback: run each job inline and queue its result.
        for job in jobs:
            result_queue.put(job[0](*job[1:]))

    # Group matching nodes by their path
    nodes_by_path = defaultdict(list)

    deadline = start + settings.REMOTE_FIND_TIMEOUT
    result_cnt = 0

    # Drain one queue entry per job, bounded by the deadline.
    while result_cnt < len(jobs):
        wait_time = deadline - time.time()

        try:
            nodes = result_queue.get(True, wait_time)

        # ValueError could happen if due to really unlucky timing wait_time
        # is negative
        except (Queue.Empty, ValueError):
            if time.time() > deadline:
                log.info("Timed out in find_all after %fs" %
                         (settings.REMOTE_FIND_TIMEOUT))
                break
            else:
                continue

        log.info("Got a find result after %fs" % (time.time() - start))
        result_cnt += 1
        if nodes:
            for node in nodes:
                nodes_by_path[node.path].append(node)

    log.info("Got all find results in %fs" % (time.time() - start))

    # Reduce matching nodes for each path to a minimal set
    found_branch_nodes = set()

    # NOTE: .iteritems() implies this is Python 2 code.
    items = list(nodes_by_path.iteritems())
    random.shuffle(items)

    for path, nodes in items:
        leaf_nodes = []

        # First we dispense with the BranchNodes
        for node in nodes:
            if node.is_leaf:
                leaf_nodes.append(node)
            elif node.path not in found_branch_nodes:
                #TODO need to filter branch nodes based on requested interval... how?!?!?
                yield node
                found_branch_nodes.add(node.path)

        if not leaf_nodes:
            continue

        # Fast-path when there is a single node.
        if len(leaf_nodes) == 1:
            yield leaf_nodes[0]
            continue

        # Calculate best minimal node set
        minimal_node_set = set()
        covered_intervals = IntervalSet([])

        # If the query doesn't fall entirely within the FIND_TOLERANCE window
        # we disregard the window. This prevents unnecessary remote fetches
        # caused when carbon's cache skews node.intervals, giving the appearance
        # remote systems have data we don't have locally, which we probably do.
        now = int(time.time())
        tolerance_window = now - settings.FIND_TOLERANCE
        disregard_tolerance_window = query.interval.start < tolerance_window
        prior_to_window = Interval(float('-inf'), tolerance_window)

        def measure_of_added_coverage(
                node, drop_window=disregard_tolerance_window):
            # How much new coverage of the queried interval this node adds
            # on top of what is already covered.
            relevant_intervals = node.intervals.intersect_interval(
                query.interval)
            if drop_window:
                relevant_intervals = relevant_intervals.intersect_interval(
                    prior_to_window)
            return covered_intervals.union(
                relevant_intervals).size - covered_intervals.size

        nodes_remaining = list(leaf_nodes)

        # Prefer local nodes first (and do *not* drop the tolerance window)
        for node in leaf_nodes:
            if node.local and measure_of_added_coverage(node, False) > 0:
                nodes_remaining.remove(node)
                minimal_node_set.add(node)
                covered_intervals = covered_intervals.union(node.intervals)

        if settings.REMOTE_STORE_MERGE_RESULTS:
            # Merge mode: include every remaining remote node unconditionally.
            remote_nodes = [n for n in nodes_remaining if not n.local]
            for node in remote_nodes:
                nodes_remaining.remove(node)
                minimal_node_set.add(node)
                covered_intervals = covered_intervals.union(node.intervals)
        else:
            # Greedy set-cover: repeatedly take the node adding most coverage.
            while nodes_remaining:
                node_coverages = [(measure_of_added_coverage(n), n)
                                  for n in nodes_remaining]
                best_coverage, best_node = max(node_coverages)

                if best_coverage == 0:
                    break

                nodes_remaining.remove(best_node)
                minimal_node_set.add(best_node)
                covered_intervals = covered_intervals.union(
                    best_node.intervals)

        # Sometimes the requested interval falls within the caching window.
        # We include the most likely node if the gap is within tolerance.
        if not minimal_node_set:
            def distance_to_requested_interval(node):
                # Gap between the node's latest data and the query start;
                # infinite when the node has no intervals or overlaps.
                if not node.intervals:
                    return float('inf')
                latest = sorted(node.intervals, key=lambda i: i.end)[-1]
                distance = query.interval.start - latest.end
                return distance if distance >= 0 else float('inf')

            best_candidate = min(
                leaf_nodes, key=distance_to_requested_interval)
            if distance_to_requested_interval(
                    best_candidate) <= settings.FIND_TOLERANCE:
                minimal_node_set.add(best_candidate)

        if len(minimal_node_set) == 1:
            yield minimal_node_set.pop()

        elif len(minimal_node_set) > 1:
            # Several sources are needed to cover the interval: wrap them
            # in a MultiReader behind a single LeafNode.
            reader = MultiReader(minimal_node_set)
            yield LeafNode(path, reader)
def test_basic(self):
    """Smoke test: an empty batch of work is accepted without error."""
    default_pool = pool.get_pool()
    no_jobs = []
    pool.pool_apply(default_pool, no_jobs)
def fetch_list(self, startTime, endTime, now=None, requestContext=None):
    """Fetch this reader's bulk targets from the remote store.

    Returns a FetchInProgress whose retrieval is deferred; concurrent
    callers asking for the same URL share one in-flight request via the
    per-request ``inflight_requests`` / ``inflight_locks`` caches stored
    on *requestContext*.
    """
    t = time.time()

    query_params = [
        ('format', 'pickle'),
        ('local', '1'),
        ('noCache', '1'),
        ('from', str(int(startTime))),
        ('until', str(int(endTime)))
    ]
    for target in self.bulk_query:
        query_params.append(('target', target))
    if now is not None:
        query_params.append(('now', str(int(now))))

    query_string = urlencode(query_params)
    urlpath = '/render/'
    url = "%s://%s%s" % ('https' if settings.INTRACLUSTER_HTTPS else 'http',
                         self.store.host, urlpath)
    headers = requestContext.get('forwardHeaders') if requestContext else None

    # The cache key is the full request URL including the query string.
    cacheKey = "%s?%s" % (url, query_string)

    # Fast path: another caller already issued this exact request.
    if requestContext is not None and 'inflight_requests' in requestContext \
            and cacheKey in requestContext['inflight_requests']:
        self.log_debug("RemoteReader:: Returning cached FetchInProgress %s?%s" %
                       (url, query_string))
        return requestContext['inflight_requests'][cacheKey]

    # Ensure a per-cacheKey lock exists; the global inflight_lock guards
    # creation of the per-key structures.
    if requestContext is None or 'inflight_locks' not in requestContext \
            or cacheKey not in requestContext['inflight_locks']:
        with self.inflight_lock:
            self.log_debug("RemoteReader:: Got global lock %s?%s" %
                           (url, query_string))
            if requestContext is None:
                requestContext = {}
            if 'inflight_locks' not in requestContext:
                requestContext['inflight_locks'] = {}
            if 'inflight_requests' not in requestContext:
                requestContext['inflight_requests'] = {}
            if cacheKey not in requestContext['inflight_locks']:
                self.log_debug("RemoteReader:: Creating lock %s?%s" %
                               (url, query_string))
                requestContext['inflight_locks'][cacheKey] = Lock()
        self.log_debug("RemoteReader:: Released global lock %s?%s" %
                       (url, query_string))

    cacheLock = requestContext['inflight_locks'][cacheKey]

    with cacheLock:
        self.log_debug("RemoteReader:: got url lock %s?%s" %
                       (url, query_string))

        # Re-check under the key lock: someone may have won the race.
        if cacheKey in requestContext['inflight_requests']:
            self.log_debug("RemoteReader:: Returning cached FetchInProgress %s?%s" %
                           (url, query_string))
            return requestContext['inflight_requests'][cacheKey]

        q = Queue()
        if settings.USE_WORKER_POOL:
            # Fetch asynchronously; the callback delivers into the queue.
            get_pool().apply_async(
                func=self._fetch,
                args=[url, query_string, query_params, headers],
                callback=lambda x: q.put(x),
            )
        else:
            # Synchronous fallback: fetch inline and queue the result.
            q.put(
                self._fetch(url, query_string, query_params, headers),
            )

        def retrieve():
            # Deferred retrieval; memoizes the queue result on itself so
            # subsequent calls (from any thread) return the same data.
            with retrieve.lock:
                # if the result is known we return it directly
                if hasattr(retrieve, '_result'):
                    results = getattr(retrieve, '_result')
                    self.log_debug(
                        'RemoteReader:: retrieve completed (cached) %s' %
                        (', '.join([result['path'] for result in results])),
                    )
                    return results

                # otherwise we get it from the queue and keep it for later
                results = q.get(block=True)

                for i in range(len(results)):
                    results[i]['path'] = results[i]['name']

                if not results:
                    self.log_debug('RemoteReader:: retrieve has received no results')

                setattr(retrieve, '_result', results)
                self.log_debug(
                    'RemoteReader:: retrieve completed %s' %
                    (', '.join([result['path'] for result in results])),
                )
                return results

        self.log_debug(
            'RemoteReader:: Storing FetchInProgress with cacheKey {cacheKey}'
            .format(cacheKey=cacheKey),
        )
        retrieve.lock = Lock()
        data = FetchInProgress(retrieve)
        requestContext['inflight_requests'][cacheKey] = data

    self.log_debug("RemoteReader:: Returning %s?%s in %fs" %
                   (url, query_string, time.time() - t))
    return data
def tearDown(self):
    """Shut down the default worker pool after each test."""
    default_pool = get_pool()
    default_pool.close()
def worker_pool(self):
    """Pool used for remote finds, sized per backend plus a fixed baseline."""
    # Linear in the number of backend servers, with POOL_WORKERS as the
    # baseline for local finds and other always-present work.
    backends = len(settings.CLUSTER_SERVERS)
    workers = settings.POOL_WORKERS + backends * settings.POOL_WORKERS_PER_BACKEND
    return get_pool(name="remote_finder", thread_count=workers)
def test_basic(self):
    """pool_exec with no jobs yields an empty result set."""
    empty = pool.pool_exec(pool.get_pool(), [], 1)
    self.assertEqual(list(empty), [])
def tearDown(self):
    """Close the shared pool so workers don't leak between tests."""
    shared = get_pool()
    shared.close()
def fetch_list(self, startTime, endTime, now=None, requestContext=None):
    """Fetch this reader's single query target from the remote store.

    Returns a FetchInProgress; concurrent callers asking for the same
    URL share one in-flight request via the ``inflight_requests`` /
    ``inflight_locks`` caches stored on *requestContext*.
    """
    t = time.time()

    query_params = [('target', self.query),
                    ('format', 'pickle'),
                    ('local', '1'),
                    ('from', str(int(startTime))),
                    ('until', str(int(endTime)))]
    if now is not None:
        query_params.append(('now', str(int(now))))

    query_string = urlencode(query_params)
    urlpath = '/render/'
    url = "%s://%s%s" % ('https' if settings.INTRACLUSTER_HTTPS else 'http',
                         self.store.host, urlpath)
    headers = requestContext.get(
        'forwardHeaders') if requestContext else None

    # The cache key is the full request URL including the query string.
    cacheKey = "%s?%s" % (url, query_string)

    # Fast path: another caller already issued this exact request.
    if requestContext is not None and 'inflight_requests' in requestContext \
            and cacheKey in requestContext['inflight_requests']:
        self.log_debug(
            "RemoteReader:: Returning cached FetchInProgress %s?%s" %
            (url, query_string))
        return requestContext['inflight_requests'][cacheKey]

    # Ensure a per-cacheKey lock exists; the global inflight_lock guards
    # creation of the per-key structures.
    if requestContext is None or 'inflight_locks' not in requestContext \
            or cacheKey not in requestContext['inflight_locks']:
        with self.inflight_lock:
            self.log_debug("RemoteReader:: Got global lock %s?%s" %
                           (url, query_string))
            if requestContext is None:
                requestContext = {}
            if 'inflight_locks' not in requestContext:
                requestContext['inflight_locks'] = {}
            if 'inflight_requests' not in requestContext:
                requestContext['inflight_requests'] = {}
            if cacheKey not in requestContext['inflight_locks']:
                self.log_debug("RemoteReader:: Creating lock %s?%s" %
                               (url, query_string))
                requestContext['inflight_locks'][cacheKey] = Lock()
        self.log_debug("RemoteReader:: Released global lock %s?%s" %
                       (url, query_string))

    cacheLock = requestContext['inflight_locks'][cacheKey]

    with cacheLock:
        self.log_debug("RemoteReader:: got url lock %s?%s" %
                       (url, query_string))

        # Re-check under the key lock: someone may have won the race.
        if cacheKey in requestContext['inflight_requests']:
            self.log_debug(
                "RemoteReader:: Returning cached FetchInProgress %s?%s" %
                (url, query_string))
            return requestContext['inflight_requests'][cacheKey]

        q = Queue()
        if settings.USE_WORKER_POOL:
            # Fetch asynchronously; the callback delivers into the queue.
            get_pool().apply_async(
                func=self._fetch,
                args=[url, query_string, query_params, headers],
                callback=lambda x: q.put(x),
            )
        else:
            # Synchronous fallback: fetch inline and queue the result.
            q.put(self._fetch(url, query_string, query_params, headers), )

        def retrieve():
            # Deferred retrieval; memoizes the queue result on itself so
            # subsequent calls (from any thread) return the same data.
            with retrieve.lock:
                # if the result is known we return it directly
                if hasattr(retrieve, '_result'):
                    results = getattr(retrieve, '_result')
                    self.log_debug(
                        'RemoteReader:: retrieve completed (cached) %s' %
                        (', '.join([result['path'] for result in results])),
                    )
                    return results

                # otherwise we get it from the queue and keep it for later
                results = q.get(block=True)

                for i in range(len(results)):
                    results[i]['path'] = results[i]['name']

                if not results:
                    self.log_debug(
                        'RemoteReader:: retrieve has received no results')

                setattr(retrieve, '_result', results)
                self.log_debug(
                    'RemoteReader:: retrieve completed %s' %
                    (', '.join([result['path'] for result in results])),
                )
                return results

        self.log_debug(
            'RemoteReader:: Storing FetchInProgress with cacheKey {cacheKey}'
            .format(cacheKey=cacheKey),
        )
        retrieve.lock = Lock()
        data = FetchInProgress(retrieve)
        requestContext['inflight_requests'][cacheKey] = data

    self.log_debug("RemoteReader:: Returning %s?%s in %fs" %
                   (url, query_string, time.time() - t))
    return data