def test_distribution():
    strings = [str(i) for i in range(60000)]
    node_weights = {'aaa': 1, 'bbb': 2, 'ccc': 1, 'ddd': 1, 'eee': 1}
    ring = NodeRing(node_weights.keys(), node_weights)
    distribution = defaultdict(list)
    for s in strings:
        distribution[ring.get_node(s)].append(s)

    assert 9000 < len(distribution['aaa']) < 11000
    assert 18000 < len(distribution['bbb']) < 22000
    assert 9000 < len(distribution['ccc']) < 11000
    assert 9000 < len(distribution['ddd']) < 11000
    assert 9000 < len(distribution['eee']) < 11000

    # Drop a node. The new distributions should be supersets of the previous ones;
    # keys already assigned to the remaining nodes should not move.
    ring.remove_node('aaa')
    removed_distribution = defaultdict(list)
    for s in strings:
        removed_distribution[ring.get_node(s)].append(s)

    assert 22000 < len(removed_distribution['bbb']) < 26000
    assert 11000 < len(removed_distribution['ccc']) < 13000
    assert 11000 < len(removed_distribution['ddd']) < 13000
    assert 11000 < len(removed_distribution['eee']) < 13000

    assert set(distribution['bbb']) <= set(removed_distribution['bbb'])
    assert set(distribution['ccc']) <= set(removed_distribution['ccc'])
    assert set(distribution['ddd']) <= set(removed_distribution['ddd'])
    assert set(distribution['eee']) <= set(removed_distribution['eee'])

    # Add the node back again and check that the distribution reverts to the previous one.
    ring.add_node('aaa', weight=1)
    added_distribution = defaultdict(list)
    for s in strings:
        added_distribution[ring.get_node(s)].append(s)

    assert distribution['aaa'] == added_distribution['aaa']
    assert distribution['bbb'] == added_distribution['bbb']
    assert distribution['ccc'] == added_distribution['ccc']
    assert distribution['ddd'] == added_distribution['ddd']
    assert distribution['eee'] == added_distribution['eee']
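
# The test above exercises a weighted consistent-hash ring. The real NodeRing
# implementation is not part of this file; the sketch below only illustrates the
# kind of weighted consistent hashing the test relies on (weight-proportional key
# distribution, minimal key movement when a node is removed or re-added). The md5
# hash and the 100 virtual points per weight unit are assumptions, not values taken
# from qclient.
import bisect
import hashlib


class SketchNodeRing(object):
    POINTS_PER_WEIGHT = 100

    def __init__(self, nodes, weights=None):
        self._points = []  # Sorted list of (hash, node) pairs forming the ring.
        weights = weights or {}
        for node in nodes:
            self.add_node(node, weight=weights.get(node, 1))

    @staticmethod
    def _hash(key):
        return int(hashlib.md5(key.encode('utf-8')).hexdigest(), 16)

    def add_node(self, node, weight=1):
        # Each node owns a number of virtual points proportional to its weight,
        # which yields the roughly weight-proportional distribution seen in the test.
        for i in range(weight * self.POINTS_PER_WEIGHT):
            bisect.insort(self._points, (self._hash('{0}-{1}'.format(node, i)), node))

    def remove_node(self, node):
        # Only keys that hashed to the removed node's points move; all other keys stay put.
        self._points = [p for p in self._points if p[1] != node]

    def get_node(self, key):
        if not self._points:
            return None

        index = bisect.bisect(self._points, (self._hash(key), ''))
        # Wrap around the ring when the key hashes past the last point.
        return self._points[index % len(self._points)][1]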
class QClient(object):
    """
    Main client class.

    Basic example:
    >>> client = QClient(node_list=('http://host1:9401', 'http://host2:9401', 'http://host3:9401'))
    >>> result = client.get('someKey', {'select': ['col1', 'col2', 'col3'], 'where': ['<', 'col', 1]})

    :param node_list: List or other iterable with addresses to qcache servers.
                      Eg. ['http://host1:9401', 'http://host2:9401']
    :param connect_timeout: Number of seconds to wait before a connection timeout occurs.
    :param read_timeout: Number of seconds to wait before a read timeout occurs.
    :param verify: If https is used, controls whether the host certificate should be verified.
    :param auth: Tuple (username, password), used for basic auth.
    :param consecutive_error_count_limit: Number of times to retry operations before giving up.
    """

    def __init__(
        self,
        node_list,
        connect_timeout=1.0,
        read_timeout=2.0,
        verify=True,
        auth=None,
        consecutive_error_count_limit=10
    ):
        self.node_ring = NodeRing(node_list)
        self.failing_nodes = set()
        self.connect_timeout = connect_timeout
        self.read_timeout = read_timeout
        self.check_interval = 10
        self.check_attempt_count = 0
        self.session = requests.session()
        self.verify = verify
        self.auth = auth
        self.consecutive_error_count = 0
        self.consecutive_error_count_limit = consecutive_error_count_limit
        self.statistics = None
        self._clear_statistics()

    def _clear_statistics(self):
        self.statistics = defaultdict(_node_statisticts)

    def _node_for_key(self, key):
        node = self.node_ring.get_node(key)
        if not node:
            # No node available. Re-check the dropped nodes and try once more;
            # if there is still no node, fail.
            self._test_dropped_nodes()
            node = self.node_ring.get_node(key)
            if not node:
                raise NoCacheAvailable("No QCaches reachable")

        return node

    def _test_dropped_nodes(self):
        # Test all nodes that are currently on the fail list. Any node that responds
        # gets reinserted into the node ring. A more selective strategy may be required
        # in the future but keep it simple for now.
        for node in list(self.failing_nodes):
            status_url = self._status_url(node)
            try:
                response = self.session.get(
                    status_url,
                    verify=self.verify,
                    auth=self.auth,
                    timeout=(self.connect_timeout, self.read_timeout)
                )
                if response.status_code == 200:
                    self.node_ring.add_node(node)
                    self.failing_nodes.remove(node)
                    self.statistics[node]["resurrections"] += 1
            except RequestException:
                self.statistics[node]["retry_error"] += 1

    def _drop_node(self, node):
        self.node_ring.remove_node(node)
        self.failing_nodes.add(node)

    def _check_dropped_nodes(self):
        if self.check_attempt_count % self.check_interval == 0:
            self._test_dropped_nodes()

        self.check_attempt_count += 1

    @contextmanager
    def _connection_error_manager(self, node):
        try:
            yield
            self.consecutive_error_count = 0
        except ConnectTimeout:
            self.statistics[node]["connect_timeout"] += 1
            self._drop_node(node)
            self.consecutive_error_count += 1
        except ConnectionError:
            self.statistics[node]["connection_error"] += 1
            self._drop_node(node)
            self.consecutive_error_count += 1
        except ReadTimeout:
            self.statistics[node]["read_timeout"] += 1
            self._drop_node(node)
            self.consecutive_error_count += 1
        finally:
            if self.consecutive_error_count >= self.consecutive_error_count_limit:
                self.consecutive_error_count = 0
                raise TooManyConsecutiveErrors(
                    "Too many errors occurred while trying operation: {stat}".format(stat=dict(self.statistics))
                )

    @staticmethod
    def _status_url(node):
        new_node = node if node.endswith("/") else node + "/"
        return new_node + "qcache/status"

    @staticmethod
    def _key_url(node, key):
        new_node = node if node.endswith("/") else node + "/"
        return new_node + "qcache/dataset/" + key

    def get_statistics(self):
        statistics = self.statistics
        self._clear_statistics()
        return statistics

    def get(self, key, q, accept="application/json", post_query=False, query_headers=None):
        """
        Execute query and return result.

        :param key: Key for the table to query.
        :param q: Dict with the query as described in the QCache documentation
        :param accept: Response type, application/json and text/csv are supported
        :param post_query: If set the query will be executed using a POST rather than GET.
                           Good for very large queries.
        :param query_headers: dict with additional headers to include when issuing query.
                              Key - header name
                              Value - header value
        :returns QueryResult: Contains the result of the query.
        :raises MalformedQueryException:
        :raises UnsupportedAcceptType:
        :raises UnexpectedServerResponse:
        :raises TooManyConsecutiveErrors:
        :raises NoCacheAvailable:
        """
        self._check_dropped_nodes()
        json_q = json.dumps(q)
        headers = {"Accept": accept}
        if query_headers:
            headers.update(query_headers)

        while True:
            node = self._node_for_key(key)
            key_url = self._key_url(node, key)
            with self._connection_error_manager(node):
                if post_query:
                    headers["Content-Type"] = "application/json"
                    response = self.session.post(
                        key_url + "/q",
                        data=json_q,
                        headers=headers,
                        auth=self.auth,
                        timeout=(self.connect_timeout, self.read_timeout),
                        verify=self.verify,
                    )
                else:
                    response = self.session.get(
                        key_url,
                        params={"q": json_q},
                        headers=headers,
                        auth=self.auth,
                        timeout=(self.connect_timeout, self.read_timeout),
                        verify=self.verify,
                    )

                if response.status_code == 200:
                    return QueryResult(
                        response.content,
                        int(response.headers["X-QCache-unsliced-length"]),
                        encoding=response.headers.get("Content-Encoding"),
                    )

                if response.status_code == 404:
                    return None

                if response.status_code == 400:
                    raise MalformedQueryException(
                        'Malformed query "{json_q}", server response "{server_response}"'.format(
                            json_q=json_q, server_response=response.content
                        )
                    )
                elif response.status_code == 406:
                    raise UnsupportedAcceptType('Accept type "{accept}" is not supported'.format(accept=accept))
                else:
                    raise UnexpectedServerResponse(
                        "Unable to query dataset, status code {status_code}".format(status_code=response.status_code)
                    )

    def post(self, key, content, content_type="text/csv", post_headers=None):
        """
        Post table data to QCache for key.

        :param key: Key to store the table under
        :param content: Byte string with content encoded either as CSV or JSON.
        :param content_type: application/json or text/csv depending on uploaded content
        :param post_headers: dict with additional headers to include.
                             Key - header name
                             Value - header value
        :return: None
        :raises MalformedQueryException:
        :raises UnsupportedAcceptType:
        :raises UnexpectedServerResponse:
        :raises TooManyConsecutiveErrors:
        :raises NoCacheAvailable:
        """
        self._check_dropped_nodes()
        while True:
            node = self._node_for_key(key)
            key_url = self._key_url(node, key)
            headers = {"Content-type": content_type}
            if post_headers:
                headers.update(post_headers)

            with self._connection_error_manager(node):
                response = self.session.post(
                    key_url,
                    headers=headers,
                    data=content,
                    timeout=(self.connect_timeout, 10 * self.read_timeout),
                    verify=self.verify,
                    auth=self.auth,
                )
                if response.status_code == 201:
                    return

                self.statistics[node]["unknown_error"] += 1
                raise UnexpectedServerResponse(
                    "Unable to create dataset, status code {status_code}".format(status_code=response.status_code)
                )

    def query(
        self,
        key,
        q,
        load_fn,
        load_fn_kwargs=None,
        content_type="text/csv",
        accept="application/json",
        post_headers=None,
        post_query=False,
        query_headers=None,
    ):
        """
        Convenience method to query for data. If the requested key is not available in the QCache
        a call will be made to :load_fn: providing :load_fn_kwargs: as key value args. :load_fn:
        should return the data to insert into QCache. Once the data has been pushed to QCache the
        query is executed again against the newly created table.

        :param key: Key for the table to query.
        :param q: Dict with the query as described in the QCache documentation
        :param load_fn: Function called to fetch data if not present in QCache.
        :param load_fn_kwargs: Key-value arguments to load_fn
        :param content_type: application/json or text/csv depending on uploaded content
        :param accept: Response type, application/json and text/csv are supported
        :param post_headers: dict with additional headers to include when pushing data to the caches.
                             Key - header name
                             Value - header value
        :param post_query: If set the query will be executed using a POST rather than GET.
                           Good for very large queries.
        :param query_headers: dict with additional headers to include when issuing query.
                              Key - header name
                              Value - header value
        :return: QueryResult: Contains the result of the query.
        :raises MalformedQueryException:
        :raises UnsupportedAcceptType:
        :raises UnexpectedServerResponse:
        :raises TooManyConsecutiveErrors:
        :raises NoCacheAvailable:
        """
        content = None
        try_count = 0
        while True:
            result = self.get(key, q, accept, post_query, query_headers)
            if result is not None:
                return result

            try_count += 1
            if try_count > self.consecutive_error_count_limit:
                raise TooManyConsecutiveErrors(
                    "Unable to query dataset after {try_count} tries, this is probably a sign of problems".format(
                        try_count=try_count
                    )
                )

            if content is None:
                kwargs = load_fn_kwargs or {}
                content = load_fn(**kwargs)

            self.post(key, content, content_type=content_type, post_headers=post_headers)

    def delete(self, key):
        """
        Delete the table stored under key from QCache.

        NOTE: If more than one QCache node is used there is no guarantee that the table is
        completely removed, since it may be stored in multiple locations depending on events.

        Example: A small installation with two QCaches, qc1 and qc2. Table t1 is stored on qc1.
        qc1 disappears for an unknown reason. t1 is then stored on and read from qc2 instead.
        Later qc1 comes back. Data is now read from qc1 instead. A delete would in this case be
        issued against qc1; after the delete, qc2 would still hold a copy of t1.

        :param key: Key for the table to delete
        :raises UnexpectedServerResponse:
        :raises TooManyConsecutiveErrors:
        :raises NoCacheAvailable:
        :return: None
        """
        while True:
            node = self._node_for_key(key)
            key_url = self._key_url(node, key)
            with self._connection_error_manager(node):
                self.session.delete(
                    key_url,
                    timeout=(self.connect_timeout, self.read_timeout),
                    verify=self.verify,
                    auth=self.auth
                )
                return
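

# A hedged usage sketch of the client above, showing query() with a cache-miss loader.
# The node addresses, dataset key, query and loader are illustrative only; they are
# not part of qclient itself.
def _load_employees():
    # Hypothetical loader invoked on a cache miss; returns CSV content for the dataset.
    return b"name,salary\nalice,100\nbob,90\n"


def example_usage():
    client = QClient(node_list=('http://host1:9401', 'http://host2:9401'))
    result = client.query(
        'employees',
        q={'select': ['name'], 'where': ['>', 'salary', 95]},
        load_fn=_load_employees,
        content_type='text/csv',
        accept='application/json',
    )

    # Per-node error/resurrection counters accumulated since the last call.
    print(client.get_statistics())
    return result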
def test_get_node_no_nodes_available():
    ring = NodeRing(['12345'])
    ring.remove_node('12345')
    assert ring.get_node('12345') is None
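
# The empty-ring behaviour tested above is what QClient surfaces as NoCacheAvailable.
# A caller that prefers a soft failure might wrap a read as sketched below; the key and
# query are illustrative only, and QClient/NoCacheAvailable are assumed to be imported
# from the client module shown earlier.
def get_or_none(client, key, q):
    try:
        return client.get(key, q)
    except NoCacheAvailable:
        # Every node is currently unreachable; treat it as a cache miss.
        return None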