def get_queues_sizes(self):
    out = {}
    for queue in retry_call(self.c.keys, "%s-*" % self.prefix):
        queue_size = int(retry_call(self.c.get, queue))
        out[queue] = queue_size
    return {k.decode('utf-8'): v for k, v in out.items()}
def blocking_pop(self, timeout=0, low_priority=False):
    """When only one item is requested, blocking is possible."""
    if low_priority:
        result = retry_call(self.c.bzpopmax, self.name, timeout)
    else:
        result = retry_call(self.c.bzpopmin, self.name, timeout)
    if result:
        return decode(result[1][SORTING_KEY_LEN:])
    return None
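# A minimal sketch of the shape bzpopmin returns, using plain redis-py against
# a local server; the 'pq' key and 'prefix!payload' member are made up for
# illustration. The member carries a fixed-width sort-key prefix, which is why
# blocking_pop slices SORTING_KEY_LEN bytes off result[1] above.
from redis import Redis

r = Redis()
r.zadd('pq', {'prefix!payload': 1})
result = r.bzpopmin('pq', timeout=1)  # -> (b'pq', b'prefix!payload', 1.0)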
def delete(self):
    """Clear the tables from the redis server."""
    retry_call(self.client.delete, self._dispatch_key)
    retry_call(self.client.delete, self._finish_key)
    self.schedules.delete()
    self._outstanding_service_count.delete()
    self._file_tree.delete()
    self._other_errors.delete()
    self._attempts.delete()
def unpush(self, num=None):
    if num is not None and num <= 0:
        return []

    if num:
        return [decode(s) for s in retry_call(self.t, args=[self.name, num])]
    else:
        ret_val = retry_call(self.t, args=[self.name, 1])
        if ret_val:
            return decode(ret_val[0])
        return None
def pop(self, name, blocking=True, timeout=0):
    if blocking:
        response = retry_call(self.c.blpop, name, timeout)
    else:
        response = retry_call(self.c.lpop, name)

    if not response:
        return response

    # blpop returns a (queue_name, value) pair, while lpop returns the value alone
    if blocking:
        return json.loads(response[1])
    else:
        return json.loads(response)
def pop(self, blocking: bool = True, timeout: int = 0) -> Optional[T]:
    if blocking:
        response = retry_call(self.c.blpop, self.name, timeout)
    else:
        response = retry_call(self.c.lpop, self.name)

    if not response:
        return response

    if blocking:
        return json.loads(response[1])
    else:
        return json.loads(response)
def unpush(self, num=None) -> Union[list[T], Optional[T]]:
    if num is not None and num <= 0:
        return []

    if num:
        return [decode(s[0][SORTING_KEY_LEN:])
                for s in retry_call(self.c.zpopmax, self.name, num)]
    else:
        ret_val = retry_call(self.c.zpopmax, self.name, 1)
        if ret_val:
            return decode(ret_val[0][0][SORTING_KEY_LEN:])
        return None
def pop(self, num=None):
    if num is not None and num <= 0:
        return []

    if num:
        # The pop script takes a zero-based, inclusive end index (zrange-style),
        # hence num - 1; the first 21 bytes of each entry are the sort-key
        # prefix (SORTING_KEY_LEN elsewhere in this module).
        return [decode(s[21:]) for s in retry_call(self.r, args=[self.name, num - 1])]
    else:
        ret_val = retry_call(self.r, args=[self.name, 0])
        if ret_val:
            return decode(ret_val[0][21:])
        return None
def fail_recoverable(self, file_hash: str, service: str, error_key: str = None):
    """A service task has failed but should be retried; clear that it has been dispatched.

    After this call, the service is in a non-dispatched state, and the status can't be
    updated until it is dispatched again.
    """
    if error_key:
        self._other_errors.add(error_key)
    retry_call(self.client.hdel, self._dispatch_key, f"{file_hash}-{service}")
    self._outstanding_service_count.increment(file_hash, -1)
def pop(self, num=None):
    if num is not None and num <= 0:
        return []

    if num:
        return [decode(s[0][SORTING_KEY_LEN:])
                for s in retry_call(self.c.zpopmin, self.name, num)]
    else:
        ret_val = retry_call(self.c.zpopmin, self.name, 1)
        if ret_val:
            return decode(ret_val[0][0][SORTING_KEY_LEN:])
        return None
def unpush(self, num=None):
    if num is not None and num <= 0:
        return []

    if num:
        return [decode(s[0]) for s in retry_call(self.c.zpopmax, self.name, num)]
    else:
        ret_val = retry_call(self.c.zpopmax, self.name, 1)
        if ret_val:
            return decode(ret_val[0][0])
        return None
def __init__(self, names, host=None, port=None, private=False):
    self.c = get_client(host, port, private)
    self.p = retry_call(self.c.pubsub)
    if not isinstance(names, list):
        names = [names]
    self.names = names
    self._connected = False
def dispatch_time(self, file_hash: str, service: str) -> float:
    """When was dispatch called for this sha/service pair."""
    result = retry_call(self.client.hget, self._dispatch_key, f"{file_hash}-{service}")
    if result is None:
        return 0
    return float(result)
def items(self) -> dict:
    items = retry_call(self.c.hgetall, self.name)
    if not isinstance(items, dict):
        return {}
    for k in items.keys():
        items[k] = json.loads(items[k])
    return {k.decode('utf-8'): v for k, v in items.items()}
def try_run(self):
    counter = self.counter
    apm_client = self.apm_client

    while self.running:
        self.heartbeat()

        # Download all messages from the queue that have expired
        seconds, _ = retry_call(self.redis.time)
        messages = self.queue.dequeue_range(0, seconds)

        cpu_mark = time.process_time()
        time_mark = time.time()

        # Try to pass on all the messages to their intended recipients; don't let
        # the failure of one message prevent the others from going through
        for key in messages:
            # Start of transaction
            if apm_client:
                apm_client.begin_transaction('process_messages')

            message = self.hash.pop(key)
            if message:
                try:
                    if message['action'] == WatcherAction.TimeoutTask:
                        self.cancel_service_task(message['task_key'], message['worker'])
                    else:
                        queue = NamedQueue(message['queue'], self.redis)
                        queue.push(message['message'])
                    self.counter.increment('expired')

                    # End of transaction (success)
                    if apm_client:
                        apm_client.end_transaction('watch_message', 'success')
                except Exception as error:
                    # End of transaction (exception)
                    if apm_client:
                        apm_client.end_transaction('watch_message', 'error')
                    self.log.exception(error)
            else:
                # End of transaction (duplicate)
                if apm_client:
                    apm_client.end_transaction('watch_message', 'duplicate')
                self.log.warning(f'Handled watch twice: {key} {len(key)} {type(key)}')

        counter.increment_execution_time('cpu_seconds', time.process_time() - cpu_mark)
        counter.increment_execution_time('busy_seconds', time.time() - time_mark)

        if not messages:
            time.sleep(0.1)
def __init__(self, host=None, port=None, private=None,
             deserializer: Callable[[str], MessageType] = json.loads):
    client: Redis[Any] = get_client(host, port, private)
    self.pubsub = retry_call(client.pubsub)
    self.worker: Optional[threading.Thread] = None
    self.deserializer = deserializer
def limited_add(self, key, value, size_limit):
    """Add a single (key, value) pair to the hash, but only if that wouldn't grow the hash past a given size.

    If the hash has hit the size limit, returns None.
    Otherwise, returns the result of hsetnx (same as `add`).
    """
    return retry_call(self._limited_add, keys=[self.name],
                      args=[key, json.dumps(value), size_limit])
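# The _limited_add script is registered elsewhere and isn't shown here; a lua
# script honouring the same contract might look like the sketch below (an
# assumption about the implementation, not the project's actual script).
from redis import Redis

client = Redis()
_limited_add = client.register_script("""
if redis.call('hlen', KEYS[1]) < tonumber(ARGV[3]) then
    return redis.call('hsetnx', KEYS[1], ARGV[1], ARGV[2])
end
return nil
""")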
def __init__(self, sid: str, client: Union[Redis, StrictRedis], fetch_results=False):
    """
    :param sid: Submission ID this table tracks.
    :param client: Redis client to use.
    :param fetch_results: Preload all the results on the redis server.
    """
    self.client = client
    self.sid = sid
    self._dispatch_key = f'{sid}{dispatch_tail}'
    self._finish_key = f'{sid}{finished_tail}'
    self._finish = self.client.register_script(finish_script)

    # Cache the schedules calculated for the dispatcher, used to prevent rebuilding the
    # schedule repeatedly, and for telling the UI what services are pending
    self.schedules = ExpiringHash(f'dispatch-hash-schedules-{sid}', host=self.client)

    # How many services are outstanding for each file in the submission
    self._outstanding_service_count = ExpiringHash(f'dispatch-hash-files-{sid}', host=self.client)

    # Track which files have been extracted by what, in order to rebuild the file tree later
    self._file_tree = ExpiringSet(f'dispatch-hash-parents-{sid}', host=self.client)
    self._attempts = ExpiringHash(f'dispatch-hash-attempts-{sid}', host=self.client)

    # Local caches for _files and finished table
    self._cached_files = set(self._outstanding_service_count.keys())
    self._cached_results = dict()
    if fetch_results:
        self._cached_results = self.all_results()

    # Errors that are related to a submission, but not the terminal errors of a service
    self._other_errors = ExpiringSet(f'dispatch-hash-errors-{sid}', host=self.client)

    # TODO: set these expire times from the global time limit for submissions
    retry_call(self.client.expire, self._dispatch_key, 60 * 60)
    retry_call(self.client.expire, self._finish_key, 60 * 60)
def touch(self, timeout: int, key: str, queue: str, message: dict):
    if timeout >= MAX_TIMEOUT:
        raise ValueError(f"Can't set watcher timeouts over {MAX_TIMEOUT}")
    self.hash.set(key, {
        'action': WatcherAction.Message,
        'queue': queue,
        'message': message
    })
    seconds, _ = retry_call(self.redis.time)
    self.queue.push(int(seconds + timeout), key)
def push(self, priority: int, data, vip=None) -> int:
    """Add or update elements in the priority queue.

    Existing elements will have their priority updated.

    Returns:
        Number of _NEW_ elements in the queue after the operation.
    """
    # Scores are negated so that zpopmin-based reads return the
    # highest-priority element first
    return retry_call(self.c.zadd, self.name, {json.dumps(data): -priority})
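# Sketch of the negated-score trick with plain redis-py (hypothetical key name,
# local server assumed): zpopmin returns the smallest score first, so storing
# -priority makes the highest-priority element pop first.
import json
from redis import Redis

r = Redis()
r.zadd('example-pq', {json.dumps('low'): -1, json.dumps('high'): -100})
member, score = r.zpopmin('example-pq')[0]
assert json.loads(member) == 'high' and score == -100.0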
def length(*queues: PriorityQueue) -> list[int]:
    """Utility function for batch reading queue lengths."""
    if not queues:
        return []
    pipeline = queues[0].c.pipeline(transaction=False)
    for que in queues:
        pipeline.zcard(que.name)
    return retry_call(pipeline.execute)
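# The same pipelined batch read with plain redis-py (hypothetical queue names,
# local server assumed): all zcard calls go out in a single round trip.
from redis import Redis

r = Redis()
pipe = r.pipeline(transaction=False)
for name in ('queue-a', 'queue-b'):
    pipe.zcard(name)
sizes = pipe.execute()  # one integer per queue, in order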
def add(self, key: str, value):
    """Add the (key, value) pair to the hash for new keys.

    If a key already exists this operation doesn't add it.

    Returns:
        True if key has been added to the table, False otherwise.
    """
    if isinstance(key, bytes):
        raise ValueError("Cannot use bytes for hashmap keys")
    return retry_call(self.c.hsetnx, self.name, key, json.dumps(value))
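# hsetnx only writes absent fields, which is what gives `add` its
# first-writer-wins semantics (sketch with plain redis-py, hypothetical names).
from redis import Redis

r = Redis()
r.hsetnx('table', 'k', '1')  # truthy: field created
r.hsetnx('table', 'k', '2')  # falsy: field already exists, value kept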
def fail_nonrecoverable(self, file_hash: str, service, error_key) -> Tuple[int, bool]:
    """A service task has failed and should not be retried; enter the error as the result.

    Has exactly the same semantics as `finish`, but for errors.
    """
    return retry_call(self._finish, args=[
        self.sid, file_hash, service,
        json.dumps(['error', error_key, 0, False, ''])
    ])
def touch_task(self, timeout: int, key: str, worker: str, task_key: str):
    if timeout >= MAX_TIMEOUT:
        raise ValueError(f"Can't set watcher timeouts over {MAX_TIMEOUT}")
    self.hash.set(key, {
        'action': WatcherAction.TimeoutTask,
        'worker': worker,
        'task_key': task_key
    })
    seconds, _ = retry_call(self.redis.time)
    self.queue.push(int(seconds + timeout), key)
def finished(self, file_hash, service) -> Union[DispatchRow, None]:
    """If a service has finished, return the DispatchRow describing its result."""
    # Try the local cache
    result = self._cached_results.get(file_hash, {}).get(service, None)
    if result:
        return result
    # Try the server
    result = retry_call(self.client.hget, self._finish_key, f"{file_hash}-{service}")
    if result:
        return DispatchRow(*json.loads(result))
    return None
def all_dispatches(self) -> Dict[str, Dict[str, float]]:
    """Load the entire table of things that should currently be running."""
    rows = retry_call(self.client.hgetall, self._dispatch_key)
    output = {}
    for key, timestamp in rows.items():
        # Keys are f'{file_hash}-{service}'; hashes contain no '-', so
        # splitting on the first dash cleanly separates the two parts
        file_hash, service = key.split(b'-', maxsplit=1)
        file_hash = file_hash.decode()
        service = service.decode()
        if file_hash not in output:
            output[file_hash] = {}
        output[file_hash][service] = float(timestamp)
    return output
def select(*queues, **kw):
    timeout = kw.get('timeout', 0)
    if len(queues) < 1:
        raise TypeError('At least one queue must be specified')
    if any([type(q) != PriorityQueue for q in queues]):
        raise TypeError('Only PriorityQueues supported')

    c = queues[0].c
    response = retry_call(c.bzpopmin, [q.name for q in queues], timeout)

    if not response:
        return response

    # Strip the 21-byte sort-key prefix (SORTING_KEY_LEN elsewhere) before decoding
    return response[0].decode('utf-8'), json.loads(response[1][21:])
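# bzpopmin accepts several keys and serves the first non-empty one, which is
# what lets select() wait on many queues at once (sketch with plain redis-py,
# hypothetical key names).
from redis import Redis

r = Redis()
r.zadd('q2', {'x': 1})
r.bzpopmin(['q1', 'q2'], timeout=1)  # -> (b'q2', b'x', 1.0)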
def all_results(self) -> Dict[str, Dict[str, DispatchRow]]:
    """Get all the records stored in the dispatch table.

    :return: output[file_hash][service_name] -> DispatchRow
    """
    rows = retry_call(self.client.hgetall, self._finish_key)
    output = {}
    for key, status in rows.items():
        file_hash, service = key.split(b'-', maxsplit=1)
        file_hash = file_hash.decode()
        service = service.decode()
        if file_hash not in output:
            output[file_hash] = {}
        output[file_hash][service] = DispatchRow(*json.loads(status))
    return output
def dequeue_range(self, lower_limit='', upper_limit='', skip=0, num=1):
    """Dequeue a number of elements, within a specified range of scores.

    Limits given are inclusive; they can be made exclusive, see the redis docs
    on how to format limits for that.

    NOTE: lower/upper limit is negated and swapped in the lua script, no need to do it here.

    :param lower_limit: The score of all dequeued elements must be higher or equal to this.
    :param upper_limit: The score of all dequeued elements must be lower or equal to this.
    :param skip: In the range of available items to dequeue, skip over this many.
    :param num: Maximum number of elements to dequeue.
    :return: list
    """
    results = retry_call(self._deque_range, keys=[self.name],
                         args=[lower_limit, upper_limit, skip, num])
    return [decode(res) for res in results]
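# The inclusive/exclusive limit syntax the docstring refers to is the standard
# ZRANGEBYSCORE format: a bare number is inclusive, a '(' prefix makes it
# exclusive (sketch with plain redis-py, hypothetical key name).
from redis import Redis

r = Redis()
r.zadd('timeline', {'a': 5, 'b': 10})
r.zrangebyscore('timeline', 5, 10)     # inclusive bounds: [b'a', b'b']
r.zrangebyscore('timeline', '(5', 10)  # exclusive lower bound: [b'b']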
def finish(self, file_hash, service, result_key, score, classification, drop=False) -> Tuple[int, bool]:
    """As a single transaction:
     - Remove the service from the dispatched list
     - Add the file to the finished list, with the given result key
     - Return the number of items in the dispatched list and whether this was a
       duplicate call to finish
    """
    return retry_call(self._finish, args=[
        self.sid, file_hash, service,
        json.dumps(['result', result_key, score, drop, str(classification)])
    ])