def __init__(self, base_dir, rpc_server, addr, addrs, copies=1,
             n_priorities=3, deduper=None, app_name=None, logger=None):
    """Wire up the proxy: remember addressing info, build the local mq
    node, the hash-ring distributor and the RPC method prefix."""
    # Addressing / bookkeeping first.
    self.dir_ = base_dir
    self.addr_ = addr
    self.addrs = list(addrs)
    self.logger = logger
    self.prefix = get_rpc_prefix(app_name, 'mq')
    # Collaborators: the worker-local queue node and the object router.
    self.mq_node = LocalMessageQueueNode(
        base_dir, rpc_server, addr, addrs,
        copies=copies, n_priorities=n_priorities, deduper=deduper,
        app_name=app_name)
    self.distributor = Distributor(addrs, copies=copies)
    # init() must run once only; guard it with a lock and a flag.
    self._lock = threading.Lock()
    self.inited = False
class MessageQueueClient(object):
    """Thin mq client that reaches every node purely over RPC.

    Unlike the node proxy, it keeps no local queue and no caches: a
    `put` is pushed out immediately and a `get` polls nodes in random
    order until enough objects are collected.
    """

    def __init__(self, addrs, app_name=None, copies=1):
        self.addrs = addrs
        self.distributors = Distributor(addrs, copies=copies)
        self.prefix = get_rpc_prefix(app_name, 'mq')

    def put(self, objs):
        """Route objs through the hash ring and RPC them to their nodes."""
        primary_map, backup_map = self.distributors.distribute(objs)
        # Primary copies go straight to their owning node.
        for target, batch in primary_map.iteritems():
            client_call(target, self.prefix + 'batch_put',
                        pickle.dumps(batch))
        # Backup copies are stored on `target` on behalf of `origin`.
        for target, per_origin in backup_map.iteritems():
            for origin, batch in per_origin.iteritems():
                client_call(target, self.prefix + 'put_backup', origin,
                            pickle.dumps(batch))

    def get(self, size=1, priority=0):
        """Fetch up to `size` objects, polling nodes in random order.

        Returns a single object (or None) when size == 1, else a list.
        """
        size = max(size, 1)
        candidates = list(self.addrs)
        shuffle(candidates)

        collected = []
        for addr in candidates:
            remaining = size - len(collected)
            if remaining <= 0:
                break
            fetched = pickle.loads(
                client_call(addr, self.prefix + 'get', remaining, priority))
            if fetched is None:
                continue
            if not isinstance(fetched, list):
                fetched = [fetched]
            collected.extend(fetched)

        if size == 1:
            return collected[0] if collected else None
        return collected
class MessageQueueClient(object):
    """RPC-only client for the distributed message queue.

    Holds no queue state of its own; every operation is a remote call
    routed by the hash-ring distributor.
    """

    def __init__(self, addrs, app_name=None, copies=1):
        self.addrs = addrs
        self.distributors = Distributor(addrs, copies=copies)
        self.prefix = get_rpc_prefix(app_name, 'mq')

    def put(self, objs):
        """Distribute objs and push primaries, then backups, over RPC."""
        addr_batches, backup_batches = self.distributors.distribute(objs)
        for node, batch in addr_batches.iteritems():
            client_call(node, self.prefix + 'batch_put', pickle.dumps(batch))
        for node, mapping in backup_batches.iteritems():
            for backup_of, batch in mapping.iteritems():
                client_call(node, self.prefix + 'put_backup', backup_of,
                            pickle.dumps(batch))

    def get(self, size=1, priority=0):
        """Poll shuffled nodes until `size` objects are gathered.

        size == 1 yields a bare object or None; otherwise a list.
        """
        size = max(size, 1)
        order = list(self.addrs)
        shuffle(order)

        gathered = []
        idx = 0
        while idx < len(order) and len(gathered) < size:
            node = order[idx]
            idx += 1
            need = size - len(gathered)
            payload = client_call(node, self.prefix + 'get', need, priority)
            got = pickle.loads(payload)
            if got is None:
                continue
            gathered.extend(got if isinstance(got, list) else [got])

        if size == 1:
            if not gathered:
                return
            return gathered[0]
        return gathered
class MessageQueueNodeProxy(object):
    """Proxy over a LocalMessageQueueNode.

    Every mq operation is executed locally when the target address is
    this worker's own, otherwise it is forwarded as an RPC to the
    owning worker.  PUT traffic is buffered in per-address caches
    (``caches`` for primaries, ``backup_caches`` for backups) so the
    network sees batches instead of one call per object; cache state
    is persisted via save()/load() across restarts.
    """

    def __init__(self, base_dir, rpc_server, addr, addrs,
                 copies=1, n_priorities=3, deduper=None,
                 app_name=None, logger=None):
        self.dir_ = base_dir
        self.addr_ = addr
        self.addrs = list(addrs)
        # This worker's own share of the distributed queue.
        self.mq_node = LocalMessageQueueNode(
            base_dir, rpc_server, addr, addrs,
            copies=copies, n_priorities=n_priorities, deduper=deduper,
            app_name=app_name)
        # Hash ring deciding which address each object belongs to.
        self.distributor = Distributor(addrs, copies=copies)
        self.logger = logger
        self.prefix = get_rpc_prefix(app_name, 'mq')
        self._lock = threading.Lock()
        self.inited = False

    @classmethod
    def register_rpc(cls, node, rpc_server, app_name=None):
        """Expose the wrapped local node's methods on the RPC server."""
        LocalMessageQueueNode.register_rpc(node.mq_node, rpc_server,
                                           app_name=app_name)

    def init(self):
        """Thread-safe, idempotent initialization of caches and node."""
        with self._lock:
            if self.inited:
                return

            # Restore caches persisted by a previous run; create fresh
            # ones only for attributes load() did not provide.
            self.load()
            if not hasattr(self, 'caches'):
                self.caches = dict((addr, []) for addr in self.addrs)
            if not hasattr(self, 'caches_inited'):
                self.caches_inited = dict((addr, False)
                                          for addr in self.addrs)
            if not hasattr(self, 'backup_caches'):
                # backup_caches[holder][origin] -> list of objects that
                # `holder` keeps as a backup on behalf of `origin`.
                self.backup_caches = dict((addr, {}) for addr in self.addrs)
                for addr in self.addrs:
                    for other_addr in [n for n in self.addrs if addr != n]:
                        self.backup_caches[addr][other_addr] = []

            self.mq_node.init()
            self.inited = True

    def load(self):
        """Restore cache state pickled by save(), if the file exists."""
        save_file = os.path.join(self.dir_, MQ_STATUS_FILENAME)
        if not os.path.exists(save_file):
            return
        # Binary mode is required for pickle data (text mode corrupts
        # it on Windows and breaks protocol >= 1).  The file is written
        # locally by save(); never point this at untrusted input.
        with open(save_file, 'rb') as f:
            self.caches, self.caches_inited, self.backup_caches = \
                pickle.load(f)

    def save(self):
        """Persist cache state so a restart can pick up where we left off."""
        if not self.inited:
            return
        save_file = os.path.join(self.dir_, MQ_STATUS_FILENAME)
        # Binary mode for pickle output -- see load().
        with open(save_file, 'wb') as f:
            t = (self.caches, self.caches_inited, self.backup_caches)
            pickle.dump(t, f)

    def _check_empty(self, objs):
        """Return True when objs is None or an empty list."""
        if objs is None:
            return True
        elif isinstance(objs, list) and len(objs) == 0:
            return True
        return False

    def _remote_or_local_put(self, addr, objs, force=False, priority=0):
        """put() on the local node, or RPC it to the owning worker."""
        if self._check_empty(objs):
            return
        if addr == self.addr_:
            self.mq_node.put(objs, force=force, priority=priority)
        else:
            client_call(addr, self.prefix + 'put', pickle.dumps(objs),
                        force, priority)

    def _remote_or_local_batch_put(self, addr, objs):
        """batch_put() locally or via RPC, skipping empty payloads."""
        if self._check_empty(objs):
            return
        if addr == self.addr_:
            self.mq_node.batch_put(objs)
        else:
            client_call(addr, self.prefix + 'batch_put', pickle.dumps(objs))

    def _remote_or_local_get(self, addr, size=1, priority=0):
        """get() from the local node, or RPC the owning worker."""
        if addr == self.addr_:
            return self.mq_node.get(size=size, priority=priority)
        else:
            return pickle.loads(client_call(addr, self.prefix + 'get',
                                            size, priority))

    def _remote_or_local_put_backup(self, addr, backup_addr, objs,
                                    force=False):
        """put_backup() locally or via RPC, skipping empty payloads."""
        if self._check_empty(objs):
            return
        if addr == self.addr_:
            self.mq_node.put_backup(backup_addr, objs, force=force)
        else:
            client_call(addr, self.prefix + 'put_backup', backup_addr,
                        pickle.dumps(objs), force)

    def put(self, objects, flush=False):
        """Distribute objects into the per-address caches and flush any
        cache that is new, full (>= CACHE_SIZE) or explicitly flushed.

        :param objects: objects to put into the mq
        :param flush: force every cache (primary and backup) out now
        """
        self.init()

        addrs_objs, backup_addrs_objs = \
            self.distributor.distribute(objects)
        if flush is True:
            # Make sure every address is visited below, even ones that
            # received nothing in this call.
            for addr in self.addrs:
                if addr not in addrs_objs:
                    addrs_objs[addr] = []
                if addr not in backup_addrs_objs:
                    backup_addrs_objs[addr] = {}

        for addr, objs in addrs_objs.iteritems():
            self.caches[addr].extend(objs)
            if not self.caches_inited[addr] or \
                    len(self.caches[addr]) >= CACHE_SIZE or flush:
                try:
                    self._remote_or_local_batch_put(addr, self.caches[addr])
                except socket.error as e:
                    # Best effort: keep the cache so the objects are
                    # retried on the next flush; just log the failure.
                    if self.logger:
                        self.logger.exception(e)
                else:
                    self.caches[addr] = []
                    if not self.caches_inited[addr]:
                        self.caches_inited[addr] = True

        for addr, m in backup_addrs_objs.iteritems():
            for backup_addr, objs in m.iteritems():
                self.backup_caches[addr][backup_addr].extend(objs)
            # Total buffered backups for this holder decide the flush.
            size = sum([len(obs) for obs in
                        self.backup_caches[addr].values()])
            if size >= CACHE_SIZE or flush:
                for backup_addr, objs in \
                        self.backup_caches[addr].iteritems():
                    try:
                        self._remote_or_local_put_backup(
                            addr, backup_addr, objs)
                    except socket.error as e:
                        if self.logger:
                            self.logger.exception(e)
                    else:
                        self.backup_caches[addr][backup_addr] = []
class MessageQueueNodeProxy(object):
    """
    This class maintains an instance of
    :class:`~cola.core.mq.node.LocalMessageQueueNode`, and provides
    `PUT` and `GET` relative methods.  In each mq operation, it will
    execute a local or remote call by judging the address: a remote
    call sends an RPC to the destination worker's instance, which
    executes the method provided by
    :class:`~cola.core.mq.node.LocalMessageQueueNode`.

    Besides, this class maintains an instance of
    :class:`~cola.core.mq.distributor.Distributor` which holds a hash
    ring; each object of a `PUT` operation is routed to its
    destination by the ring.  A write-back cache batches `PUT` traffic
    to avoid frequent small writes burdening a message queue node.
    A `GET` fetches from the local node first, falling back on the
    local cache when the node is exhausted.
    """

    def __init__(self, base_dir, rpc_server, addr, addrs,
                 copies=1, n_priorities=3, deduper=None,
                 app_name=None, logger=None):
        self.dir_ = base_dir
        self.addr_ = addr
        self.addrs = list(addrs)
        # This worker's own share of the distributed queue.
        self.mq_node = LocalMessageQueueNode(
            base_dir, rpc_server, addr, addrs,
            copies=copies, n_priorities=n_priorities, deduper=deduper,
            app_name=app_name)
        # Hash ring deciding which address each object belongs to.
        self.distributor = Distributor(addrs, copies=copies)
        self.logger = logger
        self.prefix = get_rpc_prefix(app_name, 'mq')
        self._lock = threading.Lock()
        self.inited = False

    @classmethod
    def register_rpc(cls, node, rpc_server, app_name=None):
        """Expose the wrapped local node's methods on the RPC server."""
        LocalMessageQueueNode.register_rpc(node.mq_node, rpc_server,
                                           app_name=app_name)

    def init(self):
        """Thread-safe, idempotent initialization of caches and node."""
        with self._lock:
            if self.inited:
                return

            # Restore caches persisted by a previous run; create fresh
            # ones only for attributes load() did not provide.
            self.load()
            if not hasattr(self, 'caches'):
                self.caches = dict((addr, []) for addr in self.addrs)
            if not hasattr(self, 'caches_inited'):
                self.caches_inited = dict((addr, False)
                                          for addr in self.addrs)
            if not hasattr(self, 'backup_caches'):
                # backup_caches[holder][origin] -> objects that `holder`
                # keeps as a backup on behalf of `origin`.
                self.backup_caches = dict((addr, {}) for addr in self.addrs)
                for addr in self.addrs:
                    for other_addr in [n for n in self.addrs if addr != n]:
                        self.backup_caches[addr][other_addr] = []

            self.mq_node.init()
            self.inited = True

    def load(self):
        """Restore cache state pickled by save(), if the file exists."""
        save_file = os.path.join(self.dir_, MQ_STATUS_FILENAME)
        if not os.path.exists(save_file):
            return
        # Binary mode is required for pickle data (text mode corrupts
        # it on Windows and breaks protocol >= 1).  The file is written
        # locally by save(); never point this at untrusted input.
        with open(save_file, 'rb') as f:
            self.caches, self.caches_inited, self.backup_caches = \
                pickle.load(f)

    def save(self):
        """Persist cache state so a restart can pick up where we left off."""
        if not self.inited:
            return
        save_file = os.path.join(self.dir_, MQ_STATUS_FILENAME)
        # Binary mode for pickle output -- see load().
        with open(save_file, 'wb') as f:
            t = (self.caches, self.caches_inited, self.backup_caches)
            pickle.dump(t, f)

    def _check_empty(self, objs):
        """Return True when objs is None or an empty list."""
        if objs is None:
            return True
        elif isinstance(objs, list) and len(objs) == 0:
            return True
        return False

    def _remote_or_local_put(self, addr, objs, force=False, priority=0):
        """put() on the local node, or RPC it to the owning worker."""
        if self._check_empty(objs):
            return
        if addr == self.addr_:
            self.mq_node.put(objs, force=force, priority=priority)
        else:
            client_call(addr, self.prefix + 'put', pickle.dumps(objs),
                        force, priority)

    def _remote_or_local_batch_put(self, addr, objs):
        """batch_put() locally or via RPC, skipping empty payloads."""
        if self._check_empty(objs):
            return
        if addr == self.addr_:
            self.mq_node.batch_put(objs)
        else:
            client_call(addr, self.prefix + 'batch_put', pickle.dumps(objs))

    def _remote_or_local_get(self, addr, size=1, priority=0):
        """get() from the node, falling back on this proxy's own cache
        for `addr` when the node has nothing to offer."""
        objs = None
        if addr == self.addr_:
            objs = self.mq_node.get(size=size, priority=priority)
        else:
            objs = pickle.loads(client_call(addr, self.prefix + 'get',
                                            size, priority))

        addr_caches = self.caches.get(addr, [])
        if size == 1 and objs is None and len(addr_caches) > 0:
            return addr_caches.pop(0)
        # `not objs` also covers objs is None: the node may return None
        # even for size > 1, and len(None) would raise TypeError here.
        elif size > 1 and not objs and len(addr_caches) > 0:
            # NOTE(review): unlike the size == 1 branch this does not
            # remove the returned objects from the cache -- they may be
            # handed out again later; confirm whether that is intended.
            return addr_caches[:size]

        return objs

    def _remote_or_local_put_backup(self, addr, backup_addr, objs,
                                    force=False):
        """put_backup() locally or via RPC, skipping empty payloads."""
        if self._check_empty(objs):
            return
        if addr == self.addr_:
            self.mq_node.put_backup(backup_addr, objs, force=force)
        else:
            client_call(addr, self.prefix + 'put_backup', backup_addr,
                        pickle.dumps(objs), force)

    def put(self, objects, flush=False):
        """
        Put a bunch of objects into the mq.

        The objects will be distributed to different mq nodes according
        to the instance of
        :class:`~cola.core.mq.distributor.Distributor`.  A cache holds
        them back until the parameter `flush` is true or a single
        destination cache is full (>= CACHE_SIZE).

        :param objects: objects to put into mq, mostly instances of
            :class:`~cola.core.unit.Url` or :class:`~cola.core.unit.Bundle`
        :param flush: flush out all caches if set to true
        """
        self.init()

        addrs_objs, backup_addrs_objs = \
            self.distributor.distribute(objects)
        if flush is True:
            # Make sure every address is visited below, even ones that
            # received nothing in this call.
            for addr in self.addrs:
                if addr not in addrs_objs:
                    addrs_objs[addr] = []
                if addr not in backup_addrs_objs:
                    backup_addrs_objs[addr] = {}

        for addr, objs in addrs_objs.iteritems():
            self.caches[addr].extend(objs)
            if not self.caches_inited[addr] or \
                    len(self.caches[addr]) >= CACHE_SIZE or flush:
                try:
                    self._remote_or_local_batch_put(addr, self.caches[addr])
                except socket.error as e:
                    # Best effort: keep the cache so the objects are
                    # retried on the next flush; just log the failure.
                    if self.logger:
                        self.logger.exception(e)
                else:
                    self.caches[addr] = []
                    if not self.caches_inited[addr]:
                        self.caches_inited[addr] = True

        for addr, m in backup_addrs_objs.iteritems():
            for backup_addr, objs in m.iteritems():
                self.backup_caches[addr][backup_addr].extend(objs)
            # Total buffered backups for this holder decide the flush.
            size = sum([len(obs) for obs in
                        self.backup_caches[addr].values()])
            if size >= CACHE_SIZE or flush:
                for backup_addr, objs in \
                        self.backup_caches[addr].iteritems():
                    try:
                        self._remote_or_local_put_backup(
                            addr, backup_addr, objs)
                    except socket.error as e:
                        if self.logger:
                            self.logger.exception(e)
                    else:
                        self.backup_caches[addr][backup_addr] = []
def __init__(self, addrs, app_name=None, copies=1):
    """Remember the peer addresses, derive the RPC method prefix and
    build the hash-ring distributor used to route objects."""
    self.addrs = addrs
    self.prefix = get_rpc_prefix(app_name, 'mq')
    self.distributors = Distributor(addrs, copies=copies)