class PartitionClient(object):
    """
    Client Class for the Partition Library

    Example usage:
    ---------------------
    import libpartition
    from libpartition.libpartition import PartitionClient

    def own_change_cb(l):
            print "ownership change:" + str(l)

    c = PartitionClient("test", "s1", ["s1", "s2", "s3"], 32,
            own_change_cb, "zookeeper_s1")

    ##do some real work now"
    if (c.own_partition(1)):
        ...... do something with partition #1 .....
        .........
    ...
    c.update_cluster_list(["s1", "s2"])
    ...
    ----------------------

    You should not call any partition library routine from within the
    callback function

    Args:
        app_name(str): Name of the app for which partition cluster is used
        self_name(str): Name of the local cluster node (can be ip address)
        cluster_list(list): List of all the nodes in the cluster including
            local node
        max_partition(int): Partition space always go from 0..max_partition-1
        partition_update_cb: Callback function invoked when partition
            ownership list is updated. It is called with the list of
            partitions currently owned by the local node.
        zk_server(str): <zookeeper server>:<zookeeper server port>
        logger: Optional logger object; when None the ``logging`` module
            itself is used after ``logging.basicConfig()``.

    Raises:
        ValueError: if self_name is not part of cluster_list
    """

    def __init__(self, app_name, self_name, cluster_list, max_partition,
                 partition_update_cb, zk_server, logger=None):
        # Initialize local variables
        self._zk_server = zk_server
        self._cluster_list = set(cluster_list)
        self._max_partition = max_partition
        self._update_cb = partition_update_cb
        self._curr_part_ownership_list = []
        self._target_part_ownership_list = []
        self._con_hash = ConsistentHash(cluster_list)
        self._name = self_name

        # some sanity check
        if self._name not in cluster_list:
            raise ValueError('cluster list is missing local server name')

        # initialize logging and other stuff
        if logger is None:
            logging.basicConfig()
            self._logger = logging
        else:
            self._logger = logger
        self._conn_state = None
        self._sandesh_connection_info_update(status='INIT', message='')

        # connect to zookeeper, retrying once a second until start() succeeds
        self._zk = KazooClient(zk_server)
        while True:
            try:
                self._zk.start()
                break
            # The timeout is listed explicitly next to Exception (as the
            # original two handlers did).  Zookeeper is also throwing
            # exception due to delay in master election.
            except (gevent.event.Timeout, Exception) as e:
                # Update connection info
                self._sandesh_connection_info_update(status='DOWN',
                                                     message=str(e))
                gevent.sleep(1)

        # Update connection info
        self._sandesh_connection_info_update(status='UP', message='')
        # Done connecting to ZooKeeper

        # create a lock array to contain locks for each partition
        self._part_locks = []
        for part in range(0, self._max_partition):
            lockpath = "/lockpath/" + app_name + "/" + str(part)
            self._part_locks.append(self._zk.Lock(lockpath, self._name))

        # initialize partition # to lock acquire greenlet dictionary
        self._part_lock_task_dict = {}

        self._logger.error("initial servers:" + str(self._cluster_list))

        # update target partition ownership list
        self._target_part_ownership_list = self._compute_target_partitions()

        # update current ownership list
        self._acquire_partition_ownership()
    #end __init__

    def _compute_target_partitions(self):
        """Return the partitions the consistent hash maps to the local node."""
        return [part for part in range(0, self._max_partition)
                if self._con_hash.get_node(str(part)) == self._name]
    #end _compute_target_partitions

    def _sandesh_connection_info_update(self, status, message):
        """Publish the Zookeeper connection status and log transitions.

        Args:
            status(str): Name of a ConnectionStatus attribute
                ('INIT', 'UP' and 'DOWN' are the values used in this class)
            message(str): Detail text attached to the status
        """
        from pysandesh.connection_info import ConnectionState
        from pysandesh.gen_py.process_info.ttypes import ConnectionStatus, \
            ConnectionType

        new_conn_state = getattr(ConnectionStatus, status)
        ConnectionState.update(conn_type=ConnectionType.ZOOKEEPER,
                               name='Zookeeper', status=new_conn_state,
                               message=message,
                               server_addrs=self._zk_server.split(','))

        # log only on a transition into DOWN from a known non-DOWN state
        if (self._conn_state and self._conn_state != ConnectionStatus.DOWN and
                new_conn_state == ConnectionStatus.DOWN):
            msg = 'Connection to Zookeeper down: %s' % (message)
            self._logger.error(msg)
        # log only on a transition into UP from a known different state
        if (self._conn_state and self._conn_state != new_conn_state and
                new_conn_state == ConnectionStatus.UP):
            msg = 'Connection to Zookeeper ESTABLISHED'
            self._logger.error(msg)

        self._conn_state = new_conn_state
    # end _sandesh_connection_info_update

    # following routine is the greenlet task function to acquire the lock
    # for a partition
    def _acquire_lock(self, part):
        """Poll the lock of ``part`` once a second until it is acquired.

        Returns:
            True once the lock is held (ownership list updated and the
            callback invoked), False if the acquisition was cancelled.

        Raises:
            SystemExit: on any other exception, including one raised by
                the ownership callback.
        """
        # lock for the partition
        lock = self._part_locks[part]

        # go in an infinite loop waiting to acquire the lock
        try:
            while True:
                if lock.acquire(blocking=False):
                    self._logger.error("Acquired lock for:" + str(part))
                    self._curr_part_ownership_list.append(part)
                    self._update_cb(self._curr_part_ownership_list)
                    return True
                gevent.sleep(1)
        except CancelledError:
            self._logger.error("Lock acquire cancelled for:" + str(part))
            return False
        except Exception as ex:
            # TODO: If we have a non-KazooException, the lock object
            #       may get stuck in the "cancelled" state
            self._logger.error("Lock acquire unexpected error!: " + str(ex))
            # This exception should get propogated to main thread
            raise SystemExit
    #end _acquire_lock

    # get rid of finished spawned tasks from datastructures
    def _cleanup_greenlets(self):
        """Drop bookkeeping entries whose greenlet reports ready()."""
        # Iterate over a snapshot: entries are deleted inside the loop and
        # mutating a dict while iterating it raises on Python 3.
        for part in list(self._part_lock_task_dict):
            if self._part_lock_task_dict[part].ready():
                del self._part_lock_task_dict[part]
    #end _cleanup_greenlets

    # following routine launches tasks to acquire partition locks
    def _acquire_partition_ownership(self):
        """Reconcile current ownership with the target ownership list.

        Spawns a lock-acquire greenlet for every targeted partition that
        is not owned yet, cancels pending acquisitions and releases locks
        for partitions that are no longer targeted, and invokes the
        ownership callback if any partition was given up.
        """
        # cleanup any finished greenlets
        self._cleanup_greenlets()

        # this variable will help us decide if we need to call callback
        updated_curr_ownership = False

        # list of partitions for which locks have to be released
        release_lock_list = []

        self._logger.error("known servers: %s" %
                           self._con_hash.get_all_nodes())

        for part in range(0, self._max_partition):
            if part in self._target_part_ownership_list:
                if part in self._curr_part_ownership_list:
                    # do nothing, I already have ownership of this partition
                    self._logger.error("No need to acquire ownership of:" +
                                       str(part))
                else:
                    # I need to acquire lock for this partition before I own
                    if part in self._part_lock_task_dict:
                        try:
                            self._part_lock_task_dict[part].get(block=False)
                        except:
                            # Deliberately broad: the non-blocking get()
                            # raising is treated as "there is already a
                            # greenlet running to acquire the lock".
                            # NOTE(review): the gevent timeout may not derive
                            # from Exception - confirm before narrowing.
                            self._logger.error("Already a greenlet running to"
                                               " acquire:" + str(part))
                            continue

                        # Greenlet died without getting ownership. Cleanup
                        self._logger.error("Cleanup stale greenlet running to"
                                           " acquire:" + str(part))
                        del self._part_lock_task_dict[part]

                    self._logger.error("Starting greenlet running to"
                                       " acquire:" + str(part))
                    # launch the greenlet to acquire the lock
                    g = Greenlet.spawn(self._acquire_lock, part)
                    self._part_lock_task_dict[part] = g
            else:
                # give up ownership of the partition

                # cancel any lock acquisition which is ongoing
                if part in self._part_lock_task_dict:
                    try:
                        self._part_lock_task_dict[part].get(block=False)
                    except:
                        # Deliberately broad, same reasoning as above.
                        self._logger.error(
                            "canceling lock acquisition going on for:" +
                            str(part))
                        # Cancelling the lock should result in killing the
                        # gevent
                        self._part_locks[part].cancel()
                        self._part_lock_task_dict[part].get(block=True)

                    del self._part_lock_task_dict[part]

                if part in self._curr_part_ownership_list:
                    release_lock_list.append(part)
                    self._curr_part_ownership_list.remove(part)
                    updated_curr_ownership = True
                    self._logger.error("giving up ownership of:" + str(part))

        if updated_curr_ownership:
            # current partition membership was updated call the callback
            self._update_cb(self._curr_part_ownership_list)

        # release locks which were acquired; done after the callback so the
        # application hears about the loss before the lock is freed
        for part in release_lock_list:
            self._logger.error("release the lock which was acquired:" +
                               str(part))
            try:
                self._part_locks[part].release()
                self._logger.error("fully gave up ownership of:" + str(part))
            except Exception as ex:
                # best effort: keep releasing the remaining locks, but do
                # not hide the failure
                self._logger.error("failed to release the lock for:" +
                                   str(part) + " error: " + str(ex))
    #end _acquire_partition_ownership

    def update_cluster_list(self, cluster_list):
        """ Updates the cluster node list
        Args:
            cluster_list(list): New list of names of the nodes in
                the cluster
        Returns:
            None
        Raises:
            ValueError: if the local node name is not in cluster_list
        """
        # some sanity check
        if self._name not in cluster_list:
            raise ValueError('cluster list is missing local server name')

        new_cluster_list = set(cluster_list)
        new_servers = list(new_cluster_list.difference(self._cluster_list))
        deleted_servers = list(
            set(self._cluster_list).difference(new_cluster_list))
        self._cluster_list = set(cluster_list)

        self._logger.error("deleted servers:" + str(deleted_servers))
        self._logger.error("new servers:" + str(new_servers))

        # update the hash structure
        if new_servers:
            self._con_hash.add_nodes(new_servers)
        if deleted_servers:
            self._con_hash.del_nodes(deleted_servers)

        # update target partition ownership list
        self._target_part_ownership_list = self._compute_target_partitions()

        # update current ownership list
        self._acquire_partition_ownership()
    #end update_cluster_list

    def own_partition(self, part_no):
        """ Returns ownership information of a partition
        Args:
            part_no(int) : Partition no
        Returns:
            True if partition is owned by the local node
            False if partition is not owned by the local node
        """
        return part_no in self._curr_part_ownership_list
    #end own_partition

    def close(self):
        """ Closes any connections and frees up any data structures
        Args:
        Returns:
            None
        """
        # clean up greenlets (best effort)
        for part in list(self._part_lock_task_dict):
            try:
                self._part_lock_task_dict[part].kill()
            except Exception as ex:
                self._logger.error("failed to kill lock greenlet for:" +
                                   str(part) + " error: " + str(ex))

        # close zookeeper (best effort)
        try:
            self._zk.stop()
        except Exception as ex:
            self._logger.error("failed to stop zookeeper client: " + str(ex))

        try:
            self._zk.close()
        except Exception as ex:
            self._logger.error("failed to close zookeeper client: " + str(ex))
    #end close
class ConsistentScheduler(object):
    '''
        LibPartitionHelper abstract out workers and work_items, and their
        mapping to partitions. So application can only deal with the work
        items it owns, without bothering about partition mapping.

        This class also provides synchronization primitives to ensure apps
        to clean up before giving up their partitions

        Args:
            service_name(str): Name used in the zookeeper path; defaults to
                the basename of sys.argv[0]
            zookeeper(str): Zookeeper server address string
            delete_hndlr: Optional callable invoked with the work items of
                partitions that are being given up
            add_hndlr: Optional callable invoked with the work items of
                partitions that are about to be added
            bucketsize(int): Number of partitions (named '0'..'bucketsize-1')
            item2part_func: Optional callable mapping an item name to a
                partition number; defaults to an md5 based mapping
            partitioner: Optional partition function handed to the
                zookeeper SetPartitioner; defaults to consistent hashing
            logger: Optional logger; defaults to logging.getLogger(__name__)
            cluster_id(str): Optional prefix for the zookeeper path
    '''
    _MAX_WAIT_4_ALLOCATION = 6 + randint(0, 9)

    def __init__(self, service_name=None, zookeeper='127.0.0.1:2181',
                 delete_hndlr=None, add_hndlr=None, bucketsize=47,
                 item2part_func=None, partitioner=None, logger=None,
                 cluster_id=''):
        if logger:
            self._logger = logger
        else:
            self._logger = logging.getLogger(__name__)
        self._service_name = service_name or os.path.basename(sys.argv[0])
        self._item2part_func = item2part_func or self._device2partition
        self._zookeeper_srvr = zookeeper
        self._zk = None
        self._bucketsize = bucketsize
        self._delete_hndlr = delete_hndlr
        self._add_hndlr = add_hndlr
        self._partitioner = partitioner or self._partitioner_func
        self._partitions = {}
        self._con_hash = None
        self._last_log = ''
        self._last_log_cnt = 0
        # A real list: it is handed to the partitioner on every release, so
        # a one-shot iterator (Python 3 map) would be empty the second time.
        self._partition_set = [str(part) for part in range(self._bucketsize)]

        self._cluster_id = cluster_id
        if self._cluster_id:
            self._zk_path = ('/' + self._cluster_id + '/contrail_cs' + '/' +
                             self._service_name)
        else:
            self._zk_path = '/'.join(['/contrail_cs', self._service_name])
        self._conn_state = None
        self._sandesh_connection_info_update(status='INIT', message='')

        # Keep creating a fresh client until one connects and the listener
        # has reported the UP state.
        while True:
            self._logger.error("Consistent scheduler zk start")
            self._zk = KazooClient(self._zookeeper_srvr,
                                   handler=SequentialGeventHandler())
            self._zk.add_listener(self._zk_lstnr)
            try:
                self._zk.start()
                while self._conn_state != ConnectionStatus.UP:
                    gevent.sleep(1)
                break
            except Exception as e:
                # Update connection info
                self._sandesh_connection_info_update(status='DOWN',
                                                     message=str(e))
                self._zk.remove_listener(self._zk_lstnr)
                try:
                    self._zk.stop()
                    self._zk.close()
                except Exception as ex:
                    template = ("Exception {0} in Consistent scheduler "
                                "zk stop/close. Args:\n{1!r}")
                    messag = template.format(type(ex).__name__, ex.args)
                    self._logger.error("%s : traceback %s for %s" %
                                       (messag, traceback.format_exc(),
                                        self._service_name))
                finally:
                    self._zk = None
                gevent.sleep(1)

        self._pc = self._zk.SetPartitioner(path=self._zk_path,
                                           set=self._partition_set,
                                           partition_func=self._partitioner)
        self._wait_allocation = 0
        gevent.sleep(0)

    def _sandesh_connection_info_update(self, status, message):
        '''Publish the Zookeeper connection status and log transitions.

        status is the name of a ConnectionStatus attribute; message is the
        detail text attached to it.
        '''
        new_conn_state = getattr(ConnectionStatus, status)
        ConnectionState.update(conn_type=ConnectionType.ZOOKEEPER,
                               name='Zookeeper', status=new_conn_state,
                               message=message,
                               server_addrs=self._zookeeper_srvr.split(','))

        # log only on a transition into DOWN from a known non-DOWN state
        if ((self._conn_state and
             self._conn_state != ConnectionStatus.DOWN) and
                new_conn_state == ConnectionStatus.DOWN):
            msg = 'Connection to Zookeeper down: %s' % (message)
            self._supress_log(msg)
        # log only on a transition into UP from a known different state
        if (self._conn_state and self._conn_state != new_conn_state and
                new_conn_state == ConnectionStatus.UP):
            msg = 'Connection to Zookeeper ESTABLISHED'
            self._supress_log(msg)

        self._conn_state = new_conn_state
    # end _sandesh_connection_info_update

    def _zk_lstnr(self, state):
        '''Zookeeper state listener; exits the process on a LOST session.'''
        self._logger.error("Consistent scheduler listen %s" % str(state))
        if state == KazooState.CONNECTED:
            # Update connection info
            self._sandesh_connection_info_update(status='UP', message='')
        elif state == KazooState.LOST:
            self._logger.error("Consistent scheduler connection LOST")
            # Lost the session with ZooKeeper Server
            # Best of option we have is to exit the process and restart all
            # over again
            self._sandesh_connection_info_update(
                status='DOWN', message='Connection to Zookeeper lost')
            os._exit(2)
        elif state == KazooState.SUSPENDED:
            self._logger.error("Consistent scheduler connection SUSPENDED")
            # Update connection info
            self._sandesh_connection_info_update(
                status='INIT',
                message='Connection to zookeeper lost. Retrying')

    def schedule(self, items, lock_timeout=30):
        '''Advance the partitioner state machine by one step.

        Args:
            items: Iterable of objects with a ``name`` attribute
            lock_timeout: Timeout passed to wait_for_acquire while the
                partitioner is allocating
        Returns:
            True when the partition set is acquired and the work items
            were (re)populated from ``items``, False otherwise.
        The process is terminated with os._exit(2) when the partitioner
        failed or allocation did not finish within the retry budget.
        '''
        gevent.sleep(0)
        ret = False
        if self._pc.failed:
            self._logger.error('Lost or unable to acquire partition')
            os._exit(2)
        elif self._pc.release:
            self._supress_log('Releasing...')
            self._release()
        elif self._pc.allocating:
            self._supress_log('Waiting for allocation...')
            self._pc.wait_for_acquire(lock_timeout)
            if self._wait_allocation < self._MAX_WAIT_4_ALLOCATION:
                self._wait_allocation += 1
            else:
                self._logger.error('Giving up after %d tries!' %
                                   (self._wait_allocation))
                os._exit(2)
        elif self._pc.acquired:
            self._supress_log('got work: ', list(self._pc))
            ret = True
            self._wait_allocation = 0
            self._populate_work_items(items)
            self._supress_log('work items: ',
                              self._items2name(self.work_items()),
                              'from the list',
                              self._items2name(items))
        return ret

    def members(self):
        '''Return the node names currently held by the consistent hash.'''
        return list(self._con_hash.nodes)

    def partitions(self):
        '''Return the partitions currently held by the partitioner.'''
        return list(self._pc)

    def work_items(self):
        '''Return all work items of all partitions as one flat list.'''
        return [item for bucket in self._partitions.values()
                for item in bucket]

    def finish(self):
        '''Hand back all partitions and shut the zookeeper client down.'''
        self._inform_delete(list(self._partitions))
        self._pc.finish()
        self._zk.remove_listener(self._zk_lstnr)
        gevent.sleep(1)
        try:
            self._zk.stop()
        except Exception:
            self._logger.error("Stopping kazooclient failed")
        else:
            self._logger.error("Stopping kazooclient successful")
        try:
            self._zk.close()
        except Exception:
            self._logger.error("Closing kazooclient failed")
        else:
            self._logger.error("Closing kazooclient successful")

    def _items2name(self, items):
        '''Return the ``name`` attribute of every item as a list.'''
        return [item.name for item in items]

    def _supress_log(self, *s):
        '''Debug-log a message, collapsing consecutive identical messages.

        A repeated message only bumps a counter; the next different
        message is prefixed with how often the previous one repeated.
        '''
        slog = ' '.join(map(str, s))
        dl = ''
        # Compare against the previous *message*; comparing against the
        # repeat counter made every message look new.
        if slog != self._last_log:
            if self._last_log_cnt:
                dl += ' ' * 4
                dl += '.' * 8
                dl += '[last print repeats %d times]' % self._last_log_cnt
                self._last_log_cnt = 0
            dl += slog
            self._last_log = slog
            self._logger.debug(dl)
        else:
            self._last_log_cnt += 1

    def _consistent_hash(self, members):
        '''Return the consistent hash, synced to the given member list.'''
        if self._con_hash is None:
            self._con_hash = ConsistentHash(members)
            self._logger.error('members: %s' % (str(self._con_hash.nodes)))
        cur, updtd = set(self._con_hash.nodes), set(members)
        if cur != updtd:
            newm = updtd - cur
            rmvd = cur - updtd
            if newm:
                self._logger.error('new members: %s' % (str(newm)))
                self._con_hash.add_nodes(list(newm))
            if rmvd:
                self._logger.error('members left: %s' % (str(rmvd)))
                self._con_hash.del_nodes(list(rmvd))
        return self._con_hash

    def _consistent_hash_get_node(self, members, partition):
        '''Return the member the consistent hash assigns to ``partition``.'''
        return self._consistent_hash(members).get_node(partition)

    def _partitioner_func(self, identifier, members, _partitions):
        '''Default partition function: partitions hashed to ``identifier``.'''
        partitions = [p for p in _partitions
                      if self._consistent_hash_get_node(members, p) ==
                      identifier]
        self._logger.error('partitions: %s' % (str(partitions)))
        return partitions

    def _release(self):
        '''Notify the handlers about the upcoming change, then release.'''
        old = set(self._pc)
        new = set(self._partitioner(self._pc._identifier,
                                    list(self._pc._party),
                                    self._partition_set))
        rmvd = old - new
        added = new - old
        if rmvd:
            self._inform_delete(list(rmvd))
        if added:
            self._inform_will_add(list(added))
        self._pc.release_set()

    def _list_items_in(self, partitions):
        '''Return the work items of the given partitions as one flat list.'''
        return [item for part in partitions if part in self._partitions
                for item in self._partitions[part]]

    def _inform_will_add(self, partitions):
        '''Call the add handler (if callable) with the partitions' items.'''
        if callable(self._add_hndlr):
            self._add_hndlr(self._list_items_in(partitions))

    def _inform_delete(self, partitions):
        '''Call the delete handler (if callable) with the partitions' items.'''
        if callable(self._delete_hndlr):
            self._delete_hndlr(self._list_items_in(partitions))

    def _populate_work_items(self, items):
        '''Rebuild the partition -> items map from ``items``.

        Only items whose partition is currently held are kept, and an item
        name is stored at most once per partition.
        '''
        self._refresh_work_items()
        # snapshot once instead of rebuilding the list for every item
        owned = set(self._pc)
        # all buckets were just emptied, so the seen-names start empty too
        seen = {}
        for i in items:
            part = str(self._item2part_func(i.name))
            if part not in owned:
                continue
            bucket = self._partitions.setdefault(part, [])
            names = seen.setdefault(part, set())
            if i.name not in names:
                names.add(i.name)
                bucket.append(i)
        self._logger.debug('@populate_work_items(%s): done!' % ' '.join(
            str(part) + ':' + ','.join(item.name for item in bucket)
            for part, bucket in self._partitions.items()))
        gevent.sleep(0)

    def _device2partition(self, key):
        '''Map an item name to a partition number in [0, bucketsize).

        The last 8 bytes of the md5 digest are read as an unsigned 64 bit
        integer.  NOTE(review): 'Q' without a byte-order prefix is native
        order, so hosts of different endianness would map differently.
        '''
        if not isinstance(key, bytes):
            # md5 needs bytes; text keys are hashed by their UTF-8 encoding
            key = key.encode('utf-8')
        return struct.unpack(
            'Q', hashlib.md5(key).digest()[-8:])[0] % self._bucketsize

    def _refresh_work_items(self):
        '''Empty every known partition bucket, keeping the keys.'''
        for k in self._partitions:
            self._partitions[k] = []
class ConsistentScheduler(object):
    '''
        LibPartitionHelper abstract out workers and work_items, and their
        mapping to partitions. So application can only deal with the work
        items it owns, without bothering about partition mapping.

        This class also provides synchronization primitives to ensure apps
        to clean up before giving up their partitions

        Args:
            service_name(str): Name used in the zookeeper path; defaults to
                the basename of sys.argv[0]
            zookeeper(str): Zookeeper server address string
            delete_hndlr: Optional callable invoked with the work items of
                partitions that are being given up
            add_hndlr: Optional callable invoked with the work items of
                partitions that are about to be added
            bucketsize(int): Number of partitions (named '0'..'bucketsize-1')
            item2part_func: Optional callable mapping an item name to a
                partition number; defaults to an md5 based mapping
            partitioner: Optional partition function handed to the
                zookeeper SetPartitioner; defaults to consistent hashing
            logger: Optional logger; defaults to logging.getLogger(__name__)
            cluster_id(str): Optional prefix for the zookeeper path
    '''
    _MAX_WAIT_4_ALLOCATION = 6 + randint(0, 9)

    def __init__(self, service_name=None, zookeeper='127.0.0.1:2181',
                 delete_hndlr=None, add_hndlr=None, bucketsize=47,
                 item2part_func=None, partitioner=None, logger=None,
                 cluster_id=''):
        if logger:
            self._logger = logger
        else:
            self._logger = logging.getLogger(__name__)
        self._service_name = service_name or os.path.basename(sys.argv[0])
        self._item2part_func = item2part_func or self._device2partition
        self._zookeeper_srvr = zookeeper
        self._zk = None
        self._bucketsize = bucketsize
        self._delete_hndlr = delete_hndlr
        self._add_hndlr = add_hndlr
        self._partitioner = partitioner or self._partitioner_func
        self._partitions = {}
        self._con_hash = None
        self._last_log = ''
        self._last_log_cnt = 0
        # Materialised as a list because the partitioner receives it again
        # on every release; a lazy map object would be spent after one pass.
        self._partition_set = [str(part) for part in range(self._bucketsize)]

        self._cluster_id = cluster_id
        if self._cluster_id:
            self._zk_path = ('/' + self._cluster_id + '/contrail_cs' + '/' +
                             self._service_name)
        else:
            self._zk_path = '/'.join(['/contrail_cs', self._service_name])
        self._conn_state = None
        self._sandesh_connection_info_update(status='INIT', message='')

        # Retry with a brand new client until start() succeeds and the
        # listener has flipped the connection state to UP.
        while True:
            self._logger.error("Consistent scheduler zk start")
            self._zk = KazooClient(self._zookeeper_srvr,
                                   handler=SequentialGeventHandler())
            self._zk.add_listener(self._zk_lstnr)
            try:
                self._zk.start()
                while self._conn_state != ConnectionStatus.UP:
                    gevent.sleep(1)
                break
            except Exception as e:
                # Update connection info
                self._sandesh_connection_info_update(status='DOWN',
                                                     message=str(e))
                self._zk.remove_listener(self._zk_lstnr)
                try:
                    self._zk.stop()
                    self._zk.close()
                except Exception as ex:
                    template = ("Exception {0} in Consistent scheduler "
                                "zk stop/close. Args:\n{1!r}")
                    messag = template.format(type(ex).__name__, ex.args)
                    self._logger.error("%s : traceback %s for %s" %
                                       (messag, traceback.format_exc(),
                                        self._service_name))
                finally:
                    self._zk = None
                gevent.sleep(1)

        self._pc = self._zk.SetPartitioner(path=self._zk_path,
                                           set=self._partition_set,
                                           partition_func=self._partitioner)
        self._wait_allocation = 0
        gevent.sleep(0)

    def _sandesh_connection_info_update(self, status, message):
        '''Report the Zookeeper connection status and log state changes.

        status names a ConnectionStatus attribute; message is the detail
        text that goes with it.
        '''
        new_conn_state = getattr(ConnectionStatus, status)
        ConnectionState.update(conn_type=ConnectionType.ZOOKEEPER,
                               name='Zookeeper', status=new_conn_state,
                               message=message,
                               server_addrs=self._zookeeper_srvr.split(','))

        # only a known, non-DOWN state going DOWN is worth a log line
        if ((self._conn_state and
             self._conn_state != ConnectionStatus.DOWN) and
                new_conn_state == ConnectionStatus.DOWN):
            msg = 'Connection to Zookeeper down: %s' % (message)
            self._supress_log(msg)
        # only a known, different state going UP is worth a log line
        if (self._conn_state and self._conn_state != new_conn_state and
                new_conn_state == ConnectionStatus.UP):
            msg = 'Connection to Zookeeper ESTABLISHED'
            self._supress_log(msg)

        self._conn_state = new_conn_state
    # end _sandesh_connection_info_update

    def _zk_lstnr(self, state):
        '''Zookeeper state listener; a LOST session terminates the process.'''
        self._logger.error("Consistent scheduler listen %s" % str(state))
        if state == KazooState.CONNECTED:
            # Update connection info
            self._sandesh_connection_info_update(status='UP', message='')
        elif state == KazooState.LOST:
            self._logger.error("Consistent scheduler connection LOST")
            # Lost the session with ZooKeeper Server
            # Best of option we have is to exit the process and restart all
            # over again
            self._sandesh_connection_info_update(
                status='DOWN', message='Connection to Zookeeper lost')
            os._exit(2)
        elif state == KazooState.SUSPENDED:
            self._logger.error("Consistent scheduler connection SUSPENDED")
            # Update connection info
            self._sandesh_connection_info_update(
                status='INIT',
                message='Connection to zookeeper lost. Retrying')

    def schedule(self, items, lock_timeout=30):
        '''Run one step of the partitioner state machine.

        Args:
            items: Iterable of objects exposing a ``name`` attribute
            lock_timeout: Timeout given to wait_for_acquire while the
                partitioner is still allocating
        Returns:
            True if the partition set is acquired and the work items were
            rebuilt from ``items``; False in every other state.
        os._exit(2) is called when the partitioner failed or when the
        allocation retry budget is used up.
        '''
        gevent.sleep(0)
        ret = False
        if self._pc.failed:
            self._logger.error('Lost or unable to acquire partition')
            os._exit(2)
        elif self._pc.release:
            self._supress_log('Releasing...')
            self._release()
        elif self._pc.allocating:
            self._supress_log('Waiting for allocation...')
            self._pc.wait_for_acquire(lock_timeout)
            if self._wait_allocation < self._MAX_WAIT_4_ALLOCATION:
                self._wait_allocation += 1
            else:
                self._logger.error('Giving up after %d tries!' %
                                   (self._wait_allocation))
                os._exit(2)
        elif self._pc.acquired:
            self._supress_log('got work: ', list(self._pc))
            ret = True
            self._wait_allocation = 0
            self._populate_work_items(items)
            self._supress_log('work items: ',
                              self._items2name(self.work_items()),
                              'from the list',
                              self._items2name(items))
        return ret

    def members(self):
        '''Return the node names known to the consistent hash.'''
        return list(self._con_hash.nodes)

    def partitions(self):
        '''Return the partitions the partitioner currently holds.'''
        return list(self._pc)

    def work_items(self):
        '''Return the work items of every partition in one flat list.'''
        return [item for bucket in self._partitions.values()
                for item in bucket]

    def finish(self):
        '''Give up all partitions and stop/close the zookeeper client.'''
        self._inform_delete(list(self._partitions))
        self._pc.finish()
        self._zk.remove_listener(self._zk_lstnr)
        gevent.sleep(1)
        try:
            self._zk.stop()
        except Exception:
            self._logger.error("Stopping kazooclient failed")
        else:
            self._logger.error("Stopping kazooclient successful")
        try:
            self._zk.close()
        except Exception:
            self._logger.error("Closing kazooclient failed")
        else:
            self._logger.error("Closing kazooclient successful")

    def _items2name(self, items):
        '''Return a list with the ``name`` of each item.'''
        return [item.name for item in items]

    def _supress_log(self, *s):
        '''Debug-log a message unless it repeats the previous one.

        Repeats are counted silently; the count is reported as a prefix of
        the next message that differs.
        '''
        slog = ' '.join(map(str, s))
        dl = ''
        # The previous message, not the repeat counter, is what a new
        # message has to be compared with for suppression to ever happen.
        if slog != self._last_log:
            if self._last_log_cnt:
                dl += ' ' * 4
                dl += '.' * 8
                dl += '[last print repeats %d times]' % self._last_log_cnt
                self._last_log_cnt = 0
            dl += slog
            self._last_log = slog
            self._logger.debug(dl)
        else:
            self._last_log_cnt += 1

    def _consistent_hash(self, members):
        '''Return the consistent hash after syncing it with ``members``.'''
        if self._con_hash is None:
            self._con_hash = ConsistentHash(members)
            self._logger.error('members: %s' % (str(self._con_hash.nodes)))
        cur, updtd = set(self._con_hash.nodes), set(members)
        if cur != updtd:
            newm = updtd - cur
            rmvd = cur - updtd
            if newm:
                self._logger.error('new members: %s' % (str(newm)))
                self._con_hash.add_nodes(list(newm))
            if rmvd:
                self._logger.error('members left: %s' % (str(rmvd)))
                self._con_hash.del_nodes(list(rmvd))
        return self._con_hash

    def _consistent_hash_get_node(self, members, partition):
        '''Return the member that the hash assigns to ``partition``.'''
        return self._consistent_hash(members).get_node(partition)

    def _partitioner_func(self, identifier, members, _partitions):
        '''Default partition function: partitions hashed to ``identifier``.'''
        partitions = [p for p in _partitions
                      if self._consistent_hash_get_node(members, p) ==
                      identifier]
        self._logger.error('partitions: %s' % (str(partitions)))
        return partitions

    def _release(self):
        '''Tell the handlers what will change, then release the set.'''
        old = set(self._pc)
        new = set(self._partitioner(self._pc._identifier,
                                    list(self._pc._party),
                                    self._partition_set))
        rmvd = old - new
        added = new - old
        if rmvd:
            self._inform_delete(list(rmvd))
        if added:
            self._inform_will_add(list(added))
        self._pc.release_set()

    def _list_items_in(self, partitions):
        '''Return the items of the given partitions in one flat list.'''
        return [item for part in partitions if part in self._partitions
                for item in self._partitions[part]]

    def _inform_will_add(self, partitions):
        '''Invoke the add handler, if callable, with the affected items.'''
        if callable(self._add_hndlr):
            self._add_hndlr(self._list_items_in(partitions))

    def _inform_delete(self, partitions):
        '''Invoke the delete handler, if callable, with the affected items.'''
        if callable(self._delete_hndlr):
            self._delete_hndlr(self._list_items_in(partitions))

    def _populate_work_items(self, items):
        '''Rebuild the partition -> items mapping from ``items``.

        Items are kept only when their partition is currently held, and a
        given name is recorded at most once per partition.
        '''
        self._refresh_work_items()
        # take the held partitions once, not once per item
        owned = set(self._pc)
        # buckets were just cleared, so no name has been seen yet
        seen = {}
        for i in items:
            part = str(self._item2part_func(i.name))
            if part not in owned:
                continue
            bucket = self._partitions.setdefault(part, [])
            names = seen.setdefault(part, set())
            if i.name not in names:
                names.add(i.name)
                bucket.append(i)
        self._logger.debug('@populate_work_items(%s): done!' % ' '.join(
            str(part) + ':' + ','.join(item.name for item in bucket)
            for part, bucket in self._partitions.items()))
        gevent.sleep(0)

    def _device2partition(self, key):
        '''Map an item name to a partition number in [0, bucketsize).

        Reads the last 8 bytes of the md5 digest as an unsigned 64 bit
        integer.  NOTE(review): the 'Q' format has no byte-order prefix,
        i.e. native order - hosts with different endianness would disagree.
        '''
        if not isinstance(key, bytes):
            # md5 only accepts bytes; hash text keys via their UTF-8 form
            key = key.encode('utf-8')
        return struct.unpack(
            'Q', hashlib.md5(key).digest()[-8:])[0] % self._bucketsize

    def _refresh_work_items(self):
        '''Reset every known partition bucket to an empty list.'''
        for k in self._partitions:
            self._partitions[k] = []
class ConsistentScheduler(object):
    '''
        LibPartitionHelper abstract out workers and work_items, and their
        mapping to partitions. So application can only deal with the work
        items it owns, without bothering about partition mapping.

        This class also provides synchronization primitives to ensure apps
        to clean up before giving up their partitions

        Args:
            service_name(str): Name used in the zookeeper path; defaults to
                the basename of sys.argv[0]
            zookeeper(str): Zookeeper server address string
            delete_hndlr: Optional callable invoked with the work items of
                partitions that are being given up
            add_hndlr: Optional callable invoked with the work items of
                partitions that are about to be added
            bucketsize(int): Number of partitions (named '0'..'bucketsize-1')
            item2part_func: Optional callable mapping an item name to a
                partition number; defaults to an md5 based mapping
            partitioner: Optional partition function handed to the
                zookeeper SetPartitioner; defaults to consistent hashing
            logger: Optional logger; defaults to logging.getLogger(__name__)
    '''
    _MAX_WAIT_4_ALLOCATION = 6 + randint(0, 9)

    def __init__(self, service_name=None, zookeeper='127.0.0.1:2181',
                 delete_hndlr=None, add_hndlr=None, bucketsize=47,
                 item2part_func=None, partitioner=None, logger=None):
        if logger:
            self._logger = logger
        else:
            self._logger = logging.getLogger(__name__)
        self._service_name = service_name or os.path.basename(sys.argv[0])
        self._item2part_func = item2part_func or self._device2partition
        self._zookeeper_srvr = zookeeper
        self._bucketsize = bucketsize
        self._delete_hndlr = delete_hndlr
        self._add_hndlr = add_hndlr
        self._partitioner = partitioner or self._partitioner_func
        self._partitions = {}
        self._con_hash = None
        self._last_log = ''
        self._last_log_cnt = 0
        # Stored as a list: the same object is passed to the partitioner on
        # each release, which a single-use map iterator could not survive.
        self._partition_set = [str(part) for part in range(self._bucketsize)]
        self._zk_path = '/'.join(['/contrail_cs', self._service_name])
        self._zk = KazooClient(self._zookeeper_srvr)
        self._zk.add_listener(self._zk_lstnr)
        self._zk.start()
        self._pc = self._zk.SetPartitioner(path=self._zk_path,
                                           set=self._partition_set,
                                           partition_func=self._partitioner)
        self._wait_allocation = 0
        gevent.sleep(0)

    def _zk_lstnr(self, state):
        '''Zookeeper state listener; only records the state change.'''
        self._supress_log('zk state change to %s' % str(state))

    def schedule(self, items, lock_timeout=30):
        '''Advance the partitioner state machine by one step.

        Args:
            items: Iterable of objects with a ``name`` attribute
            lock_timeout: Timeout passed to wait_for_acquire while the
                partitioner is allocating
        Returns:
            True when the partition set is acquired and the work items
            were (re)populated from ``items``, False otherwise.
        Raises:
            Exception: when the partitioner reports failure
            StopIteration: when allocation did not complete within
                _MAX_WAIT_4_ALLOCATION calls
        '''
        gevent.sleep(0)
        ret = False
        if self._pc.failed:
            raise Exception("Lost or unable to acquire partition")
        elif self._pc.release:
            self._supress_log('Releasing...')
            self._release()
        elif self._pc.allocating:
            self._supress_log('Waiting for allocation...')
            self._pc.wait_for_acquire(lock_timeout)
            if self._wait_allocation < self._MAX_WAIT_4_ALLOCATION:
                self._wait_allocation += 1
            else:
                raise StopIteration('Giving up after %d tries!' %
                                    (self._wait_allocation))
        elif self._pc.acquired:
            self._supress_log('got work: ', list(self._pc))
            ret = True
            self._wait_allocation = 0
            self._populate_work_items(items)
            self._supress_log('work items: ',
                              self._items2name(self.work_items()),
                              'from the list',
                              self._items2name(items))
        return ret

    def work_items(self):
        '''Return all work items of all partitions as one flat list.'''
        return [item for bucket in self._partitions.values()
                for item in bucket]

    def finish(self):
        '''Report all held items as deleted and finish the partitioner.'''
        self._inform_delete(list(self._partitions))
        self._pc.finish()

    def _items2name(self, items):
        '''Return the ``name`` attribute of every item as a list.'''
        return [item.name for item in items]

    def _supress_log(self, *s):
        '''Debug-log a message, collapsing consecutive identical messages.

        A repeated message only bumps a counter; the next different
        message is prefixed with how often the previous one repeated.
        '''
        slog = ' '.join(map(str, s))
        dl = ''
        # Compare with the last message text; the original compared with
        # the integer repeat counter, so nothing was ever suppressed.
        if slog != self._last_log:
            if self._last_log_cnt:
                dl += ' ' * 4
                dl += '.' * 8
                dl += '[last print repeats %d times]' % self._last_log_cnt
                self._last_log_cnt = 0
            dl += slog
            self._last_log = slog
            self._logger.debug(dl)
        else:
            self._last_log_cnt += 1

    def _consistent_hash(self, members):
        '''Return the consistent hash, synced to the given member list.'''
        if self._con_hash is None:
            self._con_hash = ConsistentHash(members)
            self._supress_log('members:', self._con_hash.nodes)
        cur, updtd = set(self._con_hash.nodes), set(members)
        if cur != updtd:
            newm = updtd - cur
            rmvd = cur - updtd
            if newm:
                self._supress_log('new workers:', newm)
                self._con_hash.add_nodes(list(newm))
            if rmvd:
                self._supress_log('workers left:', rmvd)
                self._con_hash.del_nodes(list(rmvd))
        return self._con_hash

    def _consistent_hash_get_node(self, members, partition):
        '''Return the member the consistent hash assigns to ``partition``.'''
        return self._consistent_hash(members).get_node(partition)

    def _partitioner_func(self, identifier, members, _partitions):
        '''Default partition function: partitions hashed to ``identifier``.'''
        return [p for p in _partitions
                if self._consistent_hash_get_node(members, p) == identifier]

    def _release(self):
        '''Notify the handlers about the upcoming change, then release.'''
        old = set(self._pc)
        new = set(self._partitioner(self._pc._identifier,
                                    list(self._pc._party),
                                    self._partition_set))
        rmvd = old - new
        added = new - old
        if rmvd:
            self._inform_delete(list(rmvd))
        if added:
            self._inform_will_add(list(added))
        self._pc.release_set()

    def _list_items_in(self, partitions):
        '''Return the work items of the given partitions as one flat list.'''
        return [item for part in partitions if part in self._partitions
                for item in self._partitions[part]]

    def _inform_will_add(self, partitions):
        '''Call the add handler (if callable) with the partitions' items.'''
        if callable(self._add_hndlr):
            self._add_hndlr(self._list_items_in(partitions))

    def _inform_delete(self, partitions):
        '''Call the delete handler (if callable) with the partitions' items.'''
        if callable(self._delete_hndlr):
            self._delete_hndlr(self._list_items_in(partitions))

    def _populate_work_items(self, items):
        '''Rebuild the partition -> items map from ``items``.

        Only items whose partition is currently held are kept, and an item
        name is stored at most once per partition.
        '''
        self._refresh_work_items()
        # snapshot once instead of rebuilding the list for every item
        owned = set(self._pc)
        # all buckets were just emptied, so the seen-names start empty too
        seen = {}
        for i in items:
            part = str(self._item2part_func(i.name))
            if part not in owned:
                continue
            bucket = self._partitions.setdefault(part, [])
            names = seen.setdefault(part, set())
            if i.name not in names:
                names.add(i.name)
                bucket.append(i)
        self._logger.debug('@populate_work_items(%s): done!' % ' '.join(
            str(part) + ':' + ','.join(item.name for item in bucket)
            for part, bucket in self._partitions.items()))
        gevent.sleep(0)

    def _device2partition(self, key):
        '''Map an item name to a partition number in [0, bucketsize).

        The last 8 bytes of the md5 digest are read as an unsigned 64 bit
        integer.  NOTE(review): 'Q' without a byte-order prefix is native
        order, so hosts of different endianness would map differently.
        '''
        if not isinstance(key, bytes):
            # md5 needs bytes; text keys are hashed by their UTF-8 encoding
            key = key.encode('utf-8')
        return struct.unpack(
            'Q', hashlib.md5(key).digest()[-8:])[0] % self._bucketsize

    def _refresh_work_items(self):
        '''Empty every known partition bucket, keeping the keys.'''
        for k in self._partitions:
            self._partitions[k] = []
class PartitionClient(object):
    """ Client Class for the Partition Library

    Example usage:
    ---------------------
    import libpartition
    from libpartition.libpartition import PartitionClient

    def own_change_cb(l):
            print "ownership change:" + str(l)

    c = PartitionClient("test", "s1", ["s1", "s2", "s3"], 32,
            own_change_cb, "zookeeper_s1")

    ##do some real work now"
    if (c.own_partition(1)):
        ...... do something with partition #1 .....
        .........
    ...
    c.update_cluster_list(["s1", "s2"])
    ...
    ----------------------

    You should not call any partition library routine from within the
    callback function

    Args:
        app_name(str): Name of the app for which partition cluster is used
        self_name(str): Name of the local cluster node (can be ip address)
        cluster_list(list): List of all the nodes in the cluster including
            local node
        max_partition(int): Partition space always go from
            0..max_partition-1
        partition_update_cb: Callback function invoked when partition
            ownership list is updated.
        zk_server(str): <zookeeper server>:<zookeeper server port>
        logger: Optional logger object. When None, logging.basicConfig()
            is called and the ``logging`` module itself is used.

    Raises:
        ValueError: if self_name is not present in cluster_list
    """

    def __init__(self, app_name, self_name, cluster_list, max_partition,
                 partition_update_cb, zk_server, logger=None):

        # Initialize local variables
        self._zk_server = zk_server
        self._cluster_list = set(cluster_list)
        self._max_partition = max_partition
        self._update_cb = partition_update_cb
        self._curr_part_ownership_list = []
        self._target_part_ownership_list = []
        self._con_hash = ConsistentHash(cluster_list)
        self._name = self_name

        # some sanity check
        if self._name not in cluster_list:
            raise ValueError('cluster list is missing local server name')

        # initialize logging and other stuff
        if logger is None:
            logging.basicConfig()
            self._logger = logging
        else:
            self._logger = logger
        self._conn_state = None
        self._sandesh_connection_info_update(status='INIT', message='')

        # connect to zookeeper, retrying once a second until start() succeeds
        self._zk = KazooClient(zk_server)
        while True:
            try:
                self._zk.start()
                break
            # The timeout is named explicitly next to Exception, exactly as
            # the two separate handlers did before.  Zookeeper is also
            # throwing exception due to delay in master election.
            except (gevent.event.Timeout, Exception) as e:
                # Update connection info
                self._sandesh_connection_info_update(status='DOWN',
                                                     message=str(e))
                gevent.sleep(1)
        # Update connection info
        self._sandesh_connection_info_update(status='UP', message='')
        # Done connecting to ZooKeeper

        # create a lock array to contain locks for each partition
        self._part_locks = []
        for part in range(0, self._max_partition):
            lockpath = "/lockpath/" + app_name + "/" + str(part)
            self._part_locks.append(self._zk.Lock(lockpath, self._name))

        # initialize partition # to lock acquire greenlet dictionary
        self._part_lock_task_dict = {}

        self._logger.error("initial servers:" + str(self._cluster_list))

        # update target partition ownership list
        self._refresh_target_ownership()

        # update current ownership list
        self._acquire_partition_ownership()
    #end __init__

    def _refresh_target_ownership(self):
        """Recompute which partitions the consistent hash maps to this node."""
        self._target_part_ownership_list = [
            part for part in range(0, self._max_partition)
            if self._con_hash.get_node(str(part)) == self._name]
    #end _refresh_target_ownership

    def _sandesh_connection_info_update(self, status, message):
        """Publish the ZooKeeper connection state and log transitions.

        Args:
            status(str): attribute name looked up on ConnectionStatus
                ('INIT', 'UP' and 'DOWN' are the values used by this class)
            message(str): text attached to the connection state
        """
        from pysandesh.connection_info import ConnectionState
        from pysandesh.gen_py.process_info.ttypes import ConnectionStatus, \
            ConnectionType
        from pysandesh.gen_py.sandesh.ttypes import SandeshLevel

        new_conn_state = getattr(ConnectionStatus, status)
        ConnectionState.update(conn_type=ConnectionType.ZOOKEEPER,
                               name='Zookeeper', status=new_conn_state,
                               message=message,
                               server_addrs=self._zk_server.split(','))

        # Log only on a transition into DOWN ...
        if (self._conn_state and
                self._conn_state != ConnectionStatus.DOWN and
                new_conn_state == ConnectionStatus.DOWN):
            msg = 'Connection to Zookeeper down: %s' % (message)
            self._logger.error(msg)
        # ... or on a transition into UP from a different, known state.
        if (self._conn_state and
                self._conn_state != new_conn_state and
                new_conn_state == ConnectionStatus.UP):
            msg = 'Connection to Zookeeper ESTABLISHED'
            self._logger.error(msg)

        self._conn_state = new_conn_state
    # end _sandesh_connection_info_update

    # following routine is the greenlet task function to acquire the lock
    # for a partition
    def _acquire_lock(self, part):
        """Poll once a second until the lock for `part` is acquired.

        Returns:
            True once the lock is held (the partition is added to the
            current ownership list and the callback is invoked),
            False if the acquisition was cancelled.

        Raises:
            SystemExit: on any other exception while acquiring.
        """
        # lock for the partition
        lock = self._part_locks[part]

        # go in an infinite loop waiting to acquire the lock
        try:
            while True:
                if lock.acquire(blocking=False):
                    self._logger.error("Acquired lock for:" + str(part))
                    self._curr_part_ownership_list.append(part)
                    self._update_cb(self._curr_part_ownership_list)
                    return True
                gevent.sleep(1)
        except CancelledError:
            self._logger.error("Lock acquire cancelled for:" + str(part))
            return False
        except Exception as ex:
            # TODO: If we have a non-KazooException, the lock object
            #       may get stuck in the "cancelled" state
            self._logger.error("Lock acquire unexpected error!: " + str(ex))
            # This exception should get propogated to main thread
            raise SystemExit
    #end _acquire_lock

    # get rid of finished spawned tasks from datastructures
    def _cleanup_greenlets(self):
        """Drop every finished greenlet from the partition->task dictionary."""
        # Iterate over a snapshot of the keys: entries are deleted inside the
        # loop, and deleting while iterating a live key view raises
        # RuntimeError on Python 3.
        for part in list(self._part_lock_task_dict):
            if self._part_lock_task_dict[part].ready():
                del self._part_lock_task_dict[part]
    #end _cleanup_greenlets

    # following routine launches tasks to acquire partition locks
    def _acquire_partition_ownership(self):
        """Reconcile current ownership with the target ownership list.

        Spawns lock-acquire greenlets for partitions that should be owned,
        cancels/releases partitions that should no longer be owned, and
        invokes the update callback if the current ownership list shrank.
        """
        # cleanup any finished greenlets
        self._cleanup_greenlets()

        # this variable will help us decide if we need to call callback
        updated_curr_ownership = False

        # list of partitions for which locks have to be released
        release_lock_list = []

        self._logger.info("known servers: %s" % self._con_hash.get_all_nodes())

        for part in range(0, self._max_partition):
            if part in self._target_part_ownership_list:
                if part in self._curr_part_ownership_list:
                    # do nothing, I already have ownership of this partition
                    self._logger.info("No need to acquire ownership of:" +
                                      str(part))
                else:
                    # I need to acquire lock for this partition before I own
                    if part in self._part_lock_task_dict:
                        try:
                            self._part_lock_task_dict[part].get(block=False)
                        # Deliberately bare: a non-blocking get() on an
                        # unfinished greenlet signals via a gevent timeout,
                        # which must be caught here as well.
                        except:
                            # do nothing there is already a greenlet running
                            # to acquire the lock
                            self._logger.error("Already a greenlet running to"
                                               " acquire:" + str(part))
                            continue

                        # Greenlet died without getting ownership. Cleanup
                        self._logger.error("Cleanup stale greenlet running to"
                                           " acquire:" + str(part))
                        del self._part_lock_task_dict[part]

                    self._logger.error("Starting greenlet running to"
                                       " acquire:" + str(part))
                    # launch the greenlet to acquire the lock
                    g = Greenlet.spawn(self._acquire_lock, part)
                    self._part_lock_task_dict[part] = g
            else:
                # give up ownership of the partition

                # cancel any lock acquisition which is ongoing
                if part in self._part_lock_task_dict:
                    try:
                        self._part_lock_task_dict[part].get(block=False)
                    # Deliberately bare, same reason as above.
                    except:
                        # Implicit string concatenation instead of a
                        # backslash inside the literal, which used to embed
                        # the source indentation in the message.
                        self._logger.error("canceling lock acquisition going"
                                           " on for:" + str(part))
                        # Cancelling the lock should result in killing the
                        # gevent
                        self._part_locks[part].cancel()
                        self._part_lock_task_dict[part].get(block=True)

                    del self._part_lock_task_dict[part]

                if part in self._curr_part_ownership_list:
                    release_lock_list.append(part)
                    self._curr_part_ownership_list.remove(part)
                    updated_curr_ownership = True
                    self._logger.error("giving up ownership of:" + str(part))

        if updated_curr_ownership:
            # current partition membership was updated call the callback
            self._update_cb(self._curr_part_ownership_list)

        # release locks which were acquired (only after the callback ran)
        for part in release_lock_list:
            self._logger.error("release the lock which was acquired:" +
                               str(part))
            try:
                self._part_locks[part].release()
                self._logger.error("fully gave up ownership of:" + str(part))
            except:
                # best effort: a failed release is ignored
                pass
    #end _acquire_partition_ownership

    def update_cluster_list(self, cluster_list):
        """ Updates the cluster node list
        Args:
            cluster_list(list): New list of names of the nodes in
                the cluster
        Returns:
            None
        Raises:
            ValueError: if the local node name is not in cluster_list
        """
        # some sanity check
        if self._name not in cluster_list:
            raise ValueError('cluster list is missing local server name')

        new_cluster_list = set(cluster_list)
        new_servers = list(new_cluster_list.difference(self._cluster_list))
        deleted_servers = list(
            set(self._cluster_list).difference(new_cluster_list))
        self._cluster_list = set(cluster_list)

        # update the hash structure
        if new_servers:
            self._logger.error("new servers:" + str(new_servers))
            self._con_hash.add_nodes(new_servers)
        if deleted_servers:
            self._logger.error("deleted servers:" + str(deleted_servers))
            self._con_hash.del_nodes(deleted_servers)

        # update target partition ownership list
        self._refresh_target_ownership()

        # update current ownership list
        self._acquire_partition_ownership()
    #end update_cluster_list

    def own_partition(self, part_no):
        """ Returns ownership information of a partition
        Args:
            part_no(int) : Partition no
        Returns:
            True if partition is owned by the local node
            False if partition is not owned by the local node
        """
        return part_no in self._curr_part_ownership_list
    #end own_partition

    def close(self):
        """ Closes any connections and frees up any data structures
        Args:
        Returns:
            None
        """
        # clean up greenlets (best effort)
        for part in list(self._part_lock_task_dict):
            try:
                self._part_lock_task_dict[part].kill()
            except:
                pass

        # close zookeeper (best effort)
        try:
            self._zk.stop()
        except:
            pass
        try:
            self._zk.close()
        except:
            pass
    #end close
class ConsistentScheduler(object):
    """
    LibPartitionHelper abstracts out workers and work_items, and their
    mapping to partitions. So the application can deal only with the work
    items it owns, without bothering about partition mapping.

    This class also provides synchronization primitives to let apps clean
    up before giving up their partitions.

    Args:
        service_name(str): name used to build the zookeeper path; defaults
            to the basename of sys.argv[0]
        zookeeper(str): zookeeper server address passed to KazooClient
        delete_hndlr: optional callable invoked with the work items of
            partitions that are being given up
        add_hndlr: optional callable invoked with the work items of
            partitions that are about to be added
        bucketsize(int): number of partitions
        item2part_func: optional callable mapping an item name to a
            partition number; defaults to an md5 based mapping
        partitioner: optional partition function handed to the zookeeper
            SetPartitioner; defaults to a consistent-hash based one
        logger: optional logger; defaults to logging.getLogger(__name__)
    """

    # Upper bound on consecutive "allocating" polls in schedule(); randomised
    # once, when the class body is executed.
    _MAX_WAIT_4_ALLOCATION = 6 + randint(0, 9)

    def __init__(
        self,
        service_name=None,
        zookeeper="127.0.0.1:2181",
        delete_hndlr=None,
        add_hndlr=None,
        bucketsize=47,
        item2part_func=None,
        partitioner=None,
        logger=None,
    ):
        if logger:
            self._logger = logger
        else:
            self._logger = logging.getLogger(__name__)
        self._service_name = service_name or os.path.basename(sys.argv[0])
        self._item2part_func = item2part_func or self._device2partition
        self._zookeeper_srvr = zookeeper
        self._bucketsize = bucketsize
        self._delete_hndlr = delete_hndlr
        self._add_hndlr = add_hndlr
        self._partitioner = partitioner or self._partitioner_func
        self._partitions = {}
        self._con_hash = None
        self._last_log = ""
        self._last_log_cnt = 0
        # A real list (not a lazy map object) because it is handed to the
        # partitioner and iterated again in _release().
        self._partition_set = [str(p) for p in range(self._bucketsize)]
        self._zk_path = "/".join(["/contrail_cs", self._service_name])
        self._zk = KazooClient(self._zookeeper_srvr)
        self._zk.add_listener(self._zk_lstnr)
        self._zk.start()
        self._pc = self._zk.SetPartitioner(
            path=self._zk_path, set=self._partition_set, partition_func=self._partitioner
        )
        self._wait_allocation = 0
        gevent.sleep(0)

    def _zk_lstnr(self, state):
        """Connection-state listener registered on the zookeeper client."""
        self._supress_log("zk state change to %s" % str(state))

    def schedule(self, items, lock_timeout=30):
        """Advance the partitioner state machine by one step.

        Args:
            items: iterable of objects with a ``name`` attribute
            lock_timeout: timeout passed to wait_for_acquire()

        Returns:
            True when partitions are acquired and work items have been
            populated, False otherwise.

        Raises:
            Exception: when the partitioner reports ``failed``
            StopIteration: after _MAX_WAIT_4_ALLOCATION consecutive polls
                in the ``allocating`` state
        """
        gevent.sleep(0)
        ret = False
        if self._pc.failed:
            raise Exception("Lost or unable to acquire partition")
        elif self._pc.release:
            self._supress_log("Releasing...")
            self._release()
        elif self._pc.allocating:
            self._supress_log("Waiting for allocation...")
            self._pc.wait_for_acquire(lock_timeout)
            if self._wait_allocation < self._MAX_WAIT_4_ALLOCATION:
                self._wait_allocation += 1
            else:
                raise StopIteration("Giving up after %d tries!" % (self._wait_allocation))
        elif self._pc.acquired:
            self._supress_log("got work: ", list(self._pc))
            ret = True
            self._wait_allocation = 0
            self._populate_work_items(items)
            self._supress_log(
                "work items: ",
                self._items2name(self.work_items()),
                "from the list",
                self._items2name(items),
            )
        return ret

    def work_items(self):
        """Return one flat list with the work items of every partition."""
        return sum(self._partitions.values(), [])

    def finish(self):
        """Report all held items to the delete handler, then finish the partitioner."""
        self._inform_delete(list(self._partitions))
        self._pc.finish()

    def _items2name(self, items):
        """Return the list of ``name`` attributes of `items`."""
        return [item.name for item in items]

    def _supress_log(self, *s):
        """Debug-log the joined arguments, collapsing consecutive repeats.

        A message equal to the previous one is only counted. The count is
        prepended to the next different message.
        """
        slog = " ".join(map(str, s))
        # Compare with the previous *message*. Comparing with the repeat
        # counter (an int) was always unequal, so nothing was suppressed.
        if slog == self._last_log:
            self._last_log_cnt += 1
            return
        dl = ""
        if self._last_log_cnt:
            dl += " " * 4
            dl += "." * 8
            dl += "[last print repeats %d times]" % self._last_log_cnt
            self._last_log_cnt = 0
        dl += slog
        self._last_log = slog
        self._logger.debug(dl)

    def _consistent_hash(self, members):
        """Return the consistent hash, synced to the given member list."""
        if self._con_hash is None:
            self._con_hash = ConsistentHash(members)
            self._supress_log("members:", self._con_hash.nodes)
        cur, updtd = set(self._con_hash.nodes), set(members)
        if cur != updtd:
            newm = updtd - cur
            rmvd = cur - updtd
            if newm:
                self._supress_log("new workers:", newm)
                self._con_hash.add_nodes(list(newm))
            if rmvd:
                self._supress_log("workers left:", rmvd)
                self._con_hash.del_nodes(list(rmvd))
        return self._con_hash

    def _consistent_hash_get_node(self, members, partition):
        """Return the member the consistent hash assigns `partition` to."""
        return self._consistent_hash(members).get_node(partition)

    def _partitioner_func(self, identifier, members, _partitions):
        """Default partition function: partitions hashed to `identifier`."""
        return [p for p in _partitions if self._consistent_hash_get_node(members, p) == identifier]

    def _release(self):
        """Notify handlers about the ownership delta, then release the set."""
        old = set(self._pc)
        new = set(self._partitioner(self._pc._identifier, list(self._pc._party), self._partition_set))
        rmvd = old - new
        added = new - old
        if rmvd:
            self._inform_delete(list(rmvd))
        if added:
            self._inform_will_add(list(added))
        self._pc.release_set()

    def _list_items_in(self, partitions):
        """Return the flat list of items held for the given partitions."""
        return sum([self._partitions[k] for k in partitions if k in self._partitions], [])

    def _inform_will_add(self, partitions):
        """Call the add handler (if callable) with the items of `partitions`."""
        if callable(self._add_hndlr):
            self._add_hndlr(self._list_items_in(partitions))

    def _inform_delete(self, partitions):
        """Call the delete handler (if callable) with the items of `partitions`."""
        if callable(self._delete_hndlr):
            self._delete_hndlr(self._list_items_in(partitions))

    def _populate_work_items(self, items):
        """Rebuild the partition->items map from `items` for owned partitions."""
        self._refresh_work_items()
        # Snapshot the owned partitions once instead of once per item.
        owned = set(self._pc)
        for i in items:
            part = str(self._item2part_func(i.name))
            if part in owned:
                bucket = self._partitions.setdefault(part, [])
                # skip items whose name is already present in the bucket
                if i.name not in [x.name for x in bucket]:
                    bucket.append(i)
        self._logger.debug(
            "@populate_work_items(%s): done!"
            % " ".join(
                str(part) + ":" + ",".join(x.name for x in held)
                for part, held in self._partitions.items()
            )
        )
        gevent.sleep(0)

    def _device2partition(self, key):
        """Map `key` to a partition number in [0, bucketsize) via md5."""
        # hashlib needs bytes; text keys are encoded first (byte strings
        # pass through unchanged).
        if not isinstance(key, bytes):
            key = key.encode("utf-8")
        # 'Q' without a byte-order prefix unpacks in native byte order.
        return struct.unpack("Q", hashlib.md5(key).digest()[-8:])[0] % self._bucketsize

    def _refresh_work_items(self):
        """Empty every known partition bucket, keeping the keys."""
        for k in self._partitions:
            self._partitions[k] = []
class PartitionClient(object):
    """ Client Class for the Partition Library

    Example usage:
    ---------------------
    import libpartition
    from libpartition import PartitionClient

    def own_change_cb(l):
            print "ownership change:" + str(l)

    c = PartitionClient("test", "s1", ["s1", "s2", "s3"], 32,
            own_change_cb, "zookeeper_s1")

    ##do some real work now"
    if (c.own_partition(1)):
        ...... do something with partition #1 .....
        .........
    ...
    c.update_cluster_list(["s1", "s2"])
    ...
    ----------------------

    You should not call any partition library routine from within the
    callback function

    Args:
        app_name(str): Name of the app for which partition cluster is used
        self_name(str): Name of the local cluster node (can be ip address)
        cluster_list(list): List of all the nodes in the cluster including
            local node
        max_partition(int): Partition space always go from
            0..max_partition-1
        partition_update_cb: Callback function invoked when partition
            ownership list is updated.
        zk_server(str): <zookeeper server>:<zookeeper server port>

    Raises:
        ValueError: if self_name is not present in cluster_list
    """

    def __init__(self, app_name, self_name, cluster_list, max_partition,
                 partition_update_cb, zk_server):

        # Initialize local variables
        self._zk_server = zk_server
        self._cluster_list = set(cluster_list)
        self._max_partition = max_partition
        self._update_cb = partition_update_cb
        self._curr_part_ownership_list = []
        self._target_part_ownership_list = []
        self._con_hash = ConsistentHash(cluster_list)
        self._name = self_name

        # some sanity check
        if self._name not in cluster_list:
            raise ValueError('cluster list is missing local server name')

        # connect to zookeeper
        self._zk = KazooClient(zk_server)
        self._zk.start()

        # create a lock array to contain locks for each partition
        self._part_locks = []
        for part in range(0, self._max_partition):
            lockpath = "/lockpath/" + app_name + "/" + str(part)
            self._part_locks.append(self._zk.Lock(lockpath, self._name))

        # initialize partition # to lock acquire greenlet dictionary
        self._part_lock_task_dict = {}

        # update target partition ownership list
        self._refresh_target_ownership()

        # update current ownership list
        self._acquire_partition_ownership()
    #end __init__

    def _refresh_target_ownership(self):
        """Recompute which partitions the consistent hash maps to this node."""
        self._target_part_ownership_list = [
            part for part in range(0, self._max_partition)
            if self._con_hash.get_node(str(part)) == self._name]
    #end _refresh_target_ownership

    # following routine is the greenlet task function to acquire the lock
    # for a partition
    def _acquire_lock(self, part):
        """Poll once a second until the lock for `part` is acquired.

        On success the partition is appended to the current ownership list,
        the update callback is invoked, and the acquire() result is returned.
        """
        # lock for the partition
        lock = self._part_locks[part]

        # a lock acquisition is getting cancelled: wait until the flag clears
        while lock.cancelled:
            # Implicit concatenation instead of a backslash inside the
            # literal, which embedded source indentation in the message.
            logging.info("lock acquisition is getting cancelled, "
                         "lets wait")
            gevent.sleep(1)

        # go in an infinite loop waiting to acquire the lock
        while True:
            ret = lock.acquire(blocking=False)
            if ret:
                logging.info("Acquired lock for:" + str(part))
                self._curr_part_ownership_list.append(part)
                self._update_cb(self._curr_part_ownership_list)
                return ret
            gevent.sleep(1)
    #end _acquire_lock

    # get rid of finished spawned tasks from datastructures
    def _cleanup_greenlets(self):
        """Drop every finished greenlet from the partition->task dictionary."""
        # Iterate over a snapshot of the keys: entries are deleted inside the
        # loop, and deleting while iterating a live key view raises
        # RuntimeError on Python 3.
        for part in list(self._part_lock_task_dict):
            if self._part_lock_task_dict[part].ready():
                del self._part_lock_task_dict[part]
    #end _cleanup_greenlets

    # following routine launches tasks to acquire partition locks
    def _acquire_partition_ownership(self):
        """Reconcile current ownership with the target ownership list.

        Spawns lock-acquire greenlets for partitions that should be owned,
        kills/cancels/releases the ones that should not, and invokes the
        update callback if the current ownership list shrank.
        """
        # cleanup any finished greenlets
        self._cleanup_greenlets()

        # this variable will help us decide if we need to call callback
        updated_curr_ownership = False

        for part in range(0, self._max_partition):
            if part in self._target_part_ownership_list:
                if part in self._curr_part_ownership_list:
                    # do nothing, I already have ownership of this partition
                    logging.info("No need to acquire ownership of:" +
                                 str(part))
                elif part in self._part_lock_task_dict:
                    # do nothing there is already a greenlet running to
                    # acquire the lock
                    logging.info("Already a greenlet running to"
                                 " acquire:" + str(part))
                else:
                    # launch the greenlet to acquire the lock
                    g = Greenlet.spawn(self._acquire_lock, part)
                    self._part_lock_task_dict[part] = g
            else:
                # give up ownership of the partition

                # cancel any lock acquisition which is ongoing
                if part in self._part_lock_task_dict:
                    # kill the greenlet trying to get the lock for this
                    # partition
                    self._part_lock_task_dict[part].kill()
                    del self._part_lock_task_dict[part]
                    logging.info("canceling lock acquisition going on "
                                 "for:" + str(part))
                    try:
                        self._part_locks[part].cancel()
                    except:
                        # best effort: a failed cancel is ignored
                        pass

                if part in self._curr_part_ownership_list:
                    # release if lock was already acquired
                    logging.info("release the lock which was acquired:" +
                                 str(part))
                    try:
                        self._part_locks[part].release()
                    except:
                        # best effort: a failed release is ignored
                        pass
                    self._curr_part_ownership_list.remove(part)
                    updated_curr_ownership = True
                    logging.info("gave up ownership of:" + str(part))

        if updated_curr_ownership:
            # current partition membership was updated call the callback
            self._update_cb(self._curr_part_ownership_list)
    #end _acquire_partition_ownership

    def update_cluster_list(self, cluster_list):
        """ Updates the cluster node list
        Args:
            cluster_list(list): New list of names of the nodes in
                the cluster
        Returns:
            None
        Raises:
            ValueError: if the local node name is not in cluster_list
        """
        # some sanity check
        if self._name not in cluster_list:
            raise ValueError('cluster list is missing local server name')

        new_cluster_list = set(cluster_list)
        new_servers = list(new_cluster_list.difference(self._cluster_list))
        deleted_servers = list(
            set(self._cluster_list).difference(new_cluster_list))
        # keep the attribute a set, the same type __init__ stores
        self._cluster_list = new_cluster_list
        logging.info("deleted servers:" + str(deleted_servers))
        logging.info("new servers:" + str(new_servers))

        # update the hash structure
        if new_servers:
            self._con_hash.add_nodes(new_servers)
        if deleted_servers:
            self._con_hash.del_nodes(deleted_servers)

        # update target partition ownership list
        self._refresh_target_ownership()

        # update current ownership list
        self._acquire_partition_ownership()
    #end update_cluster_list

    def own_partition(self, part_no):
        """ Returns ownership information of a partition
        Args:
            part_no(int) : Partition no
        Returns:
            True if partition is owned by the local node
            False if partition is not owned by the local node
        """
        return part_no in self._curr_part_ownership_list
    #end own_partition

    def close(self):
        """ Closes any connections and frees up any data structures
        Args:
        Returns:
            None
        """
        # clean up greenlets (best effort)
        for part in list(self._part_lock_task_dict):
            try:
                self._part_lock_task_dict[part].kill()
            except:
                pass

        # close zookeeper (best effort)
        try:
            self._zk.stop()
        except:
            pass
        try:
            self._zk.close()
        except:
            pass
    #end close
class ConsistentScheduler(object):
    """
    LibPartitionHelper abstracts out workers and work_items, and their
    mapping to partitions. So the application can deal only with the work
    items it owns, without bothering about partition mapping.

    This class also provides synchronization primitives to let apps clean
    up before giving up their partitions.

    Args:
        service_name(str): name used to build the zookeeper path; defaults
            to the basename of sys.argv[0]
        zookeeper(str): zookeeper server address(es) passed to KazooClient;
            split on ',' when reported as connection info
        delete_hndlr: optional callable invoked with the work items of
            partitions that are being given up
        add_hndlr: optional callable invoked with the work items of
            partitions that are about to be added
        bucketsize(int): number of partitions
        item2part_func: optional callable mapping an item name to a
            partition number; defaults to an md5 based mapping
        partitioner: optional partition function handed to the zookeeper
            SetPartitioner; defaults to a consistent-hash based one
        logger: optional logger; defaults to logging.getLogger(__name__)
    """

    # Upper bound on consecutive "allocating" polls in schedule(); randomised
    # once, when the class body is executed.
    _MAX_WAIT_4_ALLOCATION = 6 + randint(0, 9)

    def __init__(
        self,
        service_name=None,
        zookeeper="127.0.0.1:2181",
        delete_hndlr=None,
        add_hndlr=None,
        bucketsize=47,
        item2part_func=None,
        partitioner=None,
        logger=None,
    ):
        if logger:
            self._logger = logger
        else:
            self._logger = logging.getLogger(__name__)
        self._service_name = service_name or os.path.basename(sys.argv[0])
        self._item2part_func = item2part_func or self._device2partition
        self._zookeeper_srvr = zookeeper
        self._bucketsize = bucketsize
        self._delete_hndlr = delete_hndlr
        self._add_hndlr = add_hndlr
        self._partitioner = partitioner or self._partitioner_func
        self._partitions = {}
        self._con_hash = None
        self._last_log = ""
        self._last_log_cnt = 0
        # A real list (not a lazy map object) because it is handed to the
        # partitioner and iterated again in _release().
        self._partition_set = [str(p) for p in range(self._bucketsize)]
        self._zk_path = "/".join(["/contrail_cs", self._service_name])
        self._zk = KazooClient(self._zookeeper_srvr)
        self._zk.add_listener(self._zk_lstnr)
        self._conn_state = None
        # Retry once a second until the zookeeper client starts.
        while True:
            try:
                self._zk.start()
                break
            # The timeout is named explicitly next to Exception, exactly as
            # the two separate handlers did before.  Zookeeper is also
            # throwing exception due to delay in master election.
            except (gevent.event.Timeout, Exception) as e:
                # Update connection info
                self._sandesh_connection_info_update(status="DOWN", message=str(e))
                gevent.sleep(1)
        self._pc = self._zk.SetPartitioner(
            path=self._zk_path, set=self._partition_set, partition_func=self._partitioner
        )
        self._wait_allocation = 0
        gevent.sleep(0)

    def _sandesh_connection_info_update(self, status, message):
        """Publish the ZooKeeper connection state and log transitions.

        Args:
            status(str): attribute name looked up on ConnectionStatus
                ('INIT', 'UP' and 'DOWN' are the values used by this class)
            message(str): text attached to the connection state
        """
        from pysandesh.connection_info import ConnectionState
        from pysandesh.gen_py.process_info.ttypes import ConnectionStatus, ConnectionType

        new_conn_state = getattr(ConnectionStatus, status)
        ConnectionState.update(
            conn_type=ConnectionType.ZOOKEEPER,
            name="Zookeeper",
            status=new_conn_state,
            message=message,
            server_addrs=self._zookeeper_srvr.split(","),
        )

        # Log only on a transition into DOWN ...
        if (self._conn_state and self._conn_state != ConnectionStatus.DOWN) and new_conn_state == ConnectionStatus.DOWN:
            msg = "Connection to Zookeeper down: %s" % (message)
            self._supress_log(msg)
        # ... or on a transition into UP from a different, known state.
        if self._conn_state and self._conn_state != new_conn_state and new_conn_state == ConnectionStatus.UP:
            msg = "Connection to Zookeeper ESTABLISHED"
            self._supress_log(msg)

        self._conn_state = new_conn_state

    # end _sandesh_connection_info_update

    def _zk_lstnr(self, state):
        """Connection-state listener registered on the zookeeper client."""
        if state == KazooState.CONNECTED:
            # Update connection info
            self._sandesh_connection_info_update(status="UP", message="")
        elif state == KazooState.LOST:
            # Lost the session with ZooKeeper Server
            # Best of option we have is to exit the process and restart all
            # over again
            self._sandesh_connection_info_update(status="DOWN", message="Connection to Zookeeper lost")
            os._exit(2)
        elif state == KazooState.SUSPENDED:
            # Update connection info
            self._sandesh_connection_info_update(status="INIT", message="Connection to zookeeper lost. Retrying")

    def schedule(self, items, lock_timeout=30):
        """Advance the partitioner state machine by one step.

        Args:
            items: iterable of objects with a ``name`` attribute
            lock_timeout: timeout passed to wait_for_acquire()

        Returns:
            True when partitions are acquired and work items have been
            populated, False otherwise.

        Raises:
            Exception: when the partitioner reports ``failed``
            StopIteration: after _MAX_WAIT_4_ALLOCATION consecutive polls
                in the ``allocating`` state
        """
        gevent.sleep(0)
        ret = False
        if self._pc.failed:
            raise Exception("Lost or unable to acquire partition")
        elif self._pc.release:
            self._supress_log("Releasing...")
            self._release()
        elif self._pc.allocating:
            self._supress_log("Waiting for allocation...")
            self._pc.wait_for_acquire(lock_timeout)
            if self._wait_allocation < self._MAX_WAIT_4_ALLOCATION:
                self._wait_allocation += 1
            else:
                raise StopIteration("Giving up after %d tries!" % (self._wait_allocation))
        elif self._pc.acquired:
            self._supress_log("got work: ", list(self._pc))
            ret = True
            self._wait_allocation = 0
            self._populate_work_items(items)
            self._supress_log(
                "work items: ",
                self._items2name(self.work_items()),
                "from the list",
                self._items2name(items),
            )
        return ret

    def work_items(self):
        """Return one flat list with the work items of every partition."""
        return sum(self._partitions.values(), [])

    def finish(self):
        """Report all held items to the delete handler, then finish the partitioner."""
        self._inform_delete(list(self._partitions))
        self._pc.finish()

    def _items2name(self, items):
        """Return the list of ``name`` attributes of `items`."""
        return [item.name for item in items]

    def _supress_log(self, *s):
        """Debug-log the joined arguments, collapsing consecutive repeats.

        A message equal to the previous one is only counted. The count is
        prepended to the next different message.
        """
        slog = " ".join(map(str, s))
        # Compare with the previous *message*. Comparing with the repeat
        # counter (an int) was always unequal, so nothing was suppressed.
        if slog == self._last_log:
            self._last_log_cnt += 1
            return
        dl = ""
        if self._last_log_cnt:
            dl += " " * 4
            dl += "." * 8
            dl += "[last print repeats %d times]" % self._last_log_cnt
            self._last_log_cnt = 0
        dl += slog
        self._last_log = slog
        self._logger.debug(dl)

    def _consistent_hash(self, members):
        """Return the consistent hash, synced to the given member list."""
        if self._con_hash is None:
            self._con_hash = ConsistentHash(members)
            self._supress_log("members:", self._con_hash.nodes)
        cur, updtd = set(self._con_hash.nodes), set(members)
        if cur != updtd:
            newm = updtd - cur
            rmvd = cur - updtd
            if newm:
                self._supress_log("new workers:", newm)
                self._con_hash.add_nodes(list(newm))
            if rmvd:
                self._supress_log("workers left:", rmvd)
                self._con_hash.del_nodes(list(rmvd))
        return self._con_hash

    def _consistent_hash_get_node(self, members, partition):
        """Return the member the consistent hash assigns `partition` to."""
        return self._consistent_hash(members).get_node(partition)

    def _partitioner_func(self, identifier, members, _partitions):
        """Default partition function: partitions hashed to `identifier`."""
        return [p for p in _partitions if self._consistent_hash_get_node(members, p) == identifier]

    def _release(self):
        """Notify handlers about the ownership delta, then release the set."""
        old = set(self._pc)
        new = set(self._partitioner(self._pc._identifier, list(self._pc._party), self._partition_set))
        rmvd = old - new
        added = new - old
        if rmvd:
            self._inform_delete(list(rmvd))
        if added:
            self._inform_will_add(list(added))
        self._pc.release_set()

    def _list_items_in(self, partitions):
        """Return the flat list of items held for the given partitions."""
        return sum([self._partitions[k] for k in partitions if k in self._partitions], [])

    def _inform_will_add(self, partitions):
        """Call the add handler (if callable) with the items of `partitions`."""
        if callable(self._add_hndlr):
            self._add_hndlr(self._list_items_in(partitions))

    def _inform_delete(self, partitions):
        """Call the delete handler (if callable) with the items of `partitions`."""
        if callable(self._delete_hndlr):
            self._delete_hndlr(self._list_items_in(partitions))

    def _populate_work_items(self, items):
        """Rebuild the partition->items map from `items` for owned partitions."""
        self._refresh_work_items()
        # Snapshot the owned partitions once instead of once per item.
        owned = set(self._pc)
        for i in items:
            part = str(self._item2part_func(i.name))
            if part in owned:
                bucket = self._partitions.setdefault(part, [])
                # skip items whose name is already present in the bucket
                if i.name not in [x.name for x in bucket]:
                    bucket.append(i)
        self._logger.debug(
            "@populate_work_items(%s): done!"
            % " ".join(
                str(part) + ":" + ",".join(x.name for x in held)
                for part, held in self._partitions.items()
            )
        )
        gevent.sleep(0)

    def _device2partition(self, key):
        """Map `key` to a partition number in [0, bucketsize) via md5."""
        # hashlib needs bytes; text keys are encoded first (byte strings
        # pass through unchanged).
        if not isinstance(key, bytes):
            key = key.encode("utf-8")
        # 'Q' without a byte-order prefix unpacks in native byte order.
        return struct.unpack("Q", hashlib.md5(key).digest()[-8:])[0] % self._bucketsize

    def _refresh_work_items(self):
        """Empty every known partition bucket, keeping the keys."""
        for k in self._partitions:
            self._partitions[k] = []
class ConsistentMemcachedClient(Client):
    """
    Consistent Memcached Client attempts to create a scalable Memcached
    cluster that uses Consistent Hashing (using the ketama algorithm).

    In any distributed caching setup, adding or deleting servers disrupts
    the entire hashing and results in significant redistribution of keys.
    A consistent hashing function will significantly decrease the chances
    of a key being hashed to a different slot.

    A good explanation for the algorithm is found here:
    http://michaelnielsen.org/blog/consistent-hashing/
    """

    # The timeout period before marking a server as a dead
    _RETRY_GAP = 0.1

    def __init__(self, *args, **kwargs):
        """
        A memcache subclass. It currently allows you to add or delete a new
        host at run time. It also checks if a memcache server is down and
        automatically readjusts if a memcached server is not reachable.
        """
        super(ConsistentMemcachedClient, self).__init__(*args, **kwargs)
        self.hash_manager = ConsistentHash(self.servers)

    def _reconfigure_hashing(self):
        """
        If a server can be reached add it to the list of available servers.
        If a server cannot be reached, delete it from the list of available
        servers.
        """
        for server in self.servers:
            if self._is_server_alive(server, sleep=False):
                self._add_alive_server(server)

        # Iterate over a copy: _remove_dead_server() removes entries from
        # hash_manager.nodes, and removing from a list while iterating it
        # skips the element that follows each removal.
        for server in list(self.hash_manager.nodes):
            if not self._is_server_alive(server, sleep=False):
                self._remove_dead_server(server)

    def _add_alive_server(self, server):
        """
        Add a server to the hash manager
        """
        if server not in self.hash_manager.nodes:
            self.hash_manager.add_nodes([server])

    def _is_server_alive(self, server, sleep=True):
        """
        Check if server is alive, trying up to Client._SERVER_RETRIES times.

        Returns True as soon as a TCP connect to server.address succeeds,
        False when every attempt fails. When `sleep` is true, waits
        _RETRY_GAP seconds after each failed attempt.
        """
        for _ in range(Client._SERVER_RETRIES):
            # Create the socket outside the try block so that `finally`
            # never refers to an unbound name if creation itself fails.
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                if not sock.connect_ex(server.address):
                    return True
                if sleep:
                    # _RETRY_GAP is declared on this subclass; look it up
                    # through self rather than on the base Client class.
                    time.sleep(self._RETRY_GAP)
            finally:
                sock.close()
        return False

    def _remove_dead_server(self, server):
        """
        Reconfigure hashing by removing the server that is not responding

        Raises:
            ValueError: if `server` is not in the node list (or if
                rebuilding the hash raises ValueError)
        """
        try:
            self.hash_manager.nodes.remove(server)
            self.hash_manager = ConsistentHash(self.hash_manager.nodes)
        except ValueError:
            raise ValueError('no data store is functioning, cannot process request')

    def _get_server(self, key):
        """
        Returns the most likely server to hold the key

        Returns:
            (server, key) when the hashed server accepts a connection,
            (None, None) when there are no buckets or every attempt fails.
        """
        self._reconfigure_hashing()
        server = self.hash_manager.get_node(key)
        if not self.buckets:
            return None, None

        for _ in range(Client._SERVER_RETRIES):
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                if server and server.connect() and not sock.connect_ex(server.address):
                    return server, key
                time.sleep(self._RETRY_GAP)
            finally:
                sock.close()
        return None, None