except Exception as e: logs += ["ReadLock acquisition failed" + str(e)] if not done: logs += ["ReadLock acquisition failed" + str(e)] time.sleep(0.005) try: tic = datetime.now() done = rlock.release() tac = datetime.now() - tic logs += ['time to release read lock :' + str(tac)] except Exception as e: logs += ["ReadLock release failed" + str(e)] if not done: logs += ["ReadLock release failed" + str(e)] wlock = zk.WriteLock('lock1', 'paris') for i in range(1000): try: tic = datetime.now() done = wlock.acquire() tac = datetime.now() - tic logs += ['time to acquire write lock :' + str(tac)] except Exception as e: logs += ["WriteLock acquisition failed" + str(e)] if not done: logs += ["WriteLock acquisition failed" + str(e)] time.sleep(0.005) try: tic = datetime.now() done = wlock.release() tac = datetime.now() - tic
class USSMetadataManager(object):
    """Interfaces with the locking system to get, put, and delete USS metadata.

    Metadata gets/stores/deletes the USS information for a particular grid,
    including current version number, a list of USSs with active operations,
    and the endpoints to get that information. Locking is assured through a
    snapshot token received when getting, and used when putting.
    """

    def __init__(self, connectionstring=DEFAULT_CONNECTION, testgroupid=None):
        """Initializes the class.

        Args:
          connectionstring:
            Zookeeper connection string - server:port,server:port,...
          testgroupid: ID to use if in test mode, none for normal mode

        Raises:
          ValueError: the connection string shares a character with
            BAD_CHARACTER_CHECK (or a ValueError escaped from the client).
          KazooTimeoutError: the zookeeper client timed out while starting.
        """
        if testgroupid:
            self.set_testmode(testgroupid)
        if not connectionstring:
            connectionstring = DEFAULT_CONNECTION
        log.debug(
            'Creating metadata manager object and connecting to zookeeper...')
        try:
            if set(BAD_CHARACTER_CHECK) & set(connectionstring):
                raise ValueError
            self.zk = KazooClient(
                hosts=connectionstring, timeout=CONNECTION_TIMEOUT)
            self.zk.add_listener(self.zookeeper_connection_listener)
            self.zk.start()
            if testgroupid:
                self.delete_testdata(testgroupid)
        except KazooTimeoutError:
            log.error(
                'Unable to connect to zookeeper using %s connection string...',
                connectionstring)
            raise
        except ValueError:
            log.error('Connection string %s seems invalid...',
                      connectionstring)
            raise

    def __del__(self):
        """Stops the zookeeper client when the manager is garbage collected."""
        log.debug(
            'Destroying metadata manager object and disconnecting from zk...')
        # __init__ can raise before self.zk is assigned (e.g. invalid
        # connection string); avoid a secondary AttributeError in that case.
        zk = getattr(self, 'zk', None)
        if zk is not None:
            zk.stop()

    def set_verbose(self):
        """Switches the module logger to DEBUG level."""
        log.setLevel(logging.DEBUG)

    def set_testmode(self, testgroupid='UNDEFINED_TESTER'):
        """Sets the mode to testing with the specific test ID, cannot be undone.

        This rebinds the module-level GRID_PATH and CONNECTION_TIMEOUT, so it
        affects every manager in the process.

        Args:
          testgroupid: ID to use if in test mode, none for normal mode
        """
        global GRID_PATH
        global CONNECTION_TIMEOUT
        # Adjust parameters specifically for the test
        GRID_PATH = TEST_BASE_PREFIX + testgroupid + USS_BASE_PREFIX
        log.debug('Setting test path to %s...', GRID_PATH)
        CONNECTION_TIMEOUT = 1.0

    def zookeeper_connection_listener(self, state):
        """Logs zookeeper connection state changes.

        Args:
          state: connection state reported by the zookeeper client
        """
        if state == KazooState.LOST:
            # Register somewhere that the session was lost
            log.error('Lost connection with the zookeeper servers...')
        elif state == KazooState.SUSPENDED:
            # Handle being disconnected from Zookeeper
            log.error('Suspended connection with the zookeeper servers...')
        elif state == KazooState.CONNECTED:
            # Handle being connected/reconnected to Zookeeper
            log.info('Connection restored with the zookeeper servers...')

    def delete_testdata(self, testgroupid=None):
        """Removes the test data from the servers.

        Be careful when using this in parallel as it removes everything under
        the testgroupid, or everything if no testgroupid is provided.

        Args:
          testgroupid: ID to use if in test mode, none will remove all test data
        """
        if testgroupid:
            path = TEST_BASE_PREFIX + testgroupid
        else:
            path = TEST_BASE_PREFIX
        self.zk.delete(path, recursive=True)

    def get(self, z, x, y):
        """Gets the metadata and snapshot token for a GridCell.

        Reads data from zookeeper, including a snapshot token. The snapshot
        token is used as a reference when writing to ensure the data has not
        been updated between read and write.

        Args:
          z: zoom level in slippy tile format
          x: x tile number in slippy tile format
          y: y tile number in slippy tile format
        Returns:
          JSend formatted response (https://labs.omniti.com/labs/jsend)
        """
        # TODO(hikevin): Change to use our own error codes and let the server
        #                convert them to http error codes. For now, this is
        #                at least in a standard JSend format.
        if not self._validate_slippy(z, x, y):
            return self._format_status_code_to_jsend(400)
        (content, metadata) = self._get_raw(z, x, y)
        if not metadata:
            return self._format_status_code_to_jsend(404)
        try:
            m = USSMetadata(content)
            return {
                'status': 'success',
                'sync_token': metadata.last_modified_transaction_id,
                'data': m.to_json()
            }
        except ValueError:
            # Stored content could not be turned into a USSMetadata object.
            return self._format_status_code_to_jsend(424)

    def set(self, z, x, y, sync_token, uss_id, ws_scope, operation_format,
            operation_ws, earliest_operation, latest_operation):
        """Sets the metadata for a GridCell.

        Writes data, using the snapshot token for confirming data has not been
        updated since it was last read.

        Args:
          z: zoom level in slippy tile format
          x: x tile number in slippy tile format
          y: y tile number in slippy tile format
          sync_token: token retrieved in the original GET GridCellMetadata,
          uss_id: plain text identifier for the USS,
          ws_scope: scope to use to obtain OAuth token,
          operation_format: output format for operation ws (i.e. NASA, GUTMA),
          operation_ws: submitting USS endpoint where all flights in this cell
            can be retrieved from,
          earliest_operation: lower bound of active or planned flight timestamp,
            used for quick filtering conflicts.
          latest_operation: upper bound of active or planned flight timestamp,
            used for quick filtering conflicts.
        Returns:
          JSend formatted response (https://labs.omniti.com/labs/jsend)
        """
        if not self._validate_slippy(z, x, y):
            return self._format_status_code_to_jsend(400)
        # first we have to get the cell
        (content, metadata) = self._get_raw(z, x, y)
        if not metadata:
            return self._format_status_code_to_jsend(404)
        # Quick check of the token, another is done on the actual set to be
        # sure but this check fails early and fast
        if str(metadata.last_modified_transaction_id) != str(sync_token):
            return self._format_status_code_to_jsend(409)
        try:
            m = USSMetadata(content)
            log.debug('Setting metadata for %s...', uss_id)
            if not m.upsert_operator(uss_id, ws_scope, operation_format,
                                     operation_ws, earliest_operation,
                                     latest_operation):
                log.error('Failed setting operator for %s with token %s...',
                          uss_id, str(sync_token))
                raise ValueError
            status = self._set_raw(z, x, y, m, uss_id, sync_token)
        except ValueError:
            status = 424
        if status == 200:
            # Success, now get the metadata back to send back
            return self.get(z, x, y)
        return self._format_status_code_to_jsend(status)

    def delete(self, z, x, y, uss_id):
        """Sets the metadata for a GridCell by removing the entry for the USS.

        Args:
          z: zoom level in slippy tile format
          x: x tile number in slippy tile format
          y: y tile number in slippy tile format
          uss_id: is the plain text identifier for the USS
        Returns:
          JSend formatted response (https://labs.omniti.com/labs/jsend)
        """
        if not self._validate_slippy(z, x, y):
            return self._format_status_code_to_jsend(400)
        # first we have to get the cell
        (content, metadata) = self._get_raw(z, x, y)
        if not metadata:
            return self._format_status_code_to_jsend(404)
        try:
            m = USSMetadata(content)
            m.remove_operator(uss_id)
            # TODO(pelletierb): Automatically retry on delete
            status = self._set_raw(z, x, y, m, uss_id,
                                   metadata.last_modified_transaction_id)
        except ValueError:
            status = 424
        if status != 200:
            return self._format_status_code_to_jsend(status)
        # Success, re-read only to pick up the token produced by the write;
        # the returned data is the object that was just written.
        (_, metadata) = self._get_raw(z, x, y)
        return {
            'status': 'success',
            'sync_token': metadata.last_modified_transaction_id,
            'data': m.to_json()
        }

    ######################################################################
    ################       INTERNAL FUNCTIONS    #########################
    ######################################################################
    def _get_raw(self, z, x, y):
        """Gets the raw content and metadata for a GridCell from zookeeper.

        The node path is ensured (created if missing) before it is read.

        Args:
          z: zoom level in slippy tile format
          x: x tile number in slippy tile format
          y: y tile number in slippy tile format
        Returns:
          content: USS metadata
          metadata: straight from zookeeper
        """
        path = GRID_PATH + '/'.join(
            (str(z), str(x), str(y))) + USS_METADATA_FILE
        log.debug('Getting metadata from zookeeper@%s...', path)
        self.zk.ensure_path(path)
        c, m = self.zk.get(path)
        if c:
            log.debug('Received raw content and metadata from zookeeper: %s', c)
        if m:
            log.debug('Received raw metadata from zookeeper: %s', m)
        return c, m

    def _set_raw(self, z, x, y, m, uss_id, sync_token):
        """Grabs the lock and updates the raw content for a GridCell in zookeeper.

        Args:
          z: zoom level in slippy tile format
          x: x tile number in slippy tile format
          y: y tile number in slippy tile format
          m: metadata object to write
          uss_id: the plain text identifier for the USS
          sync_token: the sync token received during get operation
        Returns:
          200 for success, 409 for conflict, 408 for unable to get the lock
        """
        path = GRID_PATH + '/'.join(
            (str(z), str(x), str(y))) + USS_METADATA_FILE
        # TODO(hikevin): Remove Lock and use built in set with version
        lock = self.zk.WriteLock(path, uss_id)
        log.debug('Getting metadata lock from zookeeper@%s...', path)
        try:
            lock.acquire(timeout=LOCK_TIMEOUT)
        except LockTimeout:
            log.error('Unable to acquire the lock for %s...', path)
            return 408
        try:
            (_, metadata) = self._get_raw(z, x, y)
            if str(metadata.last_modified_transaction_id) == str(sync_token):
                log.debug('Setting metadata to %s...', str(m))
                # NOTE(review): encoded because the zookeeper client is
                # expected to require byte strings - confirm against the
                # client version in use. json.dumps emits ASCII by default,
                # so the stored bytes are unchanged.
                self.zk.set(path, json.dumps(m.to_json()).encode('utf-8'))
                status = 200
            else:
                log.error(
                    'Sync token from USS (%s) does not match token from zk (%s)...',
                    str(sync_token),
                    str(metadata.last_modified_transaction_id))
                status = 409
        finally:
            # Always release, even when the read or write above raises, so a
            # failed update cannot leave the cell locked.
            log.debug('Releasing the lock...')
            lock.release()
        return status

    def _format_status_code_to_jsend(self, status):
        """Formats a response based on HTTP status code.

        Args:
          status: HTTP status code
        Returns:
          JSend formatted response (https://labs.omniti.com/labs/jsend)
        """
        if status == 200 or status == 204:
            return {
                'status': 'success',
                'code': 204,
                'message': 'Empty data set.'
            }
        fail_messages = {
            400: 'Parameters are not following the correct format.',
            404: 'Unable to pull metadata from lock system.',
            408: 'Timeout trying to get lock.',
            409: 'Content in metadata has been updated since provided sync '
                 'token.',
            424: 'Content in metadata is not following JSON format '
                 'guidelines.',
        }
        return {
            'status': 'fail',
            'code': status,
            'message': fail_messages.get(status,
                                         'Unknown error code occurred.')
        }

    def _validate_slippy(self, z, x, y):
        """Validates slippy tile ranges.

        https://en.wikipedia.org/wiki/Tiled_web_map
        https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames

        Args:
          z: zoom level in slippy tile format
          x: x tile number in slippy tile format
          y: y tile number in slippy tile format
        Returns:
          true if valid, false if not
        """
        try:
            z = int(z)
            x = int(x)
            y = int(y)
            if not 0 <= z <= 20:
                raise ValueError
            if not 0 <= x < 2**z:
                raise ValueError
            if not 0 <= y < 2**z:
                raise ValueError
            return True
        except (ValueError, TypeError):
            log.error('Invalid slippy format for tiles %sz, %s,%s!', z, x, y)
            return False
class ZKHandler(object):
    """
    Convenience wrapper around a Kazoo client that addresses Zookeeper nodes
    through schema keys (see get_schema_path) instead of raw paths.
    """

    def __init__(self, config, logger=None):
        """
        Initialize an instance of the ZKHandler class with config

        A zk_conn object will be created but not started

        A ZKSchema instance will be created
        """
        self.encoding = "utf8"
        self.coordinators = config["coordinators"]
        self.logger = logger
        self.zk_conn = KazooClient(hosts=self.coordinators)
        self._schema = ZKSchema()

    #
    # Class meta-functions
    #
    def coordinators(self):
        # NOTE(review): __init__ assigns an instance attribute with the same
        # name, so on an initialized instance `handler.coordinators` is the
        # configured value and this method is shadowed. Kept unchanged for
        # compatibility.
        return str(self.coordinators)

    def log(self, message, state=""):
        """
        Send {message} to the configured logger, or print it when no logger
        was provided
        """
        if self.logger is not None:
            self.logger.out(message, state)
        else:
            print(message)

    #
    # Properties
    #
    @property
    def schema(self):
        return self._schema

    #
    # State/connection management
    #
    def listener(self, state):
        """
        Listen for KazooState changes and log accordingly.

        This function does not do anything except for log the state, and Kazoo handles the rest.
        """
        if state == KazooState.CONNECTED:
            self.log("Connection to Zookeeper resumed", state="o")
        else:
            self.log(
                "Connection to Zookeeper lost with state {}".format(state),
                state="w")

    def connect(self, persistent=False):
        """
        Start the zk_conn object and connect to the cluster

        Any failure is re-raised as a ZKConnectionException.
        """
        try:
            self.zk_conn.start()
            if persistent:
                self.log("Connection to Zookeeper started", state="o")
                self.zk_conn.add_listener(self.listener)
        except Exception as e:
            raise ZKConnectionException(self, e)

    def disconnect(self, persistent=False):
        """
        Stop and close the zk_conn object and disconnect from the cluster

        The class instance may be reused later (avoids persistent connections)
        """
        self.zk_conn.stop()
        self.zk_conn.close()
        if persistent:
            self.log("Connection to Zookeeper terminated", state="o")

    #
    # Schema helper actions
    #
    def get_schema_path(self, key):
        """
        Get the Zookeeper path for {key} from the current schema based on its format.

        If {key} is a tuple of length 2, it's treated as a path plus an item instance of that path (e.g. a node, a VM, etc.).

        If {key} is a tuple of length 4, it is treated as a path plus an item instance, as well as another item instance of the subpath.

        If {key} is just a string, it's treated as a lone path (mostly used for the 'base' schema group).

        Otherwise, returns None since this is not a valid key.

        This function also handles the special case where a string that looks like an existing path (i.e. starts with '/') is passed;
        in that case it will silently return the same path back. This was mostly a migration functionality and is deprecated.
        """
        if isinstance(key, tuple):
            if len(key) == 2:
                # 2-length normal tuple: ipath plus item
                ipath, item = key
                return self.schema.path(ipath, item=item)
            if len(key) == 4:
                # 4-length sub-level tuple: two (ipath, item) halves joined
                ipath, item, sub_ipath, sub_item = key
                base_path = self.schema.path(ipath, item=item)
                sub_path = self.schema.path(sub_ipath, item=sub_item)
                # NOTE(review): callers treat a None path as "missing schema
                # entry"; report that instead of failing on None + str.
                if base_path is None or sub_path is None:
                    return None
                return base_path + sub_path
            # This is an invalid key
            return None

        if isinstance(key, str):
            # This is a raw key path, used by backup/restore functionality
            if key.startswith("/"):
                return key
            # This is a key string with just an ipath
            return self.schema.path(key, item=None)

        # This is an invalid key
        return None

    #
    # Key Actions
    #
    def exists(self, key):
        """
        Check if a key exists
        """
        path = self.get_schema_path(key)
        if path is None:
            # This path is invalid, this is likely due to missing schema entries, so return False
            return False

        return bool(self.zk_conn.exists(path))

    def read(self, key):
        """
        Read data from a key

        Returns the decoded data, or None if the key is invalid or missing.
        """
        try:
            path = self.get_schema_path(key)
            if path is None:
                # This path is invalid; this is likely due to missing schema entries, so return None
                return None

            return self.zk_conn.get(path)[0].decode(self.encoding)
        except NoNodeError:
            return None

    def write(self, kvpairs):
        """
        Create or update one or more keys' data

        {kvpairs} is a list of (key, value) tuples; all changes are applied in
        a single transaction. Returns True on success, False otherwise.
        """
        if not isinstance(kvpairs, list):
            self.log("ZKHandler error: Key-value sequence is not a list",
                     state="e")
            return False

        transaction = self.zk_conn.transaction()

        for kvpair in kvpairs:
            if not isinstance(kvpair, tuple):
                self.log(
                    "ZKHandler error: Key-value pair '{}' is not a tuple".
                    format(kvpair),
                    state="e",
                )
                return False

            key = kvpair[0]
            value = kvpair[1]

            path = self.get_schema_path(key)
            if path is None:
                # This path is invalid; this is likely due to missing schema entries, so continue
                continue

            encoded_value = str(value).encode(self.encoding)

            if not self.exists(key):
                # Creating a new key
                transaction.create(path, encoded_value)
                continue

            # Updating an existing key: queue the write, then a check that the
            # node version is the current one plus one after the write.
            current_version = self.zk_conn.get(path)[1].version
            expected_version = current_version + 1

            transaction.set_data(path, encoded_value)

            try:
                transaction.check(path, expected_version)
            except TypeError:
                self.log(
                    "ZKHandler error: Key '{}' does not match expected version"
                    .format(path),
                    state="e",
                )
                return False

        try:
            transaction.commit()
            return True
        except Exception as e:
            self.log(
                "ZKHandler error: Failed to commit transaction: {}".format(e),
                state="e")
            return False

    def delete(self, keys, recursive=True):
        """
        Delete a key or list of keys (defaults to recursive)

        Keys that do not exist are skipped. Returns False on the first failed
        delete, True otherwise.
        """
        if not isinstance(keys, list):
            keys = [keys]

        for key in keys:
            if not self.exists(key):
                continue

            # Resolved outside the try so the error message below always
            # refers to the path of the key being deleted.
            path = self.get_schema_path(key)
            try:
                self.zk_conn.delete(path, recursive=recursive)
            except Exception as e:
                self.log(
                    "ZKHandler error: Failed to delete key {}: {}".format(
                        path, e),
                    state="e",
                )
                return False

        return True

    def children(self, key):
        """
        Lists all children of a key

        Returns None if the key is invalid or missing.
        """
        try:
            path = self.get_schema_path(key)
            if path is None:
                # This path is invalid; this is likely due to missing schema entries, so return None
                return None

            return self.zk_conn.get_children(path)
        except NoNodeError:
            return None

    def rename(self, kkpairs):
        """
        Rename one or more keys to a new value

        {kkpairs} is a list of (source_key, destination_key) tuples; each
        source tree is copied to the destination and removed, all within a
        single transaction. Returns True on success, False otherwise.
        """
        if not isinstance(kkpairs, list):
            self.log("ZKHandler error: Key-key sequence is not a list",
                     state="e")
            return False

        transaction = self.zk_conn.transaction()

        def rename_element(transaction, source_path, destination_path):
            # Copy this node, recurse into its children, then remove the
            # source so children are always deleted before their parent.
            data = self.zk_conn.get(source_path)[0]
            transaction.create(destination_path, data)

            # Raw paths (leading '/') pass through get_schema_path unchanged,
            # so children() can be used on them. Query once per node.
            child_names = self.children(source_path)
            if child_names:
                for child_name in child_names:
                    rename_element(
                        transaction,
                        "{}/{}".format(source_path, child_name),
                        "{}/{}".format(destination_path, child_name),
                    )

            transaction.delete(source_path)

        for kkpair in kkpairs:
            if not isinstance(kkpair, tuple):
                self.log(
                    "ZKHandler error: Key-key pair '{}' is not a tuple".format(
                        kkpair),
                    state="e",
                )
                return False

            source_key = kkpair[0]
            source_path = self.get_schema_path(source_key)
            if source_path is None:
                # This path is invalid; this is likely due to missing schema entries, so continue
                continue

            destination_key = kkpair[1]
            destination_path = self.get_schema_path(destination_key)
            if destination_path is None:
                # This path is invalid; this is likely due to missing schema entries, so continue
                continue

            if not self.exists(source_key):
                self.log(
                    "ZKHander error: Source key '{}' does not exist".format(
                        source_path),
                    state="e",
                )
                return False

            if self.exists(destination_key):
                self.log(
                    "ZKHander error: Destination key '{}' already exists".
                    format(destination_path),
                    state="e",
                )
                return False

            rename_element(transaction, source_path, destination_path)

        try:
            transaction.commit()
            return True
        except Exception as e:
            self.log(
                "ZKHandler error: Failed to commit transaction: {}".format(e),
                state="e")
            return False

    #
    # Lock actions
    #
    def _build_lock(self, key, recipe_name, label):
        """
        Create a lock object for {key} using the zk_conn recipe {recipe_name}

        {label} is the lock kind as it appears in log messages. The lock is
        given a fresh UUID identifier. Returns None if the path does not
        exist or if creation keeps failing (the first attempt plus 5 retries,
        0.5s apart).
        """
        count = 1
        lock = None

        path = self.get_schema_path(key)

        while True:
            try:
                lock_id = str(uuid.uuid1())
                lock = getattr(self.zk_conn, recipe_name)(path, lock_id)
                break
            except NoNodeError:
                self.log(
                    "ZKHandler warning: Failed to acquire {} lock on nonexistent path {}"
                    .format(label, path),
                    state="e",
                )
                return None
            except Exception as e:
                if count > 5:
                    self.log(
                        "ZKHandler warning: Failed to acquire {} lock after 5 tries: {}"
                        .format(label, e),
                        state="e",
                    )
                    break
                time.sleep(0.5)
                count += 1

        return lock

    def readlock(self, key):
        """
        Creates a read lock object on a key

        The lock is returned without being acquired here; returns None on
        failure.
        """
        return self._build_lock(key, "ReadLock", "read")

    def writelock(self, key):
        """
        Creates a write lock object on a key

        The lock is returned without being acquired here; returns None on
        failure.
        """
        return self._build_lock(key, "WriteLock", "write")

    def exclusivelock(self, key):
        """
        Creates an exclusive lock object on a key

        The lock is returned without being acquired here; returns None on
        failure.
        """
        return self._build_lock(key, "Lock", "exclusive")