def __init__(self, lock_server_data: LockServerData):
    """Connect to the lock server and register this manager.

    When ``lock_server_data.enabled`` is falsy nothing is set up: no client
    is created, no attributes are assigned and the instance is not handed
    to ``self.set``.

    Args:
        lock_server_data: Settings object read for ``enabled``, ``host``
            and ``port``.
    """
    if lock_server_data.enabled:
        # Only set up connection if it's enabled
        etcd_client = self.__create_client(
            lock_server_data.host, lock_server_data.port
        )
        self.client = etcd_client
        # Lock taken by the manager itself around its own bookkeeping.
        self.own_lock = Lock(etcd_client, 'manager-lock')
        # Maps lock id -> Lock object created by this manager.
        self.locks = {}
        self.set(self)
def _create_lock(etcd):
    """Build the lock object used for the Engines leader election.

    Args:
        etcd (etcd.Client): Client of the store the lock is registered on.

    Returns:
        etcd.Lock: A new lock named ``leader-election``. It is only
        constructed here, not acquired.
    """
    lock_name = 'leader-election'
    return Lock(etcd, lock_name)
def __create(self, lock_id, silent) -> bool:
    """Register a new etcd lock under ``lock_id``.

    The key must not exist yet; locks are meant to be created right before
    use and destroyed right after.

    Args:
        lock_id: etcd key that marks the lock as existing.
        silent: When truthy, report an already existing key by returning
            ``False`` instead of raising.

    Returns:
        ``True`` if the key was written and a ``Lock`` stored in
        ``self.locks``, ``False`` if the key already existed and ``silent``
        is truthy.

    Raises:
        ExistingLockError: The key already existed and ``silent`` is falsy.
    """
    # This is a problem as it blocks all coroutines (but it is a short block)
    self.own_lock.acquire(blocking=True)
    try:
        self.client.read(lock_id)
    except EtcdKeyNotFound:
        # We could write whatever we wanted here, we only care about the
        # key's existence
        self.client.write(lock_id, 'on')
        self.locks[lock_id] = Lock(self.client, lock_id)
        newly_created = True
    else:
        newly_created = False
    finally:
        self.own_lock.release()

    if newly_created:
        return True
    # If the key existed, then we don't want to lock.
    if silent:
        return False
    raise ExistingLockError(lock_id)
def update_alert(message):
    """Store, refresh or replace the alert carried by ``message``.

    The JSON document in ``message.payload["message"]`` is decoded, given
    ``message.message_id`` as its ``alert_id`` and converted with
    ``AlertUtils().to_obj``. An alert whose type or severity is not in
    ``constants.SUPPORTED_ALERT_TYPES`` / ``SUPPORTED_ALERT_SEVERITY`` is
    logged and abandoned.

    If the payload has a truthy ``alert_notify`` the alert is passed to
    ``utils.save_notification_only_alert``. Otherwise it is compared with
    the alerts returned by ``utils.get_alerts``:

    * a stored alert that ``is_same`` but not ``equals`` is updated under an
      etcd lock (saved with a TTL when ``alert_condition_unset`` is truthy);
    * a stored alert that ``is_same`` and ``equals`` is removed and re-saved;
    * with no match, the alert is saved only when
      ``alert_condition_state`` is the warning severity.

    Args:
        message: Object exposing a ``payload`` mapping and a ``message_id``.

    Returns:
        None. The exception types listed in the ``except`` clause are
        logged rather than propagated.
    """
    # Bound before the ``try`` so the ``except`` and ``finally`` clauses can
    # always reference them, even when decoding the payload itself fails
    # (otherwise the error log below would raise UnboundLocalError).
    lock = None
    new_alert = None
    try:
        existing_alert = False
        new_alert = json.loads(message.payload["message"])
        new_alert['alert_id'] = message.message_id
        new_alert_obj = AlertUtils().to_obj(new_alert)
        if new_alert_obj.alert_type not in constants.SUPPORTED_ALERT_TYPES:
            logger.log(
                "error",
                NS.publisher_id,
                {"message": "Invalid alert type in alert %s" % new_alert}
            )
            raise InvalidAlertType
        if new_alert_obj.severity not in constants.SUPPORTED_ALERT_SEVERITY:
            logger.log(
                "error",
                NS.publisher_id,
                {"message": "Invalid alert severity in alert %s" % new_alert}
            )
            raise InvalidAlertSeverity
        alert_notify = message.payload.get('alert_notify', False)
        if not alert_notify:
            # Tag the alert as cluster- and/or node-level depending on which
            # relationship tables list its resource.
            if (new_alert_obj.resource in
                    NS.tendrl.objects.ClusterAlert()._defs['relationship'][
                        new_alert_obj.alert_type.lower()]):
                new_alert_obj.classification.append(constants.CLUSTER_ALERT)
            if (new_alert_obj.resource in
                    NS.tendrl.objects.NodeAlert()._defs['relationship'][
                        new_alert_obj.alert_type.lower()]):
                new_alert_obj.classification.append(constants.NODE_ALERT)
            alerts = utils.get_alerts(new_alert_obj)
            for curr_alert in alerts:
                curr_alert.tags = json.loads(curr_alert.tags)
                if AlertUtils().is_same(new_alert_obj, curr_alert):
                    new_alert_obj = AlertUtils().update(
                        new_alert_obj,
                        curr_alert
                    )
                    if not AlertUtils().equals(new_alert_obj, curr_alert):
                        # Lock only if new alert matches with existing alert
                        lock = Lock(
                            NS._int.wclient,
                            'alerting/alerts/%s' % new_alert_obj.alert_id
                        )
                        lock.acquire(blocking=True, lock_ttl=60)
                        if lock.is_acquired:
                            # renew a lock
                            lock.acquire(lock_ttl=60)
                        existing_alert = True
                        utils.update_alert_count(
                            new_alert_obj,
                            existing_alert
                        )
                        if message.payload["alert_condition_unset"]:
                            keep_alive = int(
                                NS.config.data["alert_retention_time"]
                            )
                            utils.classify_alert(new_alert_obj, keep_alive)
                            new_alert_obj.save(ttl=keep_alive)
                        else:
                            # Remove the clearing alert with same if exist
                            utils.remove_alert(new_alert_obj)
                            utils.classify_alert(new_alert_obj)
                            new_alert_obj.save()
                        return
                    else:
                        # Handle case where alert severity changes without
                        # coming to normal. In this case the previous alert
                        # should be overridden with new one
                        utils.remove_alert(new_alert_obj)
                        utils.classify_alert(new_alert_obj)
                        new_alert_obj.save()
                    return
            # else add this new alert to etcd
            if message.payload["alert_condition_state"] == \
                    constants.ALERT_SEVERITY["warning"]:
                utils.update_alert_count(new_alert_obj, existing_alert)
                utils.classify_alert(new_alert_obj)
                new_alert_obj.save()
            else:
                logger.log(
                    "error",
                    NS.publisher_id,
                    {
                        "message": "New alert can't be a clearing alert %s" %
                        (new_alert)
                    }
                )
        else:
            # SDS native events
            utils.save_notification_only_alert(new_alert_obj)
    except (AttributeError,
            TypeError,
            ValueError,
            KeyError,
            InvalidAlertType,
            InvalidAlertSeverity,
            EtcdKeyNotFound,
            EtcdException) as ex:
        logger.log(
            "error",
            NS.publisher_id,
            {"message": "Error %s in updating alert %s" % (ex, new_alert)}
        )
    finally:
        # Release the per-alert lock if one was taken on the update path.
        if isinstance(lock, Lock) and lock.is_acquired:
            lock.release()
def update_alert(message):
    """Store, refresh or replace the alert carried by ``message``.

    The JSON document in ``message.payload["message"]`` is decoded, given
    ``message.message_id`` as its ``alert_id`` and converted with
    ``AlertUtils().to_obj``. An alert whose type or severity is not in
    ``constants.SUPPORTED_ALERT_TYPES`` / ``SUPPORTED_ALERT_SEVERITY`` is
    logged and abandoned.

    If the payload has a truthy ``alert_notify`` the alert is passed to
    ``utils.save_notification_only_alert``. Otherwise it is compared with
    the alerts returned by ``utils.get_alerts``:

    * an info-severity alert whose ``clear_alert`` tag differs from the
      severity of the stored alert it ``is_same`` as is ignored;
    * a stored alert that ``is_same`` but not ``equals`` is updated under an
      etcd lock (saved with a TTL when ``alert_condition_unset`` is truthy);
    * a stored alert that ``is_same`` and ``equals`` is removed and
      re-saved, and ``message.message_id`` is set to the alert's id;
    * with no match, the alert is saved only when
      ``alert_condition_state`` is the warning or critical severity.

    Args:
        message: Object exposing a ``payload`` mapping and a ``message_id``.

    Returns:
        None. The exception types listed in the ``except`` clause are
        logged rather than propagated.
    """
    # Bound before the ``try`` so the ``except`` and ``finally`` clauses can
    # always reference them, even when decoding the payload itself fails
    # (otherwise the error log below would raise UnboundLocalError).
    lock = None
    new_alert = None
    try:
        new_alert = json.loads(message.payload["message"])
        new_alert['alert_id'] = message.message_id
        new_alert_obj = AlertUtils().to_obj(new_alert)
        if new_alert_obj.alert_type not in constants.SUPPORTED_ALERT_TYPES:
            logger.log(
                "error",
                NS.publisher_id,
                {"message": "Invalid alert type in alert %s" % new_alert}
            )
            raise InvalidAlertType
        if new_alert_obj.severity not in constants.SUPPORTED_ALERT_SEVERITY:
            logger.log(
                "error",
                NS.publisher_id,
                {"message": "Invalid alert severity in alert %s" % new_alert}
            )
            raise InvalidAlertSeverity
        alert_notify = message.payload.get('alert_notify', False)
        if not alert_notify:
            # Tag the alert as cluster- and/or node-level depending on which
            # relationship tables list its resource.
            if (new_alert_obj.resource in
                    NS.tendrl.objects.ClusterAlert()._defs['relationship'][
                        new_alert_obj.alert_type.lower()]):
                new_alert_obj.classification.append(constants.CLUSTER_ALERT)
            if (new_alert_obj.resource in
                    NS.tendrl.objects.NodeAlert()._defs['relationship'][
                        new_alert_obj.alert_type.lower()]):
                new_alert_obj.classification.append(constants.NODE_ALERT)
            alerts = utils.get_alerts(new_alert_obj)
            for curr_alert in alerts:
                curr_alert.tags = json.loads(curr_alert.tags)
                if AlertUtils().is_same(new_alert_obj, curr_alert):
                    if new_alert_obj.severity == \
                            constants.ALERT_SEVERITY["info"]:
                        if "clear_alert" in new_alert_obj.tags.keys():
                            if new_alert_obj.tags['clear_alert'] != \
                                    curr_alert.severity:
                                # only warning clearing alert can clear
                                # the warning alert and critical clearing
                                # alert can clear the critical alert,
                                # Because critical/warning alert panels in
                                # grafana are independent from one another,
                                # So after critical alert raised if warning
                                # clearing came then tendrl can show only
                                # clearing alert, So this logic will help
                                # to prevent from the above case.
                                return
                    new_alert_obj = AlertUtils().update(
                        new_alert_obj,
                        curr_alert
                    )
                    if not AlertUtils().equals(new_alert_obj, curr_alert):
                        # Lock only if new alert matches with existing alert
                        lock = Lock(
                            NS._int.wclient,
                            'alerting/alerts/%s' % new_alert_obj.alert_id
                        )
                        lock.acquire(blocking=True, lock_ttl=60)
                        if lock.is_acquired:
                            # renew a lock
                            lock.acquire(lock_ttl=60)
                        utils.update_alert_count(new_alert_obj, curr_alert)
                        if message.payload["alert_condition_unset"]:
                            keep_alive = int(
                                NS.config.data["alert_retention_time"]
                            )
                            utils.classify_alert(new_alert_obj, keep_alive)
                            new_alert_obj.save(ttl=keep_alive)
                        else:
                            # Remove the clearing alert with same if exist
                            utils.remove_alert(new_alert_obj)
                            utils.classify_alert(new_alert_obj)
                            new_alert_obj.save()
                        return
                    else:
                        # If alert raised again with same severity,
                        # then update the alert
                        utils.remove_alert(new_alert_obj)
                        utils.classify_alert(new_alert_obj)
                        new_alert_obj.save()
                        # message_id and alert_id is same
                        # When same alert raised multiple times
                        # then assign old alert_id to new message
                        # to avoid duplicates events
                        # here new alert object already have old_message_id
                        message.message_id = new_alert_obj.alert_id
                    return
            # else add this new alert to etcd
            severity = message.payload["alert_condition_state"]
            if (severity == constants.ALERT_SEVERITY["warning"]) or \
                    (severity == constants.ALERT_SEVERITY["critical"]):
                utils.update_alert_count(new_alert_obj)
                utils.classify_alert(new_alert_obj)
                new_alert_obj.save()
            else:
                logger.log(
                    "debug",
                    NS.publisher_id,
                    {
                        "message": "New alert can't be a clearing alert %s" %
                        (new_alert)
                    }
                )
        else:
            # SDS native events
            utils.save_notification_only_alert(new_alert_obj)
    except (AttributeError,
            TypeError,
            ValueError,
            KeyError,
            InvalidAlertType,
            InvalidAlertSeverity,
            EtcdKeyNotFound,
            EtcdException) as ex:
        logger.log(
            "error",
            NS.publisher_id,
            {"message": "Error %s in updating alert %s" % (ex, new_alert)}
        )
    finally:
        # Release the per-alert lock if one was taken on the update path.
        if isinstance(lock, Lock) and lock.is_acquired:
            lock.release()
class LockManager:
    """Create, acquire, release and destroy named locks on an etcd server.

    A lock is represented by an etcd key named after the lock id (its mere
    existence marks the lock as created) plus a ``Lock`` object kept in
    ``self.locks``. ``acquire`` creates and locks, ``release`` unlocks and
    destroys.
    """

    # Instance registered through ``set``; ``__init__`` registers every
    # enabled manager it builds.
    INSTANCE = None

    def __init__(self, lock_server_data: LockServerData):
        """Connect to the lock server and register this manager.

        When ``lock_server_data.enabled`` is falsy nothing is set up: no
        client is created, no attributes are assigned and the instance is
        not registered.

        Args:
            lock_server_data: Settings object read for ``enabled``,
                ``host`` and ``port``.
        """
        if not lock_server_data.enabled:
            return
        # Only set up connection if it's enabled
        self.client = self.__create_client(lock_server_data.host,
                                           lock_server_data.port)
        # Lock taken by the manager itself while it checks/creates a key.
        self.own_lock = Lock(self.client, 'manager-lock')
        # Maps lock id -> Lock object created by this manager.
        self.locks = {}
        self.set(self)

    def acquire(self, lock_id, blocking=True, silent=False):
        """Create and lock an etcd lock.

        Args:
            lock_id: Id (etcd key) of the lock.
            blocking: Forwarded to the underlying ``Lock.acquire``.
            silent: When truthy, an already existing lock yields ``False``
                instead of ``ExistingLockError``.

        Returns:
            ``False`` if the lock already existed (``silent`` only),
            otherwise the result of acquiring the newly created lock.
        """
        return self.__create(lock_id, silent) and self.__lock(
            lock_id, blocking)

    def release(self, lock_id):
        """Unlock and destroy an etcd lock."""
        self.__unlock(lock_id)
        self.__destroy(lock_id)

    def __create(self, lock_id, silent) -> bool:
        """Create an etcd lock. Fails if it already exists.

        The idea is to create-and-destroy when needed.

        Returns:
            ``True`` if the key was written and the ``Lock`` stored,
            ``False`` if it already existed and ``silent`` is truthy.

        Raises:
            ExistingLockError: The key already existed and ``silent`` is
                falsy.
        """
        # This is a problem as it blocks all coroutines (but it is a short
        # block)
        self.own_lock.acquire(blocking=True)
        try:
            self.client.read(lock_id)
        except EtcdKeyNotFound:
            # We could write whatever we wanted here, we only care about
            # the key's existence
            self.client.write(lock_id, 'on')
            self.locks[lock_id] = Lock(self.client, lock_id)
            return True
        finally:
            self.own_lock.release()

        # If the key existed, then we don't want to lock.
        if not silent:
            raise ExistingLockError(lock_id)
        return False

    def __destroy(self, lock_id):
        """Remove a lock from the dictionary and from the etcd server."""
        self.client.delete(lock_id)
        self.locks.pop(lock_id)

    def __lock(self, lock_id, blocking=True) -> bool:
        """Acquire a lock with given id.

        Be sure to create it before acquiring it.
        """
        return self.locks[lock_id].acquire(blocking=blocking)

    def __unlock(self, lock_id):
        """Release a lock with given id.

        Be sure to create and acquire it before releasing it.
        """
        self.locks[lock_id].release()

    @classmethod
    def __create_client(cls, etcd_host, etcd_port) -> Client:
        """Create etcd client and test connection.

        Raises:
            EtcdConnectionError: The probe read failed with
                ``EtcdConnectionFailed``; the original failure is attached
                as ``__cause__``.
        """
        Logger(cls.__name__).info('Connecting to etcd server...')
        client = Client(host=etcd_host, port=etcd_port)

        # Test connection by trying to read a random value
        try:
            client.read('nodes')
        except EtcdConnectionFailed as err:
            # Chain explicitly so the low-level failure stays visible in
            # the traceback as the direct cause.
            raise EtcdConnectionError(port=etcd_port) from err
        except EtcdKeyNotFound:
            # This is to handle the case where etcd did not have the key
            # (we don't care) but it is running
            pass

        return client

    @classmethod
    def set(cls, instance):
        """Register ``instance`` as the class-wide manager."""
        cls.INSTANCE = instance

    @classmethod
    def get(cls):
        """Return the registered manager, or ``None`` if none was set."""
        return cls.INSTANCE