def _restore_machine(self, task, config, healthcheck_timeout):
    """Restore the host referenced by a restore task, then remove the task.

    The task is left untouched when its instance is contained in the
    collection returned by ``self._failure_instances`` for the
    retry-failure query. Otherwise the host record is looked up in storage
    and, unless dry mode is enabled, the host is restored and the nginx
    healthcheck is awaited. The task is removed from storage afterwards,
    in dry mode as well.

    Args:
        task: Task document; the keys ``_id``, ``instance`` and ``host``
            are read.
        config: Configuration handed to ``Host.from_dict`` as ``conf``.
        healthcheck_timeout: Timeout forwarded to
            ``self.nginx_manager.wait_healthcheck``.

    Raises:
        Whatever the storage, host or healthcheck calls raise; nothing is
        caught here.
    """
    retry_failure_delay = int(self.config.get("RESTORE_MACHINE_FAILURE_DELAY", 5))

    dry_mode_setting = self.config.get("RESTORE_MACHINE_DRY_MODE", False)
    # A textual "False"/"0" is a non-empty string and therefore truthy; taken
    # at face value it would silently switch dry mode on and skip the restore.
    # Explicit negatives are treated as "off"; every other value keeps its
    # plain truthiness, so settings that enabled dry mode before still do.
    if str(dry_mode_setting).strip().lower() in ("false", "0", "no", "off", "none", ""):
        restore_dry_mode = False
    else:
        restore_dry_mode = bool(dry_mode_setting)

    retry_failure_query = {"_id": {"$regex": "restore_.+"}, "last_attempt": {"$ne": None}}
    if task['instance'] in self._failure_instances(retry_failure_query, retry_failure_delay):
        # Listed among the failure instances: keep the task for a later run.
        return

    # The lookup happens in dry mode too, exactly as before.
    host = self.storage.find_host_id(task['host'])
    if not restore_dry_mode:
        host_data = {"_id": host['_id'], "dns_name": task['host'], "manager": host['manager']}
        Host.from_dict(host_data, conf=config).restore()
        self.nginx_manager.wait_healthcheck(task['host'], timeout=healthcheck_timeout)
    self.storage.remove_task({"_id": task['_id']})
def from_dict(cls, dict, conf=None):
    """Build a ``cls`` instance from a stored document.

    The document's ``_id`` is passed to the constructor as ``name``, ``conf``
    is passed as ``conf``, and a non-empty ``hosts`` list is converted with
    ``Host.from_dict`` element by element. All remaining keys are forwarded
    to ``cls`` unchanged as keyword arguments.

    NOTE(review): the ``cls`` first parameter suggests this is used as a
    classmethod; the decorator is not visible here -- confirm.

    Args:
        cls: Callable invoked as ``cls(**fields)`` to create the result.
        dict: Source document (must contain ``_id``), or ``None``. The name
            shadows the builtin and is kept only for interface compatibility.
        conf: Configuration forwarded to the instance and to nested hosts.

    Returns:
        The object returned by ``cls``, or ``None`` when ``dict`` is ``None``.

    Raises:
        KeyError: If the document has no ``_id`` key.
    """
    if dict is None:
        return None
    # Work on a shallow copy: rewriting the caller's document in place
    # (dropping ``_id``, injecting ``name``/``conf``/``hosts``) made it
    # impossible to reuse the same dict for a second call or to store it back.
    fields = dict.copy()
    fields['name'] = fields.pop('_id')
    fields['conf'] = conf
    hosts_data = fields.get('hosts', None)
    if hosts_data:
        fields['hosts'] = [Host.from_dict(h, conf=conf) for h in hosts_data]
    return cls(**fields)
def _restore_machine(self, task, config, healthcheck_timeout):
    """Restore and start the host of a restore task, recording a healing entry.

    The task is left untouched when its instance is contained in the
    collection returned by ``self._failure_instances`` for the
    retry-failure query. Otherwise the host record is looked up and, unless
    dry mode is enabled, a healing entry is stored, the host is restored and
    started, and the nginx healthcheck is awaited. The healing entry is
    updated with ``"success"`` or with the text of the failure. The task is
    removed from storage when no error occurred, in dry mode as well.

    Args:
        task: Task document; the keys ``_id``, ``instance`` and ``host``
            are read.
        config: Configuration handed to ``Host.from_dict`` as ``conf``.
        healthcheck_timeout: Timeout forwarded to
            ``self.nginx_manager.wait_healthcheck``.

    Raises:
        Exception: Any error raised while restoring, starting or
            healthchecking is recorded on the healing entry and re-raised
            unchanged; the task is not removed in that case.
    """
    retry_failure_delay = int(self.config.get("RESTORE_MACHINE_FAILURE_DELAY", 5))

    # Accept the textual forms recognised before ("True", "true", "1") plus a
    # real boolean/int and other casings, which the plain tuple membership
    # test silently treated as "dry mode off".
    dry_mode_setting = self.config.get("RESTORE_MACHINE_DRY_MODE", False)
    restore_dry_mode = str(dry_mode_setting).strip().lower() in ("true", "1")

    retry_failure_query = {"_id": {"$regex": "restore_.+"}, "last_attempt": {"$ne": None}}
    if task['instance'] in self._failure_instances(retry_failure_query, retry_failure_delay):
        # Listed among the failure instances: keep the task for a later run.
        return

    # The lookup happens in dry mode too, exactly as before.
    host = self.storage.find_host_id(task['host'])
    if not restore_dry_mode:
        healing_id = self.storage.store_healing(task['instance'], task['host'])
        host_data = {"_id": host['_id'], "dns_name": task['host'], "manager": host['manager']}
        try:
            # Two separate Host objects are built, as in the original code;
            # each gets its own dict so neither call sees the other's input.
            Host.from_dict(dict(host_data), conf=config).restore()
            Host.from_dict(dict(host_data), conf=config).start()
            self.nginx_manager.wait_healthcheck(task['host'], timeout=healthcheck_timeout)
            self.storage.update_healing(healing_id, "success")
        except Exception as e:
            # ``e.message`` is gone in Python 3 (deprecated since 2.6); using
            # it here would raise AttributeError and hide the real failure.
            self.storage.update_healing(healing_id, str(e))
            # Bare ``raise`` keeps the original traceback.
            raise
    self.storage.remove_task({"_id": task['_id']})
def run(self, config):
    """Process the pending restore tasks found in storage.

    After ``self.init_config(config)``, tasks whose ``_id`` matches
    ``restore_.+`` and whose ``created`` time is at least
    ``RESTORE_MACHINE_DELAY`` minutes old (default 5) are handled in turn:
    the host record is looked up, the host is restored unless dry mode is
    enabled, and the task is removed. A task is skipped when its instance
    has another restore task whose ``last_attempt`` lies within the last
    ``RESTORE_MACHINE_FAILURE_DELAY`` minutes (default 5).

    Args:
        config: Configuration passed to ``self.init_config`` and handed to
            ``Host.from_dict`` as ``conf``.

    Raises:
        Exception: The first error raised while handling a task. The task's
            ``last_attempt`` is set to the current UTC time before the error
            is re-raised, and the remaining tasks are not processed.
    """
    self.init_config(config)
    retry_failure_delay = int(self.config.get("RESTORE_MACHINE_FAILURE_DELAY", 5))

    dry_mode_setting = self.config.get("RESTORE_MACHINE_DRY_MODE", False)
    # A textual "False"/"0" is a non-empty string and therefore truthy; taken
    # at face value it would silently switch dry mode on and skip the restore.
    # Explicit negatives are treated as "off"; every other value keeps its
    # plain truthiness, so settings that enabled dry mode before still do.
    if str(dry_mode_setting).strip().lower() in ("false", "0", "no", "off", "none", ""):
        restore_dry_mode = False
    else:
        restore_dry_mode = bool(dry_mode_setting)

    # Instances with a recent failed attempt are still inside the retry window.
    retry_failure_query = {"_id": {"$regex": "restore_.+"}, "last_attempt": {"$ne": None}}
    retry_window = datetime.timedelta(minutes=retry_failure_delay)
    failure_instances = set()
    for failed_task in self.storage.find_task(retry_failure_query):
        if failed_task['last_attempt'] + retry_window >= datetime.datetime.utcnow():
            failure_instances.add(failed_task['instance'])

    restore_delay = int(self.config.get("RESTORE_MACHINE_DELAY", 5))
    created_in = datetime.datetime.utcnow() - datetime.timedelta(minutes=restore_delay)
    query = {"_id": {"$regex": "restore_.+"}, "created": {"$lte": created_in}}
    for task in self.storage.find_task(query):
        try:
            if task['instance'] in failure_instances:
                continue
            # The lookup happens in dry mode too, exactly as before.
            host = self.storage.find_host_id(task['host'])
            if not restore_dry_mode:
                host_data = {"_id": host['_id'], "dns_name": task['host'],
                             "manager": host['manager']}
                Host.from_dict(host_data, conf=config).restore()
            self.storage.remove_task({"_id": task['_id']})
        except Exception:
            # Remember when this task last failed so later runs can back off.
            self.storage.update_task(task['_id'], {"last_attempt": datetime.datetime.utcnow()})
            # Bare ``raise`` keeps the original traceback.
            raise