def PUT(self, cluster_id):
    """Start a network verification task for the cluster.

    :IMPORTANT: this method should be rewritten to be more RESTful
    :returns: JSONized Task object.
    :http: * 202 (network checking task failed)
           * 200 (network verification task started)
           * 404 (cluster not found in db)
    """
    cluster = self.get_object_or_404(Cluster, cluster_id)
    try:
        data = self.validator.validate_networks_update(web.data())
    except web.webapi.badrequest as exc:
        # Validation failed: record the failure as a 'check_networks'
        # task so the UI can show the error, then answer 202.
        task = Task(name='check_networks', cluster=cluster)
        db().add(task)
        # Commit first: set_error looks the task up by uuid in the DB.
        db().commit()
        TaskHelper.set_error(task.uuid, exc.data)
        logger.error(traceback.format_exc())
        json_task = build_json_response(TaskHandler.render(task))
        raise web.accepted(data=json_task)
    # Per-network VLAN id lists for the verification agent.
    vlan_ids = [{
        'name': n['name'],
        'vlans': NetworkGroup.generate_vlan_ids_list(n)
    } for n in data['networks']]
    task_manager = VerifyNetworksTaskManager(cluster_id=cluster.id)
    task = task_manager.execute(data, vlan_ids)
    return TaskHandler.render(task)
def _success_action(cls, task, status, progress):
    """Finish *task* successfully: build a user message, notify, persist."""
    # Any node left in 'error' turns the whole task into a failure.
    if any(n.status == 'error' for n in task.cluster.nodes):
        cls._error_action(task, 'error', 100)
        return

    task_name = task.name.title()
    try:
        message = (u"{0} of environment '{1}' is done. ").format(
            task_name,
            task.cluster.name,
        )
    except Exception as exc:
        logger.error(": ".join([str(exc), traceback.format_exc()]))
        message = u"{0} of environment '{1}' is done".format(
            task_name, task.cluster.name)

    zabbix_url = objects.Cluster.get_network_manager(
        task.cluster).get_zabbix_url(task.cluster)
    if zabbix_url:
        message = "{0} Access Zabbix dashboard at {1}".format(
            message, zabbix_url)

    plugins_msg = cls._make_plugins_success_message(task.cluster.plugins)
    if plugins_msg:
        message = '{0}\n\n{1}'.format(message, plugins_msg)

    notifier.notify("done", message, task.cluster_id)
    objects.Task.update(
        task,
        {'status': status, 'progress': progress, 'message': message})
def PUT(self, cluster_id):
    """Reset the cluster's editable attributes to the release defaults."""
    not_found_log = (
        "warning",
        "Error: there is no cluster "
        "with id '{0}' in DB.".format(cluster_id)
    )
    cluster = self.get_object_or_404(Cluster, cluster_id,
                                     log_404=not_found_log)
    if not cluster.attributes:
        logger.error('ClusterAttributesDefaultsHandler: no attributes'
                     ' found for cluster_id %s' % cluster_id)
        raise web.internalerror("No attributes found!")
    # Defaults come straight from the release metadata.
    defaults = cluster.release.attributes_metadata.get("editable")
    cluster.attributes.editable = defaults
    self.db.commit()
    cluster.add_pending_changes("attributes")
    logger.debug('ClusterAttributesDefaultsHandler:'
                 ' editable attributes for cluster_id %s were reset'
                 ' to default' % cluster_id)
    return {"editable": cluster.attributes.editable}
def send_oswl_serialized(self, rec_data, ids):
    """Send serialized OSWL stats to the collector, mark saved DB rows.

    :param rec_data: list of serialized OSWL record dicts to send
    :param ids: DB ids of the rows the records were built from
    """
    if rec_data:
        resp = self.send_data_to_url(
            url=self.build_collector_url("COLLECTOR_OSWL_INFO_URL"),
            data={"oswl_stats": rec_data}
        )
        resp_dict = resp.json()
        if self.is_status_acceptable(resp.status_code,
                                     resp_dict["status"]):
            records_resp = resp_dict["oswl_stats"]
            status_failed = consts.LOG_RECORD_SEND_STATUS.failed
            saved_ids = set(r["id"] for r in records_resp
                            if r["status"] != status_failed)
            failed_ids = set(r["id"] for r in records_resp
                             if r["status"] == status_failed)
            # Only rows that were part of this batch may be marked sent.
            sent_saved_ids = set(saved_ids) & set(ids)
            logger.info("OSWL info records saved: %s, failed: %s",
                        six.text_type(list(sent_saved_ids)),
                        six.text_type(list(failed_ids)))
            if sent_saved_ids:
                db().query(models.OpenStackWorkloadStats).filter(
                    models.OpenStackWorkloadStats.id.in_(sent_saved_ids)
                ).update(
                    {"is_sent": True}, synchronize_session=False
                )
                db().commit()
        else:
            logger.error("Unexpected collector answer: %s",
                         six.text_type(resp.text))
def PUT(self, cluster_id):
    """:returns: JSONized Task object.

    :http: * 202 (network checking task created)
           * 404 (cluster not found in db)
    """
    data = jsonutils.loads(web.data())
    networks = data.get("networks")
    if networks:
        # The admin network is not user-editable; drop it from input.
        data["networks"] = [net for net in networks
                            if net.get("name") != "fuelweb_admin"]
    cluster = self.get_object_or_404(objects.Cluster, cluster_id)
    self.check_net_provider(cluster)
    self.check_if_network_configuration_locked(cluster)
    task = CheckNetworksTaskManager(cluster_id=cluster.id).execute(data)
    if task.status != consts.TASK_STATUSES.error:
        try:
            if "networks" in data:
                self.validator.validate_networks_update(
                    jsonutils.dumps(data))
            if "dns_nameservers" in data:
                self.validator.validate_dns_servers_update(
                    jsonutils.dumps(data))
            objects.Cluster.get_network_manager(cluster).update(
                cluster, data)
        except Exception as exc:
            # Record the failure on the task instead of propagating.
            logger.error(traceback.format_exc())
            objects.Task.update(task, {
                "status": consts.TASK_STATUSES.error,
                "progress": 100,
                "message": six.text_type(exc),
            })
    raise self.http(202, objects.Task.to_json(task))
def calculate_fault_tolerance(cls, percentage_or_value, total):
    """Calculates actual fault tolerance value.

    :param percentage_or_value: the fault tolerance as percent of nodes
        that can fail or actual number of nodes; a negative number
        means the number of nodes which have to deploy successfully.
    :param total: the total number of nodes in group
    :return: the actual number of nodes that can fail
    """
    if percentage_or_value is None:
        # unattainable number
        return total + 1

    if isinstance(percentage_or_value, six.string_types):
        percentage_or_value = percentage_or_value.strip()

    try:
        # endswith avoids an uncaught IndexError that [-1] raised on
        # an empty string; int("") below still raises ValueError,
        # which is handled.
        if (isinstance(percentage_or_value, six.string_types) and
                percentage_or_value.endswith('%')):
            # "N%" of total, rounded down
            value = (int(percentage_or_value[:-1]) * total) // 100
        else:
            value = int(percentage_or_value)
        if value < 0:
            # convert negative value to number of nodes which may fail
            value = max(0, total + value)
        return value
    except ValueError as e:
        logger.error(
            "Failed to handle fault_tolerance: '%s': %s. it is ignored",
            percentage_or_value, e
        )
        # unattainable number
        return total + 1
def __init__(self):
    """Load settings from package, system-wide and version config files."""
    settings_files = []
    logger.debug("Looking for settings.yaml package config "
                 "using old style __file__")
    project_path = os.path.dirname(__file__)
    project_settings_file = os.path.join(project_path, "settings.yaml")
    settings_files.append(project_settings_file)
    settings_files.append("/etc/nailgun/settings.yaml")

    # First readable version.yaml wins; list order encodes priority.
    version_paths = ["/etc/fuel/version.yaml",
                     "/etc/fuel/nailgun/version.yaml",
                     "/etc/nailgun/version.yaml"]
    for path in version_paths:
        if os.access(path, os.R_OK):
            settings_files.append(path)
            break
    else:
        logger.error("'version.yaml' config file is not found")

    self.config = {}
    for sf in settings_files:
        try:
            logger.debug("Trying to read config file %s" % sf)
            self.update_from_file(sf)
        except Exception as e:
            # Best effort: a broken/missing file must not stop startup.
            logger.error("Error while reading config file %s: %s" %
                         (sf, str(e)))

    # Default to 0 so a missing DEVELOPMENT key does not crash with
    # TypeError from int(None).
    if int(self.config.get("DEVELOPMENT", 0)):
        logger.info("DEVELOPMENT MODE ON:")
        here = os.path.abspath(
            os.path.join(os.path.dirname(__file__), ".."))
        self.config.update(
            {"STATIC_DIR": os.path.join(here, "static"),
             "TEMPLATE_DIR": os.path.join(here, "static")}
        )
        logger.info("Static dir is %s" % self.config.get("STATIC_DIR"))
        logger.info("Template dir is %s" %
                    self.config.get("TEMPLATE_DIR"))
def PUT(self, cluster_id):
    """Check and apply a neutron network configuration update."""
    data = jsonutils.loads(web.data())
    cluster = self.get_object_or_404(objects.Cluster, cluster_id)
    self.check_net_provider(cluster)
    self.check_if_network_configuration_locked(cluster)
    task = CheckNetworksTaskManager(cluster_id=cluster.id).execute(data)
    if task.status != consts.TASK_STATUSES.error:
        try:
            if "networks" in data:
                self.validator.validate_networks_update(
                    jsonutils.dumps(data))
            if "networking_parameters" in data:
                self.validator.validate_neutron_params(
                    jsonutils.dumps(data), cluster_id=cluster_id)
            objects.Cluster.get_network_manager(cluster).update(
                cluster, data)
        except Exception as exc:
            # Record the failure on the task rather than propagate it.
            logger.error(traceback.format_exc())
            objects.Task.update(task, {"status": "error",
                                       "progress": 100,
                                       "message": six.text_type(exc)})
    raise self.http(202, objects.Task.to_json(task))
def get_file_content(self, path):
    """Return the stripped text contents of *path*, or None on any error."""
    try:
        with open(path, "r") as source:
            return source.read().strip()
    except Exception as e:
        # Best effort: log the problem and fall through to implicit None.
        logger.error("Error while reading file: %s. %s",
                     path, six.text_type(e))
def get_metadata(self):
    """Get plugin data tree.

    :return: All plugin metadata
    :rtype: dict
    """
    data_tree, report = self.loader.load()

    if report.is_failed():
        # Return whatever was loaded; the errors are only logged.
        logger.error(report.render())
        logger.error('Problem with loading plugin {0}'.format(
            self.plugin_path))
        return data_tree

    # Run every registered per-field processor on its field.
    for field, processor in six.iteritems(self.attributes_processors):
        if field in data_tree:
            data_tree[field] = processor(data_tree.get(field))

    # Drop fields whose processed value is None.
    return {
        key: value for key, value in six.iteritems(data_tree)
        if value is not None
    }
def PUT(self, cluster_id):
    """:returns: JSONized Task object.

    :http: * 202 (network checking task created)
           * 404 (cluster not found in db)
    """
    data = json.loads(web.data())
    cluster = self.get_object_or_404(Cluster, cluster_id)
    task_manager = CheckNetworksTaskManager(cluster_id=cluster.id)
    task = task_manager.execute(data)
    if task.status != 'error':
        try:
            if 'networks' in data:
                self.validator.validate_networks_update(json.dumps(data))
            NetworkConfiguration.update(cluster, data)
        except web.webapi.badrequest as exc:
            # Validation errors carry structured data for the UI.
            TaskHelper.set_error(task.uuid, exc.data)
            logger.error(traceback.format_exc())
        except Exception as exc:
            TaskHelper.set_error(task.uuid, exc)
            logger.error(traceback.format_exc())
    data = build_json_response(TaskHandler.render(task))
    # Roll back the failed update; otherwise persist it.
    if task.status == 'error':
        db().rollback()
    else:
        db().commit()
    raise web.accepted(data=data)
def PUT(self, cluster_id):
    """Start a network verification task for the cluster.

    :IMPORTANT: this method should be rewritten to be more RESTful
    :returns: JSONized Task object.
    :http: * 202 (network checking task failed)
           * 200 (network verification task started)
           * 404 (cluster not found in db)
    """
    cluster = self.get_object_or_404(Cluster, cluster_id)
    try:
        data = self.validator.validate_networks_update(web.data())
    except web.webapi.badrequest as exc:
        # Validation failed: record the failure as a 'check_networks'
        # task so the UI can show the error, then answer 202.
        task = Task(name='check_networks', cluster=cluster)
        db().add(task)
        # Commit first: set_error looks the task up by uuid in the DB.
        db().commit()
        TaskHelper.set_error(task.uuid, exc.data)
        logger.error(traceback.format_exc())
        json_task = build_json_response(TaskHandler.render(task))
        raise web.accepted(data=json_task)
    # Per-network VLAN id lists for the verification agent.
    vlan_ids = [{
        'name': n['name'],
        'vlans': NetworkGroup.generate_vlan_ids_list(n)
    } for n in data['networks']]
    task_manager = VerifyNetworksTaskManager(cluster_id=cluster.id)
    task = task_manager.execute(data, vlan_ids)
    return TaskHandler.render(task)
def update_task_status(cls, uuid, status, progress, msg="", result=None):
    """Update a task's status/progress/message/result by UUID.

    Propagates the change to the owning cluster (on status change)
    and to the parent task, if any. None values are not applied.
    """
    # verify_networks - task is expecting to receive result with
    # some data if connectivity_verification fails
    logger.debug("Updating task: %s", uuid)
    task = db().query(Task).filter_by(uuid=uuid).first()
    if not task:
        logger.error(
            "Can't set status='%s', message='%s':no task \
with UUID %s found!", status, msg, uuid)
        return
    previous_status = task.status
    data = {
        'status': status,
        'progress': progress,
        'message': msg,
        'result': result
    }
    # Only overwrite fields that were explicitly provided (not None).
    for key, value in data.iteritems():
        if value is not None:
            setattr(task, key, value)
            logger.info(u"Task {0} ({1}) {2} is set to {3}".format(
                task.uuid, task.name, key, value))
    db().add(task)
    db().commit()
    if previous_status != status and task.cluster_id:
        logger.debug(
            "Updating cluster status: "
            "cluster_id: %s status: %s",
            task.cluster_id, status)
        cls.update_cluster_status(uuid)
    if task.parent:
        logger.debug("Updating parent task: %s.", task.parent.uuid)
        cls.update_parent_task(task.parent.uuid)
def PUT(self, cluster_id):
    """:returns: JSONized Cluster attributes.

    :http: * 200 (OK)
           * 400 (wrong attributes data specified)
           * 404 (cluster not found in db)
           * 500 (cluster has no attributes)
    """
    not_found_log = (
        "warning",
        "Error: there is no cluster "
        "with id '{0}' in DB.".format(cluster_id)
    )
    cluster = self.get_object_or_404(Cluster, cluster_id,
                                     log_404=not_found_log)
    if not cluster.attributes:
        logger.error('ClusterAttributesDefaultsHandler: no attributes'
                     ' found for cluster_id %s' % cluster_id)
        raise web.internalerror("No attributes found!")
    # Defaults come straight from the release metadata.
    defaults = cluster.release.attributes_metadata.get("editable")
    cluster.attributes.editable = defaults
    db().commit()
    cluster.add_pending_changes("attributes")
    logger.debug('ClusterAttributesDefaultsHandler:'
                 ' editable attributes for cluster_id %s were reset'
                 ' to default' % cluster_id)
    return {"editable": cluster.attributes.editable}
def get_network_roles(cls, cluster, merge_policy):
    """Returns the network roles from plugins.

    The roles cluster and plugins will be mixed
    according to merge policy.
    """
    all_roles = {}
    for role in cluster.release.network_roles_metadata:
        all_roles[role['id']] = role

    conflict_roles = {}
    for plugin in ClusterPlugin.get_enabled(cluster.id):
        for role in plugin.network_roles_metadata:
            role_id = role['id']
            if role_id not in all_roles:
                # New role: take it as-is.
                all_roles[role_id] = role
                continue
            try:
                # Existing role: merge per policy.
                merge_policy.apply_patch(all_roles[role_id], role)
            except errors.UnresolvableConflict as e:
                logger.error("cannot merge plugin {0}: {1}".format(
                    plugin.name, e))
                conflict_roles[role_id] = plugin.name

    if conflict_roles:
        raise errors.NetworkRoleConflict(
            "Cannot override existing network roles: '{0}' in "
            "plugins: '{1}'".format(
                ', '.join(conflict_roles),
                ', '.join(set(conflict_roles.values()))))
    return list(all_roles.values())
def PUT(self, cluster_id):
    """Check and apply a new network configuration for the cluster.

    Always answers HTTP 202 with the JSONized check task; failures
    are recorded on the task and the DB transaction is rolled back.
    """
    data = json.loads(web.data())
    cluster = self.get_object_or_404(Cluster, cluster_id)
    task_manager = CheckNetworksTaskManager(cluster_id=cluster.id)
    task = task_manager.execute(data)
    if task.status != 'error':
        try:
            if 'networks' in data:
                # Called for its validation side effect only (raises
                # on bad input); the previously bound result
                # ('network_configuration') was never used.
                self.validator.validate_networks_update(
                    json.dumps(data))
            NetworkConfiguration.update(cluster, data)
        except web.webapi.badrequest as exc:
            # Validation errors carry structured data for the UI.
            TaskHelper.set_error(task.uuid, exc.data)
            logger.error(traceback.format_exc())
        except Exception as exc:
            TaskHelper.set_error(task.uuid, exc)
            logger.error(traceback.format_exc())
    data = build_json_response(TaskHandler.render(task))
    # Roll back the failed update; otherwise persist it.
    if task.status == 'error':
        db().rollback()
    else:
        db().commit()
    raise web.accepted(data=data)
def send_log_serialized(self, records, ids):
    """Send serialized action logs to the collector, mark saved rows.

    :param records: serialized action-log record dicts
    :param ids: DB ids corresponding to the sent records
    """
    if records:
        logger.info("Send %d action logs records", len(records))
        resp = self.send_data_to_url(
            url=self.build_collector_url("COLLECTOR_ACTION_LOGS_URL"),
            data={"action_logs": records})
        resp_dict = resp.json()
        if self.is_status_acceptable(resp.status_code,
                                     resp_dict["status"]):
            records_resp = resp_dict["action_logs"]
            saved_ids = set()
            failed_ids = set()
            for record in records_resp:
                if record["status"] == \
                        consts.LOG_RECORD_SEND_STATUS.failed:
                    failed_ids.add(record["external_id"])
                else:
                    saved_ids.add(record["external_id"])
            # Only rows that were part of this batch may be marked sent.
            sent_saved_ids = set(saved_ids) & set(ids)
            logger.info("Action logs records saved: %s, failed: %s",
                        six.text_type(list(sent_saved_ids)),
                        six.text_type(list(failed_ids)))
            db().query(models.ActionLog).filter(
                models.ActionLog.id.in_(sent_saved_ids)).update(
                    {"is_sent": True}, synchronize_session=False)
            db().commit()
        else:
            logger.error("Unexpected collector answer: %s",
                         six.text_type(resp.text))
def _success_start_action(cls, task, status, progress):
    """Mark *task* as successfully finished and notify the user."""
    # Any node in 'error' means the action as a whole failed.
    if any(n.status == 'error' for n in task.cluster.nodes):
        cls._error_start_action(task, 'error', 100)
        return

    task_name = task.name.title()
    task_cache = cls.get_task_cache(task)
    try:
        message = (
            u"The Role {0} of cluster '{1}' is success {2}"
        ).format(
            task_cache["role"],
            task.cluster.name,
            task_cache[task_cache["role"].lower()]["action"]
        )
    except Exception as exc:
        # Fall back to a generic message if the cache is incomplete.
        logger.error(": ".join([str(exc), traceback.format_exc()]))
        message = u"{0} of environment '{1}' is done".format(
            task_name, task.cluster.name)

    notifier.notify("done", message, task.cluster_id)
    objects.Task.update(task, {
        'status': status,
        'progress': progress,
        'message': message,
        'timestamp': datetime.datetime.now(),
    })
def _call_silently(self, task, instance, *args, **kwargs):
    """Run *instance*'s method for *task*; failures become task errors.

    Nothing is propagated to the caller: any exception marks the task
    as 'error' with the exception text as message.
    """
    # create action_log for task
    al = TaskHelper.create_action_log(task)
    method = getattr(instance, kwargs.pop('method_name', 'execute'))
    if task.status == TASK_STATUSES.error:
        # Task already failed before the call; just close the log.
        TaskHelper.update_action_log(task, al)
        return
    try:
        to_return = method(task, *args, **kwargs)
        # update action_log instance for task
        # for asynchronous task it will be not final update
        # as they also are updated in rpc receiver
        TaskHelper.update_action_log(task, al)
        return to_return
    except Exception as exc:
        err = str(exc)
        # Exceptions may suppress traceback logging via a false
        # 'log_traceback' attribute.
        if any([
            not hasattr(exc, "log_traceback"),
            hasattr(exc, "log_traceback") and exc.log_traceback
        ]):
            logger.error(traceback.format_exc())
        # update task entity with given data
        data = {'status': 'error',
                'progress': 100,
                'message': err}
        objects.Task.update(task, data)
        TaskHelper.update_action_log(task, al)
def PUT(self, cluster_id):
    """:returns: JSONized Task object.

    :http: * 202 (task successfully executed)
           * 400 (invalid object data specified)
           * 404 (environment is not found)
           * 409 (task with such parameters already exists)
    """
    cluster = self.get_object_or_404(
        objects.Cluster, cluster_id,
        log_404=(u"warning",
                 u"Error: there is no cluster "
                 u"with id '{0}' in DB.".format(cluster_id)))

    logger.info(self.log_message.format(env_id=cluster_id))

    try:
        task = self.task_manager(cluster_id=cluster.id).execute()
    except (errors.AlreadyExists, errors.StopAlreadyRunning) as exc:
        raise self.http(409, exc.message)
    except (errors.DeploymentNotRunning,
            errors.WrongNodeStatus) as exc:
        raise self.http(400, exc.message)
    except Exception as exc:
        logger.error(
            self.log_error.format(env_id=cluster_id, error=str(exc)))
        # let it be 500
        raise

    raise self.http(202, self.single.to_json(task))
def PUT(self, cluster_id):
    """Check and apply a neutron network configuration update.

    Always answers HTTP 202 with the JSONized check task; failures
    are recorded on the task.
    """
    data = jsonutils.loads(web.data())
    cluster = self.get_object_or_404(objects.Cluster, cluster_id)
    self.check_net_provider(cluster)
    self.check_if_network_configuration_locked(cluster)
    task_manager = CheckNetworksTaskManager(cluster_id=cluster.id)
    task = task_manager.execute(data)
    if task.status != consts.TASK_STATUSES.error:
        try:
            if 'networks' in data:
                self.validator.validate_networks_update(
                    jsonutils.dumps(data))
            if 'networking_parameters' in data:
                self.validator.validate_neutron_params(
                    jsonutils.dumps(data),
                    cluster_id=cluster_id)
            objects.Cluster.get_network_manager(cluster).update(
                cluster, data)
        except Exception as exc:
            # set task status to error and update its corresponding data
            data = {
                'status': 'error',
                'progress': 100,
                'message': six.text_type(exc)
            }
            objects.Task.update(task, data)
            logger.error(traceback.format_exc())
    raise self.http(202, objects.Task.to_json(task))
def _call_silently(self, task, instance, *args, **kwargs):
    """Run *instance*'s method for *task*; failures become task errors.

    NoChanges finishes the task as 'ready'; any other exception marks
    it as 'error'. Nothing is propagated to the caller.
    """
    # create action_log for task
    al = TaskHelper.create_action_log(task)
    method = getattr(instance, kwargs.pop('method_name', 'execute'))
    if task.status == consts.TASK_STATUSES.error:
        # Task already failed before the call; just close the log.
        TaskHelper.update_action_log(task, al)
        return
    try:
        to_return = method(task, *args, **kwargs)
        # update action_log instance for task
        # for asynchronous task it will be not final update
        # as they also are updated in rpc receiver
        TaskHelper.update_action_log(task, al)
        return to_return
    except errors.NoChanges as e:
        self._finish_task(task, al, consts.TASK_STATUSES.ready, str(e))
    except Exception as exc:
        # Exceptions may suppress traceback logging via a false
        # 'log_traceback' attribute.
        if any([
            not hasattr(exc, "log_traceback"),
            hasattr(exc, "log_traceback") and exc.log_traceback
        ]):
            logger.error(traceback.format_exc())
        self._finish_task(task, al, consts.TASK_STATUSES.error,
                          str(exc))
def download_release_resp(cls, **kwargs):
    """Handle the RPC reply for a release download task.

    Marks the release as errored or completed and updates the task
    with the final status, progress and result.
    """
    logger.info("RPC method download_release_resp received: %s" %
                json.dumps(kwargs))
    task_uuid = kwargs.get('task_uuid')
    error_msg = kwargs.get('error')
    status = kwargs.get('status')
    progress = kwargs.get('progress')
    task = TaskHelper.get_task_by_uuid(task_uuid)
    release_info = task.cache['args']['release_info']
    release_id = release_info['release_id']
    release = db().query(Release).get(release_id)
    if not release:
        logger.error(
            "download_release_resp: Release"
            " with ID %s not found", release_id)
        return
    if error_msg:
        status = 'error'
        # The raw error text is replaced with a user-facing message.
        error_msg = "{0} download and preparation " \
                    "has failed.".format(release.name)
        cls._download_release_error(release_id, error_msg)
    elif progress == 100 and status == 'ready':
        cls._download_release_completed(release_id)
    result = {"release_info": {"release_id": release_id}}
    TaskHelper.update_task_status(task_uuid, status,
                                  progress, error_msg, result)
def stats_user_resp(cls, **kwargs):
    """Handle the RPC reply for the stats-user task."""
    logger.info("RPC method stats_user_resp received: %s",
                jsonutils.dumps(kwargs))

    task_uuid = kwargs.get('task_uuid')
    nodes = kwargs.get('nodes', [])
    status = kwargs.get('status')
    error = kwargs.get('error')
    message = kwargs.get('msg')

    task = objects.Task.get_by_uuid(
        task_uuid, fail_if_not_found=True, lock_for_update=True)

    # Only terminal statuses are processed; anything else is ignored.
    terminal = (consts.TASK_STATUSES.ready, consts.TASK_STATUSES.error)
    if status not in terminal:
        logger.debug("Task %s, id: %s in status: %s",
                     task.name, task.id, task.status)
        return

    data = {'status': status, 'progress': 100, 'message': message}
    if status == consts.TASK_STATUSES.error:
        logger.error("Task %s, id: %s failed: %s",
                     task.name, task.id, error)
        data['message'] = error

    objects.Task.update(task, data)
    cls._update_action_log_entry(status, task.name, task_uuid, nodes)
    logger.info("RPC method stats_user_resp processed")
def update_config_resp(cls, **kwargs):
    """Updates task and nodes states at the end of upload config task"""
    logger.info("RPC method update_config_resp received: %s" %
                jsonutils.dumps(kwargs))
    task_uuid = kwargs["task_uuid"]
    message = kwargs.get("error")
    status = kwargs.get("status")
    progress = kwargs.get("progress")
    task = objects.Task.get_by_uuid(task_uuid,
                                    fail_if_not_found=True,
                                    lock_for_update=True)
    q_nodes = objects.NodeCollection.filter_by_id_list(
        None, task.cache["nodes"])
    # lock nodes for updating
    nodes = objects.NodeCollection.lock_for_update(q_nodes).all()
    if status in (consts.TASK_STATUSES.ready,
                  consts.TASK_STATUSES.error):
        # Terminal state: nodes return to 'ready' either way.
        for node in nodes:
            node.status = consts.NODE_STATUSES.ready
            node.progress = 100
    if status == consts.TASK_STATUSES.error:
        # Replace the raw error with a user-facing message and notify.
        message = (u"Failed to update configuration on nodes:"
                   u" {0}.").format(
            ", ".join(node.name for node in nodes))
        logger.error(message)
        notifier.notify("error", message)
    db().flush()
    data = {"status": status, "progress": progress, "message": message}
    objects.Task.update(task, data)
    cls._update_action_log_entry(status, task.name, task_uuid, nodes)
def update_task_status(cls, uuid, status, progress, msg="", result=None):
    """Update task fields by UUID (legacy orm() variant).

    None values are skipped; cluster status is refreshed on any status
    change, and the parent task is re-aggregated if present.
    """
    logger.debug("Updating task: %s", uuid)
    db = orm()
    task = db.query(Task).filter_by(uuid=uuid).first()
    if not task:
        logger.error("Can't set status='%s', message='%s':no task \
with UUID %s found!", status, msg, uuid)
        return
    previous_status = task.status
    data = {'status': status, 'progress': progress,
            'message': msg, 'result': result}
    # Only overwrite fields that were explicitly provided (not None).
    for key, value in data.iteritems():
        if value is not None:
            setattr(task, key, value)
            logger.info(
                u"Task {0} {1} is set to {2}".format(
                    task.uuid,
                    key,
                    value
                )
            )
    db.add(task)
    db.commit()
    if previous_status != status:
        cls.update_cluster_status(uuid)
    if task.parent:
        logger.debug("Updating parent task: %s", task.parent.uuid)
        cls.update_parent_task(task.parent.uuid)
def run():
    """Run the standalone stats sender until interrupted."""
    logger.info("Starting standalone stats sender...")
    try:
        StatsSender().run()
    except (KeyboardInterrupt, SystemExit) as e:
        # Interrupt is the normal way to stop the daemon; log and exit.
        logger.error("Stats sender exception: %s", six.text_type(e))
    logger.info("Stopping standalone stats sender...")
def PUT(self, cluster_id):
    """:returns: JSONized Cluster attributes.

    :http: * 200 (OK)
           * 400 (wrong attributes data specified)
           * 404 (cluster not found in db)
           * 500 (cluster has no attributes)
    """
    not_found_log = (
        "error",
        "There is no cluster "
        "with id '{0}' in DB.".format(cluster_id)
    )
    cluster = self.get_object_or_404(objects.Cluster, cluster_id,
                                     log_404=not_found_log)
    if not cluster.attributes:
        logger.error('ClusterAttributesDefaultsHandler: no attributes'
                     ' found for cluster_id %s' % cluster_id)
        raise self.http(500, "No attributes found!")
    # Recompute the defaults through the Cluster object layer.
    defaults = objects.Cluster.get_default_editable_attributes(cluster)
    cluster.attributes.editable = defaults
    objects.Cluster.add_pending_changes(cluster, "attributes")
    logger.debug('ClusterAttributesDefaultsHandler:'
                 ' editable attributes for cluster_id %s were reset'
                 ' to default' % cluster_id)
    return {"editable": cluster.attributes.editable}
def PUT(self, cluster_id): """:returns: JSONized Task object. :http: * 200 (task successfully executed) * 202 (network checking task scheduled for execution) * 400 (data validation failed) * 404 (cluster not found in db) """ # TODO(pkaminski): this seems to be synchronous, no task needed here data = jsonutils.loads(web.data()) cluster = self.get_object_or_404(objects.Cluster, cluster_id) self.check_net_provider(cluster) self.check_if_network_configuration_locked(cluster) task_manager = CheckNetworksTaskManager(cluster_id=cluster.id) task = task_manager.execute(data) if task.status != consts.TASK_STATUSES.error: try: if "networks" in data: self.validator.validate_networks_update(jsonutils.dumps(data)) if "networking_parameters" in data: self.validator.validate_neutron_params(jsonutils.dumps(data), cluster_id=cluster_id) objects.Cluster.get_network_manager(cluster).update(cluster, data) except Exception as exc: # set task status to error and update its corresponding data data = {"status": "error", "progress": 100, "message": six.text_type(exc)} objects.Task.update(task, data) logger.error(traceback.format_exc()) self.raise_task(task)
def update_task_status(cls, uuid, status, progress, msg="", result=None):
    """Update task fields by UUID and propagate to cluster/parent.

    None values in (status, progress, msg, result) are not applied.
    """
    logger.debug("Updating task: %s", uuid)
    task = db().query(Task).filter_by(uuid=uuid).first()
    if not task:
        logger.error("Can't set status='%s', message='%s':no task \
with UUID %s found!", status, msg, uuid)
        return
    data = {'status': status, 'progress': progress,
            'message': msg, 'result': result}
    # Only overwrite fields that were explicitly provided (not None).
    for key, value in data.iteritems():
        if value is not None:
            setattr(task, key, value)
            logger.info(
                u"Task {0} ({1}) {2} is set to {3}".format(
                    task.uuid, task.name, key, value))
    db().commit()
    if task.cluster_id:
        logger.debug("Updating cluster status: %s "
                     "cluster_id: %s status: %s",
                     uuid, task.cluster_id, status)
        cls.update_cluster_status(uuid)
    if task.parent:
        logger.debug("Updating parent task: %s.", task.parent.uuid)
        cls.update_parent_task(task.parent.uuid)
def update_verify_networks(cls, uuid, status, progress, msg, result):
    """Aggregate subtask results into the verify_networks task.

    The task becomes 'error' if any subtask errored; messages from
    all subtasks are joined into one.
    """
    #TODO(dshulyak) move network tests into ostf
    task = db().query(Task).filter_by(uuid=uuid).first()
    if not task:
        logger.error(
            "Can't set status='%s', message='%s': No task \
with UUID %s found!", status, msg, uuid)
        return
    previous_status = task.status
    statuses = [sub.status for sub in task.subtasks]
    messages = [sub.message for sub in task.subtasks]
    messages.append(msg)
    statuses.append(status)
    # Any errored subtask (or this update) makes the whole task fail.
    if any(st == 'error' for st in statuses):
        task.status = 'error'
    else:
        task.status = status or task.status
    task.progress = progress or task.progress
    task.result = result or task.result
    # join messages if not None or ""
    task.message = '\n'.join([m for m in messages if m])
    db().commit()
    if previous_status != task.status and task.cluster_id:
        logger.debug(
            "Updating cluster status: "
            "cluster_id: %s status: %s",
            task.cluster_id, status)
        cls.update_cluster_status(uuid)
def launch_verify(self, cluster):
    """Validate request data and launch a network verification task.

    :raises web.accepted: with a failed check task when validation fails
    :raises web.badrequest: when a running verification task exists
    """
    try:
        data = self.validator.validate_networks_update(web.data())
    except web.webapi.badrequest as exc:
        # Record the validation failure as a check_networks task so
        # the UI can show the error, then answer 202.
        task = Task(name='check_networks', cluster=cluster)
        db().add(task)
        # Commit first: set_error looks the task up by uuid in the DB.
        db().commit()
        TaskHelper.set_error(task.uuid, exc.data)
        logger.error(traceback.format_exc())
        json_task = build_json_response(TaskHandler.render(task))
        raise web.accepted(data=json_task)
    # The admin network is excluded from verification.
    data["networks"] = [
        n for n in data["networks"] if n.get("name") != "fuelweb_admin"
    ]
    # Per-network VLAN id lists for the verification agent.
    vlan_ids = [{
        'name': n['name'],
        'vlans': cluster.network_manager.generate_vlan_ids_list(
            data, cluster, n)
    } for n in data['networks']]
    task_manager = VerifyNetworksTaskManager(cluster_id=cluster.id)
    try:
        task = task_manager.execute(data, vlan_ids)
    except errors.CantRemoveOldVerificationTask:
        raise web.badrequest("You cannot delete running task manually")
    return TaskHandler.render(task)
def update_action_log(cls, task, al_instance=None):
    """Close the action log entry for *task* with its final status.

    Never raises: any failure is logged and swallowed so task
    processing is not disturbed by logging problems.
    """
    from nailgun.objects import ActionLog
    try:
        if not al_instance:
            al_instance = ActionLog.get_by_kwargs(
                task_uuid=task.uuid,
                action_name=task.name)
        # this is needed as status for check_networks task is not set to
        # "ready" in case of success (it is left in status "running") so
        # we do it here manually, there is no such issue with "error"
        # status though.
        set_to_ready_cond = (
            task.name == consts.TASK_NAMES.check_networks
            and task.status == consts.TASK_STATUSES.running)
        task_status = consts.TASK_STATUSES.ready if set_to_ready_cond \
            else task.status
        if al_instance:
            task_cache = cls.get_task_cache(task)
            update_data = {
                "end_timestamp": datetime.datetime.utcnow(),
                "additional_info": {
                    "ended_with_status": task_status,
                    "message": "",
                    "output": cls.sanitize_task_output(task_cache,
                                                       al_instance)
                }
            }
            ActionLog.update(al_instance, update_data)
    except Exception as e:
        logger.error("update_action_log failed: %s", six.text_type(e))
def PUT(self, cluster_id):
    """Check and apply a new network configuration for the cluster.

    Always answers HTTP 202 with the JSONized check task; failures
    are recorded on the task and the DB transaction is rolled back.
    """
    data = json.loads(web.data())
    cluster = self.get_object_or_404(Cluster, cluster_id)
    task_manager = CheckNetworksTaskManager(cluster_id=cluster.id)
    task = task_manager.execute(data)
    if task.status != 'error':
        if 'networks' in data:
            # Called for its validation side effect only (raises on
            # bad input); the previously bound result
            # ('network_configuration') was never used.
            self.validator.validate_networks_update(json.dumps(data))
        try:
            NetworkConfiguration.update(cluster, data)
        except Exception as exc:
            err = str(exc)
            TaskHelper.update_task_status(task.uuid,
                                          status="error",
                                          progress=100,
                                          msg=err)
            logger.error(traceback.format_exc())
    data = build_json_response(TaskHandler.render(task))
    # Roll back the failed update; otherwise persist it.
    if task.status == 'error':
        self.db.rollback()
    else:
        self.db.commit()
    raise web.accepted(data=data)
def calculate_fault_tolerance(cls, percentage_or_value, total):
    """Calculates actual fault tolerance value.

    :param percentage_or_value: the fault tolerance as percent of nodes
        that can fail or actual number of nodes; a negative number
        means the number of nodes which have to deploy successfully.
    :param total: the total number of nodes in group
    :return: the actual number of nodes that can fail
    """
    if percentage_or_value is None:
        # unattainable number
        return total + 1

    if isinstance(percentage_or_value, six.string_types):
        percentage_or_value = percentage_or_value.strip()

    try:
        # endswith avoids an uncaught IndexError that [-1] raised on
        # an empty string; int("") below still raises ValueError,
        # which is handled.
        if (isinstance(percentage_or_value, six.string_types) and
                percentage_or_value.endswith('%')):
            # "N%" of total, rounded down
            value = (int(percentage_or_value[:-1]) * total) // 100
        else:
            value = int(percentage_or_value)
        if value < 0:
            # convert negative value to number of nodes which may fail
            value = max(0, total + value)
        return value
    except ValueError as e:
        logger.error(
            "Failed to handle fault_tolerance: '%s': %s. it is ignored",
            percentage_or_value, e)
        # unattainable number
        return total + 1
def PUT(self, cluster_id):
    """Check and apply a neutron network configuration (old API)."""
    data = json.loads(web.data())
    if data.get("networks"):
        # The admin network is not user-editable; drop it from input.
        data["networks"] = [n for n in data["networks"]
                            if n.get("name") != "fuelweb_admin"]
    cluster = self.get_object_or_404(Cluster, cluster_id)
    check_if_network_configuration_locked(cluster)
    task = CheckNetworksTaskManager(cluster_id=cluster.id).execute(data)
    if task.status != 'error':
        try:
            if 'networks' in data:
                self.validator.validate_networks_update(
                    json.dumps(data))
            if 'neutron_parameters' in data:
                self.validator.validate_neutron_params(
                    json.dumps(data))
            NeutronNetworkConfiguration.update(cluster, data)
        except Exception as exc:
            # Record the failure on the task instead of propagating.
            TaskHelper.set_error(task.uuid, exc)
            logger.error(traceback.format_exc())
    response = build_json_response(TaskHandler.render(task))
    # Roll back the failed update; otherwise persist it.
    if task.status == 'error':
        db().rollback()
    else:
        db().commit()
    raise web.accepted(data=response)
def send_log_serialized(self, records, ids):
    """Send serialized action logs to the collector, mark saved rows.

    :param records: serialized action-log record dicts
    :param ids: DB ids corresponding to the sent records
    """
    if records:
        logger.info("Send %d action logs records", len(records))
        resp = self.send_data_to_url(
            url=self.build_collector_url("COLLECTOR_ACTION_LOGS_URL"),
            data={"action_logs": records}
        )
        resp_dict = resp.json()
        if self.is_status_acceptable(resp.status_code,
                                     resp_dict["status"]):
            records_resp = resp_dict["action_logs"]
            saved_ids = set()
            failed_ids = set()
            for record in records_resp:
                if record["status"] == \
                        consts.LOG_RECORD_SEND_STATUS.failed:
                    failed_ids.add(record["external_id"])
                else:
                    saved_ids.add(record["external_id"])
            # Only rows that were part of this batch may be marked sent.
            sent_saved_ids = set(saved_ids) & set(ids)
            logger.info("Action logs records saved: %s, failed: %s",
                        six.text_type(list(sent_saved_ids)),
                        six.text_type(list(failed_ids)))
            db().query(models.ActionLog).filter(
                models.ActionLog.id.in_(sent_saved_ids)
            ).update(
                {"is_sent": True}, synchronize_session=False
            )
            db().commit()
        else:
            logger.error("Unexpected collector answer: %s",
                         six.text_type(resp.text))
def update_task_status(cls, uuid, status, progress, msg="", result=None):
    """Update a Task's fields and propagate the status change.

    Only non-None values among status/progress/message/result are
    written.  Afterwards the owning cluster status and the parent
    task (if any) are refreshed.

    :param uuid: UUID of the task to update
    :param status: new status, or None to keep the current one
    :param progress: new progress, or None to keep the current one
    :param msg: new message
    :param result: new result payload
    """
    logger.debug("Updating task: %s", uuid)
    task = db().query(Task).filter_by(uuid=uuid).first()
    if not task:
        # single literal: the old backslash line continuation leaked
        # source indentation into the logged message
        logger.error(
            "Can't set status='%s', message='%s': "
            "no task with UUID %s found!", status, msg, uuid)
        return
    data = {
        'status': status,
        'progress': progress,
        'message': msg,
        'result': result
    }
    for key, value in data.iteritems():
        if value is not None:
            setattr(task, key, value)
            logger.info(u"Task {0} ({1}) {2} is set to {3}".format(
                task.uuid, task.name, key, value))
    db().commit()

    if task.cluster_id:
        logger.debug(
            "Updating cluster status: %s "
            "cluster_id: %s status: %s",
            uuid, task.cluster_id, status)
        cls.update_cluster_status(uuid)
    if task.parent:
        logger.debug("Updating parent task: %s.", task.parent.uuid)
        cls.update_parent_task(task.parent.uuid)
def execute(self):
    """Create and launch a cluster deletion task.

    :raises: errors.DeletionAlreadyStarted when a deletion task is
        already running for this cluster.
    """
    cluster_tasks = db().query(Task).filter_by(
        cluster=self.cluster, name="cluster_deletion").all()
    deploy_running = db().query(Task).filter_by(
        cluster=self.cluster, name="deploy", status="running").first()
    if deploy_running:
        logger.error(u"Deleting cluster '{0}' "
                     "while deployment is still running".format(
                         self.cluster.name))

    logger.debug("Removing cluster tasks")
    for old_task in cluster_tasks:
        if old_task.status == "running":
            raise errors.DeletionAlreadyStarted()
        if old_task.status in ("ready", "error"):
            # finished deletion tasks are garbage-collected together
            # with their subtasks before starting a new one
            for subtask in old_task.subtasks:
                db().delete(subtask)
            db().delete(old_task)
            db().commit()

    logger.debug("Labeling cluster nodes to delete")
    for node in self.cluster.nodes:
        node.pending_deletion = True
        db().add(node)
        db().commit()

    self.cluster.status = "remove"
    db().add(self.cluster)
    db().commit()

    logger.debug("Creating cluster deletion task")
    task = Task(name="cluster_deletion", cluster=self.cluster)
    db().add(task)
    db().commit()
    self._call_silently(task, tasks.ClusterDeletionTask)
    return task
def PUT(self, cluster_id):
    """Reset the cluster's editable attributes to release defaults.

    :returns: JSONized Cluster attributes.
    :http: * 200 (OK)
           * 400 (wrong attributes data specified)
           * 404 (cluster not found in db)
           * 500 (cluster has no attributes)
    """
    not_found_log = ("error",
                     "There is no cluster "
                     "with id '{0}' in DB.".format(cluster_id))
    cluster = self.get_object_or_404(
        objects.Cluster, cluster_id, log_404=not_found_log)

    attrs = cluster.attributes
    if not attrs:
        logger.error('ClusterAttributesDefaultsHandler: no attributes'
                     ' found for cluster_id %s' % cluster_id)
        raise self.http(500, "No attributes found!")

    attrs.editable = (
        objects.Cluster.get_default_editable_attributes(cluster))
    # remember the change so it shows up as a pending cluster change
    objects.Cluster.add_pending_changes(cluster, "attributes")

    logger.debug('ClusterAttributesDefaultsHandler:'
                 ' editable attributes for cluster_id %s were reset'
                 ' to default' % cluster_id)
    return {"editable": attrs.editable}
def check_redhat_credentials_resp(cls, **kwargs):
    """RPC callback for the Red Hat credentials check task.

    Translates the RPC payload into a task status update; on error the
    release state is marked as error and the user is notified (unless
    the task was simply aborted).
    """
    logger.info("RPC method check_redhat_credentials_resp received: %s"
                % json.dumps(kwargs))
    task_uuid = kwargs.get('task_uuid')
    error_msg = kwargs.get('error')
    status = kwargs.get('status')
    progress = kwargs.get('progress')

    task = TaskHelper.get_task_by_uuid(task_uuid)

    release_info = task.cache['args']['release_info']
    release_id = release_info['release_id']
    release = db().query(Release).get(release_id)
    if not release:
        # fixed: the message used to say "download_release_resp",
        # copy-pasted from the sibling handler
        logger.error(
            "check_redhat_credentials_resp: Release"
            " with ID %s not found", release_id)
        return

    if error_msg:
        status = 'error'
        cls._update_release_state(release_id, 'error')
        # TODO(NAME): remove this ugly checks
        if 'Unknown error' in error_msg:
            error_msg = 'Failed to check Red Hat ' \
                        'credentials'
        if error_msg != 'Task aborted':
            notifier.notify('error', error_msg)

    result = {"release_info": {"release_id": release_id}}

    TaskHelper.update_task_status(
        task_uuid, status, progress, error_msg, result)
def get_network_roles(cls, cluster, merge_policy):
    """Returns the network roles from plugins.

    Release roles and plugin roles are combined according to the merge
    policy; unresolvable overrides are collected and reported at once.

    :raises: errors.NetworkRoleConflict when any plugin role cannot be
        merged with an already known role.
    """
    all_roles = {}
    for role in cluster.release.network_roles_metadata:
        all_roles[role['id']] = role

    conflict_roles = {}
    for plugin in cluster.plugins:
        for role in plugin.network_roles_metadata:
            role_id = role['id']
            if role_id not in all_roles:
                all_roles[role_id] = role
                continue
            try:
                merge_policy.apply_patch(all_roles[role_id], role)
            except errors.UnresolvableConflict as e:
                logger.error("cannot merge plugin {0}: {1}"
                             .format(plugin.name, e))
                conflict_roles[role_id] = plugin.name

    if conflict_roles:
        raise errors.NetworkRoleConflict(
            "Cannot override existing network roles: '{0}' in "
            "plugins: '{1}'".format(
                ', '.join(conflict_roles),
                ', '.join(set(conflict_roles.values()))))
    return list(all_roles.values())
def _success_action(cls, task, status, progress):
    """Finish a successful task: build the message, notify, update."""
    # if any node ended up in error, the whole task failed
    if any(n.status == "error" for n in task.cluster.nodes):
        cls._error_action(task, "error", 100)
        return

    task_name = task.name.title()
    try:
        message = u"{0} of environment '{1}' is done. ".format(
            task_name, task.cluster.name)
    except Exception as exc:
        logger.error(": ".join([str(exc), traceback.format_exc()]))
        message = u"{0} of environment '{1}' is done".format(
            task_name, task.cluster.name)

    zabbix_url = objects.Cluster.get_network_manager(
        task.cluster).get_zabbix_url(task.cluster)
    if zabbix_url:
        message = "{0} Access Zabbix dashboard at {1}".format(
            message, zabbix_url)

    plugins_msg = cls._make_plugins_success_message(
        ClusterPlugins.get_enabled(task.cluster.id))
    if plugins_msg:
        message = "{0}\n\n{1}".format(message, plugins_msg)

    cls._notify(task, consts.NOTIFICATION_TOPICS.done, message)
    objects.Task.update(
        task,
        {"status": status, "progress": progress, "message": message})
def update_verify_networks(cls, uuid, status, progress, msg, result):
    """Aggregate subtask states into the verify_networks task.

    The task goes to 'error' if any subtask (or the incoming status)
    is 'error'; messages of all subtasks plus the incoming one are
    joined.  Cluster status is refreshed on a status transition.
    """
    # TODO(dshulyak) move network tests into ostf
    task = db().query(Task).filter_by(uuid=uuid).first()
    if not task:
        # single literal: the old backslash line continuation leaked
        # source indentation into the logged message
        logger.error("Can't set status='%s', message='%s': "
                     "No task with UUID %s found!", status, msg, uuid)
        return
    previous_status = task.status

    statuses = [sub.status for sub in task.subtasks]
    messages = [sub.message for sub in task.subtasks]
    messages.append(msg)
    statuses.append(status)
    if any(st == 'error' for st in statuses):
        task.status = 'error'
    else:
        task.status = status or task.status
    task.progress = progress or task.progress
    task.result = result or task.result
    # join messages if not None or ""
    task.message = '\n'.join([m for m in messages if m])
    db().commit()

    if previous_status != task.status and task.cluster_id:
        logger.debug("Updating cluster status: "
                     "cluster_id: %s status: %s",
                     task.cluster_id, status)
        cls.update_cluster_status(uuid)
def download_release_resp(cls, **kwargs):
    """RPC callback for the release download task.

    Marks the release as failed or completed depending on the RPC
    payload and updates the task accordingly.
    """
    logger.info("RPC method download_release_resp received: %s"
                % jsonutils.dumps(kwargs))
    task_uuid = kwargs.get("task_uuid")
    error_msg = kwargs.get("error")
    status = kwargs.get("status")
    progress = kwargs.get("progress")

    task = objects.Task.get_by_uuid(task_uuid, fail_if_not_found=True)

    release_id = task.cache["args"]["release_info"]["release_id"]
    release = db().query(Release).get(release_id)
    if not release:
        logger.error("download_release_resp: Release"
                     " with ID %s not found", release_id)
        return

    if error_msg:
        status = "error"
        error_msg = ("{0} download and preparation "
                     "has failed.".format(release.name))
        cls._download_release_error(release_id, error_msg)
    elif progress == 100 and status == "ready":
        cls._download_release_completed(release_id)

    objects.Task.update(task, {
        "status": status,
        "progress": progress,
        "message": error_msg,
        "result": {"release_info": {"release_id": release_id}},
    })
def update_task_status(cls, uuid, status, progress, msg="", result=None):
    """Update a Task's non-None fields and propagate the change.

    verify_networks is expected to receive a result with some data
    when connectivity verification fails, hence ``result`` is stored
    verbatim.  Cluster status is refreshed only on an actual status
    transition; the parent task (if any) is always refreshed.
    """
    logger.debug("Updating task: %s", uuid)
    task = db().query(Task).filter_by(uuid=uuid).first()
    if not task:
        # single literal: the old backslash line continuation leaked
        # source indentation into the logged message
        logger.error("Can't set status='%s', message='%s': "
                     "no task with UUID %s found!", status, msg, uuid)
        return
    previous_status = task.status

    data = {'status': status, 'progress': progress,
            'message': msg, 'result': result}
    for key, value in data.iteritems():
        if value is not None:
            setattr(task, key, value)
            logger.info(
                u"Task {0} ({1}) {2} is set to {3}".format(
                    task.uuid, task.name, key, value
                )
            )
    db().add(task)
    db().commit()

    if previous_status != status and task.cluster_id:
        logger.debug("Updating cluster status: "
                     "cluster_id: %s status: %s",
                     task.cluster_id, status)
        cls.update_cluster_status(uuid)
    if task.parent:
        logger.debug("Updating parent task: %s.", task.parent.uuid)
        cls.update_parent_task(task.parent.uuid)
def update_action_log(cls, task, al_instance=None):
    """Close the action-log entry that corresponds to *task*.

    Best-effort: any failure is logged and swallowed so that action
    logging can never break task processing.
    """
    from nailgun.objects import ActionLog

    try:
        al_instance = al_instance or ActionLog.get_by_kwargs(
            task_uuid=task.uuid, action_name=task.name)
        if not al_instance:
            return

        # status for a successful check_networks task is never set to
        # "ready" (it stays "running"), so report it as ready here;
        # the "error" status has no such issue
        if (task.name == consts.TASK_NAMES.check_networks and
                task.status == consts.TASK_STATUSES.running):
            task_status = consts.TASK_STATUSES.ready
        else:
            task_status = task.status

        task_cache = cls.get_task_cache(task)
        ActionLog.update(al_instance, {
            "end_timestamp": datetime.datetime.utcnow(),
            "additional_info": {
                "ended_with_status": task_status,
                "message": "",
                "output": cls.sanitize_task_output(task_cache,
                                                  al_instance)
            }
        })
    except Exception as e:
        logger.error("update_action_log failed: %s", six.text_type(e))
def execute(self):
    """Start cluster deletion under explicit DB row locks.

    Locks are taken in a fixed order (cluster tasks, the cluster row,
    then the cluster's nodes ordered by id) — presumably to keep the
    lock order stable across callers; confirm against other managers.

    :raises: errors.DeletionAlreadyStarted when a deletion task is
        already running for this cluster.
    """
    # locking required tasks
    locked_tasks = objects.TaskCollection.lock_cluster_tasks(
        self.cluster.id)
    # locking cluster
    objects.Cluster.get_by_uid(self.cluster.id,
                               fail_if_not_found=True,
                               lock_for_update=True)
    # locking nodes
    nodes = objects.NodeCollection.filter_by(
        None, cluster_id=self.cluster.id)
    nodes = objects.NodeCollection.order_by(nodes, 'id')
    objects.NodeCollection.lock_for_update(nodes).all()

    current_cluster_tasks = objects.TaskCollection.filter_by_list(
        locked_tasks, 'name', (consts.TASK_NAMES.cluster_deletion, ))

    deploy_running = objects.TaskCollection.filter_by(
        None,
        cluster_id=self.cluster.id,
        name=consts.TASK_NAMES.deploy,
        status=consts.TASK_STATUSES.running)
    deploy_running = objects.TaskCollection.order_by(
        deploy_running, 'id').first()
    if deploy_running:
        logger.error(u"Deleting cluster '{0}' "
                     "while deployment is still running".format(
                         self.cluster.name))
        # Updating action logs for deploy task
        TaskHelper.set_ready_if_not_finished(deploy_running)

    logger.debug("Removing cluster tasks")
    for task in current_cluster_tasks:
        if task.status == consts.TASK_STATUSES.running:
            # release the locks taken above before refusing
            db().rollback()
            raise errors.DeletionAlreadyStarted()
        elif task.status in (consts.TASK_STATUSES.ready,
                             consts.TASK_STATUSES.error):
            for subtask in task.subtasks:
                db().delete(subtask)
            db().delete(task)
            db().flush()

    logger.debug("Labeling cluster nodes to delete")
    for node in self.cluster.nodes:
        node.pending_deletion = True
        db().add(node)
        db().flush()

    self.cluster.status = consts.CLUSTER_STATUSES.remove
    db().add(self.cluster)

    logger.debug("Creating cluster deletion task")
    task = Task(name=consts.TASK_NAMES.cluster_deletion,
                cluster=self.cluster)
    db().add(task)
    # single commit at the end makes the whole preparation atomic
    db().commit()
    self._call_silently(task, tasks.ClusterDeletionTask)
    return task
def _success_action(cls, task, status, progress):
    """Finish a deployment task: compose a message with the Horizon
    URL, notify the user and update the task status.
    """
    network_manager = NetworkManager()
    # if any node ended up in error, the deployment failed as a whole
    if any(n.status == 'error' for n in task.cluster.nodes):
        cls._error_action(task, 'error', 100)
        return

    if task.cluster.mode in ('singlenode', 'multinode'):
        # determining horizon url - it's an IP
        # of a first cluster controller
        controller = db().query(Node).filter_by(
            cluster_id=task.cluster_id).filter(
            Node.role_list.any(name='controller')).first()
        if controller:
            logger.debug(
                u"Controller is found, node_id=%s, "
                "getting it's IP addresses", controller.id)
            public_net = filter(
                lambda n: n['name'] == 'public' and 'ip' in n,
                network_manager.get_node_networks(controller.id))
            if public_net:
                horizon_ip = public_net[0]['ip'].split('/')[0]
                message = (
                    u"Deployment of environment '{0}' is done. "
                    "Access the OpenStack dashboard (Horizon) at "
                    "http://{1}/ or via internal network at http://{2}/"
                ).format(task.cluster.name, horizon_ip, controller.ip)
            else:
                message = (
                    u"Deployment of environment '{0}' is done").format(
                        task.cluster.name)
                logger.warning(u"Public ip for controller node "
                               "not found in '{0}'".format(
                                   task.cluster.name))
        else:
            message = (u"Deployment of environment"
                       " '{0}' is done").format(task.cluster.name)
            logger.warning(u"Controller node not found in '{0}'".format(
                task.cluster.name))
    elif task.cluster.is_ha_mode:
        # determining horizon url in HA mode - it's vip
        # from a public network saved in task cache
        try:
            netmanager = NetworkManager()
            message = (
                u"Deployment of environment '{0}' is done. "
                "Access the OpenStack dashboard (Horizon) at {1}"
            ).format(
                task.cluster.name,
                netmanager.get_horizon_url(task.cluster.id))
        except Exception as exc:
            logger.error(": ".join([str(exc), traceback.format_exc()]))
            message = (u"Deployment of environment"
                       " '{0}' is done").format(task.cluster.name)
            logger.warning(u"Cannot find virtual IP for '{0}'".format(
                task.cluster.name))

    notifier.notify("done", message, task.cluster_id)
    TaskHelper.update_task_status(task.uuid, status, progress, message)
def PUT(self, cluster_id):
    """Launch this handler's task manager against the cluster.

    :returns: JSONized Task object.
    :http: * 202 (task successfully executed)
           * 400 (invalid object data specified)
           * 404 (environment is not found)
           * 409 (task with such parameters already exists)
    """
    cluster = self.get_object_or_404(
        objects.Cluster, cluster_id,
        log_404=(u"warning", u"Error: there is no cluster "
                 u"with id '{0}' in DB.".format(cluster_id)))

    logger.info(self.log_message.format(env_id=cluster_id))

    try:
        options = self.get_options()
    except ValueError as e:
        raise self.http(400, six.text_type(e))

    try:
        self.validator.validate(cluster)
    except errors.NailgunException as e:
        raise self.http(400, e.message)

    if objects.Release.is_lcm_supported(cluster.release):
        # try to get new graph to run transaction manager
        try:
            transaction_options = self.get_transaction_options(
                cluster, options)
        except errors.NailgunException as e:
            logger.exception("Failed to get transaction options")
            raise self.http(400, msg=six.text_type(e))
        if transaction_options:
            # LCM path: the transaction manager takes over entirely
            return self.start_transaction(cluster, transaction_options)

    try:
        task_manager = self.task_manager(cluster_id=cluster.id)
        task = task_manager.execute(**options)
    except (errors.AlreadyExists, errors.StopAlreadyRunning) as exc:
        # duplicate/competing task -> 409 Conflict
        raise self.http(409, exc.message)
    except (
        errors.DeploymentNotRunning,
        errors.NoDeploymentTasks,
        errors.WrongNodeStatus,
        errors.UnavailableRelease,
        errors.CannotBeStopped,
    ) as exc:
        raise self.http(400, exc.message)
    except Exception as exc:
        logger.error(
            self.log_error.format(env_id=cluster_id, error=str(exc)))
        # let it be 500
        raise

    self.raise_task(task)
def make_ubuntu_preferences_task(uids, repo):
    """Build an upload task for an APT preferences (pinning) file.

    Returns None when the repo's "Release" file cannot be fetched or
    parsed; in that case no pinning is applied for this repo.
    """
    # NOTE(ikalnitsky): In order to implement the proper pinning,
    # we have to download and parse the repo's "Release" file.
    # Generally, that's not a good idea to make some HTTP request
    # from Nailgun, but taking into account that this task
    # will be executed in uWSGI's mule worker we can skip this
    # rule, because proper pinning is more valuable thing right now.
    template = '\n'.join([
        'Package: *',
        'Pin: release {conditions}',
        'Pin-Priority: {priority}'])

    try:
        release = debian.get_release_file(repo, retries=3)
        release = debian.parse_release_file(release)
        pin = debian.get_apt_preferences_line(release)
    except requests.exceptions.HTTPError as exc:
        logger.error("Failed to fetch 'Release' file due to '%s'. "
                     "The apt preferences won't be applied for repo "
                     "'%s'.",
                     six.text_type(exc), repo['name'])
        return None
    except Exception:
        logger.exception("Failed to parse 'Release' file.")
        return None

    # NOTE(kozhukalov): When a package is available both in:
    # 1) http://archive.ubuntu.com/ubuntu trusty universe
    # 2) http://mirror.fuel-infra.org/mos-repos/ubuntu/7.0 mos7.0 main
    # and the preferences file pins only by section priority, e.g.:
    #   Package: *
    #   Pin: release o=Mirantis, a=mos7.0, n=mos7.0, l=mos7.0, c=main
    #   Pin-Priority: 1050
    # then the package available in MOS won't match the pin because
    # for some reason apt still thinks this package is in the
    # universe section. As a result:
    #   # apt-cache policy ohai
    #   ohai:
    #     Installed: (none)
    #     Candidate: 6.14.0-2
    #     Version table:
    #        6.14.0-2 0
    #           500 http://10.20.0.1/mirror/ubuntu/ trusty/universe
    #           amd64 Packages
    #        6.14.0-2~u14.04+mos1 0
    #           500 http://10.20.0.2:8080/2015.1.0-7.0/ubuntu/x86_64/
    #           mos7.0/main amd64 Packages
    preferences_content = template.format(
        conditions=pin, priority=repo['priority'])

    preferences_path = '/etc/apt/preferences.d/{0}.pref'.format(
        repo['name'])
    return make_upload_task(uids, preferences_content, preferences_path)
def remove_nodes_resp(cls, **kwargs):
    """RPC callback for node removal: delete nodes reported as removed,
    mark error nodes, and notify the user with a summary.
    """
    logger.info("RPC method remove_nodes_resp received: %s" %
                json.dumps(kwargs))
    task_uuid = kwargs.get('task_uuid')
    nodes = kwargs.get('nodes') or []
    error_nodes = kwargs.get('error_nodes') or []
    inaccessible_nodes = kwargs.get('inaccessible_nodes') or []
    error_msg = kwargs.get('error')
    status = kwargs.get('status')
    progress = kwargs.get('progress')

    for node in nodes:
        node_db = db().query(Node).get(node['uid'])
        if not node_db:
            logger.error(u"Failed to delete node '%s': node doesn't exist",
                         str(node))
            # NOTE(review): `break` stops processing the remaining
            # nodes on the first missing one; presumably `continue`
            # was intended — confirm before changing
            break
        db().delete(node_db)

    for node in inaccessible_nodes:
        # Nodes which not answered by rpc just removed from db
        node_db = db().query(Node).get(node['uid'])
        if node_db:
            logger.warn(u'Node %s not answered by RPC, removing from db',
                        node_db.human_readable_name)
            db().delete(node_db)

    for node in error_nodes:
        node_db = db().query(Node).get(node['uid'])
        if not node_db:
            logger.error(
                u"Failed to delete node '%s' marked as error from Naily:"
                " node doesn't exist", str(node))
            # NOTE(review): same break-vs-continue concern as above
            break
        node_db.pending_deletion = False
        node_db.status = 'error'
        db().add(node_db)
        node['name'] = node_db.name
    db().commit()

    success_msg = u"No nodes were removed"
    err_msg = u"No errors occurred"
    if nodes:
        success_msg = u"Successfully removed {0} node(s)".format(
            len(nodes))
        notifier.notify("done", success_msg)
    if error_nodes:
        err_msg = u"Failed to remove {0} node(s): {1}".format(
            len(error_nodes),
            ', '.join([
                n.get('name') or "ID: {0}".format(n['uid'])
                for n in error_nodes
            ]))
        notifier.notify("error", err_msg)
    # only override the message when the RPC payload carried no error
    if not error_msg:
        error_msg = ". ".join([success_msg, err_msg])

    TaskHelper.update_task_status(task_uuid, status, progress, error_msg)