Example #1
0
    def remove_cluster_resp(cls, **kwargs):
        """Handle the RPC response of a cluster removal task.

        Node cleanup is delegated to ``remove_nodes_resp``; afterwards the
        task status decides what happens to the cluster itself:

        * ``ready`` - IP addresses of the cluster's network groups and the
          cluster are deleted, then a "done" notification is sent;
        * ``error`` - the cluster is flagged with status ``error`` and an
          "error" notification carrying the task message is sent.

        :param kwargs: RPC payload; ``task_uuid`` is read here and the whole
            payload is forwarded to ``remove_nodes_resp``.
        """
        logger.info("RPC method remove_cluster_resp received: %s" %
                    json.dumps(kwargs))
        task_uuid = kwargs.get('task_uuid')

        cls.remove_nodes_resp(**kwargs)

        task = TaskHelper.get_task_by_uuid(task_uuid)
        cluster = task.cluster

        if task.status in ('ready', ):
            logger.debug("Removing environment itself")
            # remember the name now: it is needed after the cluster is deleted
            cluster_name = cluster.name

            ips = db().query(IPAddr).filter(
                IPAddr.network.in_([n.id for n in cluster.network_groups]))
            # Explicit loop instead of map(): map() is lazy on Python 3, so
            # relying on it for side effects would silently delete nothing.
            for ip in ips:
                db().delete(ip)
            db().flush()

            db().delete(cluster)
            db().flush()

            notifier.notify(
                "done", u"Environment '%s' and all its nodes are deleted" %
                (cluster_name))

        elif task.status in ('error', ):
            cluster.status = 'error'
            db().add(cluster)
            db().flush()
            # build a default message only when the task carries none
            if not task.message:
                task.message = "Failed to delete nodes:\n{0}".format(
                    cls._generate_error_message(task,
                                                error_types=('deletion', )))
            notifier.notify("error", task.message, cluster.id)
Example #2
0
    def _error_action(cls, task, status, progress, message=None):
        """Store a failure message on the task and send an error notification.

        :param task: task whose name, cluster id and stored data are used.
        :param status: status value written to the task.
        :param progress: progress value written to the task.
        :param message: optional failure details; when empty, a message
            listing node names is generated instead.
        """
        title = task.name.title()
        if not message:
            message = u"{0} has failed. Check these nodes:\n{1}".format(
                title,
                cls._generate_error_message(
                    task,
                    error_types=('deploy', 'provision'),
                    names_only=True
                )
            )
            notify_message = message
        else:
            message = u"{0} has failed. {1}".format(title, message)
            # A failure message coming from astute can be long; only its
            # first paragraph goes into the notification to avoid clutter.
            notify_message = message.split('\n\n')[0]

        notifier.notify("error", notify_message, task.cluster_id)
        objects.Task.update(
            task,
            {'status': status, 'progress': progress, 'message': message}
        )
Example #3
0
    def dump_environment_resp(cls, **kwargs):
        """Handle the RPC response of an environment dump (snapshot) task.

        On ``error`` the error text is notified and stored on the task with
        progress 100. On ``ready`` a "done" notification is sent and the
        task message becomes ``/dump/<basename of msg>``. Any other status
        leaves the task untouched.

        :param kwargs: RPC payload; reads ``task_uuid``, ``status``,
            ``progress``, ``error`` and ``msg``.
        """
        logger.info(
            "RPC method dump_environment_resp received: %s" %
            jsonutils.dumps(kwargs)
        )
        status = kwargs.get('status')
        task = objects.Task.get_by_uuid(
            kwargs.get('task_uuid'), fail_if_not_found=True)

        if status == 'error':
            failure = kwargs.get('error')
            notifier.notify('error', failure)
            objects.Task.update(
                task,
                {'status': status, 'progress': 100, 'message': failure}
            )
        elif status == 'ready':
            snapshot_name = os.path.basename(kwargs.get('msg'))
            notifier.notify('done', 'Snapshot is ready. '
                            'Visit Support page to download')
            objects.Task.update(
                task,
                {'status': status,
                 'progress': kwargs.get('progress'),
                 'message': '/dump/{0}'.format(snapshot_name)}
            )
Example #4
0
    def PUT(self):
        """Refresh a node from validated request data.

        :returns: dict with the node id; it also carries ``cached: True``
                  when the supplied agent checksum equals the stored one.
        :http: * 200 (node are successfully updated)
               * 304 (node data not changed since last request)
               * 400 (data validation failed)
               * 404 (node not found)
        """
        payload = self.checked_data(
            self.validator.validate_update,
            data=web.data())

        node = self.collection.single.get_by_meta(payload)
        if not node:
            raise self.http(404, "Can't find node: {0}".format(payload))

        node.timestamp = datetime.now()

        if not node.online:
            node.online = True
            back_online_msg = u"Node '{0}' is back online".format(
                node.human_readable_name)
            logger.info(back_online_msg)
            notifier.notify("discover", back_online_msg, node_id=node.id)
        db().flush()

        # An identical checksum means nothing changed: skip the full update.
        checksum_unchanged = (
            'agent_checksum' in payload and
            node.agent_checksum == payload['agent_checksum']
        )
        if checksum_unchanged:
            return {'id': node.id, 'cached': True}

        self.collection.single.update_by_agent(node, payload)
        return {"id": node.id}
Example #5
0
 def _download_release_error(cls, release_id, error_message):
     """Set the release state to 'error', commit, and notify the error.

     No notification is sent when the message is exactly 'Task aborted'.
     """
     failed_release = db().query(Release).get(release_id)
     failed_release.state = 'error'
     db().commit()
     # TODO(NAME): remove this ugly checks
     if error_message == 'Task aborted':
         return
     notifier.notify('error', error_message)
Example #6
0
    def POST(self, cluster_id):
        """Assign nodes to a cluster with the requested pending roles.

        :returns: Http response.
        :http: * 201 (nodes are successfully assigned)
               * 400 (invalid nodes data specified)
        """
        roles_by_node_id = self.checked_data(
            self.validator.validate_collection_update,
            cluster_id=cluster_id
        )
        nodes = self.get_objects_list_or_404(Node, roles_by_node_id.keys())
        cluster = self.get_object_or_404(Cluster, cluster_id)
        for node in nodes:
            node.cluster = cluster
            node.pending_roles = roles_by_node_id[node.id]
            node.pending_addition = True
            try:
                volumes = node.volume_manager.gen_volumes_info()
                node.attributes.volumes = volumes
                node.cluster.add_pending_changes("disks", node_id=node.id)

                node.cluster.network_manager.assign_networks_by_default(node)
            except Exception as exc:
                # A failure here is logged and notified; the assignment
                # itself is still committed below.
                logger.warning(traceback.format_exc())
                error_text = (
                    u"Failed to generate attributes for node '{0}': '{1}'"
                    .format(
                        node.human_readable_name(),
                        str(exc) or u"see logs for details"
                    )
                )
                notifier.notify("error", error_text, node_id=node.id)
            # commit per node
            db().commit()
        raise web.ok
Example #7
0
    def update_config_resp(cls, **kwargs):
        """Update task and node states at the end of an upload-config task.

        :param kwargs: RPC payload; ``task_uuid`` is required, ``error``,
            ``status`` and ``progress`` are optional.
        """
        logger.info("RPC method update_config_resp received: %s" % jsonutils.dumps(kwargs))

        task_uuid = kwargs["task_uuid"]
        message = kwargs.get("error")
        status = kwargs.get("status")
        progress = kwargs.get("progress")

        task = objects.Task.get_by_uuid(task_uuid, fail_if_not_found=True, lock_for_update=True)

        # select the nodes recorded in the task cache and lock them for update
        nodes = objects.NodeCollection.lock_for_update(
            objects.NodeCollection.filter_by_id_list(None, task.cache["nodes"])
        ).all()

        finished_statuses = (consts.TASK_STATUSES.ready, consts.TASK_STATUSES.error)
        if status in finished_statuses:
            # whether the task succeeded or failed, nodes go back to 'ready'
            for node in nodes:
                node.status = consts.NODE_STATUSES.ready
                node.progress = 100

        if status == consts.TASK_STATUSES.error:
            node_names = ", ".join(node.name for node in nodes)
            message = (u"Failed to update configuration on nodes:" u" {0}.").format(node_names)
            logger.error(message)
            notifier.notify("error", message)

        db().flush()

        objects.Task.update(task, {"status": status, "progress": progress, "message": message})

        cls._update_action_log_entry(status, task.name, task_uuid, nodes)
Example #8
0
    def check_redhat_credentials_resp(cls, **kwargs):
        """Handle the RPC response of a Red Hat credentials check.

        Returns early, after logging, when the task or the release it refers
        to cannot be found. When the payload carries an error, the status is
        forced to ``error``, the release state is set to ``error`` and the
        error is notified unless it is exactly "Task aborted". Finally the
        task status is updated.

        :param kwargs: RPC payload; reads ``task_uuid``, ``error``,
            ``status`` and ``progress``.
        """
        logger.info("RPC method check_redhat_credentials_resp received: %s" % json.dumps(kwargs))
        task_uuid = kwargs.get("task_uuid")
        error_msg = kwargs.get("error")
        status = kwargs.get("status")
        progress = kwargs.get("progress")

        task = db().query(Task).filter_by(uuid=task_uuid).first()
        if not task:
            # Adjacent literals instead of a backslash continuation inside
            # the string, which embedded a run of spaces in the message.
            logger.error(
                "check_redhat_credentials_resp: task "
                "with UUID %s not found!",
                task_uuid,
            )
            return

        release_info = task.cache["args"]["release_info"]
        release_id = release_info["release_id"]
        release = db().query(Release).get(release_id)
        if not release:
            # Log under this method's own name (was "download_release_resp").
            logger.error(
                "check_redhat_credentials_resp: Release "
                "with ID %s not found",
                release_id,
            )
            return

        if error_msg:
            status = "error"
            cls._update_release_state(release_id, "error")
            # TODO(NAME): remove this ugly checks
            if "Unknown error" in error_msg:
                error_msg = "Failed to check Red Hat " "credentials"
            if error_msg != "Task aborted":
                notifier.notify("error", error_msg)

        result = {"release_info": {"release_id": release_id}}

        TaskHelper.update_task_status(task_uuid, status, progress, error_msg, result)
Example #9
0
    def checked_data(self, validate_method=None, **kwargs):
        """Validate request data and map validation errors to HTTP errors.

        :param validate_method: callable used for validation; defaults to
            ``self.validator.validate`` when not given.
        :param kwargs: an optional ``data`` entry is the payload to validate;
            without it the request body from ``web.data()`` is used. All
            remaining keyword arguments are passed to the validation
            callable.
        :returns: the value returned by the validation callable.
        :raises: ``web.badrequest`` for ``InvalidInterfacesInfo`` and
            ``InvalidMetadata`` (also sent as an error notification) and for
            any other exception; ``web.conflict`` for ``AlreadyExists``.
        """
        try:
            # Read the request body only when the caller did not pass data;
            # a default argument to pop() would be evaluated unconditionally.
            if 'data' in kwargs:
                data = kwargs.pop('data')
            else:
                data = web.data()

            if validate_method:
                method = validate_method
            else:
                method = self.validator.validate

            valid_data = method(data, **kwargs)
        except (
            errors.InvalidInterfacesInfo,
            errors.InvalidMetadata
        ) as exc:
            notifier.notify("error", str(exc))
            raise web.badrequest(message=str(exc))
        except (
            errors.AlreadyExists
        ) as exc:
            err = web.conflict()
            err.message = exc.message
            raise err
        except (
            errors.InvalidData,
            Exception
        ) as exc:
            # every other failure is reported to the client as a bad request
            raise web.badrequest(message=str(exc))
        return valid_data
Example #10
0
    def run(self):
        """Send a fake 'ready' deletion response, then re-add restorable nodes.

        The response method named by ``self.respond_to`` is called on
        ``NailgunReceiver``. Every entry of ``nodes_to_restore`` that is
        online is then stored again with status ``discover`` and announced
        with a "discover" notification.
        """
        super(FakeDeletionThread, self).run()
        receiver = NailgunReceiver
        response = {
            'task_uuid': self.task_uuid,
            'nodes': self.data['args']['nodes'],
            'status': 'ready'
        }
        nodes_to_restore = self.data['args'].get('nodes_to_restore', [])
        getattr(receiver, self.respond_to)(**response)

        for node_data in nodes_to_restore:
            restored = Node(**node_data)

            # An offline node was just deleted from the db and cannot be
            # recreated with status 'discover'.
            if not restored.online:
                continue

            restored.status = 'discover'
            db().add(restored)
            db().commit()
            restored.attributes = NodeAttributes(node_id=restored.id)
            restored.attributes.volumes = \
                restored.volume_manager.gen_volumes_info()
            NetworkManager.update_interfaces_info(restored)
            db().commit()

            ram = round(restored.meta.get('ram') or 0, 1)
            cores = restored.meta.get('cores') or 'unknown'
            discovery_msg = ("New node with %s CPU core(s) "
                             "and %s GB memory is discovered" %
                             (cores, ram))
            notifier.notify("discover", discovery_msg, node_id=restored.id)
Example #11
0
    def check_redhat_credentials_resp(cls, **kwargs):
        """Handle the RPC response of a Red Hat credentials check.

        Returns early, after logging, when the release referenced by the
        task cache cannot be found. When the payload carries an error, the
        status is forced to ``error``, the release state is set to ``error``
        and the error is notified unless it is exactly 'Task aborted'.
        Finally the task status is updated.

        :param kwargs: RPC payload; reads ``task_uuid``, ``error``,
            ``status`` and ``progress``.
        """
        logger.info("RPC method check_redhat_credentials_resp received: %s" %
                    json.dumps(kwargs))
        task_uuid = kwargs.get('task_uuid')
        error_msg = kwargs.get('error')
        status = kwargs.get('status')
        progress = kwargs.get('progress')

        task = TaskHelper.get_task_by_uuid(task_uuid)

        release_info = task.cache['args']['release_info']
        release_id = release_info['release_id']
        release = db().query(Release).get(release_id)
        if not release:
            # Log under this method's own name (was "download_release_resp").
            logger.error(
                "check_redhat_credentials_resp: Release"
                " with ID %s not found", release_id)
            return

        if error_msg:
            status = 'error'
            cls._update_release_state(release_id, 'error')
            # TODO(NAME): remove this ugly checks
            if 'Unknown error' in error_msg:
                error_msg = 'Failed to check Red Hat ' \
                            'credentials'
            if error_msg != 'Task aborted':
                notifier.notify('error', error_msg)

        result = {"release_info": {"release_id": release_id}}

        TaskHelper.update_task_status(task_uuid, status, progress, error_msg,
                                      result)
Example #12
0
    def PUT(self, node_id):
        """Update a node from the validated request payload.

        :returns: JSONized Node object.
        :http: * 200 (OK)
               * 400 (invalid node data specified)
               * 404 (node not found in db)
        """
        node = self.get_object_or_404(Node, node_id)
        if not node.attributes:
            node.attributes = NodeAttributes(node_id=node.id)

        data = self.checked_data(self.validator.validate_update)

        network_manager = NetworkManager()

        previous_cluster_id = node.cluster_id

        if data.get("pending_roles") == [] and node.cluster:
            node.cluster.clear_pending_changes(node_id=node.id)

        if "cluster_id" in data:
            requested_cluster_id = data["cluster_id"]
            if requested_cluster_id is None and node.cluster:
                # node leaves its cluster: drop pending changes and roles
                node.cluster.clear_pending_changes(node_id=node.id)
                node.roles = node.pending_roles = []
            node.cluster_id = requested_cluster_id
            if node.cluster_id != previous_cluster_id:
                if previous_cluster_id:
                    network_manager.clear_assigned_networks(node)
                    network_manager.clear_all_allowed_networks(node.id)
                if node.cluster_id:
                    network_manager.assign_networks_by_default(node)
                    network_manager.allow_network_assignment_to_all_interfaces(
                        node)

        # These flags compare against the node's current values, so they are
        # computed before the payload is copied onto the node below.
        roles_changed = (
            'roles' in data and set(data['roles']) != set(node.roles))
        pending_roles_changed = (
            'pending_roles' in data and
            set(data['pending_roles']) != set(node.pending_roles))
        cluster_changed = node.cluster_id != previous_cluster_id
        regenerate_volumes = any(
            (roles_changed, pending_roles_changed, cluster_changed))

        for key, value in data.iteritems():
            # id is never updated explicitly, and cluster_id has already
            # been applied before all other fields
            if key not in ("id", "cluster_id"):
                setattr(node, key, value)

        if node.status not in ('provisioning',
                               'deploying') and regenerate_volumes:
            try:
                node.attributes.volumes = \
                    node.volume_manager.gen_volumes_info()
            except Exception as exc:
                node_label = node.name or data.get("mac") or data.get("id")
                msg = (u"Failed to generate volumes "
                       "info for node '{0}': '{1}'").format(
                           node_label,
                           str(exc) or "see logs for details")
                logger.warning(traceback.format_exc())
                notifier.notify("error", msg, node_id=node.id)
        db().commit()
        return self.render(node)
Example #13
0
 def _download_release_error(cls, release_id, error_message):
     """Set the release state to "error", flush, and notify the error.

     No notification is sent when the message is exactly "Task aborted".
     """
     failed_release = db().query(Release).get(release_id)
     failed_release.state = "error"
     db().flush()
     # TODO(NAME): remove this ugly checks
     if error_message == "Task aborted":
         return
     notifier.notify("error", error_message)
Example #14
0
    def dump_environment_resp(cls, **kwargs):
        """Handle the RPC response of an environment dump (snapshot) task.

        On ``error`` the error text is notified and stored on the task with
        progress 100. On ``ready`` a "done" notification is sent and the
        task message becomes the URL reversed for
        ``SnapshotDownloadHandler`` with the snapshot's base name. Any other
        status leaves the task untouched.

        :param kwargs: RPC payload; reads ``task_uuid``, ``status``,
            ``progress``, ``error`` and ``msg``.
        """
        logger.info(
            "RPC method dump_environment_resp received: %s" %
            jsonutils.dumps(kwargs)
        )
        status = kwargs.get('status')
        task = objects.Task.get_by_uuid(
            kwargs.get('task_uuid'), fail_if_not_found=True)

        if status == 'error':
            failure = kwargs.get('error')
            notifier.notify('error', failure)
            objects.Task.update(
                task,
                {'status': status, 'progress': 100, 'message': failure}
            )
        elif status == 'ready':
            snapshot_name = os.path.basename(kwargs.get('msg'))
            notifier.notify('done', 'Snapshot is ready. '
                            'Visit Support page to download')
            download_url = reverse('SnapshotDownloadHandler',
                                   kwargs={'snapshot_name': snapshot_name})
            objects.Task.update(
                task,
                {'status': status,
                 'progress': kwargs.get('progress'),
                 'message': download_url}
            )
Example #15
0
    def fail(self, transaction, reason):
        """Finish a transaction with an error and notify about it.

        The transaction is finished with the error status and ``reason`` as
        its message, and its action log is updated. Every subtask still in
        the pending status is started and immediately finished with the
        error status and message "Aborted". The cluster status is then
        updated and an error notification is sent.

        :param transaction: transaction to mark as failed.
        :param reason: failure description, stored and shown to the user.
        :returns: True
        """
        objects.Transaction.on_finish(
            transaction, consts.TASK_STATUSES.error, message=reason
        )
        helpers.TaskHelper.update_action_log(transaction)
        for sub_transaction in transaction.subtasks:
            if sub_transaction.status == consts.TASK_STATUSES.pending:
                # on_start and on_finish called to properly handle
                # status transition
                objects.Transaction.on_start(sub_transaction)
                objects.Transaction.on_finish(
                    sub_transaction, consts.TASK_STATUSES.error, "Aborted"
                )

        _update_cluster_status(transaction)
        # The first literal now ends with a space; previously the two
        # sentences were joined as "...'reason'.Please check...".
        notifier.notify(
            consts.NOTIFICATION_TOPICS.error,
            "Graph execution failed with error: '{0}'. "
            "Please check deployment history for more details."
            .format(reason),
            transaction.cluster_id,
            None,
            task_uuid=transaction.uuid
        )
        return True
Example #16
0
    def _error_action(cls, task, status, progress, message=None):
        """Store a failure message on the task and send an error notification.

        :param task: task whose name, cluster id and stored data are used.
        :param status: status value written to the task.
        :param progress: progress value written to the task.
        :param message: optional failure details; when empty, a message
            listing node names is generated instead.
        """
        failed_task_title = task.name.title()
        if not message:
            node_names = cls._generate_error_message(
                task,
                error_types=('deploy', 'provision'),
                names_only=True
            )
            message = u"{0} has failed. Check these nodes:\n{1}".format(
                failed_task_title, node_names)
            notify_message = message
        else:
            message = u"{0} has failed. {1}".format(
                failed_task_title, message)
            # A failure message coming from astute can be long; only its
            # first paragraph goes into the notification to avoid clutter.
            notify_message = message.split('\n\n')[0]

        notifier.notify("error", notify_message, task.cluster_id)
        task_data = {
            'status': status,
            'progress': progress,
            'message': message
        }
        objects.Task.update(task, task_data)
Example #17
0
    def _success_action(cls, task, status, progress):
        """Record a successful task, or reroute to the error path.

        When any node of the task's cluster has status 'error', the call is
        handed to ``_error_action`` instead. Otherwise a success message is
        built (extended with the Zabbix URL and the plugins message when
        those are available), sent as a "done" notification and stored on
        the task.

        :param task: finished task.
        :param status: status value written to the task.
        :param progress: progress value written to the task.
        """
        # check if all nodes are ready
        if any(n.status == 'error' for n in task.cluster.nodes):
            cls._error_action(task, 'error', 100)
            return

        task_name = task.name.title()
        try:
            message = (u"{0} of environment '{1}' is done. ").format(
                task_name,
                task.cluster.name,
            )
        except Exception as exc:
            logger.error(": ".join([str(exc), traceback.format_exc()]))
            message = u"{0} of environment '{1}' is done".format(
                task_name, task.cluster.name)

        zabbix_url = objects.Cluster.get_network_manager(
            task.cluster).get_zabbix_url(task.cluster)

        # ``message`` is unicode, so the templates below must be unicode as
        # well: on Python 2 a byte-string template tries to ASCII-encode its
        # arguments and fails for non-ASCII environment names.
        if zabbix_url:
            message = u"{0} Access Zabbix dashboard at {1}".format(
                message, zabbix_url)

        plugins_msg = cls._make_plugins_success_message(task.cluster.plugins)
        if plugins_msg:
            message = u'{0}\n\n{1}'.format(message, plugins_msg)

        notifier.notify("done", message, task.cluster_id)
        data = {'status': status, 'progress': progress, 'message': message}
        objects.Task.update(task, data)
Example #18
0
    def fail(self, transaction, reason):
        """Finish a transaction with an error and notify about it.

        The transaction is finished with the error status and ``reason`` as
        its message, and its action log is updated. Every subtask still in
        the pending status is started and immediately finished with the
        error status and message "Aborted". The cluster status is then
        updated and an error notification is sent.

        :param transaction: transaction to mark as failed.
        :param reason: failure description, stored and shown to the user.
        :returns: True
        """
        objects.Transaction.on_finish(
            transaction, consts.TASK_STATUSES.error, message=reason
        )
        helpers.TaskHelper.update_action_log(transaction)
        for sub_transaction in transaction.subtasks:
            if sub_transaction.status == consts.TASK_STATUSES.pending:
                # on_start and on_finish called to properly handle
                # status transition
                objects.Transaction.on_start(sub_transaction)
                objects.Transaction.on_finish(
                    sub_transaction, consts.TASK_STATUSES.error, "Aborted"
                )

        _update_cluster_status(transaction)
        # The first literal now ends with a space; previously the two
        # sentences were joined as "...'reason'.Please check...".
        notifier.notify(
            consts.NOTIFICATION_TOPICS.error,
            "Graph execution failed with error: '{0}'. "
            "Please check deployment history for more details."
            .format(reason),
            transaction.cluster_id,
            None,
            task_uuid=transaction.uuid
        )
        return True
Example #19
0
    def remove_cluster_resp(cls, **kwargs):
        """Handle the RPC response of a cluster removal task.

        Node cleanup is delegated to ``remove_nodes_resp``; afterwards the
        task status decides what happens to the cluster itself:

        * ``ready`` - IP addresses of the cluster's network groups, the
          cluster's tasks and the cluster are deleted; if the admin networks
          reported by the network manager changed as a result, an
          ``UpdateDnsmasqTaskManager`` is executed; finally a "done"
          notification is sent;
        * ``error`` - the cluster is flagged with status ``error`` and an
          "error" notification carrying the task message is sent.

        :param kwargs: RPC payload; ``task_uuid`` is read here and the whole
            payload is forwarded to ``remove_nodes_resp``.
        """
        logger.info(
            "RPC method remove_cluster_resp received: %s" %
            jsonutils.dumps(kwargs)
        )
        task_uuid = kwargs.get('task_uuid')

        # in remove_nodes_resp method all objects are already locked
        cls.remove_nodes_resp(**kwargs)

        task = objects.Task.get_by_uuid(task_uuid, fail_if_not_found=True)
        cluster = task.cluster

        if task.status in ('ready',):
            logger.debug("Removing environment itself")
            # remember the name now: it is needed after the cluster is deleted
            cluster_name = cluster.name

            ips = db().query(IPAddr).filter(
                IPAddr.network.in_([n.id for n in cluster.network_groups])
            )
            for ip in ips:
                db().delete(ip)
            db().flush()

            nm = objects.Cluster.get_network_manager(cluster)
            # snapshot of admin networks taken before deletion, compared
            # with the state after the cluster is gone (see below)
            admin_nets = nm.get_admin_networks()
            # the current task is deleted through Task, the cluster's other
            # tasks through Transaction
            objects.Task.delete(task)
            for task_ in cluster.tasks:
                if task_ != task:
                    objects.Transaction.delete(task_)

            objects.Cluster.delete(cluster)
            # run the dnsmasq update only when the deletion changed the
            # admin networks
            if admin_nets != nm.get_admin_networks():
                # import it here due to cyclic dependencies problem
                from nailgun.task.manager import UpdateDnsmasqTaskManager
                UpdateDnsmasqTaskManager().execute()

            notifier.notify(
                "done",
                u"Environment '%s' and all its nodes are deleted" % (
                    cluster_name
                )
            )

        elif task.status in ('error',):
            cluster.status = 'error'
            db().add(cluster)
            db().flush()
            # build a default message only when the task carries none
            if not task.message:
                task.message = "Failed to delete nodes:\n{0}".format(
                    cls._generate_error_message(
                        task,
                        error_types=('deletion',)
                    )
                )
            notifier.notify(
                "error",
                task.message,
                cluster.id
            )
Example #20
0
 def _error_start_action(cls, task, status, progress, message=None):
     """Record a failed role action, notify about it and roll back the role.

     The failure text is built from the task cache (role name and the
     action stored under the lower-cased role key). When ``message`` is
     empty the text also lists node names from ``_generate_error_message``.
     The content of ``message`` itself is not included in the text; only
     its truthiness selects the variant.

     :param task: failed task.
     :param status: status value written to the task.
     :param progress: progress value written to the task.
     :param message: optional failure details (see above).
     """
     task_cache = cls.get_task_cache(task)
     role = task_cache["role"]
     cluster_name = task.cluster.name
     action = task_cache[role.lower()]["action"]

     if message:
         message = u"The Role {0} of cluster {1} has failed {2}.".format(
             role, cluster_name, action
         )
     else:
         message = (
             u"The Role {0} of cluster {1} has failed {2}. "
             u"Check these nodes:\n{3}"
         ).format(
             role,
             cluster_name,
             action,
             cls._generate_error_message(
                 task,
                 error_types=('deploy', 'provision'),
                 names_only=True
             )
         )

     notifier.notify("error", message, task.cluster_id)
     data = {
         'status': status,
         'progress': progress,
         'message': message,
         'timestamp': datetime.datetime.now(),
     }
     objects.Task.update(task, data)
     cls.rollback_role_status(task)
Example #21
0
    def stop_deployment_resp(cls, **kwargs):
        """Handle the RPC response of a stop-deployment task.

        When the status is ``ready`` the cluster is marked as stopped, the
        cluster's deploy/deployment/provision tasks are deleted, the reported
        nodes (accessible and inaccessible) belonging to the cluster are
        reset to discover, inaccessible nodes are notified, and a "done"
        notification is sent. The task is updated and the action log entry
        is written for every status.

        :param kwargs: RPC payload; reads ``task_uuid``, ``nodes``,
            ``inaccessible_nodes``, ``error``, ``status`` and ``progress``.
        """
        logger.info("RPC method stop_deployment_resp received: %s" % jsonutils.dumps(kwargs))
        task_uuid = kwargs.get("task_uuid")
        nodes = kwargs.get("nodes", [])
        ia_nodes = kwargs.get("inaccessible_nodes", [])
        message = kwargs.get("error")
        status = kwargs.get("status")
        progress = kwargs.get("progress")

        task = objects.Task.get_by_uuid(task_uuid, fail_if_not_found=True)

        stopping_task_names = [consts.TASK_NAMES.deploy, consts.TASK_NAMES.deployment, consts.TASK_NAMES.provision]

        q_stop_tasks = objects.TaskCollection.filter_by_list(None, "name", stopping_task_names)
        q_stop_tasks = objects.TaskCollection.filter_by(q_stop_tasks, cluster_id=task.cluster_id)
        stop_tasks = objects.TaskCollection.order_by(q_stop_tasks, "id").all()

        # Locking cluster
        objects.Cluster.get_by_uid(task.cluster_id, fail_if_not_found=True, lock_for_update=True)

        if not stop_tasks:
            # Adjacent literals instead of a backslash continuation inside
            # the string, which embedded a run of spaces in the message.
            logger.warning(
                "stop_deployment_resp: deployment tasks "
                "not found for environment '%s'!",
                task.cluster_id,
            )

        if status == consts.TASK_STATUSES.ready:
            task.cluster.status = consts.CLUSTER_STATUSES.stopped

            # Explicit loop instead of map(): map() is lazy on Python 3, so
            # relying on it for side effects would silently delete nothing.
            for stop_task in stop_tasks:
                db().delete(stop_task)

            node_uids = [n["uid"] for n in itertools.chain(nodes, ia_nodes)]
            q_nodes = objects.NodeCollection.filter_by_id_list(None, node_uids)
            q_nodes = objects.NodeCollection.filter_by(q_nodes, cluster_id=task.cluster_id)
            q_nodes = objects.NodeCollection.order_by(q_nodes, "id")

            # locking Nodes for update (applied once; the query used to be
            # wrapped in lock_for_update twice)
            update_nodes = objects.NodeCollection.lock_for_update(q_nodes).all()

            for node in update_nodes:
                objects.Node.reset_to_discover(node)

            if ia_nodes:
                cls._notify_inaccessible(task.cluster_id, [n["uid"] for n in ia_nodes], u"deployment stopping")

            message = (
                u"Deployment of environment '{0}' was successfully stopped. "
                u"Please make changes and reset the environment "
                u"if you want to redeploy it.".format(task.cluster.name or task.cluster_id)
            )

            notifier.notify("done", message, task.cluster_id)

        data = {"status": status, "progress": progress, "message": message}
        objects.Task.update(task, data)

        cls._update_action_log_entry(status, task.name, task_uuid, nodes)
Example #22
0
 def _success_start_action(cls, task, status, progress):
     """Record a successful role action, or reroute to the error path.

     When any node of the task's cluster has status 'error', the call is
     handed to ``_error_start_action`` instead. Otherwise a success message
     is built from the task cache, sent as a "done" notification and stored
     on the task together with the current timestamp.
     """
     # check if all nodes are ready
     if any(n.status == 'error' for n in task.cluster.nodes):
         cls._error_start_action(task, 'error', 100)
         return

     task_name = task.name.title()
     task_cache = cls.get_task_cache(task)
     try:
         role = task_cache["role"]
         message = u"The Role {0} of cluster '{1}' is success {2}".format(
             role,
             task.cluster.name,
             task_cache[role.lower()]["action"]
         )
     except Exception as exc:
         # fall back to a generic message when the cache lacks role data
         logger.error(": ".join([str(exc), traceback.format_exc()]))
         message = u"{0} of environment '{1}' is done".format(
             task_name, task.cluster.name)

     notifier.notify("done", message, task.cluster_id)
     objects.Task.update(task, {
         'status': status,
         'progress': progress,
         'message': message,
         'timestamp': datetime.datetime.now(),
     })
Example #23
0
    def remove_cluster_resp(cls, **kwargs):
        """Handle the RPC response of a cluster removal task.

        Node cleanup is delegated to ``remove_nodes_resp``; afterwards the
        task status decides what happens to the cluster itself:

        * ``ready`` - IP addresses of the cluster's network groups, the
          data handled by ``_remove_cluster_relationdata`` and the cluster
          are deleted, then a "done" notification is sent;
        * ``error`` - the cluster is flagged with status ``error`` and an
          "error" notification carrying the task message is sent.

        :param kwargs: RPC payload; ``task_uuid`` is read here and the whole
            payload is forwarded to ``remove_nodes_resp``.
        """
        logger.info("RPC method remove_cluster_resp received: %s" % jsonutils.dumps(kwargs))
        task_uuid = kwargs.get("task_uuid")

        # in remove_nodes_resp method all objects are already locked
        cls.remove_nodes_resp(**kwargs)

        task = objects.Task.get_by_uuid(task_uuid, fail_if_not_found=True)
        cluster = task.cluster

        if task.status in ("ready",):
            logger.debug("Removing environment itself")
            # remember the name now: it is needed after the cluster is deleted
            cluster_name = cluster.name

            ips = db().query(IPAddr).filter(IPAddr.network.in_([n.id for n in cluster.network_groups]))
            # Explicit loop instead of map(): map() is lazy on Python 3, so
            # relying on it for side effects would silently delete nothing.
            for ip in ips:
                db().delete(ip)
            db().flush()

            cls._remove_cluster_relationdata(cluster.id)

            db().delete(cluster)
            db().flush()

            notifier.notify("done", u"环境 '%s' 和所包含的所有节点都已经被删除" % (cluster_name))

        elif task.status in ("error",):
            cluster.status = "error"
            db().add(cluster)
            db().flush()
            # build a default message only when the task carries none
            if not task.message:
                task.message = "Failed to delete nodes:\n{0}".format(
                    cls._generate_error_message(task, error_types=("deletion",))
                )
            notifier.notify("error", task.message, cluster.id)
Example #24
0
    def POST(self, cluster_id):
        """Assign nodes to a cluster with the requested pending roles.

        :returns: Http response.
        :http: * 201 (nodes are successfully assigned)
               * 400 (invalid nodes data specified)
        """
        assignments = self.checked_data(
            self.validator.validate_collection_update,
            cluster_id=cluster_id)
        nodes = self.get_objects_list_or_404(Node, assignments.keys())
        cluster = self.get_object_or_404(Cluster, cluster_id)
        for node in nodes:
            node.cluster = cluster
            node.pending_roles = assignments[node.id]
            node.pending_addition = True
            try:
                generated_volumes = node.volume_manager.gen_volumes_info()
                node.attributes.volumes = generated_volumes
                node.cluster.add_pending_changes("disks", node_id=node.id)

                node.cluster.network_manager.assign_networks_by_default(node)
            except Exception as exc:
                # A failure here is logged and notified; the assignment
                # itself is still committed below.
                logger.warning(traceback.format_exc())
                failure_text = (
                    u"Failed to generate attributes for node '{0}': '{1}'".
                    format(node.human_readable_name(),
                           str(exc) or u"see logs for details"))
                notifier.notify("error", failure_text, node_id=node.id)
            # commit per node
            db().commit()
        raise web.ok
Example #25
0
    def reset_environment_resp(cls, **kwargs):
        """Handle the RPC response of an environment reset task.

        When the status is ``ready`` the cluster status is set back to
        ``new``, pending changes for attributes and networks are re-added,
        cluster related data is removed via ``_remove_cluster_relationdata``,
        the reported nodes (accessible and inaccessible) belonging to the
        cluster are reset to discover, inaccessible nodes are notified, and
        a "done" notification is sent. The task is updated and the action
        log entry is written for every status.

        :param kwargs: RPC payload; reads ``task_uuid``, ``nodes``,
            ``inaccessible_nodes``, ``error``, ``status`` and ``progress``.
        """
        logger.info("RPC method reset_environment_resp received: %s",
                    jsonutils.dumps(kwargs))
        task_uuid = kwargs.get('task_uuid')
        nodes = kwargs.get('nodes', [])
        # ia_nodes are the nodes that could not be accessed
        ia_nodes = kwargs.get('inaccessible_nodes', [])
        message = kwargs.get('error')
        status = kwargs.get('status')
        progress = kwargs.get('progress')

        task = objects.Task.get_by_uuid(task_uuid,
                                        fail_if_not_found=True,
                                        lock_for_update=True)

        # Locking cluster
        objects.Cluster.get_by_uid(task.cluster_id,
                                   fail_if_not_found=True,
                                   lock_for_update=True)

        if status == consts.TASK_STATUSES.ready:
            # restoring pending changes
            task.cluster.status = consts.CLUSTER_STATUSES.new
            objects.Cluster.add_pending_changes(
                task.cluster, consts.CLUSTER_CHANGES.attributes)
            objects.Cluster.add_pending_changes(
                task.cluster, consts.CLUSTER_CHANGES.networks)

            # Per the original comment: clears the cluster_deploy_msg,
            # cluster_role_status and cluster_setting_info tables
            # (helper is not visible here - TODO confirm).
            cls._remove_cluster_relationdata(task.cluster_id)

            # itertools.chain joins several iterables into one iterator
            node_uids = [n["uid"] for n in itertools.chain(nodes, ia_nodes)]
            q_nodes = objects.NodeCollection.filter_by_id_list(None, node_uids)
            q_nodes = objects.NodeCollection.filter_by(
                q_nodes, cluster_id=task.cluster_id)
            q_nodes = objects.NodeCollection.order_by(q_nodes, 'id')

            # locking Nodes for update
            update_nodes = objects.NodeCollection.lock_for_update(
                q_nodes).all()

            for node in update_nodes:
                objects.Node.reset_to_discover(node)

            if ia_nodes:
                cls._notify_inaccessible(task.cluster_id,
                                         [n["uid"] for n in ia_nodes],
                                         u"environment resetting")

            message = (u"环境 '{0}' "
                       u"已经成功被重置".format(task.cluster.name or task.cluster_id))

            notifier.notify("done", message, task.cluster_id)

        data = {'status': status, 'progress': progress, 'message': message}
        objects.Task.update(task, data)

        # NOTE(review): called with three arguments here; confirm this
        # matches the signature of _update_action_log_entry in this class.
        cls._update_action_log_entry(status, task_uuid, nodes)
Example #26
0
 def _download_release_completed(cls, release_id):
     """Mark the release as available and notify about the download.

     :param release_id: primary key of the Release row to update
     """
     session = db()
     downloaded = session.query(Release).get(release_id)
     downloaded.state = 'available'
     # flush only; committing is left to the caller
     db().flush()
     notifier.notify(
         "done",
         u"Successfully downloaded {0}".format(downloaded.name))
Example #27
0
 def _download_release_completed(cls, release_id):
     """Mark the release as available, commit, and send a notification.

     :param release_id: primary key of the Release row to update
     """
     session = db()
     downloaded = session.query(Release).get(release_id)
     downloaded.state = 'available'
     db().commit()
     notifier.notify(
         "done",
         u"Successfully downloaded {0}".format(downloaded.name))
Example #28
0
    def stop_deployment_resp(cls, **kwargs):
        """Process the RPC response for a "stop deployment" task.

        Keys read from ``kwargs`` (all via ``dict.get``): ``task_uuid``,
        ``nodes``, ``inaccessible_nodes``, ``error``, ``status`` and
        ``progress``.

        When ``status`` is "ready" the cluster is marked "stopped", the
        deploy/deployment/provision tasks of the cluster are deleted, the
        reported nodes are put back to "discover" with their roles and
        pending roles swapped, and a "done" notification is sent. The
        task status is updated for every status.
        """
        logger.info("RPC method stop_deployment_resp received: %s",
                    json.dumps(kwargs))
        task_uuid = kwargs.get('task_uuid')
        nodes = kwargs.get('nodes', [])
        ia_nodes = kwargs.get('inaccessible_nodes', [])
        message = kwargs.get('error')
        status = kwargs.get('status')
        progress = kwargs.get('progress')

        task = TaskHelper.get_task_by_uuid(task_uuid)

        stop_tasks = db().query(Task).filter_by(
            cluster_id=task.cluster_id, ).filter(
                Task.name.in_(["deploy", "deployment", "provision"])).all()
        if not stop_tasks:
            # Adjacent literals instead of a backslash continuation inside
            # the string, which embedded the source indentation in the log.
            logger.warning(
                "stop_deployment_resp: deployment tasks "
                "not found for environment '%s'!", task.cluster_id)

        if status == "ready":
            task.cluster.status = "stopped"

            # Explicit loop: map() is lazy on Python 3, so relying on it
            # for side effects would silently skip the deletes there.
            for stop_task in stop_tasks:
                db().delete(stop_task)

            db().commit()

            update_nodes = db().query(Node).filter(
                Node.id.in_(
                    [n["uid"] for n in itertools.chain(nodes, ia_nodes)]),
                Node.cluster_id == task.cluster_id).yield_per(100)

            update_nodes.update(
                {
                    "online": False,
                    "status": "discover",
                    "pending_addition": True
                },
                synchronize_session='fetch')

            # swap roles and pending roles on every reset node
            for n in update_nodes:
                n.roles, n.pending_roles = n.pending_roles, n.roles

            db().commit()

            if ia_nodes:
                cls._notify_inaccessible(task.cluster_id,
                                         [n["uid"] for n in ia_nodes],
                                         u"deployment stopping")

            # On success the error text taken from kwargs is replaced by
            # the success message stored on the task below.
            message = (u"Deployment of environment '{0}' "
                       u"was successfully stopped".format(task.cluster.name
                                                          or task.cluster_id))

            notifier.notify("done", message, task.cluster_id)

        TaskHelper.update_task_status(task_uuid, status, progress, message)
Example #29
0
    def PUT(self, node_id):
        """:returns: JSONized Node object.
        :http: * 200 (OK)
               * 400 (invalid node data specified)
               * 404 (node not found in db)

        Applies the validated request data to the node. A cluster change
        is handled first (network assignments are cleared and/or
        re-created), then the remaining fields are copied onto the node,
        and volumes are regenerated when roles or the cluster changed.
        """
        node = self.get_object_or_404(Node, node_id)
        if not node.attributes:
            node.attributes = NodeAttributes(node_id=node.id)

        data = self.checked_data(self.validator.validate_update)
        network_manager = NetworkManager()
        previous_cluster_id = node.cluster_id

        if data.get("pending_roles") == [] and node.cluster:
            node.cluster.clear_pending_changes(node_id=node.id)

        if "cluster_id" in data:
            requested_cluster_id = data["cluster_id"]
            if requested_cluster_id is None and node.cluster:
                # node leaves its cluster: drop pending changes and roles
                node.cluster.clear_pending_changes(node_id=node.id)
                node.roles = node.pending_roles = []
            node.cluster_id = requested_cluster_id
            if node.cluster_id != previous_cluster_id:
                if previous_cluster_id:
                    network_manager.clear_assigned_networks(node)
                    network_manager.clear_all_allowed_networks(node.id)
                if node.cluster_id:
                    network_manager.assign_networks_by_default(node)
                    network_manager.allow_network_assignment_to_all_interfaces(
                        node)

        # Computed before the generic field update below, so the role
        # comparisons see the node's current (not yet overwritten) roles.
        roles_changed = (
            "roles" in data and set(data["roles"]) != set(node.roles))
        pending_roles_changed = (
            "pending_roles" in data
            and set(data["pending_roles"]) != set(node.pending_roles))
        cluster_changed = node.cluster_id != previous_cluster_id
        regenerate_volumes = any(
            (roles_changed, pending_roles_changed, cluster_changed))

        for key, value in data.iteritems():
            # we don't allow to update id explicitly
            # and updated cluster_id before all other fields
            if key not in ("id", "cluster_id"):
                setattr(node, key, value)

        if (node.status not in ("provisioning", "deploying")
                and regenerate_volumes):
            try:
                node.attributes.volumes = (
                    node.volume_manager.gen_volumes_info())
            except Exception as exc:
                node_label = node.name or data.get("mac") or data.get("id")
                reason = str(exc) or "see logs for details"
                msg = (u"Failed to generate volumes "
                       "info for node '{0}': '{1}'").format(node_label, reason)
                logger.warning(traceback.format_exc())
                notifier.notify("error", msg, node_id=node.id)
        db().commit()
        return self.render(node)
Example #30
0
    def _success_action(cls, task, status, progress):
        """Finish a successful deployment task and notify the user.

        If any node of the cluster is in the 'error' status the task is
        handed to ``_error_action`` instead. Otherwise a "done" message is
        built (including the Horizon URL when it can be determined), sent
        as a notification and stored on the task.

        :param task: deployment task being completed
        :param status: status to store on the task
        :param progress: progress value to store on the task
        """
        network_manager = NetworkManager()
        # check if all nodes are ready
        if any(n.status == 'error' for n in task.cluster.nodes):
            cls._error_action(task, 'error', 100)
            return

        # Default text; also keeps `message` bound when the cluster mode
        # matches none of the branches below (previously that case raised
        # UnboundLocalError at the notify call).
        message = (u"Deployment of environment"
                   " '{0}' is done").format(task.cluster.name)

        if task.cluster.mode in ('singlenode', 'multinode'):
            # determining horizon url - it's an IP
            # of a first cluster controller
            controller = db().query(Node).filter_by(
                cluster_id=task.cluster_id).filter(
                    Node.role_list.any(name='controller')).first()
            if controller:
                logger.debug(
                    u"Controller is found, node_id=%s, "
                    "getting it's IP addresses", controller.id)
                # list comprehension instead of filter(): the result is
                # indexed below, which a Python 3 filter object forbids
                public_net = [
                    n for n in
                    network_manager.get_node_networks(controller.id)
                    if n['name'] == 'public' and 'ip' in n
                ]
                if public_net:
                    # strip the "/prefix" part of the address
                    horizon_ip = public_net[0]['ip'].split('/')[0]
                    message = (
                        u"Deployment of environment '{0}' is done. "
                        "Access the OpenStack dashboard (Horizon) at "
                        "http://{1}/ or via internal network at http://{2}/"
                    ).format(task.cluster.name, horizon_ip, controller.ip)
                else:
                    logger.warning(u"Public ip for controller node "
                                   "not found in '{0}'".format(
                                       task.cluster.name))
            else:
                logger.warning(u"Controller node not found in '{0}'".format(
                    task.cluster.name))
        elif task.cluster.is_ha_mode:
            # determining horizon url in HA mode - it's vip
            # from a public network saved in task cache
            try:
                message = (
                    u"Deployment of environment '{0}' is done. "
                    "Access the OpenStack dashboard (Horizon) at {1}").format(
                        task.cluster.name,
                        network_manager.get_horizon_url(task.cluster.id))
            except Exception as exc:
                # best effort: fall back to the default message
                logger.error(": ".join([str(exc), traceback.format_exc()]))
                logger.warning(u"Cannot find virtual IP for '{0}'".format(
                    task.cluster.name))

        notifier.notify("done", message, task.cluster_id)
        TaskHelper.update_task_status(task.uuid, status, progress, message)
Example #31
0
 def _error_action(cls, task, status, progress, message=None):
     """Report a failed deployment and store the failure on the task.

     :param task: failed task
     :param status: status to store on the task
     :param progress: progress value to store on the task
     :param message: optional failure details; when empty, a list of
                     failed nodes is generated instead
     """
     if not message:
         failed_nodes = cls._generate_error_message(
             task, error_types=("deploy", "provision"), names_only=True)
         message = u"Deployment has failed. Check these nodes:\n{0}".format(
             failed_nodes)
     else:
         message = u"Deployment has failed. {0}".format(message)
     notifier.notify("error", message, task.cluster_id)
     TaskHelper.update_task_status(task.uuid, status, progress, message)
Example #32
0
    def remove_nodes_resp(cls, **kwargs):
        """Process the RPC response for a node removal task.

        Keys read from ``kwargs``: ``task_uuid``, ``nodes``,
        ``error_nodes``, ``inaccessible_nodes``, ``error``, ``status`` and
        ``progress``.

        Nodes listed in ``nodes`` and ``inaccessible_nodes`` are deleted
        from the database; nodes listed in ``error_nodes`` are kept and
        switched to the 'error' status. Notifications are sent for both
        outcomes and the task status is updated with a summary message
        unless the response carried its own error text.
        """
        logger.info("RPC method remove_nodes_resp received: %s",
                    json.dumps(kwargs))
        task_uuid = kwargs.get('task_uuid')
        nodes = kwargs.get('nodes') or []
        error_nodes = kwargs.get('error_nodes') or []
        inaccessible_nodes = kwargs.get('inaccessible_nodes') or []
        error_msg = kwargs.get('error')
        status = kwargs.get('status')
        progress = kwargs.get('progress')

        for node in nodes:
            node_db = db().query(Node).get(node['uid'])
            if not node_db:
                logger.error(u"Failed to delete node '%s': node doesn't exist",
                             str(node))
                # NOTE(review): break also skips all remaining nodes in
                # the list -- confirm whether `continue` was intended.
                break
            db().delete(node_db)

        for node in inaccessible_nodes:
            # Nodes which not answered by rpc just removed from db
            node_db = db().query(Node).get(node['uid'])
            if node_db:
                # logger.warning: `warn` is a deprecated alias and the
                # rest of the file already uses `warning`
                logger.warning(
                    u'Node %s not answered by RPC, removing from db',
                    node_db.human_readable_name)
                db().delete(node_db)

        for node in error_nodes:
            node_db = db().query(Node).get(node['uid'])
            if not node_db:
                logger.error(
                    u"Failed to delete node '%s' marked as error from Naily:"
                    " node doesn't exist", str(node))
                # NOTE(review): same early exit as in the first loop.
                break
            node_db.pending_deletion = False
            node_db.status = 'error'
            db().add(node_db)
            # remember the name for the error notification below
            node['name'] = node_db.name
        db().commit()

        success_msg = u"No nodes were removed"
        err_msg = u"No errors occurred"
        if nodes:
            success_msg = u"Successfully removed {0} node(s)".format(
                len(nodes))
            notifier.notify("done", success_msg)
        if error_nodes:
            err_msg = u"Failed to remove {0} node(s): {1}".format(
                len(error_nodes), ', '.join([
                    n.get('name') or "ID: {0}".format(n['uid'])
                    for n in error_nodes
                ]))
            notifier.notify("error", err_msg)
        if not error_msg:
            error_msg = ". ".join([success_msg, err_msg])

        TaskHelper.update_task_status(task_uuid, status, progress, error_msg)
Example #33
0
 def _notify_inaccessible(cls, cluster_id, nodes_uids, action):
     """Send a warning that lists the nodes which could not be reached.

     :param cluster_id: cluster the nodes must belong to
     :param nodes_uids: ids of the inaccessible nodes
     :param action: text describing the action during which the nodes
                    were unreachable; interpolated into the message
     """
     unreachable = db().query(Node.name).filter(
         Node.id.in_(nodes_uids),
         Node.cluster_id == cluster_id).order_by(Node.id).yield_per(100)
     quoted_names = [u"'{0}'".format(row.name) for row in unreachable]
     template = (u"Fuel couldn't reach these nodes during "
                 u"{0}: {1}. Manual check may be needed.")
     ia_message = template.format(action, u", ".join(quoted_names))
     notifier.notify("warning", ia_message, cluster_id)
Example #34
0
def _update_nodes(transaction, nodes_instances, nodes_params):
    """Apply reported per-node parameters to the given node instances.

    :param transaction: transaction the report belongs to; its
                        ``dry_run``, ``cluster_id`` and ``uuid`` are read
    :param nodes_instances: iterable of node objects to update
    :param nodes_params: dict keyed by node uid; entries are popped as
                         nodes are processed, leftovers are logged as
                         not found
    """
    if transaction.dry_run:
        # dry-run transactions must not update nodes except progress column
        allow_update = {'progress'}
    else:
        allow_update = {
            'name',
            'status',
            'hostname',
            'kernel_params',
            'pending_addition',
            'pending_deletion',
            'error_msg',
            'online',
            'progress',
        }

    for node in nodes_instances:
        node_params = nodes_params.pop(node.uid)

        for param in allow_update.intersection(node_params):
            if param != 'status':
                setattr(node, param, node_params[param])
                continue

            new_status = node_params['status']
            if new_status == 'deleted':
                # the deleted is special status which causes
                # to delete node from cluster
                objects.Node.remove_from_cluster(node)
                continue

            if new_status == 'error':
                # TODO(bgaifullin) do not persist status in DB
                node.status = new_status
                node.error_type = node_params.get(
                    'error_type', consts.NODE_ERRORS.deploy)
                node.progress = 100
                # Notification on particular node failure
                notifier.notify(
                    consts.NOTIFICATION_TOPICS.error,
                    u"Node '{0}' failed: {1}".format(
                        node.name,
                        node_params.get('error_msg', "Unknown error")),
                    cluster_id=transaction.cluster_id,
                    node_id=node.uid,
                    task_uuid=transaction.uuid)
            elif new_status == 'ready':
                # TODO(bgaifullin) need to remove pengind roles concept
                node.roles = list(set(node.roles + node.pending_roles))
                node.pending_roles = []
                node.progress = 100
                node.status = new_status
            else:
                node.status = new_status
    db.flush()

    if nodes_params:
        # whatever is left was reported for nodes we were not given
        missing_uids = ",".join(sorted(nodes_params.keys()))
        logger.warning("The following nodes are not found: %s", missing_uids)
Example #35
0
 def _notify_inaccessible(cls, cluster_id, nodes_uids, action):
     """Warn the user about nodes that were unreachable during `action`.

     :param cluster_id: cluster the nodes must belong to
     :param nodes_uids: ids of the inaccessible nodes
     :param action: text naming the interrupted action; interpolated
                    into the notification
     """
     name_rows = db().query(Node.name).filter(
         Node.id.in_(nodes_uids),
         Node.cluster_id == cluster_id).order_by(Node.id).yield_per(100)
     names_text = u", ".join(
         [u"'{0}'".format(row.name) for row in name_rows])
     ia_message = (u"Fuel couldn't reach these nodes during "
                   u"{0}: {1}. Manual check may be needed.").format(
                       action, names_text)
     notifier.notify("warning", ia_message, cluster_id)
Example #36
0
    def remove_cluster_resp(cls, **kwargs):
        """Process the RPC response for a cluster removal task.

        Node removal is delegated to ``remove_nodes_resp`` first. If the
        task then has the 'ready' status, the IP addresses of the
        cluster's networks and the cluster itself are deleted and a
        "done" notification is sent; if it has the 'error' status, the
        cluster is marked as 'error' and an "error" notification is sent.

        Only ``task_uuid`` is read from ``kwargs`` here; the full
        ``kwargs`` are forwarded to ``remove_nodes_resp``.
        """
        network_manager = NetworkManager()
        logger.info(
            "RPC method remove_cluster_resp received: %s",
            json.dumps(kwargs)
        )
        task_uuid = kwargs.get('task_uuid')

        cls.remove_nodes_resp(**kwargs)

        task = db().query(Task).filter_by(uuid=task_uuid).first()
        cluster = task.cluster

        if task.status in ('ready',):
            logger.debug("Removing environment itself")
            # keep the name: the cluster row is deleted before notifying
            cluster_name = cluster.name

            nws = itertools.chain(
                *[n.networks for n in cluster.network_groups]
            )
            ips = db().query(IPAddr).filter(
                IPAddr.network.in_([n.id for n in nws])
            )
            # Explicit loop: map() is lazy on Python 3, so relying on it
            # for side effects would silently skip the deletes there.
            for ip_addr in ips:
                db().delete(ip_addr)
            db().commit()

            db().delete(cluster)
            db().commit()

            # Dmitry's hack for clearing VLANs without networks
            network_manager.clear_vlans()

            notifier.notify(
                "done",
                u"Environment '%s' and all its nodes are deleted" % (
                    cluster_name
                )
            )

        elif task.status in ('error',):
            cluster.status = 'error'
            db().add(cluster)
            db().commit()
            if not task.message:
                task.message = "Failed to delete nodes:\n{0}".format(
                    cls._generate_error_message(
                        task,
                        error_types=('deletion',)
                    )
                )
            notifier.notify(
                "error",
                task.message,
                cluster.id
            )
Example #37
0
    def reset_environment_resp(cls, **kwargs):
        """Process the RPC response for an environment reset task.

        Keys read from ``kwargs``: ``task_uuid``, ``nodes``,
        ``inaccessible_nodes``, ``error``, ``status`` and ``progress``.

        On the "ready" status the cluster goes back to "new", pending
        changes are restored, the reported nodes are switched to
        "discover" with roles and pending roles swapped, and a "done"
        notification is sent. The task status is updated in every case.
        """
        logger.info("RPC method reset_environment_resp received: %s",
                    json.dumps(kwargs))
        task_uuid = kwargs.get('task_uuid')
        nodes = kwargs.get('nodes', [])
        ia_nodes = kwargs.get('inaccessible_nodes', [])
        message = kwargs.get('error')
        status = kwargs.get('status')
        progress = kwargs.get('progress')

        task = TaskHelper.get_task_by_uuid(task_uuid)

        if status == "ready":

            # restoring pending changes
            task.cluster.status = "new"
            for change_name in ("attributes", "networks"):
                objects.Cluster.add_pending_changes(task.cluster, change_name)

            for cluster_node in task.cluster.nodes:
                objects.Cluster.add_pending_changes(
                    task.cluster, "disks", node_id=cluster_node.id)

            reported_uids = [
                n["uid"] for n in itertools.chain(nodes, ia_nodes)]
            update_nodes = db().query(Node).filter(
                Node.id.in_(reported_uids),
                Node.cluster_id == task.cluster_id).yield_per(100)

            discover_state = {
                "online": False,
                "status": "discover",
                "pending_addition": True,
                "pending_deletion": False,
            }
            update_nodes.update(discover_state, synchronize_session='fetch')

            # swap roles and pending roles on every reset node
            for n in update_nodes:
                n.roles, n.pending_roles = n.pending_roles, n.roles

            db().commit()

            if ia_nodes:
                ia_uids = [n["uid"] for n in ia_nodes]
                cls._notify_inaccessible(
                    task.cluster_id, ia_uids, u"environment resetting")

            # the success text replaces the error taken from kwargs
            env_label = task.cluster.name or task.cluster_id
            message = (u"Environment '{0}' "
                       u"was successfully reset".format(env_label))

            notifier.notify("done", message, task.cluster_id)

        TaskHelper.update_task_status(task.uuid, status, progress, message)
Example #38
0
 def success(self, transaction):
     """Finish a successfully executed transaction and notify the user.

     Marks the transaction as finished with the ready status, updates
     the action log and the cluster status, then sends a "done"
     notification tied to the transaction's cluster and uuid.

     :param transaction: the transaction that completed
     """
     objects.Transaction.on_finish(transaction, consts.TASK_STATUSES.ready)
     helpers.TaskHelper.update_action_log(transaction)
     _update_cluster_status(transaction)
     notifier.notify(
         consts.NOTIFICATION_TOPICS.done,
         # The trailing space separates the two sentences; the adjacent
         # literals are concatenated and used to read "completed.You".
         "Graph execution has been successfully completed. "
         "You can check deployment history for detailed information.",
         transaction.cluster_id,
         None,
         task_uuid=transaction.uuid)
Example #39
0
 def success(self, transaction):
     """Mark the transaction as ready and notify the user.

     Sets the transaction to the ready status with progress 100,
     updates the cluster status, then sends a "done" notification tied
     to the transaction's cluster and uuid.

     :param transaction: the transaction that completed
     """
     objects.Transaction.update(
         transaction,
         {"status": consts.TASK_STATUSES.ready, "progress": 100})
     _update_cluster_status(transaction)
     notifier.notify(
         consts.NOTIFICATION_TOPICS.done,
         # The trailing space separates the two sentences; the adjacent
         # literals are concatenated and used to read "completed.You".
         "Graph execution has been successfully completed. "
         "You can check deployment history for detailed information.",
         transaction.cluster_id,
         None,
         task_uuid=transaction.uuid,
     )
Example #40
0
def _update_nodes(transaction, nodes_instances, nodes_params):
    """Copy reported node parameters onto the matching node objects.

    :param transaction: transaction the report belongs to; its
                        ``dry_run``, ``cluster_id`` and ``uuid`` are read
    :param nodes_instances: iterable of node objects to update
    :param nodes_params: dict keyed by node uid; each processed node's
                         entry is popped, leftovers are logged as
                         not found
    """
    # dry-run transactions must not update nodes except progress column
    if transaction.dry_run:
        allow_update = {"progress"}
    else:
        allow_update = {
            "name",
            "status",
            "hostname",
            "kernel_params",
            "pending_addition",
            "pending_deletion",
            "error_msg",
            "online",
            "progress",
        }

    for node in nodes_instances:
        node_params = nodes_params.pop(node.uid)

        for param in allow_update.intersection(node_params):
            if param != "status":
                setattr(node, param, node_params[param])
                continue

            reported_status = node_params["status"]
            if reported_status == "deleted":
                # the deleted is special status which causes
                # to delete node from cluster
                objects.Node.remove_from_cluster(node)
            elif reported_status == "error":
                # TODO(bgaifullin) do not persist status in DB
                node.status = reported_status
                node.error_type = node_params.get(
                    "error_type", consts.NODE_ERRORS.deploy)
                node.progress = 100
                # Notification on particular node failure
                notifier.notify(
                    consts.NOTIFICATION_TOPICS.error,
                    u"Node '{0}' failed: {1}".format(
                        node.name,
                        node_params.get("error_msg", "Unknown error")),
                    cluster_id=transaction.cluster_id,
                    node_id=node.uid,
                    task_uuid=transaction.uuid,
                )
            elif reported_status == "ready":
                # TODO(bgaifullin) need to remove pengind roles concept
                node.roles = list(set(node.roles + node.pending_roles))
                node.pending_roles = []
                node.progress = 100
                node.status = reported_status
            else:
                node.status = reported_status
    db.flush()

    if nodes_params:
        # entries still present belong to nodes that were not passed in
        unknown_uids = sorted(nodes_params.keys())
        logger.warning(
            "The following nodes are not found: %s", ",".join(unknown_uids))
Example #41
0
 def _error_action(cls, task, status, progress, message=None):
     """Notify about a failed deployment and record it on the task.

     :param task: failed task
     :param status: status to store on the task
     :param progress: progress value to store on the task
     :param message: optional failure details; when empty, the failed
                     node names are generated instead
     """
     if not message:
         node_names = cls._generate_error_message(
             task,
             error_types=('deploy', 'provision'),
             names_only=True)
         message = u"Deployment has failed. Check these nodes:\n{0}".format(
             node_names)
     else:
         message = u"Deployment has failed. {0}".format(message)
     notifier.notify("error", message, task.cluster_id)
     TaskHelper.update_task_status(task.uuid, status, progress, message)
Example #42
0
    def remove_cluster_resp(cls, **kwargs):
        """Process the RPC response for a cluster removal task.

        Node removal is delegated to ``remove_nodes_resp`` first. If the
        task then has the 'ready' status, the IP addresses bound to the
        cluster's network groups and the cluster itself are deleted, a
        dnsmasq update is started when the set of admin networks changed,
        and a "done" notification is sent. If the task has the 'error'
        status, the cluster is marked as 'error' and an "error"
        notification is sent.

        Only ``task_uuid`` is read from ``kwargs`` here; the full
        ``kwargs`` are forwarded to ``remove_nodes_resp``.
        """
        logger.info(
            "RPC method remove_cluster_resp received: %s",
            jsonutils.dumps(kwargs)
        )
        task_uuid = kwargs.get('task_uuid')

        # in remove_nodes_resp method all objects are already locked
        cls.remove_nodes_resp(**kwargs)

        task = objects.Task.get_by_uuid(task_uuid, fail_if_not_found=True)
        cluster = task.cluster

        if task.status in ('ready',):
            logger.debug("Removing environment itself")
            # keep the name: the cluster is deleted before notifying
            cluster_name = cluster.name

            ips = db().query(IPAddr).filter(
                IPAddr.network.in_([n.id for n in cluster.network_groups])
            )
            # Explicit loop: map() is lazy on Python 3, so relying on it
            # for side effects would silently skip the deletes there.
            for ip_addr in ips:
                db().delete(ip_addr)
            db().flush()

            nm = objects.Cluster.get_network_manager(cluster)
            # snapshot taken before the delete to detect a change after it
            admin_nets = nm.get_admin_networks()
            objects.Cluster.delete(cluster)
            if admin_nets != nm.get_admin_networks():
                # import it here due to cyclic dependencies problem
                from nailgun.task.manager import UpdateDnsmasqTaskManager
                UpdateDnsmasqTaskManager().execute()

            notifier.notify(
                "done",
                u"Environment '%s' and all its nodes are deleted" % (
                    cluster_name
                )
            )

        elif task.status in ('error',):
            cluster.status = 'error'
            db().add(cluster)
            db().flush()
            if not task.message:
                task.message = "Failed to delete nodes:\n{0}".format(
                    cls._generate_error_message(
                        task,
                        error_types=('deletion',)
                    )
                )
            notifier.notify(
                "error",
                task.message,
                cluster.id
            )
Example #43
0
 def _error_action(cls, task, status, progress, message=None):
     """Notify about a failed task and store the failure on it.

     The message is prefixed with the title-cased task name.

     :param task: failed task
     :param status: status to store on the task
     :param progress: progress value to store on the task
     :param message: optional failure details; when empty, the failed
                     node names are generated instead
     """
     task_name = task.name.title()
     if not message:
         node_names = cls._generate_error_message(
             task, error_types=("deploy", "provision"), names_only=True)
         message = u"{0} has failed. Check these nodes:\n{1}".format(
             task_name, node_names)
     else:
         message = u"{0} has failed. {1}".format(task_name, message)
     notifier.notify("error", message, task.cluster_id)
     objects.Task.update(
         task,
         {"status": status, "progress": progress, "message": message})
Example #44
0
 def _download_release_error(cls, release_id, error_message):
     """Mark the release as failed and report the error to the user.

     :param release_id: primary key of the Release row to update
     :param error_message: text to send as an "error" notification;
                           the literal 'Task aborted' is not reported
     """
     failed_release = db().query(Release).get(release_id)
     failed_release.state = 'error'
     db().commit()
     # TODO(NAME): remove this ugly checks
     if error_message == 'Task aborted':
         return
     notifier.notify('error', error_message)
Example #45
0
    def reset_environment_resp(cls, **kwargs):
        """Process the RPC response for an environment reset task.

        Keys read from ``kwargs``: ``task_uuid``, ``nodes``,
        ``inaccessible_nodes``, ``error``, ``status`` and ``progress``.

        On the ready status the cluster goes back to the new status, the
        attributes/networks pending changes are restored, the logs of
        each reported cluster node are deleted and the node is reset to
        discover, and a "done" notification is sent. The task record and
        the action log entry are updated in every case.
        """
        logger.info("RPC method reset_environment_resp received: %s",
                    jsonutils.dumps(kwargs))
        task_uuid = kwargs.get('task_uuid')
        nodes = kwargs.get('nodes', [])
        ia_nodes = kwargs.get('inaccessible_nodes', [])
        message = kwargs.get('error')
        status = kwargs.get('status')
        progress = kwargs.get('progress')

        task = objects.Task.get_by_uuid(
            task_uuid, fail_if_not_found=True, lock_for_update=True)

        # Locking cluster
        objects.Cluster.get_by_uid(
            task.cluster_id, fail_if_not_found=True, lock_for_update=True)

        if status == consts.TASK_STATUSES.ready:
            # restoring pending changes
            task.cluster.status = consts.CLUSTER_STATUSES.new
            objects.Cluster.add_pending_changes(
                task.cluster, consts.CLUSTER_CHANGES.attributes)
            objects.Cluster.add_pending_changes(
                task.cluster, consts.CLUSTER_CHANGES.networks)

            reported_uids = [
                entry["uid"] for entry in itertools.chain(nodes, ia_nodes)]
            nodes_query = objects.NodeCollection.filter_by_id_list(
                None, reported_uids)
            nodes_query = objects.NodeCollection.filter_by(
                nodes_query, cluster_id=task.cluster_id)
            nodes_query = objects.NodeCollection.order_by(nodes_query, 'id')

            # locking Nodes for update
            locked_nodes = objects.NodeCollection.lock_for_update(
                nodes_query).all()

            for node in locked_nodes:
                logs_utils.delete_node_logs(node)
                objects.Node.reset_to_discover(node)

            if ia_nodes:
                ia_uids = [entry["uid"] for entry in ia_nodes]
                cls._notify_inaccessible(
                    task.cluster_id, ia_uids, u"environment resetting")

            # the success text replaces the error taken from kwargs
            env_label = task.cluster.name or task.cluster_id
            message = (u"Environment '{0}' "
                       u"was successfully reset".format(env_label))

            notifier.notify("done", message, task.cluster_id)

        objects.Task.update(
            task,
            {'status': status, 'progress': progress, 'message': message})

        cls._update_action_log_entry(status, task.name, task_uuid, nodes)
Example #46
0
 def success(self, transaction):
     """Finish a successfully executed transaction and notify the user.

     Marks the transaction as finished with the ready status, updates
     the action log and the cluster status, then sends a "done"
     notification tied to the transaction's cluster and uuid.

     :param transaction: the transaction that completed
     """
     objects.Transaction.on_finish(transaction, consts.TASK_STATUSES.ready)
     helpers.TaskHelper.update_action_log(transaction)
     _update_cluster_status(transaction)
     notifier.notify(
         consts.NOTIFICATION_TOPICS.done,
         # The trailing space separates the two sentences; the adjacent
         # literals are concatenated and used to read "completed.You".
         "Graph execution has been successfully completed. "
         "You can check deployment history for detailed information.",
         transaction.cluster_id,
         None,
         task_uuid=transaction.uuid
     )
Example #47
0
    def _success_action(cls, task, status, progress):
        """Finish a successful deployment task and notify the user.

        If any node of the cluster is in the "error" status the task is
        handed to ``_error_action`` instead. Otherwise a "done" message is
        built (including the Horizon address when it can be determined),
        sent as a notification and stored on the task.

        :param task: deployment task being completed
        :param status: status to store on the task
        :param progress: progress value to store on the task
        """
        network_manager = NetworkManager()
        # check if all nodes are ready
        if any(n.status == "error" for n in task.cluster.nodes):
            cls._error_action(task, "error", 100)
            return

        # Default text; also keeps `message` bound when the cluster mode
        # matches none of the branches below (previously that case raised
        # UnboundLocalError at the notify call).
        message = (u"Deployment of environment"
                   " '{0}' is done").format(task.cluster.name)

        if task.cluster.mode in ("singlenode", "multinode"):
            # determining horizon url - it's an IP
            # of a first cluster controller
            controller = (
                db()
                .query(Node)
                .filter_by(cluster_id=task.cluster_id)
                .filter(Node.role_list.any(name="controller"))
                .first()
            )
            if controller:
                logger.debug(
                    u"Controller is found, node_id=%s, "
                    "getting it's IP addresses", controller.id)
                # list comprehension instead of filter(): the result is
                # indexed below, which a Python 3 filter object forbids
                public_net = [
                    n for n in
                    network_manager.get_node_networks(controller.id)
                    if n["name"] == "public" and "ip" in n
                ]
                if public_net:
                    # strip the "/prefix" part of the address
                    horizon_ip = public_net[0]["ip"].split("/")[0]
                    message = (
                        u"Deployment of environment '{0}' is done. "
                        "Access the OpenStack dashboard (Horizon) at "
                        "http://{1}/ or via internal network at http://{2}/"
                    ).format(task.cluster.name, horizon_ip, controller.ip)
                else:
                    logger.warning(
                        u"Public ip for controller node "
                        "not found in '{0}'".format(task.cluster.name))
            else:
                logger.warning(
                    u"Controller node not found in '{0}'".format(
                        task.cluster.name))
        elif task.cluster.mode == "ha":
            # determining horizon url in HA mode - it's vip
            # from a public network saved in task cache
            args = task.cache.get("args")
            try:
                vip = args["attributes"]["public_vip"]
                message = (
                    u"Deployment of environment '{0}' is done. "
                    "Access the OpenStack dashboard (Horizon) at http://{1}/"
                ).format(task.cluster.name, vip)
            except Exception as exc:
                # best effort: fall back to the default message
                logger.error(": ".join([str(exc), traceback.format_exc()]))
                logger.warning(
                    u"Cannot find virtual IP for '{0}'".format(
                        task.cluster.name))

        notifier.notify("done", message, task.cluster_id)
        TaskHelper.update_task_status(task.uuid, status, progress, message)
Example #48
0
 def update_status_nodes(self):
     """Flag online nodes whose timestamp is older than the timeout.

     Selects nodes that are online, not in the 'provisioning' status and
     whose timestamp plus ``self.timeout`` seconds lies in the past;
     sends an "error" notification for each one, then sets them offline
     and commits.
     """
     candidates = db().query(Node).filter(
         not_(Node.status == 'provisioning'))
     timed_out = datetime.now() > (
         Node.timestamp + timedelta(seconds=self.timeout))
     to_update = candidates.filter(timed_out).filter_by(online=True)
     for stale_node in to_update:
         gone_text = u"Node '{0}' has gone away".format(
             stale_node.human_readable_name)
         notifier.notify("error", gone_text, node_id=stale_node.id)
     to_update.update({"online": False})
     db().commit()
Example #49
0
def update_nodes_status(timeout, logger):
    """Flag timed-out online nodes as offline, notifying and logging each.

    :param timeout: seconds added to ``Node.timestamp``; nodes whose
        resulting deadline is earlier than now are selected
    :param logger: logger that receives a warning for every selected node
    """
    gone_query = db().query(Node).filter(
        not_(Node.status == 'provisioning'))
    gone_query = gone_query.filter(
        datetime.now() > (Node.timestamp + timedelta(seconds=timeout)))
    gone_query = gone_query.filter_by(online=True)

    for gone in gone_query:
        text = u"Node '{0}' has gone away".format(gone.human_readable_name)
        notifier.notify("error", text, node_id=gone.id)
        logger.warning(text)

    # Bulk-update every matched row, then persist the change.
    gone_query.update({"online": False})
    db().commit()
Example #50
0
    def redhat_check_licenses_resp(cls, **kwargs):
        """Handle the RPC response for a Red Hat license check.

        Reads ``task_uuid``, ``error``, ``status``, ``progress`` and ``msg``
        from ``kwargs``. Returns early, after logging an error, when the
        task or the release referenced in the task cache cannot be found.
        Otherwise sends notifications as needed and updates the task status
        with the release id as the result.
        """
        logger.info(
            "RPC method redhat_check_licenses_resp received: %s" %
            json.dumps(kwargs)
        )
        task_uuid = kwargs.get('task_uuid')
        error_msg = kwargs.get('error')
        status = kwargs.get('status')
        progress = kwargs.get('progress')
        notify = kwargs.get('msg')

        task = get_task_by_uuid(task_uuid)
        if not task:
            # Adjacent literals instead of a backslash continuation, which
            # used to embed the source indentation into the log message.
            logger.error(
                "redhat_check_licenses_resp: task "
                "with UUID %s not found!", task_uuid)
            return

        release_id = task.cache['args']['release_info']['release_id']
        # Only the existence of the release matters here.
        if not db().query(Release).get(release_id):
            logger.error(
                "redhat_check_licenses_resp: Release"
                " with ID %s not found", release_id)
            return

        if error_msg:
            # Any reported error forces the task into the 'error' status.
            status = 'error'
            cls._update_release_state(release_id, 'error')
            # TODO(NAME): remove these ugly checks
            if 'Unknown error' in error_msg:
                error_msg = 'Failed to check Red Hat licenses '
            if error_msg != 'Task aborted':
                notifier.notify('error', error_msg)

        if notify:
            notifier.notify('error', notify)

        result = {
            "release_info": {
                "release_id": release_id
            }
        }

        TaskHelper.update_task_status(
            task_uuid,
            status,
            progress,
            error_msg,
            result
        )
Example #51
0
 def _error_action(cls, task, status, progress, message=None):
     """Notify about a failed task and store the failure on the task.

     :param task: task object; its ``name`` and ``cluster_id`` are read
     :param status: status value written to the task
     :param progress: progress value written to the task
     :param message: optional failure details; when empty, a message is
         built from ``cls._generate_error_message``
     """
     title = task.name.title()
     if not message:
         message = u"{0} has failed. Check these nodes:\n{1}".format(
             title,
             cls._generate_error_message(
                 task,
                 error_types=('deploy', 'provision'),
                 names_only=True))
     else:
         message = u"{0} has failed. {1}".format(title, message)

     notifier.notify("error", message, task.cluster_id)
     objects.Task.update(
         task,
         {'status': status, 'progress': progress, 'message': message})
Example #52
0
    def remove_nodes_resp(cls, **kwargs):
        """Handle the RPC response for a node removal request.

        Deletes from the database the nodes reported as removed and the
        nodes reported as inaccessible, flags the nodes reported as failed
        with the 'error' status, sends notifications, and updates the task.

        Keys read from ``kwargs``: ``task_uuid``, ``nodes``, ``error_nodes``,
        ``inaccessible_nodes``, ``error``, ``status`` and ``progress``.
        """
        logger.info("RPC method remove_nodes_resp received: %s" % json.dumps(kwargs))
        task_uuid = kwargs.get("task_uuid")
        nodes = kwargs.get("nodes") or []
        error_nodes = kwargs.get("error_nodes") or []
        inaccessible_nodes = kwargs.get("inaccessible_nodes") or []
        error_msg = kwargs.get("error")
        status = kwargs.get("status")
        progress = kwargs.get("progress")

        for node in nodes:
            node_db = db().query(Node).get(node["uid"])
            if not node_db:
                logger.error(u"Failed to delete node '%s': node doesn't exist", str(node))
                # Skip only the missing node; the remaining reported nodes
                # must still be removed.
                continue
            db().delete(node_db)

        for node in inaccessible_nodes:
            # Nodes that did not answer over RPC are simply removed from db
            node_db = db().query(Node).get(node["uid"])
            if node_db:
                logger.warning(u"Node %s not answered by RPC, removing from db", node_db.human_readable_name)
                db().delete(node_db)

        for node in error_nodes:
            node_db = db().query(Node).get(node["uid"])
            if not node_db:
                logger.error(u"Failed to delete node '%s' marked as error from Naily:" " node doesn't exist", str(node))
                # Skip only the missing node; the other failed nodes still
                # need their status updated.
                continue
            node_db.pending_deletion = False
            node_db.status = "error"
            db().add(node_db)
            # Remember the name so the error message below can use it.
            node["name"] = node_db.name
        db().commit()

        success_msg = u"No nodes were removed"
        err_msg = u"No errors occurred"
        if nodes:
            success_msg = u"Successfully removed {0} node(s)".format(len(nodes))
            notifier.notify("done", success_msg)
        if error_nodes:
            # Fall back to the uid for nodes whose name could not be looked up.
            err_msg = u"Failed to remove {0} node(s): {1}".format(
                len(error_nodes), ", ".join([n.get("name") or "ID: {0}".format(n["uid"]) for n in error_nodes])
            )
            notifier.notify("error", err_msg)
        if not error_msg:
            error_msg = ". ".join([success_msg, err_msg])

        TaskHelper.update_task_status(task_uuid, status, progress, error_msg)
Example #53
0
    def _notify(cls, task, topic, message, node_id=None, task_uuid=None):
        """Send a notification for a task unless it has to be suppressed.

        Nothing is sent when ``message`` is None, or when the task is a
        provision task that has a parent.

        :param task: objects.Task object
        :param topic: consts.NOTIFICATION_TOPICS value
        :param message: message text
        :param node_id: node identifier
        :param task_uuid: task uuid; pass it when the notification should
            carry it
        """
        # The UI shows every notification, so a provision task is announced
        # only when it is a top-level task.
        nested_provision = (
            task.name == consts.TASK_NAMES.provision
            and task.parent_id is not None
        )
        if nested_provision or message is None:
            return

        notifier.notify(
            topic, message, task.cluster_id,
            node_id=node_id, task_uuid=task_uuid)
Example #54
0
 def checked_data(self, validate_method=None):
     """Validate the raw request body and return the validated data.

     :param validate_method: optional callable used instead of
         ``self.validator.validate``
     :returns: whatever the validation callable returns
     :raises: ``web.badrequest`` for invalid data or any unexpected error,
         ``web.conflict`` when ``errors.AlreadyExists`` is raised
     """
     try:
         validate = (
             validate_method if validate_method
             else self.validator.validate
         )
         data = validate(web.data())
     except (errors.InvalidInterfacesInfo, errors.InvalidMetadata) as exc:
         # These two failures are also pushed to the notifier.
         notifier.notify("error", str(exc))
         raise web.badrequest(message=str(exc))
     except errors.AlreadyExists as exc:
         conflict = web.conflict()
         conflict.message = exc.message
         raise conflict
     except (errors.InvalidData, Exception) as exc:
         raise web.badrequest(message=str(exc))
     return data
Example #55
0
    def checked_data(self, validate_method=None, **kwargs):
        """Validate input data and return the validated result.

        :param validate_method: optional callable used instead of
            ``self.validator.validate``
        :param kwargs: may contain ``data`` with the payload to validate;
            when absent, the raw request body from ``web.data()`` is used.
            The remaining keyword arguments are passed to the validation
            callable.
        :returns: whatever the validation callable returns
        :raises: ``web.badrequest`` for invalid data or any unexpected error,
            ``web.conflict`` when ``errors.AlreadyExists`` is raised
        """
        try:
            # Read the request body only when the caller did not supply
            # ``data``; a pop() default would be evaluated unconditionally.
            if 'data' in kwargs:
                data = kwargs.pop('data')
            else:
                data = web.data()
            method = validate_method or self.validator.validate

            valid_data = method(data, **kwargs)
        except (errors.InvalidInterfacesInfo, errors.InvalidMetadata) as exc:
            # These two failures are also pushed to the notifier.
            notifier.notify("error", str(exc))
            raise web.badrequest(message=str(exc))
        except (errors.AlreadyExists) as exc:
            err = web.conflict()
            err.message = exc.message
            raise err
        except (errors.InvalidData, Exception) as exc:
            raise web.badrequest(message=str(exc))
        return valid_data
Example #56
0
 def dump_environment_resp(cls, **kwargs):
     """Handle the RPC response for an environment dump request.

     On the 'error' status the error text is sent as a notification and
     stored on the task with progress 100. On the 'ready' status a "done"
     notification is sent and the task message is set to the download
     path built from the base name of ``msg``. Other statuses are ignored.
     """
     logger.info("RPC method dump_environment_resp received: %s" %
                 json.dumps(kwargs))
     task_uuid = kwargs.get('task_uuid')
     status = kwargs.get('status')
     progress = kwargs.get('progress')
     error = kwargs.get('error')
     msg = kwargs.get('msg')

     if status == 'error':
         notifier.notify('error', error)
         TaskHelper.update_task_status(task_uuid, status, 100, error)
         return

     if status != 'ready':
         return

     # ``msg`` carries the path of the dump; only its base name is exposed.
     dump_name = os.path.basename(msg)
     notifier.notify(
         'done',
         'Snapshot is ready. '
         'Visit Support page to download')
     download_path = '/dump/{0}'.format(dump_name)
     TaskHelper.update_task_status(task_uuid, status, progress,
                                   download_path)
Example #57
0
    def redhat_check_licenses_resp(cls, **kwargs):
        """Handle the RPC response for a Red Hat license check.

        Reads ``task_uuid``, ``error``, ``status``, ``progress`` and ``msg``
        from ``kwargs``. Returns early, after logging an error, when the
        task or the release referenced in the task cache cannot be found.
        Otherwise sends notifications as needed and updates the task status
        with the release id as the result.
        """
        logger.info("RPC method redhat_check_licenses_resp received: %s" %
                    json.dumps(kwargs))
        task_uuid = kwargs.get('task_uuid')
        error_msg = kwargs.get('error')
        status = kwargs.get('status')
        progress = kwargs.get('progress')
        notify = kwargs.get('msg')

        task = db().query(Task).filter_by(uuid=task_uuid).first()
        if not task:
            # Adjacent literals instead of a backslash continuation, which
            # used to embed the source indentation into the log message.
            logger.error(
                "redhat_check_licenses_resp: task "
                "with UUID %s not found!", task_uuid)
            return

        release_id = task.cache['args']['release_info']['release_id']
        # Only the existence of the release matters here.
        if not db().query(Release).get(release_id):
            logger.error(
                "redhat_check_licenses_resp: Release"
                " with ID %s not found", release_id)
            return

        if error_msg:
            # Any reported error forces the task into the 'error' status.
            status = 'error'
            cls._update_release_state(release_id, 'error')
            # TODO: remove these ugly checks
            if 'Unknown error' in error_msg:
                error_msg = 'Failed to check Red Hat licenses '
            if error_msg != 'Task aborted':
                notifier.notify('error', error_msg)

        if notify:
            notifier.notify('error', notify)

        result = {"release_info": {"release_id": release_id}}

        TaskHelper.update_task_status(task_uuid, status, progress, error_msg,
                                      result)
Example #58
0
    def _notify(cls, task, topic, message, node_id=None, task_uuid=None):
        """Forward a task notification to the notifier when it is allowed.

        The notification is dropped when the task is a provision task with
        a parent, or when ``message`` is None.

        :param task: objects.Task object
        :param topic: consts.NOTIFICATION_TOPICS value
        :param message: message text
        :param node_id: node identifier
        :param task_uuid: task uuid; pass it when the notification should
            carry it
        """
        # The UI shows every notification, so a provision task is announced
        # only when it is a top-level task.
        suppressed = (
            task.name == consts.TASK_NAMES.provision
            and task.parent_id is not None
        ) or message is None

        if not suppressed:
            notifier.notify(
                topic,
                message,
                task.cluster_id,
                node_id=node_id,
                task_uuid=task_uuid,
            )