Example #1
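A node PUT handler that applies validated field updates, reassigns networks when the node's cluster changes, regenerates volume info, and sends an error notification if that generation fails.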
 def PUT(self, node_id):
     node = self.get_object_or_404(Node, node_id)
     if not node.attributes:
         node.attributes = NodeAttributes(node_id=node.id)
     data = self.validator.validate_update(web.data())
     for key, value in data.iteritems():
         setattr(node, key, value)
         if key == 'cluster_id':
             # a truthy value means the node was assigned to a cluster;
             # an empty one means it was removed from its cluster
             if value:
                 self.allow_network_assignment_to_all_interfaces(node)
                 self.assign_networks_to_main_interface(node)
             else:
                 self.clear_assigned_networks(node)
                 self.clear_all_allowed_networks(node)
     if node.status not in ('provisioning', 'deploying') \
             and ("role" in data or "cluster_id" in data):
         try:
             node.attributes.volumes = \
                 node.volume_manager.gen_volumes_info()
         except Exception as exc:
             msg = (
                 u"Failed to generate volumes "
                 "info for node '{0}': '{1}'"
             ).format(
                 node.name or data.get("mac") or data.get("id"),
                 str(exc) or "see logs for details"
             )
             logger.warning(traceback.format_exc())
             notifier.notify("error", msg, node_id=node.id)
     self.db.commit()
     return self.render(node)
Example #2
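A fake deletion thread that reports task completion through an RPC receiver, then restores each online node to 'discover' status and emits a discovery notification for it.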
    def run(self):
        super(FakeDeletionThread, self).run()
        receiver = NailgunReceiver
        receiver.initialize()
        kwargs = {
            'task_uuid': self.task_uuid,
            'nodes': self.data['args']['nodes'],
            'status': 'ready'
        }
        nodes_to_restore = self.data['args'].get('nodes_to_restore', [])
        tick_interval = int(settings.FAKE_TASKS_TICK_INTERVAL) or 3
        resp_method = getattr(receiver, self.respond_to)
        resp_method(**kwargs)
        orm = scoped_session(
            sessionmaker(bind=engine, query_cls=NoCacheQuery)
        )

        for node in nodes_to_restore:
            # An offline node has just been deleted from the DB
            # and cannot be recreated with 'discover' status
            if not node.online:
                continue

            node.status = 'discover'
            orm.add(node)
            orm.commit()
            ram = round(node.meta.get('ram') or 0, 1)
            cores = node.meta.get('cores') or 'unknown'
            notifier.notify("discover",
                            "New node with %s CPU core(s) "
                            "and %s GB memory is discovered" %
                            (cores, ram), node_id=node.id)
        receiver.stop()
Example #3
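A monitoring loop that marks nodes offline once their last timestamp exceeds the timeout and sends an error notification for every node that goes away.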
 def run(self):
     while not self.stoprequest.isSet():
         self.db.expire_all()
         for node_db in self.db.query(Node).filter(
             # nodes may become unresponsive while provisioning
             not_(Node.status == 'provisioning')
         ):
             timedelta = (datetime.now() - node_db.timestamp).seconds
             if timedelta > self.timeout:
                 logger.warning(
                     u"Node '{0}' seems to be offline "
                     "for {1} seconds...".format(
                         node_db.name,
                         timedelta
                     )
                 )
                 if node_db.online:
                     node_db.online = False
                     self.db.add(node_db)
                     self.db.commit()
                     notifier.notify(
                         "error",
                         u"Node '{0}' has gone away".format(
                             node_db.name or node_db.mac
                         ),
                         node_id=node_db.id
                     )
         self.sleep()
Example #4
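A node POST handler that creates the node, generates its volume info (notifying on failure), registers its interfaces through NetworkManager, and announces the new node with a discovery notification.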
    def POST(self):
        data = self.checked_data()

        node = Node()
        for key, value in data.iteritems():
            if key == "meta":
                node.create_meta(value)
            else:
                setattr(node, key, value)
        node.name = "Untitled (%s)" % data['mac'][-5:]
        node.timestamp = datetime.now()
        self.db.add(node)
        self.db.commit()
        node.attributes = NodeAttributes()

        try:
            node.attributes.volumes = node.volume_manager.gen_volumes_info()
            if node.cluster:
                node.cluster.add_pending_changes(
                    "disks",
                    node_id=node.id
                )
        except Exception as exc:
            msg = (
                u"Failed to generate volumes "
                "info for node '{0}': '{1}'"
            ).format(
                node.name or data.get("mac") or data.get("id"),
                str(exc) or "see logs for details"
            )
            logger.warning(traceback.format_exc())
            notifier.notify("error", msg, node_id=node.id)
        self.db.add(node)
        self.db.commit()

        network_manager = NetworkManager()
        # Add interfaces for node from 'meta'.
        if node.meta and node.meta.get('interfaces'):
            network_manager.update_interfaces_info(node.id)

        if node.cluster_id:
            network_manager.allow_network_assignment_to_all_interfaces(node.id)
            network_manager.assign_networks_to_main_interface(node.id)

        try:
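            # meta['memory']['total'] is in bytes; dividing by
            # 1073741824 (2 ** 30) converts it to gigabytes for the message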
            ram = str(round(float(
                node.meta['memory']['total']) / 1073741824, 1))
        except (KeyError, TypeError, ValueError):
            ram = "unknown"

        cores = str(node.meta.get('cpu', {}).get('total', "unknown"))
        notifier.notify("discover",
                        "New node with %s CPU core(s) "
                        "and %s GB memory is discovered" %
                        (cores, ram), node_id=node.id)
        raise web.webapi.created(json.dumps(
            NodeHandler.render(node),
            indent=4
        ))
Example #5
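A variant of the node POST handler that builds NIC records from the node's metadata itself instead of delegating to NetworkManager.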
    def POST(self):
        data = self.validator.validate(web.data())
        node = Node()
        for key, value in data.iteritems():
            setattr(node, key, value)
        node.name = "Untitled (%s)" % data['mac'][-5:]
        node.timestamp = datetime.now()
        self.db.add(node)
        self.db.commit()
        node.attributes = NodeAttributes()

        try:
            node.attributes.volumes = node.volume_manager.gen_volumes_info()
            if node.cluster:
                node.cluster.add_pending_changes(
                    "disks",
                    node_id=node.id
                )
        except Exception as exc:
            msg = (
                u"Failed to generate volumes "
                "info for node '{0}': '{1}'"
            ).format(
                node.name or data.get("mac") or data.get("id"),
                str(exc) or "see logs for details"
            )
            logger.warning(traceback.format_exc())
            notifier.notify("error", msg, node_id=node.id)
        self.db.add(node)
        self.db.commit()

        # Add interfaces for node from 'meta'.
        if node.meta and node.meta.get('interfaces'):
            nics = self.get_nics_from_meta(node)
            map(self.db.add, nics)
            self.db.commit()

        if node.cluster_id:
            self.allow_network_assignment_to_all_interfaces(node)
            self.assign_networks_to_main_interface(node)
            self.db.commit()

        try:
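            # meta['memory']['total'] is in bytes; dividing by
            # 1073741824 (2 ** 30) converts it to gigabytes for the message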
            ram = str(round(float(
                node.meta['memory']['total']) / 1073741824, 1))
        except (KeyError, TypeError, ValueError):
            ram = "unknown"

        cores = str(node.meta.get('cpu', {}).get('total', "unknown"))
        notifier.notify("discover",
                        "New node with %s CPU core(s) "
                        "and %s GB memory is discovered" %
                        (cores, ram), node_id=node.id)
        raise web.webapi.created(json.dumps(
            NodeHandler.render(node),
            indent=4
        ))
Example #6
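A deployment success handler that works out the Horizon URL (the first controller's IP in single/multinode mode, the public VIP in HA mode) and sends a 'done' notification.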
    def _success_action(cls, task, status, progress):
        # fail the whole deployment if any node is in error state
        if any(map(lambda n: n.status == 'error', task.cluster.nodes)):
            cls._error_action(task, 'error', 100)
            return

        if task.cluster.mode in ('singlenode', 'multinode'):
            # determine the Horizon URL - it is the IP
            # of the first cluster controller
            controller = cls.db.query(Node).filter_by(
                cluster_id=task.cluster_id, role='controller').first()
            if controller:
                logger.debug(
                    u"Controller is found, node_id=%s, "
                    "getting it's IP addresses", controller.id)
                public_net = filter(
                    lambda n: n['name'] == 'public' and 'ip' in n,
                    cls.network_manager.get_node_networks(controller.id))
                if public_net:
                    horizon_ip = public_net[0]['ip'].split('/')[0]
                    message = (
                        u"Deployment of environment '{0}' is done. "
                        "Access WebUI of OpenStack at http://{1}/ or via "
                        "internal network at http://{2}/").format(
                            task.cluster.name, horizon_ip, controller.ip)
                else:
                    message = (
                        u"Deployment of environment '{0}' is done").format(
                            task.cluster.name)
                    logger.warning(u"Public ip for controller node "
                                   "not found in '{0}'".format(
                                       task.cluster.name))
            else:
                message = (u"Deployment of environment"
                           " '{0}' is done").format(task.cluster.name)
                logger.warning("Controller node not found in '{0}'".format(
                    task.cluster.name))
        elif task.cluster.mode == 'ha':
            # determine the Horizon URL in HA mode - it is the VIP
            # from the public network saved in the task cache
            args = task.cache.get('args')
            try:
                vip = args['attributes']['public_vip']
                message = (u"Deployment of environment '{0}' is done. "
                           "Access WebUI of OpenStack at http://{1}/").format(
                               task.cluster.name, vip)
            except Exception as exc:
                logger.error(": ".join([str(exc), traceback.format_exc()]))
                message = (u"Deployment of environment"
                           " '{0}' is done").format(task.cluster.name)
                logger.warning(u"Cannot find virtual IP for '{0}'".format(
                    task.cluster.name))

        notifier.notify("done", message, task.cluster_id)
        TaskHelper.update_task_status(task.uuid, status, progress, message)
Example #7
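A deployment failure handler that composes an error message, notifies the cluster, and updates the task status.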
 def _error_action(cls, task, status, progress, message=None):
     if message:
         message = u"Deployment has failed. {0}".format(message)
     else:
         message = u"Deployment has failed. Check these nodes:\n{0}".format(
             cls._generate_error_message(task,
                                         error_types=('deploy',
                                                      'provision'),
                                         names_only=True))
     notifier.notify("error", message, task.cluster_id)
     TaskHelper.update_task_status(task.uuid, status, progress, message)
Example #8
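An RPC callback for cluster removal: on success it deletes the cluster and its IP allocations and notifies 'done'; on error it flags the cluster and notifies with the failure message.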
    def remove_cluster_resp(cls, **kwargs):
        network_manager = NetworkManager()
        logger.info("RPC method remove_cluster_resp received: %s" % kwargs)
        task_uuid = kwargs.get('task_uuid')

        cls.remove_nodes_resp(**kwargs)

        task = db().query(Task).filter_by(uuid=task_uuid).first()
        cluster = task.cluster

        if task.status in ('ready',):
            logger.debug("Removing environment itself")
            cluster_name = cluster.name

            nws = itertools.chain(
                *[n.networks for n in cluster.network_groups]
            )
            ips = db().query(IPAddr).filter(
                IPAddr.network.in_([n.id for n in nws])
            )
            map(db().delete, ips)
            db().commit()

            db().delete(cluster)
            db().commit()

            # Dmitry's hack for clearing VLANs without networks
            network_manager.clear_vlans()

            notifier.notify(
                "done",
                u"Environment '%s' and all its nodes are deleted" % (
                    cluster_name
                )
            )

        elif task.status in ('error',):
            cluster.status = 'error'
            db().add(cluster)
            db().commit()
            if not task.message:
                task.message = "Failed to delete nodes:\n{0}".format(
                    cls._generate_error_message(
                        task,
                        error_types=('deletion',)
                    )
                )
            notifier.notify(
                "error",
                task.message,
                cluster.id
            )
Example #9
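An RPC callback for node removal that deletes the removed nodes, marks failed ones as errored, and sends 'done' or 'error' notifications summarizing the outcome.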
    def remove_nodes_resp(cls, **kwargs):
        logger.info("RPC method remove_nodes_resp received: %s" % kwargs)
        task_uuid = kwargs.get('task_uuid')
        nodes = kwargs.get('nodes') or []
        error_nodes = kwargs.get('error_nodes') or []
        error_msg = kwargs.get('error')
        status = kwargs.get('status')
        progress = kwargs.get('progress')

        for node in nodes:
            node_db = cls.db.query(Node).get(node['uid'])
            if not node_db:
                logger.error(
                    u"Failed to delete node '%s': node doesn't exist",
                    str(node)
                )
                break
            cls.db.delete(node_db)

        for node in error_nodes:
            node_db = cls.db.query(Node).get(node['uid'])
            if not node_db:
                logger.error(
                    u"Failed to delete node '%s' marked as error from Naily:"
                    " node doesn't exist", str(node)
                )
                break
            node_db.pending_deletion = False
            node_db.status = 'error'
            cls.db.add(node_db)
            node['name'] = node_db.name
        cls.db.commit()

        success_msg = u"No nodes were removed"
        err_msg = u"No errors occurred"
        if nodes:
            success_msg = u"Successfully removed {0} node(s)".format(
                len(nodes)
            )
            notifier.notify("done", success_msg)
        if error_nodes:
            err_msg = u"Failed to remove {0} node(s): {1}".format(
                len(error_nodes),
                ', '.join(
                    [n.get('name') or "ID: {0}".format(n['uid'])
                        for n in error_nodes])
            )
            notifier.notify("error", err_msg)
        if not error_msg:
            error_msg = ". ".join([success_msg, err_msg])

        TaskHelper.update_task_status(task_uuid, status, progress, error_msg)
Example #10
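A compact offline-detection routine: any node silent for longer than the timeout is marked offline and an error notification is raised.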
 def update_status_nodes(self):
     for node_db in self.db.query(Node).filter(
         # nodes may become unresponsive while provisioning
             not_(Node.status == 'provisioning')):
         timedelta = (datetime.now() - node_db.timestamp).seconds
         if timedelta > self.timeout:
             logger.warning(
                 u"Node '{0}' seems to be offline "
                 "for {1} seconds...".format(
                     node_db.name,
                     timedelta))
             if node_db.online:
                 node_db.online = False
                 self.db.commit()
                 notifier.notify(
                     "error",
                     u"Node '{0}' has gone away".format(
                         node_db.human_readable_name),
                     node_id=node_db.id)
Example #11
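Another fake deletion thread; this variant recreates Node objects from dicts, regenerates their volumes and interface info, and notifies about each rediscovered node.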
    def run(self):
        super(FakeDeletionThread, self).run()
        receiver = NailgunReceiver
        receiver.initialize()
        kwargs = {
            'task_uuid': self.task_uuid,
            'nodes': self.data['args']['nodes'],
            'status': 'ready'
        }
        nodes_to_restore = self.data['args'].get('nodes_to_restore', [])
        tick_interval = int(settings.FAKE_TASKS_TICK_INTERVAL) or 3
        resp_method = getattr(receiver, self.respond_to)
        resp_method(**kwargs)
        orm = make_session()

        for node_data in nodes_to_restore:
            node = Node(**node_data)

            # An offline node has just been deleted from the DB
            # and cannot be recreated with 'discover' status
            if not node.online:
                continue

            node.status = 'discover'
            orm.add(node)
            orm.commit()
            node.attributes = NodeAttributes(node_id=node.id)
            node.attributes.volumes = node.volume_manager.gen_volumes_info()
            network_manager = NetworkManager(db=orm)
            network_manager.update_interfaces_info(node.id)
            orm.commit()

            ram = round(node.meta.get('ram') or 0, 1)
            cores = node.meta.get('cores') or 'unknown'
            notifier.notify("discover",
                            "New node with %s CPU core(s) "
                            "and %s GB memory is discovered" %
                            (cores, ram), node_id=node.id)
        receiver.stop()
Example #12
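A remove_cluster_resp variant bound to cls.db and cls.network_manager instead of module-level helpers.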
    def remove_cluster_resp(cls, **kwargs):
        logger.info("RPC method remove_cluster_resp received: %s" % kwargs)
        task_uuid = kwargs.get('task_uuid')

        cls.remove_nodes_resp(**kwargs)

        task = cls.db.query(Task).filter_by(uuid=task_uuid).first()
        cluster = task.cluster

        if task.status in ('ready', ):
            logger.debug("Removing environment itself")
            cluster_name = cluster.name

            nws = itertools.chain(
                *[n.networks for n in cluster.network_groups])
            ips = cls.db.query(IPAddr).filter(
                IPAddr.network.in_([n.id for n in nws]))
            map(cls.db.delete, ips)
            cls.db.commit()

            cls.db.delete(cluster)
            cls.db.commit()

            # Dmitry's hack for clearing VLANs without networks
            cls.network_manager.clear_vlans()

            notifier.notify(
                "done", u"Environment '%s' and all its nodes are deleted" %
                (cluster_name))

        elif task.status in ('error', ):
            cluster.status = 'error'
            cls.db.add(cluster)
            cls.db.commit()
            if not task.message:
                task.message = "Failed to delete nodes:\n{0}".format(
                    cls._generate_error_message(task,
                                                error_types=('deletion', )))
            notifier.notify("error", task.message, cluster.id)
Example #13
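A validation wrapper that sends an error notification for invalid interface or metadata input before translating validation failures into HTTP error responses.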
 def checked_data(self, validate_method=None):
     try:
         if validate_method:
             data = validate_method(web.data())
         else:
             data = self.validator.validate(web.data())
     except (
         errors.InvalidInterfacesInfo,
         errors.InvalidMetadata
     ) as exc:
         notifier.notify("error", str(exc))
         raise web.badrequest(message=str(exc))
     except (
         errors.AlreadyExists
     ) as exc:
         err = web.conflict()
         err.message = exc.message
         raise err
     except (
         errors.InvalidData,
         Exception
     ) as exc:
         raise web.badrequest(message=str(exc))
     return data
Example #14
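A _success_action variant that resolves the controller through db() and a standalone NetworkManager instance.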
    def _success_action(cls, task, status, progress):
        network_manager = NetworkManager()
        # fail the whole deployment if any node is in error state
        if any(map(lambda n: n.status == 'error',
                   task.cluster.nodes)):
            cls._error_action(task, 'error', 100)
            return

        if task.cluster.mode in ('singlenode', 'multinode'):
            # determine the Horizon URL - it is the IP
            # of the first cluster controller
            controller = db().query(Node).filter_by(
                cluster_id=task.cluster_id,
                role='controller'
            ).first()
            if controller:
                logger.debug(
                    u"Controller is found, node_id=%s, "
                    "getting it's IP addresses",
                    controller.id
                )
                public_net = filter(
                    lambda n: n['name'] == 'public' and 'ip' in n,
                    network_manager.get_node_networks(controller.id)
                )
                if public_net:
                    horizon_ip = public_net[0]['ip'].split('/')[0]
                    message = (
                        u"Deployment of environment '{0}' is done. "
                        "Access the OpenStack dashboard (Horizon) at "
                        "http://{1}/ or via internal network at http://{2}/"
                    ).format(
                        task.cluster.name,
                        horizon_ip,
                        controller.ip
                    )
                else:
                    message = (
                        u"Deployment of environment '{0}' is done"
                    ).format(task.cluster.name)
                    logger.warning(
                        u"Public ip for controller node "
                        "not found in '{0}'".format(task.cluster.name)
                    )
            else:
                message = (
                    u"Deployment of environment"
                    " '{0}' is done"
                ).format(task.cluster.name)
                logger.warning("Controller node not found in '{0}'".format(
                    task.cluster.name
                ))
        elif task.cluster.mode == 'ha':
            # determine the Horizon URL in HA mode - it is the VIP
            # from the public network saved in the task cache
            args = task.cache.get('args')
            try:
                vip = args['attributes']['public_vip']
                message = (
                    u"Deployment of environment '{0}' is done. "
                    "Access the OpenStack dashboard (Horizon) at http://{1}/"
                ).format(
                    task.cluster.name,
                    vip
                )
            except Exception as exc:
                logger.error(": ".join([
                    str(exc),
                    traceback.format_exc()
                ]))
                message = (
                    u"Deployment of environment"
                    " '{0}' is done"
                ).format(task.cluster.name)
                logger.warning(
                    u"Cannot find virtual IP for '{0}'".format(
                        task.cluster.name
                    )
                )

        notifier.notify(
            "done",
            message,
            task.cluster_id
        )
        TaskHelper.update_task_status(task.uuid, status, progress, message)
Example #15
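A collection PUT handler that updates many nodes in one request, regenerates volume info when disks, role, or cluster membership change, and notifies both on generation failures and on nodes coming back online.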
    def PUT(self):
        data = self.validator.validate_collection_update(web.data())
        q = self.db.query(Node)
        nodes_updated = []
        for nd in data:
            is_agent = nd.pop("is_agent", False)
            node = None
            if "mac" in nd:
                node = q.filter_by(mac=nd["mac"]).first() \
                    or self.validator.validate_existent_node_mac(nd)
            else:
                node = q.get(nd["id"])
            if nd.get("cluster_id") is None and node.cluster:
                node.cluster.clear_pending_changes(node_id=node.id)
            old_cluster_id = node.cluster_id
            for key, value in nd.iteritems():
                if is_agent and (key, value) == ("status", "discover") \
                        and node.status == "provisioning":
                    # We don't update provisioning back to discover
                    logger.debug(
                        "Node is already provisioning - "
                        "status not updated by agent"
                    )
                    continue
                setattr(node, key, value)
            if not node.attributes:
                node.attributes = NodeAttributes()
                self.db.commit()
            if not node.attributes.volumes:
                node.attributes.volumes = \
                    node.volume_manager.gen_volumes_info()
                self.db.commit()
            if node.status not in ('provisioning', 'deploying'):
                variants = (
                    "disks" in node.meta and
                    len(node.meta["disks"]) != len(
                        filter(
                            lambda d: d["type"] == "disk",
                            node.attributes.volumes
                        )
                    ),
                    "role" in nd,
                    "cluster_id" in nd
                )
                if any(variants):
                    try:
                        node.attributes.volumes = \
                            node.volume_manager.gen_volumes_info()
                        if node.cluster:
                            node.cluster.add_pending_changes(
                                "disks",
                                node_id=node.id
                            )
                    except Exception as exc:
                        msg = (
                            "Failed to generate volumes "
                            "info for node '{0}': '{1}'"
                        ).format(
                            node.name or nd.get("mac") or nd.get("id"),
                            str(exc) or "see logs for details"
                        )
                        logger.warning(traceback.format_exc())
                        notifier.notify("error", msg, node_id=node.id)

                self.db.commit()
            if is_agent:
                node.timestamp = datetime.now()
                if not node.online:
                    node.online = True
                    msg = u"Node '{0}' is back online".format(
                        node.name or node.mac
                    )
                    logger.info(msg)
                    notifier.notify(
                        "discover",
                        msg,
                        node_id=node.id
                    )
                # Update node's NICs.
                if node.meta and 'interfaces' in node.meta:
                    db_nics = list(node.interfaces)
                    nics = self.get_nics_from_meta(node)
                    for nic in nics:
                        db_nic = filter(lambda i: i.mac == nic.mac, db_nics)
                        if not db_nic:
                            self.db.add(nic)
                            continue
                        db_nic = db_nic[0]
                        for key in ('name', 'current_speed', 'max_speed'):
                            setattr(db_nic, key, getattr(nic, key))
                        db_nics.remove(db_nic)
                    map(self.db.delete, db_nics)
            nodes_updated.append(node)
            self.db.commit()
            if 'cluster_id' in nd and nd['cluster_id'] != old_cluster_id:
                if old_cluster_id:
                    self.clear_assigned_networks(node)
                    self.clear_all_allowed_networks(node)
                if nd['cluster_id']:
                    self.allow_network_assignment_to_all_interfaces(node)
                    self.assign_networks_to_main_interface(node)
                self.db.commit()
        return map(NodeHandler.render, nodes_updated)
Example #16
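An RPC deployment callback that updates per-node state, notifies about individual node failures, recomputes overall task progress, and dispatches to the success or error handler.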
    def deploy_resp(cls, **kwargs):
        logger.info("RPC method deploy_resp received: %s" % kwargs)
        task_uuid = kwargs.get('task_uuid')
        nodes = kwargs.get('nodes') or []
        message = kwargs.get('error')
        status = kwargs.get('status')
        progress = kwargs.get('progress')

        task = db().query(Task).filter_by(uuid=task_uuid).first()
        if not task:
            # No task found - nothing to do here, returning
            logger.warning(
                u"No task with uuid '{0}'' found - nothing changed".format(
                    task_uuid
                )
            )
            return
        if not status:
            status = task.status

        error_nodes = []
        # First of all, let's update nodes in database
        for node in nodes:
            node_db = db().query(Node).get(node['uid'])

            if not node_db:
                logger.warning(
                    u"No node found with uid '{0}' - nothing changed".format(
                        node['uid']
                    )
                )
                continue

            update_fields = (
                'error_msg',
                'error_type',
                'status',
                'progress',
                'online'
            )
            for param in update_fields:
                if param in node:
                    logger.debug(
                        u"Updating node {0} - set {1} to {2}".format(
                            node['uid'],
                            param,
                            node[param]
                        )
                    )
                    setattr(node_db, param, node[param])

                    if (param == 'progress' and node.get('status') == 'error') \
                            or node.get('online') is False:
                        # If a failure occurred on the node,
                        # its progress should be 100
                        node_db.progress = 100
                        # Setting node error_msg for offline nodes
                        if node.get('online') is False \
                                and not node_db.error_msg:
                            node_db.error_msg = u"Node is offline"
                        # Notification on particular node failure
                        notifier.notify(
                            "error",
                            u"Failed to deploy node '{0}': {1}".format(
                                node_db.name,
                                node_db.error_msg or "Unknown error"
                            ),
                            cluster_id=task.cluster_id,
                            node_id=node['uid'],
                            task_uuid=task_uuid
                        )

            db().add(node_db)
            db().commit()

        # Calculate overall task progress from per-node progress info
        task = db().query(Task).filter_by(uuid=task_uuid).first()
        coeff = settings.PROVISIONING_PROGRESS_COEFF or 0.3
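        # provisioning contributes the first coeff share of overall progress;
        # deployment contributes the remaining (1 - coeff)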
        if nodes and not progress:
            nodes_progress = []
            nodes_db = db().query(Node).filter_by(
                cluster_id=task.cluster_id).all()
            for node in nodes_db:
                if node.status == "discover":
                    nodes_progress.append(0)
                elif not node.online:
                    nodes_progress.append(100)
                elif node.status in ['provisioning', 'provisioned'] or \
                        node.needs_reprovision:
                    nodes_progress.append(float(node.progress) * coeff)
                elif node.status in ['deploying', 'ready'] or \
                        node.needs_redeploy:
                    nodes_progress.append(
                        100.0 * coeff + float(node.progress) * (1.0 - coeff)
                    )
            if nodes_progress:
                progress = int(
                    float(sum(nodes_progress)) / len(nodes_progress)
                )

        # Let's check the whole task status
        if status in ('error',):
            cls._error_action(task, status, progress, message)
        elif status in ('ready',):
            cls._success_action(task, status, progress)
        else:
            TaskHelper.update_task_status(task.uuid, status, progress, message)
Example #17
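The same deploy_resp flow written against cls.db rather than the db() session factory.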
    def deploy_resp(cls, **kwargs):
        logger.info("RPC method deploy_resp received: %s" % kwargs)
        task_uuid = kwargs.get('task_uuid')
        nodes = kwargs.get('nodes') or []
        message = kwargs.get('error')
        status = kwargs.get('status')
        progress = kwargs.get('progress')

        task = cls.db.query(Task).filter_by(uuid=task_uuid).first()
        if not task:
            # No task found - nothing to do here, returning
            logger.warning(
                u"No task with uuid '{0}'' found - nothing changed".format(
                    task_uuid))
            return
        if not status:
            status = task.status

        error_nodes = []
        # First of all, let's update nodes in database
        for node in nodes:
            node_db = cls.db.query(Node).get(node['uid'])

            if not node_db:
                logger.warning(
                    u"No node found with uid '{0}' - nothing changed".format(
                        node['uid']))
                continue

            update_fields = ('error_msg', 'error_type', 'status', 'progress',
                             'online')
            for param in update_fields:
                if param in node:
                    logger.debug(u"Updating node {0} - set {1} to {2}".format(
                        node['uid'], param, node[param]))
                    setattr(node_db, param, node[param])

                    if (param == 'progress' and node.get('status') == 'error') \
                            or node.get('online') is False:
                        # If a failure occurred on the node,
                        # its progress should be 100
                        node_db.progress = 100
                        # Setting node error_msg for offline nodes
                        if node.get('online') is False \
                                and not node_db.error_msg:
                            node_db.error_msg = u"Node is offline"
                        # Notification on particular node failure
                        notifier.notify(
                            "error",
                            u"Failed to deploy node '{0}': {1}".format(
                                node_db.name, node_db.error_msg
                                or "Unknown error"),
                            cluster_id=task.cluster_id,
                            node_id=node['uid'],
                            task_uuid=task_uuid)

            cls.db.add(node_db)
            cls.db.commit()

        # Calculate overall task progress from per-node progress info
        task = cls.db.query(Task).filter_by(uuid=task_uuid).first()
        coeff = settings.PROVISIONING_PROGRESS_COEFF or 0.3
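        # provisioning contributes the first coeff share of overall progress;
        # deployment contributes the remaining (1 - coeff)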
        if nodes and not progress:
            nodes_progress = []
            nodes_db = cls.db.query(Node).filter_by(
                cluster_id=task.cluster_id).all()
            for node in nodes_db:
                if node.status == "discover":
                    nodes_progress.append(0)
                elif not node.online:
                    nodes_progress.append(100)
                elif node.status in ['provisioning', 'provisioned'] or \
                        node.needs_reprovision:
                    nodes_progress.append(float(node.progress) * coeff)
                elif node.status in ['deploying', 'ready'] or \
                        node.needs_redeploy:
                    nodes_progress.append(100.0 * coeff +
                                          float(node.progress) * (1.0 - coeff))
            if nodes_progress:
                progress = int(
                    float(sum(nodes_progress)) / len(nodes_progress))

        # Let's check the whole task status
        if status in ('error', ):
            cls._error_action(task, status, progress, message)
        elif status in ('ready', ):
            cls._success_action(task, status, progress)
        else:
            TaskHelper.update_task_status(task.uuid, status, progress, message)
Example #18
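A collection PUT handler variant that delegates NIC and network bookkeeping to NetworkManager.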
    def PUT(self):
        data = self.checked_data(
            self.validator.validate_collection_update
        )

        network_manager = NetworkManager()
        q = self.db.query(Node)
        nodes_updated = []
        for nd in data:
            is_agent = nd.pop("is_agent", False)
            node = None
            if "mac" in nd:
                node = q.filter_by(mac=nd["mac"]).first() \
                    or self.validator.validate_existent_node_mac_update(nd)
            else:
                node = q.get(nd["id"])
            if is_agent:
                node.timestamp = datetime.now()
                if not node.online:
                    node.online = True
                    msg = u"Node '{0}' is back online".format(
                        node.human_readable_name)
                    logger.info(msg)
                    notifier.notify("discover", msg, node_id=node.id)
                self.db.commit()
            if nd.get("cluster_id") is None and node.cluster:
                node.cluster.clear_pending_changes(node_id=node.id)
            old_cluster_id = node.cluster_id
            for key, value in nd.iteritems():
                if is_agent and (key, value) == ("status", "discover") \
                        and node.status == "provisioning":
                    # We don't update provisioning back to discover
                    logger.debug(
                        "Node is already provisioning - "
                        "status not updated by agent"
                    )
                    continue
                if key == "meta":
                    node.update_meta(value)
                else:
                    setattr(node, key, value)
            self.db.commit()
            if not node.attributes:
                node.attributes = NodeAttributes()
                self.db.commit()
            if not node.attributes.volumes:
                node.attributes.volumes = \
                    node.volume_manager.gen_volumes_info()
                self.db.commit()
            if node.status not in ('provisioning', 'deploying'):
                variants = (
                    "disks" in node.meta and
                    len(node.meta["disks"]) != len(
                        filter(
                            lambda d: d["type"] == "disk",
                            node.attributes.volumes
                        )
                    ),
                    "role" in nd,
                    "cluster_id" in nd
                )
                if any(variants):
                    try:
                        node.attributes.volumes = \
                            node.volume_manager.gen_volumes_info()
                        if node.cluster:
                            node.cluster.add_pending_changes(
                                "disks",
                                node_id=node.id
                            )
                    except Exception as exc:
                        msg = (
                            "Failed to generate volumes "
                            "info for node '{0}': '{1}'"
                        ).format(
                            node.name or nd.get("mac") or nd.get("id"),
                            str(exc) or "see logs for details"
                        )
                        logger.warning(traceback.format_exc())
                        notifier.notify("error", msg, node_id=node.id)

                self.db.commit()
            if is_agent:
                # Update node's NICs.
                if node.meta and 'interfaces' in node.meta:
                    # we won't update interfaces if data is invalid
                    network_manager.update_interfaces_info(node.id)

            nodes_updated.append(node)
            self.db.commit()
            if 'cluster_id' in nd and nd['cluster_id'] != old_cluster_id:
                if old_cluster_id:
                    network_manager.clear_assigned_networks(node.id)
                    network_manager.clear_all_allowed_networks(node.id)
                if nd['cluster_id']:
                    network_manager.allow_network_assignment_to_all_interfaces(
                        node.id
                    )
                    network_manager.assign_networks_to_main_interface(node.id)
        return map(NodeHandler.render, nodes_updated)