def run(self):
    super(FakeDeletionThread, self).run()
    receiver = NailgunReceiver
    kwargs = {
        'task_uuid': self.task_uuid,
        'nodes': self.data['args']['nodes'],
        'status': 'ready'
    }
    nodes_to_restore = self.data['args'].get('nodes_to_restore', [])
    resp_method = getattr(receiver, self.respond_to)
    resp_method(**kwargs)

    recover_nodes = self.params.get("recover_nodes", True)
    if not recover_nodes:
        return

    for node_data in nodes_to_restore:
        # An offline node has just been deleted from the db and cannot be
        # recreated with the "discover" status
        if "online" in node_data and not node_data["online"]:
            continue

        node_data["status"] = "discover"
        objects.Node.create(node_data)

    db().commit()
def message(cls, task):
    nodes_to_reset = db().query(Node).filter(
        Node.cluster_id == task.cluster.id
    ).yield_per(100)
    rpc_message = make_astute_message(
        task,
        "reset_environment",
        "reset_environment_resp",
        {
            "nodes": [
                {
                    'uid': n.uid,
                    'roles': n.roles,
                    'slave_name': objects.Node.make_slave_name(n)
                } for n in nodes_to_reset
            ],
            "engine": {
                "url": settings.COBBLER_URL,
                "username": settings.COBBLER_USER,
                "password": settings.COBBLER_PASSWORD,
                "master_ip": settings.MASTER_IP,
            }
        }
    )
    db().commit()
    return rpc_message
def message(cls, task, nodes_to_provisioning):
    logger.debug("ProvisionTask.message(task=%s)" % task.uuid)
    task = objects.Task.get_by_uid(
        task.id,
        fail_if_not_found=True,
        lock_for_update=True
    )
    objects.NodeCollection.lock_nodes(nodes_to_provisioning)
    serialized_cluster = provisioning_serializers.serialize(
        task.cluster, nodes_to_provisioning)

    for node in nodes_to_provisioning:
        if settings.FAKE_TASKS or settings.FAKE_TASKS_AMQP:
            continue

        admin_net_id = objects.Node.get_network_manager(
            node
        ).get_admin_network_group_id(node.id)
        TaskHelper.prepare_syslog_dir(node, admin_net_id)

    rpc_message = make_astute_message(
        task,
        'provision',
        'provision_resp',
        {
            'provisioning_info': serialized_cluster
        }
    )
    db().commit()
    return rpc_message
def update_verify_networks(cls, uuid, status, progress, msg, result):
    # TODO(dshulyak) move network tests into ostf
    task = db().query(Task).filter_by(uuid=uuid).first()
    if not task:
        logger.error("Can't set status='%s', message='%s': "
                     "No task with UUID %s found!",
                     status, msg, uuid)
        return

    previous_status = task.status

    statuses = [sub.status for sub in task.subtasks]
    messages = [sub.message for sub in task.subtasks]
    messages.append(msg)
    statuses.append(status)
    if any(st == 'error' for st in statuses):
        task.status = 'error'
    else:
        task.status = status or task.status
    task.progress = progress or task.progress
    task.result = result or task.result
    # join messages if not None or ""
    task.message = '\n'.join([m for m in messages if m])
    db().commit()

    if previous_status != task.status and task.cluster_id:
        logger.debug("Updating cluster status: "
                     "cluster_id: %s status: %s",
                     task.cluster_id, status)
        cls.update_cluster_status(uuid)
def message(cls, task, stop_task):
    nodes_to_stop = db().query(Node).filter(
        Node.cluster_id == task.cluster.id
    ).filter(
        not_(Node.status == 'ready')
    ).yield_per(100)
    rpc_message = make_astute_message(
        task,
        "stop_deploy_task",
        "stop_deployment_resp",
        {
            "stop_task_uuid": stop_task.uuid,
            "nodes": [
                {
                    'uid': n.uid,
                    'roles': n.roles,
                    'slave_name': objects.Node.make_slave_name(n),
                    'admin_ip': objects.Node.get_network_manager(
                        n
                    ).get_admin_ip_for_node(n.id)
                } for n in nodes_to_stop
            ],
            "engine": {
                "url": settings.COBBLER_URL,
                "username": settings.COBBLER_USER,
                "password": settings.COBBLER_PASSWORD,
                "master_ip": settings.MASTER_IP,
            }
        }
    )
    db().commit()
    return rpc_message
def PUT(self, cluster_id):
    """:returns: JSONized Cluster object.
    :http: * 200 (OK)
           * 400 (invalid cluster data specified)
           * 404 (cluster not found in db)
    """
    cluster = self.get_object_or_404(Cluster, cluster_id)
    data = self.checked_data(cluster_id=cluster_id)

    network_manager = cluster.network_manager

    for key, value in data.iteritems():
        if key == "nodes":
            # TODO(NAME): separate nodes
            # for deletion and addition by set().
            new_nodes = db().query(Node).filter(
                Node.id.in_(value)
            )
            nodes_to_remove = [n for n in cluster.nodes
                               if n not in new_nodes]
            nodes_to_add = [n for n in new_nodes
                            if n not in cluster.nodes]
            for node in nodes_to_add:
                if not node.online:
                    raise web.badrequest(
                        "Can not add offline node to cluster")

            map(cluster.nodes.remove, nodes_to_remove)
            map(cluster.nodes.append, nodes_to_add)
            for node in nodes_to_remove:
                network_manager.clear_assigned_networks(node)
            for node in nodes_to_add:
                network_manager.assign_networks_by_default(node)
        else:
            setattr(cluster, key, value)

    db().commit()
    return self.render(cluster)
def message(cls, task, nodes):
    logger.debug("%s.message(task=%s)", cls.__class__.__name__, task.uuid)

    for n in nodes:
        if n.pending_roles:
            n.roles += n.pending_roles
            n.pending_roles = []
        n.status = 'provisioned'
        n.progress = 0

    # here we replace deployment data if the user redefined it
    serialized_cluster = deployment_serializers.serialize(
        task.cluster, nodes)

    # After serialization set pending_addition to False
    for node in nodes:
        node.pending_addition = False

    rpc_message = make_astute_message(
        task,
        'deploy',
        'deploy_resp',
        {
            'deployment_info': serialized_cluster
        }
    )
    db().commit()
    return rpc_message
def PUT(self, cluster_id):
    """:returns: JSONized Cluster attributes.
    :http: * 200 (OK)
           * 400 (wrong attributes data specified)
           * 404 (cluster not found in db)
           * 500 (cluster has no attributes)
    """
    cluster = self.get_object_or_404(Cluster, cluster_id)
    if not cluster.attributes:
        raise web.internalerror("No attributes found!")

    data = self.checked_data()

    if cluster.is_locked:
        error = web.forbidden()
        error.data = "Environment attributes can't be changed " \
                     "after or during deployment."
        raise error

    for key, value in data.iteritems():
        setattr(cluster.attributes, key, value)
    cluster.add_pending_changes("attributes")

    db().commit()
    return {"editable": cluster.attributes.editable}
def PUT(self, cluster_id):
    """:returns: JSONized Cluster attributes.
    :http: * 200 (OK)
           * 400 (wrong attributes data specified)
           * 404 (cluster not found in db)
           * 500 (cluster has no attributes)
    """
    cluster = self.get_object_or_404(
        Cluster,
        cluster_id,
        log_404=(
            "warning",
            "Error: there is no cluster "
            "with id '{0}' in DB.".format(cluster_id)
        )
    )

    if not cluster.attributes:
        logger.error('ClusterAttributesDefaultsHandler: no attributes'
                     ' found for cluster_id %s' % cluster_id)
        raise web.internalerror("No attributes found!")

    cluster.attributes.editable = cluster.release.attributes_metadata.get(
        "editable"
    )
    db().commit()
    cluster.add_pending_changes("attributes")

    logger.debug('ClusterAttributesDefaultsHandler:'
                 ' editable attributes for cluster_id %s were reset'
                 ' to default' % cluster_id)
    return {"editable": cluster.attributes.editable}
def PUT(self, cluster_id):
    cluster = self.get_object_or_404(Cluster, cluster_id)
    data = self.checked_data()

    network_manager = NetworkManager()

    for key, value in data.iteritems():
        if key == "nodes":
            # TODO: separate nodes for deletion and addition by set().
            new_nodes = db().query(Node).filter(
                Node.id.in_(value)
            )
            nodes_to_remove = [n for n in cluster.nodes
                               if n not in new_nodes]
            nodes_to_add = [n for n in new_nodes
                            if n not in cluster.nodes]
            for node in nodes_to_add:
                if not node.online:
                    raise web.badrequest(
                        "Can not add offline node to cluster")

            map(cluster.nodes.remove, nodes_to_remove)
            map(cluster.nodes.append, nodes_to_add)
            for node in nodes_to_remove:
                network_manager.clear_assigned_networks(node.id)
                network_manager.clear_all_allowed_networks(node.id)
            for node in nodes_to_add:
                network_manager.allow_network_assignment_to_all_interfaces(
                    node.id
                )
                network_manager.assign_networks_to_main_interface(node.id)
        else:
            setattr(cluster, key, value)

    db().commit()
    return self.render(cluster)
def update(cls, role, data):
    role.name = data['name']
    cls._update_release(role, data)
    db().flush()
    return role
def _remove_obsolete_tasks(self):
    cluster_tasks = objects.TaskCollection.get_cluster_tasks(
        cluster_id=self.cluster.id)
    current_tasks = objects.TaskCollection.filter_by(
        cluster_tasks,
        name=consts.TASK_NAMES.deploy)

    # locking cluster
    objects.Cluster.get_by_uid(
        self.cluster.id,
        fail_if_not_found=True,
        lock_for_update=True
    )
    for task in current_tasks:
        if task.status in (consts.TASK_STATUSES.ready,
                           consts.TASK_STATUSES.error):
            objects.Task.delete(task)
            db().flush()

    obsolete_tasks = objects.TaskCollection.filter_by_list(
        cluster_tasks,
        'name',
        (consts.TASK_NAMES.stop_deployment,
         consts.TASK_NAMES.reset_environment)
    )
    for task in obsolete_tasks:
        objects.Task.delete(task)
        db().flush()
def add_pending_changes(cls, instance, changes_type, node_id=None):
    """Add pending changes for current Cluster.

    If node_id is specified then links created changes with node.

    :param instance: Cluster instance
    :param changes_type: name of changes to add
    :param node_id: node id for changes
    :returns: None
    """
    # TODO(enchantner): check if node belongs to cluster
    ex_chs = db().query(models.ClusterChanges).filter_by(
        cluster=instance,
        name=changes_type
    )
    if not node_id:
        ex_chs = ex_chs.first()
    else:
        ex_chs = ex_chs.filter_by(node_id=node_id).first()
    # do nothing if changes with the same name are already pending
    if ex_chs:
        return

    ch = models.ClusterChanges(
        cluster_id=instance.id,
        name=changes_type
    )
    if node_id:
        ch.node_id = node_id
    db().add(ch)
    db().flush()
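# Usage sketch (illustrative, not from the project): how a caller might record
# pending changes via the method above. `Cluster` is assumed to be the objects
# wrapper class defining add_pending_changes, and `cluster`/`node` are
# hypothetical, already-loaded model instances.
def mark_cluster_dirty(cluster, node=None):
    # cluster-wide change; a second call with the same name is a no-op
    Cluster.add_pending_changes(cluster, "attributes")
    if node is not None:
        # change linked to a single node
        Cluster.add_pending_changes(cluster, "disks", node_id=node.id)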
def __add_new_interface(cls, node, interface_attrs):
    interface = NodeNICInterface()
    interface.node_id = node.id
    cls.__set_interface_attributes(interface, interface_attrs)
    db().add(interface)
    db().commit()
    node.interfaces.append(interface)
def PUT(self):
    """:returns: node id.
    :http: * 200 (node is successfully updated)
           * 304 (node data not changed since last request)
           * 400 (data validation failed)
           * 404 (node not found)
    """
    nd = self.checked_data(
        self.validator.validate_update,
        data=web.data())

    node = self.collection.single.get_by_meta(nd)

    if not node:
        raise self.http(404, "Can't find node: {0}".format(nd))

    node.timestamp = datetime.now()

    if not node.online:
        node.online = True
        msg = u"Node '{0}' is back online".format(node.human_readable_name)
        logger.info(msg)
        notifier.notify("discover", msg, node_id=node.id)
    db().flush()

    if 'agent_checksum' in nd and (
        node.agent_checksum == nd['agent_checksum']
    ):
        return {'id': node.id, 'cached': True}

    self.collection.single.update_by_agent(node, nd)
    return {"id": node.id}
def attach_to_model(cls, graph_instance, instance, graph_type=None):
    """Attach an existing deployment graph to the given model.

    graph_type works like a unique namespace: if a graph with this type
    is already attached to the model, it will be replaced.

    :param graph_instance: deployment graph model
    :type graph_instance: models.DeploymentGraph
    :param instance: model that should have relation to graph
    :type instance: models.Plugin|models.Cluster|models.Release|
    :param graph_type: graph type
    :type graph_type: basestring
    :return: graph instance
    :rtype: models.DeploymentGraph
    :raises: IntegrityError
    """
    if graph_type is None:
        graph_type = consts.DEFAULT_DEPLOYMENT_GRAPH_TYPE
    association_class = cls.get_association_for_model(instance)
    if association_class:
        association = association_class(
            type=graph_type,
            deployment_graph_id=graph_instance.id
        )
        instance.deployment_graphs_assoc.append(association)
        db().flush()
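# Usage sketch (illustrative): per the docstring above, graph_type acts as a
# namespace, so attaching under a custom type keeps the default graph intact.
# `DeploymentGraph` is assumed to be the objects class defining
# attach_to_model; `plugin` and `graph_instance` are hypothetical,
# pre-existing instances.
def attach_custom_graph(plugin, graph_instance):
    DeploymentGraph.attach_to_model(
        graph_instance, plugin, graph_type='my-custom-type')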
def create(cls, data):
    """Create DeploymentTask model.

    :param data: task data
    :type data: dict
    :return: DeploymentGraphTask instance
    :rtype: DeploymentGraphTask
    """
    db_fields = set(c.name for c in cls.model.__table__.columns)
    data_to_create = {}
    custom_fields = {}  # fields that are not in the table

    for field, value in six.iteritems(data):
        # pack string roles to be [role]
        if field in ('role', 'groups') and \
                isinstance(value, six.string_types):
            value = [value]
        # remap fields
        if field in cls._incoming_fields_map:
            data_to_create[cls._incoming_fields_map[field]] = value
        else:
            if field in db_fields:
                data_to_create[field] = value
            else:
                custom_fields[field] = value

    # wrap custom fields
    if custom_fields:
        data_to_create['_custom'] = custom_fields

    # TODO(ikutukov): super for this create method is not called to avoid
    # a forced flush in the base method.
    deployment_task_instance = models.DeploymentGraphTask(**data_to_create)
    db().add(deployment_task_instance)
    return deployment_task_instance
def update_slave_nodes_fqdn(cls, nodes):
    for n in nodes:
        fqdn = cls.make_slave_fqdn(n.id)
        if n.fqdn != fqdn:
            n.fqdn = fqdn
            logger.debug("Updating node fqdn: %s %s", n.id, n.fqdn)
    db().commit()
def update(cls, instance, data):
    """Update DeploymentGraph and related DeploymentGraphTask models.

    It is possible to create an empty graph if no tasks data is provided.

    :param instance: DeploymentGraph instance
    :type instance: DeploymentGraph
    :param data: data to update
    :type data: dict
    :returns: updated DeploymentGraph instance
    :rtype: DeploymentGraph
    """
    data = data.copy()
    tasks = data.pop('tasks', None)
    super(DeploymentGraph, cls).update(instance, data)
    if tasks is not None:
        instance.tasks = []
        # flush is required to avoid task.id+graph.id key conflicts
        db().flush()
        for task in tasks:
            instance.tasks.append(
                DeploymentGraphTask.create(task))
    db().flush()
    return instance
def get_ifaces_for_network_in_cluster(cls, instance, net):
    """Method for receiving node_id:iface pairs for all nodes
    in a specific cluster

    :param instance: Cluster instance
    :param net: Nailgun specific network name
    :type net: str
    :returns: List of (node_id, iface) pairs for all nodes in cluster.
    """
    nics_db = db().query(
        models.NodeNICInterface.node_id,
        models.NodeNICInterface.name
    ).filter(
        models.NodeNICInterface.node.has(cluster_id=instance.id),
        models.NodeNICInterface.assigned_networks_list.any(name=net)
    )
    bonds_db = db().query(
        models.NodeBondInterface.node_id,
        models.NodeBondInterface.name
    ).filter(
        models.NodeBondInterface.node.has(cluster_id=instance.id),
        models.NodeBondInterface.assigned_networks_list.any(name=net)
    )
    return nics_db.union(bonds_db)
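# Usage sketch (illustrative): the union query above yields
# (node_id, interface_name) tuples, so callers can iterate it directly.
# `Cluster` is assumed to be the objects class defining the method, and
# 'storage' is assumed to be a network name present in the cluster.
def log_ifaces_for_net(cluster, net_name='storage'):
    for node_id, iface_name in Cluster.get_ifaces_for_network_in_cluster(
            cluster, net_name):
        logger.debug("network %s is carried by node %s iface %s",
                     net_name, node_id, iface_name)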
def update_task_status(cls, uuid, status, progress, msg="", result=None):
    # verify_networks - the task is expected to receive a result with
    # some data if connectivity verification fails
    logger.debug("Updating task: %s", uuid)
    task = db().query(Task).filter_by(uuid=uuid).first()
    if not task:
        logger.error("Can't set status='%s', message='%s': "
                     "no task with UUID %s found!",
                     status, msg, uuid)
        return

    previous_status = task.status

    data = {'status': status, 'progress': progress,
            'message': msg, 'result': result}
    for key, value in data.iteritems():
        if value is not None:
            setattr(task, key, value)
            logger.info(
                u"Task {0} ({1}) {2} is set to {3}".format(
                    task.uuid,
                    task.name,
                    key,
                    value
                )
            )
    db().add(task)
    db().commit()

    if previous_status != status and task.cluster_id:
        logger.debug("Updating cluster status: "
                     "cluster_id: %s status: %s",
                     task.cluster_id, status)
        cls.update_cluster_status(uuid)
    if task.parent:
        logger.debug("Updating parent task: %s.", task.parent.uuid)
        cls.update_parent_task(task.parent.uuid)
def remove_from_cluster(cls, instance):
    """Remove Node from Cluster.

    Also drops network assignment for the Node and clears both roles
    and pending roles.

    :param instance: Node instance
    :returns: None
    """
    if instance.cluster:
        Cluster.clear_pending_changes(
            instance.cluster,
            node_id=instance.id
        )
        netmanager = Cluster.get_network_manager(
            instance.cluster
        )
        netmanager.clear_assigned_networks(instance)
        netmanager.clear_bond_configuration(instance)

    cls.update_roles(instance, [])
    cls.update_pending_roles(instance, [])
    cls.remove_replaced_params(instance)
    instance.cluster_id = None
    instance.group_id = None
    instance.kernel_params = None
    instance.reset_name_to_default()
    db().flush()
    db().refresh(instance)
def get_interface_by_net_name(cls, node_id, netname):
    """Get an interface with the specified network assigned to it.

    This method first checks for a NodeNICInterface with the specified
    network assigned. If that fails it will look for a NodeBondInterface
    with that network assigned.

    :param node_id: Node ID
    :param netname: NetworkGroup name
    :returns: either NodeNICInterface or NodeBondInterface
    """
    iface = db().query(models.NodeNICInterface).join(
        (models.NetworkGroup,
         models.NodeNICInterface.assigned_networks_list)
    ).filter(
        models.NetworkGroup.name == netname
    ).filter(
        models.NodeNICInterface.node_id == node_id
    ).first()
    if iface:
        return iface

    return db().query(models.NodeBondInterface).join(
        (models.NetworkGroup,
         models.NodeBondInterface.assigned_networks_list)
    ).filter(
        models.NetworkGroup.name == netname
    ).filter(
        models.NodeBondInterface.node_id == node_id
    ).first()
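# Usage sketch (illustrative): thanks to the NIC-then-bond fallback above, the
# caller gets whichever interface type actually carries the network.
# `NetworkManager` is assumed to be the class defining the method; the admin
# network name mirrors consts.NETWORKS.fuelweb_admin used elsewhere in these
# snippets.
def get_admin_iface_name(node_id):
    iface = NetworkManager.get_interface_by_net_name(
        node_id, 'fuelweb_admin')
    return iface.name if iface else None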
def update(cls, cluster, network_configuration):
    from nailgun.network.neutron import NeutronManager
    network_manager = NeutronManager()
    if 'networks' in network_configuration:
        for ng in network_configuration['networks']:
            if ng['id'] == network_manager.get_admin_network_group_id():
                continue

            ng_db = db().query(NetworkGroup).get(ng['id'])

            for key, value in ng.iteritems():
                if key == "ip_ranges":
                    cls._set_ip_ranges(ng['id'], value)
                else:
                    if key == 'cidr' and \
                            not ng['name'] in ('private',):
                        network_manager.update_ranges_from_cidr(
                            ng_db, value)

                    setattr(ng_db, key, value)

            if ng['name'] != 'private':
                network_manager.create_networks(ng_db)

            ng_db.cluster.add_pending_changes('networks')

    if 'neutron_parameters' in network_configuration:
        for key, value in network_configuration['neutron_parameters'] \
                .items():
            setattr(cluster.neutron_config, key, value)
        db().add(cluster.neutron_config)
        db().commit()
def get_networks_to_interfaces_mapping_on_all_nodes(cls, cluster):
    """Query networks to interfaces mapping on all nodes in cluster.

    Returns combined results for NICs and bonds for every node.
    Names are returned for node and interface (NIC or bond),
    IDs are returned for networks. Results are sorted by node name,
    then by interface name.
    """
    nodes_nics_networks = db().query(
        models.Node.hostname,
        models.NodeNICInterface.name,
        models.NetworkGroup.id,
    ).join(
        models.Node.nic_interfaces,
        models.NodeNICInterface.assigned_networks_list
    ).filter(
        models.Node.cluster_id == cluster.id,
    )
    nodes_bonds_networks = db().query(
        models.Node.hostname,
        models.NodeBondInterface.name,
        models.NetworkGroup.id,
    ).join(
        models.Node.bond_interfaces,
        models.NodeBondInterface.assigned_networks_list
    ).filter(
        models.Node.cluster_id == cluster.id,
    )
    return nodes_nics_networks.union(
        nodes_bonds_networks
    ).order_by(
        # sort by columns 1 then 2 of the result; they cannot be referenced
        # by name as column 2 has different names in the two halves of
        # the union
        '1', '2'
    )
def test_astute_message_creation(self, mmake_astute_message, mrpc):
    # the 'discover' node is not deployed yet -- it will be removed
    # immediately
    n_discover = self.add_node(consts.NODE_STATUSES.discover)
    # the 'ready' node is deployed -- astute will take care of it
    self.add_node(consts.NODE_STATUSES.ready)
    # the 'offline' node will also be passed to astute
    self.add_node(consts.NODE_STATUSES.ready, online=False)

    nodes = DeletionTask.get_task_nodes_for_cluster(self.cluster_db)
    astute_nodes = [node for node in nodes['nodes_to_delete']
                    if node['id'] != n_discover.id]
    self.assertEqual(len(nodes['nodes_to_delete']), 3)
    self.assertEqual(len(nodes['nodes_to_restore']), 0)

    task = models.Task(
        name=consts.TASK_NAMES.cluster_deletion,
        cluster=self.cluster_db
    )
    db().add(task)
    db().commit()

    DeletionTask.execute(task, nodes=nodes)

    self.assertEqual(mmake_astute_message.call_count, 1)
    message = mmake_astute_message.call_args[0][3]
    self.assertIn('nodes', message)
    self.assertItemsEqual(message['nodes'], astute_nodes)
    self.assertEqual(mrpc.cast.call_count, 1)
def test_undeployed_node_called(
        self, mremove_undeployed_nodes_from_db, mmake_astute_message,
        mrpc):
    self.add_node(consts.NODE_STATUSES.discover)

    nodes = DeletionTask.get_task_nodes_for_cluster(self.cluster_db)
    self.assertEqual(len(nodes['nodes_to_delete']), 1)
    self.assertEqual(len(nodes['nodes_to_restore']), 0)

    task = models.Task(
        name=consts.TASK_NAMES.cluster_deletion,
        cluster=self.cluster_db
    )
    db().add(task)
    db().commit()

    mremove_undeployed_nodes_from_db.return_value = []

    DeletionTask.execute(task, nodes=nodes)

    mremove_undeployed_nodes_from_db.assert_called_once_with(
        nodes['nodes_to_delete'])
    self.assertEqual(mmake_astute_message.call_count, 1)
    self.assertEqual(mrpc.cast.call_count, 1)
def update_roles(cls, instance, new_roles):
    """Update roles for Node instance.

    Logs a warning if the node doesn't belong to a Cluster.

    :param instance: Node instance
    :param new_roles: list of new role names
    :returns: None
    """
    if not instance.cluster_id:
        logger.warning(
            u"Attempting to assign roles to node "
            u"'{0}' which isn't added to cluster".format(
                instance.name or instance.id
            )
        )
        return

    if new_roles:
        instance.role_list = db().query(models.Role).filter_by(
            release_id=instance.cluster.release_id,
        ).filter(
            models.Role.name.in_(new_roles)
        ).all()
    else:
        instance.role_list = []
    db().flush()
    db().refresh(instance)
def expose_network_check_error_messages(task, result, err_messages):
    if err_messages:
        task.result = result
        db().add(task)
        db().commit()
        full_err_msg = u"\n".join(err_messages)
        raise errors.NetworkCheckError(full_err_msg, add_client=False)
def update_attributes(cls, instance, data):
    PluginManager.process_cluster_attributes(instance, data['editable'])
    for key, value in data.iteritems():
        setattr(instance.attributes, key, value)
    cls.add_pending_changes(instance, "attributes")
    db().flush()
def setUpClass(cls):
    cls.db = db()
    cls.app = TestApp(build_app().wsgifunc())
    syncdb()
def DELETE(self, release_id):
    release = self.get_object_or_404(Release, release_id)
    db().delete(release)
    db().commit()
    raise web.webapi.HTTPError(
        status="204 No Content",
        data=""
    )
def update_extensions_for_object(obj, extensions_names):
    obj.extensions = list(set(obj.extensions + extensions_names))
    db().flush()
def remove_extensions_from_object(obj, extensions_names):
    obj.extensions = list(set(obj.extensions) - set(extensions_names))
    db().flush()
def PUT(self):
    # This handler runs periodically (roughly every 120s) and is used to
    # track changes on all nodes.
    """:returns: node id.
    :http: * 200 (node is successfully updated)
           * 304 (node data not changed since last request)
           * 400 (invalid nodes data specified)
           * 404 (node not found)
    """
    data = jsonutils.loads(web.data())
    meta = data.get('meta', {})
    ip = data.get("ip")
    nodeinstance = self.collection.single.get_by_ip(ip)
    if nodeinstance is None:
        logger.info("No node with ip {0} in the DB, looking it up "
                    "by power ip..".format(ip))
        # executed when a node in the 'discover' status sends a PUT request
        nodeinstance = self.collection.single.get_by_powerip(ip)
        if nodeinstance is None:
            logger.info("No node with ip or power ip {0} in the DB, "
                        "looking it up by mac..".format(ip))
            # Resetting an environment does not delete rows from the nodes
            # table, but the node data is still submitted here; creating a
            # new node would cause a mac conflict.
            self.collection.single.copyfile_to_agent(data["ip"])
            data["nochange_powerip"] = True
            nodeinstance = self.collection.single.get_by_mac_or_uid(
                data['mac'])
            if nodeinstance is None:
                logger.info("The submitted node data is not in the DB, "
                            "creating a new node...")
                self.collection.single.create(data)
                return

    data['mac'] = nodeinstance.mac  # ovsbind may submit a non-existent mac

    # don't update interfaces information, if agent has sent an empty array
    # After a cluster is deleted, nodes are re-discovered through this
    # handler; since cluster deletion removes rows from the nodes table,
    # nodeinstance may be None here and raise an exception.
    if meta and len(
            meta.get('interfaces', [])) == 0 and \
            nodeinstance.meta.get('interfaces'):
        logger.warning(
            u'Node {0} has received an empty interfaces array - '
            u'interfaces information will not be updated'.format(
                nodeinstance.human_readable_name))
        meta['interfaces'] = nodeinstance.meta['interfaces']

    nd = self.checked_data(self.validator.validate_update,
                           data=jsonutils.dumps(data))
    node = self.collection.single.get_by_meta(nd)

    if not node:
        raise self.http(404, "Can't find node: {0}".format(nd))

    node.timestamp = datetime.now()
    # test copyfile
    # self.collection.single.copyfile_to_agent(node)

    if not node.online:
        # the node was marked offline; bring it back online
        node.online = True
        msg = u"Node '{0}' is back online".format(node.name)
        logger.info(msg)
        notifier.notify("discover", msg, node_id=node.id)
    db().flush()

    if 'agent_checksum' in nd and (
        node.agent_checksum == nd['agent_checksum']
    ):
        return {'id': node.id, 'cached': True}

    self.collection.single.update_by_agent(node, nd)
    return {"id": node.id}
def verify_data_correctness(cls, node):
    db_node = db().query(Node).filter_by(id=node['id']).first()
    if not db_node:
        raise errors.InvalidData(
            "There is no node with ID '{0}' in DB".format(node['id']),
            log_message=True)
    if objects.Node.is_interfaces_configuration_locked(db_node):
        raise errors.InvalidData(
            "Node '{0}': Interfaces configuration can't be changed after "
            "or during deployment.".format(db_node.id))

    interfaces = node['interfaces']
    db_interfaces = db_node.nic_interfaces
    net_manager = objects.Cluster.get_network_manager(db_node.cluster)

    bonded_eth_ids = set()
    pxe_iface_name = net_manager._get_pxe_iface_name(db_node)
    if not pxe_iface_name:
        raise errors.InvalidData(
            "Node '{0}': Interfaces configuration can't be changed if "
            "there is no pxe interface in DB".format(node['id']),
            log_message=True)

    for iface in interfaces:
        iface_nets = [n.get('name')
                      for n in iface.get('assigned_networks')]
        # networks can be assigned only to nodes added into a cluster
        if iface_nets and not db_node.cluster:
            raise errors.InvalidData(
                "Node '{0}': networks {1} cannot be assigned "
                "as the node is not added to any cluster".format(
                    node['id'], ", ".join(iface_nets)))
        if iface['type'] == consts.NETWORK_INTERFACE_TYPES.ether:
            db_iface = next(
                six.moves.filter(
                    lambda i: i.id == iface['id'],
                    db_interfaces),
                None)
            if not db_iface:
                raise errors.InvalidData(
                    "Node '{0}': there is no interface with ID '{1}'"
                    " in DB".format(node['id'], iface['id']),
                    log_message=True)
            if not db_iface.pxe:
                if consts.NETWORKS.fuelweb_admin in iface_nets:
                    raise errors.InvalidData(
                        "Node '{0}': admin network can not be assigned to"
                        " non-pxe interface {1}".format(node['id'],
                                                        iface['name']),
                        log_message=True)
        elif iface['type'] == consts.NETWORK_INTERFACE_TYPES.bond:
            pxe_iface_present = False
            for slave in iface['slaves']:
                iface_id = [i.id for i in db_interfaces
                            if i.name == slave['name']]
                if slave["name"] == pxe_iface_name:
                    pxe_iface_present = True
                if iface_id:
                    if iface_id[0] in bonded_eth_ids:
                        raise errors.InvalidData(
                            "Node '{0}': interface '{1}' is used in bonds "
                            "more than once".format(
                                node['id'], iface_id[0]),
                            log_message=True)
                    bonded_eth_ids.add(iface_id[0])
                else:
                    raise errors.InvalidData(
                        "Node '{0}': there is no interface '{1}' found "
                        "for bond '{2}' in DB".format(
                            node['id'], slave['name'], iface['name']),
                        log_message=True)

            if consts.NETWORKS.fuelweb_admin in iface_nets:
                prohibited_modes = net_manager.\
                    get_prohibited_admin_bond_modes()
                bond_mode = cls.get_bond_mode(iface)
                if bond_mode in prohibited_modes:
                    raise errors.InvalidData(
                        "Node '{0}': interface '{1}' belongs to "
                        "admin network and has lacp mode '{2}'".format(
                            node['id'], iface['name'], bond_mode),
                        log_message=True)
                if not pxe_iface_present:
                    raise errors.InvalidData(
                        "Node '{0}': interface '{1}' belongs to "
                        "admin network and doesn't contain node's pxe "
                        "interface '{2}'".format(
                            node['id'], iface['name'], pxe_iface_name),
                        log_message=True)

    for iface in interfaces:
        if iface['type'] == consts.NETWORK_INTERFACE_TYPES.ether \
                and iface['id'] in bonded_eth_ids \
                and len(iface['assigned_networks']) > 0:
            raise errors.InvalidData(
                "Node '{0}': interface '{1}' cannot have "
                "assigned networks as it is used in "
                "bond".format(node['id'], iface['id']),
                log_message=True)

    cls.check_networks_are_acceptable_for_node_to_assign(
        interfaces, db_node)
def execute(self, graphs, dry_run=False, noop_run=False, force=False,
            debug=False, subgraphs=None):
    """Start a new transaction with the given parameters.

    Under the hood, starting a new transaction means serializing a lot
    of stuff and assembling an Astute message. So at the end of the
    method we either send an Astute message with the execution flow or
    mark the transaction as failed.

    :param graphs: a list of graph types to be run on the given nodes
    :param dry_run: run the new transaction in dry-run mode
    :param noop_run: run the new transaction in noop-run mode
    :param force: re-evaluate tasks' conditions as if it's a first run
    :param debug: enable debug mode for the tasks executor
    """
    logger.info(
        'Start new transaction: '
        'cluster=%d graphs=%s dry_run=%d noop_run=%s force=%d ',
        self.cluster_id, graphs, dry_run, noop_run, force)

    # So far we don't support parallel execution of transactions within
    # one cluster. So we need to fail quickly if there's a transaction
    # in progress.
    cluster = self._acquire_cluster()

    # Unfortunately, for historical reasons the UI polls 'deployment'
    # tasks for a cluster and expects there to be only one. That one is
    # considered the latest and is used for tracking progress and showing
    # the error message. So we have come up with the following workaround:
    #
    # * on each new transaction we mark previous ones as deleted
    # * the /tasks endpoint doesn't return "deleted" transactions
    # * the /transactions endpoint does return "deleted" transactions
    #
    # FIXME: We must provide a way to get the latest transaction with its
    #        sub-transactions via API. Once it's done, and the UI uses it,
    #        we can safely remove this workaround.
    _remove_obsolete_tasks(cluster)

    transaction = objects.Transaction.create({
        'name': consts.TASK_NAMES.deploy,
        'cluster_id': self.cluster_id,
        'status': consts.TASK_STATUSES.running,
        'dry_run': dry_run or noop_run,
    })
    objects.Transaction.on_start(transaction)
    helpers.TaskHelper.create_action_log(transaction)

    for graph in graphs:
        # The 'dry_run' flag is a part of the transaction, so we can
        # restore its value anywhere. That doesn't apply to the 'force'
        # flag, because it affects only context calculation. However, we
        # need somehow to pass it down in order to build the context once
        # the first graph is executed (much, much later, when we call
        # continue_ in the RPC receiver).
        cache = graph.copy()
        cache['force'] = force
        cache['noop_run'] = noop_run
        cache['dry_run'] = dry_run
        cache['debug'] = debug
        cache['subgraphs'] = subgraphs

        transaction.create_subtask(
            self.task_name,
            status=consts.TASK_STATUSES.pending,
            dry_run=dry_run or noop_run,
            graph_type=graph['type'],
            # We need to save input parameters in the cache, so the RPC
            # receiver can use them to do further serialization.
            #
            # FIXME: Consider using a separate set of columns.
            cache=cache,
        )

    # We need to commit the transaction because the asynchronous call
    # below might be executed in a separate process or thread.
    db().commit()

    self.continue_(transaction)
    return transaction
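# Usage sketch (illustrative): start a dry-run transaction over the default
# graph type. The manager class name here is a hypothetical stand-in for
# whatever class defines execute(); the graph dict shape follows the
# graph['type'] access in the method above.
def start_dry_run(cluster_id):
    manager = TransactionsManager(cluster_id)  # hypothetical class name
    return manager.execute(
        graphs=[{'type': consts.DEFAULT_DEPLOYMENT_GRAPH_TYPE}],
        dry_run=True,
    )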
def test_unknown_locks_chain_failed(self):
    db().query(Release).with_lockmode('update').all()
    self.assertRaises(
        LockTransitionNotAllowedError,
        db().query(Node).with_lockmode, 'update')
    db().rollback()

    db().query(Task).with_lockmode('update').all()
    db().query(Cluster).with_lockmode('update').all()
    db().query(Node).with_lockmode('update').all()
    self.assertRaises(
        LockTransitionNotAllowedError,
        db().query(Task).with_lockmode, 'update')
    db().rollback()
def tearDown(self):
    super(TestDeadlockDetector, self).tearDown()
    db().rollback()
def test_locks_chain(self):
    db().query(Cluster).with_lockmode('update').all()
    db().query(Node).with_lockmode('update').all()
    db().commit()

    db().query(Cluster).with_lockmode('update').all()
    db().query(Cluster).with_lockmode('update').all()
    db().query(Node).with_lockmode('update').all()
    db().query(Node).with_lockmode('update').all()
    db().query(Node).with_lockmode('update').all()
    db().commit()
def move_roles_to_pending_roles(cls, instance):
    """Move roles to pending_roles."""
    instance.pending_roles = instance.pending_roles + instance.roles
    instance.roles = []
    instance.primary_roles = []
    db().flush()
def test_lock_cleaned_on_rollback(self):
    db().query(Cluster).with_lockmode('update').all()
    db().query(Node).with_lockmode('update').all()
    self.assertTrue(len(deadlock_detector.context.locks) > 0)
    db().rollback()
    self.assertEquals(0, len(deadlock_detector.context.locks))
def update(cls, instance, data):
    """Update Node instance with specified parameters in DB.

    This includes:

    * adding node to Cluster (if cluster_id is not None in data) \
    (see :func:`add_into_cluster`)
    * updating roles for Node if it belongs to Cluster \
    (see :func:`update_roles` and :func:`update_pending_roles`)
    * removing node from Cluster (if cluster_id is None in data) \
    (see :func:`remove_from_cluster`)
    * updating interfaces for Node in DB (see :func:`update_interfaces`)
    * creating default Node attributes (see :func:`create_attributes`)

    :param data: dictionary of key-value pairs as object fields
    :returns: Node instance
    """
    data.pop("id", None)
    data.pop("network_data", None)

    roles = data.pop("roles", None)
    pending_roles = data.pop("pending_roles", None)
    new_meta = data.pop("meta", None)
    update_by_agent = data.pop("is_agent", False)

    disks_changed = None
    if new_meta and "disks" in new_meta and "disks" in instance.meta:
        key = operator.itemgetter("name")

        new_disks = sorted(new_meta["disks"], key=key)
        old_disks = sorted(instance.meta["disks"], key=key)

        disks_changed = (new_disks != old_disks)

    # TODO(enchantner): fix this temporary hack in clients
    if "cluster_id" not in data and "cluster" in data:
        cluster_id = data.pop("cluster", None)
        data["cluster_id"] = cluster_id

    if new_meta:
        instance.update_meta(new_meta)
        # The call to update_interfaces will execute a select query for
        # the current instance. This appears to overwrite the object in
        # the current session and we lose the meta changes.
        db().flush()
        if cls.hardware_info_locked(instance):
            logger.debug("Interfaces are locked for update on node %s",
                         instance.human_readable_name)
        else:
            instance.ip = data.pop("ip", None) or instance.ip
            instance.mac = data.pop("mac", None) or instance.mac
            db().flush()
            cls.update_interfaces(instance, update_by_agent)

    cluster_changed = False
    add_to_cluster = False
    if "cluster_id" in data:
        new_cluster_id = data.pop("cluster_id")
        if instance.cluster_id:
            if new_cluster_id is None:
                # removing node from cluster
                cluster_changed = True
                cls.remove_from_cluster(instance)
            elif new_cluster_id != instance.cluster_id:
                # changing node cluster to another
                # (is currently not allowed)
                raise errors.CannotUpdate(
                    u"Changing cluster on the fly is not allowed")
        else:
            if new_cluster_id is not None:
                # assigning node to cluster
                cluster_changed = True
                add_to_cluster = True
                instance.cluster_id = new_cluster_id

    if "group_id" in data:
        new_group_id = data.pop("group_id")
        if instance.group_id != new_group_id:
            nm = Cluster.get_network_manager(instance.cluster)
            nm.clear_assigned_networks(instance)
            nm.clear_bond_configuration(instance)
        instance.group_id = new_group_id
        add_to_cluster = True

    # calculating flags
    roles_changed = (
        roles is not None and set(roles) != set(instance.roles))
    pending_roles_changed = (
        pending_roles is not None and
        set(pending_roles) != set(instance.pending_roles))

    super(Node, cls).update(instance, data)

    if roles_changed:
        cls.update_roles(instance, roles)
    if pending_roles_changed:
        cls.update_pending_roles(instance, pending_roles)

    if add_to_cluster:
        cls.add_into_cluster(instance, instance.cluster_id)

    if any((
        roles_changed,
        pending_roles_changed,
        cluster_changed,
        disks_changed,
    )) and instance.status not in (consts.NODE_STATUSES.provisioning,
                                   consts.NODE_STATUSES.deploying):
        # TODO(eli): we should somehow move this condition into an
        # extension; in order to do that we will probably have to create
        # a separate table to keep the disks which were used to create
        # the volumes mapping.
        # Should be solved as a part of blueprint
        # https://blueprints.launchpad.net/fuel/+spec
        # /volume-manager-refactoring
        fire_callback_on_node_update(instance)

    return instance
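# Usage sketch (illustrative): per the update() flow above, passing
# cluster_id=None detaches a node from its cluster, while switching it to a
# different cluster id raises CannotUpdate. `Node` is assumed to be the
# objects class defining update(), and `node` a hypothetical loaded instance.
def detach_from_cluster(node):
    # removing the node also clears its roles and network assignment
    # (see remove_from_cluster earlier in this collection)
    return Node.update(node, {'cluster_id': None})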
def get_nic_by_name(cls, instance, iface_name):
    nic = db().query(models.NodeNICInterface).filter_by(
        name=iface_name
    ).filter_by(
        node_id=instance.id
    ).first()
    return nic
def upload_fixture(fileobj, loader=None):
    fixture = load_fixture(fileobj, loader)

    queue = Queue.Queue()
    keys = {}

    for obj in fixture:
        pk = obj['pk']
        model_name = obj["model"].split(".")[1]

        try:
            itertools.dropwhile(
                lambda m: not hasattr(models, m),
                [model_name.capitalize(),
                 "".join(map(lambda n: n.capitalize(),
                             model_name.split("_")))]
            ).next()
        except StopIteration:
            raise Exception("Couldn't find model {0}".format(model_name))

        obj['model'] = getattr(models, capitalize_model_name(model_name))
        keys[obj['model'].__tablename__] = {}

        # Check if it's already uploaded
        obj_from_db = db().query(obj['model']).get(pk)
        if obj_from_db:
            logger.info("Fixture model '%s' with pk='%s' already"
                        " uploaded. Skipping", model_name, pk)
            continue
        queue.put(obj)

    pending_objects = []

    while True:
        try:
            obj = queue.get_nowait()
        except Exception:
            break

        new_obj = obj['model']()

        fk_fields = {}
        for field, value in obj["fields"].iteritems():
            f = getattr(obj['model'], field)
            impl = getattr(f, 'impl', None)
            fk_model = None
            try:
                if hasattr(f.comparator.prop, "argument"):
                    if hasattr(f.comparator.prop.argument, "__call__"):
                        fk_model = f.comparator.prop.argument()
                    else:
                        fk_model = f.comparator.prop.argument.class_
            except AttributeError:
                pass

            if fk_model:
                if value not in keys[fk_model.__tablename__]:
                    if obj not in pending_objects:
                        queue.put(obj)
                        pending_objects.append(obj)
                        continue
                    else:
                        logger.error(u"Can't resolve foreign key "
                                     "'{0}' for object '{1}'".format(
                                         field, obj["model"]))
                        break
                else:
                    value = keys[fk_model.__tablename__][value].id

            if isinstance(impl, orm.attributes.ScalarObjectAttributeImpl):
                if value:
                    fk_fields[field] = (value, fk_model)
            elif isinstance(impl, orm.attributes.CollectionAttributeImpl):
                if value:
                    fk_fields[field] = (value, fk_model)
            elif hasattr(f, 'property') and isinstance(
                    f.property.columns[0].type,
                    sqlalchemy.types.DateTime):
                if value:
                    setattr(new_obj, field,
                            datetime.strptime(value, "%d-%m-%Y %H:%M:%S"))
                else:
                    setattr(new_obj, field, datetime.now())
            else:
                setattr(new_obj, field, value)

        for field, data in fk_fields.iteritems():
            if isinstance(data[0], int):
                setattr(new_obj, field, db().query(data[1]).get(data[0]))
            elif isinstance(data[0], list):
                for v in data[0]:
                    getattr(new_obj, field).append(
                        db().query(data[1]).get(v))

        db().add(new_obj)
        db().commit()
        keys[obj['model'].__tablename__][obj["pk"]] = new_obj

        # UGLY HACK for testing
        if new_obj.__class__.__name__ == 'Node':
            objects.Node.create_attributes(new_obj)
            objects.Node.update_volumes(new_obj)
            objects.Node.update_interfaces(new_obj)
            db().commit()
def set_network_template(cls, instance):
    template = instance.cluster.network_config.configuration_template
    cls.apply_network_template(instance, template)
    db().flush()
def PUT(self):
    """:returns: Collection of JSONized Node objects.
    :http: * 200 (nodes are successfully updated)
           * 400 (invalid nodes data specified)
    """
    data = self.checked_data(self.validator.validate_collection_update)

    network_manager = NetworkManager()
    q = db().query(Node)
    nodes_updated = []
    for nd in data:
        is_agent = nd.pop("is_agent") if "is_agent" in nd else False

        node = None
        if "mac" in nd:
            node = q.filter_by(mac=nd["mac"]).first() \
                or self.validator.validate_existent_node_mac_update(nd)
        else:
            node = q.get(nd["id"])

        if is_agent:
            node.timestamp = datetime.now()
            if not node.online:
                node.online = True
                msg = u"Node '{0}' is back online".format(
                    node.human_readable_name)
                logger.info(msg)
                notifier.notify("discover", msg, node_id=node.id)
            db().commit()

        old_cluster_id = node.cluster_id

        if nd.get("pending_roles") == [] and node.cluster:
            node.cluster.clear_pending_changes(node_id=node.id)

        if "cluster_id" in nd:
            if nd["cluster_id"] is None and node.cluster:
                node.cluster.clear_pending_changes(node_id=node.id)
                node.roles = node.pending_roles = []
            node.cluster_id = nd["cluster_id"]

        for key, value in nd.iteritems():
            if is_agent and (key, value) == ("status", "discover") \
                    and node.status == "provisioning":
                # We don't update provisioning back to discover
                logger.debug("Node is already provisioning - "
                             "status not updated by agent")
                continue
            if key == "meta":
                node.update_meta(value)
            else:
                setattr(node, key, value)
        db().commit()

        if not node.attributes:
            node.attributes = NodeAttributes()
            db().commit()
        if not node.attributes.volumes:
            node.attributes.volumes = \
                node.volume_manager.gen_volumes_info()
            db().commit()

        if node.status not in ('provisioning', 'deploying'):
            variants = (
                "disks" in node.meta and
                len(node.meta["disks"]) != len(
                    filter(lambda d: d["type"] == "disk",
                           node.attributes.volumes)),
                "roles" in nd,
                "cluster_id" in nd
            )
            if any(variants):
                try:
                    node.attributes.volumes = \
                        node.volume_manager.gen_volumes_info()
                    if node.cluster:
                        node.cluster.add_pending_changes(
                            "disks",
                            node_id=node.id
                        )
                except Exception as exc:
                    msg = ("Failed to generate volumes "
                           "info for node '{0}': '{1}'").format(
                               node.name or data.get("mac")
                               or data.get("id"),
                               str(exc) or "see logs for details")
                    logger.warning(traceback.format_exc())
                    notifier.notify("error", msg, node_id=node.id)
            db().commit()

        if is_agent:
            # Update node's NICs.
            if node.meta and 'interfaces' in node.meta:
                # we won't update interfaces if data is invalid
                network_manager.update_interfaces_info(node.id)

        nodes_updated.append(node)
        db().commit()

        if 'cluster_id' in nd and nd['cluster_id'] != old_cluster_id:
            if old_cluster_id:
                network_manager.clear_assigned_networks(node)
                network_manager.clear_all_allowed_networks(node.id)
            if nd['cluster_id']:
                network_manager.allow_network_assignment_to_all_interfaces(
                    node)
                network_manager.assign_networks_to_main_interface(node)

    # we need to eagerly load everything that is used in render
    nodes = db().query(Node).options(
        joinedload('cluster'),
        joinedload('interfaces'),
        joinedload('interfaces.assigned_networks')).\
        filter(Node.id.in_([n.id for n in nodes_updated])).all()
    return self.render(nodes)
def create(cls, data):
    """Create Node instance with specified parameters in DB.

    This includes:

    * generating its name by MAC (if name is not specified in data)
    * adding node to Cluster (if cluster_id is not None in data) \
    (see :func:`add_into_cluster`) with specified roles \
    (see :func:`update_roles` and :func:`update_pending_roles`)
    * creating interfaces for Node in DB (see :func:`update_interfaces`)
    * creating default Node attributes (see :func:`create_attributes`)
    * creating Notification about newly discovered Node \
    (see :func:`create_discover_notification`)

    :param data: dictionary of key-value pairs as object fields
    :returns: Node instance
    """
    if "name" not in data:
        data["name"] = "Untitled ({0})".format(data['mac'][-5:].lower())

    data["timestamp"] = datetime.now()
    data.pop("id", None)

    # TODO(enchantner): fix this temporary hack in clients
    if "cluster_id" not in data and "cluster" in data:
        cluster_id = data.pop("cluster", None)
        data["cluster_id"] = cluster_id

    roles = data.pop("roles", None)
    pending_roles = data.pop("pending_roles", None)
    primary_roles = data.pop("primary_roles", None)
    new_node_meta = data.pop("meta", {})
    new_node_cluster_id = data.pop("cluster_id", None)
    new_node = super(Node, cls).create(data)
    new_node.create_meta(new_node_meta)

    if 'hostname' not in data:
        new_node.hostname = \
            cls.get_unique_hostname(new_node, new_node_cluster_id)
    db().flush()

    # Add interfaces for node from 'meta'.
    if new_node.meta and new_node.meta.get('interfaces'):
        cls.update_interfaces(new_node)

    # adding node into cluster
    if new_node_cluster_id:
        cls.add_into_cluster(new_node, new_node_cluster_id)

    # updating roles
    if roles is not None:
        cls.update_roles(new_node, roles)
    if pending_roles is not None:
        cls.update_pending_roles(new_node, pending_roles)
    if primary_roles is not None:
        cls.update_primary_roles(new_node, primary_roles)

    # creating attributes
    cls.create_attributes(new_node)
    cls.create_discover_notification(new_node)

    if new_node.ip:
        cls.check_ip_belongs_to_any_admin_network(new_node)

    fire_callback_on_node_create(new_node)

    return new_node
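# Usage sketch (illustrative): create a node the way the discovery path would,
# letting create() derive the name from the MAC. The field names mirror the
# keys popped in create() above; the MAC value is made up, and `Node` is
# assumed to be the objects class defining create().
def register_discovered_node():
    return Node.create({
        'mac': '52:54:00:12:34:56',  # made-up MAC for illustration
        'status': 'discover',
        'meta': {'interfaces': []},
    })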
def all(cls):
    """Get all instances of this object (model)

    :returns: iterable (SQLAlchemy query)
    """
    return db().query(cls.single.model)
def reset_nodes_timestamp(self):
    db().query(Node).update({'timestamp': datetime.now()})
    db().commit()
def set_network_template(cls, instance, template):
    instance.network_config.configuration_template = template
    cls.update_nodes_network_template(instance, instance.nodes)
    db().flush()
def POST(self):
    """:returns: JSONized Node object.
    :http: * 201 (node successfully created)
           * 400 (invalid node data specified)
           * 403 (node has incorrect status)
           * 409 (node with such parameters already exists)
    """
    data = self.checked_data()

    if data.get("status", "") != "discover":
        error = web.forbidden()
        error.data = "Only bootstrap nodes are allowed to be registered."
        msg = u"Node with mac '{0}' was not created, " \
              u"because request status is '{1}'." \
            .format(data[u'mac'], data[u'status'])
        logger.warning(msg)
        raise error

    node = Node()

    if "cluster_id" in data:
        # FIXME(vk): this part is needed only for tests. Normally,
        # nodes are created only by agent and POST requests don't contain
        # cluster_id, but our integration and unit tests widely use it.
        # We need to assign cluster first
        cluster_id = data.pop("cluster_id")
        if cluster_id:
            node.cluster = db.query(Cluster).get(cluster_id)

    for key, value in data.iteritems():
        if key == "id":
            continue
        elif key == "meta":
            node.create_meta(value)
        else:
            setattr(node, key, value)

    node.name = "Untitled (%s)" % data['mac'][-5:]
    node.timestamp = datetime.now()
    db().add(node)
    db().commit()
    node.attributes = NodeAttributes()

    try:
        node.attributes.volumes = node.volume_manager.gen_volumes_info()
        if node.cluster:
            node.cluster.add_pending_changes(
                "disks",
                node_id=node.id
            )
    except Exception as exc:
        msg = (u"Failed to generate volumes "
               "info for node '{0}': '{1}'").format(
                   node.name or data.get("mac") or data.get("id"),
                   str(exc) or "see logs for details")
        logger.warning(traceback.format_exc())
        notifier.notify("error", msg, node_id=node.id)
    db().add(node)
    db().commit()

    network_manager = NetworkManager()
    # Add interfaces for node from 'meta'.
    if node.meta and node.meta.get('interfaces'):
        network_manager.update_interfaces_info(node.id)

    if node.cluster_id:
        network_manager.allow_network_assignment_to_all_interfaces(node)
        network_manager.assign_networks_to_main_interface(node)

    try:
        # we use a multiplier of 1024 because there are no problems here
        # with unfair size calculation
        ram = str(round(float(
            node.meta['memory']['total']) / 1073741824, 1)) + " GB RAM"
    except Exception as exc:
        logger.warning(traceback.format_exc())
        ram = "unknown RAM"

    try:
        # we use a multiplier of 1000 because disk vendors specify HDD size
        # in terms of decimal capacity. Sources:
        # http://knowledge.seagate.com/articles/en_US/FAQ/172191en
        # http://physics.nist.gov/cuu/Units/binary.html
        hd_size = round(
            float(sum([d["size"] for d in node.meta["disks"]])
                  / 1000000000), 1)
        # if HDD > 100 GB we show its size in TB
        if hd_size > 100:
            hd_size = str(hd_size / 1000) + " TB HDD"
        else:
            hd_size = str(hd_size) + " GB HDD"
    except Exception as exc:
        logger.warning(traceback.format_exc())
        hd_size = "unknown HDD"

    cores = str(node.meta.get('cpu', {}).get('total', "unknown"))

    notifier.notify(
        "discover",
        "New node is discovered: %s CPUs / %s / %s " %
        (cores, ram, hd_size),
        node_id=node.id
    )
    raise web.webapi.created(json.dumps(
        NodeHandler.render(node),
        indent=4
    ))
def create_default_group(cls, instance):
    node_group = models.NodeGroup(name=consts.NODE_GROUPS.default)
    instance.node_groups.append(node_group)
    db.add(node_group)
    db().flush()
    return node_group
def get_nodes_ids(cls, instance):
    return [
        x[0] for x in db().query(models.Node.id).filter(
            models.Node.cluster_id == instance.id).all()
    ]
def get_nodes_not_for_deletion(cls, cluster):
    """All cluster nodes except nodes marked for deletion."""
    return db().query(models.Node).filter_by(
        cluster=cluster, pending_deletion=False).order_by(models.Node.id)
def get_vmware_attributes(cls, instance):
    """Get VmwareAttributes instance from DB.

    Currently the relation with cluster is 1:1.
    """
    return db().query(models.VmwareAttributes).filter(
        models.VmwareAttributes.cluster_id == instance.id
    ).first()
def make_deploy_msgs(self, cluster, supertask, deploymsg, status):
    # ebs_rolelist = ["gangliasrv", "nagiossrv", "gangliacli", "nagioscli"]
    task_messages = []
    ebs_rolelist = [
        "gangliasrv", "nagiossrv", "gangliacli", "nagioscli",
        "management", "watcher", "tgtd", "rsyslogsrv", "rsyslogcli"
    ]
    if status == 1:
        ebs_rolelist = [
            "rsyslogcli", "rsyslogsrv", "tgtd", "watcher", "management",
            "nagioscli", "gangliacli", "nagiossrv", "gangliasrv"
        ]
    # get all nodes in the current cluster that are in the 'ready' state
    nodes_to_startorstop = TaskHelper.nodes_to_startorstop(cluster)
    # collect the set of roles across all of those nodes
    nodes_roles = []
    for node in nodes_to_startorstop:
        nodes_roles.extend(node.roles)
    nodes_roles = list(set(nodes_roles))
    logger.info(deploymsg)

    for role in nodes_roles:
        if role in ebs_rolelist:
            task_deployment = supertask.create_subtask(
                TASK_NAMES.deployment)
            db().commit()
            newdeploymsg = copy.deepcopy(deploymsg)
            newdeploymsg['respond_to'] = "start_stop_resp"
            newdeploymsg['args']['task_uuid'] = task_deployment.uuid
            deployment_infos = []
            for deployment_info in deploymsg['args']['deployment_info']:
                if deployment_info["role"] != role:
                    newdeploymsg['args']['deployment_info'].remove(
                        deployment_info)
                else:
                    if status == 2:
                        deployment_info[role]['action'] = "start"
                        logger.info(
                            u"Matched role {0} on node {1}, "
                            u"starting...".format(
                                role, deployment_info["ip"]))
                    else:
                        deployment_info[role]['action'] = "stop"
                        logger.info(
                            u"Matched role {0} on node {1}, "
                            u"stopping...".format(
                                role, deployment_info["ip"]))
                    deployment_infos.append(deployment_info)
            newdeploymsg['args']['deployment_info'] = deployment_infos
            task_messages.append(newdeploymsg)

            task_deployment = objects.Task.get_by_uid(
                task_deployment.id,
                fail_if_not_found=True,
                lock_for_update=True)
            # if failed to generate task message for orchestrator
            # then task is already set to error
            if task_deployment.status == TASK_STATUSES.error:
                return supertask

            task_deployment.cache = newdeploymsg
            db().commit()
            self.update_cluster_role_status(cluster, role, status)

    new_task_messages = []
    logger.info(len(task_messages))
    for ebsrole in ebs_rolelist:
        for task in task_messages:
            if task['args']['deployment_info'][0]['role'] == ebsrole:
                new_task_messages.append(task)
    return new_task_messages
def update_changes(cls, instance, changes):
    instance.changes_list = [
        models.ClusterChanges(**change) for change in changes
    ]
    db().flush()
def __update_existing_interface(cls, interface_id, interface_attrs):
    interface = db().query(NodeNICInterface).get(interface_id)
    cls.__set_interface_attributes(interface, interface_attrs)
    db().add(interface)
    db().flush()
def patch_attributes(cls, instance, data):
    PluginManager.process_cluster_attributes(instance, data['editable'])
    instance.attributes.editable = dict_merge(
        instance.attributes.editable, data['editable'])
    cls.add_pending_changes(instance, "attributes")
    db().flush()