def test_do_not_redeploy_nodes_in_ready_status(self):
    """Nodes already in 'ready' status must be skipped by deployment.

    Fix: replaced the deprecated ``assertEquals`` alias with
    ``assertEqual`` throughout.
    """
    self.env.create(nodes_kwargs=[
        {"pending_addition": True},
        {"pending_addition": True, 'roles': ['compute']}])
    cluster_db = self.env.clusters[0]
    # Generate ips, fqdns
    TaskHelper.prepare_for_deployment(cluster_db.nodes)
    # First node with status ready
    # should not be redeployed
    self.env.nodes[0].status = 'ready'
    self.env.nodes[0].pending_addition = False
    self.db.commit()

    cluster_db.clear_pending_changes()

    supertask = self.env.launch_deployment()
    self.assertEqual(supertask.name, 'deploy')
    self.assertIn(supertask.status, ('running', 'ready'))

    # The 'ready' node stays untouched while the second node goes
    # through provisioning.
    self.assertEqual(self.env.nodes[0].status, 'ready')
    self.env.wait_for_nodes_status([self.env.nodes[1]], 'provisioning')
    self.env.wait_ready(supertask)

    self.env.refresh_nodes()
    self.assertEqual(self.env.nodes[1].status, 'ready')
    self.assertEqual(self.env.nodes[1].progress, 100)
def create_env(self, nodes):
    """Create a cluster built from *nodes* and prepare it for deployment.

    :param nodes: list of node kwargs dicts for the test environment
    :returns: the cluster DB object, ready for deployment
    """
    created = self.env.create(nodes_kwargs=nodes)
    cluster_db = self.db.query(Cluster).get(created['id'])
    TaskHelper.prepare_for_deployment(cluster_db.nodes)
    return cluster_db
def message(cls, task, nodes_to_provisioning):
    """Build the astute 'provision' RPC message for *task*.

    Uses the cluster's replaced provisioning info when the operator
    overrode it; otherwise serializes the cluster for the given nodes.
    Also prepares a syslog directory per node (skipped for fake tasks).
    """
    logger.debug("ProvisionTask.message(task=%s)" % task.uuid)
    # Operator-supplied provisioning info (if any) wins over the
    # freshly serialized one.
    serialized_cluster = task.cluster.replaced_provisioning_info or \
        provisioning_serializers.serialize(
            task.cluster, nodes_to_provisioning)

    for node in nodes_to_provisioning:
        if settings.FAKE_TASKS or settings.FAKE_TASKS_AMQP:
            # Fake tasks never touch real syslog directories.
            continue

        admin_net_id = objects.Node.get_network_manager(
            node
        ).get_admin_network_group_id()

        TaskHelper.prepare_syslog_dir(node, admin_net_id)

    return make_astute_message(
        'provision',
        'provision_resp',
        {
            'task_uuid': task.uuid,
            'provisioning_info': serialized_cluster
        }
    )
def create_env(self, nodes):
    """Create an 'ha_compact' cluster from *nodes*, deployment-ready.

    :param nodes: list of node kwargs dicts for the test environment
    :returns: the cluster DB object
    """
    created = self.env.create(
        cluster_kwargs={'mode': 'ha_compact'},
        nodes_kwargs=nodes)
    cluster_db = self.db.query(Cluster).get(created['id'])
    TaskHelper.prepare_for_deployment(cluster_db.nodes)
    return cluster_db
def check_redhat_credentials_resp(cls, **kwargs):
    """RPC receiver for the Red Hat credentials check result.

    On failure marks the related release as 'error' and notifies the
    user, then propagates status/progress to the task.

    Fix: the "release not found" log line said
    "download_release_resp" (copy-paste from the sibling handler);
    corrected to name this method.
    """
    logger.info(
        "RPC method check_redhat_credentials_resp received: %s" %
        json.dumps(kwargs))
    task_uuid = kwargs.get('task_uuid')
    error_msg = kwargs.get('error')
    status = kwargs.get('status')
    progress = kwargs.get('progress')

    task = TaskHelper.get_task_by_uuid(task_uuid)

    release_info = task.cache['args']['release_info']
    release_id = release_info['release_id']
    release = db().query(Release).get(release_id)
    if not release:
        logger.error(
            "check_redhat_credentials_resp: Release"
            " with ID %s not found", release_id)
        return

    if error_msg:
        status = 'error'
        cls._update_release_state(release_id, 'error')
        # TODO(NAME): remove this ugly checks
        if 'Unknown error' in error_msg:
            # Replace astute's generic error with a meaningful one.
            error_msg = 'Failed to check Red Hat ' \
                        'credentials'
        if error_msg != 'Task aborted':
            notifier.notify('error', error_msg)

    result = {"release_info": {"release_id": release_id}}
    TaskHelper.update_task_status(
        task_uuid, status, progress, error_msg, result)
def PUT(self, cluster_id):
    """Validate and apply network configuration changes for a cluster.

    Runs the network check task first; only applies the update when the
    check did not fail. Raises ``web.accepted`` with the JSONized task.

    Fix: removed the unused local ``network_configuration`` — the
    validator is called only for its raising side effect.
    """
    data = json.loads(web.data())
    cluster = self.get_object_or_404(Cluster, cluster_id)

    task_manager = CheckNetworksTaskManager(cluster_id=cluster.id)
    task = task_manager.execute(data)

    if task.status != 'error':
        try:
            if 'networks' in data:
                # Raises on bad input; return value intentionally unused.
                self.validator.validate_networks_update(
                    json.dumps(data))
            NetworkConfiguration.update(cluster, data)
        except web.webapi.badrequest as exc:
            TaskHelper.set_error(task.uuid, exc.data)
            logger.error(traceback.format_exc())
        except Exception as exc:
            TaskHelper.set_error(task.uuid, exc)
            logger.error(traceback.format_exc())

    data = build_json_response(TaskHandler.render(task))
    # Roll back everything the failed check/update touched; otherwise
    # persist the new configuration.
    if task.status == 'error':
        db().rollback()
    else:
        db().commit()
    raise web.accepted(data=data)
def launch_verify(self, cluster):
    """Start a network verification task for *cluster*.

    Validates the request body first; on validation failure a
    'check_networks' task is created, marked as error and returned via
    ``web.accepted``. The admin network is excluded from verification.
    """
    try:
        data = self.validator.validate_networks_update(web.data())
    except web.webapi.badrequest as exc:
        # Record the validation failure as an errored task so the UI
        # can show it, then bail out with 202.
        task = Task(name='check_networks', cluster=cluster)
        db().add(task)
        db().commit()
        TaskHelper.set_error(task.uuid, exc.data)
        logger.error(traceback.format_exc())

        json_task = build_json_response(TaskHandler.render(task))
        raise web.accepted(data=json_task)

    # The admin (PXE) network is never verified.
    data["networks"] = [
        n for n in data["networks"] if n.get("name") != "fuelweb_admin"
    ]

    vlan_ids = [{
        'name': n['name'],
        'vlans': cluster.network_manager.generate_vlan_ids_list(
            data, cluster, n)
    } for n in data['networks']]

    task_manager = VerifyNetworksTaskManager(cluster_id=cluster.id)
    try:
        task = task_manager.execute(data, vlan_ids)
    except errors.CantRemoveOldVerificationTask:
        raise web.badrequest("You cannot delete running task manually")
    return TaskHandler.render(task)
def check_task_name_and_sanitized_data(self, pos, logger, task_name,
                                       one_parameter=False):
    """Test task name against known value

    Check sanitized data doesn't contain keys which are absent in
    white_list

    :param pos: position of call parameters inside
    logger.call_args_list, (negative value: -1 - last call,
    -2 - pre-last call, etc.)
    :param logger: mock object for logger method
    :param task_name: expected task name
    :param one_parameter: whether given call must go with one parameter
    """
    log_args = logger.call_args_list
    # call_args_list[pos][0] is the positional-args tuple of that call;
    # the task object is always the first positional argument.
    task = log_args[pos][0][0]
    self.assertEqual(task.name, task_name)
    if len(log_args[pos][0]) == 2:
        # Two positional args: the second is the sanitized log record.
        log_record = log_args[pos][0][1]
        if task_name in task_output_white_list:
            self.check_keys_included(
                task_output_white_list[task_name],
                TaskHelper.sanitize_task_output(task.cache, log_record))
        else:
            # Tasks outside the white list must sanitize to nothing.
            self.assertIsNone(
                TaskHelper.sanitize_task_output(task.cache, log_record))
    else:
        # One-positional-arg call is only valid when explicitly expected.
        self.assertTrue(one_parameter)
def _create_cluster_for_vlan_splinters(self, segment_type='gre'):
    """Create a one-controller neutron cluster with five NICs.

    :param segment_type: neutron segmentation type ('gre' by default)
    :returns: the cluster DB object, prepared for deployment
    """
    meta = {
        'interfaces': [
            {'name': 'eth%d' % i,
             'mac': self.env._generate_random_mac()}
            for i in range(5)
        ]
    }
    created = self.env.create(
        cluster_kwargs={
            'mode': 'multinode',
            'net_provider': 'neutron',
            'net_segment_type': segment_type
        },
        nodes_kwargs=[
            {'roles': ['controller'],
             'pending_addition': True,
             'meta': meta}
        ]
    )
    cluster_db = self.db.query(Cluster).get(created['id'])
    TaskHelper.prepare_for_deployment(cluster_db.nodes)
    return cluster_db
def test_deploy_grow_controllers(self):
    """Adding two controllers must re-deploy all three of them."""
    cluster = self.create_env(
        nodes_kwargs=[
            {"roles": ["controller"]},
            {"roles": ["controller"], "pending_addition": True},
            {"roles": ["controller"], "pending_addition": True},
        ]
    )

    # Two brand-new controllers have to be provisioned ...
    self.assertEqual(len(TaskHelper.nodes_to_provision(cluster)), 2)
    # ... while puppet must run on every controller.
    self.assertEqual(len(TaskHelper.nodes_to_deploy(cluster)), 3)

    supertask = self.env.launch_deployment()
    self.assertEqual(supertask.name, "deploy")
    self.env.wait_ready(supertask)
    self.assertEqual(supertask.status, "ready")

    controllers = self.filter_by_role(cluster.nodes, "controller")
    self.assertEqual(len(controllers), 3)
def check_redhat_credentials_resp(cls, **kwargs):
    """RPC receiver for the Red Hat credentials check result.

    Looks up the task and its release; on failure marks the release as
    'error' and notifies the user, then updates the task status.

    Fix: the "release not found" log line said
    "download_release_resp" (copy-paste from the sibling handler);
    corrected to name this method.
    """
    logger.info(
        "RPC method check_redhat_credentials_resp received: %s" %
        json.dumps(kwargs))
    task_uuid = kwargs.get("task_uuid")
    error_msg = kwargs.get("error")
    status = kwargs.get("status")
    progress = kwargs.get("progress")

    task = db().query(Task).filter_by(uuid=task_uuid).first()
    if not task:
        logger.error(
            "check_redhat_credentials_resp: task \
with UUID %s not found!",
            task_uuid,
        )
        return

    release_info = task.cache["args"]["release_info"]
    release_id = release_info["release_id"]
    release = db().query(Release).get(release_id)
    if not release:
        logger.error(
            "check_redhat_credentials_resp: Release"
            " with ID %s not found", release_id)
        return

    if error_msg:
        status = "error"
        cls._update_release_state(release_id, "error")

        # TODO(NAME): remove this ugly checks
        if "Unknown error" in error_msg:
            # Replace astute's generic error with a meaningful one.
            error_msg = "Failed to check Red Hat " "credentials"

        if error_msg != "Task aborted":
            notifier.notify("error", error_msg)

    result = {"release_info": {"release_id": release_id}}

    TaskHelper.update_task_status(
        task_uuid, status, progress, error_msg, result)
def download_release_resp(cls, **kwargs):
    """RPC receiver for release download progress/result.

    Marks the release as failed or completed depending on the reported
    status, then propagates status/progress to the task.
    """
    logger.info(
        "RPC method download_release_resp received: %s" %
        json.dumps(kwargs))
    task_uuid = kwargs.get("task_uuid")
    error_msg = kwargs.get("error")
    status = kwargs.get("status")
    progress = kwargs.get("progress")

    task = db().query(Task).filter_by(uuid=task_uuid).first()
    if not task:
        logger.error(
            "download_release_resp: task"
            " with UUID %s not found", task_uuid)
        return

    release_info = task.cache["args"]["release_info"]
    release_id = release_info["release_id"]
    release = db().query(Release).get(release_id)
    if not release:
        logger.error(
            "download_release_resp: Release"
            " with ID %s not found", release_id)
        return

    if error_msg:
        status = "error"
        # Adjacent literals concatenate into one sentence.
        error_msg = "{0} download and preparation " \
                    "has failed.".format(release.name)
        cls._download_release_error(release_id, error_msg)
    elif progress == 100 and status == "ready":
        cls._download_release_completed(release_id)

    result = {"release_info": {"release_id": release_id}}

    TaskHelper.update_task_status(
        task_uuid, status, progress, error_msg, result)
def PUT(self, cluster_id):
    """Validate and apply network configuration changes for a cluster.

    Runs the network check task first; validation happens outside the
    try block (a validation failure propagates to the caller), while an
    update failure is recorded on the task. Raises ``web.accepted``
    with the JSONized task.

    Fix: removed the unused local ``network_configuration`` — the
    validator is called only for its raising side effect.
    """
    data = json.loads(web.data())
    cluster = self.get_object_or_404(Cluster, cluster_id)

    task_manager = CheckNetworksTaskManager(cluster_id=cluster.id)
    task = task_manager.execute(data)

    if task.status != 'error':
        if 'networks' in data:
            # Raises on bad input; return value intentionally unused.
            self.validator.validate_networks_update(json.dumps(data))
        try:
            NetworkConfiguration.update(cluster, data)
        except Exception as exc:
            err = str(exc)
            TaskHelper.update_task_status(
                task.uuid,
                status="error",
                progress=100,
                msg=err)
            logger.error(traceback.format_exc())

    data = build_json_response(TaskHandler.render(task))
    # Roll back on failure; otherwise persist the new configuration.
    if task.status == 'error':
        self.db.rollback()
    else:
        self.db.commit()
    raise web.accepted(data=data)
def message(cls, task, nodes_to_provisioning):
    """Build the astute 'provision' RPC message for *task*.

    Re-reads the task with a row lock and locks the nodes so the
    serialization is consistent, then commits before returning.
    """
    logger.debug("ProvisionTask.message(task=%s)" % task.uuid)
    # Lock the task row and the nodes for the duration of serialization.
    task = objects.Task.get_by_uid(
        task.id,
        fail_if_not_found=True,
        lock_for_update=True
    )
    objects.NodeCollection.lock_nodes(nodes_to_provisioning)
    serialized_cluster = provisioning_serializers.serialize(
        task.cluster, nodes_to_provisioning)

    for node in nodes_to_provisioning:
        if settings.FAKE_TASKS or settings.FAKE_TASKS_AMQP:
            # Fake tasks never touch real syslog directories.
            continue

        admin_net_id = objects.Node.get_network_manager(
            node
        ).get_admin_network_group_id(node.id)

        TaskHelper.prepare_syslog_dir(node, admin_net_id)

    rpc_message = make_astute_message(
        task,
        'provision',
        'provision_resp',
        {
            'provisioning_info': serialized_cluster
        }
    )
    # Release the locks taken above before handing the message back.
    db().commit()
    return rpc_message
def test_deploy_grow_controllers(self):
    """Adding two controllers must re-deploy all three of them."""
    cluster = self.create_env(nodes_kwargs=[
        {'roles': ['controller']},
        {'roles': ['controller'], 'pending_addition': True},
        {'roles': ['controller'], 'pending_addition': True},
    ])

    # We have to build 2 new controllers
    to_provision = TaskHelper.nodes_to_provision(cluster)
    self.assertEqual(len(to_provision), 2)

    # All controllers must re-deploy (run puppet)
    to_deploy = TaskHelper.nodes_to_deploy(cluster)
    self.assertEqual(len(to_deploy), 3)

    supertask = self.env.launch_deployment()
    self.assertEqual(supertask.name, 'deploy')
    self.env.wait_ready(supertask)
    self.assertEqual(supertask.status, 'ready')

    controllers = self.filter_by_role(cluster.nodes, 'controller')
    self.assertEqual(len(controllers), 3)
def create_env(self, mode, network_manager='FlatDHCPManager'):
    """Build a five-node cluster in *mode* using *network_manager*.

    :param mode: cluster deployment mode
    :param network_manager: nova network manager name
    :returns: the cluster DB object, prepared for deployment
    """
    node_args = [
        {'roles': ['controller', 'cinder'], 'pending_addition': True},
        {'roles': ['compute', 'cinder'], 'pending_addition': True},
        {'roles': ['compute'], 'pending_addition': True},
        {'roles': ['mongo'], 'pending_addition': True},
        {'roles': [],
         'pending_roles': ['cinder'],
         'pending_addition': True},
    ]
    created = self.env.create(
        cluster_kwargs={'mode': mode, 'net_manager': network_manager},
        nodes_kwargs=node_args)
    cluster_db = self.db.query(Cluster).get(created['id'])
    TaskHelper.prepare_for_deployment(cluster_db.nodes)
    return cluster_db
def provision_resp(cls, **kwargs):
    """RPC receiver for provisioning progress reports from astute.

    Updates each reported node's status/progress (marking failed nodes
    with error details), then updates the task itself.

    Fix: the "not found" warning said "Task with uid ..." although the
    failed lookup is a Node lookup; message corrected.
    """
    logger.info(
        "RPC method provision_resp received: %s" %
        json.dumps(kwargs))

    task_uuid = kwargs.get('task_uuid')
    message = kwargs.get('error')
    status = kwargs.get('status')
    progress = kwargs.get('progress')
    nodes = kwargs.get('nodes', [])

    task = get_task_by_uuid(task_uuid)

    for node in nodes:
        uid = node.get('uid')
        node_db = db().query(Node).get(uid)

        if not node_db:
            logger.warn('Node with uid "{0}" not found'.format(uid))
            continue
        if node.get('status') == 'error':
            node_db.status = 'error'
            node_db.progress = 100
            node_db.error_type = 'provision'
            node_db.error_msg = node.get('error_msg', 'Unknown error')
        else:
            node_db.status = node.get('status')
            node_db.progress = node.get('progress')

    TaskHelper.update_task_status(task.uuid, status, progress, message)
def create_env(self, mode):
    """Build a six-node neutron/vlan cluster in *mode*.

    :param mode: cluster deployment mode
    :returns: the cluster DB object, prepared for deployment
    """
    node_args = [
        {'roles': ['controller'], 'pending_addition': True},
        {'roles': ['controller'], 'pending_addition': True},
        {'roles': ['controller', 'cinder'], 'pending_addition': True},
        {'roles': ['compute', 'cinder'], 'pending_addition': True},
        {'roles': ['compute'], 'pending_addition': True},
        {'roles': ['cinder'], 'pending_addition': True},
    ]
    created = self.env.create(
        cluster_kwargs={
            'mode': mode,
            'net_provider': 'neutron',
            'net_segment_type': 'vlan'
        },
        nodes_kwargs=node_args)
    cluster_db = self.db.query(Cluster).get(created['id'])
    TaskHelper.prepare_for_deployment(cluster_db.nodes)
    return cluster_db
def PUT(self, cluster_id):
    """
    :IMPORTANT: this method should be rewritten to be more RESTful

    :returns: JSONized Task object.
    :http: * 202 (network checking task failed)
           * 200 (network verification task started)
           * 404 (cluster not found in db)
    """
    cluster = self.get_object_or_404(Cluster, cluster_id)

    try:
        data = self.validator.validate_networks_update(web.data())
    except web.webapi.badrequest as exc:
        # Record the validation failure as an errored task so the UI
        # can show it, then bail out with 202.
        task = Task(name='check_networks', cluster=cluster)
        db().add(task)
        db().commit()
        TaskHelper.set_error(task.uuid, exc.data)
        logger.error(traceback.format_exc())

        json_task = build_json_response(TaskHandler.render(task))
        raise web.accepted(data=json_task)

    vlan_ids = [{
        'name': n['name'],
        'vlans': NetworkGroup.generate_vlan_ids_list(n)
    } for n in data['networks']]

    task_manager = VerifyNetworksTaskManager(cluster_id=cluster.id)
    task = task_manager.execute(data, vlan_ids)

    return TaskHandler.render(task)
def PUT(self, cluster_id):
    """Validate and apply neutron network configuration for a cluster.

    The admin (PXE) network is stripped from the request before the
    check task runs. Raises ``web.accepted`` with the JSONized task.
    """
    data = json.loads(web.data())
    if data.get("networks"):
        # The admin network is never user-configurable here.
        data["networks"] = [
            n for n in data["networks"]
            if n.get("name") != "fuelweb_admin"
        ]
    cluster = self.get_object_or_404(Cluster, cluster_id)
    # Refuse changes once the configuration is locked (e.g. deployed).
    check_if_network_configuration_locked(cluster)

    task_manager = CheckNetworksTaskManager(cluster_id=cluster.id)
    task = task_manager.execute(data)

    if task.status != 'error':
        try:
            if 'networks' in data:
                self.validator.validate_networks_update(
                    json.dumps(data))

            if 'neutron_parameters' in data:
                self.validator.validate_neutron_params(
                    json.dumps(data))

            NeutronNetworkConfiguration.update(cluster, data)
        except Exception as exc:
            TaskHelper.set_error(task.uuid, exc)
            logger.error(traceback.format_exc())

    data = build_json_response(TaskHandler.render(task))
    # Roll back on failure; otherwise persist the new configuration.
    if task.status == 'error':
        db().rollback()
    else:
        db().commit()
    raise web.accepted(data=data)
def PUT(self, cluster_id):
    """:IMPORTANT: this method should be rewritten to be more RESTful

    :returns: JSONized Task object.
    :http: * 202 (network checking task failed)
           * 200 (network verification task started)
           * 404 (cluster not found in db)
    """
    cluster = self.get_object_or_404(Cluster, cluster_id)

    try:
        data = self.validator.validate_networks_update(web.data())
    except web.webapi.badrequest as exc:
        # Record the validation failure as an errored task so the UI
        # can show it, then bail out with 202.
        task = Task(name='check_networks', cluster=cluster)
        db().add(task)
        db().commit()
        TaskHelper.set_error(task.uuid, exc.data)
        logger.error(traceback.format_exc())

        json_task = build_json_response(TaskHandler.render(task))
        raise web.accepted(data=json_task)

    vlan_ids = [{
        'name': n['name'],
        'vlans': NetworkGroup.generate_vlan_ids_list(n)
    } for n in data['networks']]

    task_manager = VerifyNetworksTaskManager(cluster_id=cluster.id)
    task = task_manager.execute(data, vlan_ids)

    return TaskHandler.render(task)
def check_dhcp_resp(cls, **kwargs):
    """Receiver method for check_dhcp task

    For example of kwargs check FakeCheckingDhcpThread
    """
    logger.info("RPC method check_dhcp_resp received: %s",
                json.dumps(kwargs))
    messages = []

    # Rogue DHCP servers grouped by the node uid that discovered them.
    result = collections.defaultdict(list)
    # NOTE(review): the rendered message has no space between the two
    # sentences ("...{mac}.Discovered...") — looks unintended; confirm
    # before changing the user-visible text.
    message_template = (
        u"Dhcp server on {server_id} - {mac}."
        "Discovered from node {yiaddr} on {iface}.")

    task_uuid = kwargs.get('task_uuid')
    nodes = kwargs.get('nodes', [])
    error_msg = kwargs.get('error')
    status = kwargs.get('status')
    progress = kwargs.get('progress')

    # Any MAC owned by the master node is a legitimate DHCP source.
    macs = [item['addr'] for item in cls._get_master_macs()]
    logger.debug('Mac addr on master node %s', macs)

    for node in nodes:
        if node['status'] == 'ready':
            for row in node.get('data', []):
                if row['mac'] not in macs:
                    messages.append(message_template.format(**row))
                    result[node['uid']].append(row)
        elif node['status'] == 'error':
            messages.append(node.get(
                'error_msg',
                ('Dhcp check method failed.'
                 ' Check logs for details.')))

    # Any collected message means at least one rogue server or node
    # failure — the task is then forced into the error state.
    status = status if not messages else "error"
    error_msg = '\n'.join(messages) if messages else error_msg
    TaskHelper.update_task_status(
        task_uuid, status, progress, error_msg, result)
def PUT(self, cluster_id):
    """:returns: JSONized Task object.

    :http: * 202 (network checking task created)
           * 404 (cluster not found in db)
    """
    data = json.loads(web.data())
    cluster = self.get_object_or_404(Cluster, cluster_id)

    task_manager = CheckNetworksTaskManager(cluster_id=cluster.id)
    task = task_manager.execute(data)

    if task.status != 'error':
        try:
            if 'networks' in data:
                # Raises on bad input; the return value is not used.
                self.validator.validate_networks_update(
                    json.dumps(data))
            NetworkConfiguration.update(cluster, data)
        except web.webapi.badrequest as exc:
            TaskHelper.set_error(task.uuid, exc.data)
            logger.error(traceback.format_exc())
        except Exception as exc:
            TaskHelper.set_error(task.uuid, exc)
            logger.error(traceback.format_exc())

    data = build_json_response(TaskHandler.render(task))
    # Roll back on failure; otherwise persist the new configuration.
    if task.status == 'error':
        db().rollback()
    else:
        db().commit()
    raise web.accepted(data=data)
def download_release_resp(cls, **kwargs):
    """RPC receiver for release download progress/result.

    Marks the release as failed or completed depending on the reported
    status, then propagates status/progress to the task.
    """
    logger.info(
        "RPC method download_release_resp received: %s" %
        json.dumps(kwargs))
    task_uuid = kwargs.get('task_uuid')
    error_msg = kwargs.get('error')
    status = kwargs.get('status')
    progress = kwargs.get('progress')

    task = TaskHelper.get_task_by_uuid(task_uuid)

    release_info = task.cache['args']['release_info']
    release_id = release_info['release_id']
    release = db().query(Release).get(release_id)
    if not release:
        logger.error(
            "download_release_resp: Release"
            " with ID %s not found", release_id)
        return

    if error_msg:
        status = 'error'
        # Adjacent literals concatenate into one sentence.
        error_msg = "{0} download and preparation " \
                    "has failed.".format(release.name)
        cls._download_release_error(release_id, error_msg)
    elif progress == 100 and status == 'ready':
        cls._download_release_completed(release_id)

    result = {"release_info": {"release_id": release_id}}
    TaskHelper.update_task_status(
        task_uuid, status, progress, error_msg, result)
def test_do_not_redeploy_nodes_in_ready_status(self):
    """Nodes already in 'ready' status must be skipped by deployment.

    Fix: replaced the deprecated ``assertEquals`` alias with
    ``assertEqual`` throughout.
    """
    self.env.create(nodes_kwargs=[
        {"pending_addition": True},
        {"pending_addition": True, 'roles': ['compute']}])
    cluster_db = self.env.clusters[0]
    # Generate ips, fqdns
    TaskHelper.prepare_for_deployment(cluster_db.nodes)
    # First node with status ready
    # should not be redeployed
    self.env.nodes[0].status = 'ready'
    self.env.nodes[0].pending_addition = False
    self.db.commit()

    objects.Cluster.clear_pending_changes(cluster_db)

    supertask = self.env.launch_deployment()
    self.assertEqual(supertask.name, 'deploy')
    self.assertIn(supertask.status, ('running', 'ready'))

    # The 'ready' node stays untouched while the second node goes
    # through provisioning.
    self.assertEqual(self.env.nodes[0].status, 'ready')
    self.env.wait_for_nodes_status([self.env.nodes[1]], 'provisioning')
    self.env.wait_ready(supertask)

    self.env.refresh_nodes()
    self.assertEqual(self.env.nodes[1].status, 'ready')
    self.assertEqual(self.env.nodes[1].progress, 100)
def test_prepare_action_log_kwargs_with_web_ctx(self):
    """actor_id must come from web ctx env when present, else be None."""
    self.env.create(nodes_kwargs=[
        {'roles': ['compute'], 'provisioning': True},
    ])
    cluster = self.env.clusters[0]
    task = Task(name='provision', cluster_id=cluster.id)
    self.db.add(task)
    self.db.flush()

    actor_id = 'xx'
    ctx_with_actor = {'env': {'fuel.action.actor_id': actor_id}}
    with mock.patch.dict(web.ctx, ctx_with_actor):
        kwargs = TaskHelper.prepare_action_log_kwargs(task)
        self.assertIn('actor_id', kwargs)
        self.assertEqual(actor_id, kwargs['actor_id'])

    # Without the actor key in the environment the value is None.
    with mock.patch.dict(web.ctx, {'env': {}}):
        kwargs = TaskHelper.prepare_action_log_kwargs(task)
        self.assertIn('actor_id', kwargs)
        self.assertIsNone(kwargs['actor_id'])
def message(cls, task):
    """Build the astute 'provision' message for every node that needs
    provisioning in the task's cluster.

    Raises ``errors.NodeOffline`` when a node to be provisioned is
    offline (skipped entirely in fake-task mode).
    """
    logger.debug("ProvisionTask.message(task=%s)" % task.uuid)
    nodes = TaskHelper.nodes_to_provision(task.cluster)
    USE_FAKE = settings.FAKE_TASKS or settings.FAKE_TASKS_AMQP

    # We need to assign admin ips
    # and only after that prepare syslog
    # directories
    task.cluster.prepare_for_provisioning()

    for node in nodes:
        if USE_FAKE:
            continue

        if node.offline:
            raise errors.NodeOffline(
                u'Node "%s" is offline.'
                " Remove it from environment and try again." %
                node.full_name)

        TaskHelper.prepare_syslog_dir(node)

    # Operator-supplied provisioning info (if any) wins over the
    # freshly serialized one.
    serialized_cluster = task.cluster.replaced_provisioning_info or \
        provisioning_serializers.serialize(task.cluster)

    message = {
        "method": "provision",
        "respond_to": "provision_resp",
        "args": {
            "task_uuid": task.uuid,
            "provisioning_info": serialized_cluster
        },
    }

    return message
def test_update_cluster_to_error_if_deploy_task_failed(self):
    """A failed 'deploy' task must switch the cluster to 'error'.

    Fix: replaced the deprecated ``assertEquals`` alias with
    ``assertEqual``.
    """
    task = Task(name='deploy', cluster=self.cluster, status='error')
    self.db.add(task)
    self.db.commit()

    TaskHelper.update_cluster_status(task.uuid)

    self.assertEqual(self.cluster.status, 'error')
def test_update_cluster_to_operational(self):
    """A ready 'deploy' task must switch the cluster to 'operational'.

    Fix: replaced the deprecated ``assertEquals`` alias with
    ``assertEqual``.
    """
    task = Task(name='deploy', cluster=self.cluster, status='ready')
    self.db.add(task)
    self.db.commit()

    TaskHelper.update_cluster_status(task.uuid)

    self.assertEqual(self.cluster.status, 'operational')
def stop_deployment_resp(cls, **kwargs):
    """RPC receiver for the 'stop deployment' result.

    On success the cluster is set to 'stopped', the interrupted
    deploy/provision tasks are removed and the affected nodes are reset
    to 'discover' (offline, pending addition) so they can be deployed
    again later.
    """
    logger.info(
        "RPC method stop_deployment_resp received: %s" %
        json.dumps(kwargs))
    task_uuid = kwargs.get('task_uuid')
    nodes = kwargs.get('nodes', [])
    ia_nodes = kwargs.get('inaccessible_nodes', [])
    message = kwargs.get('error')
    status = kwargs.get('status')
    progress = kwargs.get('progress')

    task = TaskHelper.get_task_by_uuid(task_uuid)

    # The tasks that were interrupted by the stop request.
    stop_tasks = db().query(Task).filter_by(
        cluster_id=task.cluster_id,
    ).filter(
        Task.name.in_(["deploy", "deployment", "provision"])).all()
    if not stop_tasks:
        logger.warning(
            "stop_deployment_resp: deployment tasks \
not found for environment '%s'!", task.cluster_id)

    if status == "ready":
        task.cluster.status = "stopped"

        if stop_tasks:
            # NOTE(review): map() is eager on Python 2 only; under
            # Python 3 this would be a no-op — confirm interpreter
            # before porting.
            map(db().delete, stop_tasks)

        db().commit()

        update_nodes = db().query(Node).filter(
            Node.id.in_(
                [n["uid"] for n in itertools.chain(nodes, ia_nodes)]),
            Node.cluster_id == task.cluster_id).yield_per(100)

        update_nodes.update(
            {
                "online": False,
                "status": "discover",
                "pending_addition": True
            },
            synchronize_session='fetch')

        for n in update_nodes:
            # Roles go back to pending so the next deploy re-applies
            # them from scratch.
            n.roles, n.pending_roles = n.pending_roles, n.roles

        db().commit()

        if ia_nodes:
            cls._notify_inaccessible(
                task.cluster_id,
                [n["uid"] for n in ia_nodes],
                u"deployment stopping")

        message = (u"Deployment of environment '{0}' "
                   u"was successfully stopped".format(
                       task.cluster.name or task.cluster_id))

        notifier.notify("done", message, task.cluster_id)

    TaskHelper.update_task_status(task_uuid, status, progress, message)
def _success_action(cls, task, status, progress):
    """Notify about a finished deployment and update the task status.

    Builds a user-facing message including the Horizon URL when it can
    be determined: the first controller's public IP in simple modes, or
    the public VIP in HA mode.

    Fix: the "deployment is done, access Horizon at {1}" string
    literal in the HA branch was garbled (split by a stray
    newline/quote); reconstructed as an adjacent-literal
    concatenation consistent with the multinode branch.
    """
    network_manager = NetworkManager()
    # check if all nodes are ready
    if any(map(lambda n: n.status == 'error', task.cluster.nodes)):
        cls._error_action(task, 'error', 100)
        return

    if task.cluster.mode in ('singlenode', 'multinode'):
        # determining horizon url - it's an IP
        # of a first cluster controller
        controller = db().query(Node).filter_by(
            cluster_id=task.cluster_id
        ).filter(
            Node.role_list.any(name='controller')
        ).first()
        if controller:
            logger.debug(
                u"Controller is found, node_id=%s, "
                "getting it's IP addresses",
                controller.id)
            public_net = filter(
                lambda n: n['name'] == 'public' and 'ip' in n,
                network_manager.get_node_networks(controller.id))
            if public_net:
                horizon_ip = public_net[0]['ip'].split('/')[0]
                message = (
                    u"Deployment of environment '{0}' is done. "
                    "Access the OpenStack dashboard (Horizon) at "
                    "http://{1}/ or via internal network at http://{2}/"
                ).format(task.cluster.name, horizon_ip, controller.ip)
            else:
                message = (
                    u"Deployment of environment '{0}' is done").format(
                        task.cluster.name)
                logger.warning(
                    u"Public ip for controller node "
                    "not found in '{0}'".format(task.cluster.name))
        else:
            message = (u"Deployment of environment"
                       " '{0}' is done").format(task.cluster.name)
            logger.warning(u"Controller node not found in '{0}'".format(
                task.cluster.name))
    elif task.cluster.is_ha_mode:
        # determining horizon url in HA mode - it's vip
        # from a public network saved in task cache
        try:
            netmanager = NetworkManager()
            message = (
                u"Deployment of environment '{0}' is done. "
                "Access the OpenStack dashboard (Horizon) at {1}"
            ).format(
                task.cluster.name,
                netmanager.get_horizon_url(task.cluster.id))
        except Exception as exc:
            logger.error(": ".join([str(exc), traceback.format_exc()]))
            message = (u"Deployment of environment"
                       " '{0}' is done").format(task.cluster.name)
            logger.warning(u"Cannot find virtual IP for '{0}'".format(
                task.cluster.name))

    notifier.notify("done", message, task.cluster_id)
    TaskHelper.update_task_status(task.uuid, status, progress, message)
def _update_parent_instance(cls, instance):
    """Recompute a parent task's status/progress from its subtasks.

    - all subtasks ready  -> parent ready, progress 100,
      messages joined;
    - any subtask errored -> remaining subtasks aborted, parent
      errored with the distinct error messages joined;
    - otherwise           -> parent goes/stays running and its
      progress is derived from the subtasks that report one.
    """
    subtasks = instance.subtasks
    if len(subtasks):
        data = dict()

        if all(
                map(lambda s: s.status == consts.TASK_STATUSES.ready,
                    subtasks)):

            data['status'] = consts.TASK_STATUSES.ready
            data['progress'] = 100
            data['message'] = u'\n'.join(
                map(lambda s: s.message,
                    filter(lambda s: s.message is not None, subtasks)))

            cls.update(instance, data)
            TaskHelper.update_action_log(instance)

        elif any(
                map(lambda s: s.status == consts.TASK_STATUSES.error,
                    subtasks)):

            for subtask in subtasks:
                if subtask.status not in (consts.TASK_STATUSES.error,
                                          consts.TASK_STATUSES.ready):
                    # Abort whatever is still pending/running so the
                    # parent can settle into a final state.
                    subtask.status = consts.TASK_STATUSES.error
                    subtask.progress = 100
                    subtask.message = "Task aborted"

            data['status'] = consts.TASK_STATUSES.error
            data['progress'] = 100
            data['message'] = u'\n'.join(list(set(map(
                lambda s: (s.message or ""), filter(
                    lambda s: (
                        s.status == consts.TASK_STATUSES.error and not
                        # TODO(aroma): make this check less ugly
                        s.message == "Task aborted"
                    ), subtasks)))))

            cls.update(instance, data)
            TaskHelper.update_action_log(instance)

        elif instance.status == consts.TASK_STATUSES.pending and any(
                map(
                    lambda s: s.status in (consts.TASK_STATUSES.running,
                                           consts.TASK_STATUSES.
                                           ready), subtasks)):
            instance.status = consts.TASK_STATUSES.running

        else:
            # Aggregate progress only over subtasks that report one.
            subtasks_with_progress = filter(
                lambda s: s.progress is not None, subtasks)
            if subtasks_with_progress:
                instance.progress = \
                    TaskHelper.calculate_parent_task_progress(
                        subtasks_with_progress
                    )
            else:
                instance.progress = 0
def _error_action(cls, task, status, progress, message=None):
    """Report a deployment failure and mark the task accordingly.

    :param message: optional detail appended to the failure text; when
        absent, a per-node error summary is generated instead.
    """
    if message:
        full_message = u"Deployment has failed. {0}".format(message)
    else:
        node_details = cls._generate_error_message(
            task,
            error_types=("deploy", "provision"),
            names_only=True)
        full_message = (
            u"Deployment has failed. "
            u"Check these nodes:\n{0}".format(node_details))

    notifier.notify("error", full_message, task.cluster_id)
    TaskHelper.update_task_status(
        task.uuid, status, progress, full_message)
def remove_nodes_resp(cls, **kwargs):
    """RPC receiver for node removal results.

    Deletes successfully removed (and unreachable) nodes from the DB,
    marks failed ones with an error status, sends user notifications
    and updates the task.
    """
    logger.info(
        "RPC method remove_nodes_resp received: %s" %
        json.dumps(kwargs))
    task_uuid = kwargs.get('task_uuid')
    nodes = kwargs.get('nodes') or []
    error_nodes = kwargs.get('error_nodes') or []
    inaccessible_nodes = kwargs.get('inaccessible_nodes') or []
    error_msg = kwargs.get('error')
    status = kwargs.get('status')
    progress = kwargs.get('progress')

    for node in nodes:
        node_db = db().query(Node).get(node['uid'])
        if not node_db:
            logger.error(
                u"Failed to delete node '%s': node doesn't exist",
                str(node))
            # NOTE(review): 'break' stops processing the remaining
            # nodes after one bad uid — 'continue' might be intended;
            # confirm before changing.
            break
        db().delete(node_db)

    for node in inaccessible_nodes:
        # Nodes which not answered by rpc just removed from db
        node_db = db().query(Node).get(node['uid'])
        if node_db:
            logger.warn(
                u'Node %s not answered by RPC, removing from db',
                node_db.human_readable_name)
            db().delete(node_db)

    for node in error_nodes:
        node_db = db().query(Node).get(node['uid'])
        if not node_db:
            logger.error(
                u"Failed to delete node '%s' marked as error from Naily:"
                " node doesn't exist", str(node))
            break
        node_db.pending_deletion = False
        node_db.status = 'error'
        db().add(node_db)
        node['name'] = node_db.name
    db().commit()

    success_msg = u"No nodes were removed"
    err_msg = u"No errors occurred"
    if nodes:
        success_msg = u"Successfully removed {0} node(s)".format(
            len(nodes))
        notifier.notify("done", success_msg)
    if error_nodes:
        err_msg = u"Failed to remove {0} node(s): {1}".format(
            len(error_nodes),
            ', '.join([
                n.get('name') or "ID: {0}".format(n['uid'])
                for n in error_nodes
            ]))
        notifier.notify("error", err_msg)
    if not error_msg:
        # Combined summary becomes the task message when astute did
        # not supply an error of its own.
        error_msg = ". ".join([success_msg, err_msg])

    TaskHelper.update_task_status(task_uuid, status, progress, error_msg)
def prepare_for_provisioning(self):
    """Assign admin IPs and FQDNs to every node queued for provisioning."""
    # Imported locally to avoid circular imports at module load time.
    from nailgun.network.manager import NetworkManager
    from nailgun.task.helpers import TaskHelper

    netmanager = NetworkManager()
    nodes = TaskHelper.nodes_to_provision(self)
    TaskHelper.update_slave_nodes_fqdn(nodes)
    for node in nodes:
        iface_count = len(node.meta.get("interfaces", []))
        netmanager.assign_admin_ips(node.id, iface_count)
def serialize_node(cls, cluster_attrs, node):
    """Serialize a single node."""
    serialized_node = {
        'uid': node.uid,
        'power_address': node.ip,
        'name': TaskHelper.make_slave_name(node.id),
        # right now it duplicates to avoid possible issues
        'slave_name': TaskHelper.make_slave_name(node.id),
        'hostname': node.fqdn,
        'power_pass': cls.get_ssh_key_path(node),

        'profile': cluster_attrs['cobbler']['profile'],
        'power_type': 'ssh',
        'power_user': '******',
        'name_servers': '\"%s\"' % settings.DNS_SERVERS,
        'name_servers_search': '\"%s\"' % settings.DNS_SEARCH,
        'netboot_enabled': '1',
        # For provisioning phase
        'kernel_options': {
            'netcfg/choose_interface': node.admin_interface.mac,
            'udevrules': cls.interfaces_mapping_for_udev(node)
        },
        'ks_meta': {
            'pm_data': {
                'ks_spaces': node.attributes.volumes,
                'kernel_params': node.kernel_params
            },
            'fuel_version': node.cluster.fuel_version,
            'puppet_auto_setup': 1,
            'puppet_master': settings.PUPPET_MASTER_HOST,
            'puppet_enable': 0,
            'mco_auto_setup': 1,
            'install_log_2_syslog': 1,
            'mco_pskey': settings.MCO_PSKEY,
            'mco_vhost': settings.MCO_VHOST,
            'mco_host': settings.MCO_HOST,
            'mco_user': settings.MCO_USER,
            'mco_password': settings.MCO_PASSWORD,
            'mco_connector': settings.MCO_CONNECTOR,
            'mco_enable': 1,
            'auth_key': "\"%s\"" % cluster_attrs.get('auth_key', '')
        }
    }

    # Release-level repo metadata overrides, when configured.
    orchestrator_data = objects.Release.get_orchestrator_data_dict(
        node.cluster.release)
    if orchestrator_data:
        serialized_node['ks_meta']['repo_metadata'] = \
            orchestrator_data['repo_metadata']

    # 'kernel_lt' vlan splinters mode requires the LT kernel package.
    vlan_splinters = cluster_attrs.get('vlan_splinters', None)
    if vlan_splinters == 'kernel_lt':
        serialized_node['ks_meta']['kernel_lt'] = 1

    serialized_node.update(cls.serialize_interfaces(node))

    return serialized_node
def _update_parent_instance(cls, instance):
    """Recompute a parent task's status/progress from its subtasks.

    - all subtasks ready  -> parent ready, progress 100,
      messages joined;
    - any subtask errored -> remaining subtasks aborted, parent
      errored with the distinct error messages joined;
    - otherwise           -> parent goes/stays running and its
      progress is derived from the subtasks that report one.
    """
    subtasks = instance.subtasks
    if len(subtasks):
        data = dict()

        if all(map(lambda s: s.status == consts.TASK_STATUSES.ready,
                   subtasks)):

            data['status'] = consts.TASK_STATUSES.ready
            data['progress'] = 100
            data['message'] = u'\n'.join(map(
                lambda s: s.message, filter(
                    lambda s: s.message is not None, subtasks)))

            cls.update(instance, data)
            TaskHelper.update_action_log(instance)

        elif any(map(lambda s: s.status == consts.TASK_STATUSES.error,
                     subtasks)):

            for subtask in subtasks:
                if subtask.status not in (consts.TASK_STATUSES.error,
                                          consts.TASK_STATUSES.ready):
                    # Abort whatever is still pending/running so the
                    # parent can settle into a final state.
                    subtask.status = consts.TASK_STATUSES.error
                    subtask.progress = 100
                    subtask.message = "Task aborted"

            data['status'] = consts.TASK_STATUSES.error
            data['progress'] = 100
            data['message'] = u'\n'.join(list(set(map(
                lambda s: (s.message or ""), filter(
                    lambda s: (
                        s.status == consts.TASK_STATUSES.error and not
                        # TODO(aroma): make this check less ugly
                        s.message == "Task aborted"
                    ), subtasks)))))

            cls.update(instance, data)
            TaskHelper.update_action_log(instance)

        elif instance.status == consts.TASK_STATUSES.pending and any(
                map(lambda s: s.status in
                    (consts.TASK_STATUSES.running,
                     consts.TASK_STATUSES.ready), subtasks)):
            instance.status = consts.TASK_STATUSES.running

        else:
            # Aggregate progress only over subtasks that report one.
            subtasks_with_progress = filter(
                lambda s: s.progress is not None,
                subtasks
            )
            if subtasks_with_progress:
                instance.progress = \
                    TaskHelper.calculate_parent_task_progress(
                        subtasks_with_progress
                    )
            else:
                instance.progress = 0
def prepare_for_provisioning(self):
    """Give every node awaiting provisioning a FQDN and admin-net IPs."""
    # Imported lazily; presumably to avoid circular imports at
    # module load time.
    from nailgun.network.manager import NetworkManager
    from nailgun.task.helpers import TaskHelper

    netmanager = NetworkManager()
    pending = TaskHelper.nodes_to_provision(self)
    TaskHelper.update_slave_nodes_fqdn(pending)
    for node in pending:
        iface_count = len(node.meta.get('interfaces', []))
        netmanager.assign_admin_ips(node.id, iface_count)
def test_repo_and_puppet_data(self):
    """Orchestrator data set on a release must surface in deployment facts."""
    release_id = self.env.create_release().id

    # Attach repo/puppet orchestrator data to the release via the API.
    orch_data = {
        "repo_metadata": {
            "nailgun":
            "http://10.20.0.2:8080/centos-5.0/centos/fuelweb/x86_64/"
        },
        "puppet_modules_source":
        "rsync://10.20.0.2/puppet/release/5.0/modules",
        "puppet_manifests_source":
        "rsync://10.20.0.2/puppet/release/5.0/manifests"
    }
    resp = self.app.put(
        reverse('ReleaseHandler', kwargs={'obj_id': release_id}),
        params=json.dumps(
            {
                "orchestrator_data": orch_data
            }
        ),
        headers=self.default_headers,
        expect_errors=True
    )
    self.assertEquals(200, resp.status_code)

    # One-controller cluster on that release.
    cluster_id = self.env.create(
        cluster_kwargs={
            'release_id': release_id
        },
        nodes_kwargs=[
            {'roles': ['controller'], 'pending_addition': True}
        ]
    )["id"]

    cluster = self.db.query(Cluster).get(cluster_id)
    TaskHelper.prepare_for_deployment(cluster.nodes)
    facts = self.serializer.serialize(cluster, cluster.nodes)

    # The serialized facts must echo the orchestrator data verbatim.
    self.assertEquals(1, len(facts))
    fact = facts[0]
    self.assertEquals(
        fact['repo_metadata'],
        {
            'nailgun': 'http://10.20.0.2:8080'
            '/centos-5.0/centos/fuelweb/x86_64/'
        }
    )
    self.assertEquals(
        fact['puppet_modules_source'],
        'rsync://10.20.0.2/puppet/release/5.0/modules'
    )
    self.assertEquals(
        fact['puppet_manifests_source'],
        'rsync://10.20.0.2/puppet/release/5.0/manifests'
    )
def reset_environment_resp(cls, **kwargs):
    """RPC handler for an environment-reset response from astute.

    On status "ready": returns the cluster to the 'new' state, restores
    pending changes (attributes, networks, per-node disks), flips the
    reported nodes back to bootstrap ('discover', pending_addition) and
    swaps roles back to pending_roles. Always records the final task
    status at the end.
    """
    logger.info(
        "RPC method reset_environment_resp received: %s",
        json.dumps(kwargs))
    task_uuid = kwargs.get('task_uuid')
    nodes = kwargs.get('nodes', [])
    ia_nodes = kwargs.get('inaccessible_nodes', [])
    message = kwargs.get('error')
    status = kwargs.get('status')
    progress = kwargs.get('progress')

    task = TaskHelper.get_task_by_uuid(task_uuid)

    if status == "ready":
        # restoring pending changes
        task.cluster.status = "new"
        objects.Cluster.add_pending_changes(task.cluster, "attributes")
        objects.Cluster.add_pending_changes(task.cluster, "networks")

        for node in task.cluster.nodes:
            objects.Cluster.add_pending_changes(
                task.cluster, "disks", node_id=node.id)

        # Both successfully reset and inaccessible nodes go back to
        # the discovery state.
        update_nodes = db().query(Node).filter(
            Node.id.in_(
                [n["uid"] for n in itertools.chain(nodes, ia_nodes)]),
            Node.cluster_id == task.cluster_id
        ).yield_per(100)

        # Bulk UPDATE first ('fetch' keeps the session in sync) ...
        update_nodes.update(
            {
                "online": False,
                "status": "discover",
                "pending_addition": True,
                "pending_deletion": False,
            },
            synchronize_session='fetch')

        # ... then per-row: swap roles back to pending_roles, which a
        # bulk UPDATE cannot express.
        for n in update_nodes:
            n.roles, n.pending_roles = n.pending_roles, n.roles

        db().commit()

        if ia_nodes:
            cls._notify_inaccessible(
                task.cluster_id,
                [n["uid"] for n in ia_nodes],
                u"environment resetting")

        message = (u"Environment '{0}' "
                   u"was successfully reset".format(
                       task.cluster.name or task.cluster_id))

        notifier.notify("done", message, task.cluster_id)

    TaskHelper.update_task_status(task.uuid, status, progress, message)
def _update_cluster_data(cls, instance):
    """Propagate a finished task's outcome to its cluster and nodes.

    Dispatches on the task name ('deploy', 'deployment', 'provision',
    'stop_deployment', 'update'); on error outcomes also marks the
    affected nodes as errored with the matching error type.
    """
    cluster = instance.cluster

    if instance.name == "deploy":
        if instance.status == "ready":
            # If for some reasons orchestrator
            # didn't send ready status for node
            # we should set it explicitly
            for n in cluster.nodes:
                if n.status == "deploying":
                    n.status = "ready"
                    n.progress = 100

            cls.__update_cluster_status(cluster, "operational")

            Cluster.clear_pending_changes(cluster)

        elif instance.status == "error" and \
                not TaskHelper.before_deployment_error(instance):
            # We don't want to set cluster status to
            # error because we don't want to lock
            # settings if cluster wasn't deployed
            cls.__update_cluster_status(cluster, "error")

    elif instance.name == "deployment" and instance.status == "error":
        cls.__update_cluster_status(cluster, "error")

        q_nodes_to_error = \
            TaskHelper.get_nodes_to_deployment_error(cluster)

        cls.__update_nodes_to_error(q_nodes_to_error,
                                    error_type="deploy")

    elif instance.name == "provision" and instance.status == "error":
        cls.__update_cluster_status(cluster, "error")

        q_nodes_to_error = \
            TaskHelper.get_nodes_to_provisioning_error(cluster)

        cls.__update_nodes_to_error(q_nodes_to_error,
                                    error_type="provision")

    elif instance.name == "stop_deployment":
        if instance.status == "error":
            cls.__update_cluster_status(cluster, "error")
        else:
            cls.__update_cluster_status(cluster, "stopped")

    elif instance.name == consts.TASK_NAMES.update:
        if instance.status == consts.TASK_STATUSES.error:
            cls.__update_cluster_status(
                cluster, consts.CLUSTER_STATUSES.update_error)

            q_nodes_to_error = \
                TaskHelper.get_nodes_to_deployment_error(cluster)

            cls.__update_nodes_to_error(
                q_nodes_to_error, error_type=consts.NODE_ERRORS.deploy)

        elif instance.status == consts.TASK_STATUSES.ready:
            cls.__update_cluster_status(
                cluster, consts.CLUSTER_STATUSES.operational)
            # Successful update: promote the pending release.
            cluster.release_id = cluster.pending_release_id
            cluster.pending_release_id = None
def download_release_resp(cls, **kwargs):
    """RPC handler for the download-release task's response.

    Finalizes the release when the download reaches 100%% and records
    the reported task status.
    """
    logger.info("RPC method download_release_resp received: %s" % kwargs)

    task_uuid = kwargs.get('task_uuid')
    status = kwargs.get('status')
    progress = kwargs.get('progress')
    error_msg = kwargs.get('error')
    release_info = kwargs.get('release_info')

    if progress == 100:
        cls._download_release_completed(release_info['release_id'])

    TaskHelper.update_task_status(task_uuid, status, progress, error_msg)
def serialize(cluster, nodes):
    """Serialize nodes for deployment; the serializer depends on the
    cluster's deployment mode.

    :param cluster: cluster DB object ('multinode' mode or HA mode)
    :param nodes: nodes to serialize
    :returns: serialized deployment facts
    :raises ValueError: if the cluster mode is neither 'multinode' nor HA
    """
    TaskHelper.prepare_for_deployment(cluster.nodes)
    if cluster.mode == 'multinode':
        serializer = DeploymentMultinodeSerializer
    elif cluster.is_ha_mode:
        serializer = DeploymentHASerializer
    else:
        # BUG FIX: previously an unsupported mode left `serializer`
        # unbound and crashed with UnboundLocalError; fail explicitly.
        raise ValueError(
            "Cannot serialize cluster: unsupported mode %r" % cluster.mode)
    return serializer.serialize(cluster, nodes)
def serialize_node(cls, cluster_attrs, node):
    """Serialize a single node.

    Builds the provisioning (cobbler) representation of *node* from the
    node object, cluster attributes and global settings.

    :param cluster_attrs: dict of cluster attributes (expects
        'cobbler' -> 'profile', optional 'auth_key', 'vlan_splinters')
    :param node: node DB object
    :returns: dict consumed by the provisioning backend

    NOTE(review): this method appears to be a duplicate of an identical
    serialize_node defined elsewhere in this file — consider sharing a
    single implementation; verify both call sites first.
    """
    serialized_node = {
        'uid': node.uid,
        'power_address': node.ip,
        'name': TaskHelper.make_slave_name(node.id),
        # right now it duplicates to avoid possible issues
        'slave_name': TaskHelper.make_slave_name(node.id),
        'hostname': node.fqdn,
        'power_pass': cls.get_ssh_key_path(node),

        'profile': cluster_attrs['cobbler']['profile'],
        'power_type': 'ssh',
        'power_user': '******',
        'name_servers': '\"%s\"' % settings.DNS_SERVERS,
        'name_servers_search': '\"%s\"' % settings.DNS_SEARCH,
        'netboot_enabled': '1',
        # For provisioning phase
        'kernel_options': {
            'netcfg/choose_interface': node.admin_interface.mac,
            'udevrules': cls.interfaces_mapping_for_udev(node)},
        'ks_meta': {
            'pm_data': {
                'ks_spaces': node.attributes.volumes,
                'kernel_params': node.kernel_params},
            'fuel_version': node.cluster.fuel_version,
            'puppet_auto_setup': 1,
            'puppet_master': settings.PUPPET_MASTER_HOST,
            'puppet_enable': 0,
            'mco_auto_setup': 1,
            'install_log_2_syslog': 1,
            'mco_pskey': settings.MCO_PSKEY,
            'mco_vhost': settings.MCO_VHOST,
            'mco_host': settings.MCO_HOST,
            'mco_user': settings.MCO_USER,
            'mco_password': settings.MCO_PASSWORD,
            'mco_connector': settings.MCO_CONNECTOR,
            'mco_enable': 1,
            'auth_key': "\"%s\"" % cluster_attrs.get('auth_key', '')}}

    # Release-level repo metadata, when present, goes into ks_meta.
    orchestrator_data = objects.Release.get_orchestrator_data_dict(
        node.cluster.release)
    if orchestrator_data:
        serialized_node['ks_meta']['repo_metadata'] = \
            orchestrator_data['repo_metadata']

    vlan_splinters = cluster_attrs.get('vlan_splinters', None)
    if vlan_splinters == 'kernel_lt':
        serialized_node['ks_meta']['kernel_lt'] = 1

    serialized_node.update(cls.serialize_interfaces(node))

    return serialized_node
def _error_action(cls, task, status, progress, message=None):
    """Notify about a failed deployment and record the task status.

    When no explicit message is given, the notification lists the
    nodes that failed to deploy or provision.
    """
    if message:
        text = u"Deployment has failed. {0}".format(message)
    else:
        failed_nodes = cls._generate_error_message(
            task,
            error_types=('deploy', 'provision'),
            names_only=True)
        text = u"Deployment has failed. Check these nodes:\n{0}".format(
            failed_nodes)
    notifier.notify("error", text, task.cluster_id)
    TaskHelper.update_task_status(task.uuid, status, progress, text)
def create_env(self, mode, network_manager="FlatDHCPManager"):
    """Create a four-node cluster (controller+cinder, two computes,
    one pending-cinder) in the given mode and prepare it for
    deployment."""
    nodes = [
        {"roles": ["controller", "cinder"], "pending_addition": True},
        {"roles": ["compute", "cinder"], "pending_addition": True},
        {"roles": ["compute"], "pending_addition": True},
        {"roles": [], "pending_roles": ["cinder"],
         "pending_addition": True},
    ]
    created = self.env.create(
        cluster_kwargs={"mode": mode, "net_manager": network_manager},
        nodes_kwargs=nodes)
    cluster_db = self.db.query(Cluster).get(created["id"])
    TaskHelper.prepare_for_deployment(cluster_db.nodes)
    return cluster_db
def test_update_nodes_to_error_if_provision_task_failed(self):
    """A failed provision task errors the cluster and the provisioning
    node, leaving the other nodes untouched."""
    first_node = self.cluster.nodes[0]
    first_node.status = 'provisioning'
    first_node.progress = 12

    failed_task = Task(name='provision',
                       cluster=self.cluster,
                       status='error')
    self.db.add(failed_task)
    self.db.commit()

    TaskHelper.update_cluster_status(failed_task.uuid)

    self.assertEquals(self.cluster.status, 'error')
    self.node_should_be_error_with_type(first_node, 'provision')
    self.nodes_should_not_be_error(self.cluster.nodes[1:])
def test_serialize_node(self):
    """serialize_node must expose role, uid, status, online and fqdn."""
    created = self.env.create_node(api=True,
                                   cluster_id=self.cluster.id,
                                   pending_addition=True)
    TaskHelper.prepare_for_deployment(self.cluster.nodes)

    node_db = self.db.query(Node).get(created["id"])
    serialized = self.serializer.serialize_node(node_db, "controller")

    self.assertEquals(serialized["role"], "controller")
    self.assertEquals(serialized["uid"], str(node_db.id))
    self.assertEquals(serialized["status"], node_db.status)
    self.assertEquals(serialized["online"], node_db.online)
    expected_fqdn = "node-%d.%s" % (node_db.id, settings.DNS_DOMAIN)
    self.assertEquals(serialized["fqdn"], expected_fqdn)
def remove_nodes_resp(cls, **kwargs):
    """RPC handler for astute's node-removal response.

    Deletes successfully removed nodes from the DB, marks the failed
    ones as 'error', sends user notifications summarizing the result
    and records the final task status.
    """
    logger.info("RPC method remove_nodes_resp received: %s" % kwargs)
    task_uuid = kwargs.get('task_uuid')
    nodes = kwargs.get('nodes') or []
    error_nodes = kwargs.get('error_nodes') or []
    error_msg = kwargs.get('error')
    status = kwargs.get('status')
    progress = kwargs.get('progress')

    for node in nodes:
        node_db = cls.db.query(Node).get(node['uid'])
        if not node_db:
            logger.error(
                u"Failed to delete node '%s': node doesn't exist",
                str(node)
            )
            # BUG FIX: was 'break', which silently skipped the deletion
            # of every remaining node after the first missing one; the
            # per-node log message shows a per-node skip was intended.
            continue
        cls.db.delete(node_db)

    for node in error_nodes:
        node_db = cls.db.query(Node).get(node['uid'])
        if not node_db:
            logger.error(
                u"Failed to delete node '%s' marked as error from Naily:"
                " node doesn't exist", str(node)
            )
            # Same fix as above: keep processing the remaining nodes.
            continue
        node_db.pending_deletion = False
        node_db.status = 'error'
        cls.db.add(node_db)
        node['name'] = node_db.name
    cls.db.commit()

    success_msg = u"No nodes were removed"
    err_msg = u"No errors occurred"
    if nodes:
        success_msg = u"Successfully removed {0} node(s)".format(
            len(nodes)
        )
        notifier.notify("done", success_msg)
    if error_nodes:
        err_msg = u"Failed to remove {0} node(s): {1}".format(
            len(error_nodes),
            ', '.join(
                [n.get('name') or "ID: {0}".format(n['uid'])
                 for n in error_nodes])
        )
        notifier.notify("error", err_msg)
    if not error_msg:
        error_msg = ". ".join([success_msg, err_msg])

    TaskHelper.update_task_status(task_uuid, status, progress, error_msg)
def _success_action(cls, task, status, progress):
    """Notify the user about a successfully finished deployment.

    Builds a "done" message that includes the Horizon URL when it can
    be determined: the first controller's public IP in singlenode /
    multinode mode, or the public VIP from the task cache in "ha"
    mode. Records the final task status afterwards.
    """
    network_manager = NetworkManager()
    # Deployment is not a success if any node ended up in error.
    if any(map(lambda n: n.status == "error", task.cluster.nodes)):
        cls._error_action(task, "error", 100)
        return

    if task.cluster.mode in ("singlenode", "multinode"):
        # determining horizon url - it's an IP
        # of a first cluster controller
        controller = (
            db()
            .query(Node)
            .filter_by(cluster_id=task.cluster_id)
            .filter(Node.role_list.any(name="controller"))
            .first()
        )
        if controller:
            logger.debug(
                u"Controller is found, node_id=%s, "
                "getting it's IP addresses",
                controller.id)
            public_net = filter(
                lambda n: n["name"] == "public" and "ip" in n,
                network_manager.get_node_networks(controller.id)
            )
            if public_net:
                horizon_ip = public_net[0]["ip"].split("/")[0]
                message = (
                    u"Deployment of environment '{0}' is done. "
                    "Access the OpenStack dashboard (Horizon) at "
                    "http://{1}/ or via internal network at http://{2}/"
                ).format(task.cluster.name, horizon_ip, controller.ip)
            else:
                message = (u"Deployment of environment"
                           " '{0}' is done").format(task.cluster.name)
                logger.warning(u"Public ip for controller node "
                               "not found in '{0}'".format(
                                   task.cluster.name))
        else:
            message = (u"Deployment of environment"
                       " '{0}' is done").format(task.cluster.name)
            logger.warning(u"Controller node not found in '{0}'".format(
                task.cluster.name))
    elif task.cluster.mode == "ha":
        # determining horizon url in HA mode - it's vip
        # from a public network saved in task cache
        args = task.cache.get("args")
        try:
            vip = args["attributes"]["public_vip"]
            # BUG FIX: this literal was corrupted by a stray line break
            # inside the string; restored to the intended message.
            message = (
                u"Deployment of environment '{0}' is done. "
                "Access the OpenStack dashboard (Horizon) at http://{1}/"
            ).format(task.cluster.name, vip)
        except Exception as exc:
            logger.error(": ".join([str(exc), traceback.format_exc()]))
            message = (u"Deployment of environment"
                       " '{0}' is done").format(task.cluster.name)
            logger.warning(u"Cannot find virtual IP for '{0}'".format(
                task.cluster.name))
    # NOTE(review): any other mode (e.g. 'ha_compact') leaves `message`
    # unbound and would raise NameError below — confirm callers only
    # pass singlenode/multinode/"ha" clusters here.

    notifier.notify("done", message, task.cluster_id)
    TaskHelper.update_task_status(task.uuid, status, progress, message)
def test_serialize_node(self):
    """Check the basic fields produced by serialize_node."""
    created = self.env.create_node(
        api=True, cluster_id=self.cluster.id, pending_addition=True)
    TaskHelper.prepare_for_deployment(self.cluster.nodes)
    node_db = self.db.query(Node).get(created['id'])

    data = self.serializer.serialize_node(node_db, 'controller')

    expected = {
        'role': 'controller',
        'uid': str(node_db.id),
        'status': node_db.status,
        'online': node_db.online,
        'fqdn': 'node-%d.%s' % (node_db.id, settings.DNS_DOMAIN),
    }
    for key in ('role', 'uid', 'status', 'online', 'fqdn'):
        self.assertEquals(data[key], expected[key])
def prepare_for_deployment(self):
    """Assign FQDNs and management/public/storage IPs to every node
    that is about to be deployed or is still provisioning."""
    # Imported lazily; presumably to avoid circular imports at
    # module load time.
    from nailgun.network.manager import NetworkManager
    from nailgun.task.helpers import TaskHelper

    nodes = set(TaskHelper.nodes_to_deploy(self) +
                TaskHelper.nodes_in_provisioning(self))

    TaskHelper.update_slave_nodes_fqdn(nodes)

    node_ids = sorted(n.id for n in nodes)

    netmanager = NetworkManager()
    if node_ids:
        for network in ('management', 'public', 'storage'):
            netmanager.assign_ips(node_ids, network)