def check_node_status(cluster_id, node_number, node_id):
    """Check Node Status factory function

    This factory function builds a linear flow that loads one node record,
    reads its management ip, runs the RabbitMQ cluster status task against
    that ip and writes the resulting status back to the node record.

    A reported status of ``'OK'`` is stored as ``models.Status.ACTIVE``;
    any other value is stored as ``models.Status.DOWN``.

    :param cluster_id: Unique ID for the cluster that the node is part of.
    :type cluster_id: string
    :param node_number: Cluster node # for the node being checked.
    :type node_number: number
    :param node_id: Unique ID for the node.
    :type node_id: string
    :return: A flow instance that represents the workflow for checking
             cluster status on a node.
    """
    label = "cluster[%s].node[%d]" % (cluster_id, node_number)

    # Storage keys used to hand values from one task to the next; each one
    # carries the node number as a suffix.
    node_key = "node_%d" % node_number
    mgmt_ip_key = "vm_mgmt_ip_%d" % node_number
    status_key = "node_status_%d" % node_number
    values_key = "node_values_%d" % node_number

    active_values = {'status': models.Status.ACTIVE}
    down_values = {'status': models.Status.DOWN}

    def management_ip_of(node):
        return node['management_ip']

    def values_for(node_status):
        if node_status == 'OK':
            return active_values
        return down_values

    fetch_node = cue_tasks.GetNode(
        name="Get Node %s" % label,
        inject={'node_id': node_id},
        provides=node_key)

    pick_management_ip = os_common.Lambda(
        management_ip_of,
        name="extract management ip %s" % label,
        rebind={'node': node_key},
        provides=mgmt_ip_key)

    fetch_rabbit_status = cue_tasks.GetRabbitClusterStatus(
        name="get RabbitMQ cluster status_%d" % node_number,
        rebind={'vm_ip': mgmt_ip_key},
        provides=status_key,
        inject={'proto': 'http'})

    build_values = os_common.Lambda(
        values_for,
        name="build node values %s" % label,
        rebind={'node_status': status_key},
        provides=values_key)

    store_status = cue_tasks.UpdateNodeRecord(
        name="update node status %s" % label,
        rebind={'node_values': values_key},
        inject={'node_id': node_id})

    status_flow = linear_flow.Flow(
        "check cluster %s status on node %d" % (cluster_id, node_number))
    status_flow.add(fetch_node,
                    pick_management_ip,
                    fetch_rabbit_status,
                    build_values,
                    store_status)
    return status_flow
def check_cluster_status(cluster_id, node_ids):
    """Check Cluster status factory function

    This factory function uses
    :func:`cue.taskflow.flow.check_node_status` to build one status-check
    sub-flow per node (grouped in an unordered flow), reduces the per-node
    ``node_status_<n>`` values into a single ``cluster_status`` and stores
    the translated result on the cluster record.

    A reduced status of ``'OK'`` is stored as ``models.Status.ACTIVE``;
    any other value is stored as ``models.Status.DOWN``.

    :param cluster_id: A unique ID assigned to the cluster being checked
    :type cluster_id: string
    :param node_ids: The Cue Node id's associated with each node in the
                     cluster
    :type node_ids: list of uuid strings
    :return: A flow instance that represents the workflow for checking
             cluster status
    """
    active_values = {'status': models.Status.ACTIVE}
    down_values = {'status': models.Status.DOWN}

    def to_cluster_values(cluster_status):
        if cluster_status == 'OK':
            return active_values
        return down_values

    per_node_checks = unordered_flow.Flow("check status of VMs")
    for position, current_id in enumerate(node_ids):
        per_node_checks.add(
            check_node_status.check_node_status(cluster_id, position,
                                                current_id))

    status_keys = ["node_status_%d" % position
                   for position in range(len(node_ids))]
    # node_status_0 is listed a second time so the two-argument reducer
    # still has a second operand when the cluster has a single node.
    status_keys.append('node_status_0')

    # No explicit name is passed to Reduce, so the functor is kept as an
    # anonymous lambda in case the task name is derived from the callable.
    combine_statuses = os_common.Reduce(
        lambda a, b: a if (a == 'OK') else b,
        provides='cluster_status',
        requires=status_keys,
    )

    translate_status = os_common.Lambda(
        to_cluster_values,
        name="translate status to active or down",
        provides="final_cluster_status")

    store_status = cue_tasks.UpdateClusterRecord(
        name="update cluster status %s" % cluster_id,
        inject={'cluster_id': cluster_id, 'project_only': False},
        rebind={'cluster_values': "final_cluster_status"})

    cluster_flow = linear_flow.Flow("check cluster status %s" % cluster_id)
    cluster_flow.add(per_node_checks,
                     combine_statuses,
                     translate_status,
                     store_status)
    return cluster_flow
def create_cluster(cluster_id, node_ids, user_network_id,
                   management_network_id):
    """Create Cluster flow factory function

    This factory function uses :func:`cue.taskflow.flow.create_cluster_node`
    to create a multi node cluster.

    The resulting graph flow:

    * marks the cluster record as BUILDING,
    * when ``cfg.CONF.taskflow.cluster_node_anti_affinity`` is set, creates
      an anti-affinity VM group and derives ``scheduler_hints`` and the
      final ``cluster_values`` (status ACTIVE plus ``group_id``) from it,
    * looks up the user network and asserts it is shared or owned by
      ``tenant_id``,
    * adds the per-node creation tasks for every entry of ``node_ids``,
    * waits (with retries) for the RabbitMQ status of node 0 to be ``'OK'``,
    * updates the cluster record with the final values.

    :param cluster_id: A unique ID assigned to the cluster being created
    :type cluster_id: string
    :param node_ids: The Cue Node id's associated with each node in the
                     cluster
    :type node_ids: list of uuid strings
    :param user_network_id: The user's network id
    :type user_network_id: string
    :param management_network_id: The management network id
    :type management_network_id: string
    :return: A flow instance that represents the workflow for creating a
             cluster
    :raises TypeError: if ``node_ids`` is a single string instead of a
                       collection of node ids
    """
    # A bare string would be enumerated character by character below and
    # silently produce one bogus node per character, so reject it up front.
    # type(u"") covers ``unicode`` on Python 2 and is ``str`` on Python 3.
    if isinstance(node_ids, (bytes, str, type(u""))):
        raise TypeError(
            "node_ids must be a list of node id strings, not a single "
            "string: %r" % (node_ids,))

    cluster_name = "cue[%s]" % cluster_id
    flow = graph_flow.Flow("creating cluster %s" % cluster_id)
    start_flow_cluster_update = {
        'cluster_id': cluster_id,
        'cluster_values': {'status': models.Status.BUILDING}}

    create_cluster_start_task = cue_tasks.UpdateClusterRecord(
        name="update cluster status start %s" % cluster_name,
        inject=start_flow_cluster_update)
    flow.add(create_cluster_start_task)

    cluster_anti_affinity = cfg.CONF.taskflow.cluster_node_anti_affinity
    if cluster_anti_affinity:
        extract_scheduler_hints = lambda vm_group: {
            'group': str(vm_group['id'])}
        build_end_cluster_values = lambda vm_group: {
            'status': models.Status.ACTIVE,
            'group_id': str(vm_group['id'])}

        create_vm_group = nova.CreateVmGroup(
            name="create cluster group %s" % cluster_name,
            os_client=client.nova_client(),
            requires=('name', 'policies'),
            inject={'name': "cue_group_%s" % cluster_id,
                    'policies': ['anti-affinity']},
            provides="cluster_group")
        flow.add(create_vm_group)

        get_scheduler_hints = os_common.Lambda(
            extract_scheduler_hints,
            name="extract scheduler hints %s" % cluster_name,
            rebind={'vm_group': "cluster_group"},
            provides="scheduler_hints")
        flow.add(get_scheduler_hints)

        build_cluster_info = os_common.Lambda(
            build_end_cluster_values,
            name="build new cluster update values %s" % cluster_name,
            rebind={'vm_group': "cluster_group"},
            provides="cluster_values")
        flow.add(build_cluster_info)

        flow.link(create_cluster_start_task, create_vm_group)
        flow.link(create_vm_group, get_scheduler_hints)
        flow.link(get_scheduler_hints, build_cluster_info)
        create_node_start_task = build_cluster_info
        # cluster_values is not injected here; it is the value provided by
        # build_cluster_info above.
        create_cluster_end_task = cue_tasks.UpdateClusterRecord(
            name="update cluster status end %s" % cluster_name,
            inject={'cluster_id': cluster_id})
    else:
        create_node_start_task = create_cluster_start_task
        create_cluster_end_task = cue_tasks.UpdateClusterRecord(
            name="update cluster status end %s" % cluster_name,
            inject={
                'cluster_id': cluster_id,
                'cluster_values': {'status': models.Status.ACTIVE}})

    flow.add(create_cluster_end_task)

    show_network = os_neutron.ShowNetwork(
        name="get tenant network information",
        os_client=client.neutron_client(),
        inject={'network': user_network_id},
        provides="tenant_network_info"
    )
    flow.add(show_network)
    flow.link(create_node_start_task, show_network)

    # The user network is acceptable when it is shared or belongs to the
    # tenant the flow runs for.
    validate_network_info = (lambda tenant_network_info, tenant_id:
                             tenant_network_info['shared'] or
                             tenant_network_info['tenant_id'] == tenant_id)

    validate_tenant_network = os_common.Assert(
        validate_network_info,
        name="validate tenant network info",
        requires=('tenant_network_info', 'tenant_id')
    )
    flow.add(validate_tenant_network)
    flow.link(show_network, validate_tenant_network)

    node_check_timeout = cfg.CONF.taskflow.cluster_node_check_timeout
    node_check_max_count = cfg.CONF.taskflow.cluster_node_check_max_count

    # Only the management ip of node 0 is polled for the RabbitMQ status.
    check_rabbit_online = linear_flow.Flow(
        name="wait for RabbitMQ ready state %s" % cluster_name,
        retry=retry.Times(node_check_max_count, revert_all=True))
    check_rabbit_online.add(
        cue_tasks.GetRabbitClusterStatus(
            name="get RabbitMQ status %s" % cluster_name,
            rebind={'vm_ip': "vm_management_ip_0"},
            provides="clustering_status",
            inject={'proto': 'http'}),
        os_common.CheckFor(
            name="check cluster status %s" % cluster_name,
            details="waiting for RabbitMQ clustered status",
            rebind={'check_var': "clustering_status"},
            check_value='OK',
            retry_delay_seconds=node_check_timeout),
    )
    flow.add(check_rabbit_online)

    flow.link(check_rabbit_online, create_cluster_end_task)

    node_count = len(node_ids)
    for i, node_id in enumerate(node_ids):
        generate_userdata = cue_tasks.ClusterNodeUserData(
            name="generate userdata %s_%d" % (cluster_name, i),
            node_count=node_count,
            node_ip_prefix="vm_management_ip_",
            inject={'node_name': "rabbit-node-%d" % i,
                    'cluster_id': cluster_id})
        flow.add(generate_userdata)

        # Per-node tasks are added to ``flow`` in place, starting after the
        # network validation and finishing before the RabbitMQ check.
        create_cluster_node.create_cluster_node(cluster_id, i, node_id, flow,
                                                generate_userdata,
                                                validate_tenant_network,
                                                check_rabbit_online,
                                                user_network_id,
                                                management_network_id)

    return flow
def delete_cluster_node(cluster_id, node_number, node_id):
    """Delete Cluster Node factory function

    This factory function creates a linear flow for deleting a node of a
    cluster: it loads the node record, lists the VM's interfaces, deletes
    the VM and the ports found on those interfaces, then flags the node
    record and its endpoint records as deleted.

    :param cluster_id: Unique ID for the cluster that the node is part of.
    :type cluster_id: string
    :param node_number: Cluster node # for the node being deleted.
    :type node_number: number
    :param node_id: Unique ID for the node.
    :type node_id: string
    :return: A flow instance that represents the workflow for deleting a
             cluster node.
    """
    label = "cluster[%s].node[%d]" % (cluster_id, node_number)

    # Storage keys used to hand values from one task to the next; each one
    # carries the node number as a suffix.
    node_key = "node_%d" % node_number
    vm_id_key = "vm_id_%d" % node_number
    interfaces_key = "vm_interfaces_%d" % node_number
    port_list_key = "vm_port_list_%d" % node_number

    def instance_id_of(node):
        return node['instance_id']

    def port_ids_of(interfaces):
        return [iface['port_id'] for iface in interfaces]

    node_deleted = {'status': models.Status.DELETED, 'deleted': True}
    endpoints_deleted = {'deleted': True}

    # Tasks are listed in execution order.
    steps = [
        cue_tasks.GetNode(
            name="Get Node %s" % label,
            inject={'node_id': node_id},
            provides=node_key),
        os_common.Lambda(
            instance_id_of,
            name="extract vm id %s" % label,
            rebind={'node': node_key},
            provides=vm_id_key),
        nova.ListVmInterfaces(
            os_client=client.nova_client(),
            name="list vm interfaces %s" % label,
            rebind={'server': vm_id_key},
            inject={'ignore_nova_not_found_exception': True},
            provides=interfaces_key),
        os_common.Lambda(
            port_ids_of,
            name="extract port ids %s" % label,
            rebind={'interfaces': interfaces_key},
            provides=port_list_key),
        nova.DeleteVm(
            os_client=client.nova_client(),
            name="delete vm %s" % label,
            rebind={'server': vm_id_key}),
        neutron.DeletePorts(
            os_client=client.neutron_client(),
            name="delete vm %s ports" % label,
            rebind={'port_ids': port_list_key}),
        cue_tasks.UpdateNodeRecord(
            name="update node %s" % label,
            inject={'node_id': node_id,
                    'node_values': node_deleted}),
        cue_tasks.UpdateEndpointsRecord(
            name="update endpoint for node %s" % label,
            inject={'node_id': node_id,
                    'endpoints_values': endpoints_deleted}),
    ]

    removal_flow = linear_flow.Flow(
        "delete cluster %s node %d" % (cluster_id, node_number))
    removal_flow.add(*steps)
    return removal_flow
def create_cluster_node(cluster_id, node_number, node_id, graph_flow,
                        generate_userdata, start_task, post_task,
                        user_network_id, management_network_id):
    """Create Cluster Node factory function

    This factory function adds the tasks for creating one node of a cluster
    to an existing graph flow and links them between ``start_task`` and
    ``post_task``. The tasks create a user port and a management port,
    boot a VM attached to both ports, wait for the VM status check to pass,
    update the node record and create the node's endpoint record.

    All values passed between tasks use storage keys suffixed with
    ``node_number`` (for example ``vm_management_ip_<n>``).

    :param cluster_id: Unique ID for the cluster that the node is part of.
    :type cluster_id: string
    :param node_number: Cluster node # for the node being created.
    :type node_number: number
    :param node_id: Unique ID for the node.
    :type node_id: string
    :param graph_flow: TaskFlow graph flow which contains create cluster flow;
                       it is modified in place
    :type graph_flow: taskflow.patterns.graph_flow
    :param generate_userdata: generate user data task; linked to run before
                              the VM is created
    :type generate_userdata: cue.taskflow.task.ClusterNodeUserData
    :param start_task: Task that must complete before the ports for this
                       node are created
    :type start_task: taskflow task
    :param post_task: Task/Subflow to run after the flow created here
    :type post_task: taskflow task or flow
    :param user_network_id: The user's network id
    :type user_network_id: string
    :param management_network_id: The management network id
    :type management_network_id: string
    :return: Nothing is returned by the code visible here; the new tasks are
             added to ``graph_flow`` directly.
    """
    node_name = "cue[%s].node[%d]" % (cluster_id, node_number)

    # Turns the two created-port results into a 3-tuple:
    # (nics list for the VM, user port ip, management port ip).
    extract_port_info = (
        lambda user_port_info, management_port_info: (
            [
                # nova boot requires a list of port-id's
                {'port-id': user_port_info['port']['id']},
                {'port-id': management_port_info['port']['id']}
            ],
            # user port ip (first fixed ip of the port)
            user_port_info['port']['fixed_ips'][0]['ip_address'],
            # management port ip (first fixed ip of the port)
            management_port_info['port']['fixed_ips'][0]['ip_address']))

    extract_vm_id = lambda vm_info: str(vm_info['id'])

    new_node_values = lambda nova_vm_id, vm_management_ip: {
        'status': models.Status.ACTIVE,
        'instance_id': nova_vm_id,
        'management_ip': vm_management_ip
    }

    # NOTE(review): the uri ends with ':' and no port is appended here --
    # confirm whether a port number is expected after the colon.
    new_endpoint_values = lambda vm_ip: {
        'node_id': node_id,
        'uri': vm_ip + ':',
        'type': 'AMQP'
    }

    # Both ports are created after start_task.
    create_user_port = neutron.CreatePort(
        name="create port %s" % node_name,
        os_client=client.neutron_client(),
        inject={'network_id': user_network_id,
                'port_name': 'user_' + node_name},
        provides="user_port_info_%d" % node_number)
    graph_flow.add(create_user_port)
    graph_flow.link(start_task, create_user_port)

    create_management_port = neutron.CreatePort(
        name="create management port %s" % node_name,
        os_client=client.neutron_client(),
        inject={'network_id': management_network_id,
                'port_name': 'management_' + node_name},
        provides="management_port_info_%d" % node_number)
    graph_flow.add(create_management_port)
    graph_flow.link(start_task, create_management_port)

    # Only the user port is linked explicitly; the management port result is
    # consumed through the rebind below.
    extract_port_data = os_common.Lambda(
        extract_port_info,
        name="extract port id %s" % node_name,
        rebind={'user_port_info': "user_port_info_%d" % node_number,
                'management_port_info':
                    "management_port_info_%d" % node_number},
        provides=("port_ids_%d" % node_number,
                  "vm_user_ip_%d" % node_number,
                  "vm_management_ip_%d" % node_number))
    graph_flow.add(extract_port_data)
    graph_flow.link(create_user_port, extract_port_data)

    # 'image' and 'flavor' are required but not supplied in this function;
    # presumably they are provided elsewhere in the flow -- verify against
    # the caller.
    create_vm = nova.CreateVm(
        name="create vm %s" % node_name,
        os_client=client.nova_client(),
        requires=('name', 'image', 'flavor', 'nics'),
        inject={'name': node_name},
        rebind={'nics': "port_ids_%d" % node_number},
        provides="vm_info_%d" % node_number)
    graph_flow.add(create_vm)
    graph_flow.link(create_management_port, create_vm)
    graph_flow.link(generate_userdata, create_vm)

    get_vm_id = os_common.Lambda(
        extract_vm_id,
        name="extract vm id %s" % node_name,
        rebind={'vm_info': "vm_info_%d" % node_number},
        provides="vm_id_%d" % node_number)
    graph_flow.add(get_vm_id)
    graph_flow.link(create_vm, get_vm_id)

    # Sub-flow that fetches the VM status and checks it, governed by a retry
    # controller configured with VmErrorException and the configured
    # attempt count.
    retry_count = CONF.flow_options.create_cluster_node_vm_active_retry_count
    check_vm_active = linear_flow.Flow(
        name="wait for VM active state %s" % node_name,
        retry=retry.ExceptionTimes(
            revert_exception_list=[cue_exceptions.VmErrorException],
            attempts=retry_count,
            revert_all=True))
    check_vm_active.add(
        nova.GetVmStatus(
            os_client=client.nova_client(),
            name="get vm %s" % node_name,
            rebind={'nova_vm_id': "vm_id_%d" % node_number},
            provides="vm_status_%d" % node_number),
        cue_tasks.CheckForVmStatus(
            name="check vm status %s" % node_name,
            details="waiting for ACTIVE VM status",
            rebind={'check_var': "vm_status_%d" % node_number},
            retry_delay_seconds=10),
    )
    graph_flow.add(check_vm_active)
    graph_flow.link(get_vm_id, check_vm_active)

    # NOTE(review): the node values (status ACTIVE) are built and stored
    # after get_vm_id, with no explicit link from check_vm_active -- confirm
    # that recording ACTIVE independently of the VM status check is intended.
    build_node_info = os_common.Lambda(
        new_node_values,
        name="build new node values %s" % node_name,
        rebind={'nova_vm_id': "vm_id_%d" % node_number,
                'vm_management_ip': "vm_management_ip_%d" % node_number},
        provides="node_values_%d" % node_number)
    graph_flow.add(build_node_info)
    graph_flow.link(get_vm_id, build_node_info)

    update_node = cue_tasks.UpdateNodeRecord(
        name="update node %s" % node_name,
        rebind={'node_values': "node_values_%d" % node_number},
        inject={'node_id': node_id})
    graph_flow.add(update_node)
    graph_flow.link(build_node_info, update_node)

    # The endpoint is built from the user-network ip once the VM status
    # check has passed. node_id is injected here although the functor also
    # reads it from the enclosing scope.
    build_endpoint_info = os_common.Lambda(
        new_endpoint_values,
        name="build new endpoint values %s" % node_name,
        rebind={'vm_ip': "vm_user_ip_%d" % node_number},
        inject={'node_id': node_id},
        provides="endpoint_values_%d" % node_number)
    graph_flow.add(build_endpoint_info)
    graph_flow.link(check_vm_active, build_endpoint_info)

    create_endpoint = cue_tasks.CreateEndpoint(
        name="update endpoint for node %s" % node_name,
        rebind={'endpoint_values': "endpoint_values_%d" % node_number})
    graph_flow.add(create_endpoint)
    graph_flow.link(check_vm_active, create_endpoint)

    # post_task may only start once both the node record and the endpoint
    # record for this node have been written.
    graph_flow.link(update_node, post_task)
    graph_flow.link(create_endpoint, post_task)