Example #1
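These examples are flow factory functions from the OpenStack Cue project's taskflow layer; the excerpts omit the module-level imports. A plausible preamble, inferred from the names used below (the taskflow and oslo imports are standard, while the Cue-internal paths are assumptions rather than lines copied from the project), would look roughly like:

from oslo_config import cfg
from taskflow.patterns import graph_flow
from taskflow.patterns import linear_flow
from taskflow.patterns import unordered_flow
from taskflow import retry

# The paths below are guesses at Cue's internal layout, inferred from how the
# names are used in these examples.
import cue.client as client
import cue.taskflow.task as cue_tasks
from cue.db.sqlalchemy import models
import os_tasklib.common as os_common
import os_tasklib.neutron as neutron
import os_tasklib.neutron as os_neutron  # presumably the same helpers, second alias
import os_tasklib.nova as nova

CONF = cfg.CONF

# The examples also reference each other as modules (check_node_status,
# create_cluster_node) and a Cue exceptions module (cue_exceptions); those
# would be pulled in the same way.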
def check_node_status(cluster_id, node_number, node_id):
    """Check Node Status factory function

    This factory function creates a flow that checks cluster status on one
    node of a cluster.

    :param cluster_id: Unique ID for the cluster that the node is part of.
    :type cluster_id: string
    :param node_number: Cluster node # for the node being checked.
    :type node_number: number
    :param node_id: Unique ID for the node.
    :type node_id: string
    :return: A flow instance that represents the workflow for checking cluster
             status on a node.
    """

    flow_name = "check cluster %s status on node %d" % (cluster_id,
                                                        node_number)
    node_name = "cluster[%s].node[%d]" % (cluster_id, node_number)

    # Extract management ip
    extract_management_ip = lambda node: node['management_ip']

    new_node_values = lambda node_status: (ok_status if (node_status == 'OK')
                                           else not_ok_status)
    ok_status = {'status': models.Status.ACTIVE}
    not_ok_status = {'status': models.Status.DOWN}

    flow = linear_flow.Flow(flow_name)
    flow.add(
        cue_tasks.GetNode(name="Get Node %s" % node_name,
                          inject={'node_id': node_id},
                          provides="node_%d" % node_number),
        os_common.Lambda(extract_management_ip,
                         name="extract management ip %s" % node_name,
                         rebind={'node': "node_%d" % node_number},
                         provides="vm_mgmt_ip_%d" % node_number),
        cue_tasks.GetRabbitClusterStatus(
            name="get RabbitMQ cluster status_%d" % node_number,
            rebind={'vm_ip': "vm_mgmt_ip_%d" % node_number},
            provides="node_status_%d" % node_number,
            inject={'proto': 'http'}),
        os_common.Lambda(
            new_node_values,
            name="build node values %s" % node_name,
            rebind={'node_status': "node_status_%d" % node_number},
            provides="node_values_%d" % node_number),
        cue_tasks.UpdateNodeRecord(
            name="update node status %s" % node_name,
            rebind={'node_values': "node_values_%d" % node_number},
            inject={'node_id': node_id}))

    return flow
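A minimal usage sketch, not taken from the Cue source; the ids are placeholders and the flow is handed to a standard taskflow engine:

from taskflow import engines

# Build the status-check flow for node 0 of a cluster (placeholder ids).
flow = check_node_status(cluster_id="cluster-uuid",
                         node_number=0,
                         node_id="node-uuid")

# Run it with the default engine; any inputs the Cue tasks expect beyond the
# values injected above would be supplied through `store`.
engines.run(flow, store={})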
Example #2
def check_cluster_status(cluster_id, node_ids):
    """Check Cluster status factory function

    This factory function uses :func:`cue.taskflow.flow.check_node_status` to
    check cluster status on each node.

    :param cluster_id: A unique ID assigned to the cluster being checked
    :type cluster_id: string
    :param node_ids: The Cue node IDs associated with each node in the cluster
    :type node_ids: list of uuid strings
    :return: A flow instance that represents the workflow for checking cluster
             status
    """

    flow = linear_flow.Flow("check cluster status %s" % cluster_id)
    sub_flow = unordered_flow.Flow("check status of VMs")

    check_active_status = lambda cluster_status: (ok_status if (
        cluster_status == 'OK') else not_ok_status)
    ok_status = {'status': models.Status.ACTIVE}
    not_ok_status = {'status': models.Status.DOWN}

    for i, node_id in enumerate(node_ids):
        sub_flow.add(
            check_node_status.check_node_status(cluster_id, i, node_id))
    flow.add(sub_flow)

    node_status_list = [
        "%s%d" % ("node_status_", i) for i in range(len(node_ids))
    ]
    # append node_status_0 so the two-argument Reduce lambda still gets a
    # second value when the cluster has only one node
    node_status_list.append('node_status_0')
    get_cluster_status = os_common.Reduce(
        lambda a, b: a if (a == 'OK') else b,
        provides='cluster_status',
        requires=node_status_list,
    )
    flow.add(get_cluster_status)

    translate_cluster_status = os_common.Lambda(
        check_active_status,
        name="translate status to active or down",
        provides="final_cluster_status")
    flow.add(translate_cluster_status)

    update_cluster_status = cue_tasks.UpdateClusterRecord(
        name="update cluster status %s" % cluster_id,
        inject={
            'cluster_id': cluster_id,
            'project_only': False
        },
        rebind={'cluster_values': "final_cluster_status"})
    flow.add(update_cluster_status)

    return flow
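The reduction above collapses the per-node statuses into a single cluster_status. Assuming os_common.Reduce applies its callable pairwise in the style of functools.reduce, a plain-Python illustration of the lambda's behaviour, and of why node_status_0 is appended for single-node clusters, is:

from functools import reduce

# Hypothetical per-node results: the accumulator sticks at 'OK' once it sees
# it, otherwise it carries the most recent non-OK status forward.
statuses = ['node-down', 'OK', 'node-down']
print(reduce(lambda a, b: a if a == 'OK' else b, statuses))  # -> 'OK'

# With a single node, the duplicated 'node_status_0' gives the two-argument
# lambda a second value to compare against.
statuses = ['OK', 'OK']
print(reduce(lambda a, b: a if a == 'OK' else b, statuses))  # -> 'OK'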
Example #3
def create_cluster(cluster_id, node_ids, user_network_id,
                   management_network_id):
    """Create Cluster flow factory function

    This factory function uses :func:`cue.taskflow.flow.create_cluster_node` to
    create a multi-node cluster.

    :param cluster_id: A unique ID assigned to the cluster being created
    :type cluster_id: string
    :param node_ids: The Cue node IDs associated with each node in the cluster
    :type node_ids: list of uuid strings
    :param user_network_id: The user's network id
    :type user_network_id: string
    :param management_network_id: The management network id
    :type management_network_id: string
    :return: A flow instance that represents the workflow for creating a
             cluster
    """
    cluster_name = "cue[%s]" % cluster_id
    flow = graph_flow.Flow("creating cluster %s" % cluster_id)
    start_flow_cluster_update = {
        'cluster_id': cluster_id,
        'cluster_values': {'status': models.Status.BUILDING}}

    extract_scheduler_hints = lambda vm_group: {'group': str(vm_group['id'])}
    end_flow_cluster_update = lambda vm_group: {
        'status': models.Status.ACTIVE,
        'group_id': str(vm_group['id'])}

    create_cluster_start_task = cue_tasks.UpdateClusterRecord(
        name="update cluster status start %s" % cluster_name,
        inject=start_flow_cluster_update)
    flow.add(create_cluster_start_task)

    cluster_anti_affinity = cfg.CONF.taskflow.cluster_node_anti_affinity
    if cluster_anti_affinity:
        create_vm_group = nova.CreateVmGroup(
            name="create cluster group %s" % cluster_name,
            os_client=client.nova_client(),
            requires=('name', 'policies'),
            inject={'name': "cue_group_%s" % cluster_id,
                    'policies': ['anti-affinity']},
            provides="cluster_group")
        flow.add(create_vm_group)

        get_scheduler_hints = os_common.Lambda(
            extract_scheduler_hints,
            name="extract scheduler hints %s" % cluster_name,
            rebind={'vm_group': "cluster_group"},
            provides="scheduler_hints")
        flow.add(get_scheduler_hints)

        build_cluster_info = os_common.Lambda(
            end_flow_cluster_update,
            name="build new cluster update values %s" % cluster_name,
            rebind={'vm_group': "cluster_group"},
            provides="cluster_values")
        flow.add(build_cluster_info)

        flow.link(create_cluster_start_task, create_vm_group)
        flow.link(create_vm_group, get_scheduler_hints)
        flow.link(get_scheduler_hints, build_cluster_info)
        create_node_start_task = build_cluster_info
        create_cluster_end_task = cue_tasks.UpdateClusterRecord(
            name="update cluster status end %s" % cluster_name,
            inject={'cluster_id': cluster_id})
    else:
        create_node_start_task = create_cluster_start_task
        end_flow_cluster_update = {
            'cluster_id': cluster_id,
            'cluster_values': {'status': models.Status.ACTIVE}}
        create_cluster_end_task = cue_tasks.UpdateClusterRecord(
            name="update cluster status end %s" % cluster_name,
            inject=end_flow_cluster_update)

    flow.add(create_cluster_end_task)

    show_network = os_neutron.ShowNetwork(
        name="get tenant network information",
        os_client=client.neutron_client(),
        inject={'network': user_network_id},
        provides="tenant_network_info"
    )
    flow.add(show_network)
    flow.link(create_node_start_task, show_network)

    validate_network_info = (lambda tenant_network_info, tenant_id:
                             tenant_network_info['shared'] or
                             tenant_network_info['tenant_id'] == tenant_id)

    validate_tenant_network = os_common.Assert(
        validate_network_info,
        name="validate tenant network info",
        requires=('tenant_network_info', 'tenant_id')
    )
    flow.add(validate_tenant_network)
    flow.link(show_network, validate_tenant_network)

    node_check_timeout = cfg.CONF.taskflow.cluster_node_check_timeout
    node_check_max_count = cfg.CONF.taskflow.cluster_node_check_max_count

    check_rabbit_online = linear_flow.Flow(
        name="wait for RabbitMQ ready state %s" % cluster_name,
        retry=retry.Times(node_check_max_count, revert_all=True))
    check_rabbit_online.add(
        cue_tasks.GetRabbitClusterStatus(
            name="get RabbitMQ status %s" % cluster_name,
            rebind={'vm_ip': "vm_management_ip_0"},
            provides="clustering_status",
            inject={'proto': 'http'}),
        os_common.CheckFor(
            name="check cluster status %s" % cluster_name,
            details="waiting for RabbitMQ clustered status",
            rebind={'check_var': "clustering_status"},
            check_value='OK',
            retry_delay_seconds=node_check_timeout),
    )
    flow.add(check_rabbit_online)

    flow.link(check_rabbit_online, create_cluster_end_task)

    # TODO(dagnello): verify node_ids is a list and not a string
    for i, node_id in enumerate(node_ids):
        generate_userdata = cue_tasks.ClusterNodeUserData(
            name="generate userdata %s_%d" % (cluster_name, i),
            node_count=len(node_ids),
            node_ip_prefix="vm_management_ip_",
            inject={'node_name': "rabbit-node-%d" % i,
                    'cluster_id': cluster_id})
        flow.add(generate_userdata)

        create_cluster_node.create_cluster_node(cluster_id, i, node_id, flow,
                                                generate_userdata,
                                                validate_tenant_network,
                                                check_rabbit_online,
                                                user_network_id,
                                                management_network_id)

    return flow
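A hedged usage sketch for the cluster-creation flow, with placeholder ids. Symbols that no task in the graph provides must arrive through the engine's store; judging from the requires/rebind declarations above that is at least tenant_id (for the network assertion) plus image and flavor (for nova.CreateVm inside create_cluster_node):

from taskflow import engines

flow = create_cluster(cluster_id="cluster-uuid",
                      node_ids=["node-uuid-0", "node-uuid-1", "node-uuid-2"],
                      user_network_id="user-net-uuid",
                      management_network_id="mgmt-net-uuid")

engines.run(flow, store={'tenant_id': "tenant-uuid",
                         'image': "image-uuid",
                         'flavor': "flavor-id"})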
Example #4
def delete_cluster_node(cluster_id, node_number, node_id):
    """Delete Cluster Node factory function

    This factory function creates a flow for deleting a node of a cluster.

    :param cluster_id: Unique ID for the cluster that the node is part of.
    :type cluster_id: string
    :param node_number: Cluster node # for the node being deleted.
    :type node_number: number
    :param node_id: Unique ID for the node.
    :type node_id: string
    :return: A flow instance that represents the workflow for deleting a
             cluster node.
    """
    flow_name = "delete cluster %s node %d" % (cluster_id, node_number)
    node_name = "cluster[%s].node[%d]" % (cluster_id, node_number)

    extract_vm_id = lambda node: node['instance_id']
    extract_port_ids = lambda interfaces: [i['port_id'] for i in interfaces]

    deleted_node_values = {'status': models.Status.DELETED,
                           'deleted': True}

    deleted_endpoints_values = {'deleted': True}

    flow = linear_flow.Flow(flow_name)
    flow.add(
        cue_tasks.GetNode(
            name="Get Node %s" % node_name,
            inject={'node_id': node_id},
            provides="node_%d" % node_number),
        os_common.Lambda(
            extract_vm_id,
            name="extract vm id %s" % node_name,
            rebind={'node': "node_%d" % node_number},
            provides="vm_id_%d" % node_number),
        nova.ListVmInterfaces(
            os_client=client.nova_client(),
            name="list vm interfaces %s" % node_name,
            rebind={'server': "vm_id_%d" % node_number},
            inject={'ignore_nova_not_found_exception': True},
            provides="vm_interfaces_%d" % node_number),
        os_common.Lambda(
            extract_port_ids,
            name="extract port ids %s" % node_name,
            rebind={'interfaces': "vm_interfaces_%d" % node_number},
            provides="vm_port_list_%d" % node_number),
        nova.DeleteVm(
            os_client=client.nova_client(),
            name="delete vm %s" % node_name,
            rebind={'server': "vm_id_%d" % node_number}),
        neutron.DeletePorts(
            os_client=client.neutron_client(),
            name="delete vm %s ports" % node_name,
            rebind={'port_ids': "vm_port_list_%d" % node_number}),
        cue_tasks.UpdateNodeRecord(
            name="update node %s" % node_name,
            inject={'node_id': node_id,
                    'node_values': deleted_node_values}),
        cue_tasks.UpdateEndpointsRecord(
            name="update endpoint for node %s" % node_name,
            inject={'node_id': node_id,
                    'endpoints_values': deleted_endpoints_values}
        ))
    return flow
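The per-node delete flows compose the same way check_cluster_status composes the status checks above; a sketch with placeholder ids, not from the Cue source, wrapping them in an unordered flow so the nodes can be torn down independently:

from taskflow.patterns import unordered_flow

cluster_id = "cluster-uuid"
node_ids = ["node-uuid-0", "node-uuid-1"]

delete_nodes = unordered_flow.Flow("delete cluster %s nodes" % cluster_id)
for i, node_id in enumerate(node_ids):
    delete_nodes.add(delete_cluster_node(cluster_id, i, node_id))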
Example #5
def create_cluster_node(cluster_id, node_number, node_id, graph_flow,
                        generate_userdata, start_task, post_task,
                        user_network_id, management_network_id):
    """Create Cluster Node factory function

    This factory function adds the tasks for creating one node of a cluster
    to the supplied graph flow.

    :param cluster_id: Unique ID for the cluster that the node is part of.
    :type cluster_id: string
    :param node_number: Cluster node # for the node being created.
    :type node_number: number
    :param node_id: Unique ID for the node.
    :type node_id: string
    :param graph_flow: TaskFlow graph flow which contains create cluster flow
    :type graph_flow: taskflow.patterns.graph_flow
    :param generate_userdata: generate user data task
    :type generate_userdata: cue.taskflow.task.ClusterNodeUserData
    :param start_task: Update cluster status start task
    :type start_task: cue.taskflow.task.UpdateClusterRecord
    :param post_task: Task/Subflow to run after the flow created here
    :type post_task: taskflow task or flow
    :param user_network_id: The user's network id
    :type user_network_id: string
    :param management_network_id: The management network id
    :type management_network_id: string
    :return: None.  The tasks and links for creating the cluster node are
             added directly to the supplied graph_flow.
    """
    node_name = "cue[%s].node[%d]" % (cluster_id, node_number)

    extract_port_info = (
        lambda user_port_info, management_port_info: (
            [  # nova boot requires a list of port-id's
                {
                    'port-id': user_port_info['port']['id']
                }, {
                    'port-id': management_port_info['port']['id']
                }
            ],
            # user port ip
            user_port_info['port']['fixed_ips'][0]['ip_address'],
            # management port ip
            management_port_info['port']['fixed_ips'][0]['ip_address']))
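    # Illustration only (not part of the original flow): for port info shaped
    # like {'port': {'id': 'p-user',
    #                'fixed_ips': [{'ip_address': '10.0.0.5'}]}}
    # and  {'port': {'id': 'p-mgmt',
    #                'fixed_ips': [{'ip_address': '192.168.0.5'}]}},
    # the tuple returned above unpacks into the three `provides` symbols of
    # the extract_port_data Lambda below as:
    #   nics          -> [{'port-id': 'p-user'}, {'port-id': 'p-mgmt'}]
    #   user port ip  -> '10.0.0.5'
    #   mgmt port ip  -> '192.168.0.5'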

    extract_vm_id = lambda vm_info: str(vm_info['id'])

    new_node_values = lambda nova_vm_id, vm_management_ip: {
        'status': models.Status.ACTIVE,
        'instance_id': nova_vm_id,
        'management_ip': vm_management_ip
    }

    new_endpoint_values = lambda vm_ip: {
        'node_id': node_id,
        'uri': vm_ip + ':',
        'type': 'AMQP'
    }

    create_user_port = neutron.CreatePort(name="create port %s" % node_name,
                                          os_client=client.neutron_client(),
                                          inject={
                                              'network_id': user_network_id,
                                              'port_name': 'user_' + node_name
                                          },
                                          provides="user_port_info_%d" %
                                          node_number)
    graph_flow.add(create_user_port)
    graph_flow.link(start_task, create_user_port)

    create_management_port = neutron.CreatePort(
        name="create management port %s" % node_name,
        os_client=client.neutron_client(),
        inject={
            'network_id': management_network_id,
            'port_name': 'management_' + node_name
        },
        provides="management_port_info_%d" % node_number)
    graph_flow.add(create_management_port)
    graph_flow.link(start_task, create_management_port)

    extract_port_data = os_common.Lambda(
        extract_port_info,
        name="extract port id %s" % node_name,
        rebind={
            'user_port_info': "user_port_info_%d" % node_number,
            'management_port_info': "management_port_info_%d" % node_number
        },
        provides=("port_ids_%d" % node_number, "vm_user_ip_%d" % node_number,
                  "vm_management_ip_%d" % node_number))
    graph_flow.add(extract_port_data)
    graph_flow.link(create_user_port, extract_port_data)

    create_vm = nova.CreateVm(name="create vm %s" % node_name,
                              os_client=client.nova_client(),
                              requires=('name', 'image', 'flavor', 'nics'),
                              inject={'name': node_name},
                              rebind={'nics': "port_ids_%d" % node_number},
                              provides="vm_info_%d" % node_number)
    graph_flow.add(create_vm)
    graph_flow.link(create_management_port, create_vm)
    graph_flow.link(generate_userdata, create_vm)

    get_vm_id = os_common.Lambda(
        extract_vm_id,
        name="extract vm id %s" % node_name,
        rebind={'vm_info': "vm_info_%d" % node_number},
        provides="vm_id_%d" % node_number)

    graph_flow.add(get_vm_id)
    graph_flow.link(create_vm, get_vm_id)

    retry_count = CONF.flow_options.create_cluster_node_vm_active_retry_count
    check_vm_active = linear_flow.Flow(
        name="wait for VM active state %s" % node_name,
        retry=retry.ExceptionTimes(
            revert_exception_list=[cue_exceptions.VmErrorException],
            attempts=retry_count,
            revert_all=True))

    check_vm_active.add(
        nova.GetVmStatus(os_client=client.nova_client(),
                         name="get vm %s" % node_name,
                         rebind={'nova_vm_id': "vm_id_%d" % node_number},
                         provides="vm_status_%d" % node_number),
        cue_tasks.CheckForVmStatus(
            name="check vm status %s" % node_name,
            details="waiting for ACTIVE VM status",
            rebind={'check_var': "vm_status_%d" % node_number},
            retry_delay_seconds=10),
    )

    graph_flow.add(check_vm_active)
    graph_flow.link(get_vm_id, check_vm_active)

    build_node_info = os_common.Lambda(new_node_values,
                                       name="build new node values %s" %
                                       node_name,
                                       rebind={
                                           'nova_vm_id':
                                           "vm_id_%d" % node_number,
                                           'vm_management_ip':
                                           "vm_management_ip_%d" % node_number
                                       },
                                       provides="node_values_%d" % node_number)
    graph_flow.add(build_node_info)
    graph_flow.link(get_vm_id, build_node_info)

    update_node = cue_tasks.UpdateNodeRecord(
        name="update node %s" % node_name,
        rebind={'node_values': "node_values_%d" % node_number},
        inject={'node_id': node_id})
    graph_flow.add(update_node)
    graph_flow.link(build_node_info, update_node)

    build_endpoint_info = os_common.Lambda(
        new_endpoint_values,
        name="build new endpoint values %s" % node_name,
        rebind={'vm_ip': "vm_user_ip_%d" % node_number},
        inject={'node_id': node_id},
        provides="endpoint_values_%d" % node_number)
    graph_flow.add(build_endpoint_info)
    graph_flow.link(check_vm_active, build_endpoint_info)

    create_endpoint = cue_tasks.CreateEndpoint(
        name="update endpoint for node %s" % node_name,
        rebind={'endpoint_values': "endpoint_values_%d" % node_number})
    graph_flow.add(create_endpoint)
    graph_flow.link(check_vm_active, create_endpoint)

    graph_flow.link(update_node, post_task)
    graph_flow.link(create_endpoint, post_task)
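A minimal wiring sketch, with placeholder ids, showing how this factory hangs a single node off a fresh graph flow; it is a simplified version of what create_cluster in Example #3 does, using the end-of-flow status update as the post task:

from taskflow.patterns import graph_flow

cluster_id = "cluster-uuid"
flow = graph_flow.Flow("creating cluster %s" % cluster_id)

start_task = cue_tasks.UpdateClusterRecord(
    name="update cluster status start",
    inject={'cluster_id': cluster_id,
            'cluster_values': {'status': models.Status.BUILDING}})
end_task = cue_tasks.UpdateClusterRecord(
    name="update cluster status end",
    inject={'cluster_id': cluster_id,
            'cluster_values': {'status': models.Status.ACTIVE}})
flow.add(start_task, end_task)

userdata = cue_tasks.ClusterNodeUserData(
    name="generate userdata 0",
    node_count=1,
    node_ip_prefix="vm_management_ip_",
    inject={'node_name': "rabbit-node-0", 'cluster_id': cluster_id})
flow.add(userdata)

create_cluster_node(cluster_id, 0, "node-uuid-0", flow, userdata,
                    start_task, end_task, "user-net-uuid", "mgmt-net-uuid")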