# NOTE: these examples are snippets from the aws-parallelcluster CLI and assume
# module-level imports roughly like the following (internal import paths as in
# aws-parallelcluster 2.x; treat them as a sketch, not verbatim):
import logging
import sys
import time

import boto3
from botocore.exceptions import ClientError

from pcluster import utils
from pcluster.config.pcluster_config import PclusterConfig

LOGGER = logging.getLogger(__name__)


def status(args):  # noqa: C901 FIXME!!!
    stack_name = utils.get_stack_name(args.cluster_name)

    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws(config_file=args.config_file)

    cfn = boto3.client("cloudformation")
    try:
        stack = utils.get_stack(stack_name, cfn)
        sys.stdout.write("\rStatus: %s" % stack.get("StackStatus"))
        sys.stdout.flush()
        if not args.nowait:
            while stack.get("StackStatus") not in [
                "CREATE_COMPLETE",
                "UPDATE_COMPLETE",
                "UPDATE_ROLLBACK_COMPLETE",
                "ROLLBACK_COMPLETE",
                "CREATE_FAILED",
                "DELETE_FAILED",
            ]:
                time.sleep(5)
                stack = utils.get_stack(stack_name, cfn)
                event = utils.get_stack_events(stack_name)[0]  # most recent event
                resource_status = (
                    "Status: %s - %s" % (event.get("LogicalResourceId"), event.get("ResourceStatus"))
                ).ljust(80)
                sys.stdout.write("\r%s" % resource_status)
                sys.stdout.flush()
            sys.stdout.write("\rStatus: %s\n" % stack.get("StackStatus"))
            sys.stdout.flush()
            if stack.get("StackStatus") in ["CREATE_COMPLETE", "UPDATE_COMPLETE", "UPDATE_ROLLBACK_COMPLETE"]:
                state = _poll_head_node_state(stack_name)
                if state == "running":
                    _print_stack_outputs(stack)
                _print_compute_fleet_status(args.cluster_name, stack)
            elif stack.get("StackStatus") in ["ROLLBACK_COMPLETE", "CREATE_FAILED", "DELETE_FAILED"]:
                events = utils.get_stack_events(stack_name)
                for event in events:
                    if event.get("ResourceStatus") in ["CREATE_FAILED", "DELETE_FAILED", "UPDATE_FAILED"]:
                        LOGGER.info(
                            "%s %s %s %s %s",
                            event.get("Timestamp"),
                            event.get("ResourceStatus"),
                            event.get("ResourceType"),
                            event.get("LogicalResourceId"),
                            event.get("ResourceStatusReason"),
                        )
        else:
            sys.stdout.write("\n")
            sys.stdout.flush()
    except ClientError as e:
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.stdout.flush()
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)
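
For reference, a minimal sketch of how a handler like status receives its arguments. The attribute names (cluster_name, config_file, nowait) mirror the fields the function reads from args; the flag spellings and parser wiring below are assumptions for illustration, not the real pcluster CLI parser:

# Hypothetical argparse wiring for the status command above.
import argparse

parser = argparse.ArgumentParser(prog="pcluster")
subparsers = parser.add_subparsers()
status_parser = subparsers.add_parser("status")
status_parser.add_argument("cluster_name")
status_parser.add_argument("-c", "--config-file", dest="config_file", default=None)
status_parser.add_argument("-nw", "--nowait", action="store_true")
status_parser.set_defaults(func=status)

args = parser.parse_args(["status", "mycluster"])
args.func(args)  # polls until the stack reaches a terminal state (pass -nw to skip waiting)
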
Example #2
def delete(args):
    saw_update = False
    LOGGER.info("Deleting: %s", args.cluster_name)
    stack_name = utils.get_stack_name(args.cluster_name)

    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws(config_file=args.config_file)

    cfn = boto3.client("cloudformation")
    try:
        # delete_stack does not raise an exception if stack does not exist
        # Use describe_stacks to explicitly check if the stack exists
        cfn.describe_stacks(StackName=stack_name)
        cfn.delete_stack(StackName=stack_name)
        saw_update = True
        stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")
        sys.stdout.write("\rStatus: %s" % stack_status)
        sys.stdout.flush()
        LOGGER.debug("Status: %s", stack_status)
        if not args.nowait:
            while stack_status == "DELETE_IN_PROGRESS":
                time.sleep(5)
                stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")
                event = cfn.describe_stack_events(StackName=stack_name).get("StackEvents")[0]  # most recent event
                resource_status = (
                    "Status: %s - %s" % (event.get("LogicalResourceId"), event.get("ResourceStatus"))
                ).ljust(80)
                sys.stdout.write("\r%s" % resource_status)
                sys.stdout.flush()
            sys.stdout.write("\rStatus: %s\n" % stack_status)
            sys.stdout.flush()
            LOGGER.debug("Status: %s", stack_status)
        else:
            sys.stdout.write("\n")
            sys.stdout.flush()
        if stack_status == "DELETE_FAILED":
            LOGGER.info(
                "Cluster did not delete successfully. Run 'pcluster delete %s' again",
                args.cluster_name)
    except ClientError as e:
        if e.response.get("Error").get("Message").endswith("does not exist"):
            if saw_update:
                LOGGER.info("\nCluster deleted successfully.")
                sys.exit(0)
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.stdout.flush()
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)
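
Both delete and status lean on utils.get_stack; a plausible minimal sketch, assuming it is a thin wrapper over describe_stacks (the real pcluster.utils helper also adds retries and error handling):

def get_stack(stack_name, cfn_client=None):
    # describe_stacks with an exact StackName returns a single-element list
    cfn_client = cfn_client or boto3.client("cloudformation")
    return cfn_client.describe_stacks(StackName=stack_name).get("Stacks")[0]
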
Example #3
def delete(args):
    PclusterConfig.init_aws(config_file=args.config_file)
    LOGGER.info("Deleting: %s", args.cluster_name)
    stack_name = utils.get_stack_name(args.cluster_name)
    if not utils.stack_exists(stack_name):
        if args.keep_logs:
            utils.warn(
                "Stack for {0} does not exist. Cannot prevent its log groups from being deleted.".format(
                    args.cluster_name
                )
            )
        utils.warn("Cluster {0} has already been deleted.".format(args.cluster_name))
        sys.exit(0)
    elif args.keep_logs:
        _persist_cloudwatch_log_groups(args.cluster_name)
    _delete_cluster(args.cluster_name, args.nowait)
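
This variant delegates the existence check to utils.stack_exists; a minimal sketch, assuming it keys off the "does not exist" ClientError that describe_stacks raises for missing stacks:

def stack_exists(stack_name):
    try:
        boto3.client("cloudformation").describe_stacks(StackName=stack_name)
        return True
    except ClientError as e:
        if e.response.get("Error").get("Message").endswith("does not exist"):
            return False
        raise
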
def instances(args):
    stack_name = utils.get_stack_name(args.cluster_name)
    PclusterConfig.init_aws(config_file=args.config_file)
    cfn_stack = utils.get_stack(stack_name)
    scheduler = utils.get_cfn_param(cfn_stack.get("Parameters"), "Scheduler")

    instances = []
    head_node_server = utils.describe_cluster_instances(stack_name, node_type=utils.NodeType.head_node)
    if head_node_server:
        instances.append(("MasterServer", head_node_server[0].get("InstanceId")))

    if scheduler != "awsbatch":
        instances.extend(_get_compute_instances(stack_name))

    for instance in instances:
        LOGGER.info("%s         %s", instance[0], instance[1])

    if scheduler == "awsbatch":
        LOGGER.info("Run 'awsbhosts --cluster %s' to list the compute instances", args.cluster_name)
def dcv_connect(args):
    """
    Execute pcluster dcv connect command.

    :param args: pcluster cli arguments.
    """
    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws()  # FIXME it always searches for the default configuration file

    # Prepare ssh command to execute in the head node instance
    stack = get_stack(get_stack_name(args.cluster_name))
    shared_dir = get_cfn_param(stack.get("Parameters"), "SharedDir")
    head_node_ip, username = get_head_node_ip_and_username(args.cluster_name)
    cmd = 'ssh {CFN_USER}@{HEAD_NODE_IP} {KEY} "{REMOTE_COMMAND} {DCV_SHARED_DIR}"'.format(
        CFN_USER=username,
        HEAD_NODE_IP=head_node_ip,
        KEY="-i {0}".format(args.key_path) if args.key_path else "",
        REMOTE_COMMAND=DCV_CONNECT_SCRIPT,
        DCV_SHARED_DIR=shared_dir,
    )

    try:
        url = retry(_retrieve_dcv_session_url, func_args=[cmd, args.cluster_name, head_node_ip], attempts=4)
        url_message = "Please use the following one-time URL in your browser within 30 seconds:\n{0}".format(url)
    except DCVConnectionError as e:
        # error() logs the message and exits, so url is never used unbound below
        error(
            "Something went wrong during DCV connection.\n{0}\n"
            "Please check the logs in the /var/log/parallelcluster/ folder "
            "of the head node and submit an issue {1}\n".format(e, PCLUSTER_ISSUES_LINK)
        )

    if args.show_url:
        LOGGER.info(url_message)
        return

    try:
        if not webbrowser.open_new(url):
            raise webbrowser.Error("Unable to open the Web browser.")
    except webbrowser.Error as e:
        LOGGER.info("{0}\n{1}".format(e, url_message))
def list_stacks(args):
    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws(config_file=args.config_file)

    try:
        result = []
        for stack in utils.paginate_boto3(boto3.client("cloudformation").describe_stacks):
            if stack.get("ParentId") is None and stack.get("StackName").startswith(PCLUSTER_STACK_PREFIX):
                pcluster_version = _get_pcluster_version_from_stack(stack)
                result.append(
                    [
                        stack.get("StackName")[len(PCLUSTER_STACK_PREFIX) :],  # noqa: E203
                        _colorize(stack.get("StackStatus"), args),
                        pcluster_version,
                    ]
                )
        LOGGER.info(tabulate(result, tablefmt="plain"))
    except ClientError as e:
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("Exiting...")
        sys.exit(0)
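
utils.paginate_boto3 receives a bound client method (here describe_stacks); a sketch assuming it derives the matching botocore paginator from that method and yields results across all pages:

def paginate_boto3(method, **kwargs):
    # a bound boto3 client method exposes its client via __self__
    paginator = method.__self__.get_paginator(method.__name__)
    for result_key_iterator in paginator.paginate(**kwargs).result_key_iters():
        yield from result_key_iterator
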
Example #7
def dcv_connect(args):
    """
    Execute pcluster dcv connect command.

    :param args: pcluster cli arguments.
    """
    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws()  # FIXME it always searches for the default configuration file

    # Prepare ssh command to execute in the master instance
    stack = get_stack(get_stack_name(args.cluster_name))
    shared_dir = get_cfn_param(stack.get("Parameters"), "SharedDir")
    master_ip, username = get_master_ip_and_username(args.cluster_name)
    cmd = 'ssh {CFN_USER}@{MASTER_IP} {KEY} "{REMOTE_COMMAND} {DCV_SHARED_DIR}"'.format(
        CFN_USER=username,
        MASTER_IP=master_ip,
        KEY="-i {0}".format(args.key_path) if args.key_path else "",
        REMOTE_COMMAND=DCV_CONNECT_SCRIPT,
        DCV_SHARED_DIR=shared_dir,
    )

    # Connect by ssh to the master instance and prepare DCV session
    try:
        LOGGER.debug("SSH command: {0}".format(cmd))
        output = _check_command_output(cmd)
        # At first ssh connection, the ssh command alerts it is adding the host to the known hosts list
        if re.search("Permanently added .* to the list of known hosts.",
                     output):
            output = _check_command_output(cmd)

        dcv_parameters = re.search(
            r"PclusterDcvServerPort=([\d]+) PclusterDcvSessionId=([\w]+) PclusterDcvSessionToken=([\w-]+)",
            output)
        if dcv_parameters:
            dcv_server_port = dcv_parameters.group(1)
            dcv_session_id = dcv_parameters.group(2)
            dcv_session_token = dcv_parameters.group(3)
        else:
            error(
                "Something went wrong during DCV connection. Please manually execute the command:\n{0}\n"
                "If the problem persists, please check the logs in the /var/log/parallelcluster/ folder "
                "of the master instance and submit an issue {1}.".format(
                    cmd, PCLUSTER_ISSUES_LINK))

    except sub.CalledProcessError as e:
        if "{0}: No such file or directory".format(DCV_CONNECT_SCRIPT) in e.output:
            error(
                "The cluster {0} has been created with an old version of ParallelCluster "
                "without DCV support.".format(args.cluster_name)
            )
        else:
            error("Something went wrong during DCV connection.\n{0}".format(e.output))

    # Open the session URL in the default web browser
    url = "https://{IP}:{PORT}?authToken={TOKEN}#{SESSION_ID}".format(
        IP=master_ip, PORT=dcv_server_port, TOKEN=dcv_session_token, SESSION_ID=dcv_session_id
    )
    try:
        # webbrowser.open_new returns False instead of raising when no browser is available
        if not webbrowser.open_new(url):
            raise webbrowser.Error("Unable to open the Web browser.")
    except webbrowser.Error:
        LOGGER.info(
            "Unable to open the Web browser. "
            "Please use the following URL in your browser within 30 seconds:\n{0}".format(url)
        )
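
_check_command_output is a small pcluster-internal wrapper; a minimal sketch, assuming it runs the ssh command in a shell and merges stderr into stdout (which is how the known-hosts notice checked above ends up in the captured output):

import subprocess as sub

def _check_command_output(cmd):
    return sub.check_output(cmd, shell=True, stderr=sub.STDOUT, universal_newlines=True).strip()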