Example #1
0
def ssh_con_fabric(test_vars):
    """Create an SSH connection to the controller.

    This is a generator that yields exactly one connection object and
    performs cleanup when it is resumed or closed.

    If test_vars["public_ip"] equals test_vars["controller_ip"], the
    connection to the public IP is opened and yielded directly. Otherwise the
    public IP is treated as a jumpbox: a local port is forwarded through it
    to port 22 on the controller and a connection to that local port is
    yielded. Opening the tunneled connection is attempted up to 10 times,
    each time with a fresh local port, when the failure is "Unable to connect
    to port <port> on 127.0.0.1".

    Keys read from test_vars: "public_ip", "controller_ip",
    "controller_user", "ssh_priv_key".

    Raises:
        NoValidConnectionsError: immediately when the failure is not the
            expected local-port error, or after the final attempt when every
            attempt failed with the expected error.
    """
    log = logging.getLogger("ssh_con_fabric")

    # SSH connection/client to the public IP.
    pub_client = Connection(test_vars["public_ip"],
                            user=test_vars["controller_user"],
                            connect_kwargs={
                                "key_filename": test_vars["ssh_priv_key"],
                            })

    msg_con = "SSH connection to controller ({})".format(
        test_vars["controller_ip"])

    # The try/finally guarantees the public connection is closed even when
    # opening fails or the consumer of the yielded connection raises.
    try:
        if test_vars["public_ip"] == test_vars["controller_ip"]:
            log.debug("Opening {}".format(msg_con))
            pub_client.open()
            yield pub_client
            log.debug("Closing {}".format(msg_con))
            return

        # The controller's IP is not the same as the public IP, so we are
        # using a jumpbox to get into the VNET containing the controller.
        # Create an SSH tunnel before connecting to the controller.
        last_error = None
        for port_attempt in range(1, 11):
            tunnel_local_port = get_unused_local_port()
            tunnel_remote_port = 22

            # Build the message per attempt instead of appending to msg_con,
            # so retries do not accumulate repeated " via jumpbox" suffixes.
            msg_tunnel = "{0} via jumpbox ({1}), local port {2}".format(
                msg_con, test_vars["public_ip"], tunnel_local_port)

            log.debug("Opening {}".format(msg_tunnel))
            with pub_client.forward_local(
                    local_port=tunnel_local_port,
                    remote_port=tunnel_remote_port,
                    remote_host=test_vars["controller_ip"]):
                client = Connection("127.0.0.1",
                                    user=test_vars["controller_user"],
                                    port=tunnel_local_port,
                                    connect_kwargs={
                                        "key_filename":
                                        test_vars["ssh_priv_key"],
                                    })
                try:
                    client.open()
                except NoValidConnectionsError as ex:
                    exp_err = "Unable to connect to port {} on 127.0.0.1".format(
                        tunnel_local_port)
                    if exp_err not in str(ex):
                        raise
                    last_error = ex
                    log.warning("{0} (attempt #{1}, retrying)".format(
                        exp_err, str(port_attempt)))
                    continue

                # Close the tunneled connection before the tunnel itself is
                # torn down by leaving the "with" block.
                try:
                    yield client
                finally:
                    client.close()
            log.debug("{} closed".format(msg_tunnel))
            break  # no need to iterate again
        else:
            # Every attempt failed with the expected error. Surface it rather
            # than ending the generator without having yielded anything.
            log.error("Unable to open {} after 10 attempts".format(msg_con))
            raise last_error
    finally:
        pub_client.close()
Example #2
0
def ssh_con_fabric(test_vars):
    """Create an SSH connection to the controller.

    This is a generator that yields exactly one connection object and
    performs cleanup when it is resumed or closed.

    If test_vars["public_ip"] differs from test_vars["controller_ip"], the
    public IP is treated as a jumpbox: a local port is forwarded through it
    to port 22 on the controller and a connection to that local port is
    yielded. Otherwise the connection to the public IP is opened and yielded
    directly.

    Keys read from test_vars: "public_ip", "controller_ip",
    "controller_user", "ssh_priv_key".
    """
    log = logging.getLogger("ssh_con_fabric")

    # SSH connection/client to the public IP.
    pub_client = Connection(test_vars["public_ip"],
                            user=test_vars["controller_user"],
                            connect_kwargs={
                                "key_filename": test_vars["ssh_priv_key"],
                            })

    # If the controller's IP is not the same as the public IP, then we are
    # using a jumpbox to get into the VNET containing the controller. In that
    # case, create an SSH tunnel before connecting to the controller.
    msg_con = "SSH connection to controller ({})".format(
        test_vars["controller_ip"])

    # The try/finally guarantees the public connection is closed even when
    # opening fails or the consumer of the yielded connection raises.
    try:
        if test_vars["public_ip"] != test_vars["controller_ip"]:
            tunnel_local_port = get_unused_local_port()
            tunnel_remote_port = 22

            msg_con += " via jumpbox ({0}), local port {1}".format(
                test_vars["public_ip"], tunnel_local_port)

            log.debug("Opening {}".format(msg_con))
            with pub_client.forward_local(
                    local_port=tunnel_local_port,
                    remote_port=tunnel_remote_port,
                    remote_host=test_vars["controller_ip"]):
                client = Connection("127.0.0.1",
                                    user=test_vars["controller_user"],
                                    port=tunnel_local_port,
                                    connect_kwargs={
                                        "key_filename":
                                        test_vars["ssh_priv_key"],
                                    })
                # Close the tunneled connection before the tunnel itself is
                # torn down by leaving the "with" block.
                try:
                    client.open()
                    yield client
                finally:
                    client.close()
            log.debug("{} closed".format(msg_con))
        else:
            log.debug("Opening {}".format(msg_con))
            pub_client.open()
            yield pub_client
            log.debug("Closing {}".format(msg_con))
    finally:
        pub_client.close()
Example #3
0
    def test_artifacts_collect(self, averecmd_params, scp_con,
                               test_vars):  # noqa: F811, E501
        """
        Collect test artifacts (node logs, rolling trace) from each node.
        Artifacts are stored to local directories.

        Controller logs are fetched through scp_con. Each node returned by
        "node.list" is then reached through an SSH tunnel via
        test_vars["public_ip"] and its files are copied with SCP.

        A failure on one node (a failed copy, or running out of connection
        attempts) is logged and remembered, but does not prevent collection
        from the other nodes. After all nodes have been visited, the most
        recently remembered exception, if any, is raised. A connection error
        other than the expected local-port error is raised immediately.
        """
        log = logging.getLogger("test_collect_artifacts")
        artifacts_dir = "vfxt_artifacts_" + test_vars["atd_obj"].deploy_id
        os.makedirs(artifacts_dir, exist_ok=True)

        log.debug("Copying logs from controller to {}".format(artifacts_dir))
        for lf in [
                "vfxt.log", "enablecloudtrace.log",
                "create_cluster_command.log"
        ]:
            scp_con.get("~/" + lf, artifacts_dir)

        log.debug("Copying SSH keys to the controller")
        scp_con.put(test_vars["ssh_priv_key"], "~/.ssh/.")
        scp_con.put(test_vars["ssh_pub_key"], "~/.ssh/.")

        # list of files and directories to download from every node
        to_collect = [
            "/var/log/messages",
            "/var/log/xmlrpc.log",

            # assumes rolling trace was enabled during deploy
            "/support/trace/rolling",

            # TODO: 2019-0219: turned off for now
            # "/support/gsi",
            # "/support/cores",
        ]

        nodes = run_averecmd(**averecmd_params, method="node.list")
        log.debug("Nodes found: {}".format(nodes))

        # Most recent error across ALL nodes. It is only ever overwritten by
        # a newer error, never cleared, so that a failure on an early node is
        # still reported when later nodes succeed.
        last_error = None
        for node in nodes:
            node_dir = artifacts_dir + "/" + node
            node_dir_log = node_dir + "/log"
            node_dir_trace = node_dir + "/trace"
            log.debug("node_dir_log = {}, node_dir_trace = {}".format(
                node_dir_log, node_dir_trace))

            # make local directories to store downloaded artifacts
            os.makedirs(node_dir_trace, exist_ok=True)
            os.makedirs(node_dir_log, exist_ok=True)

            # get this node's primary cluster IP address
            node_ip = run_averecmd(**averecmd_params,
                                   method="node.get",
                                   args=node)[node]["primaryClusterIP"]["IP"]

            log.debug("Tunneling to node {} using IP {}".format(node, node_ip))

            # Connection error for THIS node only; reset once connected.
            conn_error = None

            # get_unused_local_port actually uses the port to know it's
            # available before making it available again and returning the
            # port number. Rarely, there is a race where the open() call
            # below fails because the port is not yet fully available
            # again. In those cases, try getting a new port.
            for port_attempt in range(1, 11):
                tunnel_local_port = get_unused_local_port()
                jump_c = Connection(test_vars["public_ip"],
                                    user=test_vars["controller_user"],
                                    connect_kwargs={
                                        "key_filename":
                                        test_vars["ssh_priv_key"],
                                    })
                try:
                    with jump_c.forward_local(local_port=tunnel_local_port,
                                              remote_port=22,
                                              remote_host=node_ip):
                        node_c = Connection("127.0.0.1",
                                            user="******",
                                            port=tunnel_local_port,
                                            connect_kwargs={
                                                "password":
                                                os.environ["AVERE_ADMIN_PW"]
                                            })
                        try:
                            node_c.open()
                        except NoValidConnectionsError as ex:
                            exp_err = "Unable to connect to port {} on 127.0.0.1".format(
                                tunnel_local_port)
                            if exp_err not in str(ex):
                                raise
                            conn_error = ex
                            log.warning("{0} (attempt #{1}, retrying)".format(
                                exp_err, str(port_attempt)))
                            continue  # iterate

                        # Connected: forget failures from earlier attempts.
                        conn_error = None

                        try:
                            scp_client = SCPClient(node_c.transport)
                            try:
                                # Calls below catch exceptions and report
                                # them to the error log, but then continue.
                                # This is because a failure to collect
                                # artifacts on one node should not prevent
                                # collection from other nodes. After
                                # collection has completed, the last
                                # exception will be raised.
                                for tc in to_collect:
                                    log.debug(
                                        "SCP'ing {} from node {} to {}".format(
                                            tc, node, node_dir_log))
                                    try:
                                        scp_client.get(tc,
                                                       node_dir_log,
                                                       recursive=True)
                                    except Exception as ex:
                                        log.error(
                                            "({}) Exception caught: {}".format(
                                                node, ex))
                                        last_error = ex
                            finally:
                                scp_client.close()
                        finally:
                            # Close the node connection before the tunnel is
                            # torn down by leaving the "with" block.
                            node_c.close()
                finally:
                    jump_c.close()
                log.debug("Connections to node {} closed".format(node))
                break  # no need to iterate again

            if conn_error:
                # All attempts failed for this node. Remember the failure and
                # move on to the remaining nodes.
                log.error("({}) Exception caught: {}".format(
                    node, conn_error))
                last_error = conn_error

        if last_error:
            log.error("See previous error(s) above. Raising last exception.")
            raise last_error
Example #4
0
    def test_update_reg_clients_hosts(self, test_vars):
        """
        Updates /etc/hosts on the STAF clients so they can contact the STAF
        server.

        Also writes ~/hostdb_entries.sh on each client (hostdb entries for
        the cluster, every regression client and the storage account) and
        copies the SSH key pair to ~/.ssh/id_rsa and ~/.ssh/id_rsa.pub.

        Each client in test_vars["staf_client_priv_ips"] is reached through
        an SSH tunnel via test_vars["public_ip"]. Opening the tunneled
        connection is attempted up to 10 times per client, each time with a
        fresh local port, when the failure is "Unable to connect to port
        <port> on 127.0.0.1". Any other connection error is raised
        immediately, and the expected error is raised once the attempts for
        a client are exhausted.
        """
        log = logging.getLogger("test_update_reg_clients_hosts")
        atd = test_vars["atd_obj"]
        commands = """
            cp /etc/hosts .
            echo ' '                >> hosts
            echo '# STAF server IP' >> hosts
            echo '{0} staf'         >> hosts
            sudo mv hosts /etc/hosts
            echo '#!/bin/bash' > ~/hostdb_entries.sh
            chmod 755 ~/hostdb_entries.sh
            echo "cd ~/Avere-sv" >> ~/hostdb_entries.sh
            echo "source /usr/sv/env/bin/activate" >> ~/hostdb_entries.sh
            echo "export PYTHONPATH=~/Avere-sv:~/Avere-sv/averesv:$PYTHONPATH:$PATH" >> ~/hostdb_entries.sh
            echo "averesv/hostdb.py -a vfxt -m {1} -p '{2}'" >> ~/hostdb_entries.sh
        """.format(test_vars["staf_server_priv_ip"],
                   test_vars["cluster_mgmt_ip"],
                   os.environ["AVERE_ADMIN_PW"]).split("\n")

        # Add hostdb entry calls for each regression client.
        for i, staf_client_ip in enumerate(test_vars["staf_client_priv_ips"]):
            commands.append(
                "echo 'averesv/hostdb.py -L regclient{0} -m {1}' >> ~/hostdb_entries.sh"
                .format(i, staf_client_ip))

        # Get the storage account's access key and add that hostdb entry, too.
        sa_key = atd.st_client.storage_accounts.list_keys(
            atd.resource_group, test_vars["storage_account"]).keys[0].value
        commands.append(
            "echo 'averesv/hostdb.py -s {0}.blob.core.windows.net -m {0}.blob.core.windows.net -M az --cloudCreds \"{0}::{1}\"' >> ~/hostdb_entries.sh"
            .format(test_vars["storage_account"], sa_key))

        for staf_client_ip in test_vars["staf_client_priv_ips"]:
            # Connection error for THIS client only; reset once connected.
            last_error = None
            for port_attempt in range(1, 11):
                tunnel_local_port = get_unused_local_port()
                jump_c = Connection(test_vars["public_ip"],
                                    user=test_vars["controller_user"],
                                    connect_kwargs={
                                        "key_filename":
                                        test_vars["ssh_priv_key"],
                                    })
                try:
                    with jump_c.forward_local(local_port=tunnel_local_port,
                                              remote_port=22,
                                              remote_host=staf_client_ip):
                        node_c = Connection("127.0.0.1",
                                            user=test_vars["controller_user"],
                                            port=tunnel_local_port,
                                            connect_kwargs={
                                                "key_filename":
                                                test_vars["ssh_priv_key"],
                                            })
                        try:
                            node_c.open()
                        except NoValidConnectionsError as ex:
                            exp_err = "Unable to connect to port {} on 127.0.0.1".format(
                                tunnel_local_port)
                            if exp_err not in str(ex):
                                raise
                            last_error = ex
                            log.warning("{0} (attempt #{1}, retrying)".format(
                                exp_err, str(port_attempt)))
                            continue  # iterate

                        # Connected: forget failures from earlier attempts.
                        last_error = None

                        try:
                            run_ssh_commands(node_c.client, commands)

                            # Copy SSH keys to the client.
                            scp_cli = SCPClient(node_c.transport)
                            try:
                                scp_cli.put(test_vars["ssh_priv_key"],
                                            "~/.ssh/id_rsa")
                                scp_cli.put(test_vars["ssh_pub_key"],
                                            "~/.ssh/id_rsa.pub")
                            finally:
                                scp_cli.close()
                        finally:
                            # Close the client connection before the tunnel
                            # is torn down by leaving the "with" block.
                            node_c.close()
                finally:
                    jump_c.close()
                log.debug("Connection to {} closed".format(staf_client_ip))
                break  # no need to iterate again

            if last_error:
                # Every attempt for this client failed with the expected
                # local-port error.
                log.error(
                    "See previous error(s) above. Raising last exception.")
                raise last_error