コード例 #1
0
def wait_till_specific_host_is_in_stage(
    client,
    cluster_id: str,
    host_name: str,
    stages: List[str],
    nodes_count: int = 1,
    timeout: int = consts.CLUSTER_INSTALLATION_TIMEOUT / 2,
    interval: int = 5,
):
    """Block until the named host reports one of the given installation stages.

    Args:
        client: API client exposing get_host_by_name().
        cluster_id: Cluster the host belongs to.
        host_name: Requested hostname to poll.
        stages: Acceptable progress stages; waiting stops once the host is in any of them.
        nodes_count: Number of hosts expected in the stage (this helper polls a single host).
        timeout: Maximum seconds to wait.
        interval: Seconds between polls.

    Raises:
        waiting.TimeoutExpired: If the host never reaches one of `stages`; the
            current stage is logged before re-raising.
    """
    log.info(f"Wait till {host_name} host is in stage {stages}")
    try:
        waiting.wait(
            lambda: utils.are_host_progress_in_stage(
                [client.get_host_by_name(cluster_id, host_name)],
                stages,
                nodes_count,
            ),
            timeout_seconds=timeout,
            sleep_seconds=interval,
            # Typo fixed: message previously read "in of the stage".
            waiting_for=f"Node to be in one of the stages {stages}",
        )
    except BaseException:
        # Log the current stage so a timeout is diagnosable, then propagate.
        hosts = [client.get_host_by_name(cluster_id, host_name)]
        log.error(
            f"All nodes stages: "
            f"{[host['progress']['current_stage'] for host in hosts]} "
            f"when waited for {stages}"
        )
        raise
コード例 #2
0
ファイル: utils.py プロジェクト: mhrivnak/assisted-test-infra
def wait_till_cluster_is_in_status(
    client,
    cluster_id,
    statuses: List[str],
    timeout=consts.NODES_REGISTERED_TIMEOUT,
    interval=30,
    break_statuses: List[str] = None,
):
    """Block until the cluster reaches one of the given statuses.

    Args:
        client: API client exposing cluster_get().
        cluster_id: ID of the cluster to poll.
        statuses: Target statuses; waiting stops when the cluster is in any of them.
        timeout: Maximum seconds to wait.
        interval: Seconds between polls.
        break_statuses: Optional statuses that also stop the wait but are then
            treated as a failure.

    Raises:
        Exception: If the cluster lands in one of `break_statuses`.
        waiting.TimeoutExpired: If no target status is reached in time.
    """
    log.info("Wait till cluster %s is in status %s", cluster_id, statuses)
    try:
        if break_statuses:
            # Build a new list: the previous `statuses += break_statuses`
            # mutated the caller's list in place.
            statuses = statuses + break_statuses
        waiting.wait(
            lambda: is_cluster_in_status(client=client, cluster_id=cluster_id, statuses=statuses),
            timeout_seconds=timeout,
            sleep_seconds=interval,
            waiting_for=f"Cluster to be in status {statuses}",
        )
        if break_statuses and is_cluster_in_status(client, cluster_id, break_statuses):
            # Raise Exception, not BaseException: BaseException is reserved for
            # interpreter-level events (KeyboardInterrupt, SystemExit); siblings
            # in this module raise Exception for the same situation.
            raise Exception(
                f"Stop installation process, " f"cluster is in status {client.cluster_get(cluster_id).status}"
            )
    except BaseException:
        log.error("Cluster status is: %s", client.cluster_get(cluster_id).status)
        raise
コード例 #3
0
def wait_till_at_least_one_host_is_in_stage(
    client,
    cluster_id,
    stages,
    nodes_count=1,
    timeout=consts.CLUSTER_INSTALLATION_TIMEOUT / 2,
    interval=consts.DEFAULT_CHECK_STATUSES_INTERVAL,
):
    """Block until at least `nodes_count` cluster hosts report one of `stages`.

    Args:
        client: API client exposing get_cluster_hosts().
        cluster_id: ID of the cluster to poll.
        stages: Acceptable progress stages.
        nodes_count: Minimum number of hosts required to be in one of `stages`.
        timeout: Maximum seconds to wait.
        interval: Seconds between polls.

    Raises:
        waiting.TimeoutExpired: If the condition is never met; all current host
            stages are logged before re-raising.
    """
    log.info(f"Wait till {nodes_count} node is in stage {stages}")
    try:
        waiting.wait(
            lambda: utils.are_host_progress_in_stage(
                client.get_cluster_hosts(cluster_id),
                stages,
                nodes_count,
            ),
            timeout_seconds=timeout,
            sleep_seconds=interval,
            # Typo fixed: message previously read "in of the stage".
            waiting_for=f"Node to be in one of the stages {stages}",
        )
    except BaseException:
        # Log every host's current stage so the timeout is diagnosable.
        hosts = client.get_cluster_hosts(cluster_id)
        log.error(
            f"All nodes stages: "
            f"{[host['progress']['current_stage'] for host in hosts]} "
            f"when waited for {stages}"
        )
        raise
コード例 #4
0
def _are_hosts_in_status(hosts,
                         nodes_count,
                         statuses,
                         status_info="",
                         fall_on_error_status=True):
    hosts_in_status = [
        host for host in hosts
        if (host["status"] in statuses
            and host["status_info"].startswith(status_info))
    ]
    if len(hosts_in_status) >= nodes_count:
        return True
    elif fall_on_error_status and len(
        [host
         for host in hosts if host["status"] == consts.NodesStatus.ERROR]) > 0:
        hosts_in_error = [(i, host["id"], host["requested_hostname"],
                           host["role"], host["status"], host["status_info"])
                          for i, host in enumerate(hosts, start=1)
                          if host["status"] == consts.NodesStatus.ERROR]
        log.error(
            "Some of the hosts are in insufficient or error status. Hosts in error %s",
            hosts_in_error)
        raise InstallationFailedError()

    log.info(
        "Asked hosts to be in one of the statuses from %s and currently hosts statuses are %s",
        statuses,
        host_statuses(hosts),
    )
    return False
コード例 #5
0
def validate_dns(client, cluster_id):
    """Verify that the cluster's api/ingress DNS names resolve to the expected VIPs."""
    if not args.managed_dns_domains:
        # 'set_dns' (using dnsmasq) is invoked after nodes_flow
        return

    cluster = client.cluster_get(cluster_id)
    api_address = f"api.{cluster.name}.{cluster.base_dns_domain}"
    ingress_address = f"ingress.apps.{cluster.name}.{cluster.base_dns_domain}"
    log.info(
        "Validating resolvability of the following domains: %s -> %s, %s -> %s",
        api_address,
        cluster.api_vip,
        ingress_address,
        cluster.ingress_vip,
    )
    try:
        # Resolve api first, then ingress, and compare the first A record of
        # each against the VIPs the cluster reports.
        resolved_api = str(dns.resolver.resolve(api_address, "A")[0])
        resolved_ingress = str(dns.resolver.resolve(ingress_address, "A")[0])

        if resolved_api != cluster.api_vip or resolved_ingress != cluster.ingress_vip:
            raise Exception("DNS domains are not resolvable")

        log.info("DNS domains are resolvable")
    except Exception as e:
        log.error("Failed to resolve DNS domains")
        raise e
コード例 #6
0
def are_libvirt_nodes_in_cluster_hosts(client, cluster_id, num_nodes):
    """Return True when at least `num_nodes` cluster hosts have a non-empty MAC.

    Args:
        client: API client exposing get_hosts_id_with_macs().
        cluster_id: ID of the cluster to query.
        num_nodes: Minimum number of hosts with a MAC required.

    Returns:
        bool: True if enough MACs were found; False if not, or if the query failed.
    """
    try:
        hosts_macs = client.get_hosts_id_with_macs(cluster_id)
    except Exception:
        # Narrowed from `except BaseException as e` (the binding was unused):
        # catching BaseException also swallowed KeyboardInterrupt/SystemExit.
        log.error("Failed to get nodes macs for cluster: %s", cluster_id)
        return False
    num_macs = len([mac for mac in hosts_macs if mac != ""])
    return num_macs >= num_nodes
コード例 #7
0
 def apply(self, refresh: bool = True) -> None:
     """Run `terraform apply` via the wrapped client, raising on a non-zero return code."""
     retcode, stdout, stderr = self.tf.apply(no_color=IsFlagged,
                                             refresh=refresh,
                                             input=False,
                                             skip_plan=True)
     if retcode == 0:
         return
     # Surface the full terraform output in both the log and the exception.
     message = f"Terraform apply failed with return value {retcode}, output {stdout} , error {stderr}"
     log.error(message)
     raise Exception(message)
コード例 #8
0
 def are_libvirt_nodes_in_cluster_hosts(self) -> bool:
     """Return True when at least `day2_workers_count` hosts report a non-empty MAC."""
     try:
         macs = self.api_client.get_hosts_id_with_macs(
             self.config.cluster_id)
     except BaseException:
         log.error("Failed to get nodes macs for cluster: %s",
                   self.config.cluster_id)
         return False
     # Hosts without a MAC report an empty string; count only real MACs.
     non_empty = len([mac for mac in macs if mac != ""])
     return non_empty >= self.config.day2_workers_count
コード例 #9
0
ファイル: utils.py プロジェクト: mhrivnak/assisted-test-infra
def get_assisted_controller_status(kubeconfig):
    """Return the `oc get pods` output (bytes) for the assisted-installer-controller job.

    Returns b"" when the oc command fails; the failure is logged.
    """
    log.info("Getting controller status")
    command = (
        f"oc --insecure-skip-tls-verify --kubeconfig={kubeconfig} --no-headers=true -n assisted-installer "
        f"get pods -l job-name=assisted-installer-controller"
    )
    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    if result.returncode == 0:
        log.info(f"{result.stdout}")
        return result.stdout

    log.error(f"failed to get controller status: {result.stderr}")
    return b""
コード例 #10
0
    def render_worker_live_iso_ignition(install_device: str) -> str:
        """
        The worker live iso ignition file is embedded in the live ISO for the worker
        and is responsible for:
            - Copying the worker.ign file into the live filesystem
            - Creating a one-shot systemd unit service which runs coreos-installer with the worker.ign
            - Rebooting the node once the operating system has been written to disk

        The worker then starts with the installed RHCOS+worker.ign and attempts to join the cluster

        The reason we don't simply boot the live ISO with the worker.ign as
        ignition is because an RHCOS OS with worker.ign has to be written to
        disk, worker.ign is not meant to be the ignition of the live ISO
        itself. Basically, the live ISO phase is just a temporary operating
        system for the user to shell into and run coreos-installer to install
        the actual operating system used for OCP. In this test we just automate
        this manual process using our own ignition file, which will
        automatically run coreos-installer within the live operating system and
        reboot the node for us.

        @param install_device The path of the disk to install RHCOS on (e.g. /dev/vda)
        @return The rendered ignition content as a string.
        """
        # The jinja template is read as text ...
        with open(os.path.join(RESOURCES_DIR, WORKER_LIVE_IGNITION_TEMPLATE),
                  "r") as f:
            live_iso_ignition_template_contents = f.read()

        # ... while the install script and worker.ign are read as bytes,
        # presumably because the b64encode_utf8 filter below expects bytes
        # (base64.b64encode takes a bytes-like object).
        with open(os.path.join(RESOURCES_DIR, WORKER_INSTALL_SCRIPT),
                  "rb") as f:
            worker_install_script_contents = f.read()

        try:
            with open(os.path.join(IBIP_DIR, "worker.ign"), "rb") as f:
                worker_ignition_contents = f.read()
        except FileNotFoundError:
            log.error(
                "The worker.ign file is only generated in OCP 4.11 and above, "
                "this test is not meant to run on earlier versions")
            raise

        # NOTE(review): registering the filter mutates jinja2's global filter
        # registry, so it becomes visible to any other template rendered in
        # this process.
        jinja2.filters.FILTERS["b64encode_utf8"] = lambda s: base64.b64encode(
            s).decode("utf-8")

        # SSH_PUB_KEY must be set in the environment; a missing variable
        # raises KeyError here.
        return jinja2.Template(live_iso_ignition_template_contents).render(
            ssh_public_key=os.environ["SSH_PUB_KEY"],
            worker_ign_contents=worker_ignition_contents,
            install_sh_contents=worker_install_script_contents,
            install_device=install_device,
        )
コード例 #11
0
def wait_till_nodes_are_ready(nodes_count, network_name):
    """Wait until `nodes_count` nodes appear in the network's DHCP leases (i.e. have IPs)."""
    log.info("Wait till %s nodes will be ready and have ips", nodes_count)

    def _enough_leases():
        # Each booted node is expected to show up as a DHCP lease.
        return len(get_network_leases(network_name)) >= nodes_count

    try:
        waiting.wait(
            _enough_leases,
            timeout_seconds=consts.NODES_REGISTERED_TIMEOUT * nodes_count,
            sleep_seconds=10,
            waiting_for="Nodes to have ips",
        )
        log.info("All nodes have booted and got ips")
    except BaseException:
        log.error(
            "Not all nodes are ready. Current dhcp leases are %s",
            get_network_leases(network_name),
        )
        raise
コード例 #12
0
def get_libvirt_nodes_mac_role_ip_and_name(network_name):
    """Map each DHCP lease's MAC to its ip/name/role.

    The role is WORKER when the lease hostname contains the worker role name,
    MASTER otherwise.
    """
    nodes_data = {}
    try:
        for lease in get_network_leases(network_name):
            hostname = lease["hostname"]
            if consts.NodeRoles.WORKER in hostname:
                role = consts.NodeRoles.WORKER
            else:
                role = consts.NodeRoles.MASTER
            nodes_data[lease["mac"]] = {
                "ip": lease["ipaddr"],
                "name": hostname,
                "role": role,
            }
        return nodes_data
    except BaseException:
        log.error(
            "Failed to get nodes macs from libvirt. Output is %s",
            get_network_leases(network_name),
        )
        raise
コード例 #13
0
def wait_for_logs_complete(client,
                           cluster_id,
                           timeout,
                           interval=60,
                           check_host_logs_only=False):
    """Wait until the cluster's log collection reaches a terminal state (completed or timeout)."""
    log.info("wait till logs of cluster %s are collected (or timed-out)",
             cluster_id)
    terminal_states = ["completed", "timeout"]
    try:
        waiting.wait(
            lambda: _are_logs_in_status(
                client=client,
                cluster_id=cluster_id,
                statuses=terminal_states,
                check_host_logs_only=check_host_logs_only,
            ),
            timeout_seconds=timeout,
            sleep_seconds=interval,
            waiting_for=f"Logs to be in status {terminal_states}",
        )
        log.info("logs are in expected state")
    except BaseException:
        log.error("waiting for logs expired after %d", timeout)
        raise
コード例 #14
0
ファイル: utils.py プロジェクト: mhrivnak/assisted-test-infra
def are_hosts_in_status(hosts, nodes_count, statuses, status_info="", fall_on_error_status=True):
    """Return True when at least `nodes_count` hosts match the wanted statuses.

    A host matches when its status is in `statuses` and its status_info starts
    with `status_info` (the default empty prefix matches everything).

    Raises:
        Exception: If `fall_on_error_status` is set and any host is in ERROR status.
    """
    matching = [
        h for h in hosts if h["status"] in statuses and h["status_info"].startswith(status_info)
    ]
    if len(matching) >= nodes_count:
        return True

    if fall_on_error_status:
        errored = [
            (idx, h["id"], h["requested_hostname"], h["role"], h["status"], h["status_info"])
            for idx, h in enumerate(hosts, start=1)
            if h["status"] == consts.NodesStatus.ERROR
        ]
        if errored:
            log.error("Some of the hosts are in insufficient or error status. Hosts in error %s", errored)
            raise Exception("All the nodes must be in valid status, but got some in error")

    log.info(
        "Asked hosts to be in one of the statuses from %s and currently hosts statuses are %s",
        statuses,
        [
            (idx, h["id"], h.get("requested_hostname"), h.get("role"), h["status"], h["status_info"])
            for idx, h in enumerate(hosts, start=1)
        ],
    )
    return False
コード例 #15
0
    def add_interface(self, node_name, network_name, target_interface):
        """
        Create an interface using given network name, return created interface's mac address.
        Note: Do not use the same network for different tests
        """
        log.info(
            f"Creating new interface attached to network: {network_name}, for node: {node_name}"
        )
        # Snapshot the MACs already leased so the new interface can be
        # identified after attachment.
        known_macs = [lease["mac"] for lease in self.list_leases(network_name)]
        command = f"virsh attach-interface {node_name} network {network_name} --target {target_interface} --persistent"
        utils.run_command(command)
        try:
            waiting.wait(
                lambda: len(self.list_leases(network_name)) > len(known_macs),
                timeout_seconds=30,
                sleep_seconds=2,
                waiting_for="Wait for network lease",
            )
        except waiting.exceptions.TimeoutExpired:
            log.error("Network lease wasnt found for added interface")
            raise

        # The first MAC that was not present before the attach is the new one.
        mac_address = ""
        for lease in self.list_leases(network_name):
            if lease["mac"] not in known_macs:
                mac_address = lease["mac"]
                break
        log.info(
            f"Successfully attached interface, network: {network_name}, mac: {mac_address}, for node:"
            f" {node_name}")
        return mac_address