Example #1
    def log_collection(self, master_ip: Optional[str]):
        """
        Collects all sorts of logs about the installation process

        @param master_ip The IP address of the master node. Used to SSH into the node when doing installer gather.
                         When not given, installer gather log collection is skipped.
        """
        etype, _value, _tb = sys.exc_info()

        log.info(
            f"Collecting logs after a {'successful' if etype is None else 'failed'} installation"
        )

        with SuppressAndLog(Exception):
            log.info("Gathering sosreport data from host...")
            gather_sosreport_data(output_dir=IBIP_DIR)

        if master_ip is not None:
            with SuppressAndLog(Exception):
                log.info("Gathering information via installer-gather...")
                utils.recreate_folder(INSTALLER_GATHER_DIR,
                                      force_recreate=True)
                self.installer_gather(
                    ip=master_ip,
                    ssh_key=consts.DEFAULT_SSH_PRIVATE_KEY_PATH,
                    out_dir=INSTALLER_GATHER_DIR)

        with SuppressAndLog(Exception):
            log.info("Gathering information via must-gather...")
            download_must_gather(KUBE_CONFIG, IBIP_DIR)
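
Every example below depends on a SuppressAndLog context manager that catches the listed exception types, logs them, and lets execution continue, so that one failing collection or teardown step does not abort the rest. The helper itself is not shown in these snippets; what follows is only a minimal sketch of such a context manager, inferred from how it is called above, and the real implementation in the source project may differ.

import logging
from contextlib import AbstractContextManager

log = logging.getLogger(__name__)


class SuppressAndLog(AbstractContextManager):
    """Suppress the given exception types, logging them instead of re-raising.

    Hypothetical sketch based only on the call sites in these examples.
    """

    def __init__(self, *exceptions):
        self._exceptions = exceptions

    def __exit__(self, exc_type, exc_value, tb):
        if exc_type is not None and issubclass(exc_type, self._exceptions):
            log.warning("Suppressed exception", exc_info=(exc_type, exc_value, tb))
            return True   # swallow the listed exception types
        return False      # anything else propagates normally

Returning True from __exit__ is what tells Python to swallow the exception; any type not passed to the constructor still propagates, which keeps genuinely unexpected errors visible.
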
Example #2
def is_update_needed(output_folder: str, update_on_events_update: bool,
                     client: InventoryClient, cluster: dict):
    if not os.path.isdir(output_folder):
        return True

    if not update_on_events_update:
        return False

    destination_event_file_path = get_cluster_events_path(
        cluster, output_folder)
    with tempfile.NamedTemporaryFile() as latest_event_tp:
        with SuppressAndLog(assisted_service_client.rest.ApiException):
            client.download_cluster_events(cluster["id"], latest_event_tp.name)

        if filecmp.cmp(destination_event_file_path, latest_event_tp.name):
            log.info(f"no new events found for {destination_event_file_path}")
            need_update = False
        else:
            log.info(
                f"update needed, new events found, deleting {destination_event_file_path}"
            )
            os.remove(destination_event_file_path)
            need_update = True
    return need_update
Example #3
    def cluster(
        self,
        api_client: InventoryClient,
        request: FixtureRequest,
        infra_env_configuration: InfraEnvConfig,
        proxy_server,
        prepare_nodes_network: Nodes,
        cluster_configuration: ClusterConfig,
        ipxe_server,
    ):
        log.debug(
            f"--- SETUP --- Creating cluster for test: {request.node.name}\n")
        cluster = Cluster(
            api_client=api_client,
            config=cluster_configuration,
            infra_env_config=infra_env_configuration,
            nodes=prepare_nodes_network,
        )

        if self._does_need_proxy_server(prepare_nodes_network):
            self.__set_up_proxy_server(cluster, cluster_configuration,
                                       proxy_server)

        if global_variables.ipxe_boot:
            infra_env = cluster.generate_infra_env()
            ipxe_server_controller = ipxe_server(name="ipxe_controller",
                                                 api_client=cluster.api_client)
            ipxe_server_controller.start(infra_env_id=infra_env.id,
                                         cluster_name=cluster.name)

            ipxe_server_url = f"http://{consts.DEFAULT_IPXE_SERVER_IP}:{consts.DEFAULT_IPXE_SERVER_PORT}/{cluster.name}"
            network_name = cluster.nodes.get_cluster_network()
            libvirt_controller = LibvirtController(
                config=cluster.nodes.controller,
                entity_config=cluster_configuration)
            libvirt_controller.set_ipxe_url(network_name=network_name,
                                            ipxe_url=ipxe_server_url)

        yield cluster

        if self._is_test_failed(request):
            log.info(
                f"--- TEARDOWN --- Collecting Logs for test: {request.node.name}\n"
            )
            self.collect_test_logs(cluster, api_client, request, cluster.nodes)

            if global_variables.test_teardown:
                if cluster.is_installing() or cluster.is_finalizing():
                    cluster.cancel_install()

        if global_variables.test_teardown:
            with SuppressAndLog(ApiException):
                cluster.deregister_infraenv()

            with suppress(ApiException):
                log.info(
                    f"--- TEARDOWN --- deleting created cluster {cluster.id}\n"
                )
                cluster.delete()
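
Note how the teardown above mixes the two helpers: SuppressAndLog(ApiException) around deregister_infraenv records the API error before moving on, while contextlib.suppress(ApiException) around the final delete discards it silently. Both keep teardown running past an already-deleted resource; the difference is only whether the failure shows up in the logs. A small stand-alone illustration, using FileNotFoundError as a stand-in for ApiException and assuming the SuppressAndLog sketch above:

import os
from contextlib import suppress

with suppress(FileNotFoundError):        # failure is discarded silently
    os.remove("/tmp/does-not-exist")

with SuppressAndLog(FileNotFoundError):  # failure is logged, then discarded
    os.remove("/tmp/does-not-exist")
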
Example #4
    def cluster(
        self,
        api_client: InventoryClient,
        request: FixtureRequest,
        infra_env_configuration: InfraEnvConfig,
        proxy_server,
        prepare_nodes_network: Nodes,
        cluster_configuration: ClusterConfig,
        ipxe_server: Callable,
        tang_server: Callable,
    ):
        log.debug(
            f"--- SETUP --- Creating cluster for test: {request.node.name}\n")
        if cluster_configuration.disk_encryption_mode == consts.DiskEncryptionMode.TANG:
            self._start_tang_server(tang_server, cluster_configuration)

        cluster = Cluster(
            api_client=api_client,
            config=cluster_configuration,
            infra_env_config=infra_env_configuration,
            nodes=prepare_nodes_network,
        )

        if self._does_need_proxy_server(prepare_nodes_network):
            self.__set_up_proxy_server(cluster, cluster_configuration,
                                       proxy_server)

        if global_variables.ipxe_boot:
            infra_env = cluster.generate_infra_env()
            ipxe_server_controller = ipxe_server(name="ipxe_controller",
                                                 api_client=cluster.api_client)
            ipxe_server_controller.run(infra_env_id=infra_env.id,
                                       cluster_name=cluster.name)
            cluster_configuration.iso_download_path = utils.get_iso_download_path(
                infra_env_configuration.entity_name.get())

        yield cluster

        if self._is_test_failed(request):
            log.info(
                f"--- TEARDOWN --- Collecting Logs for test: {request.node.name}\n"
            )
            self.collect_test_logs(cluster, api_client, request, cluster.nodes)

            if global_variables.test_teardown:
                if cluster.is_installing() or cluster.is_finalizing():
                    cluster.cancel_install()

        if global_variables.test_teardown:
            with SuppressAndLog(ApiException):
                cluster.deregister_infraenv()

            with suppress(ApiException):
                log.info(
                    f"--- TEARDOWN --- deleting created cluster {cluster.id}\n"
                )
                cluster.delete()
Example #5
def main():
    parser = ArgumentParser(description="Logs junit parser")
    parser.add_argument("--src", help="Logs dir source", type=str)
    parser.add_argument("--dst", help="Junit XML report destination", type=str)
    args = parser.parse_args()

    report_dir = Path(args.dst)
    report_dir.mkdir(exist_ok=True)

    with SuppressAndLog(BaseException):
        log.info(f"Parsing logs from `{args.src}` to `{report_dir}`")
        LogsConverter.export_service_logs_to_junit_suites(
            Path(args.src), report_dir)

    with SuppressAndLog(BaseException):
        log.info(f"Parsing service events from `{args.src}` to `{report_dir}`")
        EventsConverter.export_service_events_to_junit_suite(
            Path(args.src), report_dir)
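
Suppressing BaseException here is the broadest possible net: it also covers KeyboardInterrupt and SystemExit raised inside either block, so a failing or interrupted log conversion is logged and the script still moves on to the events conversion.
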
Example #6
def download_logs_kube_api(api_client: ApiClient, cluster_name: str,
                           namespace: str, dest: str, must_gather: bool,
                           management_kubeconfig: str):

    cluster_deployment = ClusterDeployment(
        kube_api_client=api_client,
        name=cluster_name,
        namespace=namespace,
    )

    agent_cluster_install = AgentClusterInstall(
        kube_api_client=api_client,
        name=cluster_deployment.get()["spec"]["clusterInstallRef"]["name"],
        namespace=namespace,
    )

    output_folder = os.path.join(dest, f"{cluster_name}")
    recreate_folder(output_folder)

    try:
        with SuppressAndLog(requests.exceptions.RequestException,
                            ConnectionError):
            collect_debug_info_from_cluster(cluster_deployment,
                                            agent_cluster_install,
                                            output_folder)

        if must_gather:
            recreate_folder(os.path.join(output_folder, "must-gather"))
            with SuppressAndLog(Exception):
                # in case of hypershift
                if namespace.startswith("clusters"):
                    log.info("Dumping hypershift files")
                    hypershift = HyperShift(name=cluster_name)
                    hypershift.dump(os.path.join(output_folder, "dump"),
                                    management_kubeconfig)
                else:
                    _must_gather_kube_api(cluster_name, cluster_deployment,
                                          agent_cluster_install, output_folder)

    finally:
        run_command(f"chmod -R ugo+rx '{output_folder}'")
Example #7
    def cluster(
        self,
        api_client: InventoryClient,
        request: FixtureRequest,
        infra_env_configuration: InfraEnvConfig,
        proxy_server,
        prepare_nodes_network: Nodes,
        cluster_configuration: ClusterConfig,
    ):
        log.debug(
            f"--- SETUP --- Creating cluster for test: {request.node.name}\n")
        cluster = Cluster(
            api_client=api_client,
            config=cluster_configuration,
            infra_env_config=infra_env_configuration,
            nodes=prepare_nodes_network,
        )

        if self._does_need_proxy_server(prepare_nodes_network):
            self._set_up_proxy_server(cluster, cluster_configuration,
                                      proxy_server)

        yield cluster

        if self._is_test_failed(request):
            log.info(
                f"--- TEARDOWN --- Collecting Logs for test: {request.node.name}\n"
            )
            self.collect_test_logs(cluster, api_client, request, cluster.nodes)

            if global_variables.test_teardown:
                if cluster.is_installing() or cluster.is_finalizing():
                    cluster.cancel_install()

        if global_variables.test_teardown:
            with SuppressAndLog(ApiException):
                cluster.deregister_infraenv()

            with suppress(ApiException):
                log.info(
                    f"--- TEARDOWN --- deleting created cluster {cluster.id}\n"
                )
                cluster.delete()
Example #8
    def infra_env(
        self,
        api_client: InventoryClient,
        request: FixtureRequest,
        proxy_server,
        prepare_infraenv_nodes_network: Nodes,
        infra_env_configuration: InfraEnvConfig,
    ):
        log.debug(
            f"--- SETUP --- Creating InfraEnv for test: {request.node.name}\n")
        infra_env = InfraEnv(api_client=api_client,
                             config=infra_env_configuration,
                             nodes=prepare_infraenv_nodes_network)

        yield infra_env
        log.info("--- TEARDOWN --- Infra env\n")

        if global_variables.test_teardown:
            with SuppressAndLog(ApiException):
                infra_env.deregister()
Example #9
    def delete_dnsmasq_conf_file(self, cluster_name):
        with SuppressAndLog(FileNotFoundError):
            fname = f"/etc/NetworkManager/dnsmasq.d/openshift-{cluster_name}.conf"
            log.info(f"--- TEARDOWN --- deleting dnsmasq file: {fname}\n")
            os.remove(fname)
Example #10
def download_logs(
    client: InventoryClient,
    cluster: dict,
    dest: str,
    must_gather: bool,
    update_by_events: bool = False,
    retry_interval: int = RETRY_INTERVAL,
):
    if "hosts" not in cluster or len(cluster["hosts"]) == 0:
        cluster["hosts"] = client.get_cluster_hosts(cluster_id=cluster["id"])

    output_folder = get_logs_output_folder(dest, cluster)
    if not is_update_needed(output_folder, update_by_events, client, cluster):
        log.info(f"Skipping, no need to update {output_folder}.")
        return

    recreate_folder(output_folder)
    recreate_folder(os.path.join(output_folder, "cluster_files"))

    try:
        write_metadata_file(client, cluster,
                            os.path.join(output_folder, "metadata.json"))

        with SuppressAndLog(requests.exceptions.RequestException,
                            ConnectionError, KeyboardInterrupt):
            client.download_metrics(os.path.join(output_folder, "metrics.txt"))

        for cluster_file in (
                "bootstrap.ign",
                "master.ign",
                "worker.ign",
                "install-config.yaml",
        ):
            with SuppressAndLog(assisted_service_client.rest.ApiException,
                                KeyboardInterrupt):
                client.download_and_save_file(
                    cluster["id"], cluster_file,
                    os.path.join(output_folder, "cluster_files", cluster_file))

        with SuppressAndLog(assisted_service_client.rest.ApiException,
                            KeyboardInterrupt):
            download_manifests(client, cluster["id"], output_folder)

        infra_env_list = set()
        for host_id, infra_env_id in map(
                lambda host: (host["id"], host["infra_env_id"]),
                cluster["hosts"]):
            with SuppressAndLog(assisted_service_client.rest.ApiException,
                                KeyboardInterrupt):
                client.download_host_ignition(
                    infra_env_id, host_id,
                    os.path.join(output_folder, "cluster_files"))
            if infra_env_id not in infra_env_list:
                infra_env_list.add(infra_env_id)
                with SuppressAndLog(assisted_service_client.rest.ApiException,
                                    KeyboardInterrupt):
                    client.download_infraenv_events(
                        infra_env_id,
                        get_infraenv_events_path(infra_env_id, output_folder))

        with SuppressAndLog(assisted_service_client.rest.ApiException,
                            KeyboardInterrupt):
            client.download_cluster_events(
                cluster["id"], get_cluster_events_path(cluster, output_folder))
            shutil.copy2(
                os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             "events.html"), output_folder)

        with SuppressAndLog(assisted_service_client.rest.ApiException,
                            KeyboardInterrupt):
            are_masters_in_configuring_state = are_host_progress_in_stage(
                cluster["hosts"], [HostsProgressStages.CONFIGURING], 2)
            are_masters_in_join_or_done_state = are_host_progress_in_stage(
                cluster["hosts"],
                [HostsProgressStages.JOINED, HostsProgressStages.DONE], 2)
            max_retries = MUST_GATHER_MAX_RETRIES if are_masters_in_join_or_done_state else MAX_RETRIES
            is_controller_expected = (
                cluster["status"] == ClusterStatus.INSTALLED
                or are_masters_in_configuring_state
            )
            min_number_of_logs = min_number_of_log_files(
                cluster, is_controller_expected)

            for i in range(max_retries):
                cluster_logs_tar = os.path.join(
                    output_folder, f"cluster_{cluster['id']}_logs.tar")

                with suppress(FileNotFoundError):
                    os.remove(cluster_logs_tar)

                client.download_cluster_logs(cluster["id"], cluster_logs_tar)
                try:
                    verify_logs_uploaded(
                        cluster_logs_tar,
                        min_number_of_logs,
                        installation_success=(
                            cluster["status"] == ClusterStatus.INSTALLED),
                        check_oc=are_masters_in_join_or_done_state,
                    )
                    break
                except AssertionError as ex:
                    log.warning("Cluster logs verification failed: %s", ex)

                    # Skip sleeping on last retry
                    if i < max_retries - 1:
                        log.info(f"Going to retry in {retry_interval} seconds")
                        time.sleep(retry_interval)

        kubeconfig_path = os.path.join(output_folder, "kubeconfig-noingress")

        with SuppressAndLog(assisted_service_client.rest.ApiException):
            client.download_kubeconfig_no_ingress(cluster["id"],
                                                  kubeconfig_path)

            if must_gather:
                config_etc_hosts(
                    cluster["name"],
                    cluster["base_dns_domain"],
                    client.get_api_vip(cluster, cluster["id"]),
                )
                download_must_gather(kubeconfig_path, output_folder)

    finally:
        run_command(f"chmod -R ugo+rx '{output_folder}'")