Example #1
0
def new_tenant_client(test_env,
                      name: str,
                      tenant: str,
                      docker: bool = False,
                      network: str = "mender") -> MenderDevice:
    """Start a Mender client for ``tenant`` and register it with ``test_env``.

    The new client is identified by comparing the clients reported by
    ``test_env.get_mender_clients(network=network)`` before and after the
    client is started; exactly one new entry is expected.

    The device is attached to the test environment so that systemd logs can be
    printed on test failures: it is appended to ``test_env.device_group`` when
    that attribute already exists, otherwise ``test_env.device`` and
    ``test_env.device_group`` are created.

    Args:
        test_env: Environment implementing ``new_tenant_client`` and/or
            ``new_tenant_docker_client``, plus ``get_mender_clients``.
        name: Name given to the new client.
        tenant: Tenant the client is created for.
        docker: When true, ``new_tenant_docker_client`` is used instead of
            ``new_tenant_client``.
        network: Network name forwarded to ``get_mender_clients``.

    Returns:
        The ``MenderDevice`` wrapping the newly started client.
    """

    clients_before = set(test_env.get_mender_clients(network=network))

    # Only the selected factory attribute is looked up on the environment.
    start_client = (test_env.new_tenant_docker_client
                    if docker else test_env.new_tenant_client)
    start_client(name, tenant)

    added = set(test_env.get_mender_clients(network=network)) - clients_before
    assert len(added) == 1
    (address, ) = added
    device = MenderDevice(address)

    if not hasattr(test_env, "device_group"):
        # First client in this environment: create both attributes.
        test_env.device = device
        test_env.device_group = MenderDeviceGroup(
            test_env.get_mender_clients(network=network))
        return device

    test_env.device_group.append(device)
    return device
Example #2
0
    def do_test_deployment_gateway_and_one_device(
        self,
        env,
        valid_image_with_mender_conf,
        image_with_mender_conf_and_mender_gateway_conf,
    ):
        """Deploy an image to the gateway and, once triggered, to one device.

        The gateway deployment is started through ``common_update_procedure``
        with status verification disabled; its ``deployment_triggered_callback``
        performs a regular ``update_image`` on the client device. Afterwards
        the gateway deployment is checked for one "success" and the status
        "finished".

        Both image arguments are callables: they are invoked with the
        configuration file contents read from the corresponding device.
        """
        device = env.device
        gateway = env.device_gateway
        devauth = DeviceAuthV2(env.auth)
        deploy = Deployments(env.auth, devauth)

        # Read the current configuration files so they can be carried over
        # into the images built below.
        device_mender_conf = device.run("cat /etc/mender/mender.conf")
        gateway_gateway_conf = gateway.run(
            "cat /etc/mender/mender-gateway.conf")
        gateway_mender_conf = gateway.run("cat /etc/mender/mender.conf")

        host_ip = env.get_virtual_network_host_ip()

        both_devices = MenderDeviceGroup(
            [device.host_string, gateway.host_string])
        id_by_host = Helpers.ip_to_device_id_map(both_devices,
                                                 devauth=devauth)

        gateway_image_name = ("mender-gateway-image-full-cmdline-%s.ext4" %
                              conftest.machine_name)
        gateway_image = image_with_mender_conf_and_mender_gateway_conf(
            gateway_image_name,
            gateway_mender_conf,
            gateway_gateway_conf,
        )

        def deploy_to_device():
            # Runs as the callback of the gateway deployment below.
            update_image(
                device,
                host_ip,
                expected_mender_clients=1,
                install_image=valid_image_with_mender_conf(
                    device_mender_conf),
                devauth=devauth,
                deploy=deploy,
                devices=[id_by_host[device.host_string]],
            )

        deployment_id, _ = common_update_procedure(
            gateway_image,
            devices=[id_by_host[gateway.host_string]],
            devauth=devauth,
            deploy=deploy,
            deployment_triggered_callback=deploy_to_device,
            verify_status=False,
        )

        deploy.check_expected_statistics(deployment_id, "success", 1)
        deploy.check_expected_status("finished", deployment_id)
Example #3
0
def standard_setup_two_clients_bootstrapped(request):
    """Bring up the standard setup with two clients and accept both devices.

    The environment's ``teardown`` is registered as a finalizer on ``request``
    before ``setup`` is called. The returned environment carries a
    ``device_group`` built from ``get_mender_clients()``.
    """
    environment = container_factory.getStandardSetup(num_clients=2)
    request.addfinalizer(environment.teardown)
    environment.setup()

    clients = MenderDeviceGroup(environment.get_mender_clients())
    environment.device_group = clients
    clients.ssh_is_opened()

    reset_mender_api(environment)
    auth_v2.accept_devices(2)

    return environment
Example #4
0
def standard_setup_two_clients_bootstrapped_with_gateway(request):
    """Bring up two clients plus a gateway and accept all three devices.

    The environment's ``teardown`` is registered as a finalizer on ``request``
    before ``setup`` is called. The returned environment carries:

    * ``device_group``: the clients found on the "mender_local" network,
    * ``device_gateway``: the first gateway found on the "mender" network,
    * ``auth``: the ``auth`` object in scope of this function.
    """
    environment = container_factory.get_standard_setup_with_gateway(
        num_clients=2)
    request.addfinalizer(environment.teardown)
    environment.setup()

    clients = MenderDeviceGroup(
        environment.get_mender_clients(network="mender_local"))
    environment.device_group = clients
    clients.ssh_is_opened()

    gateways = environment.get_mender_gateways(network="mender")
    gateway = MenderDevice(gateways[0])
    environment.device_gateway = gateway
    gateway.ssh_is_opened()

    reset_mender_api(environment)
    # Two client devices plus the gateway, which runs a Mender client as well.
    devauth.accept_devices(3)

    environment.auth = auth
    return environment
Example #5
0
    def do_test_deployment_one_device(self, env, valid_image_with_mender_conf):
        """Run a regular image update on the single device of ``env``.

        ``valid_image_with_mender_conf`` is a callable; it is invoked with the
        contents of the device's /etc/mender/mender.conf and its result is
        passed to ``update_image`` as the image to install.
        """
        device = env.device
        devauth = DeviceAuthV2(env.auth)
        deploy = Deployments(env.auth, devauth)

        host_ip = env.get_virtual_network_host_ip()
        current_conf = device.run("cat /etc/mender/mender.conf")

        # Resolve the device ID for this device's host string.
        id_by_host = Helpers.ip_to_device_id_map(
            MenderDeviceGroup([device.host_string]),
            devauth=devauth,
        )
        target_id = id_by_host[device.host_string]

        update_image(
            device,
            host_ip,
            expected_mender_clients=1,
            install_image=valid_image_with_mender_conf(current_conf),
            devauth=devauth,
            deploy=deploy,
            devices=[target_id],
        )
    def test_state_scripts(
        self,
        class_persistent_setup_client_state_scripts_update_module,
        description,
        test_set,
    ):
        """Test that state scripts are executed in right order, and that errors
        are treated like they should.

        Every entry of ``self.scripts`` that does not start with "Artifact" is
        written as a rootfs state script and unpacked into /etc/mender/scripts
        on the device while the client service is stopped. The "Artifact*"
        entries are written to a local directory that is bundled into a module
        artifact, which is then deployed to the device.

        Keys of ``test_set`` read here:

        * "FailureScript": scripts that get an ``exit 1`` appended.
        * "CorruptDataScriptVersionIn", "CorruptEtcScriptVersionIn",
          "RestoreEtcScriptVersionIn" (optional): name of the artifact script
          that additionally rewrites the respective ``version`` file.
        * "ExpectedStatus": expected deployment statistic, or ``None`` when the
          deployment is not expected to be attempted at all.

        ``description`` is not used in the body.

        The local work directory, the deployment and the scripts on the device
        are cleaned up in all cases.
        """

        mender_device = class_persistent_setup_client_state_scripts_update_module.device
        work_dir = "test_state_scripts.%s" % mender_device.host_string
        deployment_id = None
        client_service_name = mender_device.get_client_service_name()
        try:
            script_content = '#!/bin/sh\n\necho "`date --rfc-3339=seconds` $(basename $0)" >> /data/test_state_scripts.log\n'
            script_failure_content = script_content + "exit 1\n"

            # Make rootfs-scripts and put them in rootfs image.
            rootfs_script_dir = os.path.join(work_dir, "rootfs-scripts")
            shutil.rmtree(work_dir, ignore_errors=True)
            os.mkdir(work_dir)
            os.mkdir(rootfs_script_dir)

            for script in self.scripts:
                if script.startswith("Artifact"):
                    # This is a script for the artifact, skip this one.
                    continue
                with open(os.path.join(rootfs_script_dir, script), "w") as fd:
                    if script in test_set["FailureScript"]:
                        fd.write(script_failure_content)
                    else:
                        fd.write(script_content)
                    os.fchmod(fd.fileno(), 0o0755)

            # Write this again in case it was corrupted above.
            with open(os.path.join(rootfs_script_dir, "version"), "w") as fd:
                fd.write("2")

            # Then zip and copy them to QEMU host.
            subprocess.check_call(
                ["tar", "czf", "../rootfs-scripts.tar.gz", "."],
                cwd=rootfs_script_dir)
            # Stop client first to avoid race conditions.
            mender_device.run("systemctl stop %s" % client_service_name)
            try:
                mender_device.put(os.path.join(work_dir,
                                               "rootfs-scripts.tar.gz"),
                                  remote_path="/")
                # Only the scripts directory is created here; a stray "cd"
                # operand to mkdir would create a directory of that name too.
                mender_device.run("mkdir -p /etc/mender/scripts && " +
                                  "cd /etc/mender/scripts && " +
                                  "tar xzf /rootfs-scripts.tar.gz && " +
                                  "rm -f /rootfs-scripts.tar.gz")
            finally:
                # Restart the client even if installing the scripts failed.
                mender_device.run("systemctl start %s" % client_service_name)

            # Put artifact-scripts in the artifact.
            artifact_script_dir = os.path.join(work_dir, "artifact-scripts")
            os.mkdir(artifact_script_dir)
            for script in self.scripts:
                if not script.startswith("Artifact"):
                    # Not an artifact script, skip this one.
                    continue
                with open(os.path.join(artifact_script_dir, script),
                          "w") as fd:
                    if script in test_set["FailureScript"]:
                        fd.write(script_failure_content)
                    else:
                        fd.write(script_content)
                    if test_set.get("CorruptDataScriptVersionIn") == script:
                        fd.write(
                            "printf '1000' > /data/mender/scripts/version\n")
                    if test_set.get("CorruptEtcScriptVersionIn") == script:
                        fd.write(
                            "printf '1000' > /etc/mender/scripts/version\n")
                    if test_set.get("RestoreEtcScriptVersionIn") == script:
                        fd.write("printf '2' > /etc/mender/scripts/version\n")

            # Callback for our custom artifact maker
            def make_artifact(filename, artifact_name):
                return image.make_module_artifact(
                    "module-state-scripts-test",
                    conftest.machine_name,
                    artifact_name,
                    filename,
                    scripts=[artifact_script_dir],
                )

            # Now create the artifact, and make the deployment.
            device_id = Helpers.ip_to_device_id_map(
                MenderDeviceGroup([mender_device.host_string
                                   ]))[mender_device.host_string]
            deployment_id = common_update_procedure(
                verify_status=False,
                devices=[device_id],
                scripts=[artifact_script_dir],
                make_artifact=make_artifact,
            )[0]
            if test_set["ExpectedStatus"] is None:
                # In this case we don't expect the deployment to even be
                # attempted, presumably due to failing Idle/Sync/Download
                # scripts on the client. So no deployment checking. Just wait
                # until there is at least one Error script in the log, which
                # will always be the case if ExpectedStatus is none (since one
                # of them is preventing the update from being attempted).
                def fetch_info(cmd_list):
                    # Run each diagnostic command, log its output and return
                    # all outputs joined by newlines.
                    all_output = ""
                    for cmd in cmd_list:
                        output = mender_device.run(cmd, warn_only=True)
                        logger.error("%s:\n%s" % (cmd, output))
                        all_output += "%s\n" % output
                    return all_output

                info_query = [
                    "cat /data/test_state_scripts.log 1>&2",
                    "journalctl -u %s" % client_service_name,
                    "top -n5 -b",
                    "ls -l /proc/`pgrep mender`/fd",
                    "for fd in /proc/`pgrep mender`/fdinfo/*; do echo $fd:; cat $fd; done",
                ]
                starttime = time.time()
                # Poll for up to one hour, every 10 seconds.
                while starttime + 60 * 60 >= time.time():
                    output = mender_device.run(
                        "grep Error /data/test_state_scripts.log",
                        warn_only=True)
                    if output.rstrip() != "":
                        # If it succeeds, stop.
                        break
                    else:
                        fetch_info(info_query)
                        time.sleep(10)
                        continue
                else:
                    # The loop ran out without ever hitting the break above.
                    info = fetch_info(info_query)
                    pytest.fail(
                        'Waited too long for "Error" to appear in log:\n%s' %
                        info)
            else:
                deploy.check_expected_statistics(deployment_id,
                                                 test_set["ExpectedStatus"], 1)

            # Always give the client a little bit of time to settle in the base
            # state after an update.
            time.sleep(10)

            output = mender_device.run("cat /data/test_state_scripts.log")
            self.verify_script_log_correct(test_set, output.split("\n"))

        except BaseException:
            # Catch everything (including pytest failures) so the deployment
            # logs get recorded, then re-raise unchanged.
            output = mender_device.run("cat /data/mender/deployment*.log",
                                       warn_only=True)
            logger.info(output)
            raise

        finally:
            shutil.rmtree(work_dir, ignore_errors=True)
            if deployment_id:
                try:
                    deploy.abort(deployment_id)
                except Exception as abort_error:
                    # Best effort only: a failing abort must not fail the
                    # test, but interrupts are no longer swallowed.
                    logger.debug("Ignoring failure to abort deployment %s: %s",
                                 deployment_id, abort_error)
            mender_device.run(
                ("systemctl stop %s && " +
                 "rm -f /data/test_state_scripts.log && " +
                 "rm -rf /etc/mender/scripts && " +
                 "rm -rf /data/mender/scripts && " + "systemctl start %s") %
                (client_service_name, client_service_name))
    def test_reboot_recovery(
        self,
        class_persistent_setup_client_state_scripts_update_module,
        description,
        test_set,
    ):
        """Deploy a module artifact whose state scripts reboot the device and
        check the resulting script flow.

        For every "Artifact*" entry of ``test_set["ScriptOrder"]`` exactly one
        script body is written:

        * entries in "RebootScripts" log their name, sync and force a reboot
          on every run,
        * entries in "RebootOnceScripts" force a reboot only on their first
          run,
        * all other entries only log their name.

        After the deployment a reboot must be detected, and the log on the
        device must eventually equal ``test_set["ExpectedScriptFlow"]``.

        ``description`` is not used in the body.

        The local work directory and the scripts on the device are cleaned up
        in all cases.
        """

        mender_device = class_persistent_setup_client_state_scripts_update_module.device
        work_dir = "test_state_scripts.%s" % mender_device.host_string

        script_content = (
            '#!/bin/sh\n\necho "$(basename $0)" >> /data/test_state_scripts.log\n'
        )

        script_failure_content = (script_content +
                                  "sync\necho b > /proc/sysrq-trigger\n"
                                  )  # flush to disk before killing

        # This is only needed in the case: die commit-leave,
        # otherwise the device will get stuck in a boot-reboot loop
        script_reboot_once = """#!/bin/sh
        if [ $(grep -c $(basename $0) /data/test_state_scripts.log) -eq 0 ]; then
            echo "$(basename $0)" >> /data/test_state_scripts.log && sync && echo b > /proc/sysrq-trigger
        fi
        echo "$(basename $0)" >> /data/test_state_scripts.log
        exit 0"""

        # Put artifact-scripts in the artifact.
        artifact_script_dir = os.path.join(work_dir, "artifact-scripts")

        if os.path.exists(work_dir):
            shutil.rmtree(work_dir, ignore_errors=True)

        os.mkdir(work_dir)
        os.mkdir(artifact_script_dir)

        for script in test_set.get("ScriptOrder"):
            if not script.startswith("Artifact"):
                # Not an artifact script, skip this one.
                continue
            with open(os.path.join(artifact_script_dir, script), "w") as fd:
                # Exactly one body per script. The rebooting body must not be
                # followed by a second script body: those lines would sit
                # after the reboot trigger and could never run.
                if script in test_set.get("RebootScripts", []):
                    fd.write(script_failure_content)
                elif script in test_set.get("RebootOnceScripts", []):
                    fd.write(script_reboot_once)
                else:
                    fd.write(script_content)

        # Now create the artifact, and make the deployment.
        device_id = Helpers.ip_to_device_id_map(
            MenderDeviceGroup([mender_device.host_string
                               ]))[mender_device.host_string]

        host_ip = (class_persistent_setup_client_state_scripts_update_module.
                   get_virtual_network_host_ip())

        def make_artifact(filename, artifact_name):
            return image.make_module_artifact(
                "module-state-scripts-test",
                conftest.machine_name,
                artifact_name,
                filename,
                scripts=[artifact_script_dir],
            )

        with mender_device.get_reboot_detector(host_ip) as reboot_detector:

            try:
                # The deployment runs inside the try block so that a failure
                # here still triggers the log dump and the cleanup below.
                common_update_procedure(
                    verify_status=True,
                    devices=[device_id],
                    scripts=[artifact_script_dir],
                    make_artifact=make_artifact,
                )

                reboot_detector.verify_reboot_performed()

                # wait until the last script has been run
                logger.debug("Wait until the last script has been run")
                script_logs = ""
                timeout = time.time() + 60 * 60
                while timeout >= time.time():
                    time.sleep(3)
                    try:
                        script_logs = mender_device.run(
                            "cat /data/test_state_scripts.log")
                        if test_set.get(
                                "ExpectedScriptFlow")[-1] in script_logs:
                            break
                    except EOFError:
                        # In some cases the SSH connection raises here EOF due to the
                        # client simulating powerloss. The test will just retry
                        pass
                else:
                    # The loop ran out without ever hitting the break above.
                    pytest.fail(
                        "Timeout waiting for ExpectedScriptFlow in state scripts. Expected %s, got %s"
                        % (
                            test_set.get("ExpectedScriptFlow"),
                            ", ".join(script_logs.rstrip().split("\n")),
                        ))

                assert script_logs.split() == test_set.get(
                    "ExpectedScriptFlow")

            except BaseException:
                # Catch everything (including pytest failures) so the
                # deployment logs get recorded, then re-raise unchanged.
                output = mender_device.run("cat /data/mender/deployment*.log",
                                           warn_only=True)
                logger.info(output)
                raise

            finally:
                # Remove the local scratch directory first so it is gone even
                # if the device cleanup below fails.
                shutil.rmtree(work_dir, ignore_errors=True)
                client_service_name = mender_device.get_client_service_name()
                mender_device.run(
                    ("systemctl stop %s && " +
                     "rm -f /data/test_state_scripts.log && " +
                     "rm -rf /etc/mender/scripts && " +
                     "rm -rf /data/mender/scripts && " + "systemctl start %s")
                    % (client_service_name, client_service_name))
    def test_reboot_recovery(self, standard_setup_one_client_bootstrapped,
                             description, test_set, valid_image):
        """Deploy a rootfs image whose state scripts reboot the device and
        check the resulting script flow and active partition.

        A copy of ``valid_image`` is modified with ``debugfs`` to contain an
        /etc/mender/scripts directory and is deployed together with the
        artifact scripts. For every "Artifact*" entry of
        ``test_set["ScriptOrder"]`` exactly one script body is written:

        * entries in "RebootScripts" log their name, sync and force a reboot
          on every run,
        * entries in "RebootOnceScripts" force a reboot only on their first
          run,
        * all other entries only log their name.

        Further ``test_set`` keys read here: "DoubleReboot" (expect two
        reboots instead of one), "ExpectedScriptFlow" (expected log content)
        and "ExpectedFinalPartition" (contains "OtherPartition" when the
        active partition is expected to differ from the one recorded after
        the deployment call).

        ``description`` is not used in the body.

        The local work directory and the scripts on the device are cleaned up
        in all cases.
        """

        mender_device = standard_setup_one_client_bootstrapped.device
        work_dir = "test_state_scripts.%s" % mender_device.host_string

        script_content = (
            '#!/bin/sh\n\necho "$(basename $0)" >> /data/test_state_scripts.log\n'
        )

        script_failure_content = (script_content +
                                  "sync\necho b > /proc/sysrq-trigger\n"
                                  )  # flush to disk before killing

        # This is only needed in the case: die commit-leave,
        # otherwise the device will get stuck in a boot-reboot loop
        script_reboot_once = """#!/bin/sh
        if [ $(grep -c $(basename $0) /data/test_state_scripts.log) -eq 0 ]; then
            echo "$(basename $0)" >> /data/test_state_scripts.log && sync && echo b > /proc/sysrq-trigger
        fi
        echo "$(basename $0)" >> /data/test_state_scripts.log
        exit 0"""

        # Put artifact-scripts in the artifact.
        artifact_script_dir = os.path.join(work_dir, "artifact-scripts")

        if os.path.exists(work_dir):
            shutil.rmtree(work_dir, ignore_errors=True)

        os.mkdir(work_dir)
        os.mkdir(artifact_script_dir)

        new_rootfs = os.path.join(work_dir, "rootfs.ext4")
        shutil.copy(valid_image, new_rootfs)

        # Create /etc/mender/scripts inside the image copy. The exit status of
        # debugfs is deliberately not checked, as before.
        subprocess.run(
            ["debugfs", "-w", new_rootfs],
            input=b"cd /etc/mender\n"
            b"mkdir scripts\n"
            b"cd scripts\n",
        )

        for script in test_set.get("ScriptOrder"):
            if not script.startswith("Artifact"):
                # Not an artifact script, skip this one.
                continue
            with open(os.path.join(artifact_script_dir, script), "w") as fd:
                # Exactly one body per script. The rebooting body must not be
                # followed by a second script body: those lines would sit
                # after the reboot trigger and could never run.
                if script in test_set.get("RebootScripts", []):
                    fd.write(script_failure_content)
                elif script in test_set.get("RebootOnceScripts", []):
                    fd.write(script_reboot_once)
                else:
                    fd.write(script_content)

        # Now create the artifact, and make the deployment.
        device_id = Helpers.ip_to_device_id_map(
            MenderDeviceGroup([mender_device.host_string
                               ]))[mender_device.host_string]

        host_ip = standard_setup_one_client_bootstrapped.get_virtual_network_host_ip(
        )
        with mender_device.get_reboot_detector(host_ip) as reboot_detector:

            try:
                # The deployment runs inside the try block so that a failure
                # here still triggers the log dump and the cleanup below.
                common_update_procedure(
                    install_image=new_rootfs,
                    verify_status=True,
                    devices=[device_id],
                    scripts=[artifact_script_dir],
                )

                orig_part = mender_device.get_active_partition()

                # handle case where the client has not finished the update
                # path on the committed partition, but new partition is installed,
                # thus we will not get a valid entrypoint into the uncommitted partition (reboot_leave)
                # and the client will thus reboot straight after starting, and u-boot will
                # fall back to the committed partition
                if test_set.get("DoubleReboot", False):
                    reboot_detector.verify_reboot_performed(
                        number_of_reboots=2)
                else:
                    reboot_detector.verify_reboot_performed()

                # wait until the last script has been run
                logger.debug("Wait until the last script has been run")
                script_logs = ""
                timeout = time.time() + 60 * 60
                while timeout >= time.time():
                    time.sleep(3)
                    try:
                        script_logs = mender_device.run(
                            "cat /data/test_state_scripts.log")
                        if test_set.get(
                                "ExpectedScriptFlow")[-1] in script_logs:
                            break
                    except EOFError:
                        # In some cases the SSH connection raises here EOF due to the
                        # client simulating powerloss. The test will just retry
                        pass
                else:
                    # The loop ran out without ever hitting the break above.
                    pytest.fail(
                        "Timeout waiting for ExpectedScriptFlow in state scripts. Expected %s, got %s"
                        % (
                            test_set.get("ExpectedScriptFlow"),
                            ", ".join(script_logs.rstrip().split("\n")),
                        ))

                # make sure the client ended up on the right partition
                if "OtherPartition" in test_set.get("ExpectedFinalPartition",
                                                    []):
                    assert orig_part != mender_device.get_active_partition()
                else:
                    assert orig_part == mender_device.get_active_partition()

                assert script_logs.split() == test_set.get(
                    "ExpectedScriptFlow")

            except BaseException:
                # Catch everything (including pytest failures) so the
                # deployment logs get recorded, then re-raise unchanged.
                output = mender_device.run("cat /data/mender/deployment*.log",
                                           warn_only=True)
                logger.info(output)
                raise

            finally:
                # The work directory holds a full rootfs image copy; remove it
                # first so it is gone even if the device cleanup below fails.
                shutil.rmtree(work_dir, ignore_errors=True)
                client_service_name = mender_device.get_client_service_name()
                mender_device.run(
                    ("systemctl stop %s && " +
                     "rm -f /data/test_state_scripts.log && " +
                     "rm -rf /etc/mender/scripts && " +
                     "rm -rf /data/mender/scripts && " + "systemctl start %s")
                    % (client_service_name, client_service_name))