Example #1
0
def update_image_successful(install_image,
                            regenerate_image_id=True,
                            signed=False,
                            skip_reboot_verification=False,
                            expected_mender_clients=1,
                            pre_upload_callback=lambda: None,
                            pre_deployment_callback=lambda: None,
                            deployment_triggered_callback=lambda: None,
                            compression_type="gzip"):
    """
        Deploy ``install_image`` and assert that the update went through.

        Checks performed, in order:
          * a reboot is detected after the deployment is triggered,
          * the active partition is the one that was passive beforehand,
          * deployment statistics report "success" for
            ``expected_mender_clients`` devices,
          * deployment logs are requested for every device with
            ``expected_status=404``,
          * no further reboot happens (unless ``skip_reboot_verification``),
          * the installed image id equals the one that was deployed,
          * the deployment status is "finished",
          * the artifact's "signed" flag is identical to ``signed``.

        NOTE(review): ``pre_upload_callback`` is accepted but never used in
        this function -- confirm whether it should be forwarded.

        Returns the deployment id.
    """

    target_partition = Helpers.get_passive_partition()
    with Helpers.RebootDetector() as reboot:
        deployment_id, expected_image_id = common_update_procedure(
            install_image,
            regenerate_image_id,
            signed=signed,
            pre_deployment_callback=pre_deployment_callback,
            deployment_triggered_callback=deployment_triggered_callback,
            compression_type=compression_type)
        reboot.verify_reboot_performed()

    with Helpers.RebootDetector() as reboot:
        try:
            assert Helpers.get_active_partition() == target_partition
        except AssertionError:
            # Collect the logs of every device so the failure is debuggable.
            device_logs = [
                deploy.get_logs(dev["device_id"], deployment_id)
                for dev in adm.get_devices()
            ]
            pytest.fail(
                "device did not flip partitions during update, here are the device logs:\n\n %s"
                % (device_logs))

        deploy.check_expected_statistics(deployment_id, "success",
                                         expected_mender_clients)

        for dev in adm.get_devices():
            deploy.get_logs(dev["device_id"],
                            deployment_id,
                            expected_status=404)

        if not skip_reboot_verification:
            reboot.verify_reboot_not_performed()

    assert Helpers.yocto_id_installed_on_machine() == expected_image_id

    deploy.check_expected_status("finished", deployment_id)

    # make sure backend recognizes signed and unsigned images
    first_artifact = deploy.get_deployment(deployment_id)["artifacts"][0]
    signed_flag = deploy.get_artifact_details(first_artifact)["signed"]
    assert signed_flag is signed, "image was not correct recognized as signed/unsigned"

    return deployment_id
def update_image_failed(install_image="broken_update.ext4", expected_mender_clients=1):
    """
        Deploy a broken image and assert that the device ends up unchanged.

        After the deployment a reboot must be detected, the active partition
        and the installed image id must equal their values from before the
        deployment, the deployment statistics must report "failure" for
        ``expected_mender_clients`` devices, every device log must contain
        "got invalid entrypoint into the state machine", no further reboot
        may happen, and the deployment status must be "finished".
    """

    # NOTE(review): the result is never read; the call is kept because any
    # side effects of get_mender_clients() are not visible from here.
    devices_accepted = get_mender_clients()
    starting_image_id = Helpers.yocto_id_installed_on_machine()
    starting_partition = Helpers.get_active_partition()

    with Helpers.RebootDetector() as reboot:
        deployment_id, _ = common_update_procedure(install_image,
                                                   broken_image=True)
        reboot.verify_reboot_performed()

    with Helpers.RebootDetector() as reboot:
        # The device must be back on the partition it started from.
        assert Helpers.get_active_partition() == starting_partition

        deploy.check_expected_statistics(deployment_id, "failure",
                                         expected_mender_clients)

        for dev in adm.get_devices():
            device_log = deploy.get_logs(dev["device_id"], deployment_id)
            assert "got invalid entrypoint into the state machine" in device_log

        assert Helpers.yocto_id_installed_on_machine() == starting_image_id
        reboot.verify_reboot_not_performed()

    deploy.check_expected_status("finished", deployment_id)
Example #3
0
    def test_update_image_id_already_installed(
        self, install_image=conftest.get_valid_image()):
        """
            Deploy an image, then trigger a second deployment of the same
            artifact name to the accepted devices and expect that second
            deployment to be reported as "already-installed" and "finished".
        """

        if not env.host_string:
            # No host selected yet: re-run this method through execute() on
            # the hosts returned by get_mender_clients().
            execute(self.test_update_image_id_already_installed,
                    hosts=get_mender_clients(),
                    install_image=install_image)
            return

        with Helpers.RebootDetector() as reboot:
            deployment_id, expected_image_id = common_update_procedure(
                install_image, True)
            reboot.verify_reboot_performed()

        accepted_ids = []
        for device in auth_v2.get_devices_status("accepted"):
            accepted_ids.append(device["id"])

        # Second deployment re-uses the artifact name that was just installed.
        deployment_id = deploy.trigger_deployment(name="New valid update",
                                                  artifact_name=expected_image_id,
                                                  devices=accepted_ids)

        client_count = len(get_mender_clients())
        deploy.check_expected_statistics(deployment_id, "already-installed",
                                         client_count)
        deploy.check_expected_status("finished", deployment_id)
    def test_reject_bootstrap(self):
        """
            Reject every device, then check that a deployment attempt raises
            an AssertionError without a reboot, and finally run a shell check
            on the mender-store file before re-accepting one device.
        """
        if not env.host_string:
            # No host selected yet: re-run this method through execute() on
            # the hosts returned by get_mender_clients().
            execute(self.test_reject_bootstrap, hosts=get_mender_clients())
            return

        # iterate over devices and reject them
        for dev in adm.get_devices():
            dev_id = dev["id"]
            adm.set_device_status(dev_id, "rejected")
            logging.info("Rejecting DeviceID: %s" % dev_id)

        adm.check_expected_status("rejected", len(get_mender_clients()))

        with Helpers.RebootDetector() as reboot:
            try:
                deployment_id, _ = common_update_procedure(
                    install_image=conftest.get_valid_image())
            except AssertionError:
                # Expected outcome: the deployment could not be carried out.
                logging.info("Failed to deploy upgrade to rejected device.")
                reboot.verify_reboot_not_performed()
            else:
                # use assert to fail, so we can get backend logs
                assert False, "No error while trying to deploy to rejected device"

        # authtoken has been removed from mender-store
        # NOTE(review): as written, this command succeeds only when
        # 'authtoken' IS found in the file -- confirm this matches the intent.
        run("strings /data/mender/mender-store | grep -q 'authtoken' || false")

        # re-accept device after test is done
        adm.accept_devices(1)
Example #5
0
    def test_deployed_during_network_outage(
        self, install_image=conftest.get_valid_image()):
        """
            Install a valid upgrade image while there is no network availability on the device
            Re-establishing the network connectivity results in the upgrade to be triggered.

            Emulate a flaky network connection, and ensure that the deployment still succeeds.

            Connectivity is re-enabled even when the deployment step raises,
            so a failure here does not leave the device cut off.
        """
        if not env.host_string:
            # No host selected yet: re-run this method through execute() on
            # the hosts returned by get_mender_clients().
            execute(self.test_deployed_during_network_outage,
                    hosts=get_mender_clients(),
                    install_image=install_image)
            return

        Helpers.gateway_connectivity(False)
        with Helpers.RebootDetector() as reboot:
            try:
                deployment_id, expected_yocto_id = common_update_procedure(
                    install_image, verify_status=False)
                time.sleep(60)

                # Toggle connectivity on/off a few times to emulate a flaky link.
                for i in range(5):
                    time.sleep(5)
                    Helpers.gateway_connectivity(i % 2 == 0)
            finally:
                # Runs at the same point as before on success, and also on
                # failure so the outage never outlives this test.
                Helpers.gateway_connectivity(True)

            logging.info("Network stabilized")
            reboot.verify_reboot_performed()
            deploy.check_expected_statistics(deployment_id, "success",
                                             len(get_mender_clients()))

        assert Helpers.yocto_id_installed_on_machine() == expected_yocto_id
Example #6
0
    def test_image_download_retry_hosts_broken(
        self, install_image=conftest.get_valid_image()):
        """
            Block storage host (minio) by modifying the hosts file.

            A bogus entry for s3.docker.mender.io is appended to /etc/hosts
            before the deployment is triggered and removed again once
            wait_for_download_retry_attempts() returns. The entry is also
            removed when the deployment or the wait raises, so a failure does
            not leave the hosts file broken. Afterwards the device must have
            rebooted onto the previously passive partition with the new image.
        """

        if not env.host_string:
            # No host selected yet: re-run this method through execute() on
            # the hosts returned by get_mender_clients().
            execute(self.test_image_download_retry_hosts_broken,
                    hosts=get_mender_clients(),
                    install_image=install_image)
            return

        inactive_part = Helpers.get_passive_partition()

        # break s3 connectivity before triggering deployment
        run("echo '1.1.1.1 s3.docker.mender.io' >> /etc/hosts")
        with Helpers.RebootDetector() as reboot:
            try:
                deployment_id, new_yocto_id = common_update_procedure(
                    install_image)

                self.wait_for_download_retry_attempts()
            finally:
                # Always drop the bogus entry again, on success and failure.
                run("sed -i.bak '/1.1.1.1/d' /etc/hosts")

            reboot.verify_reboot_performed()
            assert Helpers.get_active_partition() == inactive_part
            assert Helpers.yocto_id_installed_on_machine() == new_yocto_id
            reboot.verify_reboot_not_performed()
Example #7
0
    def test_large_update_image(self):
        """
            Deploy "large_image.dat" as a broken image and expect the
            deployment to be reported as a failure, without any reboot, and
            to end in status "finished".
        """
        if not env.host_string:
            # No host selected yet: re-run this method through execute() on
            # the hosts returned by get_mender_clients().
            execute(self.test_large_update_image, hosts=get_mender_clients())
            return

        with Helpers.RebootDetector() as reboot:
            deployment_id, _ = common_update_procedure(
                install_image="large_image.dat",
                regenerate_image_id=False,
                broken_image=True)

            client_count = len(get_mender_clients())
            deploy.check_expected_statistics(deployment_id, "failure",
                                             client_count)
            reboot.verify_reboot_not_performed()
            deploy.check_expected_status("finished", deployment_id)
Example #8
0
    def abort_deployment(self, abort_step=None, mender_performs_reboot=False):
        """
            Trigger a deployment, abort it, and make sure the device remains
            on its original partition with its original image.

            Args:
                abort_step: if not None, the deployment statistics are checked
                            for this status before the abort is issued.
                mender_performs_reboot: if set to False, a manual reboot is performed and
                                            checks are performed.
                                        if set to True, wait until device is rebooted.
        """
        if not env.host_string:
            # No host selected yet: re-run this method through execute() on
            # the hosts returned by get_mender_clients().
            execute(self.abort_deployment,
                    abort_step=abort_step,
                    mender_performs_reboot=mender_performs_reboot,
                    hosts=get_mender_clients())
            return

        valid_image = conftest.get_valid_image()
        partition_before = Helpers.get_active_partition()
        image_id_before = Helpers.yocto_id_installed_on_machine()

        with Helpers.RebootDetector() as reboot:
            deployment_id, _ = common_update_procedure(valid_image,
                                                       verify_status=False)

            if abort_step is not None:
                deploy.check_expected_statistics(deployment_id, abort_step,
                                                 len(get_mender_clients()))
            deploy.abort(deployment_id)
            deploy.check_expected_statistics(deployment_id, "aborted",
                                             len(get_mender_clients()))

            # no deployment logs are sent by the client, is this expected?
            for dev in auth_v2.get_devices():
                deploy.get_logs(dev["id"], deployment_id, expected_status=404)

            if not mender_performs_reboot:
                # We reboot ourselves, just to make sure that we have not
                # unintentionally switched to the new partition.
                reboot.verify_reboot_not_performed()
                run("( sleep 10 ; reboot ) 2>/dev/null >/dev/null &")
                reboot.verify_reboot_performed()
            else:
                # If Mender performs reboot, we need to wait for it to reboot
                # back into the original filesystem.
                reboot.verify_reboot_performed(number_of_reboots=2)

        assert Helpers.get_active_partition() == partition_before
        assert Helpers.yocto_id_installed_on_machine() == image_id_before
        deploy.check_expected_status("finished", deployment_id)
    def test_reject_bootstrap(self):
        """
            Reject every device, check that a deployment attempt raises an
            AssertionError without a reboot, then poll the mender journal for
            an 'authentication request rejected' line (up to 10 attempts,
            30 seconds apart). One device is re-accepted before the final
            verdict so the failure path does not skip that step.
        """
        if not env.host_string:
            # No host selected yet: re-run this method through execute() on
            # the hosts returned by get_mender_clients().
            execute(self.test_reject_bootstrap, hosts=get_mender_clients())
            return

        # iterate over devices and reject them
        for device in adm.get_devices():
            adm.set_device_status(device["id"], "rejected")
            logging.info("Rejecting DeviceID: %s" % device["id"])

        adm.check_expected_status("rejected", len(get_mender_clients()))

        with Helpers.RebootDetector() as reboot:
            try:
                deployment_id, _ = common_update_procedure(
                    install_image=conftest.get_valid_image())
            except AssertionError:
                logging.info("Failed to deploy upgrade to rejected device.")
                reboot.verify_reboot_not_performed()

            else:
                # pytest.fail() raises, so nothing after it would ever run.
                pytest.fail(
                    "no error while trying to deploy to rejected device")

        finished = False
        # wait until the client journal reports the rejected auth request
        for _ in range(10):
            # abort_exception=Exception makes a failing run() raise Exception,
            # which is exactly what the handler below needs to catch.
            with settings(abort_exception=Exception):
                try:
                    run("journalctl -u mender -l -n 3 | grep -q 'authentication request rejected'"
                        )
                except Exception:
                    # Not there yet; a bare except would also swallow
                    # KeyboardInterrupt/SystemExit, so stay narrow.
                    time.sleep(30)
                else:
                    finished = True
                    break

        adm.accept_devices(1)

        if not finished:
            # NOTE(review): the message mentions the mender-store file, but
            # the check above inspects the journal -- confirm the wording.
            pytest.fail("failed to remove authtoken from mender-store file")
Example #10
0
    def test_deployment_abortion_success(self):
        """
            Complete a successful deployment, then call
            abort_finished_deployment() on it and check that the statistics
            still report "success" and the status is "finished".
        """
        # maybe an acceptance test is enough for this check?

        if not env.host_string:
            # No host selected yet: re-run this method through execute() on
            # the hosts returned by get_mender_clients().
            execute(self.test_deployment_abortion_success,
                    hosts=get_mender_clients())
            return

        valid_image = conftest.get_valid_image()
        with Helpers.RebootDetector() as reboot:
            deployment_id, _ = common_update_procedure(valid_image)
            reboot.verify_reboot_performed()

        clients_before_abort = len(get_mender_clients())
        deploy.check_expected_statistics(deployment_id, "success",
                                         clients_before_abort)
        time.sleep(5)

        deploy.abort_finished_deployment(deployment_id)

        clients_after_abort = len(get_mender_clients())
        deploy.check_expected_statistics(deployment_id, "success",
                                         clients_after_abort)
        deploy.check_expected_status("finished", deployment_id)
Example #11
0
    def test_update_image_recovery(self,
                                   install_image=conftest.get_valid_image()):
        """
            Install an update, and reboot the system when we detect it's being copied over to the inactive partition.

            The deployment is expected to end in "failure", with the device
            still on its original partition and original image.

            The test fails explicitly if no process is ever seen using the
            inactive partition, because in that case the forced reboot that
            this test is about never happens.
        """
        if not env.host_string:
            # No host selected yet: re-run this method through execute() on
            # the hosts returned by get_mender_clients().
            execute(self.test_update_image_recovery,
                    hosts=get_mender_clients(),
                    install_image=install_image)
            return

        installed_yocto_id = Helpers.yocto_id_installed_on_machine()

        inactive_part = Helpers.get_passive_partition()
        with Helpers.RebootDetector() as reboot:
            deployment_id, _ = common_update_procedure(install_image)
            active_part = Helpers.get_active_partition()

            for _ in range(60):
                time.sleep(0.5)
                with quiet():
                    # make sure we are writing to the inactive partition
                    output = run("fuser -mv %s" % (inactive_part))
                if output.return_code == 0:
                    # Interrupt the update mid-write: kill the client and
                    # schedule a reboot in the background.
                    run("killall -s 9 mender")
                    with settings(warn_only=True):
                        run("( sleep 3 ; reboot ) 2>/dev/null >/dev/null &")
                    break
            else:
                # Without this the test would carry on with an uninterrupted
                # update and fail later with a misleading assertion.
                pytest.fail("Download never started?")

            logging.info("Waiting for system to finish reboot")
            reboot.verify_reboot_performed()
            assert Helpers.get_active_partition() == active_part
            deploy.check_expected_statistics(deployment_id, "failure",
                                             len(get_mender_clients()))
            reboot.verify_reboot_not_performed()

        assert Helpers.yocto_id_installed_on_machine() == installed_yocto_id
Example #12
0
    def test_update_image_breaks_networking(
        self,
        install_image="core-image-full-cmdline-%s-broken-network.ext4" %
        conftest.machine_name):
        """
            Deploy an image that ships without the systemd-networkd binary.

            With networking broken on the new image, mender cannot report any
            logs; the update is expected to roll back and the deployment to
            be counted as a failure.
        """
        if not env.host_string:
            # No host selected yet: re-run this method through execute() on
            # the hosts returned by get_mender_clients().
            execute(self.test_update_image_breaks_networking,
                    hosts=get_mender_clients(),
                    install_image=install_image)
            return

        with Helpers.RebootDetector() as reboot:
            deployment_id, _ = common_update_procedure(install_image)

            # since the network is broken, two reboots will be performed,
            # and the last one will be detected
            reboot.verify_reboot_performed()

            client_count = len(get_mender_clients())
            deploy.check_expected_statistics(deployment_id, "failure",
                                             client_count)
Example #13
0
    def test_reboot_recovery(self, description, test_set):
        """
            Deploy an artifact carrying generated state scripts, some of which
            reboot the device or exit with an error, then verify the script
            execution log and the partition the device ends up on.

            Keys read from ``test_set``:
                Rollback: passed to common_update_procedure as broken_image
                          (default False).
                ScriptOrder: script names; only names starting with
                             "Artifact" are generated into the artifact.
                RebootScripts: scripts that log, sync and trigger a reboot.
                RebootOnceScripts: scripts that trigger a reboot only when
                                   their name is not yet in the log.
                ErrorScripts: scripts that log and then exit 1.
                DoubleReboot: if true, two reboots are awaited instead of one.
                ExpectedScriptFlow: expected content of the script log.
                ExpectedFinalPartition: if it contains "OtherPartition", the
                                        active partition must have changed.

            ``description`` is only forwarded to execute().
        """
        if not env.host_string:
            # No host selected yet: re-run this method through execute() on
            # the hosts returned by get_mender_clients().
            execute(self.test_reboot_recovery,
                    description,
                    test_set,
                    hosts=get_mender_clients())
            return

        client = env.host_string
        work_dir = "test_state_scripts.%s" % client

        script_content = '#!/bin/sh\n\necho "$(basename $0)" >> /data/test_state_scripts.log\n'

        script_failure_content = script_content + 'sync\necho b > /proc/sysrq-trigger\n'  # flush to disk before killing

        # This is only needed in the case: die commit-leave,
        # otherwise the device will get stuck in a boot-reboot loop
        script_reboot_once = ('''#!/bin/sh
        if [ $(grep -c $(basename $0) /data/test_state_scripts.log) -eq 0 ]; then
            echo "$(basename $0)" >> /data/test_state_scripts.log && sync && echo b > /proc/sysrq-trigger
        fi
        echo "$(basename $0)" >> /data/test_state_scripts.log
        exit 0''')
        script_error_content = script_content + "exit 1"
        broken_image = test_set.get("Rollback", False)

        # Put artifact-scripts in the artifact.
        artifact_script_dir = os.path.join(work_dir, "artifact-scripts")

        if os.path.exists(work_dir):
            shutil.rmtree(work_dir, ignore_errors=True)

        os.mkdir(work_dir)
        os.mkdir(artifact_script_dir)

        new_rootfs = os.path.join(work_dir, "rootfs.ext4")
        shutil.copy(conftest.get_valid_image(), new_rootfs)

        # Create /etc/mender/scripts inside the copied rootfs image.
        # universal_newlines=True puts the pipe in text mode so the str
        # commands below are accepted on Python 3 as well as Python 2.
        ps = subprocess.Popen(["debugfs", "-w", new_rootfs],
                              stdin=subprocess.PIPE,
                              universal_newlines=True)
        ps.stdin.write("cd /etc/mender\n" "mkdir scripts\n" "cd scripts\n")
        ps.stdin.close()
        ps.wait()

        for script in test_set.get("ScriptOrder"):
            if not script.startswith("Artifact"):
                # Not an artifact script, skip this one.
                continue

            # Exactly one body per script. Previously a RebootScripts entry
            # fell through to the final else and got the plain body appended
            # after the rebooting one.
            if script in test_set.get("RebootScripts", []):
                body = script_failure_content
            elif script in test_set.get("RebootOnceScripts", []):
                body = script_reboot_once
            elif script in test_set.get("ErrorScripts", []):
                body = script_error_content
            else:
                body = script_content

            with open(os.path.join(artifact_script_dir, script), "w") as fd:
                fd.write(body)

        # Now create the artifact, and make the deployment.
        device_id = Helpers.ip_to_device_id_map([client])[client]

        with Helpers.RebootDetector() as reboot_detector:

            deployment_id = common_update_procedure(
                install_image=new_rootfs,
                broken_image=broken_image,
                verify_status=True,
                devices=[device_id],
                scripts=[artifact_script_dir])[0]

            try:

                orig_part = Helpers.get_active_partition()

                # handle case where the client has not finished the update
                # path on the committed partition, but new partition is installed,
                # thus we will not get a valid entrypoint into the uncommitted partition (reboot_leave)
                # and the client will thus reboot straight after starting, and u-boot will
                # fall back to the committed partition
                if test_set.get("DoubleReboot", False):
                    reboot_detector.verify_reboot_performed(
                        number_of_reboots=2)
                else:
                    reboot_detector.verify_reboot_performed()

                # wait until the last script has been run
                logger.debug("waint until the last script has been run")
                script_logs = ""
                timeout = time.time() + 60 * 60
                while timeout >= time.time():
                    time.sleep(3)
                    script_logs = run("cat /data/test_state_scripts.log")
                    if test_set.get("ExpectedScriptFlow")[-1] in script_logs:
                        break

                # make sure the client ended up on the right partition
                if "OtherPartition" in test_set.get("ExpectedFinalPartition",
                                                    []):
                    assert orig_part != Helpers.get_active_partition()
                else:
                    assert orig_part == Helpers.get_active_partition()

                assert script_logs.split() == test_set.get(
                    "ExpectedScriptFlow")

            finally:
                # Remove the script log and installed scripts from the device
                # whether or not the checks above passed.
                run("systemctl stop mender && " +
                    "rm -f /data/test_state_scripts.log && " +
                    "rm -rf /etc/mender/scripts && " +
                    "rm -rf /data/mender/scripts && " +
                    "systemctl start mender")
Example #14
0
    def test_image_download_retry_timeout(
        self, test_set, install_image=conftest.get_valid_image()):
        """
            Install an update, and block storage connection when we detect it's
            being copied over to the inactive partition.

            The test should result in a successful download retry.

            Keys read from ``test_set``:
                blockAfterStart: if true, the deployment is triggered first
                                 and storage is blocked once a process is
                                 seen using the inactive partition; otherwise
                                 storage is blocked before the deployment.
                logMessageToLookFor: passed to
                                     wait_for_download_retry_attempts().

            Storage connectivity is re-enabled even when the deployment or
            the retry wait raises, so a failure does not leave it blocked.
        """
        if not env.host_string:
            # No host selected yet: re-run this method through execute() on
            # the hosts returned by get_mender_clients().
            execute(self.test_image_download_retry_timeout,
                    test_set,
                    hosts=get_mender_clients(),
                    install_image=install_image)
            return

        # make tcp timeout quicker, none persistent changes
        run("echo 2 > /proc/sys/net/ipv4/tcp_keepalive_time")
        run("echo 2 > /proc/sys/net/ipv4/tcp_keepalive_intvl")
        run("echo 3 > /proc/sys/net/ipv4/tcp_syn_retries")

        # to speed up timeouting client connection
        run("echo 1 > /proc/sys/net/ipv4/tcp_keepalive_probes")

        inactive_part = Helpers.get_passive_partition()

        with Helpers.RebootDetector() as reboot:
            if test_set['blockAfterStart']:
                # Block after we start the download.
                deployment_id, new_yocto_id = common_update_procedure(
                    install_image)
                for _ in range(60):
                    time.sleep(0.5)
                    with quiet():
                        # make sure we are writing to the inactive partition
                        output = run("fuser -mv %s" % (inactive_part))
                    if output.return_code == 0:
                        break
                else:
                    pytest.fail("Download never started?")

            # use iptables to block traffic to storage
            Helpers.gateway_connectivity(False,
                                         hosts=["s3.docker.mender.io"
                                                ])  # disable connectivity

            try:
                if not test_set['blockAfterStart']:
                    # Block before we start the download.
                    deployment_id, new_yocto_id = common_update_procedure(
                        install_image)

                # re-enable connectivity after 2 retries
                self.wait_for_download_retry_attempts(
                    test_set['logMessageToLookFor'])
            finally:
                # Runs at the same point as before on success, and also on
                # failure so storage never stays blocked after this test.
                Helpers.gateway_connectivity(True,
                                             hosts=["s3.docker.mender.io"
                                                    ])  # re-enable connectivity

            reboot.verify_reboot_performed()
            assert Helpers.get_active_partition() == inactive_part
            assert Helpers.yocto_id_installed_on_machine() == new_yocto_id
            reboot.verify_reboot_not_performed()
Example #15
0
    def test_update_device_group(self):
        """
            Perform a successful upgrade on one group of devices, and assert that:
            * deployment status/logs are correct.
            * only the correct group is updated, not the other one.

            A reboot is performed, and running partitions have been swapped.
            Deployment status will be set as successful for device.
            Logs will not be retrieved, and result in 404.

            Requires exactly two clients. The first one ("alpha") is put into
            the group "Update" and receives the deployment; the second one
            ("bravo") must remain untouched. Alpha is removed from the group
            again even when a check fails.
        """

        # Beware that there will two parallel things going on below, one for
        # each group, hence a lot of separate execute() calls for each. We aim
        # to update the group alpha, not bravo.

        clients = get_mender_clients()
        assert (len(clients) == 2)
        alpha = clients[0]
        bravo = clients[1]

        ip_to_device_id = Helpers.ip_to_device_id_map(clients)
        id_alpha = ip_to_device_id[alpha]
        id_bravo = ip_to_device_id[bravo]
        print("ID of alpha host: %s\nID of bravo host: %s" %
              (id_alpha, id_bravo))

        ret = execute(Helpers.get_passive_partition, hosts=clients)
        pass_part_alpha = ret[alpha]
        pass_part_bravo = ret[bravo]

        inv.put_device_in_group(id_alpha, "Update")

        try:
            reboot = {alpha: None, bravo: None}
            with Helpers.RebootDetector(alpha) as reboot[
                    alpha], Helpers.RebootDetector(bravo) as reboot[bravo]:

                deployment_id, expected_image_id = common_update_procedure(
                    conftest.get_valid_image(), devices=[id_alpha])

                @parallel
                def verify_reboot_performed_for_alpha_only():
                    if env.host_string == alpha:
                        reboot[alpha].verify_reboot_performed()
                    elif env.host_string == bravo:
                        # Extra long wait here, because a real update takes quite a lot
                        # of time.
                        reboot[bravo].verify_reboot_not_performed(300)
                    else:
                        raise Exception(
                            "verify_reboot_performed_for_alpha_only() called with unknown host"
                        )

                execute(verify_reboot_performed_for_alpha_only, hosts=clients)

            # Alpha's partitions must have swapped; bravo's must not.
            ret = execute(Helpers.get_passive_partition, hosts=clients)
            assert ret[alpha] != pass_part_alpha
            assert ret[bravo] == pass_part_bravo
            ret = execute(Helpers.get_active_partition, hosts=clients)
            assert ret[alpha] == pass_part_alpha
            assert ret[bravo] != pass_part_bravo

            deploy.check_expected_statistics(deployment_id,
                                             expected_status="success",
                                             expected_count=1)

            # No logs for either host: alpha because it was successful, bravo
            # because it should never have attempted an update in the first place.
            for device_id in [id_alpha, id_bravo]:
                deploy.get_logs(device_id, deployment_id, expected_status=404)

            assert execute(Helpers.yocto_id_installed_on_machine,
                           hosts=alpha)[alpha] == expected_image_id
            assert execute(Helpers.yocto_id_installed_on_machine,
                           hosts=bravo)[bravo] != expected_image_id
        finally:
            # Important: Leave the groups as you found them: Empty.
            # Done in finally so a failed check does not leave alpha grouped.
            inv.delete_device_from_group(id_alpha, "Update")