def test_multi_tenancy_deployment_aborting(self):
    """Bootstrap a device under its own tenant in a multi-tenancy setup and
    verify that the tenant can abort its own deployment.
    """
    auth.reset_auth_token()

    tenant_users = [
        {
            "email": "*****@*****.**",
            "password": "******",
            "username": "******",
            "container": "mender-client-deployment-aborting-1",
        }
    ]

    # Create each tenant and bring up its dedicated client container,
    # then accept the single device that appears.
    for account in tenant_users:
        auth.new_tenant(account["username"], account["email"],
                        account["password"])
        tenant_token = auth.current_tenant["tenant_token"]
        new_tenant_client(account["container"], tenant_token)
        adm.accept_devices(1)

    # Start a deployment for each tenant, abort it immediately, and confirm
    # both the backend statistics and the client log reflect the abort.
    for account in tenant_users:
        deployment_id, _ = common_update_procedure(
            install_image=conftest.get_valid_image())
        deploy.abort(deployment_id)
        deploy.check_expected_statistics(deployment_id, "aborted", 1)
        execute(self.mender_log_contains_aborted_string,
                hosts=get_mender_client_by_container_name(account["container"]))
def update_image_failed(install_image="broken_update.ext4"):
    """Deploy a deliberately broken image and verify the client rolls back.

    The device reboots, U-Boot detects that the image is not bootable and
    reverts to the previous partition; the deployment must end as a failure.
    """
    clients = get_mender_clients()
    image_id_before = Helpers.yocto_id_installed_on_machine()
    active_before = Helpers.get_active_partition()

    deployment_id, _ = common_update_procedure(install_image,
                                               broken_image=True)
    Helpers.verify_reboot_performed()

    # A rollback means we are still running from the same partition.
    assert Helpers.get_active_partition() == active_before

    deploy.check_expected_statistics(deployment_id, "failure", len(clients))

    # Every device must have reported the rollback in its deployment log.
    for device in adm.get_devices():
        device_log = deploy.get_logs(device["device_id"], deployment_id)
        assert "running rollback image" in device_log

    assert Helpers.yocto_id_installed_on_machine() == image_id_before
    Helpers.verify_reboot_not_performed()
    deploy.check_expected_status("finished", deployment_id)
def update_image_failed(install_image="broken_update.ext4",
                        expected_mender_clients=1):
    """Deploy a deliberately broken image and verify the client rolls back.

    The device reboots, U-Boot detects that the image is not bootable and
    reverts to the previous partition; the deployment must end as a failure.
    """
    devices_accepted = get_mender_clients()
    image_id_before = Helpers.yocto_id_installed_on_machine()
    active_before = Helpers.get_active_partition()

    # First reboot: the broken update is attempted and fails to boot.
    with Helpers.RebootDetector() as reboot:
        deployment_id, _ = common_update_procedure(install_image,
                                                   broken_image=True)
        reboot.verify_reboot_performed()

    # After rollback there must be no further reboot and no partition flip.
    with Helpers.RebootDetector() as reboot:
        assert Helpers.get_active_partition() == active_before

        deploy.check_expected_statistics(deployment_id, "failure",
                                         expected_mender_clients)

        for device in auth_v2.get_devices():
            device_log = deploy.get_logs(device["id"], deployment_id)
            assert "got invalid entrypoint into the state machine" in device_log

        assert Helpers.yocto_id_installed_on_machine() == image_id_before
        reboot.verify_reboot_not_performed()

    deploy.check_expected_status("finished", deployment_id)
def test_deployed_during_network_outage(
        self, install_image=conftest.get_valid_image()):
    """
    Install a valid upgrade image while there is no network availability
    on the device. Re-establishing the network connectivity results in the
    upgrade being triggered.

    Emulate a flaky network connection, and ensure that the deployment
    still succeeds.
    """
    if not env.host_string:
        execute(self.test_deployed_during_network_outage,
                hosts=get_mender_clients(),
                install_image=install_image)
        return

    # Cut the device off from the gateway before the deployment is created.
    Helpers.gateway_connectivity(False)
    # Fixed: was `common_update_proceduce` — a typo that raised NameError
    # before the deployment could even be created.
    deployment_id, expected_yocto_id = common_update_procedure(
        install_image, verify_status=False)
    time.sleep(60)

    # Flap the link a few times to emulate a flaky connection.
    for i in range(5):
        time.sleep(5)
        Helpers.gateway_connectivity(i % 2 == 0)
    Helpers.gateway_connectivity(True)
    logging.info("Network stabilized")

    Helpers.verify_reboot_performed()
    deploy.check_expected_statistics(deployment_id, "success",
                                     len(get_mender_clients()))
    assert Helpers.yocto_id_installed_on_machine() == expected_yocto_id
def test_update_image_id_already_installed(
        self, install_image=conftest.get_valid_image()):
    """Deploying an artifact that is already installed on the device must
    result in an "already-installed" deployment status.

    First a normal update to *install_image* is performed; then a second
    deployment of the very same artifact is triggered and must be reported
    as already-installed rather than applied again.
    """
    # Fabric fan-out: re-invoke this test once per client host.
    if not env.host_string:
        execute(self.test_update_image_id_already_installed,
                hosts=get_mender_clients(),
                install_image=install_image)
        return

    # First deployment: a regular, successful update (reboot expected).
    token = Helpers.place_reboot_token()
    deployment_id, expected_image_id = common_update_procedure(
        install_image, True)
    token.verify_reboot_performed()

    devices_accepted_id = [
        device["device_id"] for device in adm.get_devices_status("accepted")
    ]
    # Second deployment of the exact same artifact to the same devices.
    deployment_id = deploy.trigger_deployment(
        name="New valid update",
        artifact_name=expected_image_id,
        devices=devices_accepted_id)

    deploy.check_expected_statistics(deployment_id, "already-installed",
                                     len(get_mender_clients()))
    deploy.check_expected_status("finished", deployment_id)
def update_image_successful(install_image,
                            regenerate_image_id=True,
                            signed=False,
                            skip_reboot_verification=False,
                            expected_mender_clients=1,
                            pre_upload_callback=lambda: None,
                            pre_deployment_callback=lambda: None,
                            deployment_triggered_callback=lambda: None,
                            compression_type="gzip"):
    """
    Perform a successful upgrade, and assert that deployment status/logs
    are correct.

    A reboot is performed, and running partitions have been swapped.
    Deployment status will be set as successful for device.
    Logs will not be retrieved, and result in 404.

    Returns the deployment id.

    NOTE(review): pre_upload_callback is accepted but never invoked or
    forwarded in this body — confirm whether it should be passed on to
    common_update_procedure.
    """
    previous_inactive_part = Helpers.get_passive_partition()

    # First reboot: the update is installed and the device boots into it.
    with Helpers.RebootDetector() as reboot:
        deployment_id, expected_image_id = common_update_procedure(
            install_image,
            regenerate_image_id,
            signed=signed,
            pre_deployment_callback=pre_deployment_callback,
            deployment_triggered_callback=deployment_triggered_callback,
            compression_type=compression_type)
        reboot.verify_reboot_performed()

    with Helpers.RebootDetector() as reboot:
        # A successful update must have flipped to the former passive
        # partition; on failure, dump the device logs for diagnosis.
        try:
            assert Helpers.get_active_partition() == previous_inactive_part
        except AssertionError:
            logs = []
            for d in adm.get_devices():
                logs.append(deploy.get_logs(d["device_id"], deployment_id))
            pytest.fail(
                "device did not flip partitions during update, here are the device logs:\n\n %s"
                % (logs))

        deploy.check_expected_statistics(deployment_id, "success",
                                         expected_mender_clients)

        # Successful deployments keep no logs: expect 404 per device.
        for d in adm.get_devices():
            deploy.get_logs(d["device_id"], deployment_id,
                            expected_status=404)

        if not skip_reboot_verification:
            reboot.verify_reboot_not_performed()

    assert Helpers.yocto_id_installed_on_machine() == expected_image_id
    deploy.check_expected_status("finished", deployment_id)

    # make sure backend recognizes signed and unsigned images
    artifact_id = deploy.get_deployment(deployment_id)["artifacts"][0]
    artifact_info = deploy.get_artifact_details(artifact_id)
    # Fixed grammar in the assertion message ("correct" -> "correctly").
    assert artifact_info["signed"] is signed, \
        "image was not correctly recognized as signed/unsigned"

    return deployment_id
def test_large_update_image(self):
    """An image larger than the passive/active partition must fail to
    install, without any reboot taking place."""
    if not env.host_string:
        execute(self.test_large_update_image, hosts=get_mender_clients())
        return

    with Helpers.RebootDetector() as reboot:
        deployment_id, _ = common_update_procedure(
            install_image="large_image.dat",
            regenerate_image_id=False,
            broken_image=True)
        deploy.check_expected_statistics(deployment_id, "failure",
                                         len(get_mender_clients()))
        # The oversized image must never make it far enough to reboot.
        reboot.verify_reboot_not_performed()

    deploy.check_expected_status("finished", deployment_id)
def abort_deployment(self, abort_step=None, mender_performs_reboot=False):
    """
    Trigger a deployment, and cancel it within 15 seconds, make sure no
    deployment is performed.

    Args:
        abort_step: if set, wait until the deployment reaches this status
            on all clients before aborting.
        mender_performs_reboot: if set to False, a manual reboot is
            performed and checks are performed.
            if set to True, wait until device is rebooted.
    """
    # Fabric fan-out: re-invoke once per client host.
    if not env.host_string:
        execute(self.abort_deployment,
                abort_step=abort_step,
                mender_performs_reboot=mender_performs_reboot,
                hosts=get_mender_clients())
        return

    install_image = conftest.get_valid_image()
    # Snapshot partition and image id so we can prove nothing changed.
    expected_partition = Helpers.get_active_partition()
    expected_image_id = Helpers.yocto_id_installed_on_machine()

    with Helpers.RebootDetector() as reboot:
        deployment_id, _ = common_update_procedure(install_image,
                                                   verify_status=False)

        if abort_step is not None:
            deploy.check_expected_statistics(deployment_id, abort_step,
                                             len(get_mender_clients()))
        deploy.abort(deployment_id)
        deploy.check_expected_statistics(deployment_id, "aborted",
                                         len(get_mender_clients()))

        # no deployment logs are sent by the client, is this expected?
        for d in auth_v2.get_devices():
            deploy.get_logs(d["id"], deployment_id, expected_status=404)

        if mender_performs_reboot:
            # If Mender performs reboot, we need to wait for it to reboot
            # back into the original filesystem.
            reboot.verify_reboot_performed(number_of_reboots=2)
        else:
            # Else we reboot ourselves, just to make sure that we have not
            # unintentionally switched to the new partition.
            reboot.verify_reboot_not_performed()
            run("( sleep 10 ; reboot ) 2>/dev/null >/dev/null &")
            reboot.verify_reboot_performed()

    # The abort must have left the device exactly as it was.
    assert Helpers.get_active_partition() == expected_partition
    assert Helpers.yocto_id_installed_on_machine() == expected_image_id
    deploy.check_expected_status("finished", deployment_id)
def test_deployment_abortion_success(self):
    """Aborting a deployment that already succeeded must leave its status
    untouched."""
    # maybe an acceptance test is enough for this check?
    if not env.host_string:
        execute(self.test_deployment_abortion_success,
                hosts=get_mender_clients())
        return

    install_image = conftest.get_valid_image()
    deployment_id, _ = common_update_procedure(install_image)
    Helpers.verify_reboot_performed()

    client_count = len(get_mender_clients())
    deploy.check_expected_statistics(deployment_id, "success", client_count)

    # Abort after the fact: a no-op as far as status is concerned.
    deploy.abort_finished_deployment(deployment_id)
    deploy.check_expected_statistics(deployment_id, "success", client_count)

    deploy.check_expected_status("finished", deployment_id)
def abort_deployment(self, abort_step=None, mender_performs_reboot=False):
    """
    Trigger a deployment, and cancel it within 15 seconds, make sure no
    deployment is performed.

    Args:
        abort_step: if set, wait until the deployment reaches this status
            on all clients before aborting.
        mender_performs_reboot: if set to False, a manual reboot is
            performed and checks are performed.
            if set to True, wait until device is rebooted.
    """
    # Fabric fan-out: re-invoke once per client host.
    if not env.host_string:
        execute(self.abort_deployment,
                abort_step=abort_step,
                mender_performs_reboot=mender_performs_reboot,
                hosts=get_mender_clients())
        return

    install_image = conftest.get_valid_image()
    # Snapshot partition and image id so we can prove nothing changed.
    expected_partition = Helpers.get_active_partition()
    expected_image_id = Helpers.yocto_id_installed_on_machine()
    token = Helpers.place_reboot_token()

    deployment_id, _ = common_update_procedure(install_image,
                                               verify_status=False)

    if abort_step is not None:
        deploy.check_expected_statistics(deployment_id, abort_step,
                                         len(get_mender_clients()))
    deploy.abort(deployment_id)
    deploy.check_expected_statistics(deployment_id, "aborted",
                                     len(get_mender_clients()))

    # no deployment logs are sent by the client, is this expected?
    for d in adm.get_devices():
        deploy.get_logs(d["device_id"], deployment_id, expected_status=404)

    if not mender_performs_reboot:
        # Mender will not reboot on its own: verify it has not, then
        # trigger a manual reboot ourselves.
        token.verify_reboot_not_performed()
        run("( sleep 10 ; reboot ) 2>/dev/null >/dev/null &")

    # Wait for the reboot (Mender-initiated or the manual one above).
    token.verify_reboot_performed()

    # The abort must have left the device exactly as it was.
    assert Helpers.get_active_partition() == expected_partition
    assert Helpers.yocto_id_installed_on_machine() == expected_image_id
    deploy.check_expected_status("finished", deployment_id)
def test_unsigned_artifact_fails_deployment(self, standard_setup_with_signed_artifact_client):
    """An unsigned artifact must be rejected by a client that requires
    signed artifacts, and the backend must record the failure.

    Notice that this test needs a fresh new version of the backend, since
    we installed a signed image earlier without a verification key in
    mender.conf.
    """
    if not env.host_string:
        execute(self.test_unsigned_artifact_fails_deployment,
                standard_setup_with_signed_artifact_client,
                hosts=get_mender_clients())
        return

    deployment_id, _ = common_update_procedure(
        install_image=conftest.get_valid_image())

    deploy.check_expected_status("finished", deployment_id)
    deploy.check_expected_statistics(deployment_id, "failure", 1)

    # Each device's deployment log must explain why the artifact failed.
    expected_error = "expecting signed artifact, but no signature file found"
    for device in adm.get_devices():
        assert expected_error in deploy.get_logs(device["device_id"],
                                                 deployment_id)
def test_update_image_breaks_networking(
        self,
        install_image="core-image-full-cmdline-vexpress-qemu-broken-network.ext4"
):
    """
    Install an image without systemd-networkd binary existing.
    The network will not function, mender will not be able to send any logs.

    The expected status is the update will rollback, and be considered a
    failure.
    """
    if not env.host_string:
        execute(self.test_update_image_breaks_networking,
                hosts=get_mender_clients(),
                install_image=install_image)
        return

    # Fixed: was `common_update_proceduce` — a typo that raised NameError
    # before the deployment could even be created.
    deployment_id, _ = common_update_procedure(install_image)

    # since the network is broken, two reboots will be performed, and the
    # last one will be detected
    Helpers.verify_reboot_performed()
    deploy.check_expected_statistics(deployment_id, "failure",
                                     len(get_mender_clients()))
def test_update_image_recovery(self, install_image=conftest.get_valid_image()):
    """
    Install an update, and reboot the system when we detect it's being
    copied over to the inactive parition.

    The test should result in a failure.
    """
    # Fabric fan-out: re-invoke once per client host.
    if not env.host_string:
        execute(self.test_update_image_recovery,
                hosts=get_mender_clients(),
                install_image=install_image)
        return

    installed_yocto_id = Helpers.yocto_id_installed_on_machine()

    inactive_part = Helpers.get_passive_partition()
    token = Helpers.place_reboot_token()
    deployment_id, _ = common_update_procedure(install_image)
    active_part = Helpers.get_active_partition()

    # Poll for up to ~30 seconds, waiting for the client to start writing
    # to the inactive partition; then kill it and force a reboot mid-write.
    for i in range(60):
        time.sleep(0.5)
        with quiet():
            # make sure we are writing to the inactive partition
            output = run("fuser -mv %s" % (inactive_part))
        if output.return_code == 0:
            run("killall -s 9 mender")
            with settings(warn_only=True):
                run("( sleep 3 ; reboot ) 2>/dev/null >/dev/null &")
            break

    logging.info("Waiting for system to finish reboot")
    token.verify_reboot_performed()
    # The interrupted update must not have switched partitions.
    assert Helpers.get_active_partition() == active_part
    token = Helpers.place_reboot_token()
    deploy.check_expected_statistics(deployment_id, "failure",
                                     len(get_mender_clients()))
    token.verify_reboot_not_performed()
    assert Helpers.yocto_id_installed_on_machine() == installed_yocto_id
def update_image_successful(install_image=None, regenerate_image_id=True):
    """
    Perform a successful upgrade, and assert that deployment status/logs
    are correct.

    A reboot is performed, and running partitions have been swapped.
    Deployment status will be set as successful for device.
    Logs will not be retrieved, and result in 404.

    Args:
        install_image: image to deploy; defaults to conftest.get_valid_image(),
            resolved lazily at call time.
        regenerate_image_id: forwarded to common_update_procedure.
    """
    # Fixed: the default was `install_image=conftest.get_valid_image()`,
    # which runs at import time (once, before any fixtures) — resolve the
    # default lazily instead. Behavior for all callers is unchanged.
    if install_image is None:
        install_image = conftest.get_valid_image()

    previous_inactive_part = Helpers.get_passive_partition()

    deployment_id, expected_image_id = common_update_procedure(
        install_image, regenerate_image_id)

    Helpers.verify_reboot_performed()

    # A successful update must have flipped to the former passive
    # partition; on failure, dump the device logs for diagnosis.
    try:
        assert Helpers.get_active_partition() == previous_inactive_part
    except AssertionError:
        logs = []
        for d in adm.get_devices():
            logs.append(deploy.get_logs(d["device_id"], deployment_id))
        pytest.fail(
            "device did not flip partitions during update, here are the device logs:\n\n %s"
            % (logs))

    deploy.check_expected_statistics(deployment_id, "success",
                                     len(get_mender_clients()))

    # Successful deployments keep no logs: expect 404 per device.
    for d in adm.get_devices():
        deploy.get_logs(d["device_id"], deployment_id, expected_status=404)

    Helpers.verify_reboot_not_performed()
    assert Helpers.yocto_id_installed_on_machine() == expected_image_id

    deploy.check_expected_status("finished", deployment_id)
def test_state_scripts(self, description, test_set):
    """Test that state scripts are executed in right order, and that
    errors are treated like they should.

    test_set keys read here: FailureScript, ExpectedStatus,
    CorruptEtcScriptVersionInUpdate, SimulateBootFailureIn,
    CorruptDataScriptVersionIn, BrokenArtifactId, SwapPartitionExpectation.
    """
    # Fabric fan-out: re-invoke once per client host.
    if not env.host_string:
        execute(self.test_state_scripts, description, test_set,
                hosts=get_mender_clients())
        return

    client = env.host_string

    work_dir = "test_state_scripts.%s" % client
    deployment_id = None
    try:
        # Each script appends its own basename to a log on the device so
        # the execution order can be verified afterwards.
        script_content = '#!/bin/sh\n\necho "$(basename $0)" >> /data/test_state_scripts.log\n'
        script_failure_content = script_content + "exit 1\n"

        old_active = Helpers.get_active_partition()

        # Make rootfs-scripts and put them in rootfs image.
        rootfs_script_dir = os.path.join(work_dir, "rootfs-scripts")
        shutil.rmtree(work_dir, ignore_errors=True)
        os.mkdir(work_dir)
        os.mkdir(rootfs_script_dir)
        new_rootfs = os.path.join(work_dir, "rootfs.ext4")
        shutil.copy(conftest.get_valid_image(), new_rootfs)
        # debugfs lets us edit the ext4 image without mounting it.
        ps = subprocess.Popen(["debugfs", "-w", new_rootfs],
                              stdin=subprocess.PIPE)
        ps.stdin.write("cd /etc/mender\n"
                       "mkdir scripts\n"
                       "cd scripts\n")
        with open(os.path.join(rootfs_script_dir, "version"), "w") as fd:
            if test_set.get('CorruptEtcScriptVersionInUpdate'):
                fd.write("1000")
            else:
                fd.write("2")
        ps.stdin.write("rm version\n")
        ps.stdin.write("write %s version\n"
                       % os.path.join(rootfs_script_dir, "version"))
        for script in self.scripts:
            if script.startswith("Artifact"):
                # This is a script for the artifact, skip this one.
                continue
            with open(os.path.join(rootfs_script_dir, script), "w") as fd:
                if script in test_set['FailureScript']:
                    fd.write(script_failure_content)
                else:
                    fd.write(script_content)
                os.fchmod(fd.fileno(), 0755)
            ps.stdin.write(
                "write %s %s\n"
                % (os.path.join(rootfs_script_dir, script), script))
        ps.stdin.close()
        ps.wait()

        # Write this again in case it was corrupted above.
        with open(os.path.join(rootfs_script_dir, "version"), "w") as fd:
            fd.write("2")

        # Then copy them to QEMU host.
        # Zip them all up to avoid having to copy each and every file,
        # which is quite slow.
        subprocess.check_call(
            ["tar", "czf", "../rootfs-scripts.tar.gz", "."],
            cwd=rootfs_script_dir)
        # Stop client first to avoid race conditions.
        run("systemctl stop mender")
        try:
            put(os.path.join(work_dir, "rootfs-scripts.tar.gz"),
                remote_path="/")
            run("mkdir -p cd /etc/mender/scripts && "
                + "cd /etc/mender/scripts && "
                + "tar xzf /rootfs-scripts.tar.gz && "
                + "rm -f /rootfs-scripts.tar.gz")
        finally:
            run("systemctl start mender")

        # Put artifact-scripts in the artifact.
        artifact_script_dir = os.path.join(work_dir, "artifact-scripts")
        os.mkdir(artifact_script_dir)
        for script in self.scripts:
            if not script.startswith("Artifact"):
                # Not an artifact script, skip this one.
                continue
            with open(os.path.join(artifact_script_dir, script), "w") as fd:
                if script in test_set['FailureScript']:
                    fd.write(script_failure_content)
                else:
                    fd.write(script_content)
                if test_set.get("SimulateBootFailureIn") == script:
                    # Simulate that boot failed by immediately forcing a
                    # rollback with U-Boot.
                    fd.write("fw_setenv bootcount 1\n")
                if test_set.get("CorruptDataScriptVersionIn") == script:
                    fd.write(
                        "printf '1000' > /data/mender/scripts/version\n")

        # Now create the artifact, and make the deployment.
        device_id = Helpers.ip_to_device_id_map([client])[client]
        broken_artifact_id = test_set.get('BrokenArtifactId')
        if broken_artifact_id is None:
            broken_artifact_id = False
        deployment_id = common_update_procedure(
            install_image=new_rootfs,
            broken_image=broken_artifact_id,
            verify_status=False,
            devices=[device_id],
            scripts=[artifact_script_dir])[0]

        if test_set['ExpectedStatus'] is None:
            # In this case we don't expect the deployment to even be
            # attempted, presumably due to failing Idle/Sync/Download
            # scripts on the client. So no deployment checking. Just wait
            # until there is at least one Error script in the log, which
            # will always be the case if ExpectedStatus is none (since one
            # of them is preventing the update from being attempted).
            def fetch_info(cmd_list):
                # Run each diagnostic command on the device and collect
                # the combined output for the failure report.
                all_output = ""
                for cmd in cmd_list:
                    with settings(warn_only=True):
                        output = run(cmd)
                    logger.error("%s:\n%s" % (cmd, output))
                    all_output += "%s\n" % output
                return all_output

            info_query = [
                "cat /data/test_state_scripts.log 1>&2",
                "journalctl -u mender",
                "top -n5 -b",
                "ls -l /proc/`pgrep mender`/fd",
                "for fd in /proc/`pgrep mender`/fdinfo/*; do echo $fd:; cat $fd; done",
            ]
            starttime = time.time()
            # Poll for up to one hour for an Error script to show up.
            while starttime + 60 * 60 >= time.time():
                with settings(warn_only=True):
                    result = run(
                        "grep Error /data/test_state_scripts.log")
                    if result.succeeded:
                        # If it succeeds, stop.
                        break
                    else:
                        fetch_info(info_query)
                        time.sleep(10)
                        continue
            else:
                # while-else: only reached on timeout (no break).
                info = fetch_info(info_query)
                pytest.fail(
                    'Waited too long for "Error" to appear in log:\n%s'
                    % info)
        else:
            deploy.check_expected_statistics(deployment_id,
                                             test_set['ExpectedStatus'],
                                             1)

        # Always give the client a little bit of time to settle in the base
        # state after an update.
        time.sleep(10)

        output = run("cat /data/test_state_scripts.log")
        self.verify_script_log_correct(test_set, output.split('\n'))

        new_active = Helpers.get_active_partition()
        should_switch_partition = (test_set['ExpectedStatus'] == "success")

        # TODO
        if test_set.get('SwapPartitionExpectation') is not None:
            should_switch_partition = not should_switch_partition

        if should_switch_partition:
            assert old_active != new_active, "Device did not switch partition as expected!"
        else:
            assert old_active == new_active, "Device switched partition which was not expected!"

    finally:
        # Best-effort cleanup on both the host and the device, so a failed
        # run does not poison the next test.
        shutil.rmtree(work_dir, ignore_errors=True)
        if deployment_id:
            try:
                deploy.abort(deployment_id)
            except:
                pass
        run("systemctl stop mender && "
            + "rm -f /data/test_state_scripts.log && "
            + "rm -rf /etc/mender/scripts && "
            + "rm -rf /data/mender/scripts && "
            + "systemctl start mender")
def test_update_device_group(self):
    """
    Perform a successful upgrade on one group of devices, and assert that:
    * deployment status/logs are correct.
    * only the correct group is updated, not the other one.

    A reboot is performed, and running partitions have been swapped.
    Deployment status will be set as successful for device.
    Logs will not be retrieved, and result in 404.
    """

    # Beware that there will two parallel things going on below, one for
    # each group, hence a lot of separate execute() calls for each. We aim
    # to update the group alpha, not beta.

    clients = get_mender_clients()
    assert(len(clients) == 2)
    alpha = clients[0]
    bravo = clients[1]

    ip_to_device_id = Helpers.ip_to_device_id_map(clients)
    id_alpha = ip_to_device_id[alpha]
    id_bravo = ip_to_device_id[bravo]
    print("ID of alpha host: %s\nID of bravo host: %s"
          % (id_alpha, id_bravo))

    # Record each host's passive partition before the update.
    ret = execute(Helpers.get_passive_partition, hosts=clients)
    pass_part_alpha = ret[alpha]
    pass_part_bravo = ret[bravo]

    inv.put_device_in_group(id_alpha, "Update")

    # Place one reboot token per host, in parallel.
    @parallel
    def place_reboot_tokens():
        return Helpers.place_reboot_token()
    tokens = execute(place_reboot_tokens, hosts=clients)

    # Deploy only to alpha's device id.
    deployment_id, expected_image_id = common_update_procedure(
        conftest.get_valid_image(), devices=[id_alpha])

    @parallel
    def verify_reboot_performed_for_alpha_only(tokens):
        if env.host_string == alpha:
            tokens[alpha].verify_reboot_performed()
        elif env.host_string == bravo:
            # Extra long wait here, because a real update takes quite a lot
            # of time.
            tokens[bravo].verify_reboot_not_performed(180)
        else:
            raise Exception("verify_reboot_performed_for_alpha_only() called with unknown host")
    execute(verify_reboot_performed_for_alpha_only, tokens, hosts=clients)

    # Alpha must have swapped partitions, bravo must be untouched.
    ret = execute(Helpers.get_passive_partition, hosts=clients)
    assert ret[alpha] != pass_part_alpha
    assert ret[bravo] == pass_part_bravo
    ret = execute(Helpers.get_active_partition, hosts=clients)
    assert ret[alpha] == pass_part_alpha
    assert ret[bravo] != pass_part_bravo

    deploy.check_expected_statistics(deployment_id,
                                     expected_status="success",
                                     expected_count=1)

    # No logs for either host: alpha because it was successful, bravo
    # because it should never have attempted an update in the first place.
    for id in [id_alpha, id_bravo]:
        deploy.get_logs(id, deployment_id, expected_status=404)

    assert execute(Helpers.yocto_id_installed_on_machine,
                   hosts=alpha)[alpha] == expected_image_id
    assert execute(Helpers.yocto_id_installed_on_machine,
                   hosts=bravo)[bravo] != expected_image_id

    # Important: Leave the groups as you found them: Empty.
    inv.delete_device_from_group(id_alpha, "Update")