def new_tenant_client(
    test_env,
    name: str,
    tenant: str,
    docker: bool = False,
    network: str = "mender",
) -> MenderDevice:
    """Start a Mender client for a tenant and register it on the environment.

    The client is created through ``test_env.new_tenant_docker_client`` when
    ``docker`` is true and through ``test_env.new_tenant_client`` otherwise,
    so ``test_env`` only needs to implement the variant that is requested.

    The new client is identified by comparing the set of clients reported by
    ``test_env.get_mender_clients(network=network)`` before and after the
    creation; exactly one new entry is expected.

    The resulting device is attached to the test environment, so that systemd
    logs can be printed on test failures:

    * if ``test_env`` already has a ``device_group``, the device is appended;
    * otherwise ``test_env.device`` is set and ``test_env.device_group`` is
      built from all clients currently on ``network``.

    Returns the ``MenderDevice`` wrapping the newly created client.
    """
    clients_before = set(test_env.get_mender_clients(network=network))

    # Only the requested factory is looked up, so the other one may be absent.
    spawn_client = (
        test_env.new_tenant_docker_client if docker else test_env.new_tenant_client
    )
    spawn_client(name, tenant)

    clients_after = set(test_env.get_mender_clients(network=network))
    added = clients_after - clients_before
    assert len(added) == 1

    device = MenderDevice(added.pop())

    if not hasattr(test_env, "device_group"):
        # First client on this environment: create the bookkeeping attributes.
        test_env.device = device
        test_env.device_group = MenderDeviceGroup(
            test_env.get_mender_clients(network=network)
        )
        return device

    test_env.device_group.append(device)
    return device
def do_test_deployment_gateway_and_one_device(
    self,
    env,
    valid_image_with_mender_conf,
    image_with_mender_conf_and_mender_gateway_conf,
):
    """Deploy a new image to the gateway and, via callback, to one device.

    The current ``mender.conf`` of the device, and both ``mender.conf`` and
    ``mender-gateway.conf`` of the gateway, are read first and handed to the
    image fixtures when the update images are built.

    The gateway deployment is started with ``common_update_procedure``;
    ``update_device`` is passed as its ``deployment_triggered_callback`` and
    runs a full ``update_image`` on the device. Finally the gateway
    deployment is expected to report one "success" and to be "finished".
    """
    device = env.device
    gateway = env.device_gateway

    devauth = DeviceAuthV2(env.auth)
    deploy = Deployments(env.auth, devauth)

    # Snapshot the configuration currently on the devices.
    device_conf = device.run("cat /etc/mender/mender.conf")
    gateway_gw_conf = gateway.run("cat /etc/mender/mender-gateway.conf")
    gateway_conf = gateway.run("cat /etc/mender/mender.conf")

    host_ip = env.get_virtual_network_host_ip()

    hosts = [device.host_string, gateway.host_string]
    id_by_host = Helpers.ip_to_device_id_map(
        MenderDeviceGroup(hosts), devauth=devauth,
    )

    gateway_image_name = (
        "mender-gateway-image-full-cmdline-%s.ext4" % conftest.machine_name
    )
    gateway_image = image_with_mender_conf_and_mender_gateway_conf(
        gateway_image_name, gateway_conf, gateway_gw_conf,
    )

    def update_device():
        # Invoked by common_update_procedure as deployment_triggered_callback.
        target_id = id_by_host[device.host_string]
        update_image(
            device,
            host_ip,
            expected_mender_clients=1,
            install_image=valid_image_with_mender_conf(device_conf),
            devauth=devauth,
            deploy=deploy,
            devices=[target_id],
        )

    deployment_id, _ = common_update_procedure(
        gateway_image,
        devices=[id_by_host[gateway.host_string]],
        devauth=devauth,
        deploy=deploy,
        deployment_triggered_callback=update_device,
        verify_status=False,
    )

    deploy.check_expected_statistics(deployment_id, "success", 1)
    deploy.check_expected_status("finished", deployment_id)
def standard_setup_two_clients_bootstrapped(request):
    """Bring up the standard environment with two accepted clients.

    The environment's ``teardown`` is registered as a finalizer on
    ``request`` before ``setup`` is called. Once the clients are wrapped in
    a ``MenderDeviceGroup`` and their SSH ports are open, the Mender API is
    reset and both devices are accepted.

    Returns the environment with ``device_group`` set.
    """
    environment = container_factory.getStandardSetup(num_clients=2)
    request.addfinalizer(environment.teardown)
    environment.setup()

    client_addresses = environment.get_mender_clients()
    environment.device_group = MenderDeviceGroup(client_addresses)
    environment.device_group.ssh_is_opened()

    reset_mender_api(environment)
    auth_v2.accept_devices(2)

    return environment
def standard_setup_two_clients_bootstrapped_with_gateway(request):
    """Bring up two clients behind a gateway, with all three accepted.

    The clients are looked up on the "mender_local" network and the gateway
    on the "mender" network. The environment's ``teardown`` is registered as
    a finalizer on ``request`` before ``setup`` is called.

    Returns the environment with ``device_group``, ``device_gateway`` and
    ``auth`` set.
    """
    environment = container_factory.get_standard_setup_with_gateway(num_clients=2)
    request.addfinalizer(environment.teardown)
    environment.setup()

    local_clients = environment.get_mender_clients(network="mender_local")
    environment.device_group = MenderDeviceGroup(local_clients)
    environment.device_group.ssh_is_opened()

    gateways = environment.get_mender_gateways(network="mender")
    environment.device_gateway = MenderDevice(gateways[0])
    environment.device_gateway.ssh_is_opened()

    reset_mender_api(environment)

    # Three devices: two client devices and the gateway device (which also
    # runs mender client).
    # NOTE(review): `devauth` and `auth` are not bound in this function;
    # presumably they are module-level objects - confirm.
    devauth.accept_devices(3)
    environment.auth = auth

    return environment
def do_test_deployment_one_device(self, env, valid_image_with_mender_conf):
    """Run a full image update on the single device of ``env``.

    The device's current ``/etc/mender/mender.conf`` is read and passed to
    the ``valid_image_with_mender_conf`` fixture to build the image to
    install. The device ID is resolved from the device's host string, and
    the deployment is limited to that one device.
    """
    device = env.device

    devauth = DeviceAuthV2(env.auth)
    deploy = Deployments(env.auth, devauth)

    host_ip = env.get_virtual_network_host_ip()
    current_conf = device.run("cat /etc/mender/mender.conf")

    id_by_host = Helpers.ip_to_device_id_map(
        MenderDeviceGroup([device.host_string]), devauth=devauth,
    )
    target_id = id_by_host[device.host_string]

    update_image(
        device,
        host_ip,
        expected_mender_clients=1,
        install_image=valid_image_with_mender_conf(current_conf),
        devauth=devauth,
        deploy=deploy,
        devices=[target_id],
    )
def test_state_scripts(
    self,
    class_persistent_setup_client_state_scripts_update_module,
    description,
    test_set,
):
    """Test that state scripts are executed in right order, and that errors
    are treated like they should.

    Scripts from ``self.scripts`` are split in two groups: those whose name
    starts with "Artifact" are shipped inside a module artifact, all others
    are installed directly on the device under ``/etc/mender/scripts``.
    Scripts listed in ``test_set["FailureScript"]`` exit with status 1.

    ``test_set`` keys read here: "FailureScript", "ExpectedStatus" and the
    optional "CorruptDataScriptVersionIn", "CorruptEtcScriptVersionIn" and
    "RestoreEtcScriptVersionIn".

    When ``test_set["ExpectedStatus"]`` is None the test only waits (up to
    one hour) for an "Error" entry in the script log; otherwise the
    deployment statistics are checked against the expected status. In both
    cases the resulting script log is then verified with
    ``self.verify_script_log_correct``.

    The device and the local work directory are cleaned up regardless of the
    outcome.
    """
    mender_device = class_persistent_setup_client_state_scripts_update_module.device
    work_dir = "test_state_scripts.%s" % mender_device.host_string
    deployment_id = None
    client_service_name = mender_device.get_client_service_name()
    try:
        script_content = '#!/bin/sh\n\necho "`date --rfc-3339=seconds` $(basename $0)" >> /data/test_state_scripts.log\n'
        script_failure_content = script_content + "exit 1\n"

        # Make rootfs-scripts and put them in rootfs image.
        rootfs_script_dir = os.path.join(work_dir, "rootfs-scripts")
        shutil.rmtree(work_dir, ignore_errors=True)
        os.mkdir(work_dir)
        os.mkdir(rootfs_script_dir)

        for script in self.scripts:
            if script.startswith("Artifact"):
                # This is a script for the artifact, skip this one.
                continue
            with open(os.path.join(rootfs_script_dir, script), "w") as fd:
                if script in test_set["FailureScript"]:
                    fd.write(script_failure_content)
                else:
                    fd.write(script_content)
                os.fchmod(fd.fileno(), 0o0755)

        # Write this again in case it was corrupted above.
        with open(os.path.join(rootfs_script_dir, "version"), "w") as fd:
            fd.write("2")

        # Then zip and copy them to QEMU host.
        subprocess.check_call(
            ["tar", "czf", "../rootfs-scripts.tar.gz", "."], cwd=rootfs_script_dir
        )

        # Stop client first to avoid race conditions.
        mender_device.run("systemctl stop %s" % client_service_name)
        try:
            mender_device.put(
                os.path.join(work_dir, "rootfs-scripts.tar.gz"), remote_path="/"
            )
            # Fixed: the command used to read "mkdir -p cd /etc/mender/scripts",
            # which also created a stray directory named "cd" on the device.
            mender_device.run(
                "mkdir -p /etc/mender/scripts && "
                + "cd /etc/mender/scripts && "
                + "tar xzf /rootfs-scripts.tar.gz && "
                + "rm -f /rootfs-scripts.tar.gz"
            )
        finally:
            mender_device.run("systemctl start %s" % client_service_name)

        # Put artifact-scripts in the artifact.
        artifact_script_dir = os.path.join(work_dir, "artifact-scripts")
        os.mkdir(artifact_script_dir)

        for script in self.scripts:
            if not script.startswith("Artifact"):
                # Not an artifact script, skip this one.
                continue
            with open(os.path.join(artifact_script_dir, script), "w") as fd:
                if script in test_set["FailureScript"]:
                    fd.write(script_failure_content)
                else:
                    fd.write(script_content)
                if test_set.get("CorruptDataScriptVersionIn") == script:
                    fd.write("printf '1000' > /data/mender/scripts/version\n")
                if test_set.get("CorruptEtcScriptVersionIn") == script:
                    fd.write("printf '1000' > /etc/mender/scripts/version\n")
                if test_set.get("RestoreEtcScriptVersionIn") == script:
                    fd.write("printf '2' > /etc/mender/scripts/version\n")

        # Callback for our custom artifact maker
        def make_artifact(filename, artifact_name):
            return image.make_module_artifact(
                "module-state-scripts-test",
                conftest.machine_name,
                artifact_name,
                filename,
                scripts=[artifact_script_dir],
            )

        # Now create the artifact, and make the deployment.
        device_id = Helpers.ip_to_device_id_map(
            MenderDeviceGroup([mender_device.host_string])
        )[mender_device.host_string]
        deployment_id = common_update_procedure(
            verify_status=False,
            devices=[device_id],
            scripts=[artifact_script_dir],
            make_artifact=make_artifact,
        )[0]

        if test_set["ExpectedStatus"] is None:
            # In this case we don't expect the deployment to even be
            # attempted, presumably due to failing Idle/Sync/Download
            # scripts on the client. So no deployment checking. Just wait
            # until there is at least one Error script in the log, which
            # will always be the case if ExpectedStatus is none (since one
            # of them is preventing the update from being attempted).
            def fetch_info(cmd_list):
                # Run each diagnostic command, log its output and return
                # everything concatenated (one trailing newline per command).
                collected = []
                for cmd in cmd_list:
                    output = mender_device.run(cmd, warn_only=True)
                    logger.error("%s:\n%s", cmd, output)
                    collected.append("%s\n" % output)
                return "".join(collected)

            info_query = [
                "cat /data/test_state_scripts.log 1>&2",
                "journalctl -u %s" % client_service_name,
                "top -n5 -b",
                "ls -l /proc/`pgrep mender`/fd",
                "for fd in /proc/`pgrep mender`/fdinfo/*; do echo $fd:; cat $fd; done",
            ]
            deadline = time.time() + 60 * 60
            while time.time() <= deadline:
                output = mender_device.run(
                    "grep Error /data/test_state_scripts.log", warn_only=True
                )
                if output.rstrip() != "":
                    # If it succeeds, stop.
                    break
                fetch_info(info_query)
                time.sleep(10)
            else:
                # Loop ran out of time without ever hitting the break above.
                info = fetch_info(info_query)
                pytest.fail(
                    'Waited too long for "Error" to appear in log:\n%s' % info
                )
        else:
            # NOTE(review): `deploy` is not bound in this function; presumably
            # a module-level object - confirm.
            deploy.check_expected_statistics(
                deployment_id, test_set["ExpectedStatus"], 1
            )

        # Always give the client a little bit of time to settle in the base
        # state after an update.
        time.sleep(10)

        output = mender_device.run("cat /data/test_state_scripts.log")
        self.verify_script_log_correct(test_set, output.split("\n"))

    except BaseException:
        # Deliberately catches everything, including failures that do not
        # derive from Exception: the handler only logs the deployment log for
        # diagnosis and then re-raises.
        output = mender_device.run(
            "cat /data/mender/deployment*.log", warn_only=True
        )
        logger.info(output)
        raise

    finally:
        shutil.rmtree(work_dir, ignore_errors=True)
        if deployment_id:
            try:
                deploy.abort(deployment_id)
            except Exception as exc:
                # Aborting is best effort, but leave a trace instead of
                # silently discarding the error.
                logger.warning(
                    "Could not abort deployment %s: %s", deployment_id, exc
                )
        mender_device.run(
            (
                "systemctl stop %s && "
                + "rm -f /data/test_state_scripts.log && "
                + "rm -rf /etc/mender/scripts && "
                + "rm -rf /data/mender/scripts && "
                + "systemctl start %s"
            )
            % (client_service_name, client_service_name)
        )
def test_reboot_recovery(
    self,
    class_persistent_setup_client_state_scripts_update_module,
    description,
    test_set,
):
    """Check that the client recovers when a state script reboots the device.

    Artifact state scripts are generated for every entry in
    ``test_set["ScriptOrder"]`` whose name starts with "Artifact":

    * scripts in ``test_set["RebootScripts"]`` log their name and then
      force a reboot on every run;
    * scripts in ``test_set["RebootOnceScripts"]`` force a reboot only the
      first time they run;
    * all other scripts just log their name.

    After the deployment the test verifies that a reboot happened, waits (up
    to one hour) for the last entry of ``test_set["ExpectedScriptFlow"]`` to
    show up in the script log, and then requires the log to match the
    expected flow exactly. The device is cleaned up regardless of outcome.
    """
    mender_device = class_persistent_setup_client_state_scripts_update_module.device
    work_dir = "test_state_scripts.%s" % mender_device.host_string

    script_content = (
        '#!/bin/sh\n\necho "$(basename $0)" >> /data/test_state_scripts.log\n'
    )
    # flush to disk before killing
    script_failure_content = script_content + "sync\necho b > /proc/sysrq-trigger\n"

    # This is only needed in the case: die commit-leave,
    # otherwise the device will get stuck in a boot-reboot loop
    script_reboot_once = """#!/bin/sh
if [ $(grep -c $(basename $0) /data/test_state_scripts.log) -eq 0 ]; then
    echo "$(basename $0)" >> /data/test_state_scripts.log && sync && echo b > /proc/sysrq-trigger
fi
echo "$(basename $0)" >> /data/test_state_scripts.log
exit 0"""

    # Put artifact-scripts in the artifact.
    artifact_script_dir = os.path.join(work_dir, "artifact-scripts")
    # ignore_errors=True already covers a missing directory.
    shutil.rmtree(work_dir, ignore_errors=True)
    os.mkdir(work_dir)
    os.mkdir(artifact_script_dir)

    reboot_scripts = test_set.get("RebootScripts", [])
    reboot_once_scripts = test_set.get("RebootOnceScripts", [])
    for script in test_set.get("ScriptOrder"):
        if not script.startswith("Artifact"):
            # Not an artifact script, skip this one.
            continue
        with open(os.path.join(artifact_script_dir, script), "w") as fd:
            reboots = script in reboot_scripts
            reboots_once = script in reboot_once_scripts
            if reboots:
                fd.write(script_failure_content)
            if reboots_once:
                fd.write(script_reboot_once)
            # Fixed: the plain body used to be appended after the reboot
            # trigger of every RebootScripts entry (second shebang and all),
            # because it was the `else` of the RebootOnceScripts check only.
            if not (reboots or reboots_once):
                fd.write(script_content)

    # Now create the artifact, and make the deployment.
    device_id = Helpers.ip_to_device_id_map(
        MenderDeviceGroup([mender_device.host_string])
    )[mender_device.host_string]

    host_ip = (
        class_persistent_setup_client_state_scripts_update_module.get_virtual_network_host_ip()
    )

    def make_artifact(filename, artifact_name):
        return image.make_module_artifact(
            "module-state-scripts-test",
            conftest.machine_name,
            artifact_name,
            filename,
            scripts=[artifact_script_dir],
        )

    expected_flow = test_set.get("ExpectedScriptFlow")

    with mender_device.get_reboot_detector(host_ip) as reboot_detector:
        common_update_procedure(
            verify_status=True,
            devices=[device_id],
            scripts=[artifact_script_dir],
            make_artifact=make_artifact,
        )

        try:
            reboot_detector.verify_reboot_performed()

            # wait until the last script has been run
            logger.debug("Wait until the last script has been run")
            script_logs = ""
            timeout = time.time() + 60 * 60
            while timeout >= time.time():
                time.sleep(3)
                try:
                    script_logs = mender_device.run(
                        "cat /data/test_state_scripts.log"
                    )
                    if expected_flow[-1] in script_logs:
                        break
                except EOFError:
                    # In some cases the SSH connection raises here EOF due to the
                    # client simulating powerloss. The test will just retry
                    pass
            else:
                pytest.fail(
                    "Timeout waiting for ExpectedScriptFlow in state scripts. Expected %s, got %s"
                    % (expected_flow, ", ".join(script_logs.rstrip().split("\n")))
                )

            assert script_logs.split() == expected_flow

        except BaseException:
            # Deliberately catches everything, including failures that do not
            # derive from Exception: the handler only logs the deployment log
            # for diagnosis and then re-raises.
            output = mender_device.run(
                "cat /data/mender/deployment*.log", warn_only=True
            )
            logger.info(output)
            raise

        finally:
            client_service_name = mender_device.get_client_service_name()
            mender_device.run(
                (
                    "systemctl stop %s && "
                    + "rm -f /data/test_state_scripts.log && "
                    + "rm -rf /etc/mender/scripts && "
                    + "rm -rf /data/mender/scripts && "
                    + "systemctl start %s"
                )
                % (client_service_name, client_service_name)
            )
def test_reboot_recovery(
    self, standard_setup_one_client_bootstrapped, description, test_set, valid_image
):
    """Check that a rootfs update recovers when a state script reboots.

    A copy of ``valid_image`` is prepared with an ``/etc/mender/scripts``
    directory, and artifact state scripts are generated for every entry in
    ``test_set["ScriptOrder"]`` whose name starts with "Artifact":

    * scripts in ``test_set["RebootScripts"]`` log their name and then
      force a reboot on every run;
    * scripts in ``test_set["RebootOnceScripts"]`` force a reboot only the
      first time they run;
    * all other scripts just log their name.

    After the deployment the test verifies the expected number of reboots
    (two when ``test_set["DoubleReboot"]`` is set), waits (up to one hour)
    for the last entry of ``test_set["ExpectedScriptFlow"]`` to show up in
    the script log, checks the active partition against
    ``test_set["ExpectedFinalPartition"]`` and requires the log to match
    the expected flow exactly. The device is cleaned up regardless of
    outcome.
    """
    mender_device = standard_setup_one_client_bootstrapped.device
    work_dir = "test_state_scripts.%s" % mender_device.host_string

    script_content = (
        '#!/bin/sh\n\necho "$(basename $0)" >> /data/test_state_scripts.log\n'
    )
    # flush to disk before killing
    script_failure_content = script_content + "sync\necho b > /proc/sysrq-trigger\n"

    # This is only needed in the case: die commit-leave,
    # otherwise the device will get stuck in a boot-reboot loop
    script_reboot_once = """#!/bin/sh
if [ $(grep -c $(basename $0) /data/test_state_scripts.log) -eq 0 ]; then
    echo "$(basename $0)" >> /data/test_state_scripts.log && sync && echo b > /proc/sysrq-trigger
fi
echo "$(basename $0)" >> /data/test_state_scripts.log
exit 0"""

    # Put artifact-scripts in the artifact.
    artifact_script_dir = os.path.join(work_dir, "artifact-scripts")
    # ignore_errors=True already covers a missing directory.
    shutil.rmtree(work_dir, ignore_errors=True)
    os.mkdir(work_dir)
    os.mkdir(artifact_script_dir)

    # Work on a private copy of the image and create the scripts directory
    # inside it with debugfs.
    new_rootfs = os.path.join(work_dir, "rootfs.ext4")
    shutil.copy(valid_image, new_rootfs)

    ps = subprocess.Popen(["debugfs", "-w", new_rootfs], stdin=subprocess.PIPE)
    ps.stdin.write(b"cd /etc/mender\n" b"mkdir scripts\n" b"cd scripts\n")
    ps.stdin.close()
    ps.wait()

    reboot_scripts = test_set.get("RebootScripts", [])
    reboot_once_scripts = test_set.get("RebootOnceScripts", [])
    for script in test_set.get("ScriptOrder"):
        if not script.startswith("Artifact"):
            # Not an artifact script, skip this one.
            continue
        with open(os.path.join(artifact_script_dir, script), "w") as fd:
            reboots = script in reboot_scripts
            reboots_once = script in reboot_once_scripts
            if reboots:
                fd.write(script_failure_content)
            if reboots_once:
                fd.write(script_reboot_once)
            # Fixed: the plain body used to be appended after the reboot
            # trigger of every RebootScripts entry (second shebang and all),
            # because it was the `else` of the RebootOnceScripts check only.
            if not (reboots or reboots_once):
                fd.write(script_content)

    # Now create the artifact, and make the deployment.
    device_id = Helpers.ip_to_device_id_map(
        MenderDeviceGroup([mender_device.host_string])
    )[mender_device.host_string]

    host_ip = standard_setup_one_client_bootstrapped.get_virtual_network_host_ip()

    expected_flow = test_set.get("ExpectedScriptFlow")

    with mender_device.get_reboot_detector(host_ip) as reboot_detector:
        common_update_procedure(
            install_image=new_rootfs,
            verify_status=True,
            devices=[device_id],
            scripts=[artifact_script_dir],
        )

        try:
            orig_part = mender_device.get_active_partition()

            # handle case where the client has not finished the update
            # path on the committed partition, but new partition is installed,
            # thus we will not get a valid entrypoint into the uncommitted
            # partition (reboot_leave) and the client will thus reboot straight
            # after starting, and u-boot will fall back to the committed
            # partition
            if test_set.get("DoubleReboot", False):
                reboot_detector.verify_reboot_performed(number_of_reboots=2)
            else:
                reboot_detector.verify_reboot_performed()

            # wait until the last script has been run
            logger.debug("Wait until the last script has been run")
            script_logs = ""
            timeout = time.time() + 60 * 60
            while timeout >= time.time():
                time.sleep(3)
                try:
                    script_logs = mender_device.run(
                        "cat /data/test_state_scripts.log"
                    )
                    if expected_flow[-1] in script_logs:
                        break
                except EOFError:
                    # In some cases the SSH connection raises here EOF due to the
                    # client simulating powerloss. The test will just retry
                    pass
            else:
                pytest.fail(
                    "Timeout waiting for ExpectedScriptFlow in state scripts. Expected %s, got %s"
                    % (expected_flow, ", ".join(script_logs.rstrip().split("\n")))
                )

            # make sure the client ended up on the right partition
            if "OtherPartition" in test_set.get("ExpectedFinalPartition", []):
                assert orig_part != mender_device.get_active_partition()
            else:
                assert orig_part == mender_device.get_active_partition()

            assert script_logs.split() == expected_flow

        except BaseException:
            # Deliberately catches everything, including failures that do not
            # derive from Exception: the handler only logs the deployment log
            # for diagnosis and then re-raises.
            output = mender_device.run(
                "cat /data/mender/deployment*.log", warn_only=True
            )
            logger.info(output)
            raise

        finally:
            client_service_name = mender_device.get_client_service_name()
            mender_device.run(
                (
                    "systemctl stop %s && "
                    + "rm -f /data/test_state_scripts.log && "
                    + "rm -rf /etc/mender/scripts && "
                    + "rm -rf /data/mender/scripts && "
                    + "systemctl start %s"
                )
                % (client_service_name, client_service_name)
            )