def test_token_token_expiration(self):
    """ Verify that an expired token is handled correctly (the client gets a
        new, valid one) and that deployments are still received by the client.
    """
    if not env.host_string:
        execute(self.test_token_token_expiration,
                hosts=get_mender_clients())
        return

    timeout_time = int(time.time()) + 60
    while int(time.time()) < timeout_time:
        with quiet():
            output = run("journalctl -u mender -l --no-pager | grep \"received new authorization data\"")
        time.sleep(1)
        if output.return_code == 0:
            logging.info("mender logs indicate new authorization data available")
            break

    if timeout_time <= int(time.time()):
        pytest.fail("timed out waiting for new authorization data")

    # This call verifies that the deployment process goes into an "inprogress"
    # state, which is only possible when the client has a valid token.
    common_update_procedure(install_image=conftest.get_valid_image())
def test_image_download_retry_timeout(self, test_set,
                                      install_image=conftest.get_valid_image()):
    """ Install an update, and block the storage connection when we detect
        that the image is being copied over to the inactive partition.

        The test should result in a successful download retry.
    """
    if not env.host_string:
        execute(self.test_image_download_retry_timeout,
                test_set,
                hosts=get_mender_clients(),
                install_image=install_image)
        return

    # Make TCP time out quicker; these changes are non-persistent.
    run("echo 2 > /proc/sys/net/ipv4/tcp_keepalive_time")
    run("echo 2 > /proc/sys/net/ipv4/tcp_keepalive_intvl")
    run("echo 3 > /proc/sys/net/ipv4/tcp_syn_retries")

    # To speed up timing out the client connection.
    run("echo 1 > /proc/sys/net/ipv4/tcp_keepalive_probes")

    inactive_part = Helpers.get_passive_partition()
    token = Helpers.place_reboot_token()

    if test_set['blockAfterStart']:
        # Block after we start the download.
        deployment_id, new_yocto_id = common_update_procedure(install_image)
        for _ in range(60):
            time.sleep(0.5)
            with quiet():
                # make sure we are writing to the inactive partition
                output = run("fuser -mv %s" % (inactive_part))
            if output.return_code == 0:
                break
        else:
            pytest.fail("Download never started?")

    # use iptables to block traffic to storage
    Helpers.gateway_connectivity(False, hosts=["s3.docker.mender.io"])  # disable connectivity

    if not test_set['blockAfterStart']:
        # Block before we start the download.
        deployment_id, new_yocto_id = common_update_procedure(install_image)

    # re-enable connectivity after 2 retries
    self.wait_for_download_retry_attempts(test_set['logMessageToLookFor'])
    Helpers.gateway_connectivity(True, hosts=["s3.docker.mender.io"])  # re-enable connectivity

    token.verify_reboot_performed()
    token = Helpers.place_reboot_token()
    assert Helpers.get_active_partition() == inactive_part
    assert Helpers.yocto_id_installed_on_machine() == new_yocto_id
    token.verify_reboot_not_performed()
def test_deployed_during_network_outage(self,
                                        install_image=conftest.get_valid_image()):
    """ Install a valid upgrade image while there is no network availability
        on the device. Re-establishing network connectivity results in the
        upgrade being triggered.

        Emulate a flaky network connection, and ensure that the deployment
        still succeeds.
    """
    if not env.host_string:
        execute(self.test_deployed_during_network_outage,
                hosts=get_mender_clients(),
                install_image=install_image)
        return

    Helpers.gateway_connectivity(False)
    deployment_id, expected_yocto_id = common_update_procedure(install_image,
                                                               verify_status=False)
    time.sleep(60)

    for i in range(5):
        time.sleep(5)
        Helpers.gateway_connectivity(i % 2 == 0)
    Helpers.gateway_connectivity(True)

    logging.info("Network stabilized")
    Helpers.verify_reboot_performed()
    deploy.check_expected_statistics(deployment_id,
                                     "success",
                                     len(get_mender_clients()))

    assert Helpers.yocto_id_installed_on_machine() == expected_yocto_id
def test_multi_tenancy_deployment_aborting(self):
    """ Simply make sure we are able to run the multi tenancy setup and
        bootstrap 2 different devices to different tenants
    """
    auth.reset_auth_token()

    users = [
        {
            "email": "*****@*****.**",
            "password": "******",
            "username": "******",
            "container": "mender-client-deployment-aborting-1",
        }
    ]

    for user in users:
        auth.new_tenant(user["username"], user["email"], user["password"])
        t = auth.current_tenant["tenant_token"]
        new_tenant_client(user["container"], t)
        adm.accept_devices(1)

    for user in users:
        deployment_id, _ = common_update_procedure(install_image=conftest.get_valid_image())
        deploy.abort(deployment_id)
        deploy.check_expected_statistics(deployment_id, "aborted", 1)

        execute(self.mender_log_contains_aborted_string,
                hosts=get_mender_client_by_container_name(user["container"]))
def test_update_image_id_already_installed(self,
                                           install_image=conftest.get_valid_image()):
    """Deploying an artifact that is already installed on the device should
       finish with an "already-installed" status."""

    if not env.host_string:
        execute(self.test_update_image_id_already_installed,
                hosts=get_mender_clients(),
                install_image=install_image)
        return

    token = Helpers.place_reboot_token()
    deployment_id, expected_image_id = common_update_procedure(install_image, True)
    token.verify_reboot_performed()

    devices_accepted_id = [
        device["device_id"]
        for device in adm.get_devices_status("accepted")
    ]
    deployment_id = deploy.trigger_deployment(name="New valid update",
                                              artifact_name=expected_image_id,
                                              devices=devices_accepted_id)

    deploy.check_expected_statistics(deployment_id,
                                     "already-installed",
                                     len(get_mender_clients()))
    deploy.check_expected_status("finished", deployment_id)
def test_reject_bootstrap(self):
    """Make sure a rejected device does not perform an upgrade, and that
       its auth token is removed."""

    if not env.host_string:
        execute(self.test_reject_bootstrap, hosts=get_mender_clients())
        return

    # iterate over devices and reject them
    for device in adm.get_devices():
        adm.set_device_status(device["id"], "rejected")
        logging.info("Rejecting DeviceID: %s" % device["id"])

    adm.check_expected_status("rejected", len(get_mender_clients()))

    try:
        deployment_id, _ = common_update_procedure(
            install_image=conftest.get_valid_image())
    except AssertionError:
        logging.info("Failed to deploy upgrade to rejected device.")
        Helpers.verify_reboot_not_performed()

        # authtoken has been removed from mender-store
        run("strings /data/mender/mender-store | grep -q 'authtoken' || false")
    else:
        pytest.fail("No error while trying to deploy to rejected device")

    # re-accept device after test is done
    adm.accept_devices(1)
def test_image_download_retry_2(self, install_image=conftest.get_valid_image()):
    """ Block the storage host (minio) by modifying the hosts file. """

    if not env.host_string:
        execute(self.test_image_download_retry_2,
                hosts=get_mender_clients(),
                install_image=install_image)
        return

    inactive_part = Helpers.get_passive_partition()

    # break s3 connectivity before triggering the deployment
    run("echo '1.1.1.1 s3.docker.mender.io' >> /etc/hosts")
    deployment_id, new_yocto_id = common_update_procedure(install_image)

    self.wait_for_download_retry_attempts()
    run("sed -i.bak '/1.1.1.1/d' /etc/hosts")

    Helpers.verify_reboot_performed()
    assert Helpers.get_active_partition() == inactive_part
    assert Helpers.yocto_id_installed_on_machine() == new_yocto_id
    Helpers.verify_reboot_not_performed()
def test_large_update_image(self):
    """Installing an image larger than the passive/active partition size
       should result in a failure."""

    if not env.host_string:
        execute(self.test_large_update_image, hosts=get_mender_clients())
        return

    with Helpers.RebootDetector() as reboot:
        deployment_id, _ = common_update_procedure(install_image="large_image.dat",
                                                   regenerate_image_id=False,
                                                   broken_image=True)
        deploy.check_expected_statistics(deployment_id,
                                         "failure",
                                         len(get_mender_clients()))
        reboot.verify_reboot_not_performed()
        deploy.check_expected_status("finished", deployment_id)
def test_deployments_post_upgrade(self):
    adm.get_devices_status("accepted", 10)

    # perform upgrade
    devices_to_update = list(
        set([
            device["device_id"]
            for device in adm.get_devices_status("accepted", expected_devices=10)
        ]))
    deployment_id, artifact_id = common_update_procedure(
        "core-image-full-cmdline-%s.ext4" % conftest.machine_name,
        device_type="test",
        devices=devices_to_update)

    deploy.check_expected_status("finished", deployment_id)

    assert deploy.get_statistics(deployment_id)["success"] == 7
    assert deploy.get_statistics(deployment_id)["failure"] == 3

    deploy.get_status("finished")
def abort_deployment(self, abort_step=None, mender_performs_reboot=False):
    """ Trigger a deployment and cancel it within 15 seconds, making sure no
        deployment is performed.

        Args:
            mender_performs_reboot: if set to False, a manual reboot is
                                    performed and checks are done.
                                    if set to True, wait until the device has
                                    rebooted.
    """
    if not env.host_string:
        execute(self.abort_deployment,
                abort_step=abort_step,
                mender_performs_reboot=mender_performs_reboot,
                hosts=get_mender_clients())
        return

    install_image = conftest.get_valid_image()
    expected_partition = Helpers.get_active_partition()
    expected_image_id = Helpers.yocto_id_installed_on_machine()

    with Helpers.RebootDetector() as reboot:
        deployment_id, _ = common_update_procedure(install_image,
                                                   verify_status=False)

        if abort_step is not None:
            deploy.check_expected_statistics(deployment_id,
                                             abort_step,
                                             len(get_mender_clients()))
        deploy.abort(deployment_id)
        deploy.check_expected_statistics(deployment_id,
                                         "aborted",
                                         len(get_mender_clients()))

        # no deployment logs are sent by the client, is this expected?
        for d in auth_v2.get_devices():
            deploy.get_logs(d["id"], deployment_id, expected_status=404)

        if mender_performs_reboot:
            # If Mender performs the reboot, we need to wait for it to reboot
            # back into the original filesystem.
            reboot.verify_reboot_performed(number_of_reboots=2)
        else:
            # Else we reboot ourselves, just to make sure that we have not
            # unintentionally switched to the new partition.
            reboot.verify_reboot_not_performed()
            run("( sleep 10 ; reboot ) 2>/dev/null >/dev/null &")
            reboot.verify_reboot_performed()

    assert Helpers.get_active_partition() == expected_partition
    assert Helpers.yocto_id_installed_on_machine() == expected_image_id
    deploy.check_expected_status("finished", deployment_id)
def test_deployment_abortion_success(self):
    # maybe an acceptance test is enough for this check?
    if not env.host_string:
        execute(self.test_deployment_abortion_success,
                hosts=get_mender_clients())
        return

    install_image = conftest.get_valid_image()
    deployment_id, _ = common_update_procedure(install_image)

    Helpers.verify_reboot_performed()
    deploy.check_expected_statistics(deployment_id,
                                     "success",
                                     len(get_mender_clients()))

    deploy.abort_finished_deployment(deployment_id)
    deploy.check_expected_statistics(deployment_id,
                                     "success",
                                     len(get_mender_clients()))

    deploy.check_expected_status("finished", deployment_id)
def test_reject_bootstrap(self):
    """Make sure a rejected device does not perform an upgrade, and that
       its auth token is removed."""

    if not env.host_string:
        execute(self.test_reject_bootstrap, hosts=get_mender_clients())
        return

    # iterate over devices and reject them
    for device in adm.get_devices():
        adm.set_device_status(device["id"], "rejected")
        logging.info("Rejecting DeviceID: %s" % device["id"])

    adm.check_expected_status("rejected", len(get_mender_clients()))

    with Helpers.RebootDetector() as reboot:
        try:
            deployment_id, _ = common_update_procedure(
                install_image=conftest.get_valid_image())
        except AssertionError:
            logging.info("Failed to deploy upgrade to rejected device.")
            reboot.verify_reboot_not_performed()
        else:
            # use assert to fail, so we can get backend logs
            pytest.fail("no error while trying to deploy to rejected device")
            return

    finished = False

    # wait until the authtoken is removed from the file
    for _ in range(10):
        with settings(abort_exception=Exception):
            try:
                run("journalctl -u mender -l -n 3 | grep -q 'authentication request rejected'")
            except:
                time.sleep(30)
            else:
                finished = True
                break

    adm.accept_devices(1)

    if not finished:
        pytest.fail("failed to remove authtoken from mender-store file")
def test_unsigned_artifact_fails_deployment(self, standard_setup_with_signed_artifact_client):
    """ Make sure that an unsigned image fails, and is handled by the backend.

        Notice that this test needs a fresh instance of the backend, since we
        installed a signed image earlier without a verification key in
        mender.conf
    """
    if not env.host_string:
        execute(self.test_unsigned_artifact_fails_deployment,
                standard_setup_with_signed_artifact_client,
                hosts=get_mender_clients())
        return

    deployment_id, _ = common_update_procedure(install_image=conftest.get_valid_image())
    deploy.check_expected_status("finished", deployment_id)
    deploy.check_expected_statistics(deployment_id, "failure", 1)

    for d in adm.get_devices():
        assert "expecting signed artifact, but no signature file found" in \
            deploy.get_logs(d["device_id"], deployment_id)
def abort_deployment(self, abort_step=None, mender_performs_reboot=False):
    """ Trigger a deployment and cancel it within 15 seconds, making sure no
        deployment is performed.

        Args:
            mender_performs_reboot: if set to False, a manual reboot is
                                    performed and checks are done.
                                    if set to True, wait until the device has
                                    rebooted.
    """
    if not env.host_string:
        execute(self.abort_deployment,
                abort_step=abort_step,
                mender_performs_reboot=mender_performs_reboot,
                hosts=get_mender_clients())
        return

    install_image = conftest.get_valid_image()
    expected_partition = Helpers.get_active_partition()
    expected_image_id = Helpers.yocto_id_installed_on_machine()
    token = Helpers.place_reboot_token()

    deployment_id, _ = common_update_procedure(install_image,
                                               verify_status=False)

    if abort_step is not None:
        deploy.check_expected_statistics(deployment_id,
                                         abort_step,
                                         len(get_mender_clients()))
    deploy.abort(deployment_id)
    deploy.check_expected_statistics(deployment_id,
                                     "aborted",
                                     len(get_mender_clients()))

    # no deployment logs are sent by the client, is this expected?
    for d in adm.get_devices():
        deploy.get_logs(d["device_id"], deployment_id, expected_status=404)

    if not mender_performs_reboot:
        token.verify_reboot_not_performed()
        run("( sleep 10 ; reboot ) 2>/dev/null >/dev/null &")

    token.verify_reboot_performed()

    assert Helpers.get_active_partition() == expected_partition
    assert Helpers.yocto_id_installed_on_machine() == expected_image_id
    deploy.check_expected_status("finished", deployment_id)
def test_update_image_breaks_networking(self,
                                        install_image="core-image-full-cmdline-vexpress-qemu-broken-network.ext4"):
    """ Install an image without the systemd-networkd binary present.

        The network will not function and mender will not be able to send any
        logs. The expected outcome is that the update rolls back and is
        considered a failure.
    """
    if not env.host_string:
        execute(self.test_update_image_breaks_networking,
                hosts=get_mender_clients(),
                install_image=install_image)
        return

    deployment_id, _ = common_update_procedure(install_image)

    # Since the network is broken, two reboots will be performed, and the
    # last one will be detected.
    Helpers.verify_reboot_performed()

    deploy.check_expected_statistics(deployment_id,
                                     "failure",
                                     len(get_mender_clients()))
def test_update_image_recovery(self, install_image=conftest.get_valid_image()):
    """ Install an update, and reboot the system when we detect that the
        image is being copied over to the inactive partition.

        The test should result in a failure.
    """
    if not env.host_string:
        execute(self.test_update_image_recovery,
                hosts=get_mender_clients(),
                install_image=install_image)
        return

    installed_yocto_id = Helpers.yocto_id_installed_on_machine()

    inactive_part = Helpers.get_passive_partition()
    token = Helpers.place_reboot_token()
    deployment_id, _ = common_update_procedure(install_image)
    active_part = Helpers.get_active_partition()

    for i in range(60):
        time.sleep(0.5)
        with quiet():
            # make sure we are writing to the inactive partition
            output = run("fuser -mv %s" % (inactive_part))
        if output.return_code == 0:
            run("killall -s 9 mender")
            with settings(warn_only=True):
                run("( sleep 3 ; reboot ) 2>/dev/null >/dev/null &")
            break

    logging.info("Waiting for system to finish reboot")
    token.verify_reboot_performed()
    assert Helpers.get_active_partition() == active_part

    token = Helpers.place_reboot_token()
    deploy.check_expected_statistics(deployment_id,
                                     "failure",
                                     len(get_mender_clients()))
    token.verify_reboot_not_performed()
    assert Helpers.yocto_id_installed_on_machine() == installed_yocto_id
"up", "-d"], cwd="../") assert ret == 0, "failed to start docker-compose" if args.deploy: # create account for management api auth.get_auth_token() # wait for 10 devices to be available devices = adm.get_devices(10) assert len(devices) == 10 # accept all devices for d in devices: adm.set_device_status(d["id"], "accepted") # make sure artifact tool in current workdir is being used os.environ["PATH"] = os.path.dirname(os.path.realpath(__file__)) + "/downloaded-tools" + os.pathsep + os.environ["PATH"] # perform upgrade devices_to_update = list(set([device["device_id"] for device in adm.get_devices_status("accepted", expected_devices=10)])) deployment_id, artifact_id = common_update_procedure("core-image-full-cmdline-vexpress-qemu.ext4", device_type="test", devices=devices_to_update) print("deployment_id=%s" % deployment_id) print("artifact_id=%s" % artifact_id) print("devices=%d" % len(devices)) if args.kill: subprocess.call(["docker-compose", "-p", "testprod", "down", "-v", "--remove-orphans"])
# accept all devices
for d in devices:
    auth_v2.set_device_auth_set_status(d["id"],
                                       d["auth_sets"][0]["id"],
                                       "accepted")

# make sure the artifact tool in the current workdir is being used
os.environ["PATH"] = os.path.dirname(os.path.realpath(
    __file__)) + "/downloaded-tools" + os.pathsep + os.environ["PATH"]

# perform upgrade
devices_to_update = list(
    set([
        device["id"]
        for device in auth_v2.get_devices_status("accepted", expected_devices=10)
    ]))
deployment_id, artifact_id = common_update_procedure(
    "core-image-full-cmdline-%s.ext4" % machine_name,
    device_type="test",
    devices=devices_to_update)

print("deployment_id=%s" % deployment_id)
print("artifact_id=%s" % artifact_id)
print("devices=%d" % len(devices))

if args.kill:
    subprocess.call([
        "docker-compose", "-p", conftest.docker_compose_instance, "down", "-v",
        "--remove-orphans"
    ])
def test_update_device_group(self):
    """ Perform a successful upgrade on one group of devices, and assert that:
        * deployment status/logs are correct.
        * only the correct group is updated, not the other one.

        A reboot is performed, and the running partitions have been swapped.
        Deployment status will be set as successful for the device.
        Logs will not be retrieved, and result in 404.
    """

    # Beware that there are two parallel things going on below, one for
    # each group, hence a lot of separate execute() calls for each. We aim
    # to update the group alpha, not bravo.

    clients = get_mender_clients()
    assert len(clients) == 2
    alpha = clients[0]
    bravo = clients[1]

    ip_to_device_id = Helpers.ip_to_device_id_map(clients)
    id_alpha = ip_to_device_id[alpha]
    id_bravo = ip_to_device_id[bravo]
    print("ID of alpha host: %s\nID of bravo host: %s" % (id_alpha, id_bravo))

    ret = execute(Helpers.get_passive_partition, hosts=clients)
    pass_part_alpha = ret[alpha]
    pass_part_bravo = ret[bravo]

    inv.put_device_in_group(id_alpha, "Update")

    @parallel
    def place_reboot_tokens():
        return Helpers.place_reboot_token()
    tokens = execute(place_reboot_tokens, hosts=clients)

    deployment_id, expected_image_id = common_update_procedure(conftest.get_valid_image(),
                                                               devices=[id_alpha])

    @parallel
    def verify_reboot_performed_for_alpha_only(tokens):
        if env.host_string == alpha:
            tokens[alpha].verify_reboot_performed()
        elif env.host_string == bravo:
            # Extra long wait here, because a real update takes quite a lot
            # of time.
            tokens[bravo].verify_reboot_not_performed(180)
        else:
            raise Exception("verify_reboot_performed_for_alpha_only() called with unknown host")
    execute(verify_reboot_performed_for_alpha_only, tokens, hosts=clients)

    ret = execute(Helpers.get_passive_partition, hosts=clients)
    assert ret[alpha] != pass_part_alpha
    assert ret[bravo] == pass_part_bravo
    ret = execute(Helpers.get_active_partition, hosts=clients)
    assert ret[alpha] == pass_part_alpha
    assert ret[bravo] != pass_part_bravo

    deploy.check_expected_statistics(deployment_id,
                                     expected_status="success",
                                     expected_count=1)

    # No logs for either host: alpha because it was successful, bravo
    # because it should never have attempted an update in the first place.
    for id in [id_alpha, id_bravo]:
        deploy.get_logs(id, deployment_id, expected_status=404)

    assert execute(Helpers.yocto_id_installed_on_machine, hosts=alpha)[alpha] == expected_image_id
    assert execute(Helpers.yocto_id_installed_on_machine, hosts=bravo)[bravo] != expected_image_id

    # Important: Leave the groups as you found them: Empty.
    inv.delete_device_from_group(id_alpha, "Update")
def test_device_decommissioning(self, standard_setup_one_client):
    """ Decommission a device successfully """

    if not env.host_string:
        execute(self.test_device_decommissioning,
                standard_setup_one_client,
                hosts=get_mender_clients())
        return

    adm.check_expected_status("pending", len(get_mender_clients()))
    adm_id = adm.get_devices()[0]["id"]
    device_id = adm.get_devices()[0]["device_id"]

    adm.set_device_status(adm_id, "accepted")

    # wait until inventory is populated
    timeout = time.time() + (60 * 5)
    while time.time() < timeout:
        inventoryJSON = inv.get_devices()
        if "attributes" in inventoryJSON[0]:
            break
        time.sleep(.5)
    else:
        pytest.fail("never got inventory")

    # decommission the actual device
    deviceauth.decommission(device_id)

    # now check that the device no longer exists in admissions
    timeout = time.time() + (60 * 5)
    while time.time() < timeout:
        newAdmissions = adm.get_devices()[0]
        if device_id != newAdmissions["device_id"] \
           and adm_id != newAdmissions["id"]:
            logger.info("device [%s] not found in inventory [%s]" % (device_id, str(newAdmissions)))
            break
        else:
            logger.info("device [%s] found in inventory..." % (device_id))
            time.sleep(.5)
    else:
        pytest.fail("decommissioned device still available in admissions")

    # make sure a deployment to the decommissioned device fails
    try:
        # sometimes the deployment microservice hasn't removed the device yet
        time.sleep(120)
        logger.info("attempting to deploy to decommissioned device: %s" % (device_id))
        deployment_id, _ = common_update_procedure(install_image=conftest.get_valid_image(),
                                                   devices=[device_id],
                                                   verify_status=False)
    except AssertionError:
        logging.info("Failed to deploy upgrade to decommissioned device")
        # authtoken has been removed
        run("strings /data/mender/mender-store | grep -q 'authtoken' || false")
    else:
        pytest.fail("No error while trying to deploy to decommissioned device")

    # at this point, the device will re-appear, since it's actually still
    # online, and not actually decommissioned
    adm.check_expected_status("pending", len(get_mender_clients()))

    # make sure inventory is empty as well
    assert len(inv.get_devices()) == 0