def test_node_tunnel_fall_back_when_local_port_taken():
    """Checks that a tunnel will fall back to a random port if the local
        port is taken."""
    user = USER_53
    with ExitStack() as stack:
        stack.enter_context(disable_pytest_stdin())
        stack.enter_context(set_up_key_location(user))
        stack.enter_context(reset_environment(user))
        stack.enter_context(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        nodes = cluster.allocate_nodes(nodes=1,
                                       cores=1,
                                       memory_per_node=MiB(100),
                                       walltime=Walltime(minutes=30))
        stack.enter_context(cancel_on_exit(nodes))
        node = nodes[0]
        nodes.wait(timeout=SLURM_WAIT_TIMEOUT)

        there = get_free_remote_port(node=node)
        here = get_free_local_port()

        # The first tunnel takes the requested local port.
        tunnel_1 = node.tunnel(there=there, here=here)
        stack.enter_context(close_tunnel_on_exit(tunnel_1))
        assert tunnel_1.here == here

        # The second tunnel should fall back to a random free local port.
        tunnel_2 = node.tunnel(there=there, here=here)
        stack.enter_context(close_tunnel_on_exit(tunnel_2))
        assert tunnel_2.here != here


def test_dask_deployment_with_redeploy_failure():
    """Checks that the Dask deployment fails when worker validation
        fails on every attempt, including after redeployment."""
    user = USER_42
    with ExitStack() as stack:
        stack.enter_context(disable_pytest_stdin())
        stack.enter_context(set_up_key_location(user))
        stack.enter_context(reset_environment(user))
        stack.enter_context(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        nodes = cluster.allocate_nodes(nodes=2,
                                       cores=1,
                                       memory_per_node=MiB(100),
                                       walltime=Walltime(minutes=30))
        stack.enter_context(cancel_on_exit(nodes))

        assert idact.detail.dask.deploy_dask_impl.validate_worker
        stored_validate_worker = \
            idact.detail.dask.deploy_dask_impl.validate_worker

        def fake_validate_worker(worker: DaskWorkerDeployment):
            print("Fake worker validation.")
            raise ValueError("Fake worker validation fail.")

        try:
            idact.detail.dask.deploy_dask_impl.validate_worker = \
                fake_validate_worker
            with pytest.raises(RuntimeError):
                with deploy_dask_on_testing_cluster(nodes):
                    pass
        finally:
            idact.detail.dask.deploy_dask_impl.validate_worker = \
                stored_validate_worker


def test_remove_runtime_dir_test():
    """Checks the behavior of runtime dir removal on a compute node."""
    user = USER_15
    with ExitStack() as stack:
        stack.enter_context(set_up_key_location(user))
        stack.enter_context(disable_pytest_stdin())
        stack.enter_context(reset_environment(user))
        stack.enter_context(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        nodes = cluster.allocate_nodes(nodes=1,
                                       cores=1,
                                       memory_per_node=MiB(100),
                                       walltime=Walltime(minutes=30))
        stack.enter_context(cancel_on_exit(nodes))
        node = nodes[0]
        try:
            nodes.wait(timeout=SLURM_WAIT_TIMEOUT)
            assert nodes.running()

            check_will_remove_empty(node=node)
            check_will_ignore_non_existent(node=node)
            check_will_remove_files(node=node)
            check_will_not_remove_dotfiles(node=node)
            check_will_not_remove_nested_dirs(node=node)
        finally:
            node.run("rm -rf *")


def check_remote_key_and_node_access(stack: ExitStack, user: str):
    """Checks that the public key was installed for the access node,
        and is installed for compute nodes after allocation."""
    public_key_value = get_public_key_value()

    cluster = show_cluster(name=TEST_CLUSTER)
    node = cluster.get_access_node()
    with set_password(get_test_user_password(user)):
        assert node.run('whoami') == user
    assert node.run('whoami') == user

    # The public key was authorized for the access node.
    node.run("grep '{public_key_value}' ~/.ssh/authorized_keys".format(
        public_key_value=public_key_value))

    # It was not yet installed for compute nodes.
    with pytest.raises(RuntimeError):
        node.run(
            "grep '{public_key_value}' ~/.ssh/authorized_keys.idact".format(
                public_key_value=public_key_value))

    nodes = cluster.allocate_nodes(nodes=2,
                                   cores=1,
                                   memory_per_node=MiB(100),
                                   walltime=Walltime(minutes=30))
    stack.enter_context(cancel_on_exit(nodes))
    print(nodes)
    nodes.wait(timeout=SLURM_WAIT_TIMEOUT)

    # After allocation, the key was installed for compute nodes.
    node.run("grep '{public_key_value}' ~/.ssh/authorized_keys.idact".format(
        public_key_value=public_key_value))

    # Access to node without password works.
    assert nodes[0].run('whoami') == user

    check_direct_access_from_access_node_does_not_work(nodes[0])


def test_sbatch_arguments_unsupported_provided():
    """Checks that an unsupported allocation parameter is rejected."""
    params = AllocationParameters(nodes=1,
                                  cores=2,
                                  walltime=Walltime(minutes=10),
                                  memory_per_node=GiB(1))
    params.all['Provided Unsupported Param'] = 12

    with pytest.raises(ValueError):
        SbatchArguments(params=params)


def test_allocation_parameters_create():
    """Tests construction of allocation parameters."""
    params = AllocationParameters(nodes=1,
                                  cores=2,
                                  memory_per_node=GiB(1),
                                  walltime=Walltime(minutes=10),
                                  native_args={'--abc': None,
                                               '--def': '80'})

    assert params.all == {'nodes': 1,
                          'cores': 2,
                          'memory_per_node': GiB(1),
                          'walltime': Walltime(minutes=10)}
    assert params.nodes == 1
    assert params.cores == 2
    assert params.memory_per_node == GiB(1)
    assert params.walltime == Walltime(minutes=10)
    assert params.native_args == {'--abc': None,
                                  '--def': '80'}


def test_sbatch_arguments_create():
    """Checks the conversion of allocation parameters to sbatch arguments."""
    params = AllocationParameters(nodes=1,
                                  cores=2,
                                  memory_per_node=GiB(1),
                                  walltime=Walltime(minutes=10))

    sbatch_args = SbatchArguments(params=params)

    assert sbatch_args.native_args == {}
    assert sbatch_args.args == {'--nodes': '1',
                                '--cpus-per-task': '2',
                                '--mem': '1048576K',
                                '--time': '0-00:10:00'}


def test_node_tunnel():
    """Allocates a node and creates a tunnel."""
    user = USER_5
    with ExitStack() as stack:
        stack.enter_context(disable_pytest_stdin())
        stack.enter_context(set_up_key_location(user))
        stack.enter_context(reset_environment(user))
        stack.enter_context(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        nodes = cluster.allocate_nodes(nodes=1,
                                       cores=1,
                                       memory_per_node=MiB(100),
                                       walltime=Walltime(minutes=30))
        run_tunnel_test(user=user, nodes=nodes)


def test_node_tunnel_fall_back_when_local_port_free_but_fails():
    """Checks that a tunnel will fall back to a random port if the local
        port is initially free, but the tunnel cannot be created anyway,
        e.g. because another process binds to the port at the last
        moment."""
    user = USER_54
    with ExitStack() as stack:
        stack.enter_context(disable_pytest_stdin())
        stack.enter_context(set_up_key_location(user))
        stack.enter_context(reset_environment(user))
        stack.enter_context(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        nodes = cluster.allocate_nodes(nodes=1,
                                       cores=1,
                                       memory_per_node=MiB(100),
                                       walltime=Walltime(minutes=30))
        stack.enter_context(cancel_on_exit(nodes))
        node = nodes[0]
        nodes.wait(timeout=SLURM_WAIT_TIMEOUT)

        there = get_free_remote_port(node=node)
        here = get_free_local_port()

        real_build_tunnel = idact.detail.nodes.node_impl.build_tunnel

        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

        tries = [0]

        def fake_build_tunnel(*args, **kwargs) -> TunnelInternal:
            # Fail on the first attempt, succeed on the second (fallback).
            tries[0] += 1
            if tries[0] == 1:
                raise RuntimeError("Fake failure.")
            if tries[0] != 2:
                assert False
            return real_build_tunnel(*args, **kwargs)

        try:
            idact.detail.nodes.node_impl.build_tunnel = fake_build_tunnel
            tunnel = node.tunnel(there=there, here=here)
            stack.enter_context(close_tunnel_on_exit(tunnel))
            assert tries[0] == 2
            assert tunnel.here != here
        finally:
            idact.detail.nodes.node_impl.build_tunnel = real_build_tunnel
            sock.close()


def test_jupyter_deployment():
    """Allocates a node and deploys a Jupyter notebook on it."""
    user = USER_6
    with ExitStack() as stack:
        stack.enter_context(disable_pytest_stdin())
        stack.enter_context(set_up_key_location(user))
        stack.enter_context(reset_environment(user))
        stack.enter_context(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        nodes = cluster.allocate_nodes(nodes=1,
                                       cores=1,
                                       memory_per_node=MiB(100),
                                       walltime=Walltime(minutes=30))
        stack.enter_context(cancel_on_exit(nodes))

        with deploy_jupyter(nodes):
            pass


def test_node_tunnel_public_key():
    """Allocates a node and creates a tunnel, uses public key
        authentication."""
    user = USER_13
    with ExitStack() as stack:
        stack.enter_context(disable_pytest_stdin())
        stack.enter_context(set_up_key_location(user))
        stack.enter_context(
            reset_environment(user=user,
                              auth=AuthMethod.PUBLIC_KEY))

        cluster = show_cluster(name=TEST_CLUSTER)
        with set_password(get_test_user_password(user)):
            nodes = cluster.allocate_nodes(nodes=1,
                                           cores=1,
                                           memory_per_node=MiB(100),
                                           walltime=Walltime(minutes=30))
        run_tunnel_test(user=user, nodes=nodes)


def test_able_to_reach_nodes_when_using_password_based_authentication():
    """It should be possible to connect to compute nodes even when using
        password-based authentication, because the local public key is
        authorized for the compute nodes after the initial connection.
        However, a direct connection from the access node should fail.
        The password is still used between the client and the access
        node."""
    user = USER_10
    with ExitStack() as stack:
        stack.enter_context(set_up_key_location(user))
        stack.enter_context(reset_environment(user=user,
                                              auth=AuthMethod.ASK))
        stack.enter_context(set_password(get_test_user_password(user)))
        stack.enter_context(disable_pytest_stdin())

        cluster = show_cluster(TEST_CLUSTER)
        node = cluster.get_access_node()

        nodes = cluster.allocate_nodes(nodes=2,
                                       cores=1,
                                       memory_per_node=MiB(100),
                                       walltime=Walltime(minutes=30))
        stack.enter_context(cancel_on_exit(nodes))
        print(nodes)
        nodes.wait(timeout=SLURM_WAIT_TIMEOUT)

        compute_node = nodes[0]
        assert isinstance(compute_node, NodeInternal)

        public_key_value = get_public_key_value()

        # Local key was installed for the deployed sshd, allowing access
        # between the access node and compute nodes.
        assert nodes[0].run('whoami') == user

        # Local key was not installed for the access node.
        with pytest.raises(RuntimeError):
            node.run("grep '{public_key_value}' ~/.ssh/authorized_keys".format(
                public_key_value=public_key_value))

        # But it was installed for compute nodes.
        node.run("grep '{public_key_value}'"
                 " ~/.ssh/authorized_keys.idact".format(
                     public_key_value=public_key_value))

        check_direct_access_from_access_node_does_not_work(nodes[0])


def test_basic():
    """Allocates two nodes on the test cluster and runs basic commands
        on them until the allocation is cancelled."""
    user = USER_1
    with ExitStack() as stack:
        stack.enter_context(disable_pytest_stdin())
        stack.enter_context(set_up_key_location(user))
        stack.enter_context(reset_environment(user))
        stack.enter_context(set_password(get_test_user_password(user)))

        clusters = show_clusters()
        print(clusters)
        assert len(clusters) == 1

        cluster = show_cluster(name=TEST_CLUSTER)
        print(cluster)
        assert clusters[TEST_CLUSTER] == cluster

        nodes = cluster.allocate_nodes(nodes=2,
                                       cores=1,
                                       memory_per_node=MiB(100),
                                       walltime=Walltime(minutes=30),
                                       native_args={'--partition': 'debug'})
        with cancel_on_exit(nodes):
            assert len(nodes) == 2
            assert nodes[0] in nodes
            print(nodes)
            assert str(nodes) == repr(nodes)

            nodes.wait(timeout=SLURM_WAIT_TIMEOUT)
            assert nodes.running()
            print(nodes)
            print(nodes[0])

            assert nodes[0].run('whoami') == user
            assert nodes[1].run('whoami') == user

        # After cancellation, the nodes are no longer usable.
        assert not nodes.running()
        with pytest.raises(RuntimeError):
            nodes.wait()
        with pytest.raises(RuntimeError):
            nodes[0].run('whoami')


def test_dask_deployment_with_redeploy_on_validation_failure():
    """Checks that a Dask worker is redeployed after its first
        validation fails."""
    user = USER_41
    with ExitStack() as stack:
        stack.enter_context(disable_pytest_stdin())
        stack.enter_context(set_up_key_location(user))
        stack.enter_context(reset_environment(user))
        stack.enter_context(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        nodes = cluster.allocate_nodes(nodes=2,
                                       cores=1,
                                       memory_per_node=MiB(100),
                                       walltime=Walltime(minutes=30))
        stack.enter_context(cancel_on_exit(nodes))

        assert idact.detail.dask.deploy_dask_impl.validate_worker
        stored_validate_worker = \
            idact.detail.dask.deploy_dask_impl.validate_worker

        fake_validation_counter = [0]

        # pylint: disable=unused-argument
        def fake_validate_worker(worker: DaskWorkerDeployment):
            current_count = fake_validation_counter[0]
            fake_validation_counter[0] = current_count + 1
            print("Fake worker validation.")
            if current_count == 0:
                raise RuntimeError("Fake worker validation: First node fail.")
            print("Deciding the worker is valid.")

        try:
            idact.detail.dask.deploy_dask_impl.validate_worker = \
                fake_validate_worker
            with deploy_dask_on_testing_cluster(nodes):
                pass
            # Two workers, plus one redeployment of the first worker.
            assert fake_validation_counter[0] == 3
        finally:
            idact.detail.dask.deploy_dask_impl.validate_worker = \
                stored_validate_worker


def test_dask_deployment_with_absolute_scratch_path():
    """Checks Dask deployment when the cluster scratch path is absolute."""
    user = USER_24
    with ExitStack() as stack:
        stack.enter_context(disable_pytest_stdin())
        stack.enter_context(set_up_key_location(user))
        stack.enter_context(reset_environment(user))
        stack.enter_context(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        cluster.config.scratch = '/home/user-24'

        nodes = cluster.allocate_nodes(nodes=1,
                                       cores=1,
                                       memory_per_node=MiB(100),
                                       walltime=Walltime(minutes=10))
        stack.enter_context(cancel_on_exit(nodes))

        with deploy_dask_on_testing_cluster(nodes):
            pass


def test_dask_deployment_with_setup_actions():
    """Checks that Dask setup actions are executed before deployment."""
    user = USER_18
    with ExitStack() as stack:
        stack.enter_context(disable_pytest_stdin())
        stack.enter_context(set_up_key_location(user))
        stack.enter_context(reset_environment(user))
        stack.enter_context(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        nodes = cluster.allocate_nodes(nodes=2,
                                       cores=1,
                                       memory_per_node=MiB(100),
                                       walltime=Walltime(minutes=30))
        stack.enter_context(cancel_on_exit(nodes))

        cluster.config.setup_actions.dask = ['echo ABC > file.txt',
                                             'mv file.txt file2.txt']

        with deploy_dask_on_testing_cluster(nodes) as node:
            assert node.run("cat file2.txt") == "ABC"


def test_format_sbatch_allocation_request():
    """Checks the formatting of an sbatch allocation request, including
        quoting of native arguments."""
    params = AllocationParameters(nodes=1,
                                  cores=2,
                                  memory_per_node=GiB(1),
                                  walltime=Walltime(minutes=10),
                                  native_args={
                                      '--arg1': 'def; rm -rf /abc &&',
                                      '--arg2': None,
                                      '--arg3': 'a b c',
                                      '--arg4 ||': '3',
                                      'arg5': '3#',
                                      '--mem': '8G'})

    args = SbatchArguments(params=params)

    formatted = format_sbatch_allocation_request(
        args=args,
        entry_point_script='/home/user/script')
    expected = ("sbatch"
                " --arg1 'def; rm -rf /abc &&'"
                " --arg2"
                " --arg3 'a b c'"
                " '--arg4 ||' 3"
                " --mem 8G"
                " arg5 '3#'"
                " --cpus-per-task 2"
                " --mem 1048576K"
                " --nodes 1"
                " --time 0-00:10:00"
                " --tasks-per-node=1"
                " --parsable"
                " --output=/dev/null"
                " --wrap='export IDACT_ALLOCATION_ID=$SLURM_JOB_ID"
                " ; srun /home/user/script'")
    print()
    print(formatted)
    print(expected)
    assert formatted == expected


def test_generic_deployment():
    """Deploys a generic script on a node and checks that the deployment
        can be cancelled."""
    user = USER_7
    with ExitStack() as stack:
        stack.enter_context(disable_pytest_stdin())
        stack.enter_context(set_up_key_location(user))
        stack.enter_context(reset_environment(user))
        stack.enter_context(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        print(cluster)

        nodes = cluster.allocate_nodes(nodes=1,
                                       cores=1,
                                       memory_per_node=MiB(100),
                                       walltime=Walltime(minutes=30))
        stack.enter_context(cancel_on_exit(nodes))
        node = nodes[0]

        nodes.wait(timeout=SLURM_WAIT_TIMEOUT)
        assert nodes.running()

        assert isinstance(node, NodeInternal)
        runtime_dir = create_runtime_dir(node=node)
        stack.enter_context(
            remove_runtime_dir_on_failure(node=node,
                                          runtime_dir=runtime_dir))

        script_contents = "echo ABC && sleep 30"

        assert isinstance(node, NodeInternal)
        deployment = deploy_generic(node=node,
                                    script_contents=script_contents,
                                    runtime_dir=runtime_dir)

        with cancel_on_exit(deployment):
            print(deployment)
            # The deployed process should be alive.
            node.run("kill -0 {pid}".format(pid=deployment.pid))

        # After cancellation, the process should be gone.
        with pytest.raises(RuntimeError):
            node.run("kill -0 {pid}".format(pid=deployment.pid))