Ejemplo n.º 1
0
def test_node_tunnel_fall_back_when_local_port_taken():
    """Opens two tunnels to the same remote port, both requesting the same
        local port, and expects only the first one to be bound to it."""
    user = USER_53
    with ExitStack() as cleanup:
        cleanup.enter_context(disable_pytest_stdin())
        cleanup.enter_context(set_up_key_location(user))
        cleanup.enter_context(reset_environment(user))
        cleanup.enter_context(set_password(get_test_user_password(user)))

        allocation = show_cluster(name=TEST_CLUSTER).allocate_nodes(
            nodes=1,
            cores=1,
            memory_per_node=MiB(100),
            walltime=Walltime(minutes=30))
        cleanup.enter_context(cancel_on_exit(allocation))

        compute_node = allocation[0]
        allocation.wait(timeout=SLURM_WAIT_TIMEOUT)

        remote_port = get_free_remote_port(node=compute_node)
        local_port = get_free_local_port()

        # The requested local port was reported free, so the first tunnel
        # is expected to use it.
        first_tunnel = compute_node.tunnel(there=remote_port,
                                           here=local_port)
        cleanup.enter_context(close_tunnel_on_exit(first_tunnel))
        assert first_tunnel.here == local_port

        # Same request again while the first tunnel is still open: the
        # second tunnel must end up on a different local port.
        second_tunnel = compute_node.tunnel(there=remote_port,
                                            here=local_port)
        cleanup.enter_context(close_tunnel_on_exit(second_tunnel))
        assert second_tunnel.here != local_port
Ejemplo n.º 2
0
def test_dask_deployment_with_redeploy_failure():
    """Expects Dask deployment to raise `RuntimeError` when worker validation
        is replaced with a stub that always raises."""
    user = USER_42
    with ExitStack() as cleanup:
        cleanup.enter_context(disable_pytest_stdin())
        cleanup.enter_context(set_up_key_location(user))
        cleanup.enter_context(reset_environment(user))
        cleanup.enter_context(set_password(get_test_user_password(user)))

        allocation = show_cluster(name=TEST_CLUSTER).allocate_nodes(
            nodes=2,
            cores=1,
            memory_per_node=MiB(100),
            walltime=Walltime(minutes=30))
        cleanup.enter_context(cancel_on_exit(allocation))

        # The module whose `validate_worker` attribute gets swapped out.
        deploy_impl = idact.detail.dask.deploy_dask_impl
        assert deploy_impl.validate_worker
        original_validate_worker = deploy_impl.validate_worker

        # pylint: disable=unused-argument
        def fake_validate_worker(worker: DaskWorkerDeployment):
            print("Fake worker validation.")
            raise ValueError("Fake worker validation fail.")

        try:
            deploy_impl.validate_worker = fake_validate_worker

            with pytest.raises(RuntimeError):
                with deploy_dask_on_testing_cluster(allocation):
                    pass

        finally:
            # Always put the real validator back, even if the test failed.
            deploy_impl.validate_worker = original_validate_worker
Ejemplo n.º 3
0
def test_remove_runtime_dir_test():
    """Allocates a node and runs the runtime dir removal checks against it.

        Regardless of the outcome, `rm -rf *` is run on the node afterwards.
    """
    user = USER_15
    with ExitStack() as cleanup:
        cleanup.enter_context(set_up_key_location(user))
        cleanup.enter_context(disable_pytest_stdin())
        cleanup.enter_context(reset_environment(user))
        cleanup.enter_context(set_password(get_test_user_password(user)))

        allocation = show_cluster(name=TEST_CLUSTER).allocate_nodes(
            nodes=1,
            cores=1,
            memory_per_node=MiB(100),
            walltime=Walltime(minutes=30))
        cleanup.enter_context(cancel_on_exit(allocation))
        compute_node = allocation[0]
        try:
            allocation.wait(timeout=SLURM_WAIT_TIMEOUT)
            assert allocation.running()

            # Executed in this exact order; each check receives the node.
            checks = (check_will_remove_empty,
                      check_will_ignore_non_existent,
                      check_will_remove_files,
                      check_will_not_remove_dotfiles,
                      check_will_not_remove_nested_dirs)
            for check in checks:
                check(node=compute_node)
        finally:
            compute_node.run("rm -rf *")
Ejemplo n.º 4
0
def check_remote_key_and_node_access(stack: ExitStack, user: str):
    """Checks key installation and node access for `user` on the test
        cluster.

        Sequence of checks:

        - `whoami` on the access node returns `user`, both inside a
          `set_password` context and after leaving it.
        - The public key is found in `~/.ssh/authorized_keys`, while the same
          grep on `~/.ssh/authorized_keys.idact` raises `RuntimeError`.
        - After two nodes are allocated and waited for, the grep on
          `~/.ssh/authorized_keys.idact` succeeds and `whoami` on the first
          allocated node returns `user`.
        - Direct access from the access node is delegated to
          `check_direct_access_from_access_node_does_not_work`.

        :param stack: Exit stack that receives the `cancel_on_exit` context
                      of the allocation made here.

        :param user: Expected output of `whoami`; also used to look up
                     the test password.
    """
    public_key_value = get_public_key_value()
    grep_authorized_keys = \
        "grep '{public_key_value}' ~/.ssh/authorized_keys".format(
            public_key_value=public_key_value)
    grep_authorized_keys_idact = \
        "grep '{public_key_value}' ~/.ssh/authorized_keys.idact".format(
            public_key_value=public_key_value)

    cluster = show_cluster(name=TEST_CLUSTER)
    access_node = cluster.get_access_node()
    with set_password(get_test_user_password(user)):
        assert access_node.run('whoami') == user
    assert access_node.run('whoami') == user

    access_node.run(grep_authorized_keys)

    # Nothing has been allocated yet, so this grep is expected to fail.
    with pytest.raises(RuntimeError):
        access_node.run(grep_authorized_keys_idact)

    allocation = cluster.allocate_nodes(nodes=2,
                                        cores=1,
                                        memory_per_node=MiB(100),
                                        walltime=Walltime(minutes=30))
    stack.enter_context(cancel_on_exit(allocation))
    print(allocation)

    allocation.wait(timeout=SLURM_WAIT_TIMEOUT)
    access_node.run(grep_authorized_keys_idact)

    # Access to node without password works.
    assert allocation[0].run('whoami') == user

    check_direct_access_from_access_node_does_not_work(allocation[0])
Ejemplo n.º 5
0
def test_sbatch_arguments_unsupported_provided():
    """Expects `SbatchArguments` to raise `ValueError` when the allocation
        parameters carry an extra, unknown entry in `all`."""
    allocation_params = AllocationParameters(
        nodes=1,
        cores=2,
        walltime=Walltime(minutes=10),
        memory_per_node=GiB(1))
    # Inject an entry that was not passed through the constructor.
    allocation_params.all['Provided Unsupported Param'] = 12
    with pytest.raises(ValueError):
        SbatchArguments(params=allocation_params)
Ejemplo n.º 6
0
def test_allocation_parameters_create():
    """Builds allocation parameters and checks every accessor.

        `all` is expected to hold only the four core parameters; the native
        arguments are checked separately through `native_args`.
    """
    allocation_params = AllocationParameters(
        nodes=1,
        cores=2,
        memory_per_node=GiB(1),
        walltime=Walltime(minutes=10),
        native_args={
            '--abc': None,
            '--def': '80'
        })

    expected_all = {
        'nodes': 1,
        'cores': 2,
        'memory_per_node': GiB(1),
        'walltime': Walltime(minutes=10)
    }
    expected_native_args = {'--abc': None, '--def': '80'}

    assert allocation_params.all == expected_all
    assert allocation_params.nodes == 1
    assert allocation_params.cores == 2
    assert allocation_params.memory_per_node == GiB(1)
    assert allocation_params.walltime == Walltime(minutes=10)
    assert allocation_params.native_args == expected_native_args
Ejemplo n.º 7
0
def test_sbatch_arguments_create():
    """Checks the sbatch arguments produced from allocation parameters that
        carry no native arguments."""
    allocation_params = AllocationParameters(
        nodes=1,
        cores=2,
        memory_per_node=GiB(1),
        walltime=Walltime(minutes=10))

    sbatch_arguments = SbatchArguments(params=allocation_params)

    expected_args = {
        '--nodes': '1',
        '--cpus-per-task': '2',
        '--mem': '1048576K',
        '--time': '0-00:10:00',
    }

    assert sbatch_arguments.native_args == {}
    assert sbatch_arguments.args == expected_args
Ejemplo n.º 8
0
def test_node_tunnel():
    """Allocates a single node and passes it to `run_tunnel_test`."""
    user = USER_5
    with ExitStack() as cleanup:
        cleanup.enter_context(disable_pytest_stdin())
        cleanup.enter_context(set_up_key_location(user))
        cleanup.enter_context(reset_environment(user))
        cleanup.enter_context(set_password(get_test_user_password(user)))

        allocation = show_cluster(name=TEST_CLUSTER).allocate_nodes(
            nodes=1,
            cores=1,
            memory_per_node=MiB(100),
            walltime=Walltime(minutes=30))
        run_tunnel_test(user=user, nodes=allocation)
Ejemplo n.º 9
0
def test_node_tunnel_fall_back_when_local_port_free_but_fails():
    """Checks that a tunnel will fall back to a random port if local port
        is initially free, but tunnel cannot be created anyway (e.g. another
        process binds to it at the last moment).

        The failure is simulated by replacing `build_tunnel` in
        `idact.detail.nodes.node_impl` with a stub that raises on its first
        call and delegates to the real implementation on the second one.
    """
    user = USER_54
    with ExitStack() as stack:
        stack.enter_context(disable_pytest_stdin())
        stack.enter_context(set_up_key_location(user))
        stack.enter_context(reset_environment(user))
        stack.enter_context(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        nodes = cluster.allocate_nodes(nodes=1,
                                       cores=1,
                                       memory_per_node=MiB(100),
                                       walltime=Walltime(minutes=30))
        stack.enter_context(cancel_on_exit(nodes))

        node = nodes[0]
        nodes.wait(timeout=SLURM_WAIT_TIMEOUT)

        there = get_free_remote_port(node=node)
        here = get_free_local_port()

        real_build_tunnel = idact.detail.nodes.node_impl.build_tunnel

        # Number of times the stub below has been invoked.
        tries = 0

        def fake_build_tunnel(*args, **kwargs) -> TunnelInternal:
            nonlocal tries
            tries += 1
            if tries == 1:
                raise RuntimeError("Fake failure.")
            if tries != 2:
                # Explicit raise instead of `assert False`, so the guard
                # carries a message and does not depend on assert handling.
                raise AssertionError(
                    "build_tunnel was called more than twice.")

            return real_build_tunnel(*args, **kwargs)

        try:
            idact.detail.nodes.node_impl.build_tunnel = fake_build_tunnel
            tunnel = node.tunnel(there=there, here=here)
            stack.enter_context(close_tunnel_on_exit(tunnel))
            # One simulated failure plus one real attempt.
            assert tries == 2
            assert tunnel.here != here
        finally:
            # Always restore the real implementation.
            idact.detail.nodes.node_impl.build_tunnel = real_build_tunnel
Ejemplo n.º 10
0
def test_jupyter_deployment():
    """Allocates a single node and enters, then leaves, the `deploy_jupyter`
        context on the allocation."""
    user = USER_6
    with ExitStack() as cleanup:
        cleanup.enter_context(disable_pytest_stdin())
        cleanup.enter_context(set_up_key_location(user))
        cleanup.enter_context(reset_environment(user))
        cleanup.enter_context(set_password(get_test_user_password(user)))

        allocation = show_cluster(name=TEST_CLUSTER).allocate_nodes(
            nodes=1,
            cores=1,
            memory_per_node=MiB(100),
            walltime=Walltime(minutes=30))
        cleanup.enter_context(cancel_on_exit(allocation))

        # The test passes if the context can be entered and exited cleanly.
        with deploy_jupyter(allocation):
            pass
Ejemplo n.º 11
0
def test_node_tunnel_public_key():
    """Allocates a node and passes it to `run_tunnel_test`, with the
        environment reset for public key authentication.

        The password is only set while the allocation request is made.
    """
    user = USER_13
    with ExitStack() as cleanup:
        cleanup.enter_context(disable_pytest_stdin())
        cleanup.enter_context(set_up_key_location(user))
        cleanup.enter_context(
            reset_environment(user=user, auth=AuthMethod.PUBLIC_KEY))

        cluster = show_cluster(name=TEST_CLUSTER)

        password = get_test_user_password(user)
        with set_password(password):
            allocation = cluster.allocate_nodes(
                nodes=1,
                cores=1,
                memory_per_node=MiB(100),
                walltime=Walltime(minutes=30))
        run_tunnel_test(user=user, nodes=allocation)
Ejemplo n.º 12
0
def test_able_to_reach_nodes_when_using_password_based_authentication():
    """Compute nodes should be reachable even with password-based
        authentication, because the local public key is authorized for the
        compute nodes after the initial connection.
        A direct connection from the access node should fail, and the
        password is still used between the client and the access node."""
    user = USER_10
    with ExitStack() as cleanup:
        cleanup.enter_context(set_up_key_location(user))
        cleanup.enter_context(
            reset_environment(user=user, auth=AuthMethod.ASK))
        cleanup.enter_context(set_password(get_test_user_password(user)))
        cleanup.enter_context(disable_pytest_stdin())
        cluster = show_cluster(TEST_CLUSTER)
        access_node = cluster.get_access_node()

        allocation = cluster.allocate_nodes(nodes=2,
                                            cores=1,
                                            memory_per_node=MiB(100),
                                            walltime=Walltime(minutes=30))
        cleanup.enter_context(cancel_on_exit(allocation))
        print(allocation)

        allocation.wait(timeout=SLURM_WAIT_TIMEOUT)

        compute_node = allocation[0]
        assert isinstance(compute_node, NodeInternal)

        public_key_value = get_public_key_value()
        grep_authorized_keys = \
            "grep '{public_key_value}' ~/.ssh/authorized_keys".format(
                public_key_value=public_key_value)
        grep_authorized_keys_idact = (
            "grep '{public_key_value}'"
            " ~/.ssh/authorized_keys.idact".format(
                public_key_value=public_key_value))

        # Local key was installed for the deployed sshd, allowing access
        # between the access node and compute nodes.
        assert allocation[0].run('whoami') == user

        # Local key was not installed for the access node
        with pytest.raises(RuntimeError):
            access_node.run(grep_authorized_keys)

        # But it was installed for compute nodes.
        access_node.run(grep_authorized_keys_idact)

        check_direct_access_from_access_node_does_not_work(allocation[0])
Ejemplo n.º 13
0
def test_basic():
    """Walks through the basic workflow: list clusters, allocate two nodes,
        run a command on each, then verify the allocation can no longer be
        used once the `cancel_on_exit` context has been left."""
    user = USER_1
    with ExitStack() as cleanup:
        cleanup.enter_context(disable_pytest_stdin())
        cleanup.enter_context(set_up_key_location(user))
        cleanup.enter_context(reset_environment(user))
        cleanup.enter_context(set_password(get_test_user_password(user)))

        known_clusters = show_clusters()
        print(known_clusters)

        assert len(known_clusters) == 1

        cluster = show_cluster(name=TEST_CLUSTER)
        print(cluster)

        assert known_clusters[TEST_CLUSTER] == cluster

        allocation = cluster.allocate_nodes(
            nodes=2,
            cores=1,
            memory_per_node=MiB(100),
            walltime=Walltime(minutes=30),
            native_args={'--partition': 'debug'})
        with cancel_on_exit(allocation):
            assert len(allocation) == 2
            assert allocation[0] in allocation
            print(allocation)
            assert str(allocation) == repr(allocation)

            allocation.wait(timeout=SLURM_WAIT_TIMEOUT)
            assert allocation.running()

            print(allocation)
            print(allocation[0])

            for index in (0, 1):
                assert allocation[index].run('whoami') == user

        # Outside the context the allocation must be unusable.
        assert not allocation.running()
        with pytest.raises(RuntimeError):
            allocation.wait()
        with pytest.raises(RuntimeError):
            allocation[0].run('whoami')
Ejemplo n.º 14
0
def test_dask_deployment_with_redeploy_on_validation_failure():
    """Expects Dask deployment to succeed when worker validation fails once.

        Worker validation is replaced with a stub that raises on its first
        call only; after deployment the stub must have been called exactly
        three times.
    """
    user = USER_41
    with ExitStack() as cleanup:
        cleanup.enter_context(disable_pytest_stdin())
        cleanup.enter_context(set_up_key_location(user))
        cleanup.enter_context(reset_environment(user))
        cleanup.enter_context(set_password(get_test_user_password(user)))

        allocation = show_cluster(name=TEST_CLUSTER).allocate_nodes(
            nodes=2,
            cores=1,
            memory_per_node=MiB(100),
            walltime=Walltime(minutes=30))
        cleanup.enter_context(cancel_on_exit(allocation))

        # The module whose `validate_worker` attribute gets swapped out.
        deploy_impl = idact.detail.dask.deploy_dask_impl
        assert deploy_impl.validate_worker
        original_validate_worker = deploy_impl.validate_worker

        validation_calls = 0

        # pylint: disable=unused-argument
        def fake_validate_worker(worker: DaskWorkerDeployment):
            nonlocal validation_calls
            is_first_call = validation_calls == 0
            validation_calls += 1

            print("Fake worker validation.")
            if is_first_call:
                raise RuntimeError("Fake worker validation: First node fail.")
            print("Deciding the worker is valid.")

        try:
            deploy_impl.validate_worker = fake_validate_worker

            with deploy_dask_on_testing_cluster(allocation):
                pass

            assert validation_calls == 3

        finally:
            # Always put the real validator back, even if the test failed.
            deploy_impl.validate_worker = original_validate_worker
Ejemplo n.º 15
0
def test_dask_deployment_with_absolute_scratch_path():
    """Deploys Dask after setting the cluster's scratch config entry to an
        absolute path."""
    user = USER_24
    with ExitStack() as cleanup:
        cleanup.enter_context(disable_pytest_stdin())
        cleanup.enter_context(set_up_key_location(user))
        cleanup.enter_context(reset_environment(user))
        cleanup.enter_context(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        # Set before the allocation is requested.
        cluster.config.scratch = '/home/user-24'

        allocation = cluster.allocate_nodes(
            nodes=1,
            cores=1,
            memory_per_node=MiB(100),
            walltime=Walltime(minutes=10))
        cleanup.enter_context(cancel_on_exit(allocation))

        # The test passes if the context can be entered and exited cleanly.
        with deploy_dask_on_testing_cluster(allocation):
            pass
Ejemplo n.º 16
0
def test_dask_deployment_with_setup_actions():
    """Configures two Dask setup actions and checks their combined effect.

        The actions write `ABC` to a file and then rename it; the renamed
        file is read back from the node yielded by the deployment helper.
    """
    user = USER_18
    with ExitStack() as cleanup:
        cleanup.enter_context(disable_pytest_stdin())
        cleanup.enter_context(set_up_key_location(user))
        cleanup.enter_context(reset_environment(user))
        cleanup.enter_context(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        allocation = cluster.allocate_nodes(
            nodes=2,
            cores=1,
            memory_per_node=MiB(100),
            walltime=Walltime(minutes=30))
        cleanup.enter_context(cancel_on_exit(allocation))

        setup_commands = ['echo ABC > file.txt',
                          'mv file.txt file2.txt']
        cluster.config.setup_actions.dask = setup_commands
        with deploy_dask_on_testing_cluster(allocation) as deployed_node:
            assert deployed_node.run("cat file2.txt") == "ABC"
Ejemplo n.º 17
0
def test_format_sbatch_allocation_request():
    """Checks the exact sbatch command line built from allocation parameters.

        The native arguments contain shell metacharacters, spaces and a
        value-less flag, so the expected string also pins down how they are
        quoted.
    """
    native_args = {'--arg1': 'def; rm -rf /abc &&',
                   '--arg2': None,
                   '--arg3': 'a b c',
                   '--arg4 ||': '3',
                   'arg5': '3#',
                   '--mem': '8G'}
    allocation_params = AllocationParameters(
        nodes=1,
        cores=2,
        memory_per_node=GiB(1),
        walltime=Walltime(minutes=10),
        native_args=native_args)

    sbatch_arguments = SbatchArguments(params=allocation_params)

    formatted = format_sbatch_allocation_request(
        args=sbatch_arguments,
        entry_point_script='/home/user/script')

    # Fragments are concatenated as-is; each carries its own leading space.
    expected = "".join((
        "sbatch",
        " --arg1 'def; rm -rf /abc &&'",
        " --arg2",
        " --arg3 'a b c'",
        " '--arg4 ||' 3",
        " --mem 8G",
        " arg5 '3#'",
        " --cpus-per-task 2",
        " --mem 1048576K",
        " --nodes 1",
        " --time 0-00:10:00",
        " --tasks-per-node=1",
        " --parsable",
        " --output=/dev/null",
        " --wrap='export IDACT_ALLOCATION_ID=$SLURM_JOB_ID",
        " ; srun /home/user/script'",
    ))
    print()
    print(formatted)
    print(expected)

    assert formatted == expected
Ejemplo n.º 18
0
def test_generic_deployment():
    """Deploys a short script on an allocated node and checks its process.

        `kill -0` on the deployment's pid must succeed while inside the
        deployment's `cancel_on_exit` context and raise `RuntimeError` after
        leaving it.
    """
    user = USER_7
    with ExitStack() as cleanup:
        cleanup.enter_context(disable_pytest_stdin())
        cleanup.enter_context(set_up_key_location(user))
        cleanup.enter_context(reset_environment(user))
        cleanup.enter_context(set_password(get_test_user_password(user)))

        cluster = show_cluster(name=TEST_CLUSTER)
        print(cluster)
        allocation = cluster.allocate_nodes(
            nodes=1,
            cores=1,
            memory_per_node=MiB(100),
            walltime=Walltime(minutes=30))
        cleanup.enter_context(cancel_on_exit(allocation))
        compute_node = allocation[0]

        allocation.wait(timeout=SLURM_WAIT_TIMEOUT)
        assert allocation.running()

        assert isinstance(compute_node, NodeInternal)
        runtime_dir = create_runtime_dir(node=compute_node)
        cleanup.enter_context(
            remove_runtime_dir_on_failure(node=compute_node,
                                          runtime_dir=runtime_dir))
        script_contents = "echo ABC && sleep 30"

        assert isinstance(compute_node, NodeInternal)
        deployment = deploy_generic(node=compute_node,
                                    script_contents=script_contents,
                                    runtime_dir=runtime_dir)

        # Formatted separately for each probe, so the pid is read each time.
        probe_template = "kill -0 {pid}"
        with cancel_on_exit(deployment):
            print(deployment)

            compute_node.run(probe_template.format(pid=deployment.pid))

        with pytest.raises(RuntimeError):
            compute_node.run(probe_template.format(pid=deployment.pid))