def test_job_save(helper: Helper, docker: aiodocker.Docker) -> None:
    """Save a running job's container as an image and start a job from it.

    The source job writes ``123`` into ``/test``.  After ``job save`` the
    second job is started from the saved image and succeeds only if that
    file has the same content.  The ``docker`` fixture is requested but not
    referenced in the body.
    """
    job_name = f"test-job-save-{uuid4().hex[:6]}"
    image = f"test-image:{job_name}"
    # One URI serves both as the CLI argument and as the text expected in
    # the CLI output.
    image_uri = f"image://{helper.cluster_name}/{helper.username}/{image}"

    source_job_id = helper.run_job_and_wait_state(
        ALPINE_IMAGE_NAME,
        command="sh -c 'echo -n 123 > /test; sleep 10m'",
        wait_state=JobStatus.RUNNING,
    )

    save_out = helper.run_cli(["job", "save", source_job_id, image_uri]).out
    expected_fragments = (
        f"Saving job '{source_job_id}' to image '{image_uri}'...",
        f"Using remote image '{image_uri}'",
        "Creating image from the job container",
        "Image created",
        f"Using local image '{helper.username}/{image}'",
        "Pushing image...",
    )
    for fragment in expected_fragments:
        assert fragment in save_out
    assert save_out.endswith(image_uri)

    # Stop the source job and wait for it to reach CANCELLED before the
    # saved image is used.
    helper.run_cli(["job", "kill", source_job_id])
    helper.wait_job_change_state_to(source_job_id, JobStatus.CANCELLED)

    # The job started from the saved image must find the file written above.
    helper.run_job_and_wait_state(
        image_uri,
        command='sh -c \'[ "$(cat /test)" = "123" ]\'',
        wait_state=JobStatus.SUCCEEDED,
    )
def test_job_run_volume_all(helper: Helper) -> None:
    """Check the storage mountpoints inside a job with and without ``--volume=ALL``.

    The in-job command tests for two directories and two environment
    variables.  It is expected to fail (CLI exit code 1) without the option
    and to succeed with it.
    """
    root_mountpoint = "/var/neuro"
    home_mountpoint = f"{root_mountpoint}/{helper.username}"
    checks = (
        f"[ -d {home_mountpoint} ]",
        f"[ -d {root_mountpoint}/neuromation ]",  # must be public
        f"[ $NEUROMATION_ROOT == {root_mountpoint} ]",
        f"[ $NEUROMATION_HOME == {home_mountpoint} ]",
    )
    command = "bash -c '" + " && ".join(checks) + "'"
    img = UBUNTU_IMAGE_NAME

    # First, run without --volume=ALL: the CLI call must fail.
    with pytest.raises(subprocess.CalledProcessError) as cm:
        helper.run_cli(["--quiet", "run", "-T", img, command])
    assert cm.value.returncode == 1

    # Then, run with --volume=ALL: the CLI announces the mountpoints.
    run_out = helper.run_cli(["run", "-T", "--volume=ALL", img, command]).out
    msg = (
        "Storage mountpoints will be available as the environment variables:\n"
        f" NEUROMATION_ROOT={root_mountpoint}\n"
        f" NEUROMATION_HOME={home_mountpoint}"
    )
    assert msg in run_out

    # Exactly one job ID must be printed; that job has to finish successfully.
    found_job_ids = re.findall("Job ID: (job-.+)", run_out)
    assert len(found_job_ids) == 1
    helper.wait_job_change_state_to(
        found_job_ids[0], JobStatus.SUCCEEDED, stop_state=JobStatus.FAILED
    )
def test_job_run_home_volumes_automount(helper: Helper, fakebrowser: Any) -> None:
    """Check that ``--volume HOME`` makes the storage directories visible in a job.

    The in-job command tests for two directories under ``/var/storage``.
    The CLI is expected to exit with code 125 without the option, and the
    job is expected to succeed with it.
    """
    command = "[ -d /var/storage/home -a -d /var/storage/neuromation ]"
    # Both invocations share everything except the --volume option.
    run_prefix = ["-q", "job", "run", "--detach", "--preset=cpu-micro"]
    run_suffix = [UBUNTU_IMAGE_NAME, command]

    # First, run without --volume=HOME.
    with pytest.raises(subprocess.CalledProcessError) as cm:
        helper.run_cli([*run_prefix, *run_suffix])
    assert cm.value.returncode == 125

    # Then, run with --volume=HOME; in quiet mode the output is used as the job ID.
    capture = helper.run_cli([*run_prefix, "--volume", "HOME", *run_suffix])
    helper.wait_job_change_state_to(capture.out, JobStatus.SUCCEEDED, JobStatus.FAILED)
def test_pass_config(image: str, helper: Helper) -> None:
    """Push an image, run ``neuro config show`` in a ``--pass-config`` job, expect exit code 0.

    NOTE(review): a second ``test_pass_config`` is defined later in this
    file.  If both live in the same module, the later definition replaces
    this one and this test is never collected.
    """
    # Push the image first.
    push_out = helper.run_cli(["image", "push", image]).out
    image_full_str = f"image://{helper.username}/{image}"
    assert push_out.endswith(image_full_str)

    # Start the job; in quiet mode the output is used as the job ID.
    run_args = [
        "job",
        "run",
        "-q",
        "-s",
        JOB_TINY_CONTAINER_PRESET,
        "--no-wait-start",
        "--pass-config",
        image_full_str,
        'bash -c "neuro config show"',
    ]
    job_id = helper.run_cli(run_args).out

    # Wait until the job has finished successfully.
    helper.wait_job_change_state_to(job_id, JobStatus.SUCCEEDED)

    # The status report must show a zero exit code.
    status_out = helper.run_cli(["job", "status", job_id]).out
    assert "Exit code: 0" in status_out
def test_job_description(helper: Helper) -> None:
    """Submit a job with ``--description`` and check how ``job ls`` shows it.

    The description and the command must appear in the normal listing of
    running jobs and must be absent from the quiet (``-q``) listing.

    The submitted job sleeps for ten minutes, so every check after the job
    ID is known runs inside ``try/finally``: the job is killed even when an
    assertion fails, instead of being left running.

    NOTE(review): further ``test_job_description`` functions are defined
    later in this file.  If they live in the same module, the last
    definition replaces this one and this test is never collected.
    """
    active_ls_args = ["job", "ls", "--status", "running", "--status", "pending"]

    # Remember the jobs that were active before this test (skip the header row).
    rows_before = helper.run_cli(active_ls_args).out.split("\n")[1:]
    jobs_orig = [row.split(" ")[0] for row in rows_before]

    description = "Test description for a job"

    # Run a new job.
    command = "bash -c 'sleep 10m; false'"
    captured = helper.run_cli(
        [
            "job",
            "submit",
            *JOB_TINY_CONTAINER_PARAMS,
            "--http",
            "80",
            "--description",
            description,
            "--non-preemptible",
            "--no-wait-start",
            UBUNTU_IMAGE_NAME,
            command,
        ]
    )
    match = re.match("Job ID: (.+)", captured.out)
    assert match is not None
    job_id = match.group(1)

    try:
        # Check it was not running before.
        assert job_id.startswith("job-")
        assert job_id not in jobs_orig

        # Check it is in the running/pending job list now.
        rows_after = helper.run_cli(active_ls_args).out.split("\n")[1:]
        jobs_updated = [row.split(" ")[0] for row in rows_after]
        assert job_id in jobs_updated

        # Wait until the job is running; FAILED is passed as the stop state.
        helper.wait_job_change_state_to(job_id, JobStatus.RUNNING, JobStatus.FAILED)

        # The normal listing shows the job with its description and command.
        store_out = helper.run_cli(["job", "ls", "--status", "running"]).out
        assert job_id in store_out
        assert description in store_out
        assert command in store_out

        # The quiet listing shows the job ID only.
        store_out = helper.run_cli(["-q", "job", "ls", "--status", "running"]).out
        assert job_id in store_out
        assert description not in store_out
        assert command not in store_out
    finally:
        # Always release the job, whether the checks passed or not.
        helper.kill_job(job_id, wait=False)
def test_job_run_exit_code(helper: Helper) -> None:
    """Run a job that exits with code 101 and check that ``job status`` reports it."""
    # Start the job; in quiet mode the output is used as the job ID.
    run_args = [
        "-q",
        "job",
        "run",
        "--no-wait-start",
        UBUNTU_IMAGE_NAME,
        'bash -c "exit 101"',
    ]
    job_id = helper.run_cli(run_args).out

    # A non-zero exit is expected, so wait for the FAILED state.
    helper.wait_job_change_state_to(job_id, JobStatus.FAILED)

    # The status report must contain the exit code of the command.
    status_out = helper.run_cli(["job", "status", job_id]).out
    assert "Exit code: 101" in status_out
def test_pass_config(helper: Helper) -> None:
    """Run a ``--pass-config`` job that checks for a ``db`` file via ``test -f``.

    The job succeeds only if the ``test -f`` check inside it succeeds.
    """
    # NOTE(review): to bash itself, ``$(NEURO_STEAL_CONFIG)`` is a command
    # substitution rather than a variable reference.  Presumably something
    # upstream expands ``$(VAR)`` before bash runs (Kubernetes does this for
    # container args) -- confirm before touching the string.
    in_job_check = 'bash -c "sleep 15 && test -f $(NEURO_STEAL_CONFIG)/db"'
    run_args = [
        "-q",
        "job",
        "run",
        "--no-wait-start",
        "--pass-config",
        UBUNTU_IMAGE_NAME,
        in_job_check,
    ]
    # In quiet mode the output is used as the job ID.
    job_id = helper.run_cli(run_args).out

    # FAILED is the stop state, so a failed "test -f ..." fails this test.
    helper.wait_job_change_state_to(
        job_id, JobStatus.SUCCEEDED, stop_state=JobStatus.FAILED
    )
def test_e2e_restart_failing(request: Any, helper: Helper) -> None:
    """Run ``false`` with ``--restart on-failure`` and expect the job to stay RUNNING.

    The job is registered for cleanup through ``request.addfinalizer`` as
    soon as its ID is known.
    """
    run_args = [
        "-q",
        "job",
        "run",
        "--restart",
        "on-failure",
        "--detach",
        UBUNTU_IMAGE_NAME,
        "false",
    ]
    # In quiet mode the output is used as the job ID.
    job_id = helper.run_cli(run_args).out

    def _kill_job() -> None:
        helper.kill_job(job_id, wait=False)

    request.addfinalizer(_kill_job)

    # The restart policy must appear as a whole line of the status report.
    status_lines = helper.run_cli(["job", "status", job_id]).out.splitlines()
    assert "Restart policy: on-failure" in status_lines

    # The job must reach RUNNING and still be RUNNING a second later.
    helper.wait_job_change_state_to(job_id, JobStatus.RUNNING)
    sleep(1)
    helper.assert_job_state(job_id, JobStatus.RUNNING)
def test_job_description(helper: Helper) -> None:
    """Run a job with a unique ``--description`` and find it in ``job ls`` output.

    The listing is requested with ``--format "{id}, {description}"`` and
    must contain both the job ID and the description.

    Changes from the previous version:

    * the initial ``job ls`` call was removed, because its result was
      never used;
    * the checks run inside ``try/finally`` so the fifteen-minute job is
      killed even when an assertion fails.

    NOTE(review): several ``test_job_description`` functions are defined
    in this file.  If they live in the same module, only the last
    definition is collected.
    """
    description = str(uuid4())

    # Run a new job.
    command = "bash -c 'sleep 15m; false'"
    captured = helper.run_cli(
        [
            "job",
            "run",
            "--http",
            "80",
            "--description",
            description,
            "--no-wait-start",
            UBUNTU_IMAGE_NAME,
            command,
        ]
    )
    match = re.match("Job ID: (.+)", captured.out)
    assert match is not None
    job_id = match.group(1)

    try:
        assert job_id.startswith("job-")

        # Wait until the job is running; FAILED is passed as the stop state.
        helper.wait_job_change_state_to(job_id, JobStatus.RUNNING, JobStatus.FAILED)

        # The formatted listing of running jobs must show the ID and the description.
        store_out = helper.run_cli(
            ["job", "ls", "--status", "running", "--format", "{id}, {description}"]
        ).out
        assert job_id in store_out
        assert description in store_out
    finally:
        # Always release the job, whether the checks passed or not.
        helper.kill_job(job_id, wait=False)
def test_job_run(helper: Helper) -> None:
    """Run a named job with an unrolled command and follow it through ``job ls``.

    The command is passed as separate arguments (``bash``, ``-c``, script),
    and the listing is expected to show it as ``bash -c 'sleep 10m; false'``.

    The job sleeps for ten minutes, so every check after the job ID is
    known runs inside ``try/finally``: the job is killed even when an
    assertion fails, instead of being left running.
    """
    job_name = f"test-job-{os.urandom(5).hex()}"

    # Kill another active job with the same name, if any.
    # Pass --owner because --name without --owner is too slow for admin users.
    captured = helper.run_cli(
        ["-q", "job", "ls", "--owner", helper.username, "--name", job_name]
    )
    if captured.out:
        jobs_same_name = captured.out.split("\n")
        assert len(jobs_same_name) == 1, f"found multiple active jobs named {job_name}"
        helper.kill_job(jobs_same_name[0])

    active_ls_args = ["job", "ls", "--status", "running", "--status", "pending"]

    # Remember the jobs that were active before this test (skip the header row).
    rows_before = helper.run_cli(active_ls_args).out.split("\n")[1:]
    jobs_orig = [row.split(" ")[0] for row in rows_before]

    run_out = helper.run_cli(
        [
            "job",
            "run",
            "--http",
            "80",
            "--no-wait-start",
            "--restart",
            "never",
            "--name",
            job_name,
            UBUNTU_IMAGE_NAME,
            # use unrolled notation to check shlex.join()
            "bash",
            "-c",
            "sleep 10m; false",
        ]
    ).out
    match = re.match("Job ID: (.+)", run_out)
    assert match is not None
    job_id = match.group(1)

    try:
        assert job_id.startswith("job-")
        assert job_id not in jobs_orig
        assert f"Name: {job_name}" in run_out

        # Check it is in the running/pending job list now.
        rows_after = helper.run_cli(active_ls_args).out.split("\n")[1:]
        jobs_updated = [row.split(" ")[0] for row in rows_after]
        assert job_id in jobs_updated

        # Wait until the job is running.
        helper.wait_job_change_state_to(job_id, JobStatus.RUNNING)

        # The listing of running jobs must show the job and its joined command.
        store_out = helper.run_cli(["job", "ls", "--status", "running"]).out
        assert job_id in store_out
        assert "bash -c 'sleep 10m; false'" in store_out
    finally:
        # Always release the job, whether the checks passed or not.
        helper.kill_job(job_id, wait=False)
def test_job_lifecycle(helper: Helper) -> None:
    """Submit a named job, watch it in ``job ls``, kill it by name and inspect it.

    Steps: clear any active job with the same name, submit the job, check
    the listings (normal and quiet) while it runs, kill it by name, then
    check ``job ls`` by name and ``job status`` by both ID and name.
    """
    job_name = f"job-{os.urandom(5).hex()}"
    by_name_args = ["-q", "job", "ls", "--name", job_name]
    active_ls_args = ["job", "ls", "--status", "running", "--status", "pending"]
    running_ls_args = ["job", "ls", "--status", "running"]

    # Kill another active job with the same name, if any.
    same_name_out = helper.run_cli(by_name_args).out
    if same_name_out:
        jobs_same_name = same_name_out.split("\n")
        assert len(jobs_same_name) == 1, f"found multiple active jobs named {job_name}"
        stale_job_id = jobs_same_name[0]
        helper.run_cli(["job", "kill", job_name])
        helper.wait_job_change_state_from(stale_job_id, JobStatus.RUNNING)
        assert not helper.run_cli(by_name_args).out

    # Remember the jobs that were active before this test (skip the header row).
    rows_before = helper.run_cli(active_ls_args).out.split("\n")[1:]
    jobs_orig = [row.split(" ")[0] for row in rows_before]

    command = 'bash -c "sleep 10m; false"'
    submit_out = helper.run_cli(
        [
            "job",
            "submit",
            *JOB_TINY_CONTAINER_PARAMS,
            "--http",
            "80",
            "--non-preemptible",
            "--no-wait-start",
            "--name",
            job_name,
            UBUNTU_IMAGE_NAME,
            command,
        ]
    ).out
    match = re.match("Job ID: (.+) Status:", submit_out)
    assert match is not None
    job_id = match.group(1)
    assert job_id.startswith("job-")
    assert job_id not in jobs_orig
    assert f"Name: {job_name}" in submit_out
    assert re.search("Http URL: http", submit_out), submit_out

    # Check it is in the running/pending job list now.
    rows_after = helper.run_cli(active_ls_args).out.split("\n")[1:]
    jobs_updated = [row.split(" ")[0] for row in rows_after]
    assert job_id in jobs_updated

    # Wait until the job is running.
    helper.wait_job_change_state_to(job_id, JobStatus.RUNNING)

    # The normal listing shows the job together with its command.
    listing = helper.run_cli(running_ls_args).out
    assert job_id in listing
    assert command in listing

    # The quiet listing shows the job ID but not the command.
    quiet_listing = helper.run_cli(["-q", *running_ls_args]).out
    assert job_id in quiet_listing
    assert command not in quiet_listing

    # Kill the job by name.
    helper.run_cli(["job", "kill", job_name])

    # Currently we only check that the job is not running anymore.
    # TODO(adavydow): replace with a succeeded check when the race condition
    # in platform-api is fixed.
    helper.wait_job_change_state_from(job_id, JobStatus.RUNNING)

    # It must have left the running job list.
    listing = helper.run_cli(running_ls_args).out
    assert job_id not in listing

    # Check job ls by name.
    by_name_listing = helper.run_cli(
        ["job", "ls", "-n", job_name, "-s", "succeeded"]
    ).out
    assert job_id in by_name_listing
    assert job_name in by_name_listing

    expected_status_head = f"Job: {job_id}\nName: {job_name}"

    # Check job status by ID (the "Exit code: 0" check is currently disabled).
    assert helper.run_cli(["job", "status", job_id]).out.startswith(
        expected_status_head
    )

    # Check job status by name.
    assert helper.run_cli(["job", "status", job_name]).out.startswith(
        expected_status_head
    )
def test_job_description(helper: Helper) -> None:
    """Submit a job with ``--description``, check the listings, then kill it by ID.

    The description and the command must appear in the normal listing of
    running jobs and be absent from the quiet (``-q``) listing.  After the
    kill, the job must no longer be listed as running.
    """
    active_ls_args = ["job", "ls", "--status", "running", "--status", "pending"]
    running_ls_args = ["job", "ls", "--status", "running"]

    # Remember the jobs that were active before this test (skip the header row).
    rows_before = helper.run_cli(active_ls_args).out.split("\n")[1:]
    jobs_orig = [row.split(" ")[0] for row in rows_before]

    description = "Test description for a job"

    # Run a new job.
    command = 'bash -c "sleep 10m; false"'
    submit_out = helper.run_cli(
        [
            "job",
            "submit",
            *JOB_TINY_CONTAINER_PARAMS,
            "--http",
            "80",
            "--description",
            description,
            "--non-preemptible",
            "--no-wait-start",
            UBUNTU_IMAGE_NAME,
            command,
        ]
    ).out
    match = re.match("Job ID: (.+) Status:", submit_out)
    assert match is not None
    job_id = match.group(1)

    # Check it was not running before.
    assert job_id.startswith("job-")
    assert job_id not in jobs_orig

    # Check it is in the running/pending job list now.
    rows_after = helper.run_cli(active_ls_args).out.split("\n")[1:]
    jobs_updated = [row.split(" ")[0] for row in rows_after]
    assert job_id in jobs_updated

    # Wait until the job is running; FAILED is passed as the stop state.
    helper.wait_job_change_state_to(job_id, JobStatus.RUNNING, JobStatus.FAILED)

    # The normal listing shows the job with its description and command.
    listing = helper.run_cli(running_ls_args).out
    assert job_id in listing
    assert description in listing
    assert command in listing

    # The quiet listing shows the job ID only.
    quiet_listing = helper.run_cli(["-q", *running_ls_args]).out
    assert job_id in quiet_listing
    assert description not in quiet_listing
    assert command not in quiet_listing

    # Kill the job.
    helper.run_cli(["job", "kill", job_id])

    # Currently we only check that the job is not running anymore.
    # TODO(adavydow): replace with a succeeded check when the race condition
    # in platform-api is fixed.
    helper.wait_job_change_state_from(job_id, JobStatus.RUNNING)

    # It must have left the running job list.
    listing = helper.run_cli(running_ls_args).out
    assert job_id not in listing