def test_inference_job_use_alias_to_run(args):
    """Check that an inference job prints the user's alias from ``whoami``.

    The job echoes ``dummy`` followed by the output of ``whoami``. Once the
    job is running, its log is polled until ``dummy <local part of
    args.email>`` has been seen at least twice.
    """
    job_spec = utils.gen_default_job_description(
        "inference",
        args.email,
        args.uid,
        args.vc,
        cmd="echo dummy `whoami` ; sleep 120")

    def occurs_at_least(needle, count, haystack):
        """Return True when `needle` is found at least `count` times."""
        position = -1
        for _ in range(count):
            # Resume the search one character past the previous hit.
            position = haystack.find(needle, position + 1)
            if position == -1:
                return False
        return True

    alias = args.email.split("@")[0]
    expected_word = "dummy %s" % alias

    waiting_states = {"unapproved", "queued", "scheduling"}
    with utils.run_job(args.rest, job_spec) as job:
        state = job.block_until_state_not_in(waiting_states)
        assert state == "running"

        # Poll the log: up to 300 attempts, 0.5 s apart.
        for _ in range(300):
            log = utils.get_job_log(args.rest, args.email, job.jid)
            if occurs_at_least(expected_word, 2, log):
                break
            time.sleep(0.5)

        assert occurs_at_least(expected_word, 2, log), 'log is %s' % log
def test_data_job_running(args):
    """Check that a data-transfer job finishes and round-trips a file.

    The job writes a marker word to a local file, invokes ``copy_data.sh``
    once with the local directory as source and an ADL path as destination,
    and once in the opposite direction, then ``cat``s the copied-back file.
    The test expects the job to end in the ``finished`` state and the marker
    word to appear in the job log.
    """
    expected_state = "finished"
    expected_word = "wantThisInLog"

    # Adjacent literals are joined first; the single %s is filled afterwards.
    cmd_template = (
        "mkdir -p /tmp/dlts_test_dir; "
        "echo %s > /tmp/dlts_test_dir/testfile; "
        "cd /DataUtils; "
        "./copy_data.sh /tmp/dlts_test_dir adl://indexserveplatform-experiment-c09.azuredatalakestore.net/local/dlts_test_dir True 4194304 4 2 >/dev/null 2>&1;"
        "./copy_data.sh adl://indexserveplatform-experiment-c09.azuredatalakestore.net/local/dlts_test_dir /tmp/dlts_test_dir_copyback False 33554432 4 2 >/dev/null 2>&1;"
        "cat /tmp/dlts_test_dir_copyback/testfile; "
    )
    cmd = cmd_template % expected_word

    image = "indexserveregistry.azurecr.io/dlts-data-transfer-image:latest"

    job_spec = utils.gen_default_job_description(
        "data",
        args.email,
        args.uid,
        args.vc,
        cmd=cmd,
        image=image)

    non_terminal_states = {"unapproved", "queued", "scheduling", "running"}
    with utils.run_job(args.rest, job_spec) as job:
        state = job.block_until_state_not_in(non_terminal_states)
        assert expected_state == state

        # Poll the log: up to 10 attempts, 0.5 s apart.
        for _ in range(10):
            log = utils.get_job_log(args.rest, args.email, job.jid)
            if expected_word in log:
                break
            time.sleep(0.5)

        assert expected_word in log, 'assert {} in {}'.format(
            expected_word, log)
def test_blobfuse(args):
    """Check that a job can write, read and delete a file under /tmp/blob.

    The job spec gets its ``plugins`` entry from
    ``utils.load_azure_blob_config(args.config, "/tmp/blob")``. The command
    writes ``dummy`` to a file named after ``${DLTS_JOB_ID}`` in that
    directory, prints it back and removes it. The test expects the job to
    finish and ``dummy`` to appear in the job log.
    """
    path = "/tmp/blob/${DLTS_JOB_ID}"
    cmd = "echo dummy > %s; cat %s ; rm %s ;" % (path, path, path)

    job_spec = utils.gen_default_job_description(
        "regular",
        args.email,
        args.uid,
        args.vc,
        cmd=cmd)
    job_spec["plugins"] = utils.load_azure_blob_config(args.config,
                                                       "/tmp/blob")

    non_terminal_states = {"unapproved", "queued", "scheduling", "running"}
    with utils.run_job(args.rest, job_spec) as job:
        state = job.block_until_state_not_in(non_terminal_states)
        assert state == "finished", "state is not finished, but %s" % state

        # Poll the log: up to 5 attempts, 0.5 s apart.
        for _ in range(5):
            log = utils.get_job_log(args.rest, args.email, job.jid)
            if "dummy" in log:
                break
            time.sleep(0.5)

        assert "dummy" in log, "could not find dummy in log %s" % log
def job_status(job_id):
    """Get job status

    **Example request**

    .. sourcecode:: http

        POST /getDeploymentStatus/<job_id> HTTP/1.1
        {
            "token": "<token>"
        }

    **Example response**

    .. sourcecode:: http

        HTTP/1.1 200 OK
        Content-Encoding: gzip
        Content-Type: application/json
        Server: nginx/1.1.19
        Vary: Accept-Encoding

        {
            "status": "OK",
            "job_status": "Completed"
        }

    :jsonparam string token: Authentication token
    :statuscode 200: no error
    :statuscode 401: not authorized
    :>json string job_status: Job status
    :>json result: Job result (only present when the job exists)
    :>json job_log: Job log (only present when the job status is ``failed``)
    """
    payload = {}
    job = q.fetch_job(job_id)

    if not job:
        status = 'Does not exist'
    else:
        # Only the owner of a job may query it.
        if job.id not in current_user.jobs:
            raise PermissionDenied(current_user)
        status = job.get_status()
        payload['result'] = job.result

    if status == 'failed':
        # Attach the log so the caller can see why the job failed.
        payload['job_log'] = get_job_log(current_user.username, job_id)
    elif status == 'finished':
        # Clients are shown 'Completed' instead of the queue's 'finished'.
        status = 'Completed'

    payload['job_status'] = status
    return make_response(**payload)
def test_sudo_installed(args):
    """Check that ``sudo ls`` succeeds in an image that ships without sudo.

    The job runs ``sudo ls`` in ``pytorch/pytorch:latest`` and is expected to
    end in the ``finished`` state.
    NOTE(review): presumably the platform provisions sudo inside the
    container before the user command runs -- confirm.
    """
    cmd = "sudo ls"
    image = "pytorch/pytorch:latest"  # no sudo installed in this image
    job_spec = utils.gen_default_job_description(
        "regular",
        args.email,
        args.uid,
        args.vc,
        cmd=cmd,
        image=image,
    )
    with utils.run_job(args.rest, job_spec) as job:
        state = job.block_until_state_not_in(
            {"unapproved", "queued", "scheduling", "running"})
        # The log was previously fetched and discarded; include it in the
        # failure message so a failed run can be diagnosed.
        log = utils.get_job_log(args.rest, args.email, job.jid)
        assert state == "finished", "state is %s, log is %s" % (state, log)
def get_log(job_id):
    """Get log for job

    **Example request**

    .. sourcecode:: http

        POST /getLog/<job_id> HTTP/1.1
        {
            "token": "<token>",
            "line_num": 10
        }

    **Example response**

    .. sourcecode:: http

        HTTP/1.1 200 OK
        Content-Encoding: gzip
        Content-Type: application/json
        Server: nginx/1.1.19
        Vary: Accept-Encoding

        {
            "status": "OK",
            "log": "<log lines>"
        }

    :jsonparam string token: Authentication token
    :jsonparam integer line_num: Number of log lines to return (max 100, 10 default)
    :statuscode 200: no error
    :statuscode 401: not authorized
    :>json string log: Last logs
    """
    # NOTE(review): ``line_num`` is documented above, but this handler does
    # not request it from check_args nor pass it to get_job_log -- confirm
    # whether the limit is applied elsewhere or is unimplemented.
    request_data = check_args(())
    return make_response(log=get_job_log(request_data['username'], job_id))
def test_distributed_job_running(args, preemptable=False):
    """Check that a distributed job reaches ``running`` and logs its output.

    The job echoes a marker word and then sleeps. After the job leaves the
    waiting states it must be ``running``, and the marker word must show up
    in the job log within the polling window.

    `preemptable` is forwarded unchanged to the job description.
    """
    expected = "wantThisInLog"

    job_spec = utils.gen_default_job_description(
        "distributed",
        args.email,
        args.uid,
        args.vc,
        preemptable=preemptable,
        cmd="echo %s ; sleep 120" % expected)

    waiting_states = {"unapproved", "queued", "scheduling"}
    with utils.run_job(args.rest, job_spec) as job:
        state = job.block_until_state_not_in(waiting_states)
        assert state == "running"

        # Poll the log: up to 50 attempts, 0.5 s apart.
        for _ in range(50):
            log = utils.get_job_log(args.rest, args.email, job.jid)
            if expected in log:
                break
            time.sleep(0.5)

        assert expected in log, "assert {} in {}".format(expected, log)
def test_do_not_expose_private_key(args):
    """Check that ``DLTS_SSH_PRIVATE_KEY`` is not printed by the job.

    The job prints ``a``, then the value of ``DLTS_SSH_PRIVATE_KEY`` via
    ``printenv``, then ``b``. The test expects the job to finish and the log
    to contain ``a`` immediately followed by ``b`` on the next line, which
    only holds when ``printenv`` produced no output between the two echoes.
    """
    job_spec = utils.gen_default_job_description(
        "regular",
        args.email,
        args.uid,
        args.vc,
        cmd="echo a ; printenv DLTS_SSH_PRIVATE_KEY ; echo b")

    non_terminal_states = {"unapproved", "queued", "scheduling", "running"}
    with utils.run_job(args.rest, job_spec) as job:
        state = job.block_until_state_not_in(non_terminal_states)
        assert state == "finished"

        expected = "a\nb"

        # Poll the log: up to 50 attempts, 0.5 s apart.
        for _ in range(50):
            log = utils.get_job_log(args.rest, args.email, job.jid)
            if expected in log:
                break
            time.sleep(0.5)

        assert expected in log, 'assert {} in {}'.format(expected, log)