Beispiel #1
0
def test_inference_job_use_alias_to_run(args):
    """Check that an inference job logs ``dummy <alias>`` at least twice.

    Submits an "inference" job whose command echoes ``dummy`` followed by the
    output of ``whoami`` and then sleeps. Once the job has left the
    unapproved/queued/scheduling states it must be ``running``; the job log
    is then polled (up to 300 attempts, 0.5 s apart) until the expected text
    is found twice. ``<alias>`` is the part of ``args.email`` before ``@``.
    """

    def occurs_at_least(needle, count, haystack):
        """Tell whether `needle` can be found `count` times in `haystack`."""
        position = -1
        for _ in range(count):
            # Resume the search one character past the previous hit.
            position = haystack.find(needle, position + 1)
            if position < 0:
                return False
        return True

    job_spec = utils.gen_default_job_description(
        "inference",
        args.email,
        args.uid,
        args.vc,
        cmd="echo dummy `whoami` ; sleep 120",
    )

    alias = args.email.split("@")[0]
    wanted = "dummy %s" % (alias)

    with utils.run_job(args.rest, job_spec) as job:
        pending_states = {"unapproved", "queued", "scheduling"}
        state = job.block_until_state_not_in(pending_states)
        assert state == "running"

        for _ in range(300):
            output = utils.get_job_log(args.rest, args.email, job.jid)
            seen_twice = occurs_at_least(wanted, 2, output)
            if seen_twice:
                break
            time.sleep(0.5)

        assert seen_twice, 'log is %s' % (output)
def test_data_job_running(args):
    """Check that a "data" job round-trips a file and finishes.

    The job writes a marker word to a local file, runs ``copy_data.sh`` once
    to push the directory to an ``adl://`` location and once to pull it back
    into a different local directory, then prints the copied-back file. The
    job must end in the ``finished`` state and the marker word must show up
    in the job log (polled up to 10 times, 0.5 s apart).
    """
    expected_state = "finished"
    expected_word = "wantThisInLog"

    # Adjacent literals are joined before ``%`` is applied, so the single
    # ``%s`` placeholder is filled with the marker word.
    cmd_template = (
        "mkdir -p /tmp/dlts_test_dir; "
        "echo %s > /tmp/dlts_test_dir/testfile; "
        "cd /DataUtils; "
        "./copy_data.sh /tmp/dlts_test_dir adl://indexserveplatform-experiment-c09.azuredatalakestore.net/local/dlts_test_dir True 4194304 4 2 >/dev/null 2>&1;"
        "./copy_data.sh adl://indexserveplatform-experiment-c09.azuredatalakestore.net/local/dlts_test_dir /tmp/dlts_test_dir_copyback False 33554432 4 2 >/dev/null 2>&1;"
        "cat /tmp/dlts_test_dir_copyback/testfile; "
    )
    cmd = cmd_template % expected_word

    image = "indexserveregistry.azurecr.io/dlts-data-transfer-image:latest"

    job_spec = utils.gen_default_job_description(
        "data",
        args.email,
        args.uid,
        args.vc,
        cmd=cmd,
        image=image,
    )

    with utils.run_job(args.rest, job_spec) as job:
        not_done_states = {"unapproved", "queued", "scheduling", "running"}
        state = job.block_until_state_not_in(not_done_states)
        assert expected_state == state

        for _ in range(10):
            output = utils.get_job_log(args.rest, args.email, job.jid)
            word_found = expected_word in output
            if word_found:
                break
            time.sleep(0.5)
        assert word_found, 'assert {} in {}'.format(expected_word, output)
def test_blobfuse(args):
    """Check that a job can write, read and remove a file under ``/tmp/blob``.

    The job spec's ``plugins`` entry is taken from
    ``utils.load_azure_blob_config(args.config, "/tmp/blob")``. The job
    writes ``dummy`` to a file named after ``${DLTS_JOB_ID}``, prints it and
    deletes it. The job must end as ``finished`` and ``dummy`` must appear in
    the job log (polled up to 5 times, 0.5 s apart).
    """
    path = "/tmp/blob/${DLTS_JOB_ID}"
    cmd = "echo dummy > %s; cat %s ; rm %s ;" % ((path,) * 3)

    job_spec = utils.gen_default_job_description(
        "regular",
        args.email,
        args.uid,
        args.vc,
        cmd=cmd,
    )
    job_spec["plugins"] = utils.load_azure_blob_config(
        args.config, "/tmp/blob")

    with utils.run_job(args.rest, job_spec) as job:
        not_done_states = {"unapproved", "queued", "scheduling", "running"}
        state = job.block_until_state_not_in(not_done_states)
        assert state == "finished", "state is not finished, but %s" % state

        for _ in range(5):
            output = utils.get_job_log(args.rest, args.email, job.jid)
            if "dummy" in output:
                break
            time.sleep(0.5)

        assert "dummy" in output, "could not find dummy in log %s" % (
            output)
Beispiel #4
0
def job_status(job_id):
    """Report the status of a job

    The job is looked up in the queue ``q``. When it exists, its id must be
    listed in ``current_user.jobs``.

    **Example request**

    .. sourcecode:: http

        POST /getDeploymentStatus/<job_id> HTTP/1.1
        {
            "token": "<token>"
        }

    **Example response**

    .. sourcecode:: http

        HTTP/1.1 200 OK
        Content-Encoding: gzip
        Content-Type: application/json
        Server: nginx/1.1.19
        Vary: Accept-Encoding

        {
            "status": "OK",
            "job_status": "Completed"
        }

    :jsonparam string token: Authentication token
    :statuscode 200: no error
    :statuscode 401: not authorized
    :>json string job_status: Job status; ``Completed`` for a finished job,
        ``Does not exist`` when the queue has no such job
    :>json result: The job's ``result``, present only when the job exists
    :>json job_log: Job log, present only when the status is ``failed``
    :raises PermissionDenied: if the job's id is not in ``current_user.jobs``
    """
    payload = {}

    job = q.fetch_job(job_id)
    if not job:
        status = 'Does not exist'
    else:
        # Only the owner of a job may inspect it.
        if job.id not in current_user.jobs:
            raise PermissionDenied(current_user)
        status = job.get_status()
        payload['result'] = job.result

    if status == 'failed':
        payload['job_log'] = get_job_log(current_user.username, job_id)
    elif status == 'finished':
        # Finished jobs are reported to clients as "Completed".
        status = 'Completed'

    payload['job_status'] = status
    return make_response(**payload)
Beispiel #5
0
def job_status(job_id):
    """Report the status of a job

    The job is looked up in the queue ``q``. When it exists, its id must be
    listed in ``current_user.jobs``.

    **Example request**

    .. sourcecode:: http

        POST /getDeploymentStatus/<job_id> HTTP/1.1
        {
            "token": "<token>"
        }

    **Example response**

    .. sourcecode:: http

        HTTP/1.1 200 OK
        Content-Encoding: gzip
        Content-Type: application/json
        Server: nginx/1.1.19
        Vary: Accept-Encoding

        {
            "status": "OK",
            "job_status": "Completed"
        }

    :jsonparam string token: Authentication token
    :statuscode 200: no error
    :statuscode 401: not authorized
    :>json string job_status: Job status; ``Completed`` for a finished job,
        ``Does not exist`` when the queue has no such job
    :>json result: The job's ``result``, present only when the job exists
    :>json job_log: Job log, present only when the status is ``failed``
    :raises PermissionDenied: if the job's id is not in ``current_user.jobs``
    """
    payload = {}

    job = q.fetch_job(job_id)
    if not job:
        status = 'Does not exist'
    else:
        # Only the owner of a job may inspect it.
        if job.id not in current_user.jobs:
            raise PermissionDenied(current_user)
        status = job.get_status()
        payload['result'] = job.result

    if status == 'failed':
        payload['job_log'] = get_job_log(current_user.username, job_id)
    elif status == 'finished':
        # Finished jobs are reported to clients as "Completed".
        status = 'Completed'

    payload['job_status'] = status
    return make_response(**payload)
def test_sudo_installed(args):
    """Check that ``sudo ls`` succeeds in an image without its own sudo.

    Submits a "regular" job running ``sudo ls`` on ``pytorch/pytorch:latest``
    (which, per the inline note, has no sudo installed), waits until the job
    has left every pending/running state, and asserts that it ended as
    ``finished``. The job log is fetched before asserting so that a failure
    report shows what the job printed.
    """
    cmd = "sudo ls"
    image = "pytorch/pytorch:latest"  # no sudo installed in this image

    job_spec = utils.gen_default_job_description(
        "regular",
        args.email,
        args.uid,
        args.vc,
        cmd=cmd,
        image=image,
    )

    with utils.run_job(args.rest, job_spec) as job:
        state = job.block_until_state_not_in(
            {"unapproved", "queued", "scheduling", "running"})
        log = utils.get_job_log(args.rest, args.email, job.jid)

        # Report the final state and the job log so a failure is diagnosable.
        assert state == "finished", "state is %s, log is %s" % (state, log)
Beispiel #7
0
def get_log(job_id):
    """Return the log of a job

    The log is fetched for the ``username`` found in the data returned by
    ``check_args``.

    NOTE(review): ``line_num`` is documented below, but this function does
    not read it -- confirm whether ``check_args`` or ``get_job_log`` applies
    it.

    **Example request**

    .. sourcecode:: http

        POST /getLog/<job_id> HTTP/1.1
        {
            "token": "<token>",
            "line_num": 10
        }

    **Example response**

    .. sourcecode:: http

        HTTP/1.1 200 OK
        Content-Encoding: gzip
        Content-Type: application/json
        Server: nginx/1.1.19
        Vary: Accept-Encoding

        {
            "status": "OK",
            "log": "<log lines>"
        }

    :jsonparam string token: Authentication token
    :jsonparam integer line_num: Number of log lines to return(max 100, 10 default)
    :statuscode 200: no error
    :statuscode 401: not authorized
    :>json string log: Last logs
    """
    # No extra arguments are required from the request.
    request_data = check_args(())
    username = request_data['username']
    return make_response(log=get_job_log(username, job_id))
Beispiel #8
0
def get_log(job_id):
    """Return the log of a job

    The log is fetched for the ``username`` found in the data returned by
    ``check_args``.

    NOTE(review): ``line_num`` is documented below, but this function does
    not read it -- confirm whether ``check_args`` or ``get_job_log`` applies
    it.

    **Example request**

    .. sourcecode:: http

        POST /getLog/<job_id> HTTP/1.1
        {
            "token": "<token>",
            "line_num": 10
        }

    **Example response**

    .. sourcecode:: http

        HTTP/1.1 200 OK
        Content-Encoding: gzip
        Content-Type: application/json
        Server: nginx/1.1.19
        Vary: Accept-Encoding

        {
            "status": "OK",
            "log": "<log lines>"
        }

    :jsonparam string token: Authentication token
    :jsonparam integer line_num: Number of log lines to return(max 100, 10 default)
    :statuscode 200: no error
    :statuscode 401: not authorized
    :>json string log: Last logs
    """
    # No extra arguments are required from the request.
    request_data = check_args(())
    username = request_data['username']
    return make_response(log=get_job_log(username, job_id))
def test_distributed_job_running(args, preemptable=False):
    """Check that a "distributed" job reaches ``running`` and logs a marker.

    The job echoes a marker word and then sleeps. After it leaves the
    unapproved/queued/scheduling states it must be ``running``, and the
    marker must appear in the job log (polled up to 50 times, 0.5 s apart).

    :param args: provides ``email``, ``uid``, ``vc`` and ``rest``
    :param preemptable: passed through to the generated job description
    """
    expected = "wantThisInLog"
    cmd = "echo %s ; sleep 120" % expected

    job_spec = utils.gen_default_job_description(
        "distributed",
        args.email,
        args.uid,
        args.vc,
        preemptable=preemptable,
        cmd=cmd,
    )

    with utils.run_job(args.rest, job_spec) as job:
        pending_states = {"unapproved", "queued", "scheduling"}
        state = job.block_until_state_not_in(pending_states)
        assert state == "running"

        for _ in range(50):
            output = utils.get_job_log(args.rest, args.email, job.jid)
            marker_found = expected in output
            if marker_found:
                break
            time.sleep(0.5)
        assert marker_found, "assert {} in {}".format(expected, output)
def test_do_not_expose_private_key(args):
    """Check that ``DLTS_SSH_PRIVATE_KEY`` prints nothing inside a job.

    The job echoes ``a``, runs ``printenv DLTS_SSH_PRIVATE_KEY`` and echoes
    ``b``. The job must end as ``finished`` and the log must contain ``a``
    and ``b`` on consecutive lines, i.e. ``printenv`` produced no output
    between them. The log is polled up to 50 times, 0.5 s apart.
    """
    cmd = "echo a ; printenv DLTS_SSH_PRIVATE_KEY ; echo b"

    job_spec = utils.gen_default_job_description(
        "regular",
        args.email,
        args.uid,
        args.vc,
        cmd=cmd,
    )

    with utils.run_job(args.rest, job_spec) as job:
        not_done_states = {"unapproved", "queued", "scheduling", "running"}
        state = job.block_until_state_not_in(not_done_states)
        assert state == "finished"

        expected = "a\nb"

        for _ in range(50):
            output = utils.get_job_log(args.rest, args.email, job.jid)
            marker_found = expected in output
            if marker_found:
                break
            time.sleep(0.5)
        assert marker_found, 'assert {} in {}'.format(expected, output)